[FFmpeg-devel] [RFC] lavfi: add opencl tonemap filter.

Ruiling Song ruiling.song at intel.com
Fri May 4 10:32:58 EEST 2018


It basically does hdr to sdr conversion with tonemapping.

Signed-off-by: Ruiling Song <ruiling.song at intel.com>
---
This patch tries to add a filter to do hdr to sdr conversion with tonemapping.
The filter does all the job of tonemapping in one pass, which is quite different from the vf_tonemap.c
I choose this way because I think this would introduce less memory access.

And I find that tonemaping shares lots of code with colorspace conversion.
So I move color space related code into seprated files (both OpenCL kernel and host code).

I am not sure whether the design seems OK?
Is there anybody would like to give some comments on the overall design or implementation details?


Thanks!
Ruiling

 configure                              |   1 +
 libavfilter/Makefile                   |   2 +
 libavfilter/allfilters.c               |   1 +
 libavfilter/colorspace_basic.c         |  89 +++++++
 libavfilter/colorspace_basic.h         |  40 +++
 libavfilter/opencl/colorspace_basic.cl | 137 ++++++++++
 libavfilter/opencl/tonemap.cl          | 136 ++++++++++
 libavfilter/opencl_source.h            |   2 +
 libavfilter/vf_tonemap_opencl.c        | 472 +++++++++++++++++++++++++++++++++
 9 files changed, 880 insertions(+)
 create mode 100644 libavfilter/colorspace_basic.c
 create mode 100644 libavfilter/colorspace_basic.h
 create mode 100644 libavfilter/opencl/colorspace_basic.cl
 create mode 100644 libavfilter/opencl/tonemap.cl
 create mode 100644 libavfilter/vf_tonemap_opencl.c

diff --git a/configure b/configure
index 7f199c6..b9e464d 100755
--- a/configure
+++ b/configure
@@ -3395,6 +3395,7 @@ tinterlace_filter_deps="gpl"
 tinterlace_merge_test_deps="tinterlace_filter"
 tinterlace_pad_test_deps="tinterlace_filter"
 tonemap_filter_deps="const_nan"
+tonemap_opencl_filter_deps="opencl"
 unsharp_opencl_filter_deps="opencl"
 uspp_filter_deps="gpl avcodec"
 vaguedenoiser_filter_deps="gpl"
diff --git a/libavfilter/Makefile b/libavfilter/Makefile
index 3454f25..7a1b0e8 100644
--- a/libavfilter/Makefile
+++ b/libavfilter/Makefile
@@ -348,6 +348,8 @@ OBJS-$(CONFIG_TINTERLACE_FILTER)             += vf_tinterlace.o
 OBJS-$(CONFIG_TLUT2_FILTER)                  += vf_lut2.o framesync.o
 OBJS-$(CONFIG_TMIX_FILTER)                   += vf_mix.o framesync.o
 OBJS-$(CONFIG_TONEMAP_FILTER)                += vf_tonemap.o
+OBJS-$(CONFIG_TONEMAP_OPENCL_FILTER)         += vf_tonemap_opencl.o colorspace_basic.o opencl.o \
+                                                opencl/tonemap.o opencl/colorspace_basic.o
 OBJS-$(CONFIG_TRANSPOSE_FILTER)              += vf_transpose.o
 OBJS-$(CONFIG_TRIM_FILTER)                   += trim.o
 OBJS-$(CONFIG_UNPREMULTIPLY_FILTER)          += vf_premultiply.o framesync.o
diff --git a/libavfilter/allfilters.c b/libavfilter/allfilters.c
index d958f9b..759097a 100644
--- a/libavfilter/allfilters.c
+++ b/libavfilter/allfilters.c
@@ -339,6 +339,7 @@ extern AVFilter ff_vf_tinterlace;
 extern AVFilter ff_vf_tlut2;
 extern AVFilter ff_vf_tmix;
 extern AVFilter ff_vf_tonemap;
+extern AVFilter ff_vf_tonemap_opencl;
 extern AVFilter ff_vf_transpose;
 extern AVFilter ff_vf_trim;
 extern AVFilter ff_vf_unpremultiply;
diff --git a/libavfilter/colorspace_basic.c b/libavfilter/colorspace_basic.c
new file mode 100644
index 0000000..93f9f08
--- /dev/null
+++ b/libavfilter/colorspace_basic.c
@@ -0,0 +1,89 @@
+/*
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include "colorspace_basic.h"
+
+
+void invert_matrix3x3(const double in[3][3], double out[3][3])
+{
+    double m00 = in[0][0], m01 = in[0][1], m02 = in[0][2],
+           m10 = in[1][0], m11 = in[1][1], m12 = in[1][2],
+           m20 = in[2][0], m21 = in[2][1], m22 = in[2][2];
+    int i, j;
+    double det;
+
+    out[0][0] =  (m11 * m22 - m21 * m12);
+    out[0][1] = -(m01 * m22 - m21 * m02);
+    out[0][2] =  (m01 * m12 - m11 * m02);
+    out[1][0] = -(m10 * m22 - m20 * m12);
+    out[1][1] =  (m00 * m22 - m20 * m02);
+    out[1][2] = -(m00 * m12 - m10 * m02);
+    out[2][0] =  (m10 * m21 - m20 * m11);
+    out[2][1] = -(m00 * m21 - m20 * m01);
+    out[2][2] =  (m00 * m11 - m10 * m01);
+
+    det = m00 * out[0][0] + m10 * out[0][1] + m20 * out[0][2];
+    det = 1.0 / det;
+
+    for (i = 0; i < 3; i++) {
+        for (j = 0; j < 3; j++)
+            out[i][j] *= det;
+    }
+}
+
+void mul3x3(double dst[3][3], const double src1[3][3], const double src2[3][3])
+{
+    int m, n;
+
+    for (m = 0; m < 3; m++)
+        for (n = 0; n < 3; n++)
+            dst[m][n] = src2[m][0] * src1[0][n] +
+                        src2[m][1] * src1[1][n] +
+                        src2[m][2] * src1[2][n];
+}
+/*
+ * see e.g. http://www.brucelindbloom.com/index.html?Eqn_RGB_XYZ_Matrix.html
+ */
+void fill_rgb2xyz_table(const struct ColorPrimaries *coeffs,
+                        const struct WhitePoint *wp,
+                        double rgb2xyz[3][3])
+{
+    double i[3][3], sr, sg, sb, zw;
+
+    rgb2xyz[0][0] = coeffs->xr / coeffs->yr;
+    rgb2xyz[0][1] = coeffs->xg / coeffs->yg;
+    rgb2xyz[0][2] = coeffs->xb / coeffs->yb;
+    rgb2xyz[1][0] = rgb2xyz[1][1] = rgb2xyz[1][2] = 1.0;
+    rgb2xyz[2][0] = (1.0 - coeffs->xr - coeffs->yr) / coeffs->yr;
+    rgb2xyz[2][1] = (1.0 - coeffs->xg - coeffs->yg) / coeffs->yg;
+    rgb2xyz[2][2] = (1.0 - coeffs->xb - coeffs->yb) / coeffs->yb;
+    invert_matrix3x3(rgb2xyz, i);
+    zw = 1.0 - wp->xw - wp->yw;
+    sr = i[0][0] * wp->xw + i[0][1] * wp->yw + i[0][2] * zw;
+    sg = i[1][0] * wp->xw + i[1][1] * wp->yw + i[1][2] * zw;
+    sb = i[2][0] * wp->xw + i[2][1] * wp->yw + i[2][2] * zw;
+    rgb2xyz[0][0] *= sr;
+    rgb2xyz[0][1] *= sg;
+    rgb2xyz[0][2] *= sb;
+    rgb2xyz[1][0] *= sr;
+    rgb2xyz[1][1] *= sg;
+    rgb2xyz[1][2] *= sb;
+    rgb2xyz[2][0] *= sr;
+    rgb2xyz[2][1] *= sg;
+    rgb2xyz[2][2] *= sb;
+}
diff --git a/libavfilter/colorspace_basic.h b/libavfilter/colorspace_basic.h
new file mode 100644
index 0000000..5647ca6
--- /dev/null
+++ b/libavfilter/colorspace_basic.h
@@ -0,0 +1,40 @@
+/*
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#ifndef AVFILTER_COLORSPACE_BASIC_H
+#define AVFILTER_COLORSPACE_BASIC_H
+
+#include "libavutil/common.h"
+
+struct LumaCoefficients {
+    double cr, cg, cb;
+};
+
+struct ColorPrimaries {
+    double xr, yr, xg, yg, xb, yb;
+};
+
+struct WhitePoint {
+    double xw, yw;
+};
+
+void invert_matrix3x3(const double in[3][3], double out[3][3]);
+void mul3x3(double dst[3][3], const double src1[3][3], const double src2[3][3]);
+void fill_rgb2xyz_table(const struct ColorPrimaries *coeffs,
+                        const struct WhitePoint *wp, double rgb2xyz[3][3]);
+#endif
diff --git a/libavfilter/opencl/colorspace_basic.cl b/libavfilter/opencl/colorspace_basic.cl
new file mode 100644
index 0000000..478a4f3
--- /dev/null
+++ b/libavfilter/opencl/colorspace_basic.cl
@@ -0,0 +1,137 @@
+/*
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+constant const float ST2084_M1 = 0.1593017578125f;
+constant const float ST2084_M2 = 78.84375f;
+constant const float ST2084_C1 = 0.8359375f;
+constant const float ST2084_C2 = 18.8515625f;
+constant const float ST2084_C3 = 18.6875f;
+
+__constant float yuv2rgb_bt2020[] = {
+    1.0f, 0.0f, 1.4746f,
+    1.0f, -0.16455f, -0.57135f,
+    1.0f, 1.8814f, 0.0f
+};
+
+__constant float yuv2rgb_bt709[] = {
+    1.0f, 0.0f, 1.5748f,
+    1.0f, -0.18732f, -0.46812f,
+    1.0f, 1.8556f, 0.0f
+};
+
+__constant float rgb2yuv_bt709[] = {
+    0.2126f, 0.7152f, 0.0722f,
+    -0.11457f, -0.38543f, 0.5f,
+    0.5f, -0.45415f, -0.04585f
+};
+
+__constant float rgb2yuv_bt2020[] ={
+    0.2627f, 0.678f, 0.0593f,
+    -0.1396f, -0.36037f, 0.5f,
+    0.5f, -0.4598f, -0.0402f,
+};
+
+float eotf_st2084(float x) {
+    float p = pow(x, 1.0f / ST2084_M2);
+    float a = max(p -ST2084_C1, 0.0f);
+    float b = max(ST2084_C2 - ST2084_C3 * p, 1e-6f);
+    float c  = pow(a / b, 1.0f / ST2084_M1);
+    return x > 0.0f ? c : 0.0f;
+}
+
+float inverse_eotf_bt1886(float c) {
+  return c < 0.0f ? 0.0f : pow(c, 1.0f / 2.4f);
+}
+
+float oetf_bt709(float c) {
+  c = c < 0.0f ? 0.0f : c;
+  float r1 = 4.5f * c;
+  float r2 = 1.099f * pow(c, 0.45f) - 0.099f;
+  return c < 0.018f ? r1 : r2;
+}
+float inverse_oetf_bt709(float c) {
+  float r1 = c / 4.5f;
+  float r2 = pow((c + 0.099f) / 1.099f, 1.0f / 0.45f);
+  return c < 0.081f ? r1 : r2;
+}
+
+float get_luma(float r, float g, float b) {
+  return r * YUV_COFF[0] + g * YUV_COFF[1] + b * YUV_COFF[2];
+}
+
+float3 yuv2rgb(float y, float u, float v) {
+#ifdef FULL_RANGE_IN
+    u -= 0.5f; v -= 0.5f;
+#else
+    y = (y * 255.0f -  16.0f) / 219.0f;
+    u = (u * 255.0f - 128.0f) / 224.0f;
+    v = (v * 255.0f - 128.0f) / 224.0f;
+#endif
+    float r = y*RGB_COFF[0] + u*RGB_COFF[1] + v*RGB_COFF[2];
+    float g = y*RGB_COFF[3] + u*RGB_COFF[4] + v*RGB_COFF[5];
+    float b = y*RGB_COFF[6] + u*RGB_COFF[7] + v*RGB_COFF[8];
+    return (float3)(r, g, b);
+}
+
+float3 yuv2lrgb(float y, float u, float v, float post_scale) {
+    float3 rgb = yuv2rgb(y, u, v);
+    float r = linearize(rgb.x);
+    float g = linearize(rgb.y);
+    float b = linearize(rgb.z);
+    r *= post_scale;
+    g *= post_scale;
+    b *= post_scale;
+    return (float3)(r, g, b);
+}
+
+float3 rgb2yuv(float r, float g, float b) {
+    float y = r*YUV_COFF[0] + g*YUV_COFF[1] + b*YUV_COFF[2];
+    float u = r*YUV_COFF[3] + g*YUV_COFF[4] + b*YUV_COFF[5];
+    float v = r*YUV_COFF[6] + g*YUV_COFF[7] + b*YUV_COFF[8];
+#ifdef FULL_RANGE_OUT
+    u += 0.5f; v += 0.5f;
+#else
+    y = (219.0f * y + 16.0f) / 255.0f;
+    u = (224.0f * u + 128.0f) / 255.0f;
+    v = (224.0f * v + 128.0f) / 255.0f;
+#endif
+    return (float3)(y, u, v);
+}
+
+float3 lrgb2yuv(float r, float g, float b, float pre_scale) {
+    r *= pre_scale;
+    g *= pre_scale;
+    b *= pre_scale;
+
+    r = delinearize(r);
+    g = delinearize(g);
+    b = delinearize(b);
+
+    return rgb2yuv(r, g, b);
+}
+
+float3 lrgb2lrgb(float r, float g, float b) {
+#ifdef RGB2RGB_PASSTHROUGH
+    return (float3)(r, g, b);
+#else
+    float rr = rgb2rgb[0] * r + rgb2rgb[1] * g + rgb2rgb[2] * b;
+    float gg = rgb2rgb[3] * r + rgb2rgb[4] * g + rgb2rgb[5] * b;
+    float bb = rgb2rgb[6] * r + rgb2rgb[7] * g + rgb2rgb[8] * b;
+    return (float3)(rr, gg, bb);
+#endif
+}
diff --git a/libavfilter/opencl/tonemap.cl b/libavfilter/opencl/tonemap.cl
new file mode 100644
index 0000000..e0aca27
--- /dev/null
+++ b/libavfilter/opencl/tonemap.cl
@@ -0,0 +1,136 @@
+/*
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+
+#define ST2084_MAX_LUMINANCE 10000.0f
+#define REFERENCE_WHITE 100.0f
+
+extern float3 lrgb2yuv(float, float, float, float);
+extern float3 yuv2lrgb(float, float, float, float);
+extern float get_luma(float, float, float);
+
+float hable_f(float in) {
+    float a = 0.15f, b = 0.50f, c = 0.10f, d = 0.20f, e = 0.02f, f = 0.30f;
+    return (in * (in * a + b * c) + d * e) / (in * (in * a + b) + d * f) - e / f;
+}
+
+float direct(float s, float peak) {
+    return s;
+}
+
+float linear(float s, float peak) {
+    return s * tone_param / peak;
+}
+
+float gamma(float s, float peak) {
+    float p = s > 0.05f ? s /peak : 0.05f / peak;
+    float v = pow(p, 1.0f / tone_param);
+    return s > 0.05f ? v : (s * v /0.05f);
+}
+
+float clip(float s, float peak) {
+    return clamp(s * tone_param, 0.0f, 1.0f);
+}
+
+float reinhard(float s, float peak) {
+    return s / (s + tone_param) * (peak + tone_param) / peak;
+}
+
+float hable(float s, float peak) {
+    return hable_f(s)/hable_f(peak);
+}
+
+float mobius(float s, float peak) {
+    float j = tone_param;
+    float a, b;
+
+    if (s <= j)
+        return s;
+
+    a = -j * j * (peak - 1.0f) / (j * j - 2.0f * j + peak);
+    b = (j * j - 2.0f * j * peak + peak) / max(peak - 1.0f, 1e-6f);
+
+    return (b * b + 2.0f * b * j + j * j) / (b - a) * (s + a) / (s + b);
+}
+
+float3 map_one_pixel_rgb(float3 rgb, float peak) {
+    // de-saturate
+    float luma = get_luma(rgb.x, rgb.y, rgb.z);
+    float overbright = max(luma - 2.0f, 1e-6f) / max(luma, 1e-6f);
+    rgb = mix(rgb, (float3)luma, (float3)overbright);
+
+    float sig = max(max(rgb.x, max(rgb.y, rgb.z)), 1e-6f);
+    float sig_old = sig;
+    sig = TONE_FUNC(sig, peak);
+    rgb *= (sig/sig_old);
+    return rgb;
+}
+
+float3 map_one_pixel_yuv(float y, float u, float v, float peak, int m, int n) {
+    float3 c = yuv2lrgb(y, u, v, ST2084_MAX_LUMINANCE / peak);
+    c = map_one_pixel_rgb(c, peak / REFERENCE_WHITE);
+    return lrgb2yuv(c.x, c.y, c.z, 1.0f);
+}
+
+__kernel void tonemap(__write_only image2d_t dst1,
+                      __write_only image2d_t dst2,
+                      __read_only  image2d_t src1,
+                      __read_only  image2d_t src2,
+#ifdef THIRD_PLANE
+                      __write_only image2d_t dst3,
+                      __read_only  image2d_t src3,
+#endif
+                      float peak
+                      )
+{
+    const sampler_t sampler = (CLK_NORMALIZED_COORDS_FALSE |
+                               CLK_FILTER_NEAREST);
+    int xi = get_global_id(0);
+    int yi = get_global_id(1);
+    // each work item process four pixels
+    int x = 2 * xi;
+    int y = 2 * yi;
+
+    float y0 = read_imagef(src1, sampler, (int2)(x,     y)).x;
+    float y1 = read_imagef(src1, sampler, (int2)(x + 1, y)).x;
+    float y2 = read_imagef(src1, sampler, (int2)(x,     y + 1)).x;
+    float y3 = read_imagef(src1, sampler, (int2)(x + 1, y + 1)).x;
+#ifdef THIRD_PLANE
+    float u = read_imagef(src2, sampler, (int2)(xi, yi)).x;
+    float v = read_imagef(src3, sampler, (int2)(xi, yi)).x;
+    float2 uv = (float2)(u, v);
+#else
+    float2 uv = read_imagef(src2, sampler, (int2)(xi,     yi)).xy;
+#endif
+
+    float3 yuv0 = map_one_pixel_yuv(y0, uv.x, uv.y, peak, x, y);
+    float3 yuv1 = map_one_pixel_yuv(y1, uv.x, uv.y, peak, x+1, y);
+    float3 yuv2 = map_one_pixel_yuv(y2, uv.x, uv.y, peak, x, y+1);
+    float3 yuv3 = map_one_pixel_yuv(y3, uv.x, uv.y, peak, x+1,y+1);
+
+    write_imagef(dst1, (int2)(x, y), (float4)(yuv0.x, 0.0f, 0.0f, 1.0f));
+    write_imagef(dst1, (int2)(x+1, y), (float4)(yuv1.x, 0.0f, 0.0f, 1.0f));
+    write_imagef(dst1, (int2)(x, y+1), (float4)(yuv2.x, 0.0f, 0.0f, 1.0f));
+    write_imagef(dst1, (int2)(x+1, y+1), (float4)(yuv3.x, 0.0f, 0.0f, 1.0f));
+#ifdef THIRD_PLANE
+    write_imagef(dst2, (int2)(xi, yi), (float4)(yuv0.y, 0.0f, 0.0f, 1.0f));
+    write_imagef(dst3, (int2)(xi, yi), (float4)(yuv0.z, 0.0f, 0.0f, 1.0f));
+#else
+    write_imagef(dst2, (int2)(xi, yi), (float4)(yuv0.y, yuv0.z, 0.0f, 1.0f));
+#endif
+}
diff --git a/libavfilter/opencl_source.h b/libavfilter/opencl_source.h
index 4bb9969..c5b3f37 100644
--- a/libavfilter/opencl_source.h
+++ b/libavfilter/opencl_source.h
@@ -21,7 +21,9 @@
 
 extern const char *ff_opencl_source_avgblur;
 extern const char *ff_opencl_source_convolution;
+extern const char *ff_opencl_source_colorspace_basic;
 extern const char *ff_opencl_source_overlay;
+extern const char *ff_opencl_source_tonemap;
 extern const char *ff_opencl_source_unsharp;
 
 #endif /* AVFILTER_OPENCL_SOURCE_H */
diff --git a/libavfilter/vf_tonemap_opencl.c b/libavfilter/vf_tonemap_opencl.c
new file mode 100644
index 0000000..72676e5
--- /dev/null
+++ b/libavfilter/vf_tonemap_opencl.c
@@ -0,0 +1,472 @@
+/*
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+#include <float.h>
+
+#include "libavutil/bprint.h"
+#include "libavutil/common.h"
+#include "libavutil/imgutils.h"
+#include "libavutil/mastering_display_metadata.h"
+#include "libavutil/mem.h"
+#include "libavutil/opt.h"
+#include "libavutil/pixdesc.h"
+
+#include "avfilter.h"
+#include "internal.h"
+#include "opencl.h"
+#include "opencl_source.h"
+#include "video.h"
+#include "colorspace_basic.h"
+
+enum TonemapAlgorithm {
+    TONEMAP_NONE,
+    TONEMAP_LINEAR,
+    TONEMAP_GAMMA,
+    TONEMAP_CLIP,
+    TONEMAP_REINHARD,
+    TONEMAP_HABLE,
+    TONEMAP_MOBIUS,
+    TONEMAP_MAX,
+};
+
+typedef struct TonemapOpenCLContext {
+    OpenCLFilterContext ocf;
+
+    enum AVColorSpace colorspace, colorspace_in, colorspace_out;
+    enum AVColorTransferCharacteristic trc, trc_in, trc_out;
+    enum AVColorPrimaries primaries, primaries_in, primaries_out;
+
+    enum TonemapAlgorithm tonemap;
+    double           peak;
+    double           param;
+    int              initialised;
+    cl_kernel        kernel;
+    cl_command_queue command_queue;
+} TonemapOpenCLContext;
+
+const char *yuv_coff[AVCOL_SPC_NB] = {
+    [AVCOL_SPC_BT709] = "rgb2yuv_bt709",
+    [AVCOL_SPC_BT2020_NCL] = "rgb2yuv_bt2020",
+};
+
+const char *rgb_coff[AVCOL_SPC_NB] = {
+    [AVCOL_SPC_BT709] = "yuv2rgb_bt709",
+    [AVCOL_SPC_BT2020_NCL] = "yuv2rgb_bt2020",
+};
+
+const char *linearize_funcs[AVCOL_TRC_NB] = {
+    [AVCOL_TRC_SMPTE2084] = "eotf_st2084",
+};
+
+const char *delinearize_funcs[AVCOL_TRC_NB] = {
+    [AVCOL_TRC_BT709]     = "inverse_eotf_bt1886",
+    [AVCOL_TRC_BT2020_10] = "inverse_eotf_bt1886",
+
+};
+struct ColorPrimaries primaries_table[AVCOL_PRI_NB] = {
+    [AVCOL_PRI_BT709]  = { 0.640, 0.330, 0.300, 0.600, 0.150, 0.060 },
+    [AVCOL_PRI_BT2020] = { 0.708, 0.292, 0.170, 0.797, 0.131, 0.046 },
+};
+
+struct WhitePoint whitepoint_table[AVCOL_PRI_NB] = {
+    [AVCOL_PRI_BT709]  = { 0.3127, 0.3290 },
+    [AVCOL_PRI_BT2020] = { 0.3127, 0.3290 },
+};
+
+const char *tonemap_func[TONEMAP_MAX] = {
+    [TONEMAP_NONE]     = "direct",
+    [TONEMAP_LINEAR]   = "linear",
+    [TONEMAP_GAMMA]    = "gamma",
+    [TONEMAP_CLIP]     = "clip",
+    [TONEMAP_REINHARD] = "reinhard",
+    [TONEMAP_HABLE]    = "hable",
+    [TONEMAP_MOBIUS]   = "mobius",
+};
+
+static void get_rgb2rgb_matrix(enum AVColorPrimaries in, enum AVColorPrimaries out,
+                               double rgb2rgb[3][3]) {
+    double rgb2xyz[3][3], xyz2rgb[3][3];
+
+    fill_rgb2xyz_table(&primaries_table[out], &whitepoint_table[out], rgb2xyz);
+    invert_matrix3x3(rgb2xyz, xyz2rgb);
+    fill_rgb2xyz_table(&primaries_table[in], &whitepoint_table[in], rgb2xyz);
+    mul3x3(rgb2rgb, rgb2xyz, xyz2rgb);
+}
+
+#define OPENCL_SOURCE_NB 3
+static int tonemap_opencl_init(AVFilterContext *avctx)
+{
+    TonemapOpenCLContext *ctx = avctx->priv;
+    int rgb2rgb_passthrough = 1;
+    double rgb2rgb[3][3];
+    cl_int cle;
+    int err;
+    AVBPrint header;
+    const char *opencl_sources[OPENCL_SOURCE_NB];
+
+    av_bprint_init(&header, 256, AV_BPRINT_SIZE_AUTOMATIC);
+
+    switch(ctx->tonemap) {
+    case TONEMAP_GAMMA:
+        if (isnan(ctx->param))
+            ctx->param = 1.8f;
+        break;
+    case TONEMAP_REINHARD:
+        if (!isnan(ctx->param))
+            ctx->param = (1.0f - ctx->param) / ctx->param;
+        break;
+    case TONEMAP_MOBIUS:
+        if (isnan(ctx->param))
+            ctx->param = 0.3f;
+        break;
+    }
+
+    if (isnan(ctx->param))
+        ctx->param = 1.0f;
+
+    av_bprintf(&header, "__constant const tone_param = %.4f;\n", ctx->param);
+    av_bprintf(&header, "#define TONE_FUNC %s\n", tonemap_func[ctx->tonemap]);
+
+    if (ctx->primaries_out != ctx->primaries_in) {
+      get_rgb2rgb_matrix(ctx->primaries_in, ctx->primaries_out, rgb2rgb);
+      rgb2rgb_passthrough = 0;
+    }
+
+    if (rgb2rgb_passthrough)
+      av_bprintf(&header, "#define RGB2RGB_PASSTHROUGH\n");
+    av_bprintf(&header, "#define RGB_COFF %s\n",    rgb_coff[ctx->colorspace_in]);
+    av_bprintf(&header, "#define YUV_COFF %s\n",    yuv_coff[ctx->colorspace_out]);
+    av_bprintf(&header, "#define linearize %s\n",   linearize_funcs[ctx->trc_in]);
+    av_bprintf(&header, "#define delinearize %s\n", delinearize_funcs[ctx->trc_out]);
+
+    av_bprintf(&header, "__constant float rgb2rgb[9] = {\n");
+    av_bprintf(&header, "    %.4ff, %.4ff, %.4ff,\n",  rgb2rgb[0][0], rgb2rgb[0][1], rgb2rgb[0][2]);
+    av_bprintf(&header, "    %.4ff, %.4ff, %.4ff,\n",  rgb2rgb[1][0], rgb2rgb[1][1], rgb2rgb[1][2]);
+    av_bprintf(&header, "    %.4ff, %.4ff, %.4ff};\n", rgb2rgb[2][0], rgb2rgb[2][1], rgb2rgb[2][2]);
+
+    opencl_sources[0] = header.str;
+    opencl_sources[1] = ff_opencl_source_tonemap;
+    opencl_sources[2] = ff_opencl_source_colorspace_basic;
+    err = ff_opencl_filter_load_program(avctx, opencl_sources, OPENCL_SOURCE_NB);
+
+    av_bprint_finalize(&header, NULL);
+    if (err < 0)
+        goto fail;
+
+    ctx->command_queue = clCreateCommandQueue(ctx->ocf.hwctx->context,
+                                              ctx->ocf.hwctx->device_id,
+                                              0, &cle);
+    if (!ctx->command_queue) {
+        av_log(avctx, AV_LOG_ERROR, "Failed to create OpenCL "
+               "command queue: %d.\n", cle);
+        err = AVERROR(EIO);
+        goto fail;
+    }
+
+    ctx->kernel = clCreateKernel(ctx->ocf.program, "tonemap", &cle);
+    if (!ctx->kernel) {
+        av_log(avctx, AV_LOG_ERROR, "Failed to create kernel: %d.\n", cle);
+        err = AVERROR(EIO);
+        goto fail;
+    }
+
+    ctx->initialised = 1;
+    return 0;
+
+fail:
+    if (ctx->command_queue)
+        clReleaseCommandQueue(ctx->command_queue);
+    if (ctx->kernel)
+        clReleaseKernel(ctx->kernel);
+    return err;
+}
+
+static int tonemap_opencl_config_output(AVFilterLink *outlink)
+{
+    AVFilterContext *avctx = outlink->src;
+    TonemapOpenCLContext *s = avctx->priv;
+    //AVFilterLink *inlink = outlink->src->inputs[0];
+    int ret;
+
+    s->ocf.output_format = AV_PIX_FMT_NV12;
+    ret = ff_opencl_filter_config_output(outlink);
+    if (ret < 0)
+        return ret;
+
+    return 0;
+}
+
+static int launch_kernel(AVFilterContext *avctx, cl_kernel kernel,
+                         AVFrame *output, AVFrame *input, float peak) {
+    TonemapOpenCLContext *ctx = avctx->priv;
+    int err = AVERROR(ENOSYS);
+    size_t global_work[2];
+    size_t local_work[2];
+    cl_int cle;
+
+    cle = clSetKernelArg(kernel, 0, sizeof(cl_mem), &output->data[0]);
+    if (cle != CL_SUCCESS) {
+        av_log(avctx, AV_LOG_ERROR, "Failed to set kernel "
+               "destination image 1st plane: %d.\n", cle);
+        return AVERROR(EINVAL);
+    }
+
+    cle = clSetKernelArg(kernel, 1, sizeof(cl_mem), &output->data[1]);
+    if (cle != CL_SUCCESS) {
+        av_log(avctx, AV_LOG_ERROR, "Failed to set kernel "
+               "destination image 2nd plane: %d.\n", cle);
+        return AVERROR(EINVAL);
+    }
+
+    cle = clSetKernelArg(kernel, 2, sizeof(cl_mem), &input->data[0]);
+    if (cle != CL_SUCCESS) {
+        av_log(avctx, AV_LOG_ERROR, "Failed to set kernel "
+               "source image 1st plane: %d.\n", cle);
+        return AVERROR(EINVAL);
+    }
+
+    cle = clSetKernelArg(kernel, 3, sizeof(cl_mem), &input->data[1]);
+    if (cle != CL_SUCCESS) {
+        av_log(avctx, AV_LOG_ERROR, "Failed to set kernel "
+               "source image 2nd plane: %d.\n", cle);
+        return AVERROR(EINVAL);
+    }
+
+    cle = clSetKernelArg(kernel, 4, sizeof(cl_float), &peak);
+    if (cle != CL_SUCCESS) {
+        av_log(avctx, AV_LOG_ERROR, "Failed to set kernel "
+               "peak luma: %d.\n", cle);
+        return AVERROR(EINVAL);
+    }
+
+    local_work[0]  = 16;
+    local_work[1]  = 16;
+    // Note the work size based on uv plane, as we process a 2x2 quad in one workitem
+    err = ff_opencl_filter_work_size_from_image(avctx, global_work, output,
+                                                1, 16);
+    if (err < 0)
+        return err;
+
+    cle = clEnqueueNDRangeKernel(ctx->command_queue, kernel, 2, NULL,
+                                 global_work, local_work,
+                                 0, NULL, NULL);
+    if (cle != CL_SUCCESS) {
+        av_log(avctx, AV_LOG_ERROR, "Failed to enqueue kernel: %d.\n",
+               cle);
+        return AVERROR(EIO);
+    }
+    return 0;
+}
+
+static double determine_signal_peak(AVFrame *in)
+{
+    AVFrameSideData *sd = av_frame_get_side_data(in, AV_FRAME_DATA_CONTENT_LIGHT_LEVEL);
+    double peak = 0;
+
+    if (sd) {
+        AVContentLightMetadata *clm = (AVContentLightMetadata *)sd->data;
+        peak = clm->MaxCLL;
+    }
+
+    sd = av_frame_get_side_data(in, AV_FRAME_DATA_MASTERING_DISPLAY_METADATA);
+    if (!peak && sd) {
+        AVMasteringDisplayMetadata *metadata = (AVMasteringDisplayMetadata *)sd->data;
+        if (metadata->has_luminance)
+            peak = av_q2d(metadata->max_luminance);
+    }
+
+    /* smpte2084 needs the side data above to work correctly
+     * if missing, assume that the original transfer was arib-std-b67 */
+    if (!peak)
+        peak = 1200;
+
+    return peak;
+}
+
+static int tonemap_opencl_filter_frame(AVFilterLink *inlink, AVFrame *input)
+{
+    AVFilterContext    *avctx = inlink->dst;
+    AVFilterLink     *outlink = avctx->outputs[0];
+    TonemapOpenCLContext *ctx = avctx->priv;
+    AVFrame *output = NULL;
+    cl_int cle;
+    int err;
+    double peak = ctx->peak;
+
+    AVHWFramesContext *input_frames_ctx =
+        (AVHWFramesContext*)input->hw_frames_ctx->data;
+
+    av_log(ctx, AV_LOG_DEBUG, "Filter input: %s, %ux%u (%"PRId64").\n",
+           av_get_pix_fmt_name(input->format),
+           input->width, input->height, input->pts);
+
+    if (!input->hw_frames_ctx)
+        return AVERROR(EINVAL);
+
+    output = ff_get_video_buffer(outlink, outlink->w, outlink->h);
+    if (!output) {
+        err = AVERROR(ENOMEM);
+        goto fail;
+    }
+
+    err = av_frame_copy_props(output, input);
+    if (err < 0)
+        goto fail;
+
+    if (!peak)
+        peak = determine_signal_peak(input);
+
+    if (ctx->trc != -1)
+        output->color_trc = ctx->trc;
+    if (ctx->primaries != -1)
+        output->color_primaries = ctx->primaries;
+    if (ctx->colorspace != -1)
+        output->colorspace = ctx->colorspace;
+
+    ctx->trc_in = input->color_trc;
+    ctx->trc_out = output->color_trc;
+    ctx->colorspace_in = input->colorspace;
+    ctx->colorspace_out = output->colorspace;
+    ctx->primaries_in = input->color_primaries;
+    ctx->primaries_out = output->color_primaries;
+
+    assert(output->sw_format == AV_PIX_FMT_NV12);
+
+    if (!ctx->initialised) {
+        err = tonemap_opencl_init(avctx);
+        if (err < 0)
+            goto fail;
+    }
+
+    switch(input_frames_ctx->sw_format) {
+    case AV_PIX_FMT_P010:
+        err = launch_kernel(avctx, ctx->kernel, output, input, peak);
+        if (err < 0) goto fail;
+        break;
+    default:
+        av_log(ctx, AV_LOG_ERROR, "unsupported format in tonemap_opencl.\n");
+        err = AVERROR(ENOSYS);
+        goto fail;
+    }
+
+    cle = clFinish(ctx->command_queue);
+    if (cle != CL_SUCCESS) {
+        av_log(avctx, AV_LOG_ERROR, "Failed to finish command queue: %d.\n",
+               cle);
+        err = AVERROR(EIO);
+        goto fail;
+    }
+
+
+    av_frame_free(&input);
+
+    av_log(ctx, AV_LOG_DEBUG, "Filter output: %s, %ux%u (%"PRId64").\n",
+           av_get_pix_fmt_name(output->format),
+           output->width, output->height, output->pts);
+
+    return ff_filter_frame(outlink, output);
+
+fail:
+    clFinish(ctx->command_queue);
+    av_frame_free(&input);
+    av_frame_free(&output);
+    return err;
+}
+
+static av_cold void tonemap_opencl_uninit(AVFilterContext *avctx)
+{
+    TonemapOpenCLContext *ctx = avctx->priv;
+    cl_int cle;
+
+    if (ctx->kernel) {
+        cle = clReleaseKernel(ctx->kernel);
+        if (cle != CL_SUCCESS)
+            av_log(avctx, AV_LOG_ERROR, "Failed to release "
+                   "kernel: %d.\n", cle);
+    }
+
+    if (ctx->command_queue) {
+        cle = clReleaseCommandQueue(ctx->command_queue);
+        if (cle != CL_SUCCESS)
+            av_log(avctx, AV_LOG_ERROR, "Failed to release "
+                   "command queue: %d.\n", cle);
+    }
+
+    ff_opencl_filter_uninit(avctx);
+}
+
+#define OFFSET(x) offsetof(TonemapOpenCLContext, x)
+#define FLAGS (AV_OPT_FLAG_FILTERING_PARAM | AV_OPT_FLAG_VIDEO_PARAM)
+static const AVOption tonemap_opencl_options[] = {
+    { "tonemap",      "tonemap algorithm selection", OFFSET(tonemap), AV_OPT_TYPE_INT, {.i64 = TONEMAP_NONE}, TONEMAP_NONE, TONEMAP_MAX - 1, FLAGS, "tonemap" },
+    {     "none",     0, 0, AV_OPT_TYPE_CONST, {.i64 = TONEMAP_NONE},              0, 0, FLAGS, "tonemap" },
+    {     "linear",   0, 0, AV_OPT_TYPE_CONST, {.i64 = TONEMAP_LINEAR},            0, 0, FLAGS, "tonemap" },
+    {     "gamma",    0, 0, AV_OPT_TYPE_CONST, {.i64 = TONEMAP_GAMMA},             0, 0, FLAGS, "tonemap" },
+    {     "clip",     0, 0, AV_OPT_TYPE_CONST, {.i64 = TONEMAP_CLIP},              0, 0, FLAGS, "tonemap" },
+    {     "reinhard", 0, 0, AV_OPT_TYPE_CONST, {.i64 = TONEMAP_REINHARD},          0, 0, FLAGS, "tonemap" },
+    {     "hable",    0, 0, AV_OPT_TYPE_CONST, {.i64 = TONEMAP_HABLE},             0, 0, FLAGS, "tonemap" },
+    {     "mobius",   0, 0, AV_OPT_TYPE_CONST, {.i64 = TONEMAP_MOBIUS},            0, 0, FLAGS, "tonemap" },
+    { "transfer", "set transfer characteristic", OFFSET(trc), AV_OPT_TYPE_INT, {.i64 = -1}, -1, INT_MAX, FLAGS, "transfer" },
+    { "t",        "set transfer characteristic", OFFSET(trc), AV_OPT_TYPE_INT, {.i64 = -1}, -1, INT_MAX, FLAGS, "transfer" },
+    {     "bt709",            0,       0,                 AV_OPT_TYPE_CONST, {.i64 = AVCOL_TRC_BT709},         0, 0, FLAGS, "transfer" },
+    {     "bt2020",           0,       0,                 AV_OPT_TYPE_CONST, {.i64 = AVCOL_TRC_BT2020_10},     0, 0, FLAGS, "transfer" },
+    { "matrix", "set colorspace matrix", OFFSET(colorspace), AV_OPT_TYPE_INT, {.i64 = -1}, -1, INT_MAX, FLAGS, "matrix" },
+    { "m",      "set colorspace matrix", OFFSET(colorspace), AV_OPT_TYPE_INT, {.i64 = -1}, -1, INT_MAX, FLAGS, "matrix" },
+    {     "bt709",            0,       0,                 AV_OPT_TYPE_CONST, {.i64 = AVCOL_SPC_BT709},         0, 0, FLAGS, "matrix" },
+    {     "bt2020",           0,       0,                 AV_OPT_TYPE_CONST, {.i64 = AVCOL_SPC_BT2020_NCL},    0, 0, FLAGS, "matrix" },
+    { "primaries", "set color primaries", OFFSET(primaries), AV_OPT_TYPE_INT, {.i64 = -1}, -1, INT_MAX, FLAGS, "primaries" },
+    { "p",         "set color primaries", OFFSET(primaries), AV_OPT_TYPE_INT, {.i64 = -1}, -1, INT_MAX, FLAGS, "primaries" },
+    {     "bt709",            0,       0,                 AV_OPT_TYPE_CONST, {.i64 = AVCOL_PRI_BT709},         0, 0, FLAGS, "primaries" },
+    {     "bt2020",           0,       0,                 AV_OPT_TYPE_CONST, {.i64 = AVCOL_PRI_BT2020},        0, 0, FLAGS, "primaries" },
+    { "peak",      "signal peak override", OFFSET(peak), AV_OPT_TYPE_DOUBLE, {.dbl = 0}, 0, DBL_MAX, FLAGS },
+    { "param",     "tonemap parameter",   OFFSET(param), AV_OPT_TYPE_DOUBLE, {.dbl = NAN}, DBL_MIN, DBL_MAX, FLAGS },
+    { NULL }
+};
+
+AVFILTER_DEFINE_CLASS(tonemap_opencl);
+
+static const AVFilterPad tonemap_opencl_inputs[] = {
+    {
+        .name         = "default",
+        .type         = AVMEDIA_TYPE_VIDEO,
+        .filter_frame = &tonemap_opencl_filter_frame,
+        .config_props = &ff_opencl_filter_config_input,
+    },
+    { NULL }
+};
+
+static const AVFilterPad tonemap_opencl_outputs[] = {
+    {
+        .name         = "default",
+        .type         = AVMEDIA_TYPE_VIDEO,
+        .config_props = &tonemap_opencl_config_output,
+    },
+    { NULL }
+};
+
+AVFilter ff_vf_tonemap_opencl = {
+    .name           = "tonemap_opencl",
+    .description    = NULL_IF_CONFIG_SMALL("perform HDR to SDR conversion with tonemapping"),
+    .priv_size      = sizeof(TonemapOpenCLContext),
+    .priv_class     = &tonemap_opencl_class,
+    .init           = &ff_opencl_filter_init,
+    .uninit         = &tonemap_opencl_uninit,
+    .query_formats  = &ff_opencl_filter_query_formats,
+    .inputs         = tonemap_opencl_inputs,
+    .outputs        = tonemap_opencl_outputs,
+    .flags_internal = FF_FILTER_FLAG_HWFRAME_AWARE,
+};
-- 
2.7.4



More information about the ffmpeg-devel mailing list