[FFmpeg-devel] [PATCH v3 1/2] lavfi: add opencl tonemap filter.
Ruiling Song
ruiling.song at intel.com
Wed Jun 6 10:23:53 EEST 2018
This filter does HDR(HDR10/HLG) to SDR conversion with tone-mapping.
An example command to use this filter with vaapi codecs:
FFMPEG -init_hw_device vaapi=va:/dev/dri/renderD128 -init_hw_device \
opencl=ocl at va -hwaccel vaapi -hwaccel_device va -hwaccel_output_format \
vaapi -i INPUT -filter_hw_device ocl -filter_complex \
'[0:v]hwmap,tonemap_opencl=t=bt2020:tonemap=linear:format=p010[x1]; \
[x1]hwmap=derive_device=vaapi:reverse=1' -c:v hevc_vaapi -profile 2 OUTPUT
Signed-off-by: Ruiling Song <ruiling.song at intel.com>
---
this version mainly address Mark's comments on v2.
Thanks!
Ruiling
configure | 1 +
libavfilter/Makefile | 2 +
libavfilter/allfilters.c | 1 +
libavfilter/colorspace.c | 90 +++++
libavfilter/colorspace.h | 41 ++
libavfilter/opencl/colorspace_common.cl | 220 +++++++++++
libavfilter/opencl/tonemap.cl | 272 +++++++++++++
libavfilter/opencl_source.h | 2 +
libavfilter/vf_tonemap_opencl.c | 657 ++++++++++++++++++++++++++++++++
9 files changed, 1286 insertions(+)
create mode 100644 libavfilter/colorspace.c
create mode 100644 libavfilter/colorspace.h
create mode 100644 libavfilter/opencl/colorspace_common.cl
create mode 100644 libavfilter/opencl/tonemap.cl
create mode 100644 libavfilter/vf_tonemap_opencl.c
diff --git a/configure b/configure
index 53224f0..4ff651f 100755
--- a/configure
+++ b/configure
@@ -3410,6 +3410,7 @@ tinterlace_filter_deps="gpl"
tinterlace_merge_test_deps="tinterlace_filter"
tinterlace_pad_test_deps="tinterlace_filter"
tonemap_filter_deps="const_nan"
+tonemap_opencl_filter_deps="opencl const_nan"
unsharp_opencl_filter_deps="opencl"
uspp_filter_deps="gpl avcodec"
vaguedenoiser_filter_deps="gpl"
diff --git a/libavfilter/Makefile b/libavfilter/Makefile
index 5b4be49..d2c85cf 100644
--- a/libavfilter/Makefile
+++ b/libavfilter/Makefile
@@ -356,6 +356,8 @@ OBJS-$(CONFIG_TINTERLACE_FILTER) += vf_tinterlace.o
OBJS-$(CONFIG_TLUT2_FILTER) += vf_lut2.o framesync.o
OBJS-$(CONFIG_TMIX_FILTER) += vf_mix.o framesync.o
OBJS-$(CONFIG_TONEMAP_FILTER) += vf_tonemap.o
+OBJS-$(CONFIG_TONEMAP_OPENCL_FILTER) += vf_tonemap_opencl.o colorspace.o opencl.o \
+ opencl/tonemap.o opencl/colorspace_common.o
OBJS-$(CONFIG_TRANSPOSE_FILTER) += vf_transpose.o
OBJS-$(CONFIG_TRIM_FILTER) += trim.o
OBJS-$(CONFIG_UNPREMULTIPLY_FILTER) += vf_premultiply.o framesync.o
diff --git a/libavfilter/allfilters.c b/libavfilter/allfilters.c
index f2d27d2..fa85c29 100644
--- a/libavfilter/allfilters.c
+++ b/libavfilter/allfilters.c
@@ -345,6 +345,7 @@ extern AVFilter ff_vf_tinterlace;
extern AVFilter ff_vf_tlut2;
extern AVFilter ff_vf_tmix;
extern AVFilter ff_vf_tonemap;
+extern AVFilter ff_vf_tonemap_opencl;
extern AVFilter ff_vf_transpose;
extern AVFilter ff_vf_trim;
extern AVFilter ff_vf_unpremultiply;
diff --git a/libavfilter/colorspace.c b/libavfilter/colorspace.c
new file mode 100644
index 0000000..7fd7bdf
--- /dev/null
+++ b/libavfilter/colorspace.c
@@ -0,0 +1,90 @@
+/*
+ * Copyright (c) 2016 Ronald S. Bultje <rsbultje at gmail.com>
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include "colorspace.h"
+
+
+void invert_matrix3x3(const double in[3][3], double out[3][3])
+{
+ double m00 = in[0][0], m01 = in[0][1], m02 = in[0][2],
+ m10 = in[1][0], m11 = in[1][1], m12 = in[1][2],
+ m20 = in[2][0], m21 = in[2][1], m22 = in[2][2];
+ int i, j;
+ double det;
+
+ out[0][0] = (m11 * m22 - m21 * m12);
+ out[0][1] = -(m01 * m22 - m21 * m02);
+ out[0][2] = (m01 * m12 - m11 * m02);
+ out[1][0] = -(m10 * m22 - m20 * m12);
+ out[1][1] = (m00 * m22 - m20 * m02);
+ out[1][2] = -(m00 * m12 - m10 * m02);
+ out[2][0] = (m10 * m21 - m20 * m11);
+ out[2][1] = -(m00 * m21 - m20 * m01);
+ out[2][2] = (m00 * m11 - m10 * m01);
+
+ det = m00 * out[0][0] + m10 * out[0][1] + m20 * out[0][2];
+ det = 1.0 / det;
+
+ for (i = 0; i < 3; i++) {
+ for (j = 0; j < 3; j++)
+ out[i][j] *= det;
+ }
+}
+
+void mul3x3(double dst[3][3], const double src1[3][3], const double src2[3][3])
+{
+ int m, n;
+
+ for (m = 0; m < 3; m++)
+ for (n = 0; n < 3; n++)
+ dst[m][n] = src2[m][0] * src1[0][n] +
+ src2[m][1] * src1[1][n] +
+ src2[m][2] * src1[2][n];
+}
+/*
+ * see e.g. http://www.brucelindbloom.com/index.html?Eqn_RGB_XYZ_Matrix.html
+ */
+void fill_rgb2xyz_table(const struct PrimaryCoefficients *coeffs,
+ const struct WhitepointCoefficients *wp,
+ double rgb2xyz[3][3])
+{
+ double i[3][3], sr, sg, sb, zw;
+
+ rgb2xyz[0][0] = coeffs->xr / coeffs->yr;
+ rgb2xyz[0][1] = coeffs->xg / coeffs->yg;
+ rgb2xyz[0][2] = coeffs->xb / coeffs->yb;
+ rgb2xyz[1][0] = rgb2xyz[1][1] = rgb2xyz[1][2] = 1.0;
+ rgb2xyz[2][0] = (1.0 - coeffs->xr - coeffs->yr) / coeffs->yr;
+ rgb2xyz[2][1] = (1.0 - coeffs->xg - coeffs->yg) / coeffs->yg;
+ rgb2xyz[2][2] = (1.0 - coeffs->xb - coeffs->yb) / coeffs->yb;
+ invert_matrix3x3(rgb2xyz, i);
+ zw = 1.0 - wp->xw - wp->yw;
+ sr = i[0][0] * wp->xw + i[0][1] * wp->yw + i[0][2] * zw;
+ sg = i[1][0] * wp->xw + i[1][1] * wp->yw + i[1][2] * zw;
+ sb = i[2][0] * wp->xw + i[2][1] * wp->yw + i[2][2] * zw;
+ rgb2xyz[0][0] *= sr;
+ rgb2xyz[0][1] *= sg;
+ rgb2xyz[0][2] *= sb;
+ rgb2xyz[1][0] *= sr;
+ rgb2xyz[1][1] *= sg;
+ rgb2xyz[1][2] *= sb;
+ rgb2xyz[2][0] *= sr;
+ rgb2xyz[2][1] *= sg;
+ rgb2xyz[2][2] *= sb;
+}
diff --git a/libavfilter/colorspace.h b/libavfilter/colorspace.h
new file mode 100644
index 0000000..d330917
--- /dev/null
+++ b/libavfilter/colorspace.h
@@ -0,0 +1,41 @@
+/*
+ * Copyright (c) 2016 Ronald S. Bultje <rsbultje at gmail.com>
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#ifndef AVFILTER_COLORSPACE_H
+#define AVFILTER_COLORSPACE_H
+
+#include "libavutil/common.h"
+
+struct LumaCoefficients {
+ double cr, cg, cb;
+};
+
+struct PrimaryCoefficients {
+ double xr, yr, xg, yg, xb, yb;
+};
+
+struct WhitepointCoefficients {
+ double xw, yw;
+};
+
+void invert_matrix3x3(const double in[3][3], double out[3][3]);
+void mul3x3(double dst[3][3], const double src1[3][3], const double src2[3][3]);
+void fill_rgb2xyz_table(const struct PrimaryCoefficients *coeffs,
+ const struct WhitepointCoefficients *wp, double rgb2xyz[3][3]);
+#endif
diff --git a/libavfilter/opencl/colorspace_common.cl b/libavfilter/opencl/colorspace_common.cl
new file mode 100644
index 0000000..4fb75b8
--- /dev/null
+++ b/libavfilter/opencl/colorspace_common.cl
@@ -0,0 +1,220 @@
+/*
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#define ST2084_MAX_LUMINANCE 10000.0f
+#define REFERENCE_WHITE 100.0f
+
+#if chroma_loc == 1
+ #define chroma_sample(a,b,c,d) ((a + c) * 0.5f)
+#elif chroma_loc == 3
+ #define chroma_sample(a,b,c,d) (a)
+#elif chroma_loc == 4
+ #define chroma_sample(a,b,c,d) ((a + b) * 0.5f)
+#elif chroma_loc == 5
+ #define chroma_sample(a,b,c,d) (c)
+#elif chroma_loc == 6
+ #define chroma_sample(a,b,c,d) ((c + d) * 0.5f)
+#else
+ #define chroma_sample(a,b,c,d) ((a + b + c + d) * 0.25f)
+#endif
+
+constant const float ST2084_M1 = 0.1593017578125f;
+constant const float ST2084_M2 = 78.84375f;
+constant const float ST2084_C1 = 0.8359375f;
+constant const float ST2084_C2 = 18.8515625f;
+constant const float ST2084_C3 = 18.6875f;
+
+__constant float yuv2rgb_bt2020[] = {
+ 1.0f, 0.0f, 1.4746f,
+ 1.0f, -0.16455f, -0.57135f,
+ 1.0f, 1.8814f, 0.0f
+};
+
+__constant float yuv2rgb_bt709[] = {
+ 1.0f, 0.0f, 1.5748f,
+ 1.0f, -0.18732f, -0.46812f,
+ 1.0f, 1.8556f, 0.0f
+};
+
+__constant float rgb2yuv_bt709[] = {
+ 0.2126f, 0.7152f, 0.0722f,
+ -0.11457f, -0.38543f, 0.5f,
+ 0.5f, -0.45415f, -0.04585f
+};
+
+__constant float rgb2yuv_bt2020[] ={
+ 0.2627f, 0.678f, 0.0593f,
+ -0.1396f, -0.36037f, 0.5f,
+ 0.5f, -0.4598f, -0.0402f,
+};
+
+
+float get_luma_dst(float3 c) {
+ return luma_dst.x * c.x + luma_dst.y * c.y + luma_dst.z * c.z;
+}
+
+float get_luma_src(float3 c) {
+ return luma_src.x * c.x + luma_src.y * c.y + luma_src.z * c.z;
+}
+
+float3 get_chroma_sample(float3 a, float3 b, float3 c, float3 d) {
+ return chroma_sample(a, b, c, d);
+}
+
+float eotf_st2084(float x) {
+ float p = powr(x, 1.0f / ST2084_M2);
+ float a = max(p -ST2084_C1, 0.0f);
+ float b = max(ST2084_C2 - ST2084_C3 * p, 1e-6f);
+ float c = powr(a / b, 1.0f / ST2084_M1);
+ return x > 0.0f ? c * ST2084_MAX_LUMINANCE / REFERENCE_WHITE : 0.0f;
+}
+
+__constant const float HLG_A = 0.17883277f;
+__constant const float HLG_B = 0.28466892f;
+__constant const float HLG_C = 0.55991073f;
+
+// linearizer for HLG
+float inverse_oetf_hlg(float x) {
+ float a = 4.0f * x * x;
+ float b = exp((x - HLG_C) / HLG_A) + HLG_B;
+ return x < 0.5f ? a : b;
+}
+
+// delinearizer for HLG
+float oetf_hlg(float x) {
+ float a = 0.5f * sqrt(x);
+ float b = HLG_A * log(x - HLG_B) + HLG_C;
+ return x <= 1.0f ? a : b;
+}
+
+float3 ootf_hlg(float3 c, float peak) {
+ float luma = get_luma_src(c);
+ float gamma = 1.2f + 0.42f * log10(peak * REFERENCE_WHITE / 1000.0f);
+ gamma = max(1.0f, gamma);
+ float factor = peak * powr(luma, gamma - 1.0f) / powr(12.0f, gamma);
+ return c * factor;
+}
+
+float3 inverse_ootf_hlg(float3 c, float peak) {
+ float gamma = 1.2f + 0.42f * log10(peak * REFERENCE_WHITE / 1000.0f);
+ c *= powr(12.0f, gamma) / peak;
+ c /= powr(get_luma_dst(c), (gamma - 1.0f) / gamma);
+ return c;
+}
+
+float inverse_eotf_bt1886(float c) {
+ return c < 0.0f ? 0.0f : powr(c, 1.0f / 2.4f);
+}
+
+float oetf_bt709(float c) {
+ c = c < 0.0f ? 0.0f : c;
+ float r1 = 4.5f * c;
+ float r2 = 1.099f * powr(c, 0.45f) - 0.099f;
+ return c < 0.018f ? r1 : r2;
+}
+float inverse_oetf_bt709(float c) {
+ float r1 = c / 4.5f;
+ float r2 = powr((c + 0.099f) / 1.099f, 1.0f / 0.45f);
+ return c < 0.081f ? r1 : r2;
+}
+
+float3 yuv2rgb(float y, float u, float v) {
+#ifdef FULL_RANGE_IN
+ u -= 0.5f; v -= 0.5f;
+#else
+ y = (y * 255.0f - 16.0f) / 219.0f;
+ u = (u * 255.0f - 128.0f) / 224.0f;
+ v = (v * 255.0f - 128.0f) / 224.0f;
+#endif
+ float r = y * rgb_matrix[0] + u * rgb_matrix[1] + v * rgb_matrix[2];
+ float g = y * rgb_matrix[3] + u * rgb_matrix[4] + v * rgb_matrix[5];
+ float b = y * rgb_matrix[6] + u * rgb_matrix[7] + v * rgb_matrix[8];
+ return (float3)(r, g, b);
+}
+
+float3 yuv2lrgb(float3 yuv) {
+ float3 rgb = yuv2rgb(yuv.x, yuv.y, yuv.z);
+ float r = linearize(rgb.x);
+ float g = linearize(rgb.y);
+ float b = linearize(rgb.z);
+ return (float3)(r, g, b);
+}
+
+float3 rgb2yuv(float r, float g, float b) {
+ float y = r*yuv_matrix[0] + g*yuv_matrix[1] + b*yuv_matrix[2];
+ float u = r*yuv_matrix[3] + g*yuv_matrix[4] + b*yuv_matrix[5];
+ float v = r*yuv_matrix[6] + g*yuv_matrix[7] + b*yuv_matrix[8];
+#ifdef FULL_RANGE_OUT
+ u += 0.5f; v += 0.5f;
+#else
+ y = (219.0f * y + 16.0f) / 255.0f;
+ u = (224.0f * u + 128.0f) / 255.0f;
+ v = (224.0f * v + 128.0f) / 255.0f;
+#endif
+ return (float3)(y, u, v);
+}
+
+float rgb2y(float r, float g, float b) {
+ float y = r*yuv_matrix[0] + g*yuv_matrix[1] + b*yuv_matrix[2];
+ y = (219.0f * y + 16.0f) / 255.0f;
+ return y;
+}
+
+float3 lrgb2yuv(float3 c) {
+ float r = delinearize(c.x);
+ float g = delinearize(c.y);
+ float b = delinearize(c.z);
+
+ return rgb2yuv(r, g, b);
+}
+
+float lrgb2y(float3 c) {
+ float r = delinearize(c.x);
+ float g = delinearize(c.y);
+ float b = delinearize(c.z);
+
+ return rgb2y(r, g, b);
+}
+
+float3 lrgb2lrgb(float3 c) {
+#ifdef RGB2RGB_PASSTHROUGH
+ return c;
+#else
+ float r = c.x, g = c.y, b = c.z;
+ float rr = rgb2rgb[0] * r + rgb2rgb[1] * g + rgb2rgb[2] * b;
+ float gg = rgb2rgb[3] * r + rgb2rgb[4] * g + rgb2rgb[5] * b;
+ float bb = rgb2rgb[6] * r + rgb2rgb[7] * g + rgb2rgb[8] * b;
+ return (float3)(rr, gg, bb);
+#endif
+}
+
+float3 ootf(float3 c, float peak) {
+#ifdef ootf_impl
+ return ootf_impl(c, peak);
+#else
+ return c;
+#endif
+}
+
+float3 inverse_ootf(float3 c, float peak) {
+#ifdef inverse_ootf_impl
+ return inverse_ootf_impl(c, peak);
+#else
+ return c;
+#endif
+}
diff --git a/libavfilter/opencl/tonemap.cl b/libavfilter/opencl/tonemap.cl
new file mode 100644
index 0000000..9448ba4
--- /dev/null
+++ b/libavfilter/opencl/tonemap.cl
@@ -0,0 +1,272 @@
+/*
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#define REFERENCE_WHITE 100.0f
+extern float3 lrgb2yuv(float3);
+extern float lrgb2y(float3);
+extern float3 yuv2lrgb(float3);
+extern float3 lrgb2lrgb(float3);
+extern float get_luma_src(float3);
+extern float get_luma_dst(float3);
+extern float3 ootf(float3 c, float peak);
+extern float3 inverse_ootf(float3 c, float peak);
+extern float3 get_chroma_sample(float3, float3, float3, float3);
+
+struct detection_result {
+ float peak;
+ float average;
+};
+
+float hable_f(float in) {
+ float a = 0.15f, b = 0.50f, c = 0.10f, d = 0.20f, e = 0.02f, f = 0.30f;
+ return (in * (in * a + b * c) + d * e) / (in * (in * a + b) + d * f) - e / f;
+}
+
+float direct(float s, float peak) {
+ return s;
+}
+
+float linear(float s, float peak) {
+ return s * tone_param / peak;
+}
+
+float gamma(float s, float peak) {
+ float p = s > 0.05f ? s /peak : 0.05f / peak;
+ float v = powr(p, 1.0f / tone_param);
+ return s > 0.05f ? v : (s * v /0.05f);
+}
+
+float clip(float s, float peak) {
+ return clamp(s * tone_param, 0.0f, 1.0f);
+}
+
+float reinhard(float s, float peak) {
+ return s / (s + tone_param) * (peak + tone_param) / peak;
+}
+
+float hable(float s, float peak) {
+ return hable_f(s)/hable_f(peak);
+}
+
+float mobius(float s, float peak) {
+ float j = tone_param;
+ float a, b;
+
+ if (s <= j)
+ return s;
+
+ a = -j * j * (peak - 1.0f) / (j * j - 2.0f * j + peak);
+ b = (j * j - 2.0f * j * peak + peak) / max(peak - 1.0f, 1e-6f);
+
+ return (b * b + 2.0f * b * j + j * j) / (b - a) * (s + a) / (s + b);
+}
+
+// detect peak/average signal of a frame, the algorithm was ported from:
+// libplacebo (https://github.com/haasn/libplacebo)
+struct detection_result
+detect_peak_avg(global uint *util_buf, __local uint *sum_wg,
+ float signal, float peak) {
+// layout of the util buffer
+//
+// Name: : Size (units of 4-bytes)
+// average buffer : detection_frames + 1
+// peak buffer : detection_frames + 1
+// workgroup counter : 1
+// total of peak : 1
+// total of average : 1
+// frame index : 1
+// frame number : 1
+ global uint *avg_buf = util_buf;
+ global uint *peak_buf = avg_buf + DETECTION_FRAMES + 1;
+ global uint *counter_wg_p = peak_buf + DETECTION_FRAMES + 1;
+ global uint *max_total_p = counter_wg_p + 1;
+ global uint *avg_total_p = max_total_p + 1;
+ global uint *frame_idx_p = avg_total_p + 1;
+ global uint *scene_frame_num_p = frame_idx_p + 1;
+
+ uint frame_idx = *frame_idx_p;
+ uint scene_frame_num = *scene_frame_num_p;
+
+ size_t lidx = get_local_id(0);
+ size_t lidy = get_local_id(1);
+ size_t lsizex = get_local_size(0);
+ size_t lsizey = get_local_size(1);
+ uint num_wg = get_num_groups(0) * get_num_groups(1);
+ size_t group_idx = get_group_id(0);
+ size_t group_idy = get_group_id(1);
+ struct detection_result r = {peak, sdr_avg};
+ if (lidx == 0 && lidy == 0)
+ *sum_wg = 0;
+ barrier(CLK_LOCAL_MEM_FENCE);
+
+ // update workgroup sum
+ atomic_add(sum_wg, (uint)(signal * REFERENCE_WHITE));
+ barrier(CLK_LOCAL_MEM_FENCE);
+
+ // update frame peak/avg using work-group-average.
+ if (lidx == 0 && lidy == 0) {
+ uint avg_wg = *sum_wg / (lsizex * lsizey);
+ atomic_max(&peak_buf[frame_idx], avg_wg);
+ atomic_add(&avg_buf[frame_idx], avg_wg);
+ }
+
+ if (scene_frame_num > 0) {
+ float peak = (float)*max_total_p / (REFERENCE_WHITE * scene_frame_num);
+ float avg = (float)*avg_total_p / (REFERENCE_WHITE * scene_frame_num);
+ r.peak = max(1.0f, peak);
+ r.average = max(0.25f, avg);
+ }
+
+ if (lidx == 0 && lidy == 0 && atomic_add(counter_wg_p, 1) == num_wg - 1) {
+ *counter_wg_p = 0;
+ avg_buf[frame_idx] /= num_wg;
+
+ if (scene_threshold > 0.0f) {
+ uint cur_max = peak_buf[frame_idx];
+ uint cur_avg = avg_buf[frame_idx];
+ int diff = (int)(scene_frame_num * cur_avg) - (int)*avg_total_p;
+
+ if (abs(diff) > scene_frame_num * scene_threshold * REFERENCE_WHITE) {
+ for (uint i = 0; i < DETECTION_FRAMES + 1; i++)
+ avg_buf[i] = 0;
+ for (uint i = 0; i < DETECTION_FRAMES + 1; i++)
+ peak_buf[i] = 0;
+ *avg_total_p = *max_total_p = 0;
+ *scene_frame_num_p = 0;
+ avg_buf[frame_idx] = cur_avg;
+ peak_buf[frame_idx] = cur_max;
+ }
+ }
+ uint next = (frame_idx + 1) % (DETECTION_FRAMES + 1);
+ // add current frame, subtract next frame
+ *max_total_p += peak_buf[frame_idx] - peak_buf[next];
+ *avg_total_p += avg_buf[frame_idx] - avg_buf[next];
+ // reset next frame
+ peak_buf[next] = avg_buf[next] = 0;
+ *frame_idx_p = next;
+ *scene_frame_num_p = min(*scene_frame_num_p + 1,
+ (uint)DETECTION_FRAMES);
+ }
+ return r;
+}
+
+float3 map_one_pixel_rgb(float3 rgb, float peak, float average) {
+ float sig = max(max(rgb.x, max(rgb.y, rgb.z)), 1e-6f);
+
+ // Rescale the variables in order to bring it into a representation where
+ // 1.0 represents the dst_peak. This is because all of the tone mapping
+ // algorithms are defined in such a way that they map to the range [0.0, 1.0].
+ if (target_peak > 1.0f) {
+ sig *= 1.0f / target_peak;
+ peak *= 1.0f / target_peak;
+ }
+
+ float sig_old = sig;
+
+ // Scale the signal to compensate for differences in the average brightness
+ float slope = min(1.0f, sdr_avg / average);
+ sig *= slope;
+ peak *= slope;
+
+ // Desaturate the color using a coefficient dependent on the signal level
+ if (desat_param > 0.0f) {
+ float luma = get_luma_dst(rgb);
+ float coeff = max(sig - 0.18f, 1e-6f) / max(sig, 1e-6f);
+ coeff = native_powr(coeff, 10.0f / desat_param);
+ rgb = mix(rgb, (float3)luma, (float3)coeff);
+ sig = mix(sig, luma * slope, coeff);
+ }
+
+ sig = TONE_FUNC(sig, peak);
+
+ sig = min(sig, 1.0f);
+ rgb *= (sig/sig_old);
+ return rgb;
+}
+// map from source space YUV to destination space RGB
+float3 map_to_dst_space_from_yuv(float3 yuv, float peak) {
+ float3 c = yuv2lrgb(yuv);
+ c = ootf(c, peak);
+ c = lrgb2lrgb(c);
+ return c;
+}
+
+__kernel void tonemap(__write_only image2d_t dst1,
+ __read_only image2d_t src1,
+ __write_only image2d_t dst2,
+ __read_only image2d_t src2,
+ global uint *util_buf,
+ float peak
+ )
+{
+ __local uint sum_wg;
+ const sampler_t sampler = (CLK_NORMALIZED_COORDS_FALSE |
+ CLK_ADDRESS_CLAMP_TO_EDGE |
+ CLK_FILTER_NEAREST);
+ int xi = get_global_id(0);
+ int yi = get_global_id(1);
+ // each work item process four pixels
+ int x = 2 * xi;
+ int y = 2 * yi;
+
+ float y0 = read_imagef(src1, sampler, (int2)(x, y)).x;
+ float y1 = read_imagef(src1, sampler, (int2)(x + 1, y)).x;
+ float y2 = read_imagef(src1, sampler, (int2)(x, y + 1)).x;
+ float y3 = read_imagef(src1, sampler, (int2)(x + 1, y + 1)).x;
+ float2 uv = read_imagef(src2, sampler, (int2)(xi, yi)).xy;
+
+ float3 c0 = map_to_dst_space_from_yuv((float3)(y0, uv.x, uv.y), peak);
+ float3 c1 = map_to_dst_space_from_yuv((float3)(y1, uv.x, uv.y), peak);
+ float3 c2 = map_to_dst_space_from_yuv((float3)(y2, uv.x, uv.y), peak);
+ float3 c3 = map_to_dst_space_from_yuv((float3)(y3, uv.x, uv.y), peak);
+
+ float sig0 = max(c0.x, max(c0.y, c0.z));
+ float sig1 = max(c1.x, max(c1.y, c1.z));
+ float sig2 = max(c2.x, max(c2.y, c2.z));
+ float sig3 = max(c3.x, max(c3.y, c3.z));
+ float sig = max(sig0, max(sig1, max(sig2, sig3)));
+
+ struct detection_result r = detect_peak_avg(util_buf, &sum_wg, sig, peak);
+
+ float3 c0_old = c0, c1_old = c1, c2_old = c2;
+ c0 = map_one_pixel_rgb(c0, r.peak, r.average);
+ c1 = map_one_pixel_rgb(c1, r.peak, r.average);
+ c2 = map_one_pixel_rgb(c2, r.peak, r.average);
+ c3 = map_one_pixel_rgb(c3, r.peak, r.average);
+
+ c0 = inverse_ootf(c0, target_peak);
+ c1 = inverse_ootf(c1, target_peak);
+ c2 = inverse_ootf(c2, target_peak);
+ c3 = inverse_ootf(c3, target_peak);
+
+ y0 = lrgb2y(c0);
+ y1 = lrgb2y(c1);
+ y2 = lrgb2y(c2);
+ y3 = lrgb2y(c3);
+ float3 chroma_c = get_chroma_sample(c0, c1, c2, c3);
+ float3 chroma = lrgb2yuv(chroma_c);
+
+ if (xi < get_image_width(dst2) && yi < get_image_height(dst2)) {
+ write_imagef(dst1, (int2)(x, y), (float4)(y0, 0.0f, 0.0f, 1.0f));
+ write_imagef(dst1, (int2)(x+1, y), (float4)(y1, 0.0f, 0.0f, 1.0f));
+ write_imagef(dst1, (int2)(x, y+1), (float4)(y2, 0.0f, 0.0f, 1.0f));
+ write_imagef(dst1, (int2)(x+1, y+1), (float4)(y3, 0.0f, 0.0f, 1.0f));
+ write_imagef(dst2, (int2)(xi, yi),
+ (float4)(chroma.y, chroma.z, 0.0f, 1.0f));
+ }
+}
diff --git a/libavfilter/opencl_source.h b/libavfilter/opencl_source.h
index 4bb9969..a241bdb 100644
--- a/libavfilter/opencl_source.h
+++ b/libavfilter/opencl_source.h
@@ -21,7 +21,9 @@
extern const char *ff_opencl_source_avgblur;
extern const char *ff_opencl_source_convolution;
+extern const char *ff_opencl_source_colorspace_common;
extern const char *ff_opencl_source_overlay;
+extern const char *ff_opencl_source_tonemap;
extern const char *ff_opencl_source_unsharp;
#endif /* AVFILTER_OPENCL_SOURCE_H */
diff --git a/libavfilter/vf_tonemap_opencl.c b/libavfilter/vf_tonemap_opencl.c
new file mode 100644
index 0000000..a4c45ab
--- /dev/null
+++ b/libavfilter/vf_tonemap_opencl.c
@@ -0,0 +1,657 @@
+/*
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+#include <float.h>
+
+#include "libavutil/avassert.h"
+#include "libavutil/bprint.h"
+#include "libavutil/common.h"
+#include "libavutil/imgutils.h"
+#include "libavutil/mastering_display_metadata.h"
+#include "libavutil/mem.h"
+#include "libavutil/opt.h"
+#include "libavutil/pixdesc.h"
+
+#include "avfilter.h"
+#include "internal.h"
+#include "opencl.h"
+#include "opencl_source.h"
+#include "video.h"
+#include "colorspace.h"
+
+// TODO:
+// - seperate peak-detection from tone-mapping kernel to solve
+// one-frame-delay issue.
+// - import colorspace matrix generation from vf_colorspace.c
+// - more format support
+
+#define DETECTION_FRAMES 63
+#define REFERENCE_WHITE 100.0f
+
+enum TonemapAlgorithm {
+ TONEMAP_NONE,
+ TONEMAP_LINEAR,
+ TONEMAP_GAMMA,
+ TONEMAP_CLIP,
+ TONEMAP_REINHARD,
+ TONEMAP_HABLE,
+ TONEMAP_MOBIUS,
+ TONEMAP_MAX,
+};
+
+typedef struct TonemapOpenCLContext {
+ OpenCLFilterContext ocf;
+
+ enum AVColorSpace colorspace, colorspace_in, colorspace_out;
+ enum AVColorTransferCharacteristic trc, trc_in, trc_out;
+ enum AVColorPrimaries primaries, primaries_in, primaries_out;
+ enum AVColorRange range, range_in, range_out;
+ enum AVChromaLocation chroma_loc;
+
+ enum TonemapAlgorithm tonemap;
+ enum AVPixelFormat format;
+ double peak;
+ double param;
+ double desat_param;
+ double target_peak;
+ double scene_threshold;
+ int initialised;
+ cl_kernel kernel;
+ cl_command_queue command_queue;
+ cl_mem util_mem;
+} TonemapOpenCLContext;
+
+const char *yuv_coff[AVCOL_SPC_NB] = {
+ [AVCOL_SPC_BT709] = "rgb2yuv_bt709",
+ [AVCOL_SPC_BT2020_NCL] = "rgb2yuv_bt2020",
+};
+
+const char *rgb_coff[AVCOL_SPC_NB] = {
+ [AVCOL_SPC_BT709] = "yuv2rgb_bt709",
+ [AVCOL_SPC_BT2020_NCL] = "yuv2rgb_bt2020",
+};
+
+const char *linearize_funcs[AVCOL_TRC_NB] = {
+ [AVCOL_TRC_SMPTE2084] = "eotf_st2084",
+ [AVCOL_TRC_ARIB_STD_B67] = "inverse_oetf_hlg",
+};
+
+const char *delinearize_funcs[AVCOL_TRC_NB] = {
+ [AVCOL_TRC_BT709] = "inverse_eotf_bt1886",
+ [AVCOL_TRC_BT2020_10] = "inverse_eotf_bt1886",
+};
+
+static const struct LumaCoefficients luma_coefficients[AVCOL_SPC_NB] = {
+ [AVCOL_SPC_BT709] = { 0.2126, 0.7152, 0.0722 },
+ [AVCOL_SPC_BT2020_NCL] = { 0.2627, 0.6780, 0.0593 },
+};
+
+struct PrimaryCoefficients primaries_table[AVCOL_PRI_NB] = {
+ [AVCOL_PRI_BT709] = { 0.640, 0.330, 0.300, 0.600, 0.150, 0.060 },
+ [AVCOL_PRI_BT2020] = { 0.708, 0.292, 0.170, 0.797, 0.131, 0.046 },
+};
+
+struct WhitepointCoefficients whitepoint_table[AVCOL_PRI_NB] = {
+ [AVCOL_PRI_BT709] = { 0.3127, 0.3290 },
+ [AVCOL_PRI_BT2020] = { 0.3127, 0.3290 },
+};
+
+const char *tonemap_func[TONEMAP_MAX] = {
+ [TONEMAP_NONE] = "direct",
+ [TONEMAP_LINEAR] = "linear",
+ [TONEMAP_GAMMA] = "gamma",
+ [TONEMAP_CLIP] = "clip",
+ [TONEMAP_REINHARD] = "reinhard",
+ [TONEMAP_HABLE] = "hable",
+ [TONEMAP_MOBIUS] = "mobius",
+};
+
+static void get_rgb2rgb_matrix(enum AVColorPrimaries in, enum AVColorPrimaries out,
+ double rgb2rgb[3][3]) {
+ double rgb2xyz[3][3], xyz2rgb[3][3];
+
+ fill_rgb2xyz_table(&primaries_table[out], &whitepoint_table[out], rgb2xyz);
+ invert_matrix3x3(rgb2xyz, xyz2rgb);
+ fill_rgb2xyz_table(&primaries_table[in], &whitepoint_table[in], rgb2xyz);
+ mul3x3(rgb2rgb, rgb2xyz, xyz2rgb);
+}
+
+#define OPENCL_SOURCE_NB 3
+// Average light level for SDR signals. This is equal to a signal level of 0.5
+// under a typical presentation gamma of about 2.0.
+static const float sdr_avg = 0.25f;
+
+static int tonemap_opencl_init(AVFilterContext *avctx)
+{
+ TonemapOpenCLContext *ctx = avctx->priv;
+ int rgb2rgb_passthrough = 1;
+ double rgb2rgb[3][3];
+ struct LumaCoefficients luma_src, luma_dst;
+ cl_int cle;
+ int err;
+ AVBPrint header;
+ const char *opencl_sources[OPENCL_SOURCE_NB];
+
+ av_bprint_init(&header, 1024, AV_BPRINT_SIZE_AUTOMATIC);
+
+ switch(ctx->tonemap) {
+ case TONEMAP_GAMMA:
+ if (isnan(ctx->param))
+ ctx->param = 1.8f;
+ break;
+ case TONEMAP_REINHARD:
+ if (!isnan(ctx->param))
+ ctx->param = (1.0f - ctx->param) / ctx->param;
+ break;
+ case TONEMAP_MOBIUS:
+ if (isnan(ctx->param))
+ ctx->param = 0.3f;
+ break;
+ }
+
+ if (isnan(ctx->param))
+ ctx->param = 1.0f;
+
+ // SDR peak is 1.0f
+ ctx->target_peak = 1.0f;
+ av_log(ctx, AV_LOG_DEBUG, "tone mapping transfer from %s to %s\n",
+ av_color_transfer_name(ctx->trc_in),
+ av_color_transfer_name(ctx->trc_out));
+ av_log(ctx, AV_LOG_DEBUG, "mapping colorspace from %s to %s\n",
+ av_color_space_name(ctx->colorspace_in),
+ av_color_space_name(ctx->colorspace_out));
+ av_log(ctx, AV_LOG_DEBUG, "mapping primaries from %s to %s\n",
+ av_color_primaries_name(ctx->primaries_in),
+ av_color_primaries_name(ctx->primaries_out));
+ av_log(ctx, AV_LOG_DEBUG, "mapping range from %s to %s\n",
+ av_color_range_name(ctx->range_in),
+ av_color_range_name(ctx->range_out));
+ // checking valid value just because of limited implementaion
+ // please remove when more functionalities are implemented
+ av_assert0(ctx->trc_out == AVCOL_TRC_BT709 ||
+ ctx->trc_out == AVCOL_TRC_BT2020_10);
+ av_assert0(ctx->trc_in == AVCOL_TRC_SMPTE2084||
+ ctx->trc_in == AVCOL_TRC_ARIB_STD_B67);
+ av_assert0(ctx->colorspace_in == AVCOL_SPC_BT2020_NCL ||
+ ctx->colorspace_in == AVCOL_SPC_BT709);
+ av_assert0(ctx->primaries_in == AVCOL_PRI_BT2020 ||
+ ctx->primaries_in == AVCOL_PRI_BT709);
+
+ av_bprintf(&header, "__constant const float tone_param = %.4ff;\n",
+ ctx->param);
+ av_bprintf(&header, "__constant const float desat_param = %.4ff;\n",
+ ctx->desat_param);
+ av_bprintf(&header, "__constant const float target_peak = %.4ff;\n",
+ ctx->target_peak);
+ av_bprintf(&header, "__constant const float sdr_avg = %.4ff;\n", sdr_avg);
+ av_bprintf(&header, "__constant const float scene_threshold = %.4ff;\n",
+ ctx->scene_threshold);
+ av_bprintf(&header, "#define TONE_FUNC %s\n", tonemap_func[ctx->tonemap]);
+ av_bprintf(&header, "#define DETECTION_FRAMES %d\n", DETECTION_FRAMES);
+
+ if (ctx->primaries_out != ctx->primaries_in) {
+ get_rgb2rgb_matrix(ctx->primaries_in, ctx->primaries_out, rgb2rgb);
+ rgb2rgb_passthrough = 0;
+ }
+ if (ctx->range_in == AVCOL_RANGE_JPEG)
+ av_bprintf(&header, "#define FULL_RANGE_IN\n");
+
+ if (ctx->range_out == AVCOL_RANGE_JPEG)
+ av_bprintf(&header, "#define FULL_RANGE_OUT\n");
+
+ av_bprintf(&header, "#define chroma_loc %d\n", (int)ctx->chroma_loc);
+
+ if (rgb2rgb_passthrough)
+ av_bprintf(&header, "#define RGB2RGB_PASSTHROUGH\n");
+ else {
+ av_bprintf(&header, "__constant float rgb2rgb[9] = {\n");
+ av_bprintf(&header, " %.4ff, %.4ff, %.4ff,\n",
+ rgb2rgb[0][0], rgb2rgb[0][1], rgb2rgb[0][2]);
+ av_bprintf(&header, " %.4ff, %.4ff, %.4ff,\n",
+ rgb2rgb[1][0], rgb2rgb[1][1], rgb2rgb[1][2]);
+ av_bprintf(&header, " %.4ff, %.4ff, %.4ff};\n",
+ rgb2rgb[2][0], rgb2rgb[2][1], rgb2rgb[2][2]);
+ }
+
+ av_bprintf(&header, "#define rgb_matrix %s\n",
+ rgb_coff[ctx->colorspace_in]);
+ av_bprintf(&header, "#define yuv_matrix %s\n",
+ yuv_coff[ctx->colorspace_out]);
+
+ luma_src = luma_coefficients[ctx->colorspace_in];
+ luma_dst = luma_coefficients[ctx->colorspace_out];
+ av_bprintf(&header, "constant float3 luma_src = {%.4ff, %.4ff, %.4ff};\n",
+ luma_src.cr, luma_src.cg, luma_src.cb);
+ av_bprintf(&header, "constant float3 luma_dst = {%.4ff, %.4ff, %.4ff};\n",
+ luma_dst.cr, luma_dst.cg, luma_dst.cb);
+
+ av_bprintf(&header, "#define linearize %s\n", linearize_funcs[ctx->trc_in]);
+ av_bprintf(&header, "#define delinearize %s\n",
+ delinearize_funcs[ctx->trc_out]);
+
+ if (ctx->trc_in == AVCOL_TRC_ARIB_STD_B67)
+ av_bprintf(&header, "#define ootf_impl ootf_hlg\n");
+
+ if (ctx->trc_out == AVCOL_TRC_ARIB_STD_B67)
+ av_bprintf(&header, "#define inverse_ootf_impl inverse_ootf_hlg\n");
+
+ av_log(avctx, AV_LOG_DEBUG, "Generated OpenCL header:\n%s\n", header.str);
+ opencl_sources[0] = header.str;
+ opencl_sources[1] = ff_opencl_source_tonemap;
+ opencl_sources[2] = ff_opencl_source_colorspace_common;
+ err = ff_opencl_filter_load_program(avctx, opencl_sources, OPENCL_SOURCE_NB);
+
+ av_bprint_finalize(&header, NULL);
+ if (err < 0)
+ goto fail;
+
+ ctx->command_queue = clCreateCommandQueue(ctx->ocf.hwctx->context,
+ ctx->ocf.hwctx->device_id,
+ 0, &cle);
+ if (!ctx->command_queue) {
+ av_log(avctx, AV_LOG_ERROR, "Failed to create OpenCL "
+ "command queue: %d.\n", cle);
+ err = AVERROR(EIO);
+ goto fail;
+ }
+
+ ctx->kernel = clCreateKernel(ctx->ocf.program, "tonemap", &cle);
+ if (!ctx->kernel) {
+ av_log(avctx, AV_LOG_ERROR, "Failed to create kernel: %d.\n", cle);
+ err = AVERROR(EIO);
+ goto fail;
+ }
+
+ ctx->util_mem =
+ clCreateBuffer(ctx->ocf.hwctx->context, 0,
+ (2 * DETECTION_FRAMES + 7) * sizeof(unsigned),
+ NULL, &cle);
+ if (cle != CL_SUCCESS) {
+ av_log(avctx, AV_LOG_ERROR, "Failed to create util buffer: %d.\n", cle);
+ err = AVERROR(EIO);
+ goto fail;
+ }
+
+ ctx->initialised = 1;
+ return 0;
+
+fail:
+ if (ctx->util_mem)
+ clReleaseMemObject(ctx->util_mem);
+ if (ctx->command_queue)
+ clReleaseCommandQueue(ctx->command_queue);
+ if (ctx->kernel)
+ clReleaseKernel(ctx->kernel);
+ return err;
+}
+
+static int tonemap_opencl_config_output(AVFilterLink *outlink)
+{
+ AVFilterContext *avctx = outlink->src;
+ TonemapOpenCLContext *s = avctx->priv;
+ int ret;
+ if (s->format == AV_PIX_FMT_NONE)
+ av_log(avctx, AV_LOG_WARNING, "format not set, use default format NV12\n");
+ else {
+ if (s->format != AV_PIX_FMT_P010 &&
+ s->format != AV_PIX_FMT_NV12) {
+ av_log(avctx, AV_LOG_ERROR, "unsupported output format,"
+ "only p010/nv12 supported now\n");
+ return AVERROR(EINVAL);
+ }
+ }
+
+ s->ocf.output_format = s->format == AV_PIX_FMT_NONE ? AV_PIX_FMT_NV12 : s->format;
+ ret = ff_opencl_filter_config_output(outlink);
+ if (ret < 0)
+ return ret;
+
+ return 0;
+}
+
+static int launch_kernel(AVFilterContext *avctx, cl_kernel kernel,
+ AVFrame *output, AVFrame *input, float peak) {
+ TonemapOpenCLContext *ctx = avctx->priv;
+ int err = AVERROR(ENOSYS);
+ size_t global_work[2];
+ size_t local_work[2];
+ cl_int cle;
+
+ cle = clSetKernelArg(kernel, 0, sizeof(cl_mem), &output->data[0]);
+ if (cle != CL_SUCCESS) {
+ av_log(avctx, AV_LOG_ERROR, "Failed to set kernel "
+ "destination image 1st plane: %d.\n", cle);
+ return AVERROR(EINVAL);
+ }
+
+ cle = clSetKernelArg(kernel, 1, sizeof(cl_mem), &input->data[0]);
+ if (cle != CL_SUCCESS) {
+ av_log(avctx, AV_LOG_ERROR, "Failed to set kernel "
+ "source image 1st plane: %d.\n", cle);
+ return AVERROR(EINVAL);
+ }
+
+ cle = clSetKernelArg(kernel, 2, sizeof(cl_mem), &output->data[1]);
+ if (cle != CL_SUCCESS) {
+ av_log(avctx, AV_LOG_ERROR, "Failed to set kernel "
+ "destination image 2nd plane: %d.\n", cle);
+ return AVERROR(EINVAL);
+ }
+
+ cle = clSetKernelArg(kernel, 3, sizeof(cl_mem), &input->data[1]);
+ if (cle != CL_SUCCESS) {
+ av_log(avctx, AV_LOG_ERROR, "Failed to set kernel "
+ "source image 2nd plane: %d.\n", cle);
+ return AVERROR(EINVAL);
+ }
+
+ cle = clSetKernelArg(kernel, 4, sizeof(cl_mem), &ctx->util_mem);
+ if (cle != CL_SUCCESS) {
+ av_log(avctx, AV_LOG_ERROR, "Failed to set kernel "
+ "source image 2nd plane: %d.\n", cle);
+ return AVERROR(EINVAL);
+ }
+
+ cle = clSetKernelArg(kernel, 5, sizeof(cl_float), &peak);
+ if (cle != CL_SUCCESS) {
+ av_log(avctx, AV_LOG_ERROR, "Failed to set kernel "
+ "peak luma: %d.\n", cle);
+ return AVERROR(EINVAL);
+ }
+
+ local_work[0] = 16;
+ local_work[1] = 16;
+ // Note the work size based on uv plane, as we process a 2x2 quad in one workitem
+ err = ff_opencl_filter_work_size_from_image(avctx, global_work, output,
+ 1, 16);
+ if (err < 0)
+ return err;
+
+ cle = clEnqueueNDRangeKernel(ctx->command_queue, kernel, 2, NULL,
+ global_work, local_work,
+ 0, NULL, NULL);
+ if (cle != CL_SUCCESS) {
+ av_log(avctx, AV_LOG_ERROR, "Failed to enqueue kernel: %d.\n",
+ cle);
+ return AVERROR(EIO);
+ }
+ return 0;
+}
+
+static double determine_signal_peak(AVFrame *in)
+{
+ AVFrameSideData *sd = av_frame_get_side_data(in, AV_FRAME_DATA_CONTENT_LIGHT_LEVEL);
+ double peak = 0;
+
+ if (sd) {
+ AVContentLightMetadata *clm = (AVContentLightMetadata *)sd->data;
+ peak = clm->MaxCLL / REFERENCE_WHITE;
+ }
+
+ sd = av_frame_get_side_data(in, AV_FRAME_DATA_MASTERING_DISPLAY_METADATA);
+ if (!peak && sd) {
+ AVMasteringDisplayMetadata *metadata = (AVMasteringDisplayMetadata *)sd->data;
+ if (metadata->has_luminance)
+ peak = av_q2d(metadata->max_luminance) / REFERENCE_WHITE;
+ }
+
+ // For untagged source, use peak of 10000 if SMPTE ST.2084
+ // otherwise assume HLG with reference display peak 1000.
+ if (!peak)
+ peak = in->color_trc == AVCOL_TRC_SMPTE2084 ? 100.0f : 10.0f;
+
+ return peak;
+}
+
+static void update_metadata(AVFrame *in, double peak) {
+ AVFrameSideData *sd = av_frame_get_side_data(in, AV_FRAME_DATA_CONTENT_LIGHT_LEVEL);
+
+ if (sd) {
+ AVContentLightMetadata *clm = (AVContentLightMetadata *)sd->data;
+ clm->MaxCLL = (unsigned)(peak * REFERENCE_WHITE);
+ }
+
+ sd = av_frame_get_side_data(in, AV_FRAME_DATA_MASTERING_DISPLAY_METADATA);
+ if (sd) {
+ AVMasteringDisplayMetadata *metadata = (AVMasteringDisplayMetadata *)sd->data;
+ if (metadata->has_luminance)
+ metadata->max_luminance =av_d2q(peak * REFERENCE_WHITE, 10000);
+ }
+}
+
+static int tonemap_opencl_filter_frame(AVFilterLink *inlink, AVFrame *input)
+{
+ AVFilterContext *avctx = inlink->dst;
+ AVFilterLink *outlink = avctx->outputs[0];
+ TonemapOpenCLContext *ctx = avctx->priv;
+ AVFrame *output = NULL;
+ cl_int cle;
+ int err;
+ double peak = ctx->peak;
+
+ AVHWFramesContext *input_frames_ctx =
+ (AVHWFramesContext*)input->hw_frames_ctx->data;
+
+ av_log(ctx, AV_LOG_DEBUG, "Filter input: %s, %ux%u (%"PRId64").\n",
+ av_get_pix_fmt_name(input->format),
+ input->width, input->height, input->pts);
+
+ if (!input->hw_frames_ctx)
+ return AVERROR(EINVAL);
+
+ output = ff_get_video_buffer(outlink, outlink->w, outlink->h);
+ if (!output) {
+ err = AVERROR(ENOMEM);
+ goto fail;
+ }
+
+ err = av_frame_copy_props(output, input);
+ if (err < 0)
+ goto fail;
+
+ if (!peak)
+ peak = determine_signal_peak(input);
+
+ if (ctx->trc != -1)
+ output->color_trc = ctx->trc;
+ if (ctx->primaries != -1)
+ output->color_primaries = ctx->primaries;
+ if (ctx->colorspace != -1)
+ output->colorspace = ctx->colorspace;
+ if (ctx->range != -1)
+ output->color_range = ctx->range;
+
+ ctx->trc_in = input->color_trc;
+ ctx->trc_out = output->color_trc;
+ ctx->colorspace_in = input->colorspace;
+ ctx->colorspace_out = output->colorspace;
+ ctx->primaries_in = input->color_primaries;
+ ctx->primaries_out = output->color_primaries;
+ ctx->range_in = input->color_range;
+ ctx->range_out = output->color_range;
+ ctx->chroma_loc = output->chroma_location;
+
+ if (!ctx->initialised) {
+ if (!(input->color_trc == AVCOL_TRC_SMPTE2084 ||
+ input->color_trc == AVCOL_TRC_ARIB_STD_B67)) {
+ av_log(ctx, AV_LOG_ERROR, "unsupported transfer function characteristic.\n");
+ err = AVERROR(ENOSYS);
+ goto fail;
+ }
+
+ if (input_frames_ctx->sw_format != AV_PIX_FMT_P010) {
+ av_log(ctx, AV_LOG_ERROR, "unsupported format in tonemap_opencl.\n");
+ err = AVERROR(ENOSYS);
+ goto fail;
+ }
+
+ err = tonemap_opencl_init(avctx);
+ if (err < 0)
+ goto fail;
+ }
+
+ switch(input_frames_ctx->sw_format) {
+ case AV_PIX_FMT_P010:
+ err = launch_kernel(avctx, ctx->kernel, output, input, peak);
+ if (err < 0) goto fail;
+ break;
+ default:
+ err = AVERROR(ENOSYS);
+ goto fail;
+ }
+
+ cle = clFinish(ctx->command_queue);
+ if (cle != CL_SUCCESS) {
+ av_log(avctx, AV_LOG_ERROR, "Failed to finish command queue: %d.\n",
+ cle);
+ err = AVERROR(EIO);
+ goto fail;
+ }
+
+ av_frame_free(&input);
+
+ update_metadata(output, ctx->target_peak);
+
+ av_log(ctx, AV_LOG_DEBUG, "Tone-mapping output: %s, %ux%u (%"PRId64").\n",
+ av_get_pix_fmt_name(output->format),
+ output->width, output->height, output->pts);
+#ifndef NDEBUG
+ {
+ uint32_t *ptr, *max_total_p, *avg_total_p, *frame_number_p;
+ float peak_detected, avg_detected;
+ unsigned map_size = (2 * DETECTION_FRAMES + 7) * sizeof(unsigned);
+ ptr = (void *)clEnqueueMapBuffer(ctx->command_queue, ctx->util_mem,
+ CL_TRUE, CL_MAP_READ, 0, map_size,
+ 0, NULL, NULL, &cle);
+ // For the layout of the util buffer, refer tonemap.cl
+ if (ptr) {
+ max_total_p = ptr + 2 * (DETECTION_FRAMES + 1) + 1;
+ avg_total_p = max_total_p + 1;
+ frame_number_p = avg_total_p + 2;
+ peak_detected = (float)*max_total_p / (REFERENCE_WHITE * (*frame_number_p));
+ avg_detected = (float)*avg_total_p / (REFERENCE_WHITE * (*frame_number_p));
+ av_log(ctx, AV_LOG_DEBUG, "peak %f, avg %f will be used for next frame\n",
+ peak_detected, avg_detected);
+ clEnqueueUnmapMemObject(ctx->command_queue, ctx->util_mem, ptr, 0,
+ NULL, NULL);
+ }
+ }
+#endif
+
+ return ff_filter_frame(outlink, output);
+
+fail:
+ clFinish(ctx->command_queue);
+ av_frame_free(&input);
+ av_frame_free(&output);
+ return err;
+}
+
+static av_cold void tonemap_opencl_uninit(AVFilterContext *avctx)
+{
+ TonemapOpenCLContext *ctx = avctx->priv;
+ cl_int cle;
+
+ if (ctx->util_mem)
+ clReleaseMemObject(ctx->util_mem);
+ if (ctx->kernel) {
+ cle = clReleaseKernel(ctx->kernel);
+ if (cle != CL_SUCCESS)
+ av_log(avctx, AV_LOG_ERROR, "Failed to release "
+ "kernel: %d.\n", cle);
+ }
+
+ if (ctx->command_queue) {
+ cle = clReleaseCommandQueue(ctx->command_queue);
+ if (cle != CL_SUCCESS)
+ av_log(avctx, AV_LOG_ERROR, "Failed to release "
+ "command queue: %d.\n", cle);
+ }
+
+ ff_opencl_filter_uninit(avctx);
+}
+
+#define OFFSET(x) offsetof(TonemapOpenCLContext, x)
+#define FLAGS (AV_OPT_FLAG_FILTERING_PARAM | AV_OPT_FLAG_VIDEO_PARAM)
+static const AVOption tonemap_opencl_options[] = {
+ { "tonemap", "tonemap algorithm selection", OFFSET(tonemap), AV_OPT_TYPE_INT, {.i64 = TONEMAP_NONE}, TONEMAP_NONE, TONEMAP_MAX - 1, FLAGS, "tonemap" },
+ { "none", 0, 0, AV_OPT_TYPE_CONST, {.i64 = TONEMAP_NONE}, 0, 0, FLAGS, "tonemap" },
+ { "linear", 0, 0, AV_OPT_TYPE_CONST, {.i64 = TONEMAP_LINEAR}, 0, 0, FLAGS, "tonemap" },
+ { "gamma", 0, 0, AV_OPT_TYPE_CONST, {.i64 = TONEMAP_GAMMA}, 0, 0, FLAGS, "tonemap" },
+ { "clip", 0, 0, AV_OPT_TYPE_CONST, {.i64 = TONEMAP_CLIP}, 0, 0, FLAGS, "tonemap" },
+ { "reinhard", 0, 0, AV_OPT_TYPE_CONST, {.i64 = TONEMAP_REINHARD}, 0, 0, FLAGS, "tonemap" },
+ { "hable", 0, 0, AV_OPT_TYPE_CONST, {.i64 = TONEMAP_HABLE}, 0, 0, FLAGS, "tonemap" },
+ { "mobius", 0, 0, AV_OPT_TYPE_CONST, {.i64 = TONEMAP_MOBIUS}, 0, 0, FLAGS, "tonemap" },
+ { "transfer", "set transfer characteristic", OFFSET(trc), AV_OPT_TYPE_INT, {.i64 = AVCOL_TRC_BT709}, -1, INT_MAX, FLAGS, "transfer" },
+ { "t", "set transfer characteristic", OFFSET(trc), AV_OPT_TYPE_INT, {.i64 = AVCOL_TRC_BT709}, -1, INT_MAX, FLAGS, "transfer" },
+ { "bt709", 0, 0, AV_OPT_TYPE_CONST, {.i64 = AVCOL_TRC_BT709}, 0, 0, FLAGS, "transfer" },
+ { "bt2020", 0, 0, AV_OPT_TYPE_CONST, {.i64 = AVCOL_TRC_BT2020_10}, 0, 0, FLAGS, "transfer" },
+ { "matrix", "set colorspace matrix", OFFSET(colorspace), AV_OPT_TYPE_INT, {.i64 = -1}, -1, INT_MAX, FLAGS, "matrix" },
+ { "m", "set colorspace matrix", OFFSET(colorspace), AV_OPT_TYPE_INT, {.i64 = -1}, -1, INT_MAX, FLAGS, "matrix" },
+ { "bt709", 0, 0, AV_OPT_TYPE_CONST, {.i64 = AVCOL_SPC_BT709}, 0, 0, FLAGS, "matrix" },
+ { "bt2020", 0, 0, AV_OPT_TYPE_CONST, {.i64 = AVCOL_SPC_BT2020_NCL}, 0, 0, FLAGS, "matrix" },
+ { "primaries", "set color primaries", OFFSET(primaries), AV_OPT_TYPE_INT, {.i64 = -1}, -1, INT_MAX, FLAGS, "primaries" },
+ { "p", "set color primaries", OFFSET(primaries), AV_OPT_TYPE_INT, {.i64 = -1}, -1, INT_MAX, FLAGS, "primaries" },
+ { "bt709", 0, 0, AV_OPT_TYPE_CONST, {.i64 = AVCOL_PRI_BT709}, 0, 0, FLAGS, "primaries" },
+ { "bt2020", 0, 0, AV_OPT_TYPE_CONST, {.i64 = AVCOL_PRI_BT2020}, 0, 0, FLAGS, "primaries" },
+ { "range", "set color range", OFFSET(range), AV_OPT_TYPE_INT, {.i64 = -1}, -1, INT_MAX, FLAGS, "range" },
+ { "r", "set color range", OFFSET(range), AV_OPT_TYPE_INT, {.i64 = -1}, -1, INT_MAX, FLAGS, "range" },
+ { "tv", 0, 0, AV_OPT_TYPE_CONST, {.i64 = AVCOL_RANGE_MPEG}, 0, 0, FLAGS, "range" },
+ { "pc", 0, 0, AV_OPT_TYPE_CONST, {.i64 = AVCOL_RANGE_JPEG}, 0, 0, FLAGS, "range" },
+ { "limited", 0, 0, AV_OPT_TYPE_CONST, {.i64 = AVCOL_RANGE_MPEG}, 0, 0, FLAGS, "range" },
+ { "full", 0, 0, AV_OPT_TYPE_CONST, {.i64 = AVCOL_RANGE_JPEG}, 0, 0, FLAGS, "range" },
+ { "format", "output pixel format", OFFSET(format), AV_OPT_TYPE_PIXEL_FMT, {.i64 = AV_PIX_FMT_NONE}, AV_PIX_FMT_NONE, INT_MAX, FLAGS, "fmt" },
+ { "peak", "signal peak override", OFFSET(peak), AV_OPT_TYPE_DOUBLE, {.dbl = 0}, 0, DBL_MAX, FLAGS },
+ { "param", "tonemap parameter", OFFSET(param), AV_OPT_TYPE_DOUBLE, {.dbl = NAN}, DBL_MIN, DBL_MAX, FLAGS },
+ { "desat", "desaturation parameter", OFFSET(desat_param), AV_OPT_TYPE_DOUBLE, {.dbl = 0.5}, 0, DBL_MAX, FLAGS },
+ { "threshold", "scene detection threshold", OFFSET(scene_threshold), AV_OPT_TYPE_DOUBLE, {.dbl = 0.2}, 0, DBL_MAX, FLAGS },
+ { NULL }
+};
+
+AVFILTER_DEFINE_CLASS(tonemap_opencl);
+
+static const AVFilterPad tonemap_opencl_inputs[] = {
+ {
+ .name = "default",
+ .type = AVMEDIA_TYPE_VIDEO,
+ .filter_frame = &tonemap_opencl_filter_frame,
+ .config_props = &ff_opencl_filter_config_input,
+ },
+ { NULL }
+};
+
+static const AVFilterPad tonemap_opencl_outputs[] = {
+ {
+ .name = "default",
+ .type = AVMEDIA_TYPE_VIDEO,
+ .config_props = &tonemap_opencl_config_output,
+ },
+ { NULL }
+};
+
+AVFilter ff_vf_tonemap_opencl = {
+ .name = "tonemap_opencl",
+ .description = NULL_IF_CONFIG_SMALL("perform HDR to SDR conversion with tonemapping"),
+ .priv_size = sizeof(TonemapOpenCLContext),
+ .priv_class = &tonemap_opencl_class,
+ .init = &ff_opencl_filter_init,
+ .uninit = &tonemap_opencl_uninit,
+ .query_formats = &ff_opencl_filter_query_formats,
+ .inputs = tonemap_opencl_inputs,
+ .outputs = tonemap_opencl_outputs,
+ .flags_internal = FF_FILTER_FLAG_HWFRAME_AWARE,
+};
--
2.7.4
More information about the ffmpeg-devel
mailing list