[FFmpeg-devel] [PATCH] vf_colorspace: add floyd-steinberg dithering option to full conversion.

Ronald S. Bultje rsbultje at gmail.com
Tue May 3 16:34:06 CEST 2016


---
 doc/filters.texi                     |  13 ++++
 libavfilter/colorspacedsp.c          |  12 ++++
 libavfilter/colorspacedsp.h          |   6 ++
 libavfilter/colorspacedsp_template.c | 128 +++++++++++++++++++++++++++++++++++
 libavfilter/vf_colorspace.c          |  53 ++++++++++++++-
 5 files changed, 210 insertions(+), 2 deletions(-)

diff --git a/doc/filters.texi b/doc/filters.texi
index b17b115..98a002b 100644
--- a/doc/filters.texi
+++ b/doc/filters.texi
@@ -5104,6 +5104,19 @@ YUV 4:4:4 planar 12-bits
 Do a fast conversion, which skips gamma/primary correction. This will take
 significantly less CPU, but will be mathematically incorrect. To get output
 compatible with that produced by the colormatrix filter, use fast=1.
+
+@item dither
+Specify dithering mode.
+
+The accepted values are:
+@table @samp
+@item none
+No dithering
+
+@item fsb
+Floyd-Steinberg dithering
+@end table
+
 @end table
 
 The filter converts the transfer characteristics, color space and color
diff --git a/libavfilter/colorspacedsp.c b/libavfilter/colorspacedsp.c
index d4c43c3..f95805b 100644
--- a/libavfilter/colorspacedsp.c
+++ b/libavfilter/colorspacedsp.c
@@ -20,6 +20,9 @@
 
 #include "colorspacedsp.h"
 
+/*
+ * SS_W/H are synonyms for AVPixFmtDescriptor->log2_chroma_w/h.
+ */
 #define SS_W 0
 #define SS_H 0
 
@@ -114,6 +117,15 @@ void ff_colorspacedsp_init(ColorSpaceDSPContext *dsp)
     init_rgb2yuv_fn(1, 10);
     init_rgb2yuv_fn(2, 12);
 
+#define init_rgb2yuv_fsb_fn(idx, bit) \
+    dsp->rgb2yuv_fsb[idx][0] = rgb2yuv_fsb_444p##bit##_c; \
+    dsp->rgb2yuv_fsb[idx][1] = rgb2yuv_fsb_422p##bit##_c; \
+    dsp->rgb2yuv_fsb[idx][2] = rgb2yuv_fsb_420p##bit##_c
+
+    init_rgb2yuv_fsb_fn(0,  8);
+    init_rgb2yuv_fsb_fn(1, 10);
+    init_rgb2yuv_fsb_fn(2, 12);
+
 #define init_yuv2yuv_fn(idx1, idx2, bit1, bit2) \
     dsp->yuv2yuv[idx1][idx2][0] = yuv2yuv_444p##bit1##to##bit2##_c; \
     dsp->yuv2yuv[idx1][idx2][1] = yuv2yuv_422p##bit1##to##bit2##_c; \
diff --git a/libavfilter/colorspacedsp.h b/libavfilter/colorspacedsp.h
index 4e70c6c..2ca7b19 100644
--- a/libavfilter/colorspacedsp.h
+++ b/libavfilter/colorspacedsp.h
@@ -32,6 +32,11 @@ typedef void (*rgb2yuv_fn)(uint8_t *yuv[3], ptrdiff_t yuv_stride[3],
                            int16_t *rgb[3], ptrdiff_t rgb_stride,
                            int w, int h, const int16_t rgb2yuv_coeffs[3][3][8],
                            const int16_t yuv_offset[8]);
+typedef void (*rgb2yuv_fsb_fn)(uint8_t *yuv[3], ptrdiff_t yuv_stride[3], /* same parameters as rgb2yuv_fn, plus rnd */
+                               int16_t *rgb[3], ptrdiff_t rgb_stride,
+                               int w, int h, const int16_t rgb2yuv_coeffs[3][3][8],
+                               const int16_t yuv_offset[8],
+                               int *rnd[3][2]); /* [plane][row] int scratch rows holding the dither rounding terms */
 typedef void (*yuv2yuv_fn)(uint8_t *yuv_out[3], ptrdiff_t yuv_out_stride[3],
                            uint8_t *yuv_in[3], ptrdiff_t yuv_in_stride[3],
                            int w, int h, const int16_t yuv2yuv_coeffs[3][3][8],
@@ -40,6 +45,7 @@ typedef void (*yuv2yuv_fn)(uint8_t *yuv_out[3], ptrdiff_t yuv_out_stride[3],
 typedef struct ColorSpaceDSPContext {
     yuv2rgb_fn yuv2rgb[3 /* 0: 8bit, 1: 10bit, 2: 12bit */][3 /* 0: 444, 1: 422, 2: 420 */];
     rgb2yuv_fn rgb2yuv[3 /* 0: 8bit, 1: 10bit, 2: 12bit */][3 /* 0: 444, 1: 422, 2: 420 */];
+    rgb2yuv_fsb_fn rgb2yuv_fsb[3 /* 0: 8bit, 1: 10bit, 2: 12bit */][3 /* 0: 444, 1: 422, 2: 420 */];
     yuv2yuv_fn yuv2yuv[3 /* in_depth */][3 /* out_depth */][3 /* 0: 444, 1: 422, 2: 420 */];
 
     void (*multiply3x3)(int16_t *data[3], ptrdiff_t stride,
diff --git a/libavfilter/colorspacedsp_template.c b/libavfilter/colorspacedsp_template.c
index f225391..db4a8d2 100644
--- a/libavfilter/colorspacedsp_template.c
+++ b/libavfilter/colorspacedsp_template.c
@@ -199,6 +199,134 @@ static void fn(rgb2yuv)(uint8_t *_yuv[3], ptrdiff_t yuv_stride[3],
     }
 }
 
+/* Floyd-Steinberg dithering: convert RGB to YUV, carrying each sample's
+ * rounding error over to its neighbours. For pixel A in the block
+ *    1 A 2
+ *    3 4 5
+ * the error goes to 2: 7/16th, 3: 3/16th, 4: 5/16th and 5: 1/16th;
+ * rnd_scratch[plane][row][x] holds the accumulated rounding terms. */
+static void fn(rgb2yuv_fsb)(uint8_t *_yuv[3], ptrdiff_t yuv_stride[3], /* output planes; strides are divided by sizeof(pixel) below */
+                            int16_t *rgb[3], ptrdiff_t s, /* input planes; s = row stride in int16_t elements */
+                            int w, int h, const int16_t rgb2yuv_coeffs[3][3][8], /* w, h: full-resolution size, shifted by SS_W/SS_H below */
+                            const int16_t yuv_offset[8], /* only element [0] is read (added to the plane 0 samples) */
+                            int *rnd_scratch[3][2]) /* [plane][row]; needs padding: index -1 and indices past the width are written */
+{
+    pixel **yuv = (pixel **) _yuv;
+    pixel *yuv0 = yuv[0], *yuv1 = yuv[1], *yuv2 = yuv[2];
+    const int16_t *rgb0 = rgb[0], *rgb1 = rgb[1], *rgb2 = rgb[2];
+    int y, x;
+    const int sh = 29 - BIT_DEPTH; /* number of low bits dropped from each weighted sum */
+    const int rnd = 1 << (sh - 1); /* half of 1 << sh: the neutral rounding term */
+    int cry = rgb2yuv_coeffs[0][0][0];
+    int cgy = rgb2yuv_coeffs[0][1][0];
+    int cby = rgb2yuv_coeffs[0][2][0];
+    int cru = rgb2yuv_coeffs[1][0][0];
+    int cgu = rgb2yuv_coeffs[1][1][0];
+    int cburv = rgb2yuv_coeffs[1][2][0]; /* used for both the B term of u and the R term of v */
+    int cgv = rgb2yuv_coeffs[2][1][0];
+    int cbv = rgb2yuv_coeffs[2][2][0];
+    ptrdiff_t s0 = yuv_stride[0] / sizeof(pixel); /* plane 0 stride in pixels */
+    const int uv_offset = 128 << (BIT_DEPTH - 8); /* added to u and v before clipping */
+    unsigned mask = (1 << sh) - 1; /* selects the bits that >> sh discards */
+
+    for (x = 0; x < w; x++) { /* plane 0 scratch rows, full width */
+        rnd_scratch[0][0][x] =
+        rnd_scratch[0][1][x] = rnd;
+    }
+    av_assert2(rgb2yuv_coeffs[1][2][0] == rgb2yuv_coeffs[2][0][0]); /* sharing cburv relies on these being equal */
+    w = AV_CEIL_RSHIFT(w, SS_W); /* from here on w and h are shifted down by SS_W / SS_H */
+    h = AV_CEIL_RSHIFT(h, SS_H);
+    for (x = 0; x < w; x++) { /* plane 1 and 2 scratch rows, shifted width */
+        rnd_scratch[1][0][x] =
+        rnd_scratch[1][1][x] =
+        rnd_scratch[2][0][x] =
+        rnd_scratch[2][1][x] = rnd;
+    }
+    for (y = 0; y < h; y++) { /* each iteration handles 1 << SS_H input rows and one u/v row */
+        for (x = 0; x < w; x++) {
+            int r00 = rgb0[x << SS_W], g00 = rgb1[x << SS_W], b00 = rgb2[x << SS_W];
+            int y00;
+#if SS_W == 1
+            int r01 = rgb0[x * 2 + 1], g01 = rgb1[x * 2 + 1], b01 = rgb2[x * 2 + 1];
+            int y01;
+#if SS_H == 1
+            int r10 = rgb0[x * 2 + 0 + s], g10 = rgb1[x * 2 + 0 + s], b10 = rgb2[x * 2 + 0 + s];
+            int r11 = rgb0[x * 2 + 1 + s], g11 = rgb1[x * 2 + 1 + s], b11 = rgb2[x * 2 + 1 + s];
+            int y10, y11;
+#endif
+#endif
+            int u, v, diff;
+
+            y00 = r00 * cry + g00 * cgy + b00 * cby + rnd_scratch[0][y & !SS_H][x << SS_W]; /* scratch row: y & 1 if SS_H == 0, else 0 */
+            diff = (y00 & mask) - rnd; /* discarded fraction minus rnd: the rounding error to diffuse */
+            yuv0[x << SS_W]      = av_clip_pixel(yuv_offset[0] + (y00 >> sh));
+            rnd_scratch[0][ (y & !SS_H)][(x << SS_W) + 1] += (diff * 7 + 8) >> 4; /* 7/16 to the right neighbour, same row */
+            rnd_scratch[0][!(y & !SS_H)][(x << SS_W) - 1] += (diff * 3 + 8) >> 4; /* 3/16 to the other row, left; index is -1 when x == 0 */
+            rnd_scratch[0][!(y & !SS_H)][(x << SS_W) + 0] += (diff * 5 + 8) >> 4; /* 5/16 to the other row, same column */
+            rnd_scratch[0][!(y & !SS_H)][(x << SS_W) + 1] += (diff * 1 + 8) >> 4; /* 1/16 to the other row, right */
+            rnd_scratch[0][ (y & !SS_H)][(x << SS_W) + 0]  = rnd; /* reset the consumed slot so it can be reused */
+#if SS_W == 1
+            y01 = r01 * cry + g01 * cgy + b01 * cby + rnd_scratch[0][y & !SS_H][x * 2 + 1]; /* second sample of the pair, same pattern */
+            diff = (y01 & mask) - rnd;
+            yuv0[x * 2 + 1]      = av_clip_pixel(yuv_offset[0] + (y01 >> sh));
+            rnd_scratch[0][ (y & !SS_H)][x * 2 + 2] += (diff * 7 + 8) >> 4;
+            rnd_scratch[0][!(y & !SS_H)][x * 2 + 0] += (diff * 3 + 8) >> 4;
+            rnd_scratch[0][!(y & !SS_H)][x * 2 + 1] += (diff * 5 + 8) >> 4;
+            rnd_scratch[0][!(y & !SS_H)][x * 2 + 2] += (diff * 1 + 8) >> 4;
+            rnd_scratch[0][ (y & !SS_H)][x * 2 + 1]  = rnd;
+#if SS_H == 1
+            y10 = r10 * cry + g10 * cgy + b10 * cby + rnd_scratch[0][1][x * 2 + 0]; /* second input row reads scratch row 1 ... */
+            diff = (y10 & mask) - rnd;
+            yuv0[x * 2 + 0 + s0] = av_clip_pixel(yuv_offset[0] + (y10 >> sh));
+            rnd_scratch[0][1][x * 2 + 1] += (diff * 7 + 8) >> 4;
+            rnd_scratch[0][0][x * 2 - 1] += (diff * 3 + 8) >> 4; /* ... and diffuses into scratch row 0 */
+            rnd_scratch[0][0][x * 2 + 0] += (diff * 5 + 8) >> 4;
+            rnd_scratch[0][0][x * 2 + 1] += (diff * 1 + 8) >> 4;
+            rnd_scratch[0][1][x * 2 + 0]  = rnd;
+
+            y11 = r11 * cry + g11 * cgy + b11 * cby + rnd_scratch[0][1][x * 2 + 1];
+            diff = (y11 & mask) - rnd;
+            yuv0[x * 2 + 1 + s0] = av_clip_pixel(yuv_offset[0] + (y11 >> sh));
+            rnd_scratch[0][1][x * 2 + 2] += (diff * 7 + 8) >> 4;
+            rnd_scratch[0][0][x * 2 + 0] += (diff * 3 + 8) >> 4;
+            rnd_scratch[0][0][x * 2 + 1] += (diff * 5 + 8) >> 4;
+            rnd_scratch[0][0][x * 2 + 2] += (diff * 1 + 8) >> 4;
+            rnd_scratch[0][1][x * 2 + 1]  = rnd;
+#endif
+#endif
+
+            u = avg(r00, r01, r10, r11) * cru + /* avg() is defined outside this hunk - TODO confirm it averages the covered samples */
+                avg(g00, g01, g10, g11) * cgu +
+                avg(b00, b01, b10, b11) * cburv + rnd_scratch[1][y & 1][x];
+            diff = (u & mask) - rnd; /* same error diffusion as above; scratch rows alternate on y & 1 */
+            yuv1[x] = av_clip_pixel(uv_offset + (u >> sh));
+            rnd_scratch[1][ (y & 1)][x + 1] += (diff * 7 + 8) >> 4;
+            rnd_scratch[1][!(y & 1)][x - 1] += (diff * 3 + 8) >> 4;
+            rnd_scratch[1][!(y & 1)][x + 0] += (diff * 5 + 8) >> 4;
+            rnd_scratch[1][!(y & 1)][x + 1] += (diff * 1 + 8) >> 4;
+            rnd_scratch[1][ (y & 1)][x + 0]  = rnd;
+
+            v = avg(r00, r01, r10, r11) * cburv +
+                avg(g00, g01, g10, g11) * cgv +
+                avg(b00, b01, b10, b11) * cbv + rnd_scratch[2][y & 1][x];
+            diff = (v & mask) - rnd; /* same error diffusion, using the plane 2 scratch rows */
+            yuv2[x] = av_clip_pixel(uv_offset + (v >> sh));
+            rnd_scratch[2][ (y & 1)][x + 1] += (diff * 7 + 8) >> 4;
+            rnd_scratch[2][!(y & 1)][x - 1] += (diff * 3 + 8) >> 4;
+            rnd_scratch[2][!(y & 1)][x + 0] += (diff * 5 + 8) >> 4;
+            rnd_scratch[2][!(y & 1)][x + 1] += (diff * 1 + 8) >> 4;
+            rnd_scratch[2][ (y & 1)][x + 0]  = rnd;
+        }
+
+        yuv0 += s0 * (1 << SS_H); /* plane 0 and the RGB inputs advance 1 << SS_H rows */
+        yuv1 += yuv_stride[1] / sizeof(pixel); /* planes 1 and 2 advance one row */
+        yuv2 += yuv_stride[2] / sizeof(pixel);
+        rgb0 += s * (1 << SS_H);
+        rgb1 += s * (1 << SS_H);
+        rgb2 += s * (1 << SS_H);
+    }
+}
+
 #undef IN_BIT_DEPTH
 #undef OUT_BIT_DEPTH
 #define OUT_BIT_DEPTH BIT_DEPTH
diff --git a/libavfilter/vf_colorspace.c b/libavfilter/vf_colorspace.c
index 4003f70..10a2486 100644
--- a/libavfilter/vf_colorspace.c
+++ b/libavfilter/vf_colorspace.c
@@ -34,6 +34,12 @@
 #include "internal.h"
 #include "video.h"
 
+enum DitherMode { /* values accepted by the "dither" option */
+    DITHER_NONE, /* default; convert() calls the plain rgb2yuv function */
+    DITHER_FSB, /* convert() calls rgb2yuv_fsb (Floyd-Steinberg dithering) */
+    DITHER_NB, /* number of modes; DITHER_NB - 1 is the option's upper bound */
+};
+
 enum Colorspace {
     CS_UNSPECIFIED,
     CS_BT470M,
@@ -121,10 +127,12 @@ typedef struct ColorSpaceContext {
     enum AVColorPrimaries in_prm, out_prm, user_prm, iprm;
     enum AVPixelFormat in_format, user_format;
     int fast_mode;
+    enum DitherMode dither;
 
     int16_t *rgb[3];
     ptrdiff_t rgb_stride;
     unsigned rgb_sz;
+    int *dither_scratch[3][2], *dither_scratch_base[3][2];
 
     const struct ColorPrimaries *in_primaries, *out_primaries;
     int lrgb2lrgb_passthrough;
@@ -142,6 +150,7 @@ typedef struct ColorSpaceContext {
     DECLARE_ALIGNED(16, int16_t, yuv_offset)[2 /* in, out */][8];
     yuv2rgb_fn yuv2rgb;
     rgb2yuv_fn rgb2yuv;
+    rgb2yuv_fsb_fn rgb2yuv_fsb;
     yuv2yuv_fn yuv2yuv;
     double yuv2rgb_dbl_coeffs[3][3], rgb2yuv_dbl_coeffs[3][3];
     int in_y_rng, in_uv_rng, out_y_rng, out_uv_rng;
@@ -481,8 +490,13 @@ static int convert(AVFilterContext *ctx, void *data, int job_nr, int n_jobs)
                 s->dsp.multiply3x3(rgb, s->rgb_stride, w, h, s->lrgb2lrgb_coeffs);
             apply_lut(rgb, s->rgb_stride, w, h, s->delin_lut);
         }
-        s->rgb2yuv(out_data, td->out_linesize, rgb, s->rgb_stride, w, h,
-                   s->rgb2yuv_coeffs, s->yuv_offset[1]);
+        if (s->dither == DITHER_FSB) {
+            s->rgb2yuv_fsb(out_data, td->out_linesize, rgb, s->rgb_stride, w, h,
+                           s->rgb2yuv_coeffs, s->yuv_offset[1], s->dither_scratch);
+        } else {
+            s->rgb2yuv(out_data, td->out_linesize, rgb, s->rgb_stride, w, h,
+                       s->rgb2yuv_coeffs, s->yuv_offset[1]);
+        }
     }
 
     return 0;
@@ -784,6 +798,8 @@ static int create_filtergraph(AVFilterContext *ctx,
             av_assert2(s->rgb2yuv_coeffs[1][2][0] == s->rgb2yuv_coeffs[2][0][0]);
             s->rgb2yuv = s->dsp.rgb2yuv[(out_desc->comp[0].depth - 8) >> 1]
                                        [out_desc->log2_chroma_h + out_desc->log2_chroma_w];
+            s->rgb2yuv_fsb = s->dsp.rgb2yuv_fsb[(out_desc->comp[0].depth - 8) >> 1]
+                                       [out_desc->log2_chroma_h + out_desc->log2_chroma_w];
             emms = 1;
         }
 
@@ -834,6 +850,12 @@ static void uninit(AVFilterContext *ctx)
     av_freep(&s->rgb[1]);
     av_freep(&s->rgb[2]);
     s->rgb_sz = 0;
+    av_freep(&s->dither_scratch_base[0][0]);
+    av_freep(&s->dither_scratch_base[0][1]);
+    av_freep(&s->dither_scratch_base[1][0]);
+    av_freep(&s->dither_scratch_base[1][1]);
+    av_freep(&s->dither_scratch_base[2][0]);
+    av_freep(&s->dither_scratch_base[2][1]);
 
     av_freep(&s->lin_lut);
 }
@@ -888,14 +910,35 @@ static int filter_frame(AVFilterLink *link, AVFrame *in)
     else if (s->iall != CS_UNSPECIFIED)
         in->color_trc = default_trc[s->iall];
     if (rgb_sz != s->rgb_sz) {
+        const AVPixFmtDescriptor *desc = av_pix_fmt_desc_get(out->format);
+        int uvw = in->width >> desc->log2_chroma_w;
+
         av_freep(&s->rgb[0]);
         av_freep(&s->rgb[1]);
         av_freep(&s->rgb[2]);
         s->rgb_sz = 0;
+        av_freep(&s->dither_scratch_base[0][0]);
+        av_freep(&s->dither_scratch_base[0][1]);
+        av_freep(&s->dither_scratch_base[1][0]);
+        av_freep(&s->dither_scratch_base[1][1]);
+        av_freep(&s->dither_scratch_base[2][0]);
+        av_freep(&s->dither_scratch_base[2][1]);
 
         s->rgb[0] = av_malloc(rgb_sz);
         s->rgb[1] = av_malloc(rgb_sz);
         s->rgb[2] = av_malloc(rgb_sz);
+        s->dither_scratch_base[0][0] = av_malloc(sizeof(int) * (in->width + 4));
+        s->dither_scratch_base[0][1] = av_malloc(sizeof(int) * (in->width + 4));
+        s->dither_scratch_base[1][0] = av_malloc(sizeof(int) * (uvw + 4));
+        s->dither_scratch_base[1][1] = av_malloc(sizeof(int) * (uvw + 4));
+        s->dither_scratch_base[2][0] = av_malloc(sizeof(int) * (uvw + 4));
+        s->dither_scratch_base[2][1] = av_malloc(sizeof(int) * (uvw + 4));
+        s->dither_scratch[0][0] = &s->dither_scratch_base[0][0][1];
+        s->dither_scratch[0][1] = &s->dither_scratch_base[0][1][1];
+        s->dither_scratch[1][0] = &s->dither_scratch_base[1][0][1];
+        s->dither_scratch[1][1] = &s->dither_scratch_base[1][1][1];
+        s->dither_scratch[2][0] = &s->dither_scratch_base[2][0][1];
+        s->dither_scratch[2][1] = &s->dither_scratch_base[2][1][1];
         if (!s->rgb[0] || !s->rgb[1] || !s->rgb[2]) {
             uninit(ctx);
             return AVERROR(ENOMEM);
@@ -1046,6 +1089,12 @@ static const AVOption colorspace_options[] = {
     { "fast",     "Ignore primary chromaticity and gamma correction",
       OFFSET(fast_mode), AV_OPT_TYPE_BOOL,  { .i64 = 0    },
       0, 1, FLAGS },
+    { "dither",   "Dithering mode",
+      OFFSET(dither), AV_OPT_TYPE_INT, { .i64 = DITHER_NONE },
+      DITHER_NONE, DITHER_NB - 1, FLAGS, "dither" },
+    ENUM("none", DITHER_NONE, "dither"),
+    ENUM("fsb",  DITHER_FSB,  "dither"),
+
     { NULL }
 };
 
-- 
2.8.1



More information about the ffmpeg-devel mailing list