[FFmpeg-devel] [PATCH v1 2/2] avfilter/vf_colorbalance: rewrite the code with macro-based functions

lance.lmwang at gmail.com lance.lmwang at gmail.com
Sun Oct 27 11:31:50 EET 2019


From: Limin Wang <lance.lmwang at gmail.com>

Rename the functions to lut_planar_##nbits and lut_packed_##nbits to make
them clearer.

Signed-off-by: Limin Wang <lance.lmwang at gmail.com>
---
 libavfilter/vf_colorbalance.c | 253 +++++++++++++++---------------------------
 1 file changed, 89 insertions(+), 164 deletions(-)

diff --git a/libavfilter/vf_colorbalance.c b/libavfilter/vf_colorbalance.c
index fd003fd..c7262ce 100644
--- a/libavfilter/vf_colorbalance.c
+++ b/libavfilter/vf_colorbalance.c
@@ -51,6 +51,7 @@ typedef struct ColorBalanceContext {
 
     uint8_t rgba_map[4];
     int step;
+    int bps;
 
     int (*apply_lut)(AVFilterContext *ctx, void *arg, int jobnr, int nb_jobs);
 } ColorBalanceContext;
@@ -96,161 +97,89 @@ static int query_formats(AVFilterContext *ctx)
     return ff_set_common_formats(ctx, fmts_list);
 }
 
-static int apply_lut8_p(AVFilterContext *ctx, void *arg, int jobnr, int nb_jobs)
-{
-    ColorBalanceContext *s = ctx->priv;
-    ThreadData *td = arg;
-    AVFrame *in = td->in;
-    AVFrame *out = td->out;
-    const int slice_start = (out->height * jobnr) / nb_jobs;
-    const int slice_end = (out->height * (jobnr+1)) / nb_jobs;
-    const uint8_t *srcg = in->data[0] + slice_start * in->linesize[0];
-    const uint8_t *srcb = in->data[1] + slice_start * in->linesize[1];
-    const uint8_t *srcr = in->data[2] + slice_start * in->linesize[2];
-    const uint8_t *srca = in->data[3] + slice_start * in->linesize[3];
-    uint8_t *dstg = out->data[0] + slice_start * out->linesize[0];
-    uint8_t *dstb = out->data[1] + slice_start * out->linesize[1];
-    uint8_t *dstr = out->data[2] + slice_start * out->linesize[2];
-    uint8_t *dsta = out->data[3] + slice_start * out->linesize[3];
-    int i, j;
-
-    for (i = slice_start; i < slice_end; i++) {
-        for (j = 0; j < out->width; j++) {
-            dstg[j] = s->lut[G][srcg[j]];
-            dstb[j] = s->lut[B][srcb[j]];
-            dstr[j] = s->lut[R][srcr[j]];
-            if (in != out && out->linesize[3])
-                dsta[j] = srca[j];
-        }
-
-        srcg += in->linesize[0];
-        srcb += in->linesize[1];
-        srcr += in->linesize[2];
-        srca += in->linesize[3];
-        dstg += out->linesize[0];
-        dstb += out->linesize[1];
-        dstr += out->linesize[2];
-        dsta += out->linesize[3];
-    }
-
-    return 0;
+#define DEF_PLANAR_LUT_FUNC(type, nbits)                                                     \
+static int lut_planar_##nbits(AVFilterContext *ctx, void *arg, int jobnr, int nb_jobs)       \
+{                                                                                            \
+    ColorBalanceContext *s = ctx->priv;                                                      \
+    ThreadData *td = arg;                                                                    \
+    AVFrame *in = td->in;                                                                    \
+    AVFrame *out = td->out;                                                                  \
+    const int slice_start = (out->height * jobnr) / nb_jobs;                                 \
+    const int slice_end = (out->height * (jobnr+1)) / nb_jobs;                               \
+    const type *srcg = (const type *)in->data[0] + slice_start * in->linesize[0] / s->bps;   \
+    const type *srcb = (const type *)in->data[1] + slice_start * in->linesize[1] / s->bps;   \
+    const type *srcr = (const type *)in->data[2] + slice_start * in->linesize[2] / s->bps;   \
+    const type *srca = (const type *)in->data[3] + slice_start * in->linesize[3] / s->bps;   \
+    type *dstg = (type *)out->data[0] + slice_start * out->linesize[0] / s->bps;             \
+    type *dstb = (type *)out->data[1] + slice_start * out->linesize[1] / s->bps;             \
+    type *dstr = (type *)out->data[2] + slice_start * out->linesize[2] / s->bps;             \
+    type *dsta = (type *)out->data[3] + slice_start * out->linesize[3] / s->bps;             \
+    int i, j;                                                                                \
+                                                                                             \
+    for (i = slice_start; i < slice_end; i++) {                                              \
+        for (j = 0; j < out->width; j++) {                                                   \
+            dstg[j] = s->lut[G][srcg[j]];                                                    \
+            dstb[j] = s->lut[B][srcb[j]];                                                    \
+            dstr[j] = s->lut[R][srcr[j]];                                                    \
+            if (in != out && out->linesize[3])                                               \
+                dsta[j] = srca[j];                                                           \
+        }                                                                                    \
+                                                                                             \
+        srcg += in->linesize[0] / s->bps;                                                    \
+        srcb += in->linesize[1] / s->bps;                                                    \
+        srcr += in->linesize[2] / s->bps;                                                    \
+        srca += in->linesize[3] / s->bps;                                                    \
+        dstg += out->linesize[0] / s->bps;                                                   \
+        dstb += out->linesize[1] / s->bps;                                                   \
+        dstr += out->linesize[2] / s->bps;                                                   \
+        dsta += out->linesize[3] / s->bps;                                                   \
+    }                                                                                        \
+                                                                                             \
+    return 0;                                                                                \
 }
-
-static int apply_lut16_p(AVFilterContext *ctx, void *arg, int jobnr, int nb_jobs)
-{
-    ColorBalanceContext *s = ctx->priv;
-    ThreadData *td = arg;
-    AVFrame *in = td->in;
-    AVFrame *out = td->out;
-    const int slice_start = (out->height * jobnr) / nb_jobs;
-    const int slice_end = (out->height * (jobnr+1)) / nb_jobs;
-    const uint16_t *srcg = (const uint16_t *)in->data[0] + slice_start * in->linesize[0] / 2;
-    const uint16_t *srcb = (const uint16_t *)in->data[1] + slice_start * in->linesize[1] / 2;
-    const uint16_t *srcr = (const uint16_t *)in->data[2] + slice_start * in->linesize[2] / 2;
-    const uint16_t *srca = (const uint16_t *)in->data[3] + slice_start * in->linesize[3] / 2;
-    uint16_t *dstg = (uint16_t *)out->data[0] + slice_start * out->linesize[0] / 2;
-    uint16_t *dstb = (uint16_t *)out->data[1] + slice_start * out->linesize[1] / 2;
-    uint16_t *dstr = (uint16_t *)out->data[2] + slice_start * out->linesize[2] / 2;
-    uint16_t *dsta = (uint16_t *)out->data[3] + slice_start * out->linesize[3] / 2;
-    int i, j;
-
-    for (i = slice_start; i < slice_end; i++) {
-        for (j = 0; j < out->width; j++) {
-            dstg[j] = s->lut[G][srcg[j]];
-            dstb[j] = s->lut[B][srcb[j]];
-            dstr[j] = s->lut[R][srcr[j]];
-            if (in != out && out->linesize[3])
-                dsta[j] = srca[j];
-        }
-
-        srcg += in->linesize[0] / 2;
-        srcb += in->linesize[1] / 2;
-        srcr += in->linesize[2] / 2;
-        srca += in->linesize[3] / 2;
-        dstg += out->linesize[0] / 2;
-        dstb += out->linesize[1] / 2;
-        dstr += out->linesize[2] / 2;
-        dsta += out->linesize[3] / 2;
-    }
-
-    return 0;
-}
-
-static int apply_lut8(AVFilterContext *ctx, void *arg, int jobnr, int nb_jobs)
-{
-    ColorBalanceContext *s = ctx->priv;
-    ThreadData *td = arg;
-    AVFrame *in = td->in;
-    AVFrame *out = td->out;
-    AVFilterLink *outlink = ctx->outputs[0];
-    const int slice_start = (out->height * jobnr) / nb_jobs;
-    const int slice_end = (out->height * (jobnr+1)) / nb_jobs;
-    const uint8_t *srcrow = in->data[0] + slice_start * in->linesize[0];
-    const uint8_t roffset = s->rgba_map[R];
-    const uint8_t goffset = s->rgba_map[G];
-    const uint8_t boffset = s->rgba_map[B];
-    const uint8_t aoffset = s->rgba_map[A];
-    const int step = s->step;
-    uint8_t *dstrow;
-    int i, j;
-
-    dstrow = out->data[0] + slice_start * out->linesize[0];
-    for (i = slice_start; i < slice_end; i++) {
-        const uint8_t *src = srcrow;
-        uint8_t *dst = dstrow;
-
-        for (j = 0; j < outlink->w * step; j += step) {
-            dst[j + roffset] = s->lut[R][src[j + roffset]];
-            dst[j + goffset] = s->lut[G][src[j + goffset]];
-            dst[j + boffset] = s->lut[B][src[j + boffset]];
-            if (in != out && step == 4)
-                dst[j + aoffset] = src[j + aoffset];
-        }
-
-        srcrow += in->linesize[0];
-        dstrow += out->linesize[0];
-    }
-
-    return 0;
-}
-
-static int apply_lut16(AVFilterContext *ctx, void *arg, int jobnr, int nb_jobs)
-{
-    ColorBalanceContext *s = ctx->priv;
-    ThreadData *td = arg;
-    AVFrame *in = td->in;
-    AVFrame *out = td->out;
-    AVFilterLink *outlink = ctx->outputs[0];
-    const int slice_start = (out->height * jobnr) / nb_jobs;
-    const int slice_end = (out->height * (jobnr+1)) / nb_jobs;
-    const uint16_t *srcrow = (const uint16_t *)in->data[0] + slice_start * in->linesize[0] / 2;
-    const uint8_t roffset = s->rgba_map[R];
-    const uint8_t goffset = s->rgba_map[G];
-    const uint8_t boffset = s->rgba_map[B];
-    const uint8_t aoffset = s->rgba_map[A];
-    const int step = s->step / 2;
-    uint16_t *dstrow;
-    int i, j;
-
-    dstrow = (uint16_t *)out->data[0] + slice_start * out->linesize[0] / 2;
-    for (i = slice_start; i < slice_end; i++) {
-        const uint16_t *src = srcrow;
-        uint16_t *dst = dstrow;
-
-        for (j = 0; j < outlink->w * step; j += step) {
-            dst[j + roffset] = s->lut[R][src[j + roffset]];
-            dst[j + goffset] = s->lut[G][src[j + goffset]];
-            dst[j + boffset] = s->lut[B][src[j + boffset]];
-            if (in != out && step == 4)
-                dst[j + aoffset] = src[j + aoffset];
-        }
-
-        srcrow += in->linesize[0] / 2;
-        dstrow += out->linesize[0] / 2;
-    }
-
-    return 0;
+DEF_PLANAR_LUT_FUNC(uint16_t, 16);
+DEF_PLANAR_LUT_FUNC(uint8_t, 8);
+
+#define DEF_LUT_PACKETED_FUNC(type, nbits)                                                   \
+static int lut_packed_##nbits(AVFilterContext *ctx, void *arg, int jobnr, int nb_jobs)       \
+{                                                                                            \
+    ColorBalanceContext *s = ctx->priv;                                                      \
+    ThreadData *td = arg;                                                                    \
+    AVFrame *in = td->in;                                                                    \
+    AVFrame *out = td->out;                                                                  \
+    AVFilterLink *outlink = ctx->outputs[0];                                                 \
+    const int slice_start = (out->height * jobnr) / nb_jobs;                                 \
+    const int slice_end = (out->height * (jobnr+1)) / nb_jobs;                               \
+    const type *srcrow = (const type *)in->data[0] + slice_start * in->linesize[0] / s->bps; \
+    const uint8_t roffset = s->rgba_map[R];                                                  \
+    const uint8_t goffset = s->rgba_map[G];                                                  \
+    const uint8_t boffset = s->rgba_map[B];                                                  \
+    const uint8_t aoffset = s->rgba_map[A];                                                  \
+    const int step = s->step;                                                                \
+    type *dstrow;                                                                            \
+    int i, j;                                                                                \
+                                                                                             \
+    dstrow = (type *)out->data[0] + slice_start * out->linesize[0] / s->bps;                 \
+    for (i = slice_start; i < slice_end; i++) {                                              \
+        const type *src = srcrow;                                                            \
+        type *dst = dstrow;                                                                  \
+                                                                                             \
+        for (j = 0; j < outlink->w * step; j += step) {                                      \
+            dst[j + roffset] = s->lut[R][src[j + roffset]];                                  \
+            dst[j + goffset] = s->lut[G][src[j + goffset]];                                  \
+            dst[j + boffset] = s->lut[B][src[j + boffset]];                                  \
+            if (in != out && step == 4)                                                      \
+                dst[j + aoffset] = src[j + aoffset];                                         \
+        }                                                                                    \
+                                                                                             \
+        srcrow += in->linesize[0] / s->bps;                                                  \
+        dstrow += out->linesize[0] / s->bps;                                                 \
+    }                                                                                        \
+                                                                                             \
+    return 0;                                                                                \
 }
+DEF_LUT_PACKETED_FUNC(uint16_t, 16);
+DEF_LUT_PACKETED_FUNC(uint8_t, 8);
 
 static int config_output(AVFilterLink *outlink)
 {
@@ -259,19 +188,15 @@ static int config_output(AVFilterLink *outlink)
     const AVPixFmtDescriptor *desc = av_pix_fmt_desc_get(outlink->format);
     const int depth = desc->comp[0].depth;
     const int max = 1 << depth;
-    const int planar = av_pix_fmt_count_planes(outlink->format) > 1;
+    const int is_planar = desc->flags & AV_PIX_FMT_FLAG_PLANAR;
     double *shadows, *midtones, *highlights, *buffer;
     int i, r, g, b;
 
-    if (max == 256 && planar) {
-        s->apply_lut = apply_lut8_p;
-    } else if (planar) {
-        s->apply_lut = apply_lut16_p;
-    } else if (max == 256) {
-        s->apply_lut = apply_lut8;
-    } else {
-        s->apply_lut = apply_lut16;
-    }
+    s->bps = depth > 8 ? 2 : 1;
+    if (!is_planar)
+        s->apply_lut = (depth <= 8) ? lut_packed_8 : lut_packed_16;
+    else
+        s->apply_lut = (depth <= 8) ? lut_planar_8 : lut_planar_16;
 
     buffer = av_malloc(max * 3 * sizeof(*buffer));
     if (!buffer)
@@ -317,7 +242,7 @@ static int config_output(AVFilterLink *outlink)
     av_free(buffer);
 
     ff_fill_rgba_map(s->rgba_map, outlink->format);
-    s->step = av_get_padded_bits_per_pixel(desc) >> 3;
+    s->step = (av_get_padded_bits_per_pixel(desc) >> 3) / s->bps;
 
     return 0;
 }
-- 
2.6.4



More information about the ffmpeg-devel mailing list