[FFmpeg-cvslog] avfilter/vf_xmedian: small additional speedup

Fri Feb 18 18:14:26 EET 2022

ffmpeg | branch: master | Paul B Mahol <onemda at gmail.com> | Fri Feb 18 16:13:16 2022 +0100| [51cd565208123f5e69053bb37b9ae7f1c6e9d9b2] | committer: Paul B Mahol

avfilter/vf_xmedian: small additional speedup

> http://git.videolan.org/gitweb.cgi/ffmpeg.git/?a=commit;h=51cd565208123f5e69053bb37b9ae7f1c6e9d9b2
---

 libavfilter/vf_xmedian.c | 32 +++++++++++++++++++++++++++++---
 1 file changed, 29 insertions(+), 3 deletions(-)

diff --git a/libavfilter/vf_xmedian.c b/libavfilter/vf_xmedian.c
index 28d8e6d4e9..148ab555bb 100644
--- a/libavfilter/vf_xmedian.c
+++ b/libavfilter/vf_xmedian.c
@@ -36,6 +36,7 @@ typedef struct XMedianContext {
     const AVPixFmtDescriptor *desc;
     int nb_inputs;
     int nb_frames;
+    int nb_threads;
     int planes;
     float percentile;
 
@@ -49,6 +50,9 @@ typedef struct XMedianContext {
     int width[4];
     int height[4];
 
+    uint8_t **data;
+    int *linesize;
+
     AVFrame **frames;
     FFFrameSync fs;
 
@@ -129,6 +133,8 @@ static int median_frames ## name(AVFilterContext *ctx, void *arg, int jobnr, int
     AVFrame **in = td->in;                                                                      \
     AVFrame *out = td->out;                                                                     \
     const int nb_inputs = s->nb_inputs;                                                         \
+    uint8_t **srcf = s->data + jobnr * nb_inputs;                                               \
+    int *linesize = s->linesize + jobnr * nb_inputs;                                            \
     const int radius = s->radius;                                                               \
     const int index = s->index;                                                                 \
     type values[256];                                                                           \
@@ -136,6 +142,7 @@ static int median_frames ## name(AVFilterContext *ctx, void *arg, int jobnr, int
     for (int p = 0; p < s->nb_planes; p++) {                                                    \
         const int slice_start = (s->height[p] * jobnr) / nb_jobs;                               \
         const int slice_end = (s->height[p] * (jobnr+1)) / nb_jobs;                             \
+        const int width = s->width[p];                                                          \
         type *dst = (type *)(out->data[p] + slice_start * out->linesize[p]);                    \
         ptrdiff_t dst_linesize = out->linesize[p] / sizeof(type);                               \
                                                                                                 \
@@ -147,10 +154,16 @@ static int median_frames ## name(AVFilterContext *ctx, void *arg, int jobnr, int
             continue;                                                                           \
         }                                                                                       \
                                                                                                 \
+        for (int i = 0; i < nb_inputs; i++)                                                     \
+            linesize[i] = in[i]->linesize[p];                                                   \
+                                                                                                \
+        for (int i = 0; i < nb_inputs; i++)                                                     \
+            srcf[i] = in[i]->data[p] + slice_start * linesize[i];                               \
+                                                                                                \
         for (int y = slice_start; y < slice_end; y++) {                                         \
-            for (int x = 0; x < s->width[p]; x++) {                                             \
+            for (int x = 0; x < width; x++) {                                                   \
                 for (int i = 0; i < nb_inputs; i++) {                                           \
-                    const type *src = (const type *)(in[i]->data[p] + y * in[i]->linesize[p]);  \
+                    const type *src = (const type *)srcf[i];                                    \
                     values[i] = src[x];                                                         \
                 }                                                                               \
                                                                                                 \
@@ -162,6 +175,8 @@ static int median_frames ## name(AVFilterContext *ctx, void *arg, int jobnr, int
             }                                                                                   \
                                                                                                 \
             dst += dst_linesize;                                                                \
+            for (int i = 0; i < nb_inputs; i++)                                                 \
+                srcf[i] += linesize[i];                                                         \
         }                                                                                       \
     }                                                                                           \
                                                                                                 \
@@ -199,7 +214,7 @@ static int process_frame(FFFrameSync *fs)
         td.in = in;
         td.out = out;
         ff_filter_execute(ctx, s->median_frames, &td, NULL,
-                          FFMIN(s->height[1], ff_filter_get_nb_threads(ctx)));
+                          FFMIN(s->height[1], s->nb_threads));
     }
 
     return ff_filter_frame(outlink, out);
@@ -230,6 +245,7 @@ static int config_output(AVFilterLink *outlink)
     s->nb_planes = av_pix_fmt_count_planes(outlink->format);
     s->depth = s->desc->comp[0].depth;
     s->max = (1 << s->depth) - 1;
+    s->nb_threads = ff_filter_get_nb_threads(ctx);
 
     if (s->depth <= 8)
         s->median_frames = median_frames8;
@@ -244,6 +260,14 @@ static int config_output(AVFilterLink *outlink)
     s->height[1] = s->height[2] = AV_CEIL_RSHIFT(inlink->h, s->desc->log2_chroma_h);
     s->height[0] = s->height[3] = inlink->h;
 
+    s->data = av_calloc(s->nb_threads * s->nb_inputs, sizeof(*s->data));
+    if (!s->data)
+        return AVERROR(ENOMEM);
+
+    s->linesize = av_calloc(s->nb_threads * s->nb_inputs, sizeof(*s->linesize));
+    if (!s->linesize)
+        return AVERROR(ENOMEM);
+
     if (!s->xmedian)
         return 0;
 
@@ -283,6 +307,8 @@ static av_cold void uninit(AVFilterContext *ctx)
     for (int i = 0; i < s->nb_frames && s->frames && !s->xmedian; i++)
         av_frame_free(&s->frames[i]);
     av_freep(&s->frames);
+    av_freep(&s->data);
+    av_freep(&s->linesize);
 }
 
 static int activate(AVFilterContext *ctx)