[FFmpeg-devel] [PATCH] vf_overlay: unroll Y plane output computation in blend_slice()

Stefano Sabatini stefasab at gmail.com
Mon Oct 31 12:46:45 CET 2011


Faster, as it avoids unnecessary comparison instructions.

Improves performance, from:
47528710 dezicycles in blend_slice, 8192 runs, 0 skips
to:
40958444 dezicycles in blend_slice, 8192 runs, 0 skips
---
 libavfilter/vf_overlay.c |   36 ++++++++++++++++++++++++++++++------
 1 files changed, 30 insertions(+), 6 deletions(-)

diff --git a/libavfilter/vf_overlay.c b/libavfilter/vf_overlay.c
index 0698299..22a14bf 100644
--- a/libavfilter/vf_overlay.c
+++ b/libavfilter/vf_overlay.c
@@ -425,15 +425,39 @@ static void blend_slice(AVFilterContext *ctx,
             sp += src->linesize[0];
         }
     } else {
-        for (i = 0; i < 3; i++) {
+        uint8_t *dp, *sp, *ap;
+
+        /* compute Y */
+        dp = dst->data[Y] + x + start_y * dst->linesize[Y];
+        sp = src->data[Y];
+        ap = src->data[A];
+
+        if (slice_y > y) {
+            sp += (slice_y - y) * src->linesize[Y];
+            ap += (slice_y - y) * src->linesize[A];
+        }
+        for (i = 0; i < height; i++) {
+            uint8_t *d = dp, *s = sp, *a = ap;
+            for (j = 0; j < width; j++) {
+                *d = FAST_DIV255(*d * (255 - *a) + *s * *a);
+                s++;
+                d++;
+                a++;
+            }
+            dp += dst->linesize[Y];
+            sp += src->linesize[Y];
+            ap += src->linesize[A];
+        }
+
+        /* compute U and V */
+        for (i = 1; i < A; i++) {
             int hsub = i ? over->hsub : 0;
             int vsub = i ? over->vsub : 0;
-            uint8_t *dp = dst->data[i] + (x >> hsub) +
-                (start_y >> vsub) * dst->linesize[i];
-            uint8_t *sp = src->data[i];
-            uint8_t *ap = src->data[3];
-            int wp = FFALIGN(width, 1<<hsub) >> hsub;
+            int wp = FFALIGN(width,  1<<hsub) >> hsub;
             int hp = FFALIGN(height, 1<<vsub) >> vsub;
+            dp = dst->data[i] + (x >> hsub) + (start_y >> vsub) * dst->linesize[i];
+            sp = src->data[i];
+            ap = src->data[3];
             if (slice_y > y) {
                 sp += ((slice_y - y) >> vsub) * src->linesize[i];
                 ap += (slice_y - y) * src->linesize[3];
-- 
1.7.4.1



More information about the ffmpeg-devel mailing list