[FFmpeg-devel] [PATCH] vf_overlay: unroll Y plane output computation in blend_slice()
Michael Niedermayer
michaelni at gmx.at
Mon Oct 31 14:46:02 CET 2011
On Mon, Oct 31, 2011 at 12:46:50PM +0100, Stefano Sabatini wrote:
> Faster, as avoids unnecessary comparation instructions.
I suggest something along the lines of the patch below.
It's entirely untested and surely buggy, just to show what I mean.
It avoids code duplication and lets gcc, by inlining, remove the
unnecessary comparison instructions.
It also fixes the right/bottom blend code, which was wrong for
even width/height.
diff --git a/libavfilter/vf_overlay.c b/libavfilter/vf_overlay.c
index 06967c2..faa8a1f 100644
--- a/libavfilter/vf_overlay.c
+++ b/libavfilter/vf_overlay.c
@@ -335,6 +335,35 @@ static void start_frame_overlay(AVFilterLink *inlink, AVFilterBufferRef *inpicre
// apply a fast variant: (X+127)/255 = ((X+127)*257+257)>>16 = ((X+128)*257)>>16
#define FAST_DIV255(x) ((((x) + 128) * 257) >> 16)
+static void av_always_inline blend_plane(AVFilterBufferRef *dst, AVFilterBufferRef *src, int i,
+ uint8_t *dp, uint8_t *sp, uint8_t *ap, int wp, int hp, int hsub, int vsub, int hcheck, int vcheck)
+{
+ int j,k;
+ for (j = 0; j < hp; j++) {
+ uint8_t *d = dp, *s = sp, *a = ap;
+ for (k = 0; k < wp; k++) {
+ // average alpha for color components, improve quality
+ int alpha;
+ if ( (hsub && (!hcheck || k+1<wp))
+ && (vsub && (!vcheck || j+1<hp))) {
+ alpha = (a[0] + a[src->linesize[3]] +
+ a[1] + a[src->linesize[3]+1]) >> 2;
+ } else if (hsub && (!hcheck || k+1<wp)) {
+ alpha= (a[0] + a[1]) >> 1;
+ } else if (vsub && (!vcheck || j+1<hp)) {
+ alpha= (a[0] + a[src->linesize[3]]) >> 1;
+ } else
+ alpha = a[0];
+ *d = (*d * (0xff - alpha) + *s++ * alpha + 128) >> 8;
+ d++;
+ a += 1 << hsub;
+ }
+ dp += dst->linesize[i];
+ sp += src->linesize[i];
+ ap += (1 << vsub) * src->linesize[3];
+ }
+}
+
static void blend_slice(AVFilterContext *ctx,
AVFilterBufferRef *dst, AVFilterBufferRef *src,
int x, int y, int w, int h,
@@ -436,30 +465,13 @@ static void blend_slice(AVFilterContext *ctx,
sp += ((slice_y - y) >> vsub) * src->linesize[i];
ap += (slice_y - y) * src->linesize[3];
}
- for (j = 0; j < hp; j++) {
- uint8_t *d = dp, *s = sp, *a = ap;
- for (k = 0; k < wp; k++) {
- // average alpha for color components, improve quality
- int alpha_v, alpha_h, alpha;
- if (hsub && vsub && j+1 < hp && k+1 < wp) {
- alpha = (a[0] + a[src->linesize[3]] +
- a[1] + a[src->linesize[3]+1]) >> 2;
- } else if (hsub || vsub) {
- alpha_h = hsub && k+1 < wp ?
- (a[0] + a[1]) >> 1 : a[0];
- alpha_v = vsub && j+1 < hp ?
- (a[0] + a[src->linesize[3]]) >> 1 : a[0];
- alpha = (alpha_v + alpha_h) >> 1;
- } else
- alpha = a[0];
- *d = (*d * (0xff - alpha) + *s++ * alpha + 128) >> 8;
- d++;
- a += 1 << hsub;
- }
- dp += dst->linesize[i];
- sp += src->linesize[i];
- ap += (1 << vsub) * src->linesize[3];
- }
+ if(hsub||vsub){
+ if(wp<<hsub == width && hp<<vsub == height)
+ blend_plane(dst, src, i, dp, sp, ap, wp, hp, hsub, vsub, 0, 0);
+ else
+ blend_plane(dst, src, i, dp, sp, ap, wp, hp, hsub, vsub, wp<<hsub != width, hp<<vsub != height);
+ }else
+ blend_plane(dst, src, i, dp, sp, ap, wp, hp, 0, 0, 0, 0);
}
}
}
[...]
--
Michael GnuPG fingerprint: 9FF2128B147EF6730BADF133611EC787040B0FAB
I do not agree with what you have to say, but I'll defend to the death your
right to say it. -- Voltaire
-------------- next part --------------
A non-text attachment was scrubbed...
Name: not available
Type: application/pgp-signature
Size: 198 bytes
Desc: Digital signature
URL: <http://ffmpeg.org/pipermail/ffmpeg-devel/attachments/20111031/db2626dc/attachment.asc>
More information about the ffmpeg-devel
mailing list