[FFmpeg-cvslog] swscale: split yuv2packedX_altivec in smaller functions.

Ronald S. Bultje git at videolan.org
Wed Jun 29 05:26:33 CEST 2011


ffmpeg | branch: master | Ronald S. Bultje <rsbultje at gmail.com> | Thu Jun 16 12:04:24 2011 -0400| [dc179ec81902e3c9d327f9e818454f2849308000] | committer: Ronald Bultje

swscale: split yuv2packedX_altivec in smaller functions.

This will likely lead to a considerable performance boost,
since it removes a branch from the inner loop. Part of the
Great Evil Plan to simplify swscale.

> http://git.videolan.org/gitweb.cgi/ffmpeg.git/?a=commit;h=dc179ec81902e3c9d327f9e818454f2849308000
---

 libswscale/ppc/swscale_altivec.c |   14 +++++++++-----
 libswscale/ppc/yuv2rgb_altivec.c |   28 ++++++++++++++++++++++++----
 libswscale/ppc/yuv2rgb_altivec.h |   18 +++++++++++++-----
 3 files changed, 46 insertions(+), 14 deletions(-)

diff --git a/libswscale/ppc/swscale_altivec.c b/libswscale/ppc/swscale_altivec.c
index 47fe54c..7161fe7 100644
--- a/libswscale/ppc/swscale_altivec.c
+++ b/libswscale/ppc/swscale_altivec.c
@@ -414,10 +414,14 @@ void ff_sws_init_swScale_altivec(SwsContext *c)
 
     /* The following list of supported dstFormat values should
      * match what's found in the body of ff_yuv2packedX_altivec() */
-    if (!(c->flags & (SWS_BITEXACT | SWS_FULL_CHR_H_INT)) && !c->alpPixBuf &&
-        (c->dstFormat==PIX_FMT_ABGR  || c->dstFormat==PIX_FMT_BGRA  ||
-         c->dstFormat==PIX_FMT_BGR24 || c->dstFormat==PIX_FMT_RGB24 ||
-         c->dstFormat==PIX_FMT_RGBA  || c->dstFormat==PIX_FMT_ARGB)) {
-            c->yuv2packedX  = ff_yuv2packedX_altivec;
+    if (!(c->flags & (SWS_BITEXACT | SWS_FULL_CHR_H_INT)) && !c->alpPixBuf) {
+        switch (c->dstFormat) {
+        case PIX_FMT_ABGR:  c->yuv2packedX = ff_yuv2abgr_X_altivec;  break;
+        case PIX_FMT_BGRA:  c->yuv2packedX = ff_yuv2bgra_X_altivec;  break;
+        case PIX_FMT_ARGB:  c->yuv2packedX = ff_yuv2argb_X_altivec;  break;
+        case PIX_FMT_RGBA:  c->yuv2packedX = ff_yuv2rgba_X_altivec;  break;
+        case PIX_FMT_BGR24: c->yuv2packedX = ff_yuv2bgr24_X_altivec; break;
+        case PIX_FMT_RGB24: c->yuv2packedX = ff_yuv2rgb24_X_altivec; break;
         }
+    }
 }
diff --git a/libswscale/ppc/yuv2rgb_altivec.c b/libswscale/ppc/yuv2rgb_altivec.c
index 476db22..73c02e9 100644
--- a/libswscale/ppc/yuv2rgb_altivec.c
+++ b/libswscale/ppc/yuv2rgb_altivec.c
@@ -626,13 +626,13 @@ void ff_yuv2rgb_init_tables_altivec(SwsContext *c, const int inv_table[4], int b
 }
 
 
-void
+static av_always_inline void
 ff_yuv2packedX_altivec(SwsContext *c, const int16_t *lumFilter,
                        const int16_t **lumSrc, int lumFilterSize,
                        const int16_t *chrFilter, const int16_t **chrUSrc,
                        const int16_t **chrVSrc, int chrFilterSize,
                        const int16_t **alpSrc, uint8_t *dest,
-                       int dstW, int dstY)
+                       int dstW, int dstY, enum PixelFormat target)
 {
     int i,j;
     vector signed short X,X0,X1,Y0,U0,V0,Y1,U1,V1,U,V;
@@ -706,7 +706,7 @@ ff_yuv2packedX_altivec(SwsContext *c, const int16_t *lumFilter,
         G  = vec_packclp (G0,G1);
         B  = vec_packclp (B0,B1);
 
-        switch(c->dstFormat) {
+        switch(target) {
         case PIX_FMT_ABGR:  out_abgr  (R,G,B,out); break;
         case PIX_FMT_BGRA:  out_bgra  (R,G,B,out); break;
         case PIX_FMT_RGBA:  out_rgba  (R,G,B,out); break;
@@ -785,7 +785,7 @@ ff_yuv2packedX_altivec(SwsContext *c, const int16_t *lumFilter,
         B  = vec_packclp (B0,B1);
 
         nout = (vector unsigned char *)scratch;
-        switch(c->dstFormat) {
+        switch(target) {
         case PIX_FMT_ABGR:  out_abgr  (R,G,B,nout); break;
         case PIX_FMT_BGRA:  out_bgra  (R,G,B,nout); break;
         case PIX_FMT_RGBA:  out_rgba  (R,G,B,nout); break;
@@ -803,3 +803,23 @@ ff_yuv2packedX_altivec(SwsContext *c, const int16_t *lumFilter,
     }
 
 }
+
+#define YUV2PACKEDX_WRAPPER(suffix, pixfmt) \
+void ff_yuv2 ## suffix ## _X_altivec(SwsContext *c, const int16_t *lumFilter, \
+                            const int16_t **lumSrc, int lumFilterSize, \
+                            const int16_t *chrFilter, const int16_t **chrUSrc, \
+                            const int16_t **chrVSrc, int chrFilterSize, \
+                            const int16_t **alpSrc, uint8_t *dest, \
+                            int dstW, int dstY) \
+{ \
+    ff_yuv2packedX_altivec(c, lumFilter, lumSrc, lumFilterSize, \
+                           chrFilter, chrUSrc, chrVSrc, chrFilterSize, \
+                           alpSrc, dest, dstW, dstY, pixfmt); \
+}
+
+YUV2PACKEDX_WRAPPER(abgr,  PIX_FMT_ABGR);
+YUV2PACKEDX_WRAPPER(bgra,  PIX_FMT_BGRA);
+YUV2PACKEDX_WRAPPER(argb,  PIX_FMT_ARGB);
+YUV2PACKEDX_WRAPPER(rgba,  PIX_FMT_RGBA);
+YUV2PACKEDX_WRAPPER(rgb24, PIX_FMT_RGB24);
+YUV2PACKEDX_WRAPPER(bgr24, PIX_FMT_BGR24);
diff --git a/libswscale/ppc/yuv2rgb_altivec.h b/libswscale/ppc/yuv2rgb_altivec.h
index b54a856..b809fe1 100644
--- a/libswscale/ppc/yuv2rgb_altivec.h
+++ b/libswscale/ppc/yuv2rgb_altivec.h
@@ -24,11 +24,19 @@
 #ifndef PPC_YUV2RGB_ALTIVEC_H
 #define PPC_YUV2RGB_ALTIVEC_H 1
 
-void ff_yuv2packedX_altivec(SwsContext *c, const int16_t *lumFilter,
-                            const int16_t **lumSrc, int lumFilterSize,
-                            const int16_t *chrFilter, const int16_t **chrUSrc,
-                            const int16_t **chrVSrc, int chrFilterSize,
-                            const int16_t **alpSrc, uint8_t *dest,
+#define YUV2PACKEDX_HEADER(suffix) \
+void ff_yuv2 ## suffix ## _X_altivec(SwsContext *c, const int16_t *lumFilter, \
+                            const int16_t **lumSrc, int lumFilterSize, \
+                            const int16_t *chrFilter, const int16_t **chrUSrc, \
+                            const int16_t **chrVSrc, int chrFilterSize, \
+                            const int16_t **alpSrc, uint8_t *dest, \
                             int dstW, int dstY);
 
+YUV2PACKEDX_HEADER(abgr);
+YUV2PACKEDX_HEADER(bgra);
+YUV2PACKEDX_HEADER(argb);
+YUV2PACKEDX_HEADER(rgba);
+YUV2PACKEDX_HEADER(rgb24);
+YUV2PACKEDX_HEADER(bgr24);
+
 #endif /* PPC_YUV2RGB_ALTIVEC_H */



More information about the ffmpeg-cvslog mailing list