[FFmpeg-cvslog] sws/output: add yuv2rgb_full_1_c_template()

Michael Niedermayer git at videolan.org
Sun Apr 14 17:08:03 CEST 2013


ffmpeg | branch: master | Michael Niedermayer <michaelni at gmx.at> | Sun Apr 14 16:14:22 2013 +0200| [a4b5e45e2bbc3f2877e4d353893316fb583ed57a] | committer: Michael Niedermayer

sws/output: add yuv2rgb_full_1_c_template()

7500->4500 cycles on sandybridge

Signed-off-by: Michael Niedermayer <michaelni at gmx.at>

> http://git.videolan.org/gitweb.cgi/ffmpeg.git/?a=commit;h=a4b5e45e2bbc3f2877e4d353893316fb583ed57a
---

 libswscale/output.c |  110 ++++++++++++++++++++++++++++++++++++++++++---------
 1 file changed, 92 insertions(+), 18 deletions(-)

diff --git a/libswscale/output.c b/libswscale/output.c
index 99a283e..0557818 100644
--- a/libswscale/output.c
+++ b/libswscale/output.c
@@ -1404,30 +1404,86 @@ yuv2rgb_full_2_c_template(SwsContext *c, const int16_t *buf[2],
     c->dither_error[2][i] = err[2];
 }
 
+static av_always_inline void /* converts dstW pixels from one luma line (buf0) via yuv2rgb_write_full() */
+yuv2rgb_full_1_c_template(SwsContext *c, const int16_t *buf0,
+                     const int16_t *ubuf[2], const int16_t *vbuf[2],
+                     const int16_t *abuf0, uint8_t *dest, int dstW,
+                     int uvalpha, int y, enum AVPixelFormat target,
+                     int hasAlpha)
+{
+    const int16_t *ubuf0 = ubuf[0], *vbuf0 = vbuf[0];
+    int i;
+    int step = (target == AV_PIX_FMT_RGB24 || target == AV_PIX_FMT_BGR24) ? 3 : 4; /* bytes dest advances per pixel */
+    int err[4] = {0}; /* error state handed to yuv2rgb_write_full(); saved into c->dither_error below */
+
+    if(   target == AV_PIX_FMT_BGR4_BYTE || target == AV_PIX_FMT_RGB4_BYTE
+       || target == AV_PIX_FMT_BGR8      || target == AV_PIX_FMT_RGB8)
+        step = 1;
+
+    if (uvalpha < 2048) { /* only the first chroma line (ubuf[0]/vbuf[0]) is read */
+        for (i = 0; i < dstW; i++) {
+            int Y = buf0[i] * 4; /* multiply instead of <<: left-shifting a negative int is undefined */
+            int U = (ubuf0[i] - (128<<7)) * 4; /* the difference is negative for small samples */
+            int V = (vbuf0[i] - (128<<7)) * 4;
+            int A = 0; /* initialized so the call below never reads an indeterminate value */
+
+            if (hasAlpha) {
+                A = abuf0[i] * (1 << 15); /* same value as <<15, but defined for negative samples */
+                if (A & 0xC0000000) /* bit 30 or 31 set: A is outside [0, 2^30) */
+                    A = av_clip_uintp2(A, 30);
+            }
+
+            yuv2rgb_write_full(c, dest, i, Y, A, U, V, y, target, hasAlpha, err);
+            dest += step;
+        }
+    } else { /* both chroma lines are summed per sample */
+        const int16_t *ubuf1 = ubuf[1], *vbuf1 = vbuf[1];
+        for (i = 0; i < dstW; i++) {
+            int Y = buf0[i] * 4; /* multiply instead of <<: left-shifting a negative int is undefined */
+            int U = (ubuf0[i] + ubuf1[i] - (128<<8)) * 2; /* two lines summed, hence the doubled offset and halved gain */
+            int V = (vbuf0[i] + vbuf1[i] - (128<<8)) * 2;
+            int A = 0; /* initialized so the call below never reads an indeterminate value */
+
+            if (hasAlpha) {
+                A = abuf0[i] * (1 << 15); /* same value as <<15, but defined for negative samples */
+                if (A & 0xC0000000) /* bit 30 or 31 set: A is outside [0, 2^30) */
+                    A = av_clip_uintp2(A, 30);
+            }
+
+            yuv2rgb_write_full(c, dest, i, Y, A, U, V, y, target, hasAlpha, err);
+            dest += step;
+        }
+    }
+
+    c->dither_error[0][i] = err[0]; /* i is one past the last pixel processed at this point */
+    c->dither_error[1][i] = err[1];
+    c->dither_error[2][i] = err[2];
+}
+
 #if CONFIG_SMALL
-YUV2RGBWRAPPERX2(yuv2, rgb_full, bgra32_full, AV_PIX_FMT_BGRA,  CONFIG_SWSCALE_ALPHA && c->alpPixBuf)
-YUV2RGBWRAPPERX2(yuv2, rgb_full, abgr32_full, AV_PIX_FMT_ABGR,  CONFIG_SWSCALE_ALPHA && c->alpPixBuf)
-YUV2RGBWRAPPERX2(yuv2, rgb_full, rgba32_full, AV_PIX_FMT_RGBA,  CONFIG_SWSCALE_ALPHA && c->alpPixBuf)
-YUV2RGBWRAPPERX2(yuv2, rgb_full, argb32_full, AV_PIX_FMT_ARGB,  CONFIG_SWSCALE_ALPHA && c->alpPixBuf)
+YUV2RGBWRAPPER(yuv2, rgb_full, bgra32_full, AV_PIX_FMT_BGRA,  CONFIG_SWSCALE_ALPHA && c->alpPixBuf)
+YUV2RGBWRAPPER(yuv2, rgb_full, abgr32_full, AV_PIX_FMT_ABGR,  CONFIG_SWSCALE_ALPHA && c->alpPixBuf)
+YUV2RGBWRAPPER(yuv2, rgb_full, rgba32_full, AV_PIX_FMT_RGBA,  CONFIG_SWSCALE_ALPHA && c->alpPixBuf)
+YUV2RGBWRAPPER(yuv2, rgb_full, argb32_full, AV_PIX_FMT_ARGB,  CONFIG_SWSCALE_ALPHA && c->alpPixBuf)
 #else
 #if CONFIG_SWSCALE_ALPHA
-YUV2RGBWRAPPERX2(yuv2, rgb_full, bgra32_full, AV_PIX_FMT_BGRA,  1)
-YUV2RGBWRAPPERX2(yuv2, rgb_full, abgr32_full, AV_PIX_FMT_ABGR,  1)
-YUV2RGBWRAPPERX2(yuv2, rgb_full, rgba32_full, AV_PIX_FMT_RGBA,  1)
-YUV2RGBWRAPPERX2(yuv2, rgb_full, argb32_full, AV_PIX_FMT_ARGB,  1)
+YUV2RGBWRAPPER(yuv2, rgb_full, bgra32_full, AV_PIX_FMT_BGRA,  1)
+YUV2RGBWRAPPER(yuv2, rgb_full, abgr32_full, AV_PIX_FMT_ABGR,  1)
+YUV2RGBWRAPPER(yuv2, rgb_full, rgba32_full, AV_PIX_FMT_RGBA,  1)
+YUV2RGBWRAPPER(yuv2, rgb_full, argb32_full, AV_PIX_FMT_ARGB,  1)
 #endif
-YUV2RGBWRAPPERX2(yuv2, rgb_full, bgrx32_full, AV_PIX_FMT_BGRA,  0)
-YUV2RGBWRAPPERX2(yuv2, rgb_full, xbgr32_full, AV_PIX_FMT_ABGR,  0)
-YUV2RGBWRAPPERX2(yuv2, rgb_full, rgbx32_full, AV_PIX_FMT_RGBA,  0)
-YUV2RGBWRAPPERX2(yuv2, rgb_full, xrgb32_full, AV_PIX_FMT_ARGB,  0)
+YUV2RGBWRAPPER(yuv2, rgb_full, bgrx32_full, AV_PIX_FMT_BGRA,  0)
+YUV2RGBWRAPPER(yuv2, rgb_full, xbgr32_full, AV_PIX_FMT_ABGR,  0)
+YUV2RGBWRAPPER(yuv2, rgb_full, rgbx32_full, AV_PIX_FMT_RGBA,  0)
+YUV2RGBWRAPPER(yuv2, rgb_full, xrgb32_full, AV_PIX_FMT_ARGB,  0)
 #endif
-YUV2RGBWRAPPERX2(yuv2, rgb_full, bgr24_full,  AV_PIX_FMT_BGR24, 0)
-YUV2RGBWRAPPERX2(yuv2, rgb_full, rgb24_full,  AV_PIX_FMT_RGB24, 0)
+YUV2RGBWRAPPER(yuv2, rgb_full, bgr24_full,  AV_PIX_FMT_BGR24, 0)
+YUV2RGBWRAPPER(yuv2, rgb_full, rgb24_full,  AV_PIX_FMT_RGB24, 0)
 
-YUV2RGBWRAPPERX2(yuv2, rgb_full, bgr4_byte_full,  AV_PIX_FMT_BGR4_BYTE, 0)
-YUV2RGBWRAPPERX2(yuv2, rgb_full, rgb4_byte_full,  AV_PIX_FMT_RGB4_BYTE, 0)
-YUV2RGBWRAPPERX2(yuv2, rgb_full, bgr8_full,   AV_PIX_FMT_BGR8,  0)
-YUV2RGBWRAPPERX2(yuv2, rgb_full, rgb8_full,   AV_PIX_FMT_RGB8,  0)
+YUV2RGBWRAPPER(yuv2, rgb_full, bgr4_byte_full,  AV_PIX_FMT_BGR4_BYTE, 0)
+YUV2RGBWRAPPER(yuv2, rgb_full, rgb4_byte_full,  AV_PIX_FMT_RGB4_BYTE, 0)
+YUV2RGBWRAPPER(yuv2, rgb_full, bgr8_full,   AV_PIX_FMT_BGR8,  0)
+YUV2RGBWRAPPER(yuv2, rgb_full, rgb8_full,   AV_PIX_FMT_RGB8,  0)
 
 static void
 yuv2gbrp_full_X_c(SwsContext *c, const int16_t *lumFilter,
@@ -1549,16 +1605,19 @@ av_cold void ff_sws_init_output_funcs(SwsContext *c,
 #if CONFIG_SMALL
                 *yuv2packedX = yuv2rgba32_full_X_c;
                 *yuv2packed2 = yuv2rgba32_full_2_c;
+                *yuv2packed1 = yuv2rgba32_full_1_c;
 #else
 #if CONFIG_SWSCALE_ALPHA
                 if (c->alpPixBuf) {
                     *yuv2packedX = yuv2rgba32_full_X_c;
                     *yuv2packed2 = yuv2rgba32_full_2_c;
+                    *yuv2packed1 = yuv2rgba32_full_1_c;
                 } else
 #endif /* CONFIG_SWSCALE_ALPHA */
                 {
                     *yuv2packedX = yuv2rgbx32_full_X_c;
                     *yuv2packed2 = yuv2rgbx32_full_2_c;
+                    *yuv2packed1 = yuv2rgbx32_full_1_c;
                 }
 #endif /* !CONFIG_SMALL */
                 break;
@@ -1566,16 +1625,19 @@ av_cold void ff_sws_init_output_funcs(SwsContext *c,
 #if CONFIG_SMALL
                 *yuv2packedX = yuv2argb32_full_X_c;
                 *yuv2packed2 = yuv2argb32_full_2_c;
+                *yuv2packed1 = yuv2argb32_full_1_c;
 #else
 #if CONFIG_SWSCALE_ALPHA
                 if (c->alpPixBuf) {
                     *yuv2packedX = yuv2argb32_full_X_c;
                     *yuv2packed2 = yuv2argb32_full_2_c;
+                    *yuv2packed1 = yuv2argb32_full_1_c;
                 } else
 #endif /* CONFIG_SWSCALE_ALPHA */
                 {
                     *yuv2packedX = yuv2xrgb32_full_X_c;
                     *yuv2packed2 = yuv2xrgb32_full_2_c;
+                    *yuv2packed1 = yuv2xrgb32_full_1_c;
                 }
 #endif /* !CONFIG_SMALL */
                 break;
@@ -1583,16 +1645,19 @@ av_cold void ff_sws_init_output_funcs(SwsContext *c,
 #if CONFIG_SMALL
                 *yuv2packedX = yuv2bgra32_full_X_c;
                 *yuv2packed2 = yuv2bgra32_full_2_c;
+                *yuv2packed1 = yuv2bgra32_full_1_c;
 #else
 #if CONFIG_SWSCALE_ALPHA
                 if (c->alpPixBuf) {
                     *yuv2packedX = yuv2bgra32_full_X_c;
                     *yuv2packed2 = yuv2bgra32_full_2_c;
+                    *yuv2packed1 = yuv2bgra32_full_1_c;
                 } else
 #endif /* CONFIG_SWSCALE_ALPHA */
                 {
                     *yuv2packedX = yuv2bgrx32_full_X_c;
                     *yuv2packed2 = yuv2bgrx32_full_2_c;
+                    *yuv2packed1 = yuv2bgrx32_full_1_c;
                 }
 #endif /* !CONFIG_SMALL */
                 break;
@@ -1600,42 +1665,51 @@ av_cold void ff_sws_init_output_funcs(SwsContext *c,
 #if CONFIG_SMALL
                 *yuv2packedX = yuv2abgr32_full_X_c;
                 *yuv2packed2 = yuv2abgr32_full_2_c;
+                *yuv2packed1 = yuv2abgr32_full_1_c;
 #else
 #if CONFIG_SWSCALE_ALPHA
                 if (c->alpPixBuf) {
                     *yuv2packedX = yuv2abgr32_full_X_c;
                     *yuv2packed2 = yuv2abgr32_full_2_c;
+                    *yuv2packed1 = yuv2abgr32_full_1_c;
                 } else
 #endif /* CONFIG_SWSCALE_ALPHA */
                 {
                     *yuv2packedX = yuv2xbgr32_full_X_c;
                     *yuv2packed2 = yuv2xbgr32_full_2_c;
+                    *yuv2packed1 = yuv2xbgr32_full_1_c;
                 }
 #endif /* !CONFIG_SMALL */
                 break;
             case AV_PIX_FMT_RGB24:
             *yuv2packedX = yuv2rgb24_full_X_c;
             *yuv2packed2 = yuv2rgb24_full_2_c;
+            *yuv2packed1 = yuv2rgb24_full_1_c;
             break;
         case AV_PIX_FMT_BGR24:
             *yuv2packedX = yuv2bgr24_full_X_c;
             *yuv2packed2 = yuv2bgr24_full_2_c;
+            *yuv2packed1 = yuv2bgr24_full_1_c;
             break;
         case AV_PIX_FMT_BGR4_BYTE:
             *yuv2packedX = yuv2bgr4_byte_full_X_c;
             *yuv2packed2 = yuv2bgr4_byte_full_2_c;
+            *yuv2packed1 = yuv2bgr4_byte_full_1_c;
             break;
         case AV_PIX_FMT_RGB4_BYTE:
             *yuv2packedX = yuv2rgb4_byte_full_X_c;
             *yuv2packed2 = yuv2rgb4_byte_full_2_c;
+            *yuv2packed1 = yuv2rgb4_byte_full_1_c;
             break;
         case AV_PIX_FMT_BGR8:
             *yuv2packedX = yuv2bgr8_full_X_c;
             *yuv2packed2 = yuv2bgr8_full_2_c;
+            *yuv2packed1 = yuv2bgr8_full_1_c;
             break;
         case AV_PIX_FMT_RGB8:
             *yuv2packedX = yuv2rgb8_full_X_c;
             *yuv2packed2 = yuv2rgb8_full_2_c;
+            *yuv2packed1 = yuv2rgb8_full_1_c;
             break;
         case AV_PIX_FMT_GBRP:
         case AV_PIX_FMT_GBRP9BE:



More information about the ffmpeg-cvslog mailing list