[FFmpeg-devel] [PATCH] Support > 8 bit input in yuv2rgb.

Reimar Döffinger Reimar.Doeffinger at gmx.de
Thu Nov 7 21:43:07 CET 2013


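Instantiate the C converters from a shared template for 8, 9, 10 and 16 bit
input; samples are shifted down to 8 bits before the table lookups.
The result is fairly ugly, but about 3x faster than the default path
(tested on ARM).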

Signed-off-by: Reimar Döffinger <Reimar.Doeffinger at gmx.de>
---
 libswscale/swscale_unscaled.c |   3 +
 libswscale/yuv2rgb.c          | 548 ++++++------------------------------------
 libswscale/yuv2rgb_template.c | 458 +++++++++++++++++++++++++++++++++++
 3 files changed, 536 insertions(+), 473 deletions(-)
 create mode 100644 libswscale/yuv2rgb_template.c

diff --git a/libswscale/swscale_unscaled.c b/libswscale/swscale_unscaled.c
index 83086f7..8842f35 100644
--- a/libswscale/swscale_unscaled.c
+++ b/libswscale/swscale_unscaled.c
@@ -1217,6 +1217,9 @@ void ff_get_unscaled_swscale(SwsContext *c)
     }
     /* yuv2bgr */
     if ((srcFormat == AV_PIX_FMT_YUV420P || srcFormat == AV_PIX_FMT_YUV422P ||
+         srcFormat == AV_PIX_FMT_YUV420P9 || srcFormat == AV_PIX_FMT_YUV422P9 ||
+         srcFormat == AV_PIX_FMT_YUV420P10 || srcFormat == AV_PIX_FMT_YUV422P10 ||
+         srcFormat == AV_PIX_FMT_YUV420P16 || srcFormat == AV_PIX_FMT_YUV422P16 ||
          srcFormat == AV_PIX_FMT_YUVA420P) && isAnyRGB(dstFormat) &&
         !(flags & SWS_ACCURATE_RND) && (c->dither == SWS_DITHER_BAYER || c->dither == SWS_DITHER_AUTO) && !(dstH & 1)) {
         c->swscale = ff_yuv2rgb_get_func_ptr(c);
diff --git a/libswscale/yuv2rgb.c b/libswscale/yuv2rgb.c
index 77c56a9..076acf6 100644
--- a/libswscale/yuv2rgb.c
+++ b/libswscale/yuv2rgb.c
@@ -54,66 +54,66 @@ const int *sws_getCoefficients(int colorspace)
 }
 
 #define LOADCHROMA(i)                               \
-    U = pu[i];                                      \
-    V = pv[i];                                      \
+    U = pu[i] >> shift;                             \
+    V = pv[i] >> shift;                             \
     r = (void *)c->table_rV[V+YUVRGB_TABLE_HEADROOM];                     \
     g = (void *)(c->table_gU[U+YUVRGB_TABLE_HEADROOM] + c->table_gV[V+YUVRGB_TABLE_HEADROOM]);  \
     b = (void *)c->table_bU[U+YUVRGB_TABLE_HEADROOM];
 
 #define PUTRGB(dst, src, i)                         \
-    Y              = src[2 * i];                    \
+    Y              = src[2 * i] >> shift;           \
     dst[2 * i]     = r[Y] + g[Y] + b[Y];            \
-    Y              = src[2 * i + 1];                \
+    Y              = src[2 * i + 1] >> shift;       \
     dst[2 * i + 1] = r[Y] + g[Y] + b[Y];
 
 #define PUTRGB24(dst, src, i)                       \
-    Y              = src[2 * i];                    \
+    Y              = src[2 * i] >> shift;           \
     dst[6 * i + 0] = r[Y];                          \
     dst[6 * i + 1] = g[Y];                          \
     dst[6 * i + 2] = b[Y];                          \
-    Y              = src[2 * i + 1];                \
+    Y              = src[2 * i + 1] >> shift;       \
     dst[6 * i + 3] = r[Y];                          \
     dst[6 * i + 4] = g[Y];                          \
     dst[6 * i + 5] = b[Y];
 
 #define PUTBGR24(dst, src, i)                       \
-    Y              = src[2 * i];                    \
+    Y              = src[2 * i] >> shift;           \
     dst[6 * i + 0] = b[Y];                          \
     dst[6 * i + 1] = g[Y];                          \
     dst[6 * i + 2] = r[Y];                          \
-    Y              = src[2 * i + 1];                \
+    Y              = src[2 * i + 1] >> shift;       \
     dst[6 * i + 3] = b[Y];                          \
     dst[6 * i + 4] = g[Y];                          \
     dst[6 * i + 5] = r[Y];
 
 #define PUTRGBA(dst, ysrc, asrc, i, s)                                  \
-    Y              = ysrc[2 * i];                                       \
-    dst[2 * i]     = r[Y] + g[Y] + b[Y] + (asrc[2 * i]     << s);       \
-    Y              = ysrc[2 * i + 1];                                   \
-    dst[2 * i + 1] = r[Y] + g[Y] + b[Y] + (asrc[2 * i + 1] << s);
+    Y              = ysrc[2 * i] >> shift;                              \
+    dst[2 * i]     = r[Y] + g[Y] + b[Y] + (asrc[2 * i]     >> shift << s); \
+    Y              = ysrc[2 * i + 1] >> shift;                          \
+    dst[2 * i + 1] = r[Y] + g[Y] + b[Y] + (asrc[2 * i + 1] >> shift << s);
 
 #define PUTRGB48(dst, src, i)                       \
-    Y                = src[ 2 * i];                 \
+    Y                = src[ 2 * i] >> shift;        \
     dst[12 * i +  0] = dst[12 * i +  1] = r[Y];     \
     dst[12 * i +  2] = dst[12 * i +  3] = g[Y];     \
     dst[12 * i +  4] = dst[12 * i +  5] = b[Y];     \
-    Y                = src[ 2 * i + 1];             \
+    Y                = src[ 2 * i + 1] >> shift;    \
     dst[12 * i +  6] = dst[12 * i +  7] = r[Y];     \
     dst[12 * i +  8] = dst[12 * i +  9] = g[Y];     \
     dst[12 * i + 10] = dst[12 * i + 11] = b[Y];
 
 #define PUTBGR48(dst, src, i)                       \
-    Y                = src[2 * i];                  \
+    Y                = src[2 * i] >> shift;         \
     dst[12 * i +  0] = dst[12 * i +  1] = b[Y];     \
     dst[12 * i +  2] = dst[12 * i +  3] = g[Y];     \
     dst[12 * i +  4] = dst[12 * i +  5] = r[Y];     \
-    Y                = src[2  * i +  1];            \
+    Y                = src[2  * i +  1] >> shift;   \
     dst[12 * i +  6] = dst[12 * i +  7] = b[Y];     \
     dst[12 * i +  8] = dst[12 * i +  9] = g[Y];     \
     dst[12 * i + 10] = dst[12 * i + 11] = r[Y];
 
 #define YUV2RGBFUNC(func_name, dst_type, alpha)                             \
-    static int func_name(SwsContext *c, const uint8_t *src[],               \
+    static int suffix(func_name)(SwsContext *c, const uint8_t *src[],       \
                          int srcStride[], int srcSliceY, int srcSliceH,     \
                          uint8_t *dst[], int dstStride[])                   \
     {                                                                       \
@@ -129,14 +129,14 @@ const int *sws_getCoefficients(int colorspace)
             dst_type *dst_2 =                                               \
                 (dst_type *)(dst[0] + (y + srcSliceY + 1) * dstStride[0]);  \
             dst_type av_unused *r, *g, *b;                                  \
-            const uint8_t *py_1 = src[0] +  y       * srcStride[0];         \
-            const uint8_t *py_2 = py_1   +            srcStride[0];         \
-            const uint8_t *pu   = src[1] + (y >> 1) * srcStride[1];         \
-            const uint8_t *pv   = src[2] + (y >> 1) * srcStride[2];         \
-            const uint8_t av_unused *pa_1, *pa_2;                           \
+            src_type *py_1 = (src_type *)(src[0] +  y       * srcStride[0]); \
+            src_type *py_2 = (src_type *)(src[0] + (y + 1)  * srcStride[0]); \
+            src_type *pu   = (src_type *)(src[1] + (y >> 1) * srcStride[1]); \
+            src_type *pv   = (src_type *)(src[2] + (y >> 1) * srcStride[2]); \
+            src_type av_unused *pa_1, *pa_2;                                \
             unsigned int h_size = c->dstW >> 3;                             \
             if (alpha) {                                                    \
-                pa_1 = src[3] + y * srcStride[3];                           \
-                pa_2 = pa_1   +     srcStride[3];                           \
+                pa_1 = (src_type *)(src[3] +  y      * srcStride[3]);       \
+                pa_2 = (src_type *)(src[3] + (y + 1) * srcStride[3]); /* stride is in bytes: add before the cast */ \
             }                                                               \
             while (h_size--) {                                              \
@@ -163,451 +163,50 @@ const int *sws_getCoefficients(int colorspace)
     ENDYUV2RGBLINE(dst_delta, 0)                    \
     ENDYUV2RGBFUNC()
 
-YUV2RGBFUNC(yuv2rgb_c_48, uint8_t, 0)
-    LOADCHROMA(0);
-    PUTRGB48(dst_1, py_1, 0);
-    PUTRGB48(dst_2, py_2, 0);
-
-    LOADCHROMA(1);
-    PUTRGB48(dst_2, py_2, 1);
-    PUTRGB48(dst_1, py_1, 1);
-
-    LOADCHROMA(2);
-    PUTRGB48(dst_1, py_1, 2);
-    PUTRGB48(dst_2, py_2, 2);
-
-    LOADCHROMA(3);
-    PUTRGB48(dst_2, py_2, 3);
-    PUTRGB48(dst_1, py_1, 3);
-ENDYUV2RGBLINE(48, 0)
-    LOADCHROMA(0);
-    PUTRGB48(dst_1, py_1, 0);
-    PUTRGB48(dst_2, py_2, 0);
-
-    LOADCHROMA(1);
-    PUTRGB48(dst_2, py_2, 1);
-    PUTRGB48(dst_1, py_1, 1);
-ENDYUV2RGBLINE(48, 1)
-    LOADCHROMA(0);
-    PUTRGB48(dst_1, py_1, 0);
-    PUTRGB48(dst_2, py_2, 0);
-ENDYUV2RGBFUNC()
-
-YUV2RGBFUNC(yuv2rgb_c_bgr48, uint8_t, 0)
-    LOADCHROMA(0);
-    PUTBGR48(dst_1, py_1, 0);
-    PUTBGR48(dst_2, py_2, 0);
-
-    LOADCHROMA(1);
-    PUTBGR48(dst_2, py_2, 1);
-    PUTBGR48(dst_1, py_1, 1);
-
-    LOADCHROMA(2);
-    PUTBGR48(dst_1, py_1, 2);
-    PUTBGR48(dst_2, py_2, 2);
-
-    LOADCHROMA(3);
-    PUTBGR48(dst_2, py_2, 3);
-    PUTBGR48(dst_1, py_1, 3);
-ENDYUV2RGBLINE(48, 0)
-    LOADCHROMA(0);
-    PUTBGR48(dst_1, py_1, 0);
-    PUTBGR48(dst_2, py_2, 0);
-
-    LOADCHROMA(1);
-    PUTBGR48(dst_2, py_2, 1);
-    PUTBGR48(dst_1, py_1, 1);
-ENDYUV2RGBLINE(48, 1)
-    LOADCHROMA(0);
-    PUTBGR48(dst_1, py_1, 0);
-    PUTBGR48(dst_2, py_2, 0);
-ENDYUV2RGBFUNC()
-
-YUV2RGBFUNC(yuv2rgb_c_32, uint32_t, 0)
-    LOADCHROMA(0);
-    PUTRGB(dst_1, py_1, 0);
-    PUTRGB(dst_2, py_2, 0);
-
-    LOADCHROMA(1);
-    PUTRGB(dst_2, py_2, 1);
-    PUTRGB(dst_1, py_1, 1);
-
-    LOADCHROMA(2);
-    PUTRGB(dst_1, py_1, 2);
-    PUTRGB(dst_2, py_2, 2);
-
-    LOADCHROMA(3);
-    PUTRGB(dst_2, py_2, 3);
-    PUTRGB(dst_1, py_1, 3);
-ENDYUV2RGBLINE(8, 0)
-    LOADCHROMA(0);
-    PUTRGB(dst_1, py_1, 0);
-    PUTRGB(dst_2, py_2, 0);
-
-    LOADCHROMA(1);
-    PUTRGB(dst_2, py_2, 1);
-    PUTRGB(dst_1, py_1, 1);
-ENDYUV2RGBLINE(8, 1)
-    LOADCHROMA(0);
-    PUTRGB(dst_1, py_1, 0);
-    PUTRGB(dst_2, py_2, 0);
-ENDYUV2RGBFUNC()
-
-YUV2RGBFUNC(yuva2rgba_c, uint32_t, 1)
-    LOADCHROMA(0);
-    PUTRGBA(dst_1, py_1, pa_1, 0, 24);
-    PUTRGBA(dst_2, py_2, pa_2, 0, 24);
-
-    LOADCHROMA(1);
-    PUTRGBA(dst_2, py_2, pa_2, 1, 24);
-    PUTRGBA(dst_1, py_1, pa_1, 1, 24);
-
-    LOADCHROMA(2);
-    PUTRGBA(dst_1, py_1, pa_1, 2, 24);
-    PUTRGBA(dst_2, py_2, pa_2, 2, 24);
-
-    LOADCHROMA(3);
-    PUTRGBA(dst_2, py_2, pa_2, 3, 24);
-    PUTRGBA(dst_1, py_1, pa_1, 3, 24);
-    pa_1 += 8;
-    pa_2 += 8;
-ENDYUV2RGBLINE(8, 0)
-    LOADCHROMA(0);
-    PUTRGBA(dst_1, py_1, pa_1, 0, 24);
-    PUTRGBA(dst_2, py_2, pa_2, 0, 24);
-
-    LOADCHROMA(1);
-    PUTRGBA(dst_2, py_2, pa_2, 1, 24);
-    PUTRGBA(dst_1, py_1, pa_1, 1, 24);
-    pa_1 += 4;
-    pa_2 += 4;
-ENDYUV2RGBLINE(8, 1)
-    LOADCHROMA(0);
-    PUTRGBA(dst_1, py_1, pa_1, 0, 24);
-    PUTRGBA(dst_2, py_2, pa_2, 0, 24);
-ENDYUV2RGBFUNC()
-
-YUV2RGBFUNC(yuva2argb_c, uint32_t, 1)
-    LOADCHROMA(0);
-    PUTRGBA(dst_1, py_1, pa_1, 0, 0);
-    PUTRGBA(dst_2, py_2, pa_2, 0, 0);
-
-    LOADCHROMA(1);
-    PUTRGBA(dst_2, py_2, pa_2, 1, 0);
-    PUTRGBA(dst_1, py_1, pa_1, 1, 0);
-
-    LOADCHROMA(2);
-    PUTRGBA(dst_1, py_1, pa_1, 2, 0);
-    PUTRGBA(dst_2, py_2, pa_2, 2, 0);
-
-    LOADCHROMA(3);
-    PUTRGBA(dst_2, py_2, pa_2, 3, 0);
-    PUTRGBA(dst_1, py_1, pa_1, 3, 0);
-    pa_1 += 8;
-    pa_2 += 8;
-ENDYUV2RGBLINE(8, 0)
-    LOADCHROMA(0);
-    PUTRGBA(dst_1, py_1, pa_1, 0, 0);
-    PUTRGBA(dst_2, py_2, pa_2, 0, 0);
-
-    LOADCHROMA(1);
-    PUTRGBA(dst_2, py_2, pa_2, 1, 0);
-    PUTRGBA(dst_1, py_1, pa_1, 1, 0);
-    pa_1 += 4;
-    pa_2 += 4;
-ENDYUV2RGBLINE(8, 1)
-    LOADCHROMA(0);
-    PUTRGBA(dst_1, py_1, pa_1, 0, 0);
-    PUTRGBA(dst_2, py_2, pa_2, 0, 0);
-ENDYUV2RGBFUNC()
-
-YUV2RGBFUNC(yuv2rgb_c_24_rgb, uint8_t, 0)
-    LOADCHROMA(0);
-    PUTRGB24(dst_1, py_1, 0);
-    PUTRGB24(dst_2, py_2, 0);
-
-    LOADCHROMA(1);
-    PUTRGB24(dst_2, py_2, 1);
-    PUTRGB24(dst_1, py_1, 1);
-
-    LOADCHROMA(2);
-    PUTRGB24(dst_1, py_1, 2);
-    PUTRGB24(dst_2, py_2, 2);
-
-    LOADCHROMA(3);
-    PUTRGB24(dst_2, py_2, 3);
-    PUTRGB24(dst_1, py_1, 3);
-ENDYUV2RGBLINE(24, 0)
-    LOADCHROMA(0);
-    PUTRGB24(dst_1, py_1, 0);
-    PUTRGB24(dst_2, py_2, 0);
-
-    LOADCHROMA(1);
-    PUTRGB24(dst_2, py_2, 1);
-    PUTRGB24(dst_1, py_1, 1);
-ENDYUV2RGBLINE(24, 1)
-    LOADCHROMA(0);
-    PUTRGB24(dst_1, py_1, 0);
-    PUTRGB24(dst_2, py_2, 0);
-ENDYUV2RGBFUNC()
-
-// only trivial mods from yuv2rgb_c_24_rgb
-YUV2RGBFUNC(yuv2rgb_c_24_bgr, uint8_t, 0)
-    LOADCHROMA(0);
-    PUTBGR24(dst_1, py_1, 0);
-    PUTBGR24(dst_2, py_2, 0);
-
-    LOADCHROMA(1);
-    PUTBGR24(dst_2, py_2, 1);
-    PUTBGR24(dst_1, py_1, 1);
-
-    LOADCHROMA(2);
-    PUTBGR24(dst_1, py_1, 2);
-    PUTBGR24(dst_2, py_2, 2);
-
-    LOADCHROMA(3);
-    PUTBGR24(dst_2, py_2, 3);
-    PUTBGR24(dst_1, py_1, 3);
-ENDYUV2RGBLINE(24, 0)
-    LOADCHROMA(0);
-    PUTBGR24(dst_1, py_1, 0);
-    PUTBGR24(dst_2, py_2, 0);
-
-    LOADCHROMA(1);
-    PUTBGR24(dst_2, py_2, 1);
-    PUTBGR24(dst_1, py_1, 1);
-ENDYUV2RGBLINE(24, 1)
-    LOADCHROMA(0);
-    PUTBGR24(dst_1, py_1, 0);
-    PUTBGR24(dst_2, py_2, 0);
-ENDYUV2RGBFUNC()
-
-YUV2RGBFUNC(yuv2rgb_c_16_ordered_dither, uint16_t, 0)
-    const uint8_t *d16 = ff_dither_2x2_8[y & 1];
-    const uint8_t *e16 = ff_dither_2x2_4[y & 1];
-    const uint8_t *f16 = ff_dither_2x2_8[(y & 1)^1];
-
-#define PUTRGB16(dst, src, i, o)                    \
-    Y              = src[2 * i];                    \
-    dst[2 * i]     = r[Y + d16[0 + o]] +            \
-                     g[Y + e16[0 + o]] +            \
-                     b[Y + f16[0 + o]];             \
-    Y              = src[2 * i + 1];                \
-    dst[2 * i + 1] = r[Y + d16[1 + o]] +            \
-                     g[Y + e16[1 + o]] +            \
-                     b[Y + f16[1 + o]];
-    LOADCHROMA(0);
-    PUTRGB16(dst_1, py_1, 0, 0);
-    PUTRGB16(dst_2, py_2, 0, 0 + 8);
-
-    LOADCHROMA(1);
-    PUTRGB16(dst_2, py_2, 1, 2 + 8);
-    PUTRGB16(dst_1, py_1, 1, 2);
-
-    LOADCHROMA(2);
-    PUTRGB16(dst_1, py_1, 2, 4);
-    PUTRGB16(dst_2, py_2, 2, 4 + 8);
-
-    LOADCHROMA(3);
-    PUTRGB16(dst_2, py_2, 3, 6 + 8);
-    PUTRGB16(dst_1, py_1, 3, 6);
-CLOSEYUV2RGBFUNC(8)
-
-YUV2RGBFUNC(yuv2rgb_c_15_ordered_dither, uint16_t, 0)
-    const uint8_t *d16 = ff_dither_2x2_8[y & 1];
-    const uint8_t *e16 = ff_dither_2x2_8[(y & 1)^1];
-
-#define PUTRGB15(dst, src, i, o)                    \
-    Y              = src[2 * i];                    \
-    dst[2 * i]     = r[Y + d16[0 + o]] +            \
-                     g[Y + d16[1 + o]] +            \
-                     b[Y + e16[0 + o]];             \
-    Y              = src[2 * i + 1];                \
-    dst[2 * i + 1] = r[Y + d16[1 + o]] +            \
-                     g[Y + d16[0 + o]] +            \
-                     b[Y + e16[1 + o]];
-    LOADCHROMA(0);
-    PUTRGB15(dst_1, py_1, 0, 0);
-    PUTRGB15(dst_2, py_2, 0, 0 + 8);
-
-    LOADCHROMA(1);
-    PUTRGB15(dst_2, py_2, 1, 2 + 8);
-    PUTRGB15(dst_1, py_1, 1, 2);
-
-    LOADCHROMA(2);
-    PUTRGB15(dst_1, py_1, 2, 4);
-    PUTRGB15(dst_2, py_2, 2, 4 + 8);
-
-    LOADCHROMA(3);
-    PUTRGB15(dst_2, py_2, 3, 6 + 8);
-    PUTRGB15(dst_1, py_1, 3, 6);
-CLOSEYUV2RGBFUNC(8)
-
-// r, g, b, dst_1, dst_2
-YUV2RGBFUNC(yuv2rgb_c_12_ordered_dither, uint16_t, 0)
-    const uint8_t *d16 = ff_dither_4x4_16[y & 3];
-
-#define PUTRGB12(dst, src, i, o)                    \
-    Y              = src[2 * i];                    \
-    dst[2 * i]     = r[Y + d16[0 + o]] +            \
-                     g[Y + d16[0 + o]] +            \
-                     b[Y + d16[0 + o]];             \
-    Y              = src[2 * i + 1];                \
-    dst[2 * i + 1] = r[Y + d16[1 + o]] +            \
-                     g[Y + d16[1 + o]] +            \
-                     b[Y + d16[1 + o]];
-
-    LOADCHROMA(0);
-    PUTRGB12(dst_1, py_1, 0, 0);
-    PUTRGB12(dst_2, py_2, 0, 0 + 8);
-
-    LOADCHROMA(1);
-    PUTRGB12(dst_2, py_2, 1, 2 + 8);
-    PUTRGB12(dst_1, py_1, 1, 2);
-
-    LOADCHROMA(2);
-    PUTRGB12(dst_1, py_1, 2, 4);
-    PUTRGB12(dst_2, py_2, 2, 4 + 8);
-
-    LOADCHROMA(3);
-    PUTRGB12(dst_2, py_2, 3, 6 + 8);
-    PUTRGB12(dst_1, py_1, 3, 6);
-CLOSEYUV2RGBFUNC(8)
-
-// r, g, b, dst_1, dst_2
-YUV2RGBFUNC(yuv2rgb_c_8_ordered_dither, uint8_t, 0)
-    const uint8_t *d32 = ff_dither_8x8_32[y & 7];
-    const uint8_t *d64 = ff_dither_8x8_73[y & 7];
-
-#define PUTRGB8(dst, src, i, o)                     \
-    Y              = src[2 * i];                    \
-    dst[2 * i]     = r[Y + d32[0 + o]] +            \
-                     g[Y + d32[0 + o]] +            \
-                     b[Y + d64[0 + o]];             \
-    Y              = src[2 * i + 1];                \
-    dst[2 * i + 1] = r[Y + d32[1 + o]] +            \
-                     g[Y + d32[1 + o]] +            \
-                     b[Y + d64[1 + o]];
-
-    LOADCHROMA(0);
-    PUTRGB8(dst_1, py_1, 0, 0);
-    PUTRGB8(dst_2, py_2, 0, 0 + 8);
-
-    LOADCHROMA(1);
-    PUTRGB8(dst_2, py_2, 1, 2 + 8);
-    PUTRGB8(dst_1, py_1, 1, 2);
-
-    LOADCHROMA(2);
-    PUTRGB8(dst_1, py_1, 2, 4);
-    PUTRGB8(dst_2, py_2, 2, 4 + 8);
-
-    LOADCHROMA(3);
-    PUTRGB8(dst_2, py_2, 3, 6 + 8);
-    PUTRGB8(dst_1, py_1, 3, 6);
-CLOSEYUV2RGBFUNC(8)
-
-YUV2RGBFUNC(yuv2rgb_c_4_ordered_dither, uint8_t, 0)
-    const uint8_t * d64 = ff_dither_8x8_73[y & 7];
-    const uint8_t *d128 = ff_dither_8x8_220[y & 7];
-    int acc;
-
-#define PUTRGB4D(dst, src, i, o)                    \
-    Y      = src[2 * i];                            \
-    acc    = r[Y + d128[0 + o]] +                   \
-             g[Y +  d64[0 + o]] +                   \
-             b[Y + d128[0 + o]];                    \
-    Y      = src[2 * i + 1];                        \
-    acc   |= (r[Y + d128[1 + o]] +                  \
-              g[Y +  d64[1 + o]] +                  \
-              b[Y + d128[1 + o]]) << 4;             \
-    dst[i] = acc;
-
-    LOADCHROMA(0);
-    PUTRGB4D(dst_1, py_1, 0, 0);
-    PUTRGB4D(dst_2, py_2, 0, 0 + 8);
-
-    LOADCHROMA(1);
-    PUTRGB4D(dst_2, py_2, 1, 2 + 8);
-    PUTRGB4D(dst_1, py_1, 1, 2);
-
-    LOADCHROMA(2);
-    PUTRGB4D(dst_1, py_1, 2, 4);
-    PUTRGB4D(dst_2, py_2, 2, 4 + 8);
-
-    LOADCHROMA(3);
-    PUTRGB4D(dst_2, py_2, 3, 6 + 8);
-    PUTRGB4D(dst_1, py_1, 3, 6);
-CLOSEYUV2RGBFUNC(4)
-
-YUV2RGBFUNC(yuv2rgb_c_4b_ordered_dither, uint8_t, 0)
-    const uint8_t *d64  = ff_dither_8x8_73[y & 7];
-    const uint8_t *d128 = ff_dither_8x8_220[y & 7];
-
-#define PUTRGB4DB(dst, src, i, o)                   \
-    Y              = src[2 * i];                    \
-    dst[2 * i]     = r[Y + d128[0 + o]] +           \
-                     g[Y +  d64[0 + o]] +           \
-                     b[Y + d128[0 + o]];            \
-    Y              = src[2 * i + 1];                \
-    dst[2 * i + 1] = r[Y + d128[1 + o]] +           \
-                     g[Y +  d64[1 + o]] +           \
-                     b[Y + d128[1 + o]];
-
-    LOADCHROMA(0);
-    PUTRGB4DB(dst_1, py_1, 0, 0);
-    PUTRGB4DB(dst_2, py_2, 0, 0 + 8);
-
-    LOADCHROMA(1);
-    PUTRGB4DB(dst_2, py_2, 1, 2 + 8);
-    PUTRGB4DB(dst_1, py_1, 1, 2);
-
-    LOADCHROMA(2);
-    PUTRGB4DB(dst_1, py_1, 2, 4);
-    PUTRGB4DB(dst_2, py_2, 2, 4 + 8);
-
-    LOADCHROMA(3);
-    PUTRGB4DB(dst_2, py_2, 3, 6 + 8);
-    PUTRGB4DB(dst_1, py_1, 3, 6);
-CLOSEYUV2RGBFUNC(8)
-
-YUV2RGBFUNC(yuv2rgb_c_1_ordered_dither, uint8_t, 0)
-    const uint8_t *d128 = ff_dither_8x8_220[y & 7];
-    char out_1 = 0, out_2 = 0;
-    g = c->table_gU[128 + YUVRGB_TABLE_HEADROOM] + c->table_gV[128 + YUVRGB_TABLE_HEADROOM];
-
-#define PUTRGB1(out, src, i, o)                     \
-    Y    = src[2 * i];                              \
-    out += out + g[Y + d128[0 + o]];                \
-    Y    = src[2 * i + 1];                          \
-    out += out + g[Y + d128[1 + o]];
-
-    PUTRGB1(out_1, py_1, 0, 0);
-    PUTRGB1(out_2, py_2, 0, 0 + 8);
-
-    PUTRGB1(out_2, py_2, 1, 2 + 8);
-    PUTRGB1(out_1, py_1, 1, 2);
-
-    PUTRGB1(out_1, py_1, 2, 4);
-    PUTRGB1(out_2, py_2, 2, 4 + 8);
-
-    PUTRGB1(out_2, py_2, 3, 6 + 8);
-    PUTRGB1(out_1, py_1, 3, 6);
-
-    dst_1[0] = out_1;
-    dst_2[0] = out_2;
-CLOSEYUV2RGBFUNC(1)
+#define src_type const uint8_t
+#define shift 0
+#define suffix(a) a
+#include "yuv2rgb_template.c"
+#undef src_type
+#undef shift
+#undef suffix
+
+#define src_type const uint16_t
+#define shift 1
+#define suffix(a) a##9
+#include "yuv2rgb_template.c"
+#undef src_type
+#undef shift
+#undef suffix
+
+#define src_type const uint16_t
+#define shift 2
+#define suffix(a) a##10
+#include "yuv2rgb_template.c"
+#undef src_type
+#undef shift
+#undef suffix
+
+#define src_type const uint16_t
+#define shift 8
+#define suffix(a) a##16
+#include "yuv2rgb_template.c"
+#undef src_type
+#undef shift
+#undef suffix
 
 SwsFunc ff_yuv2rgb_get_func_ptr(SwsContext *c)
 {
+    int bits = av_pix_fmt_desc_get(c->srcFormat)->comp[0].depth_minus1 + 1;
     SwsFunc t = NULL;
 
-    if (ARCH_BFIN)
+    if (ARCH_BFIN && bits == 8)
         t = ff_yuv2rgb_init_bfin(c);
-    if (ARCH_PPC)
+    if (ARCH_PPC && bits == 8)
         t = ff_yuv2rgb_init_ppc(c);
-    if (HAVE_VIS)
+    if (HAVE_VIS && bits == 8)
         t = ff_yuv2rgb_init_vis(c);
-    if (ARCH_X86)
+    if (ARCH_X86 && bits == 8)
         t = ff_yuv2rgb_init_x86(c);
 
     if (t)
@@ -617,44 +216,47 @@ SwsFunc ff_yuv2rgb_get_func_ptr(SwsContext *c)
            "No accelerated colorspace conversion found from %s to %s.\n",
            av_get_pix_fmt_name(c->srcFormat), av_get_pix_fmt_name(c->dstFormat));
 
+#define SELECT(n) \
+    (bits == 16 ? n##16 : bits == 10 ? n##10 : bits == 9 ? n##9 : n)
+
     switch (c->dstFormat) {
     case AV_PIX_FMT_BGR48BE:
     case AV_PIX_FMT_BGR48LE:
-        return yuv2rgb_c_bgr48;
+        return SELECT(yuv2rgb_c_bgr48);
     case AV_PIX_FMT_RGB48BE:
     case AV_PIX_FMT_RGB48LE:
-        return yuv2rgb_c_48;
+        return SELECT(yuv2rgb_c_48);
     case AV_PIX_FMT_ARGB:
     case AV_PIX_FMT_ABGR:
         if (CONFIG_SWSCALE_ALPHA && isALPHA(c->srcFormat))
-            return yuva2argb_c;
+            return SELECT(yuva2argb_c);
     case AV_PIX_FMT_RGBA:
     case AV_PIX_FMT_BGRA:
-        return (CONFIG_SWSCALE_ALPHA && isALPHA(c->srcFormat)) ? yuva2rgba_c : yuv2rgb_c_32;
+        return (CONFIG_SWSCALE_ALPHA && isALPHA(c->srcFormat)) ? SELECT(yuva2rgba_c) : SELECT(yuv2rgb_c_32);
     case AV_PIX_FMT_RGB24:
-        return yuv2rgb_c_24_rgb;
+        return SELECT(yuv2rgb_c_24_rgb);
     case AV_PIX_FMT_BGR24:
-        return yuv2rgb_c_24_bgr;
+        return SELECT(yuv2rgb_c_24_bgr);
     case AV_PIX_FMT_RGB565:
     case AV_PIX_FMT_BGR565:
-        return yuv2rgb_c_16_ordered_dither;
+        return SELECT(yuv2rgb_c_16_ordered_dither);
     case AV_PIX_FMT_RGB555:
     case AV_PIX_FMT_BGR555:
-        return yuv2rgb_c_15_ordered_dither;
+        return SELECT(yuv2rgb_c_15_ordered_dither);
     case AV_PIX_FMT_RGB444:
     case AV_PIX_FMT_BGR444:
-        return yuv2rgb_c_12_ordered_dither;
+        return SELECT(yuv2rgb_c_12_ordered_dither);
     case AV_PIX_FMT_RGB8:
     case AV_PIX_FMT_BGR8:
-        return yuv2rgb_c_8_ordered_dither;
+        return SELECT(yuv2rgb_c_8_ordered_dither);
     case AV_PIX_FMT_RGB4:
     case AV_PIX_FMT_BGR4:
-        return yuv2rgb_c_4_ordered_dither;
+        return SELECT(yuv2rgb_c_4_ordered_dither);
     case AV_PIX_FMT_RGB4_BYTE:
     case AV_PIX_FMT_BGR4_BYTE:
-        return yuv2rgb_c_4b_ordered_dither;
+        return SELECT(yuv2rgb_c_4b_ordered_dither);
     case AV_PIX_FMT_MONOBLACK:
-        return yuv2rgb_c_1_ordered_dither;
+        return SELECT(yuv2rgb_c_1_ordered_dither);
     }
     return NULL;
 }
diff --git a/libswscale/yuv2rgb_template.c b/libswscale/yuv2rgb_template.c
new file mode 100644
index 0000000..e3ca8ba
--- /dev/null
+++ b/libswscale/yuv2rgb_template.c
@@ -0,0 +1,458 @@
+/*
+ * software YUV to RGB converter
+ *
+ * Copyright (C) 2009 Konstantin Shishkov
+ *
+ * 1,4,8bpp support and context / deglobalize stuff
+ * by Michael Niedermayer (michaelni at gmx.at)
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+YUV2RGBFUNC(yuv2rgb_c_48, uint8_t, 0)
+    LOADCHROMA(0);
+    PUTRGB48(dst_1, py_1, 0);
+    PUTRGB48(dst_2, py_2, 0);
+
+    LOADCHROMA(1);
+    PUTRGB48(dst_2, py_2, 1);
+    PUTRGB48(dst_1, py_1, 1);
+
+    LOADCHROMA(2);
+    PUTRGB48(dst_1, py_1, 2);
+    PUTRGB48(dst_2, py_2, 2);
+
+    LOADCHROMA(3);
+    PUTRGB48(dst_2, py_2, 3);
+    PUTRGB48(dst_1, py_1, 3);
+ENDYUV2RGBLINE(48, 0)
+    LOADCHROMA(0);
+    PUTRGB48(dst_1, py_1, 0);
+    PUTRGB48(dst_2, py_2, 0);
+
+    LOADCHROMA(1);
+    PUTRGB48(dst_2, py_2, 1);
+    PUTRGB48(dst_1, py_1, 1);
+ENDYUV2RGBLINE(48, 1)
+    LOADCHROMA(0);
+    PUTRGB48(dst_1, py_1, 0);
+    PUTRGB48(dst_2, py_2, 0);
+ENDYUV2RGBFUNC()
+
+YUV2RGBFUNC(yuv2rgb_c_bgr48, uint8_t, 0)
+    LOADCHROMA(0);
+    PUTBGR48(dst_1, py_1, 0);
+    PUTBGR48(dst_2, py_2, 0);
+
+    LOADCHROMA(1);
+    PUTBGR48(dst_2, py_2, 1);
+    PUTBGR48(dst_1, py_1, 1);
+
+    LOADCHROMA(2);
+    PUTBGR48(dst_1, py_1, 2);
+    PUTBGR48(dst_2, py_2, 2);
+
+    LOADCHROMA(3);
+    PUTBGR48(dst_2, py_2, 3);
+    PUTBGR48(dst_1, py_1, 3);
+ENDYUV2RGBLINE(48, 0)
+    LOADCHROMA(0);
+    PUTBGR48(dst_1, py_1, 0);
+    PUTBGR48(dst_2, py_2, 0);
+
+    LOADCHROMA(1);
+    PUTBGR48(dst_2, py_2, 1);
+    PUTBGR48(dst_1, py_1, 1);
+ENDYUV2RGBLINE(48, 1)
+    LOADCHROMA(0);
+    PUTBGR48(dst_1, py_1, 0);
+    PUTBGR48(dst_2, py_2, 0);
+ENDYUV2RGBFUNC()
+
+YUV2RGBFUNC(yuv2rgb_c_32, uint32_t, 0)
+    LOADCHROMA(0);
+    PUTRGB(dst_1, py_1, 0);
+    PUTRGB(dst_2, py_2, 0);
+
+    LOADCHROMA(1);
+    PUTRGB(dst_2, py_2, 1);
+    PUTRGB(dst_1, py_1, 1);
+
+    LOADCHROMA(2);
+    PUTRGB(dst_1, py_1, 2);
+    PUTRGB(dst_2, py_2, 2);
+
+    LOADCHROMA(3);
+    PUTRGB(dst_2, py_2, 3);
+    PUTRGB(dst_1, py_1, 3);
+ENDYUV2RGBLINE(8, 0)
+    LOADCHROMA(0);
+    PUTRGB(dst_1, py_1, 0);
+    PUTRGB(dst_2, py_2, 0);
+
+    LOADCHROMA(1);
+    PUTRGB(dst_2, py_2, 1);
+    PUTRGB(dst_1, py_1, 1);
+ENDYUV2RGBLINE(8, 1)
+    LOADCHROMA(0);
+    PUTRGB(dst_1, py_1, 0);
+    PUTRGB(dst_2, py_2, 0);
+ENDYUV2RGBFUNC()
+
+YUV2RGBFUNC(yuva2rgba_c, uint32_t, 1)
+    LOADCHROMA(0);
+    PUTRGBA(dst_1, py_1, pa_1, 0, 24);
+    PUTRGBA(dst_2, py_2, pa_2, 0, 24);
+
+    LOADCHROMA(1);
+    PUTRGBA(dst_2, py_2, pa_2, 1, 24);
+    PUTRGBA(dst_1, py_1, pa_1, 1, 24);
+
+    LOADCHROMA(2);
+    PUTRGBA(dst_1, py_1, pa_1, 2, 24);
+    PUTRGBA(dst_2, py_2, pa_2, 2, 24);
+
+    LOADCHROMA(3);
+    PUTRGBA(dst_2, py_2, pa_2, 3, 24);
+    PUTRGBA(dst_1, py_1, pa_1, 3, 24);
+    pa_1 += 8;
+    pa_2 += 8;
+ENDYUV2RGBLINE(8, 0)
+    LOADCHROMA(0);
+    PUTRGBA(dst_1, py_1, pa_1, 0, 24);
+    PUTRGBA(dst_2, py_2, pa_2, 0, 24);
+
+    LOADCHROMA(1);
+    PUTRGBA(dst_2, py_2, pa_2, 1, 24);
+    PUTRGBA(dst_1, py_1, pa_1, 1, 24);
+    pa_1 += 4;
+    pa_2 += 4;
+ENDYUV2RGBLINE(8, 1)
+    LOADCHROMA(0);
+    PUTRGBA(dst_1, py_1, pa_1, 0, 24);
+    PUTRGBA(dst_2, py_2, pa_2, 0, 24);
+ENDYUV2RGBFUNC()
+
+YUV2RGBFUNC(yuva2argb_c, uint32_t, 1)
+    LOADCHROMA(0);
+    PUTRGBA(dst_1, py_1, pa_1, 0, 0);
+    PUTRGBA(dst_2, py_2, pa_2, 0, 0);
+
+    LOADCHROMA(1);
+    PUTRGBA(dst_2, py_2, pa_2, 1, 0);
+    PUTRGBA(dst_1, py_1, pa_1, 1, 0);
+
+    LOADCHROMA(2);
+    PUTRGBA(dst_1, py_1, pa_1, 2, 0);
+    PUTRGBA(dst_2, py_2, pa_2, 2, 0);
+
+    LOADCHROMA(3);
+    PUTRGBA(dst_2, py_2, pa_2, 3, 0);
+    PUTRGBA(dst_1, py_1, pa_1, 3, 0);
+    pa_1 += 8;
+    pa_2 += 8;
+ENDYUV2RGBLINE(8, 0)
+    LOADCHROMA(0);
+    PUTRGBA(dst_1, py_1, pa_1, 0, 0);
+    PUTRGBA(dst_2, py_2, pa_2, 0, 0);
+
+    LOADCHROMA(1);
+    PUTRGBA(dst_2, py_2, pa_2, 1, 0);
+    PUTRGBA(dst_1, py_1, pa_1, 1, 0);
+    pa_1 += 4;
+    pa_2 += 4;
+ENDYUV2RGBLINE(8, 1)
+    LOADCHROMA(0);
+    PUTRGBA(dst_1, py_1, pa_1, 0, 0);
+    PUTRGBA(dst_2, py_2, pa_2, 0, 0);
+ENDYUV2RGBFUNC()
+
+YUV2RGBFUNC(yuv2rgb_c_24_rgb, uint8_t, 0)
+    LOADCHROMA(0);
+    PUTRGB24(dst_1, py_1, 0);
+    PUTRGB24(dst_2, py_2, 0);
+
+    LOADCHROMA(1);
+    PUTRGB24(dst_2, py_2, 1);
+    PUTRGB24(dst_1, py_1, 1);
+
+    LOADCHROMA(2);
+    PUTRGB24(dst_1, py_1, 2);
+    PUTRGB24(dst_2, py_2, 2);
+
+    LOADCHROMA(3);
+    PUTRGB24(dst_2, py_2, 3);
+    PUTRGB24(dst_1, py_1, 3);
+ENDYUV2RGBLINE(24, 0)
+    LOADCHROMA(0);
+    PUTRGB24(dst_1, py_1, 0);
+    PUTRGB24(dst_2, py_2, 0);
+
+    LOADCHROMA(1);
+    PUTRGB24(dst_2, py_2, 1);
+    PUTRGB24(dst_1, py_1, 1);
+ENDYUV2RGBLINE(24, 1)
+    LOADCHROMA(0);
+    PUTRGB24(dst_1, py_1, 0);
+    PUTRGB24(dst_2, py_2, 0);
+ENDYUV2RGBFUNC()
+
+// Same as yuv2rgb_c_24_rgb, but stores each pixel in B, G, R byte order (PUTBGR24).
+YUV2RGBFUNC(yuv2rgb_c_24_bgr, uint8_t, 0)
+    LOADCHROMA(0);
+    PUTBGR24(dst_1, py_1, 0);
+    PUTBGR24(dst_2, py_2, 0);
+
+    LOADCHROMA(1);
+    PUTBGR24(dst_2, py_2, 1);
+    PUTBGR24(dst_1, py_1, 1);
+
+    LOADCHROMA(2);
+    PUTBGR24(dst_1, py_1, 2);
+    PUTBGR24(dst_2, py_2, 2);
+
+    LOADCHROMA(3);
+    PUTBGR24(dst_2, py_2, 3);
+    PUTBGR24(dst_1, py_1, 3);
+ENDYUV2RGBLINE(24, 0)
+    LOADCHROMA(0);
+    PUTBGR24(dst_1, py_1, 0);
+    PUTBGR24(dst_2, py_2, 0);
+
+    LOADCHROMA(1);
+    PUTBGR24(dst_2, py_2, 1);
+    PUTBGR24(dst_1, py_1, 1);
+ENDYUV2RGBLINE(24, 1)
+    LOADCHROMA(0);
+    PUTBGR24(dst_1, py_1, 0);
+    PUTBGR24(dst_2, py_2, 0);
+ENDYUV2RGBFUNC()
+
+YUV2RGBFUNC(yuv2rgb_c_16_ordered_dither, uint16_t, 0)
+    const uint8_t *d16 = ff_dither_2x2_8[y & 1];
+    const uint8_t *e16 = ff_dither_2x2_4[y & 1];
+    const uint8_t *f16 = ff_dither_2x2_8[(y & 1)^1];
+
+#define PUTRGB16(dst, src, i, o)                    \
+    Y              = src[2 * i] >> shift;           \
+    dst[2 * i]     = r[Y + d16[0 + o]] +            \
+                     g[Y + e16[0 + o]] +            \
+                     b[Y + f16[0 + o]];             \
+    Y              = src[2 * i + 1] >> shift;       \
+    dst[2 * i + 1] = r[Y + d16[1 + o]] +            \
+                     g[Y + e16[1 + o]] +            \
+                     b[Y + f16[1 + o]];
+    LOADCHROMA(0);
+    PUTRGB16(dst_1, py_1, 0, 0);
+    PUTRGB16(dst_2, py_2, 0, 0 + 8);
+
+    LOADCHROMA(1);
+    PUTRGB16(dst_2, py_2, 1, 2 + 8);
+    PUTRGB16(dst_1, py_1, 1, 2);
+
+    LOADCHROMA(2);
+    PUTRGB16(dst_1, py_1, 2, 4);
+    PUTRGB16(dst_2, py_2, 2, 4 + 8);
+
+    LOADCHROMA(3);
+    PUTRGB16(dst_2, py_2, 3, 6 + 8);
+    PUTRGB16(dst_1, py_1, 3, 6);
+CLOSEYUV2RGBFUNC(8)
+
+YUV2RGBFUNC(yuv2rgb_c_15_ordered_dither, uint16_t, 0) /* ordered-dither writer for the 15-bit target; dst element type presumably uint16_t */
+    const uint8_t *d16 = ff_dither_2x2_8[y & 1];       /* row supplying the r[] and g[] offsets */
+    const uint8_t *e16 = ff_dither_2x2_8[(y & 1)^1];   /* other row of the same table, supplying the b[] offsets */
+    /* PUTRGB15: write pixels 2*i and 2*i+1; g uses the d16 entry that r uses for the neighbouring pixel (columns swapped). */
+#define PUTRGB15(dst, src, i, o)                    \
+    Y              = src[2 * i] >> shift;           \
+    dst[2 * i]     = r[Y + d16[0 + o]] +            \
+                     g[Y + d16[1 + o]] +            \
+                     b[Y + e16[0 + o]];             \
+    Y              = src[2 * i + 1] >> shift;       \
+    dst[2 * i + 1] = r[Y + d16[1 + o]] +            \
+                     g[Y + d16[0 + o]] +            \
+                     b[Y + e16[1 + o]];
+    LOADCHROMA(0); /* chroma sample 0 selects the r/g (and presumably b) tables shared by pixels 0..1 of both dst_1 and dst_2 */
+    PUTRGB15(dst_1, py_1, 0, 0);
+    PUTRGB15(dst_2, py_2, 0, 0 + 8); /* +8: dst_2 reads its offsets 8 entries further on -- presumably the following table row; TODO confirm layout */
+
+    LOADCHROMA(1);
+    PUTRGB15(dst_2, py_2, 1, 2 + 8);
+    PUTRGB15(dst_1, py_1, 1, 2);
+
+    LOADCHROMA(2);
+    PUTRGB15(dst_1, py_1, 2, 4);
+    PUTRGB15(dst_2, py_2, 2, 4 + 8);
+
+    LOADCHROMA(3);
+    PUTRGB15(dst_2, py_2, 3, 6 + 8);
+    PUTRGB15(dst_1, py_1, 3, 6);
+CLOSEYUV2RGBFUNC(8) /* 8 = dst elements written per line above; presumably the per-pass pointer advance -- confirm in the macro */
+
+// 12-bit ordered dither. r, g, b, dst_1, dst_2 (and Y, shift, y, py_1, py_2) are not declared here; presumably supplied by YUV2RGBFUNC/LOADCHROMA.
+YUV2RGBFUNC(yuv2rgb_c_12_ordered_dither, uint16_t, 0)
+    const uint8_t *d16 = ff_dither_4x4_16[y & 3]; /* single row (chosen by y & 3) shared by the r[], g[] and b[] lookups */
+    /* PUTRGB12: write pixels 2*i and 2*i+1; the three lookups of one pixel all use the same dither offset. */
+#define PUTRGB12(dst, src, i, o)                    \
+    Y              = src[2 * i] >> shift;           \
+    dst[2 * i]     = r[Y + d16[0 + o]] +            \
+                     g[Y + d16[0 + o]] +            \
+                     b[Y + d16[0 + o]];             \
+    Y              = src[2 * i + 1] >> shift;       \
+    dst[2 * i + 1] = r[Y + d16[1 + o]] +            \
+                     g[Y + d16[1 + o]] +            \
+                     b[Y + d16[1 + o]];
+
+    LOADCHROMA(0); /* chroma sample 0 selects the r/g (and presumably b) tables shared by pixels 0..1 of both dst_1 and dst_2 */
+    PUTRGB12(dst_1, py_1, 0, 0);
+    PUTRGB12(dst_2, py_2, 0, 0 + 8); /* +8: dst_2 reads its offsets 8 entries further on -- presumably the following table row; TODO confirm layout */
+
+    LOADCHROMA(1);
+    PUTRGB12(dst_2, py_2, 1, 2 + 8);
+    PUTRGB12(dst_1, py_1, 1, 2);
+
+    LOADCHROMA(2);
+    PUTRGB12(dst_1, py_1, 2, 4);
+    PUTRGB12(dst_2, py_2, 2, 4 + 8);
+
+    LOADCHROMA(3);
+    PUTRGB12(dst_2, py_2, 3, 6 + 8);
+    PUTRGB12(dst_1, py_1, 3, 6);
+CLOSEYUV2RGBFUNC(8) /* 8 = dst elements written per line above; presumably the per-pass pointer advance -- confirm in the macro */
+
+// 8-bit ordered dither. r, g, b, dst_1, dst_2 (and Y, shift, y, py_1, py_2) are not declared here; presumably supplied by YUV2RGBFUNC/LOADCHROMA.
+YUV2RGBFUNC(yuv2rgb_c_8_ordered_dither, uint8_t, 0)
+    const uint8_t *d32 = ff_dither_8x8_32[y & 7]; /* offsets for the r[] and g[] lookups */
+    const uint8_t *d64 = ff_dither_8x8_73[y & 7]; /* offsets for the b[] lookup (named d64 although it points into the _73 table) */
+    /* PUTRGB8: write pixels 2*i and 2*i+1, one dst element each; r and g share the d32 offset, b uses d64. */
+#define PUTRGB8(dst, src, i, o)                     \
+    Y              = src[2 * i] >> shift;           \
+    dst[2 * i]     = r[Y + d32[0 + o]] +            \
+                     g[Y + d32[0 + o]] +            \
+                     b[Y + d64[0 + o]];             \
+    Y              = src[2 * i + 1] >> shift;       \
+    dst[2 * i + 1] = r[Y + d32[1 + o]] +            \
+                     g[Y + d32[1 + o]] +            \
+                     b[Y + d64[1 + o]];
+
+    LOADCHROMA(0); /* chroma sample 0 selects the r/g (and presumably b) tables shared by pixels 0..1 of both dst_1 and dst_2 */
+    PUTRGB8(dst_1, py_1, 0, 0);
+    PUTRGB8(dst_2, py_2, 0, 0 + 8); /* +8: dst_2 reads its offsets 8 entries further on -- presumably the following table row; TODO confirm layout */
+
+    LOADCHROMA(1);
+    PUTRGB8(dst_2, py_2, 1, 2 + 8);
+    PUTRGB8(dst_1, py_1, 1, 2);
+
+    LOADCHROMA(2);
+    PUTRGB8(dst_1, py_1, 2, 4);
+    PUTRGB8(dst_2, py_2, 2, 4 + 8);
+
+    LOADCHROMA(3);
+    PUTRGB8(dst_2, py_2, 3, 6 + 8);
+    PUTRGB8(dst_1, py_1, 3, 6);
+CLOSEYUV2RGBFUNC(8) /* 8 = dst elements written per line above; presumably the per-pass pointer advance -- confirm in the macro */
+
+YUV2RGBFUNC(yuv2rgb_c_4_ordered_dither, uint8_t, 0) /* ordered-dither writer that stores two pixels per dst element */
+    const uint8_t * d64 = ff_dither_8x8_73[y & 7];  /* offsets for the g[] lookup (named d64 although it points into the _73 table) */
+    const uint8_t *d128 = ff_dither_8x8_220[y & 7]; /* offsets for the r[] and b[] lookups */
+    int acc; /* holds the pixel pair of one PUTRGB4D until it is stored */
+    /* PUTRGB4D: pixel 2*i goes into acc unshifted, pixel 2*i+1 is shifted left by 4 and OR'ed in; the pair is stored as the single element dst[i]. */
+#define PUTRGB4D(dst, src, i, o)                    \
+    Y      = src[2 * i] >> shift;                   \
+    acc    = r[Y + d128[0 + o]] +                   \
+             g[Y +  d64[0 + o]] +                   \
+             b[Y + d128[0 + o]];                    \
+    Y      = src[2 * i + 1] >> shift;               \
+    acc   |= (r[Y + d128[1 + o]] +                  \
+              g[Y +  d64[1 + o]] +                  \
+              b[Y + d128[1 + o]]) << 4;             \
+    dst[i] = acc;
+
+    LOADCHROMA(0); /* chroma sample 0 selects the r/g (and presumably b) tables shared by pixels 0..1 of both dst_1 and dst_2 */
+    PUTRGB4D(dst_1, py_1, 0, 0);
+    PUTRGB4D(dst_2, py_2, 0, 0 + 8); /* +8: dst_2 reads its offsets 8 entries further on -- presumably the following table row; TODO confirm layout */
+
+    LOADCHROMA(1);
+    PUTRGB4D(dst_2, py_2, 1, 2 + 8);
+    PUTRGB4D(dst_1, py_1, 1, 2);
+
+    LOADCHROMA(2);
+    PUTRGB4D(dst_1, py_1, 2, 4);
+    PUTRGB4D(dst_2, py_2, 2, 4 + 8);
+
+    LOADCHROMA(3);
+    PUTRGB4D(dst_2, py_2, 3, 6 + 8);
+    PUTRGB4D(dst_1, py_1, 3, 6);
+CLOSEYUV2RGBFUNC(4) /* 4 = dst elements written per line above (two pixels each); presumably the per-pass pointer advance -- confirm in the macro */
+
+YUV2RGBFUNC(yuv2rgb_c_4b_ordered_dither, uint8_t, 0) /* same tables as yuv2rgb_c_4_ordered_dither, but one dst element per pixel */
+    const uint8_t *d64  = ff_dither_8x8_73[y & 7];  /* offsets for the g[] lookup (named d64 although it points into the _73 table) */
+    const uint8_t *d128 = ff_dither_8x8_220[y & 7]; /* offsets for the r[] and b[] lookups */
+    /* PUTRGB4DB: write pixels 2*i and 2*i+1 into dst[2*i] and dst[2*i+1]; each is r[] + g[] + b[] at (src sample >> shift) plus its dither offset. */
+#define PUTRGB4DB(dst, src, i, o)                   \
+    Y              = src[2 * i] >> shift;           \
+    dst[2 * i]     = r[Y + d128[0 + o]] +           \
+                     g[Y +  d64[0 + o]] +           \
+                     b[Y + d128[0 + o]];            \
+    Y              = src[2 * i + 1] >> shift;       \
+    dst[2 * i + 1] = r[Y + d128[1 + o]] +           \
+                     g[Y +  d64[1 + o]] +           \
+                     b[Y + d128[1 + o]];
+
+    LOADCHROMA(0); /* chroma sample 0 selects the r/g (and presumably b) tables shared by pixels 0..1 of both dst_1 and dst_2 */
+    PUTRGB4DB(dst_1, py_1, 0, 0);
+    PUTRGB4DB(dst_2, py_2, 0, 0 + 8); /* +8: dst_2 reads its offsets 8 entries further on -- presumably the following table row; TODO confirm layout */
+
+    LOADCHROMA(1);
+    PUTRGB4DB(dst_2, py_2, 1, 2 + 8);
+    PUTRGB4DB(dst_1, py_1, 1, 2);
+
+    LOADCHROMA(2);
+    PUTRGB4DB(dst_1, py_1, 2, 4);
+    PUTRGB4DB(dst_2, py_2, 2, 4 + 8);
+
+    LOADCHROMA(3);
+    PUTRGB4DB(dst_2, py_2, 3, 6 + 8);
+    PUTRGB4DB(dst_1, py_1, 3, 6);
+CLOSEYUV2RGBFUNC(8) /* 8 = dst elements written per line above; presumably the per-pass pointer advance -- confirm in the macro */
+
+YUV2RGBFUNC(yuv2rgb_c_1_ordered_dither, uint8_t, 0) /* ordered-dither writer that folds eight source samples into one dst element per line */
+    const uint8_t *d128 = ff_dither_8x8_220[y & 7]; /* dither offsets for the g[] lookup */
+    char out_1 = 0, out_2 = 0; /* accumulators for dst_1[0] / dst_2[0]; NOTE(review): plain char, signedness is implementation-defined */
+    g = c->table_gU[128 + YUVRGB_TABLE_HEADROOM] + c->table_gV[128 + YUVRGB_TABLE_HEADROOM]; /* fixed table at index 128 for both U and V; this body never calls LOADCHROMA */
+    /* PUTRGB1: for samples 2*i and 2*i+1, double `out` and add g[(sample >> shift) + dither]; presumably g[] yields 0/1 so out gathers one bit per pixel -- TODO confirm. */
+#define PUTRGB1(out, src, i, o)                     \
+    Y    = src[2 * i] >> shift;                     \
+    out += out + g[Y + d128[0 + o]];                \
+    Y    = src[2 * i + 1] >> shift;                 \
+    out += out + g[Y + d128[1 + o]];
+    /* Calls on the same accumulator must stay in i = 0..3 order: every call doubles whatever was gathered before it. */
+    PUTRGB1(out_1, py_1, 0, 0);
+    PUTRGB1(out_2, py_2, 0, 0 + 8); /* +8: the py_2 line reads its offsets 8 entries further on -- presumably the following table row; TODO confirm layout */
+
+    PUTRGB1(out_2, py_2, 1, 2 + 8);
+    PUTRGB1(out_1, py_1, 1, 2);
+
+    PUTRGB1(out_1, py_1, 2, 4);
+    PUTRGB1(out_2, py_2, 2, 4 + 8);
+
+    PUTRGB1(out_2, py_2, 3, 6 + 8);
+    PUTRGB1(out_1, py_1, 3, 6);
+
+    dst_1[0] = out_1; /* the eight py_1 samples handled above end up in this single element */
+    dst_2[0] = out_2; /* likewise for the eight py_2 samples */
+CLOSEYUV2RGBFUNC(1) /* 1 = dst elements written per line above; presumably the per-pass pointer advance -- confirm in the macro */
-- 
1.8.4.2



More information about the ffmpeg-devel mailing list