[FFmpeg-cvslog] swscale: add dithering to yuv2yuvX_altivec_real

Sat Aug 13 22:34:26 CEST 2011

ffmpeg | branch: master | Luca Barbato <lu_zero at gentoo.org> | Wed Aug 10 02:16:26 2011 +0200| [3304a1e69a8a050eb66d2304acd2d01354fa1aac] | committer: Luca Barbato

swscale: add dithering to yuv2yuvX_altivec_real

It just does that part in scalar form; I doubt that using a vector store
over 2 arrays would speed it up particularly.

The function should be rewritten so that it does not use a scratch buffer.

> http://git.videolan.org/gitweb.cgi/ffmpeg.git/?a=commit;h=3304a1e69a8a050eb66d2304acd2d01354fa1aac
---

 libswscale/ppc/swscale_altivec.c |   21 +++++++--------------
 1 files changed, 7 insertions(+), 14 deletions(-)

diff --git a/libswscale/ppc/swscale_altivec.c b/libswscale/ppc/swscale_altivec.c
index f988b53..7fdca39 100644
--- a/libswscale/ppc/swscale_altivec.c
+++ b/libswscale/ppc/swscale_altivec.c
@@ -92,6 +92,7 @@ altivec_packIntArrayToCharArray(int *val, uint8_t* dest, int dstW)
     }
 }
 
+//FIXME remove the usage of scratch buffers.
 static void
 yuv2yuvX_altivec_real(SwsContext *c,
                       const int16_t *lumFilter, const int16_t **lumSrc,
@@ -101,17 +102,13 @@ yuv2yuvX_altivec_real(SwsContext *c,
                       uint8_t *dest[4], int dstW, int chrDstW)
 {
     uint8_t *yDest = dest[0], *uDest = dest[1], *vDest = dest[2];
-    const vector signed int vini = {(1 << 18), (1 << 18), (1 << 18), (1 << 18)};
+    const uint8_t *lumDither = c->lumDither8, *chrDither = c->chrDither8;
     register int i, j;
     {
         DECLARE_ALIGNED(16, int, val)[dstW];
 
-        for (i = 0; i < (dstW -7); i+=4) {
-            vec_st(vini, i << 2, val);
-        }
-        for (; i < dstW; i++) {
-            val[i] = (1 << 18);
-        }
+        for (i=0; i<dstW; i++)
+            val[i] = lumDither[i & 7] << 12;
 
         for (j = 0; j < lumFilterSize; j++) {
             vector signed short l1, vLumFilter = vec_ld(j << 1, lumFilter);
@@ -155,13 +152,9 @@ yuv2yuvX_altivec_real(SwsContext *c,
         DECLARE_ALIGNED(16, int, u)[chrDstW];
         DECLARE_ALIGNED(16, int, v)[chrDstW];
 
-        for (i = 0; i < (chrDstW -7); i+=4) {
-            vec_st(vini, i << 2, u);
-            vec_st(vini, i << 2, v);
-        }
-        for (; i < chrDstW; i++) {
-            u[i] = (1 << 18);
-            v[i] = (1 << 18);
+        for (i=0; i<chrDstW; i++) {
+            u[i] = chrDither[i & 7] << 12;
+            v[i] = chrDither[(i + 3) & 7] << 12;
         }
 
         for (j = 0; j < chrFilterSize; j++) {