[FFmpeg-devel] [PATCH] Fix bitexact SwScale on PPC

Sat Feb 14 16:23:02 CET 2009

These patches are needed to make the regression tests pass on PPC.
The first patch avoids calling the AltiVec functions that produce different
results from the C implementation when SWS_BITEXACT is set (maybe just for
reference until someone fixes that stuff).

The second patch changes hScale_altivec_real() to output filter results in the
same manner as the C code does (i.e. without clipping negative values to zero).
I suspect the MMX version of hScale(), with its "packssdw", does the same thing
as the C code too.
-------------- next part --------------
Index: swscale.c
===================================================================

--- swscale.c	(revision 28549)
+++ swscale.c	(working copy)
@@ -2396,6 +2396,7 @@
 
 #ifdef COMPILE_ALTIVEC
         if ((c->flags & SWS_CPU_CAPS_ALTIVEC) &&
+            !(c->flags & SWS_BITEXACT) &&
             srcFormat == PIX_FMT_YUV420P) {
           // unscaled YV12 -> packed YUV, we want speed
           if (dstFormat == PIX_FMT_YUYV422)
Index: swscale_template.c
===================================================================
--- swscale_template.c	(revision 28549)
+++ swscale_template.c	(working copy)
@@ -916,14 +916,15 @@
     }
 #endif
 #if HAVE_ALTIVEC
+if(!(c->flags & SWS_BITEXACT))
 yuv2yuvX_altivec_real(lumFilter, lumSrc, lumFilterSize,
                       chrFilter, chrSrc, chrFilterSize,
                       dest, uDest, vDest, dstW, chrDstW);
-#else //HAVE_ALTIVEC
+else
+#endif //!HAVE_ALTIVEC
 yuv2yuvXinC(lumFilter, lumSrc, lumFilterSize,
             chrFilter, chrSrc, chrFilterSize,
             dest, uDest, vDest, dstW, chrDstW);
-#endif //!HAVE_ALTIVEC
 }
 
 static inline void RENAME(yuv2nv12X)(SwsContext *c, int16_t *lumFilter, int16_t **lumSrc, int lumFilterSize,
@@ -1136,9 +1137,10 @@
 #if HAVE_ALTIVEC
     /* The following list of supported dstFormat values should
        match what's found in the body of altivec_yuv2packedX() */
-    if (c->dstFormat==PIX_FMT_ABGR  || c->dstFormat==PIX_FMT_BGRA  ||
+    if (!(c->flags & SWS_BITEXACT) && 
+       (c->dstFormat==PIX_FMT_ABGR  || c->dstFormat==PIX_FMT_BGRA  ||
         c->dstFormat==PIX_FMT_BGR24 || c->dstFormat==PIX_FMT_RGB24 ||
-        c->dstFormat==PIX_FMT_RGBA  || c->dstFormat==PIX_FMT_ARGB)
+        c->dstFormat==PIX_FMT_RGBA  || c->dstFormat==PIX_FMT_ARGB))
             altivec_yuv2packedX (c, lumFilter, lumSrc, lumFilterSize,
                                  chrFilter, chrSrc, chrFilterSize,
                                  dest, dstW, dstY);
-------------- next part --------------
Index: swscale_altivec_template.c
===================================================================
--- swscale_altivec_template.c	(revision 28549)
+++ swscale_altivec_template.c	(working copy)
@@ -220,7 +220,7 @@
             for (j=0; j<filterSize; j++) {
                 val += ((int)src[srcPos + j])*filter[filterSize*i + j];
             }
-            dst[i] = av_clip(val>>7, 0, (1<<15)-1);
+            dst[i] = FFMIN(val>>7, (1<<15)-1);
         }
     }
     else
@@ -259,7 +259,7 @@
         val_vEven = vec_mule(src_v, filter_v);
         val_s = vec_sums(val_vEven, vzero);
         vec_st(val_s, 0, tempo);
-        dst[i] = av_clip(tempo[3]>>7, 0, (1<<15)-1);
+        dst[i] = FFMIN(tempo[3]>>7, (1<<15)-1);
     }
     }
     break;
@@ -286,7 +286,7 @@
         val_v = vec_msums(src_v, filter_v, (vector signed int)vzero);
         val_s = vec_sums(val_v, vzero);
         vec_st(val_s, 0, tempo);
-        dst[i] = av_clip(tempo[3]>>7, 0, (1<<15)-1);
+        dst[i] = FFMIN(tempo[3]>>7, (1<<15)-1);
     }
     }
     break;
@@ -315,7 +315,7 @@
             vector signed int val_s = vec_sums(val_v, vzero);
 
             vec_st(val_s, 0, tempo);
-            dst[i] = av_clip(tempo[3]>>7, 0, (1<<15)-1);
+            dst[i] = FFMIN(tempo[3]>>7, (1<<15)-1);
         }
     }
     break;
@@ -377,7 +377,7 @@
         val_s = vec_sums(val_v, vzero);
 
         vec_st(val_s, 0, tempo);
-        dst[i] = av_clip(tempo[3]>>7, 0, (1<<15)-1);
+        dst[i] = FFMIN(tempo[3]>>7, (1<<15)-1);
     }
 
     }