[FFmpeg-devel] [PATCH] Fix bitexact SwScale on PPC
Kostya
kostya.shishkov
Sat Feb 14 16:23:02 CET 2009
This is needed to make the regression tests pass on PPC.
The first patch avoids calling AltiVec'ed functions that produce different results
from the C implementation (maybe just for reference until someone fixes that
stuff).
The second patch changes hScale_altivec_real() to output filter results in the
same manner as the C code does (i.e. without clipping negative values). I suspect
the MMX version of hScale() with its "packssdw" does the same thing as the C code too.
-------------- next part --------------
Index: swscale.c
===================================================================
--- swscale.c (revision 28549)
+++ swscale.c (working copy)
@@ -2396,6 +2396,7 @@
#ifdef COMPILE_ALTIVEC
if ((c->flags & SWS_CPU_CAPS_ALTIVEC) &&
+ !(c->flags & SWS_BITEXACT) &&
srcFormat == PIX_FMT_YUV420P) {
// unscaled YV12 -> packed YUV, we want speed
if (dstFormat == PIX_FMT_YUYV422)
Index: swscale_template.c
===================================================================
--- swscale_template.c (revision 28549)
+++ swscale_template.c (working copy)
@@ -916,14 +916,15 @@
}
#endif
#if HAVE_ALTIVEC
+if(!(c->flags & SWS_BITEXACT))
yuv2yuvX_altivec_real(lumFilter, lumSrc, lumFilterSize,
chrFilter, chrSrc, chrFilterSize,
dest, uDest, vDest, dstW, chrDstW);
-#else //HAVE_ALTIVEC
+else
+#endif //!HAVE_ALTIVEC
yuv2yuvXinC(lumFilter, lumSrc, lumFilterSize,
chrFilter, chrSrc, chrFilterSize,
dest, uDest, vDest, dstW, chrDstW);
-#endif //!HAVE_ALTIVEC
}
static inline void RENAME(yuv2nv12X)(SwsContext *c, int16_t *lumFilter, int16_t **lumSrc, int lumFilterSize,
@@ -1136,9 +1137,10 @@
#if HAVE_ALTIVEC
/* The following list of supported dstFormat values should
match what's found in the body of altivec_yuv2packedX() */
- if (c->dstFormat==PIX_FMT_ABGR || c->dstFormat==PIX_FMT_BGRA ||
+ if (!(c->flags & SWS_BITEXACT) &&
+ (c->dstFormat==PIX_FMT_ABGR || c->dstFormat==PIX_FMT_BGRA ||
c->dstFormat==PIX_FMT_BGR24 || c->dstFormat==PIX_FMT_RGB24 ||
- c->dstFormat==PIX_FMT_RGBA || c->dstFormat==PIX_FMT_ARGB)
+ c->dstFormat==PIX_FMT_RGBA || c->dstFormat==PIX_FMT_ARGB))
altivec_yuv2packedX (c, lumFilter, lumSrc, lumFilterSize,
chrFilter, chrSrc, chrFilterSize,
dest, dstW, dstY);
-------------- next part --------------
Index: swscale_altivec_template.c
===================================================================
--- swscale_altivec_template.c (revision 28549)
+++ swscale_altivec_template.c (working copy)
@@ -220,7 +220,7 @@
for (j=0; j<filterSize; j++) {
val += ((int)src[srcPos + j])*filter[filterSize*i + j];
}
- dst[i] = av_clip(val>>7, 0, (1<<15)-1);
+ dst[i] = FFMIN(val>>7, (1<<15)-1);
}
}
else
@@ -259,7 +259,7 @@
val_vEven = vec_mule(src_v, filter_v);
val_s = vec_sums(val_vEven, vzero);
vec_st(val_s, 0, tempo);
- dst[i] = av_clip(tempo[3]>>7, 0, (1<<15)-1);
+ dst[i] = FFMIN(tempo[3]>>7, (1<<15)-1);
}
}
break;
@@ -286,7 +286,7 @@
val_v = vec_msums(src_v, filter_v, (vector signed int)vzero);
val_s = vec_sums(val_v, vzero);
vec_st(val_s, 0, tempo);
- dst[i] = av_clip(tempo[3]>>7, 0, (1<<15)-1);
+ dst[i] = FFMIN(tempo[3]>>7, (1<<15)-1);
}
}
break;
@@ -315,7 +315,7 @@
vector signed int val_s = vec_sums(val_v, vzero);
vec_st(val_s, 0, tempo);
- dst[i] = av_clip(tempo[3]>>7, 0, (1<<15)-1);
+ dst[i] = FFMIN(tempo[3]>>7, (1<<15)-1);
}
}
break;
@@ -377,7 +377,7 @@
val_s = vec_sums(val_v, vzero);
vec_st(val_s, 0, tempo);
- dst[i] = av_clip(tempo[3]>>7, 0, (1<<15)-1);
+ dst[i] = FFMIN(tempo[3]>>7, (1<<15)-1);
}
}
More information about the ffmpeg-devel
mailing list