[FFmpeg-cvslog] avcodec/ppc/h264dsp: POWER LE support in h264_idct_dc_add_internal() fix vec_lvsl bug

Rong Yan git at videolan.org
Thu May 14 11:28:01 CEST 2015


ffmpeg | branch: master | Rong Yan <rongyan236 at gmail.com> | Thu May 14 06:43:44 2015 +0000| [a2cd07d22a5e10b39f65f2cfcbab921244d32152] | committer: Michael Niedermayer

avcodec/ppc/h264dsp: POWER LE support in h264_idct_dc_add_internal() fix vec_lvsl bug

We got defective video when using GCC 4.9.2 instead of GCC 4.9.1 to compile FFmpeg. We further found
that GCC 4.8 and 4.9 need a patch to fix the lvsl/lvsr bug on POWER LE, while GCC 5.1 has contained
the correct code since its release. The message on gcc-patches requesting approval for the lvsl/lvsr
patch is at https://gcc.gnu.org/ml/gcc-patches/2014-10/msg00228.html.

The fixed code avoids using lvsl and does not depend on the GCC version; it also uses fewer instructions on POWER LE.

Signed-off-by: Michael Niedermayer <michaelni at gmx.at>

> http://git.videolan.org/gitweb.cgi/ffmpeg.git/?a=commit;h=a2cd07d22a5e10b39f65f2cfcbab921244d32152
---

 libavcodec/ppc/h264dsp.c |   26 +++++++++++++++-----------
 1 file changed, 15 insertions(+), 11 deletions(-)

diff --git a/libavcodec/ppc/h264dsp.c b/libavcodec/ppc/h264dsp.c
index da118a4..3822c7f 100644
--- a/libavcodec/ppc/h264dsp.c
+++ b/libavcodec/ppc/h264dsp.c
@@ -256,6 +256,11 @@ static void h264_idct8_add_altivec(uint8_t *dst, int16_t *dct, int stride)
     ALTIVEC_STORE_SUM_CLIP(&dst[7*stride], idct7, perm_ldv, perm_stv, sel);
 }
 
+#if HAVE_BIGENDIAN
+#define DST_LD vec_ld
+#else
+#define DST_LD vec_vsx_ld
+#endif
 static av_always_inline void h264_idct_dc_add_internal(uint8_t *dst, int16_t *block, int stride, int size)
 {
     vec_s16 dc16;
@@ -275,18 +280,17 @@ static av_always_inline void h264_idct_dc_add_internal(uint8_t *dst, int16_t *bl
     dcplus = vec_packsu(dc16, zero_s16v);
     dcminus = vec_packsu(vec_sub(zero_s16v, dc16), zero_s16v);
 
+#if HAVE_BIGENDIAN
     aligner = vec_lvsr(0, dst);
-#if !HAVE_BIGENDIAN
-    aligner = vec_perm(aligner, zero_u8v, vcswapc());
-#endif
     dcplus = vec_perm(dcplus, dcplus, aligner);
     dcminus = vec_perm(dcminus, dcminus, aligner);
+#endif
 
     for (i = 0; i < size; i += 4) {
-        v0 = vec_ld(0, dst+0*stride);
-        v1 = vec_ld(0, dst+1*stride);
-        v2 = vec_ld(0, dst+2*stride);
-        v3 = vec_ld(0, dst+3*stride);
+        v0 = DST_LD(0, dst+0*stride);
+        v1 = DST_LD(0, dst+1*stride);
+        v2 = DST_LD(0, dst+2*stride);
+        v3 = DST_LD(0, dst+3*stride);
 
         v0 = vec_adds(v0, dcplus);
         v1 = vec_adds(v1, dcplus);
@@ -298,10 +302,10 @@ static av_always_inline void h264_idct_dc_add_internal(uint8_t *dst, int16_t *bl
         v2 = vec_subs(v2, dcminus);
         v3 = vec_subs(v3, dcminus);
 
-        vec_st(v0, 0, dst+0*stride);
-        vec_st(v1, 0, dst+1*stride);
-        vec_st(v2, 0, dst+2*stride);
-        vec_st(v3, 0, dst+3*stride);
+        VEC_ST(v0, 0, dst+0*stride);
+        VEC_ST(v1, 0, dst+1*stride);
+        VEC_ST(v2, 0, dst+2*stride);
+        VEC_ST(v3, 0, dst+3*stride);
 
         dst += 4*stride;
     }



More information about the ffmpeg-cvslog mailing list