[FFmpeg-cvslog] Merge commit 'de452e503734ebb0fdbce86e9d16693b3530fad3'

Clément Bœsch git at videolan.org
Mon Mar 20 16:59:54 EET 2017


ffmpeg | branch: master | Clément Bœsch <u at pkh.me> | Mon Mar 20 13:47:29 2017 +0100| [e07fa3008bcafc027e9aa654657bae1f1ac64f5d] | committer: Clément Bœsch

Merge commit 'de452e503734ebb0fdbce86e9d16693b3530fad3'

* commit 'de452e503734ebb0fdbce86e9d16693b3530fad3':
  pixblockdsp: Change type of stride parameters to ptrdiff_t

Merged-by: Clément Bœsch <u at pkh.me>

> http://git.videolan.org/gitweb.cgi/ffmpeg.git/?a=commit;h=e07fa3008bcafc027e9aa654657bae1f1ac64f5d
---

 libavcodec/alpha/pixblockdsp_alpha.c  |  7 ++++---
 libavcodec/arm/pixblockdsp_init_arm.c |  5 +++--
 libavcodec/dv.h                       |  2 +-
 libavcodec/dvenc.c                    |  9 +++++----
 libavcodec/mips/pixblockdsp_mips.h    |  6 +++---
 libavcodec/mips/pixblockdsp_mmi.c     | 22 +++++++++++-----------
 libavcodec/mips/pixblockdsp_msa.c     |  2 +-
 libavcodec/pixblockdsp.c              | 24 ++++++++++++------------
 libavcodec/pixblockdsp.h              |  4 ++--
 libavcodec/ppc/pixblockdsp.c          | 18 +++++++++---------
 libavcodec/x86/pixblockdsp.asm        |  5 ++---
 libavcodec/x86/pixblockdsp_init.c     |  8 ++++----
 tests/checkasm/pixblockdsp.c          |  2 +-
 13 files changed, 58 insertions(+), 56 deletions(-)

diff --git a/libavcodec/alpha/pixblockdsp_alpha.c b/libavcodec/alpha/pixblockdsp_alpha.c
index 866b762..c2f1a1d 100644
--- a/libavcodec/alpha/pixblockdsp_alpha.c
+++ b/libavcodec/alpha/pixblockdsp_alpha.c
@@ -23,7 +23,7 @@
 #include "asm.h"
 
 static void get_pixels_mvi(int16_t *restrict block,
-                           const uint8_t *restrict pixels, ptrdiff_t line_size)
+                           const uint8_t *restrict pixels, ptrdiff_t stride)
 {
     int h = 8;
 
@@ -34,13 +34,14 @@ static void get_pixels_mvi(int16_t *restrict block,
         stq(unpkbw(p),       block);
         stq(unpkbw(p >> 32), block + 4);
 
-        pixels += line_size;
+        pixels += stride;
         block += 8;
     } while (--h);
 }
 
 static void diff_pixels_mvi(int16_t *block, const uint8_t *s1, const uint8_t *s2,
-                            int stride) {
+                            ptrdiff_t stride)
+{
     int h = 8;
     uint64_t mask = 0x4040;
 
diff --git a/libavcodec/arm/pixblockdsp_init_arm.c b/libavcodec/arm/pixblockdsp_init_arm.c
index 76d7509..59d2b49 100644
--- a/libavcodec/arm/pixblockdsp_init_arm.c
+++ b/libavcodec/arm/pixblockdsp_init_arm.c
@@ -24,9 +24,10 @@
 #include "libavcodec/avcodec.h"
 #include "libavcodec/pixblockdsp.h"
 
-void ff_get_pixels_armv6(int16_t *block, const uint8_t *pixels, ptrdiff_t stride);
+void ff_get_pixels_armv6(int16_t *block, const uint8_t *pixels,
+                         ptrdiff_t stride);
 void ff_diff_pixels_armv6(int16_t *block, const uint8_t *s1,
-                          const uint8_t *s2, int stride);
+                          const uint8_t *s2, ptrdiff_t stride);
 
 av_cold void ff_pixblockdsp_init_arm(PixblockDSPContext *c,
                                      AVCodecContext *avctx,
diff --git a/libavcodec/dv.h b/libavcodec/dv.h
index 3367f92..d5482d1 100644
--- a/libavcodec/dv.h
+++ b/libavcodec/dv.h
@@ -45,7 +45,7 @@ typedef struct DVVideoContext {
 
     uint8_t dv_zigzag[2][64];
 
-    void (*get_pixels)(int16_t *block, const uint8_t *pixels, ptrdiff_t line_size);
+    void (*get_pixels)(int16_t *block, const uint8_t *pixels, ptrdiff_t linesize);
     void (*fdct[2])(int16_t *block);
     void (*idct_put[2])(uint8_t *dest, int line_size, int16_t *block);
     me_cmp_func ildct_cmp;
diff --git a/libavcodec/dvenc.c b/libavcodec/dvenc.c
index a27b9ef..e3de18a 100644
--- a/libavcodec/dvenc.c
+++ b/libavcodec/dvenc.c
@@ -204,7 +204,7 @@ static av_always_inline PutBitContext *dv_encode_ac(EncBlockInfo *bi,
 }
 
 static av_always_inline int dv_guess_dct_mode(DVVideoContext *s, uint8_t *data,
-                                              int linesize)
+                                              ptrdiff_t linesize)
 {
     if (s->avctx->flags & AV_CODEC_FLAG_INTERLACED_DCT) {
         int ps = s->ildct_cmp(NULL, data, NULL, linesize, 8) - 400;
@@ -241,8 +241,8 @@ static const int dv_weight_248[64] = {
 };
 
 static av_always_inline int dv_init_enc_block(EncBlockInfo *bi, uint8_t *data,
-                                              int linesize, DVVideoContext *s,
-                                              int bias)
+                                              ptrdiff_t linesize,
+                                              DVVideoContext *s, int bias)
 {
     const int *weight;
     const uint8_t *zigzag_scan;
@@ -420,7 +420,8 @@ static int dv_encode_video_segment(AVCodecContext *avctx, void *arg)
     DVVideoContext *s = avctx->priv_data;
     DVwork_chunk *work_chunk = arg;
     int mb_index, i, j;
-    int mb_x, mb_y, c_offset, linesize, y_stride;
+    int mb_x, mb_y, c_offset;
+    ptrdiff_t linesize, y_stride;
     uint8_t *y_ptr;
     uint8_t *dif;
     LOCAL_ALIGNED_8(uint8_t, scratch, [128]);
diff --git a/libavcodec/mips/pixblockdsp_mips.h b/libavcodec/mips/pixblockdsp_mips.h
index 7f8cc96..a12b1a6 100644
--- a/libavcodec/mips/pixblockdsp_mips.h
+++ b/libavcodec/mips/pixblockdsp_mips.h
@@ -25,15 +25,15 @@
 #include "../mpegvideo.h"
 
 void ff_diff_pixels_msa(int16_t *av_restrict block, const uint8_t *src1,
-                        const uint8_t *src2, int stride);
+                        const uint8_t *src2, ptrdiff_t stride);
 void ff_get_pixels_16_msa(int16_t *restrict dst, const uint8_t *src,
                           ptrdiff_t stride);
 void ff_get_pixels_8_msa(int16_t *restrict dst, const uint8_t *src,
                          ptrdiff_t stride);
 
 void ff_get_pixels_8_mmi(int16_t *av_restrict block, const uint8_t *pixels,
-        ptrdiff_t line_size);
+                         ptrdiff_t stride);
 void ff_diff_pixels_mmi(int16_t *av_restrict block, const uint8_t *src1,
-        const uint8_t *src2, int stride);
+                        const uint8_t *src2, ptrdiff_t stride);
 
 #endif  // #ifndef AVCODEC_MIPS_PIXBLOCKDSP_MIPS_H
diff --git a/libavcodec/mips/pixblockdsp_mmi.c b/libavcodec/mips/pixblockdsp_mmi.c
index 9f2eac3..a915a3c 100644
--- a/libavcodec/mips/pixblockdsp_mmi.c
+++ b/libavcodec/mips/pixblockdsp_mmi.c
@@ -26,7 +26,7 @@
 #include "libavutil/mips/mmiutils.h"
 
 void ff_get_pixels_8_mmi(int16_t *av_restrict block, const uint8_t *pixels,
-        ptrdiff_t line_size)
+                         ptrdiff_t stride)
 {
     double ftmp[7];
     DECLARE_VAR_ALL64;
@@ -36,7 +36,7 @@ void ff_get_pixels_8_mmi(int16_t *av_restrict block, const uint8_t *pixels,
         "xor        %[ftmp0],   %[ftmp0],       %[ftmp0]                \n\t"
 
         MMI_LDC1(%[ftmp1], %[pixels], 0x00)
-        MMI_LDXC1(%[ftmp2], %[pixels], %[line_size], 0x00)
+        MMI_LDXC1(%[ftmp2], %[pixels], %[stride], 0x00)
         "punpcklbh  %[ftmp3],   %[ftmp1],       %[ftmp0]                \n\t"
         "punpckhbh  %[ftmp4],   %[ftmp1],       %[ftmp0]                \n\t"
         "punpcklbh  %[ftmp5],   %[ftmp2],       %[ftmp0]                \n\t"
@@ -45,10 +45,10 @@ void ff_get_pixels_8_mmi(int16_t *av_restrict block, const uint8_t *pixels,
         MMI_SDC1(%[ftmp4], %[block], 0x08)
         MMI_SDC1(%[ftmp5], %[block], 0x10)
         MMI_SDC1(%[ftmp6], %[block], 0x18)
-        PTR_ADDU   "%[pixels],  %[pixels],      %[line_size_x2]         \n\t"
+        PTR_ADDU   "%[pixels],  %[pixels],      %[stride_x2]            \n\t"
 
         MMI_LDC1(%[ftmp1], %[pixels], 0x00)
-        MMI_LDXC1(%[ftmp2], %[pixels], %[line_size], 0x00)
+        MMI_LDXC1(%[ftmp2], %[pixels], %[stride], 0x00)
         "punpcklbh  %[ftmp3],   %[ftmp1],       %[ftmp0]                \n\t"
         "punpckhbh  %[ftmp4],   %[ftmp1],       %[ftmp0]                \n\t"
         "punpcklbh  %[ftmp5],   %[ftmp2],       %[ftmp0]                \n\t"
@@ -57,10 +57,10 @@ void ff_get_pixels_8_mmi(int16_t *av_restrict block, const uint8_t *pixels,
         MMI_SDC1(%[ftmp4], %[block], 0x28)
         MMI_SDC1(%[ftmp5], %[block], 0x30)
         MMI_SDC1(%[ftmp6], %[block], 0x38)
-        PTR_ADDU   "%[pixels],  %[pixels],      %[line_size_x2]         \n\t"
+        PTR_ADDU   "%[pixels],  %[pixels],      %[stride_x2]            \n\t"
 
         MMI_LDC1(%[ftmp1], %[pixels], 0x00)
-        MMI_LDXC1(%[ftmp2], %[pixels], %[line_size], 0x00)
+        MMI_LDXC1(%[ftmp2], %[pixels], %[stride], 0x00)
         "punpcklbh  %[ftmp3],   %[ftmp1],       %[ftmp0]                \n\t"
         "punpckhbh  %[ftmp4],   %[ftmp1],       %[ftmp0]                \n\t"
         "punpcklbh  %[ftmp5],   %[ftmp2],       %[ftmp0]                \n\t"
@@ -69,10 +69,10 @@ void ff_get_pixels_8_mmi(int16_t *av_restrict block, const uint8_t *pixels,
         MMI_SDC1(%[ftmp4], %[block], 0x48)
         MMI_SDC1(%[ftmp5], %[block], 0x50)
         MMI_SDC1(%[ftmp6], %[block], 0x58)
-        PTR_ADDU   "%[pixels],  %[pixels],      %[line_size_x2]         \n\t"
+        PTR_ADDU   "%[pixels],  %[pixels],      %[stride_x2]            \n\t"
 
         MMI_LDC1(%[ftmp1], %[pixels], 0x00)
-        MMI_LDXC1(%[ftmp2], %[pixels], %[line_size], 0x00)
+        MMI_LDXC1(%[ftmp2], %[pixels], %[stride], 0x00)
         "punpcklbh  %[ftmp3],   %[ftmp1],       %[ftmp0]                \n\t"
         "punpckhbh  %[ftmp4],   %[ftmp1],       %[ftmp0]                \n\t"
         "punpcklbh  %[ftmp5],   %[ftmp2],       %[ftmp0]                \n\t"
@@ -88,14 +88,14 @@ void ff_get_pixels_8_mmi(int16_t *av_restrict block, const uint8_t *pixels,
           RESTRICT_ASM_ALL64
           RESTRICT_ASM_ADDRT
           [pixels]"+&r"(pixels)
-        : [block]"r"((mips_reg)block),      [line_size]"r"((mips_reg)line_size),
-          [line_size_x2]"r"((mips_reg)(line_size<<1))
+        : [block]"r"((mips_reg)block),      [stride]"r"((mips_reg)stride),
+          [stride_x2]"r"((mips_reg)(stride<<1))
         : "memory"
     );
 }
 
 void ff_diff_pixels_mmi(int16_t *av_restrict block, const uint8_t *src1,
-        const uint8_t *src2, int stride)
+        const uint8_t *src2, ptrdiff_t stride)
 {
     double ftmp[5];
     mips_reg tmp[1];
diff --git a/libavcodec/mips/pixblockdsp_msa.c b/libavcodec/mips/pixblockdsp_msa.c
index 966e11a..86a4576 100644
--- a/libavcodec/mips/pixblockdsp_msa.c
+++ b/libavcodec/mips/pixblockdsp_msa.c
@@ -137,7 +137,7 @@ void ff_get_pixels_8_msa(int16_t *av_restrict dest, const uint8_t *src,
 }
 
 void ff_diff_pixels_msa(int16_t *av_restrict block, const uint8_t *src1,
-                        const uint8_t *src2, int stride)
+                        const uint8_t *src2, ptrdiff_t stride)
 {
     diff_pixels_msa(block, src1, src2, stride);
 }
diff --git a/libavcodec/pixblockdsp.c b/libavcodec/pixblockdsp.c
index f0883d3..417c944 100644
--- a/libavcodec/pixblockdsp.c
+++ b/libavcodec/pixblockdsp.c
@@ -25,20 +25,20 @@
 #include "pixblockdsp.h"
 
 static void get_pixels_16_c(int16_t *av_restrict block, const uint8_t *pixels,
-                            ptrdiff_t line_size)
+                            ptrdiff_t stride)
 {
-    AV_COPY128U(block + 0 * 8, pixels + 0 * line_size);
-    AV_COPY128U(block + 1 * 8, pixels + 1 * line_size);
-    AV_COPY128U(block + 2 * 8, pixels + 2 * line_size);
-    AV_COPY128U(block + 3 * 8, pixels + 3 * line_size);
-    AV_COPY128U(block + 4 * 8, pixels + 4 * line_size);
-    AV_COPY128U(block + 5 * 8, pixels + 5 * line_size);
-    AV_COPY128U(block + 6 * 8, pixels + 6 * line_size);
-    AV_COPY128U(block + 7 * 8, pixels + 7 * line_size);
+    AV_COPY128U(block + 0 * 8, pixels + 0 * stride);
+    AV_COPY128U(block + 1 * 8, pixels + 1 * stride);
+    AV_COPY128U(block + 2 * 8, pixels + 2 * stride);
+    AV_COPY128U(block + 3 * 8, pixels + 3 * stride);
+    AV_COPY128U(block + 4 * 8, pixels + 4 * stride);
+    AV_COPY128U(block + 5 * 8, pixels + 5 * stride);
+    AV_COPY128U(block + 6 * 8, pixels + 6 * stride);
+    AV_COPY128U(block + 7 * 8, pixels + 7 * stride);
 }
 
 static void get_pixels_8_c(int16_t *av_restrict block, const uint8_t *pixels,
-                           ptrdiff_t line_size)
+                           ptrdiff_t stride)
 {
     int i;
 
@@ -52,13 +52,13 @@ static void get_pixels_8_c(int16_t *av_restrict block, const uint8_t *pixels,
         block[5] = pixels[5];
         block[6] = pixels[6];
         block[7] = pixels[7];
-        pixels  += line_size;
+        pixels  += stride;
         block   += 8;
     }
 }
 
 static void diff_pixels_c(int16_t *av_restrict block, const uint8_t *s1,
-                          const uint8_t *s2, int stride)
+                          const uint8_t *s2, ptrdiff_t stride)
 {
     int i;
 
diff --git a/libavcodec/pixblockdsp.h b/libavcodec/pixblockdsp.h
index 79ed86c..278accb 100644
--- a/libavcodec/pixblockdsp.h
+++ b/libavcodec/pixblockdsp.h
@@ -26,11 +26,11 @@
 typedef struct PixblockDSPContext {
     void (*get_pixels)(int16_t *block /* align 16 */,
                        const uint8_t *pixels /* align 8 */,
-                       ptrdiff_t line_size);
+                       ptrdiff_t stride);
     void (*diff_pixels)(int16_t *block /* align 16 */,
                         const uint8_t *s1 /* align 8 */,
                         const uint8_t *s2 /* align 8 */,
-                        int stride);
+                        ptrdiff_t stride);
 } PixblockDSPContext;
 
 void ff_pixblockdsp_init(PixblockDSPContext *c, AVCodecContext *avctx);
diff --git a/libavcodec/ppc/pixblockdsp.c b/libavcodec/ppc/pixblockdsp.c
index f3a5050..f5ac850 100644
--- a/libavcodec/ppc/pixblockdsp.c
+++ b/libavcodec/ppc/pixblockdsp.c
@@ -37,7 +37,7 @@
 
 #if HAVE_VSX
 static void get_pixels_altivec(int16_t *restrict block, const uint8_t *pixels,
-                               ptrdiff_t line_size)
+                               ptrdiff_t stride)
 {
     int i;
     vector unsigned char perm =
@@ -59,12 +59,12 @@ static void get_pixels_altivec(int16_t *restrict block, const uint8_t *pixels,
         // Save the data to the block, we assume the block is 16-byte aligned.
         vec_vsx_st(shorts, i * 16, (vector signed short *) block);
 
-        pixels += line_size;
+        pixels += stride;
     }
 }
 #else
 static void get_pixels_altivec(int16_t *restrict block, const uint8_t *pixels,
-                               ptrdiff_t line_size)
+                               ptrdiff_t stride)
 {
     int i;
     const vec_u8 zero = (const vec_u8)vec_splat_u8(0);
@@ -84,7 +84,7 @@ static void get_pixels_altivec(int16_t *restrict block, const uint8_t *pixels,
         // Save the data to the block, we assume the block is 16-byte aligned.
         vec_st(shorts, i * 16, (vec_s16 *)block);
 
-        pixels += line_size;
+        pixels += stride;
     }
 }
 
@@ -92,7 +92,7 @@ static void get_pixels_altivec(int16_t *restrict block, const uint8_t *pixels,
 
 #if HAVE_VSX
 static void diff_pixels_altivec(int16_t *restrict block, const uint8_t *s1,
-                                const uint8_t *s2, int stride)
+                                const uint8_t *s2, ptrdiff_t stride)
 {
   int i;
   const vector unsigned char zero =
@@ -154,7 +154,7 @@ static void diff_pixels_altivec(int16_t *restrict block, const uint8_t *s1,
 }
 #else
 static void diff_pixels_altivec(int16_t *restrict block, const uint8_t *s1,
-                                const uint8_t *s2, int stride)
+                                const uint8_t *s2, ptrdiff_t stride)
 {
     int i;
     vec_u8 perm;
@@ -233,7 +233,7 @@ static void diff_pixels_altivec(int16_t *restrict block, const uint8_t *s1,
 
 #if HAVE_VSX
 static void get_pixels_vsx(int16_t *restrict block, const uint8_t *pixels,
-                           ptrdiff_t line_size)
+                           ptrdiff_t stride)
 {
     int i;
     for (i = 0; i < 8; i++) {
@@ -241,12 +241,12 @@ static void get_pixels_vsx(int16_t *restrict block, const uint8_t *pixels,
 
         vec_vsx_st(shorts, i * 16, block);
 
-        pixels += line_size;
+        pixels += stride;
     }
 }
 
 static void diff_pixels_vsx(int16_t *restrict block, const uint8_t *s1,
-                            const uint8_t *s2, int stride)
+                            const uint8_t *s2, ptrdiff_t stride)
 {
     int i;
     vec_s16 shorts1, shorts2;
diff --git a/libavcodec/x86/pixblockdsp.asm b/libavcodec/x86/pixblockdsp.asm
index 2864d0c..440fe29 100644
--- a/libavcodec/x86/pixblockdsp.asm
+++ b/libavcodec/x86/pixblockdsp.asm
@@ -26,7 +26,7 @@
 SECTION .text
 
 INIT_MMX mmx
-; void ff_get_pixels_mmx(int16_t *block, const uint8_t *pixels, ptrdiff_t line_size)
+; void ff_get_pixels_mmx(int16_t *block, const uint8_t *pixels, ptrdiff_t stride)
 cglobal get_pixels, 3,4
     add          r0, 128
     mov          r3, -128
@@ -81,10 +81,9 @@ cglobal get_pixels, 3, 4, 5
     RET
 
 ; void ff_diff_pixels_mmx(int16_t *block, const uint8_t *s1, const uint8_t *s2,
-;                         int stride);
+;                         ptrdiff_t stride);
 %macro DIFF_PIXELS 0
 cglobal diff_pixels, 4,5,5
-    movsxdifnidn r3, r3d
     pxor         m4, m4
     add          r0,  128
     mov          r4, -128
diff --git a/libavcodec/x86/pixblockdsp_init.c b/libavcodec/x86/pixblockdsp_init.c
index 4d06a44..fa9578a 100644
--- a/libavcodec/x86/pixblockdsp_init.c
+++ b/libavcodec/x86/pixblockdsp_init.c
@@ -23,12 +23,12 @@
 #include "libavutil/x86/cpu.h"
 #include "libavcodec/pixblockdsp.h"
 
-void ff_get_pixels_mmx(int16_t *block, const uint8_t *pixels, ptrdiff_t line_size);
-void ff_get_pixels_sse2(int16_t *block, const uint8_t *pixels, ptrdiff_t line_size);
+void ff_get_pixels_mmx(int16_t *block, const uint8_t *pixels, ptrdiff_t stride);
+void ff_get_pixels_sse2(int16_t *block, const uint8_t *pixels, ptrdiff_t stride);
 void ff_diff_pixels_mmx(int16_t *block, const uint8_t *s1, const uint8_t *s2,
-                        int stride);
+                        ptrdiff_t stride);
 void ff_diff_pixels_sse2(int16_t *block, const uint8_t *s1, const uint8_t *s2,
-                         int stride);
+                         ptrdiff_t stride);
 
 av_cold void ff_pixblockdsp_init_x86(PixblockDSPContext *c,
                                      AVCodecContext *avctx,
diff --git a/tests/checkasm/pixblockdsp.c b/tests/checkasm/pixblockdsp.c
index 2b88e7d..e14b0a9 100644
--- a/tests/checkasm/pixblockdsp.c
+++ b/tests/checkasm/pixblockdsp.c
@@ -64,7 +64,7 @@
 #define check_diff_pixels(type)                                                            \
     do {                                                                                   \
         int i;                                                                             \
-        declare_func_emms(AV_CPU_FLAG_MMX, void, int16_t *av_restrict block, const uint8_t *s1, const uint8_t *s2, int stride); \
+        declare_func_emms(AV_CPU_FLAG_MMX, void, int16_t *av_restrict block, const uint8_t *s1, const uint8_t *s2, ptrdiff_t stride); \
                                                                                            \
         for (i = 0; i < BUF_UNITS; i++) {                                              \
             int src_offset = i * 64 * sizeof(type) + 8 * i; /* Test various alignments */      \


======================================================================

diff --cc libavcodec/alpha/pixblockdsp_alpha.c
index 866b762,0000000..c2f1a1d
mode 100644,000000..100644
--- a/libavcodec/alpha/pixblockdsp_alpha.c
+++ b/libavcodec/alpha/pixblockdsp_alpha.c
@@@ -1,78 -1,0 +1,79 @@@
 +/*
 + * SIMD-optimized pixel operations
 + *
 + * This file is part of FFmpeg.
 + *
 + * FFmpeg is free software; you can redistribute it and/or
 + * modify it under the terms of the GNU Lesser General Public
 + * License as published by the Free Software Foundation; either
 + * version 2.1 of the License, or (at your option) any later version.
 + *
 + * FFmpeg is distributed in the hope that it will be useful,
 + * but WITHOUT ANY WARRANTY; without even the implied warranty of
 + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 + * Lesser General Public License for more details.
 + *
 + * You should have received a copy of the GNU Lesser General Public
 + * License along with FFmpeg; if not, write to the Free Software
 + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
 + */
 +
 +#include "libavutil/attributes.h"
 +#include "libavcodec/pixblockdsp.h"
 +#include "asm.h"
 +
 +static void get_pixels_mvi(int16_t *restrict block,
-                            const uint8_t *restrict pixels, ptrdiff_t line_size)
++                           const uint8_t *restrict pixels, ptrdiff_t stride)
 +{
 +    int h = 8;
 +
 +    do {
 +        uint64_t p;
 +
 +        p = ldq(pixels);
 +        stq(unpkbw(p),       block);
 +        stq(unpkbw(p >> 32), block + 4);
 +
-         pixels += line_size;
++        pixels += stride;
 +        block += 8;
 +    } while (--h);
 +}
 +
 +static void diff_pixels_mvi(int16_t *block, const uint8_t *s1, const uint8_t *s2,
-                             int stride) {
++                            ptrdiff_t stride)
++{
 +    int h = 8;
 +    uint64_t mask = 0x4040;
 +
 +    mask |= mask << 16;
 +    mask |= mask << 32;
 +    do {
 +        uint64_t x, y, c, d, a;
 +        uint64_t signs;
 +
 +        x = ldq(s1);
 +        y = ldq(s2);
 +        c = cmpbge(x, y);
 +        d = x - y;
 +        a = zap(mask, c);       /* We use 0x4040404040404040 here...  */
 +        d += 4 * a;             /* ...so we can use s4addq here.      */
 +        signs = zap(-1, c);
 +
 +        stq(unpkbw(d)       | (unpkbw(signs)       << 8), block);
 +        stq(unpkbw(d >> 32) | (unpkbw(signs >> 32) << 8), block + 4);
 +
 +        s1 += stride;
 +        s2 += stride;
 +        block += 8;
 +    } while (--h);
 +}
 +
 +av_cold void ff_pixblockdsp_init_alpha(PixblockDSPContext *c, AVCodecContext *avctx,
 +                                       unsigned high_bit_depth)
 +{
 +    if (amask(AMASK_MVI) == 0) {
 +        if (!high_bit_depth)
 +            c->get_pixels = get_pixels_mvi;
 +        c->diff_pixels = diff_pixels_mvi;
 +    }
 +}
diff --cc libavcodec/mips/pixblockdsp_mips.h
index 7f8cc96,0000000..a12b1a6
mode 100644,000000..100644
--- a/libavcodec/mips/pixblockdsp_mips.h
+++ b/libavcodec/mips/pixblockdsp_mips.h
@@@ -1,39 -1,0 +1,39 @@@
 +/*
 + * Copyright (c) 2015 Shivraj Patil (Shivraj.Patil at imgtec.com)
 + *                    Zhou Xiaoyong <zhouxiaoyong at loongson.cn>
 + *
 + * This file is part of FFmpeg.
 + *
 + * FFmpeg is free software; you can redistribute it and/or
 + * modify it under the terms of the GNU Lesser General Public
 + * License as published by the Free Software Foundation; either
 + * version 2.1 of the License, or (at your option) any later version.
 + *
 + * FFmpeg is distributed in the hope that it will be useful,
 + * but WITHOUT ANY WARRANTY; without even the implied warranty of
 + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 + * Lesser General Public License for more details.
 + *
 + * You should have received a copy of the GNU Lesser General Public
 + * License along with FFmpeg; if not, write to the Free Software
 + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
 + */
 +
 +#ifndef AVCODEC_MIPS_PIXBLOCKDSP_MIPS_H
 +#define AVCODEC_MIPS_PIXBLOCKDSP_MIPS_H
 +
 +#include "../mpegvideo.h"
 +
 +void ff_diff_pixels_msa(int16_t *av_restrict block, const uint8_t *src1,
-                         const uint8_t *src2, int stride);
++                        const uint8_t *src2, ptrdiff_t stride);
 +void ff_get_pixels_16_msa(int16_t *restrict dst, const uint8_t *src,
 +                          ptrdiff_t stride);
 +void ff_get_pixels_8_msa(int16_t *restrict dst, const uint8_t *src,
 +                         ptrdiff_t stride);
 +
 +void ff_get_pixels_8_mmi(int16_t *av_restrict block, const uint8_t *pixels,
-         ptrdiff_t line_size);
++                         ptrdiff_t stride);
 +void ff_diff_pixels_mmi(int16_t *av_restrict block, const uint8_t *src1,
-         const uint8_t *src2, int stride);
++                        const uint8_t *src2, ptrdiff_t stride);
 +
 +#endif  // #ifndef AVCODEC_MIPS_PIXBLOCKDSP_MIPS_H
diff --cc libavcodec/mips/pixblockdsp_mmi.c
index 9f2eac3,0000000..a915a3c
mode 100644,000000..100644
--- a/libavcodec/mips/pixblockdsp_mmi.c
+++ b/libavcodec/mips/pixblockdsp_mmi.c
@@@ -1,135 -1,0 +1,135 @@@
 +/*
 + * Loongson SIMD optimized pixblockdsp
 + *
 + * Copyright (c) 2015 Loongson Technology Corporation Limited
 + * Copyright (c) 2015 Zhou Xiaoyong <zhouxiaoyong at loongson.cn>
 + *
 + * This file is part of FFmpeg.
 + *
 + * FFmpeg is free software; you can redistribute it and/or
 + * modify it under the terms of the GNU Lesser General Public
 + * License as published by the Free Software Foundation; either
 + * version 2.1 of the License, or (at your option) any later version.
 + *
 + * FFmpeg is distributed in the hope that it will be useful,
 + * but WITHOUT ANY WARRANTY; without even the implied warranty of
 + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 + * Lesser General Public License for more details.
 + *
 + * You should have received a copy of the GNU Lesser General Public
 + * License along with FFmpeg; if not, write to the Free Software
 + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
 + */
 +
 +#include "pixblockdsp_mips.h"
 +#include "libavutil/mips/asmdefs.h"
 +#include "libavutil/mips/mmiutils.h"
 +
 +void ff_get_pixels_8_mmi(int16_t *av_restrict block, const uint8_t *pixels,
-         ptrdiff_t line_size)
++                         ptrdiff_t stride)
 +{
 +    double ftmp[7];
 +    DECLARE_VAR_ALL64;
 +    DECLARE_VAR_ADDRT;
 +
 +    __asm__ volatile (
 +        "xor        %[ftmp0],   %[ftmp0],       %[ftmp0]                \n\t"
 +
 +        MMI_LDC1(%[ftmp1], %[pixels], 0x00)
-         MMI_LDXC1(%[ftmp2], %[pixels], %[line_size], 0x00)
++        MMI_LDXC1(%[ftmp2], %[pixels], %[stride], 0x00)
 +        "punpcklbh  %[ftmp3],   %[ftmp1],       %[ftmp0]                \n\t"
 +        "punpckhbh  %[ftmp4],   %[ftmp1],       %[ftmp0]                \n\t"
 +        "punpcklbh  %[ftmp5],   %[ftmp2],       %[ftmp0]                \n\t"
 +        "punpckhbh  %[ftmp6],   %[ftmp2],       %[ftmp0]                \n\t"
 +        MMI_SDC1(%[ftmp3], %[block], 0x00)
 +        MMI_SDC1(%[ftmp4], %[block], 0x08)
 +        MMI_SDC1(%[ftmp5], %[block], 0x10)
 +        MMI_SDC1(%[ftmp6], %[block], 0x18)
-         PTR_ADDU   "%[pixels],  %[pixels],      %[line_size_x2]         \n\t"
++        PTR_ADDU   "%[pixels],  %[pixels],      %[stride_x2]            \n\t"
 +
 +        MMI_LDC1(%[ftmp1], %[pixels], 0x00)
-         MMI_LDXC1(%[ftmp2], %[pixels], %[line_size], 0x00)
++        MMI_LDXC1(%[ftmp2], %[pixels], %[stride], 0x00)
 +        "punpcklbh  %[ftmp3],   %[ftmp1],       %[ftmp0]                \n\t"
 +        "punpckhbh  %[ftmp4],   %[ftmp1],       %[ftmp0]                \n\t"
 +        "punpcklbh  %[ftmp5],   %[ftmp2],       %[ftmp0]                \n\t"
 +        "punpckhbh  %[ftmp6],   %[ftmp2],       %[ftmp0]                \n\t"
 +        MMI_SDC1(%[ftmp3], %[block], 0x20)
 +        MMI_SDC1(%[ftmp4], %[block], 0x28)
 +        MMI_SDC1(%[ftmp5], %[block], 0x30)
 +        MMI_SDC1(%[ftmp6], %[block], 0x38)
-         PTR_ADDU   "%[pixels],  %[pixels],      %[line_size_x2]         \n\t"
++        PTR_ADDU   "%[pixels],  %[pixels],      %[stride_x2]            \n\t"
 +
 +        MMI_LDC1(%[ftmp1], %[pixels], 0x00)
-         MMI_LDXC1(%[ftmp2], %[pixels], %[line_size], 0x00)
++        MMI_LDXC1(%[ftmp2], %[pixels], %[stride], 0x00)
 +        "punpcklbh  %[ftmp3],   %[ftmp1],       %[ftmp0]                \n\t"
 +        "punpckhbh  %[ftmp4],   %[ftmp1],       %[ftmp0]                \n\t"
 +        "punpcklbh  %[ftmp5],   %[ftmp2],       %[ftmp0]                \n\t"
 +        "punpckhbh  %[ftmp6],   %[ftmp2],       %[ftmp0]                \n\t"
 +        MMI_SDC1(%[ftmp3], %[block], 0x40)
 +        MMI_SDC1(%[ftmp4], %[block], 0x48)
 +        MMI_SDC1(%[ftmp5], %[block], 0x50)
 +        MMI_SDC1(%[ftmp6], %[block], 0x58)
-         PTR_ADDU   "%[pixels],  %[pixels],      %[line_size_x2]         \n\t"
++        PTR_ADDU   "%[pixels],  %[pixels],      %[stride_x2]            \n\t"
 +
 +        MMI_LDC1(%[ftmp1], %[pixels], 0x00)
-         MMI_LDXC1(%[ftmp2], %[pixels], %[line_size], 0x00)
++        MMI_LDXC1(%[ftmp2], %[pixels], %[stride], 0x00)
 +        "punpcklbh  %[ftmp3],   %[ftmp1],       %[ftmp0]                \n\t"
 +        "punpckhbh  %[ftmp4],   %[ftmp1],       %[ftmp0]                \n\t"
 +        "punpcklbh  %[ftmp5],   %[ftmp2],       %[ftmp0]                \n\t"
 +        "punpckhbh  %[ftmp6],   %[ftmp2],       %[ftmp0]                \n\t"
 +        MMI_SDC1(%[ftmp3], %[block], 0x60)
 +        MMI_SDC1(%[ftmp4], %[block], 0x68)
 +        MMI_SDC1(%[ftmp5], %[block], 0x70)
 +        MMI_SDC1(%[ftmp6], %[block], 0x78)
 +        : [ftmp0]"=&f"(ftmp[0]),            [ftmp1]"=&f"(ftmp[1]),
 +          [ftmp2]"=&f"(ftmp[2]),            [ftmp3]"=&f"(ftmp[3]),
 +          [ftmp4]"=&f"(ftmp[4]),            [ftmp5]"=&f"(ftmp[5]),
 +          [ftmp6]"=&f"(ftmp[6]),
 +          RESTRICT_ASM_ALL64
 +          RESTRICT_ASM_ADDRT
 +          [pixels]"+&r"(pixels)
-         : [block]"r"((mips_reg)block),      [line_size]"r"((mips_reg)line_size),
-           [line_size_x2]"r"((mips_reg)(line_size<<1))
++        : [block]"r"((mips_reg)block),      [stride]"r"((mips_reg)stride),
++          [stride_x2]"r"((mips_reg)(stride<<1))
 +        : "memory"
 +    );
 +}
 +
 +void ff_diff_pixels_mmi(int16_t *av_restrict block, const uint8_t *src1,
-         const uint8_t *src2, int stride)
++        const uint8_t *src2, ptrdiff_t stride)
 +{
 +    double ftmp[5];
 +    mips_reg tmp[1];
 +    DECLARE_VAR_ALL64;
 +
 +    __asm__ volatile (
 +        "li         %[tmp0],    0x08                                    \n\t"
 +        "xor        %[ftmp4],   %[ftmp4],       %[ftmp4]                \n\t"
 +        "1:                                                             \n\t"
 +        MMI_LDC1(%[ftmp0], %[src1], 0x00)
 +        "or         %[ftmp1],   %[ftmp0],       %[ftmp0]                \n\t"
 +        MMI_LDC1(%[ftmp2], %[src2], 0x00)
 +        "or         %[ftmp3],   %[ftmp2],       %[ftmp2]                \n\t"
 +        "punpcklbh  %[ftmp0],   %[ftmp0],       %[ftmp4]                \n\t"
 +        "punpckhbh  %[ftmp1],   %[ftmp1],       %[ftmp4]                \n\t"
 +        "punpcklbh  %[ftmp2],   %[ftmp2],       %[ftmp4]                \n\t"
 +        "punpckhbh  %[ftmp3],   %[ftmp3],       %[ftmp4]                \n\t"
 +        "psubh      %[ftmp0],   %[ftmp0],       %[ftmp2]                \n\t"
 +        "psubh      %[ftmp1],   %[ftmp1],       %[ftmp3]                \n\t"
 +        MMI_SDC1(%[ftmp0], %[block], 0x00)
 +        MMI_SDC1(%[ftmp1], %[block], 0x08)
 +        PTR_ADDI   "%[tmp0],    %[tmp0], -0x01                          \n\t"
 +        PTR_ADDIU  "%[block],   %[block], 0x10                          \n\t"
 +        PTR_ADDU   "%[src1],    %[src1],        %[stride]               \n\t"
 +        PTR_ADDU   "%[src2],    %[src2],        %[stride]               \n\t"
 +        "bgtz       %[tmp0],    1b                                      \n\t"
 +        : [ftmp0]"=&f"(ftmp[0]),            [ftmp1]"=&f"(ftmp[1]),
 +          [ftmp2]"=&f"(ftmp[2]),            [ftmp3]"=&f"(ftmp[3]),
 +          [ftmp4]"=&f"(ftmp[4]),
 +          [tmp0]"=&r"(tmp[0]),
 +          RESTRICT_ASM_ALL64
 +          [block]"+&r"(block),              [src1]"+&r"(src1),
 +          [src2]"+&r"(src2)
 +        : [stride]"r"((mips_reg)stride)
 +        : "memory"
 +    );
 +}
diff --cc libavcodec/mips/pixblockdsp_msa.c
index 966e11a,0000000..86a4576
mode 100644,000000..100644
--- a/libavcodec/mips/pixblockdsp_msa.c
+++ b/libavcodec/mips/pixblockdsp_msa.c
@@@ -1,143 -1,0 +1,143 @@@
 +/*
 + * Copyright (c) 2015 Shivraj Patil (Shivraj.Patil at imgtec.com)
 + *
 + * This file is part of FFmpeg.
 + *
 + * FFmpeg is free software; you can redistribute it and/or
 + * modify it under the terms of the GNU Lesser General Public
 + * License as published by the Free Software Foundation; either
 + * version 2.1 of the License, or (at your option) any later version.
 + *
 + * FFmpeg is distributed in the hope that it will be useful,
 + * but WITHOUT ANY WARRANTY; without even the implied warranty of
 + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 + * Lesser General Public License for more details.
 + *
 + * You should have received a copy of the GNU Lesser General Public
 + * License along with FFmpeg; if not, write to the Free Software
 + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
 + */
 +
 +#include "libavutil/mips/generic_macros_msa.h"
 +#include "pixblockdsp_mips.h"
 +
 +static void diff_pixels_msa(int16_t *block, const uint8_t *src1,
 +                            const uint8_t *src2, int32_t stride)
 +{
 +    v16u8 in10, in11, in12, in13, in14, in15, in16, in17;
 +    v16u8 in20, in21, in22, in23, in24, in25, in26, in27;
 +    v8i16 out0, out1, out2, out3, out4, out5, out6, out7;
 +
 +    LD_UB8(src1, stride, in10, in11, in12, in13, in14, in15, in16, in17);
 +    LD_UB8(src2, stride, in20, in21, in22, in23, in24, in25, in26, in27);
 +    ILVR_B4_SH(in10, in20, in11, in21, in12, in22, in13, in23,
 +               out0, out1, out2, out3);
 +    ILVR_B4_SH(in14, in24, in15, in25, in16, in26, in17, in27,
 +               out4, out5, out6, out7);
 +    HSUB_UB4_SH(out0, out1, out2, out3, out0, out1, out2, out3);
 +    HSUB_UB4_SH(out4, out5, out6, out7, out4, out5, out6, out7);
 +    ST_SH8(out0, out1, out2, out3, out4, out5, out6, out7, block, 8);
 +}
 +
 +static void copy_8bit_to_16bit_width8_msa(const uint8_t *src, int32_t src_stride,
 +                                          int16_t *dst, int32_t dst_stride,
 +                                          int32_t height)
 +{
 +    uint8_t *dst_ptr;
 +    int32_t cnt;
 +    v16u8 src0, src1, src2, src3;
 +    v16i8 zero = { 0 };
 +
 +    dst_ptr = (uint8_t *) dst;
 +
 +    for (cnt = (height >> 2); cnt--;) {
 +        LD_UB4(src, src_stride, src0, src1, src2, src3);
 +        src += (4 * src_stride);
 +
 +        ILVR_B4_UB(zero, src0, zero, src1, zero, src2, zero, src3,
 +                   src0, src1, src2, src3);
 +
 +        ST_UB4(src0, src1, src2, src3, dst_ptr, (dst_stride * 2));
 +        dst_ptr += (4 * 2 * dst_stride);
 +    }
 +}
 +
 +static void copy_16multx8mult_msa(const uint8_t *src, int32_t src_stride,
 +                                  uint8_t *dst, int32_t dst_stride,
 +                                  int32_t height, int32_t width)
 +{
 +    int32_t cnt, loop_cnt;
 +    const uint8_t *src_tmp;
 +    uint8_t *dst_tmp;
 +    v16u8 src0, src1, src2, src3, src4, src5, src6, src7;
 +
 +    for (cnt = (width >> 4); cnt--;) {
 +        src_tmp = src;
 +        dst_tmp = dst;
 +
 +        for (loop_cnt = (height >> 3); loop_cnt--;) {
 +            LD_UB8(src_tmp, src_stride,
 +                   src0, src1, src2, src3, src4, src5, src6, src7);
 +            src_tmp += (8 * src_stride);
 +
 +            ST_UB8(src0, src1, src2, src3, src4, src5, src6, src7,
 +                   dst_tmp, dst_stride);
 +            dst_tmp += (8 * dst_stride);
 +        }
 +
 +        src += 16;
 +        dst += 16;
 +    }
 +}
 +
 +static void copy_width16_msa(const uint8_t *src, int32_t src_stride,
 +                             uint8_t *dst, int32_t dst_stride,
 +                             int32_t height)
 +{
 +    int32_t cnt;
 +    v16u8 src0, src1, src2, src3, src4, src5, src6, src7;
 +
 +    if (0 == height % 12) {
 +        for (cnt = (height / 12); cnt--;) {
 +            LD_UB8(src, src_stride,
 +                   src0, src1, src2, src3, src4, src5, src6, src7);
 +            src += (8 * src_stride);
 +            ST_UB8(src0, src1, src2, src3, src4, src5, src6, src7,
 +                   dst, dst_stride);
 +            dst += (8 * dst_stride);
 +
 +            LD_UB4(src, src_stride, src0, src1, src2, src3);
 +            src += (4 * src_stride);
 +            ST_UB4(src0, src1, src2, src3, dst, dst_stride);
 +            dst += (4 * dst_stride);
 +        }
 +    } else if (0 == height % 8) {
 +        copy_16multx8mult_msa(src, src_stride, dst, dst_stride, height, 16);
 +    } else if (0 == height % 4) {
 +        for (cnt = (height >> 2); cnt--;) {
 +            LD_UB4(src, src_stride, src0, src1, src2, src3);
 +            src += (4 * src_stride);
 +
 +            ST_UB4(src0, src1, src2, src3, dst, dst_stride);
 +            dst += (4 * dst_stride);
 +        }
 +    }
 +}
 +
 +void ff_get_pixels_16_msa(int16_t *av_restrict dest, const uint8_t *src,
 +                          ptrdiff_t stride)
 +{
 +    copy_width16_msa(src, stride, (uint8_t *) dest, 16, 8);
 +}
 +
 +void ff_get_pixels_8_msa(int16_t *av_restrict dest, const uint8_t *src,
 +                         ptrdiff_t stride)
 +{
 +    copy_8bit_to_16bit_width8_msa(src, stride, dest, 8, 8);
 +}
 +
 +void ff_diff_pixels_msa(int16_t *av_restrict block, const uint8_t *src1,
-                         const uint8_t *src2, int stride)
++                        const uint8_t *src2, ptrdiff_t stride)
 +{
 +    diff_pixels_msa(block, src1, src2, stride);
 +}
diff --cc libavcodec/pixblockdsp.c
index f0883d3,9d68d26..417c944
--- a/libavcodec/pixblockdsp.c
+++ b/libavcodec/pixblockdsp.c
@@@ -24,41 -23,15 +24,41 @@@
  #include "avcodec.h"
  #include "pixblockdsp.h"
  
 -#define BIT_DEPTH 16
 -#include "pixblockdsp_template.c"
 -#undef BIT_DEPTH
 +static void get_pixels_16_c(int16_t *av_restrict block, const uint8_t *pixels,
-                             ptrdiff_t line_size)
++                            ptrdiff_t stride)
 +{
-     AV_COPY128U(block + 0 * 8, pixels + 0 * line_size);
-     AV_COPY128U(block + 1 * 8, pixels + 1 * line_size);
-     AV_COPY128U(block + 2 * 8, pixels + 2 * line_size);
-     AV_COPY128U(block + 3 * 8, pixels + 3 * line_size);
-     AV_COPY128U(block + 4 * 8, pixels + 4 * line_size);
-     AV_COPY128U(block + 5 * 8, pixels + 5 * line_size);
-     AV_COPY128U(block + 6 * 8, pixels + 6 * line_size);
-     AV_COPY128U(block + 7 * 8, pixels + 7 * line_size);
++    AV_COPY128U(block + 0 * 8, pixels + 0 * stride);
++    AV_COPY128U(block + 1 * 8, pixels + 1 * stride);
++    AV_COPY128U(block + 2 * 8, pixels + 2 * stride);
++    AV_COPY128U(block + 3 * 8, pixels + 3 * stride);
++    AV_COPY128U(block + 4 * 8, pixels + 4 * stride);
++    AV_COPY128U(block + 5 * 8, pixels + 5 * stride);
++    AV_COPY128U(block + 6 * 8, pixels + 6 * stride);
++    AV_COPY128U(block + 7 * 8, pixels + 7 * stride);
 +}
 +
 +static void get_pixels_8_c(int16_t *av_restrict block, const uint8_t *pixels,
-                            ptrdiff_t line_size)
++                           ptrdiff_t stride)
 +{
 +    int i;
  
 -#define BIT_DEPTH 8
 -#include "pixblockdsp_template.c"
 +    /* read the pixels */
 +    for (i = 0; i < 8; i++) {
 +        block[0] = pixels[0];
 +        block[1] = pixels[1];
 +        block[2] = pixels[2];
 +        block[3] = pixels[3];
 +        block[4] = pixels[4];
 +        block[5] = pixels[5];
 +        block[6] = pixels[6];
 +        block[7] = pixels[7];
-         pixels  += line_size;
++        pixels  += stride;
 +        block   += 8;
 +    }
 +}
  
 -static void diff_pixels_c(int16_t *restrict block, const uint8_t *s1,
 +static void diff_pixels_c(int16_t *av_restrict block, const uint8_t *s1,
-                           const uint8_t *s2, int stride)
+                           const uint8_t *s2, ptrdiff_t stride)
  {
      int i;
  
diff --cc libavcodec/ppc/pixblockdsp.c
index f3a5050,96e7024..f5ac850
--- a/libavcodec/ppc/pixblockdsp.c
+++ b/libavcodec/ppc/pixblockdsp.c
@@@ -33,40 -33,13 +33,40 @@@
  #include "libavcodec/avcodec.h"
  #include "libavcodec/pixblockdsp.h"
  
 -#if HAVE_ALTIVEC && HAVE_BIGENDIAN
 +#if HAVE_ALTIVEC
  
 +#if HAVE_VSX
 +static void get_pixels_altivec(int16_t *restrict block, const uint8_t *pixels,
-                                ptrdiff_t line_size)
++                               ptrdiff_t stride)
 +{
 +    int i;
 +    vector unsigned char perm =
 +        (vector unsigned char) {0x00,0x10, 0x01,0x11,0x02,0x12,0x03,0x13,\
 +            0x04,0x14,0x05,0x15,0x06,0x16,0x07,0x17};
 +    const vector unsigned char zero =
 +        (const vector unsigned char) vec_splat_u8(0);
 +
 +    for (i = 0; i < 8; i++) {
 +        /* Read potentially unaligned pixels.
 +         * We're reading 16 pixels, and actually only want 8,
 +         * but we simply ignore the extras. */
 +        vector unsigned char bytes = vec_vsx_ld(0, pixels);
 +
 +        // Convert the bytes into shorts.
 +        //vector signed short shorts = (vector signed short) vec_perm(zero, bytes, perm);
 +        vector signed short shorts = (vector signed short) vec_perm(bytes, zero, perm);
 +
 +        // Save the data to the block, we assume the block is 16-byte aligned.
 +        vec_vsx_st(shorts, i * 16, (vector signed short *) block);
 +
-         pixels += line_size;
++        pixels += stride;
 +    }
 +}
 +#else
  static void get_pixels_altivec(int16_t *restrict block, const uint8_t *pixels,
-                                ptrdiff_t line_size)
+                                ptrdiff_t stride)
  {
      int i;
 -    vec_u8 perm = vec_lvsl(0, pixels);
      const vec_u8 zero = (const vec_u8)vec_splat_u8(0);
  
      for (i = 0; i < 8; i++) {
@@@ -88,76 -60,12 +88,76 @@@
      }
  }
  
 +#endif /* HAVE_VSX */
 +
 +#if HAVE_VSX
 +static void diff_pixels_altivec(int16_t *restrict block, const uint8_t *s1,
-                                 const uint8_t *s2, int stride)
++                                const uint8_t *s2, ptrdiff_t stride)
 +{
 +  int i;
 +  const vector unsigned char zero =
 +    (const vector unsigned char) vec_splat_u8(0);
 +  vector signed short shorts1, shorts2;
 +
 +  for (i = 0; i < 4; i++) {
 +    /* Read potentially unaligned pixels.
 +     * We're reading 16 pixels, and actually only want 8,
 +     * but we simply ignore the extras. */
 +    vector unsigned char bytes = vec_vsx_ld(0,  s1);
 +
 +    // Convert the bytes into shorts.
 +    shorts1 = (vector signed short) vec_mergeh(bytes, zero);
 +
 +    // Do the same for the second block of pixels.
 +    bytes =vec_vsx_ld(0,  s2);
 +
 +    // Convert the bytes into shorts.
 +    shorts2 = (vector signed short) vec_mergeh(bytes, zero);
 +
 +    // Do the subtraction.
 +    shorts1 = vec_sub(shorts1, shorts2);
 +
 +    // Save the data to the block, we assume the block is 16-byte aligned.
 +    vec_vsx_st(shorts1, 0, (vector signed short *) block);
 +
 +    s1    += stride;
 +    s2    += stride;
 +    block += 8;
 +
 +    /* The code below is a copy of the code above...
 +     * This is a manual unroll. */
 +
 +    /* Read potentially unaligned pixels.
 +     * We're reading 16 pixels, and actually only want 8,
 +     * but we simply ignore the extras. */
 +    bytes = vec_vsx_ld(0,  s1);
 +
 +    // Convert the bytes into shorts.
 +    shorts1 = (vector signed short) vec_mergeh(bytes, zero);
 +
 +    // Do the same for the second block of pixels.
 +    bytes = vec_vsx_ld(0,  s2);
 +
 +    // Convert the bytes into shorts.
 +    shorts2 = (vector signed short) vec_mergeh(bytes, zero);
 +
 +    // Do the subtraction.
 +    shorts1 = vec_sub(shorts1, shorts2);
 +
 +    // Save the data to the block, we assume the block is 16-byte aligned.
 +    vec_vsx_st(shorts1, 0, (vector signed short *) block);
 +
 +    s1    += stride;
 +    s2    += stride;
 +    block += 8;
 +  }
 +}
 +#else
  static void diff_pixels_altivec(int16_t *restrict block, const uint8_t *s1,
-                                 const uint8_t *s2, int stride)
+                                 const uint8_t *s2, ptrdiff_t stride)
  {
      int i;
 -    vec_u8 perm1 = vec_lvsl(0, s1);
 -    vec_u8 perm2 = vec_lvsl(0, s2);
 +    vec_u8 perm;
      const vec_u8 zero = (const vec_u8)vec_splat_u8(0);
      vec_s16 shorts1, shorts2;
  
diff --cc libavcodec/x86/pixblockdsp.asm
index 2864d0c,8712442..440fe29
--- a/libavcodec/x86/pixblockdsp.asm
+++ b/libavcodec/x86/pixblockdsp.asm
@@@ -80,12 -80,11 +80,11 @@@ cglobal get_pixels, 3, 4, 
      mova  [r0+0x70], m3
      RET
  
 -INIT_MMX mmx
  ; void ff_diff_pixels_mmx(int16_t *block, const uint8_t *s1, const uint8_t *s2,
- ;                         int stride);
+ ;                         ptrdiff_t stride);
 -cglobal diff_pixels, 4,5
 -    pxor         m7, m7
 +%macro DIFF_PIXELS 0
 +cglobal diff_pixels, 4,5,5
-     movsxdifnidn r3, r3d
 +    pxor         m4, m4
      add          r0,  128
      mov          r4, -128
  .loop:
diff --cc libavcodec/x86/pixblockdsp_init.c
index 4d06a44,faa5141..fa9578a
--- a/libavcodec/x86/pixblockdsp_init.c
+++ b/libavcodec/x86/pixblockdsp_init.c
@@@ -23,12 -23,10 +23,12 @@@
  #include "libavutil/x86/cpu.h"
  #include "libavcodec/pixblockdsp.h"
  
- void ff_get_pixels_mmx(int16_t *block, const uint8_t *pixels, ptrdiff_t line_size);
- void ff_get_pixels_sse2(int16_t *block, const uint8_t *pixels, ptrdiff_t line_size);
+ void ff_get_pixels_mmx(int16_t *block, const uint8_t *pixels, ptrdiff_t stride);
+ void ff_get_pixels_sse2(int16_t *block, const uint8_t *pixels, ptrdiff_t stride);
  void ff_diff_pixels_mmx(int16_t *block, const uint8_t *s1, const uint8_t *s2,
-                         int stride);
+                         ptrdiff_t stride);
 +void ff_diff_pixels_sse2(int16_t *block, const uint8_t *s1, const uint8_t *s2,
-                          int stride);
++                         ptrdiff_t stride);
  
  av_cold void ff_pixblockdsp_init_x86(PixblockDSPContext *c,
                                       AVCodecContext *avctx,
diff --cc tests/checkasm/pixblockdsp.c
index 2b88e7d,0000000..e14b0a9
mode 100644,000000..100644
--- a/tests/checkasm/pixblockdsp.c
+++ b/tests/checkasm/pixblockdsp.c
@@@ -1,107 -1,0 +1,107 @@@
 +/*
 + * Copyright (c) 2015 Tiancheng "Timothy" Gu
 + *
 + * This file is part of FFmpeg.
 + *
 + * FFmpeg is free software; you can redistribute it and/or modify
 + * it under the terms of the GNU General Public License as published by
 + * the Free Software Foundation; either version 2 of the License, or
 + * (at your option) any later version.
 + *
 + * FFmpeg is distributed in the hope that it will be useful,
 + * but WITHOUT ANY WARRANTY; without even the implied warranty of
 + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 + * GNU General Public License for more details.
 + *
 + * You should have received a copy of the GNU General Public License along
 + * with FFmpeg; if not, write to the Free Software Foundation, Inc.,
 + * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
 + */
 +
 +#include <string.h>
 +#include "checkasm.h"
 +#include "libavcodec/pixblockdsp.h"
 +#include "libavutil/common.h"
 +#include "libavutil/internal.h"
 +#include "libavutil/intreadwrite.h"
 +
 +#define BUF_UNITS 8
 +#define BUF_SIZE (BUF_UNITS * 128 + 8 * BUF_UNITS)
 +
 +#define randomize_buffers()                 \
 +    do {                                    \
 +        int i;                              \
 +        for (i = 0; i < BUF_SIZE; i += 4) { \
 +            uint32_t r = rnd();             \
 +            AV_WN32A(src10 + i, r);         \
 +            AV_WN32A(src11 + i, r);         \
 +            r = rnd();                      \
 +            AV_WN32A(src20 + i, r);         \
 +            AV_WN32A(src21 + i, r);         \
 +            r = rnd();                      \
 +            AV_WN32A(dst0_ + i, r);         \
 +            AV_WN32A(dst1_ + i, r);         \
 +        }                                   \
 +    } while (0)
 +
 +#define check_get_pixels(type)                                                             \
 +    do {                                                                                   \
 +        int i;                                                                             \
 +        declare_func_emms(AV_CPU_FLAG_MMX, void, int16_t *block, const uint8_t *pixels, ptrdiff_t line_size);    \
 +                                                                                           \
 +        for (i = 0; i < BUF_UNITS; i++) {                                              \
 +            int src_offset = i * 64 * sizeof(type) + 8 * i; /* Test various alignments */      \
 +            int dst_offset = i * 64; /* dst must be aligned */                             \
 +            randomize_buffers();                                                           \
 +            call_ref(dst0 + dst_offset, src10 + src_offset, 8);                            \
 +            call_new(dst1 + dst_offset, src11 + src_offset, 8);                            \
 +            if (memcmp(src10, src11, BUF_SIZE)|| memcmp(dst0, dst1, BUF_SIZE)) \
 +                fail();                                                                    \
 +            bench_new(dst1 + dst_offset, src11 + src_offset, 8);                           \
 +        }                                                                                  \
 +    } while (0)
 +
 +#define check_diff_pixels(type)                                                            \
 +    do {                                                                                   \
 +        int i;                                                                             \
-         declare_func_emms(AV_CPU_FLAG_MMX, void, int16_t *av_restrict block, const uint8_t *s1, const uint8_t *s2, int stride); \
++        declare_func_emms(AV_CPU_FLAG_MMX, void, int16_t *av_restrict block, const uint8_t *s1, const uint8_t *s2, ptrdiff_t stride); \
 +                                                                                           \
 +        for (i = 0; i < BUF_UNITS; i++) {                                              \
 +            int src_offset = i * 64 * sizeof(type) + 8 * i; /* Test various alignments */      \
 +            int dst_offset = i * 64; /* dst must be aligned */                             \
 +            randomize_buffers();                                                           \
 +            call_ref(dst0 + dst_offset, src10 + src_offset, src20 + src_offset, 8);        \
 +            call_new(dst1 + dst_offset, src11 + src_offset, src21 + src_offset, 8);        \
 +            if (memcmp(src10, src11, BUF_SIZE) || memcmp(src20, src21, BUF_SIZE) || memcmp(dst0, dst1, BUF_SIZE)) \
 +                fail();                                                                    \
 +            bench_new(dst1 + dst_offset, src11 + src_offset, src21 + src_offset, 8);       \
 +        }                                                                                  \
 +    } while (0)
 +
 +void checkasm_check_pixblockdsp(void)
 +{
 +    LOCAL_ALIGNED_16(uint8_t, src10, [BUF_SIZE]);
 +    LOCAL_ALIGNED_16(uint8_t, src11, [BUF_SIZE]);
 +    LOCAL_ALIGNED_16(uint8_t, src20, [BUF_SIZE]);
 +    LOCAL_ALIGNED_16(uint8_t, src21, [BUF_SIZE]);
 +    LOCAL_ALIGNED_16(uint8_t, dst0_, [BUF_SIZE]);
 +    LOCAL_ALIGNED_16(uint8_t, dst1_, [BUF_SIZE]);
 +    uint16_t *dst0 = (uint16_t *)dst0_;
 +    uint16_t *dst1 = (uint16_t *)dst1_;
 +    PixblockDSPContext h;
 +    AVCodecContext avctx = {
 +        .bits_per_raw_sample = 8,
 +    };
 +
 +    ff_pixblockdsp_init(&h, &avctx);
 +
 +    if (check_func(h.get_pixels, "get_pixels"))
 +        check_get_pixels(uint8_t);
 +
 +    report("get_pixels");
 +
 +    if (check_func(h.diff_pixels, "diff_pixels"))
 +        check_diff_pixels(uint8_t);
 +
 +    report("diff_pixels");
 +}



More information about the ffmpeg-cvslog mailing list