[FFmpeg-cvslog] Merge commit 'de452e503734ebb0fdbce86e9d16693b3530fad3'
Clément Bœsch
git at videolan.org
Mon Mar 20 16:59:54 EET 2017
ffmpeg | branch: master | Clément Bœsch <u at pkh.me> | Mon Mar 20 13:47:29 2017 +0100| [e07fa3008bcafc027e9aa654657bae1f1ac64f5d] | committer: Clément Bœsch
Merge commit 'de452e503734ebb0fdbce86e9d16693b3530fad3'
* commit 'de452e503734ebb0fdbce86e9d16693b3530fad3':
pixblockdsp: Change type of stride parameters to ptrdiff_t
Merged-by: Clément Bœsch <u at pkh.me>
> http://git.videolan.org/gitweb.cgi/ffmpeg.git/?a=commit;h=e07fa3008bcafc027e9aa654657bae1f1ac64f5d
---
libavcodec/alpha/pixblockdsp_alpha.c | 7 ++++---
libavcodec/arm/pixblockdsp_init_arm.c | 5 +++--
libavcodec/dv.h | 2 +-
libavcodec/dvenc.c | 9 +++++----
libavcodec/mips/pixblockdsp_mips.h | 6 +++---
libavcodec/mips/pixblockdsp_mmi.c | 22 +++++++++++-----------
libavcodec/mips/pixblockdsp_msa.c | 2 +-
libavcodec/pixblockdsp.c | 24 ++++++++++++------------
libavcodec/pixblockdsp.h | 4 ++--
libavcodec/ppc/pixblockdsp.c | 18 +++++++++---------
libavcodec/x86/pixblockdsp.asm | 5 ++---
libavcodec/x86/pixblockdsp_init.c | 8 ++++----
tests/checkasm/pixblockdsp.c | 2 +-
13 files changed, 58 insertions(+), 56 deletions(-)
diff --git a/libavcodec/alpha/pixblockdsp_alpha.c b/libavcodec/alpha/pixblockdsp_alpha.c
index 866b762..c2f1a1d 100644
--- a/libavcodec/alpha/pixblockdsp_alpha.c
+++ b/libavcodec/alpha/pixblockdsp_alpha.c
@@ -23,7 +23,7 @@
#include "asm.h"
static void get_pixels_mvi(int16_t *restrict block,
- const uint8_t *restrict pixels, ptrdiff_t line_size)
+ const uint8_t *restrict pixels, ptrdiff_t stride)
{
int h = 8;
@@ -34,13 +34,14 @@ static void get_pixels_mvi(int16_t *restrict block,
stq(unpkbw(p), block);
stq(unpkbw(p >> 32), block + 4);
- pixels += line_size;
+ pixels += stride;
block += 8;
} while (--h);
}
static void diff_pixels_mvi(int16_t *block, const uint8_t *s1, const uint8_t *s2,
- int stride) {
+ ptrdiff_t stride)
+{
int h = 8;
uint64_t mask = 0x4040;
diff --git a/libavcodec/arm/pixblockdsp_init_arm.c b/libavcodec/arm/pixblockdsp_init_arm.c
index 76d7509..59d2b49 100644
--- a/libavcodec/arm/pixblockdsp_init_arm.c
+++ b/libavcodec/arm/pixblockdsp_init_arm.c
@@ -24,9 +24,10 @@
#include "libavcodec/avcodec.h"
#include "libavcodec/pixblockdsp.h"
-void ff_get_pixels_armv6(int16_t *block, const uint8_t *pixels, ptrdiff_t stride);
+void ff_get_pixels_armv6(int16_t *block, const uint8_t *pixels,
+ ptrdiff_t stride);
void ff_diff_pixels_armv6(int16_t *block, const uint8_t *s1,
- const uint8_t *s2, int stride);
+ const uint8_t *s2, ptrdiff_t stride);
av_cold void ff_pixblockdsp_init_arm(PixblockDSPContext *c,
AVCodecContext *avctx,
diff --git a/libavcodec/dv.h b/libavcodec/dv.h
index 3367f92..d5482d1 100644
--- a/libavcodec/dv.h
+++ b/libavcodec/dv.h
@@ -45,7 +45,7 @@ typedef struct DVVideoContext {
uint8_t dv_zigzag[2][64];
- void (*get_pixels)(int16_t *block, const uint8_t *pixels, ptrdiff_t line_size);
+ void (*get_pixels)(int16_t *block, const uint8_t *pixels, ptrdiff_t linesize);
void (*fdct[2])(int16_t *block);
void (*idct_put[2])(uint8_t *dest, int line_size, int16_t *block);
me_cmp_func ildct_cmp;
diff --git a/libavcodec/dvenc.c b/libavcodec/dvenc.c
index a27b9ef..e3de18a 100644
--- a/libavcodec/dvenc.c
+++ b/libavcodec/dvenc.c
@@ -204,7 +204,7 @@ static av_always_inline PutBitContext *dv_encode_ac(EncBlockInfo *bi,
}
static av_always_inline int dv_guess_dct_mode(DVVideoContext *s, uint8_t *data,
- int linesize)
+ ptrdiff_t linesize)
{
if (s->avctx->flags & AV_CODEC_FLAG_INTERLACED_DCT) {
int ps = s->ildct_cmp(NULL, data, NULL, linesize, 8) - 400;
@@ -241,8 +241,8 @@ static const int dv_weight_248[64] = {
};
static av_always_inline int dv_init_enc_block(EncBlockInfo *bi, uint8_t *data,
- int linesize, DVVideoContext *s,
- int bias)
+ ptrdiff_t linesize,
+ DVVideoContext *s, int bias)
{
const int *weight;
const uint8_t *zigzag_scan;
@@ -420,7 +420,8 @@ static int dv_encode_video_segment(AVCodecContext *avctx, void *arg)
DVVideoContext *s = avctx->priv_data;
DVwork_chunk *work_chunk = arg;
int mb_index, i, j;
- int mb_x, mb_y, c_offset, linesize, y_stride;
+ int mb_x, mb_y, c_offset;
+ ptrdiff_t linesize, y_stride;
uint8_t *y_ptr;
uint8_t *dif;
LOCAL_ALIGNED_8(uint8_t, scratch, [128]);
diff --git a/libavcodec/mips/pixblockdsp_mips.h b/libavcodec/mips/pixblockdsp_mips.h
index 7f8cc96..a12b1a6 100644
--- a/libavcodec/mips/pixblockdsp_mips.h
+++ b/libavcodec/mips/pixblockdsp_mips.h
@@ -25,15 +25,15 @@
#include "../mpegvideo.h"
void ff_diff_pixels_msa(int16_t *av_restrict block, const uint8_t *src1,
- const uint8_t *src2, int stride);
+ const uint8_t *src2, ptrdiff_t stride);
void ff_get_pixels_16_msa(int16_t *restrict dst, const uint8_t *src,
ptrdiff_t stride);
void ff_get_pixels_8_msa(int16_t *restrict dst, const uint8_t *src,
ptrdiff_t stride);
void ff_get_pixels_8_mmi(int16_t *av_restrict block, const uint8_t *pixels,
- ptrdiff_t line_size);
+ ptrdiff_t stride);
void ff_diff_pixels_mmi(int16_t *av_restrict block, const uint8_t *src1,
- const uint8_t *src2, int stride);
+ const uint8_t *src2, ptrdiff_t stride);
#endif // #ifndef AVCODEC_MIPS_PIXBLOCKDSP_MIPS_H
diff --git a/libavcodec/mips/pixblockdsp_mmi.c b/libavcodec/mips/pixblockdsp_mmi.c
index 9f2eac3..a915a3c 100644
--- a/libavcodec/mips/pixblockdsp_mmi.c
+++ b/libavcodec/mips/pixblockdsp_mmi.c
@@ -26,7 +26,7 @@
#include "libavutil/mips/mmiutils.h"
void ff_get_pixels_8_mmi(int16_t *av_restrict block, const uint8_t *pixels,
- ptrdiff_t line_size)
+ ptrdiff_t stride)
{
double ftmp[7];
DECLARE_VAR_ALL64;
@@ -36,7 +36,7 @@ void ff_get_pixels_8_mmi(int16_t *av_restrict block, const uint8_t *pixels,
"xor %[ftmp0], %[ftmp0], %[ftmp0] \n\t"
MMI_LDC1(%[ftmp1], %[pixels], 0x00)
- MMI_LDXC1(%[ftmp2], %[pixels], %[line_size], 0x00)
+ MMI_LDXC1(%[ftmp2], %[pixels], %[stride], 0x00)
"punpcklbh %[ftmp3], %[ftmp1], %[ftmp0] \n\t"
"punpckhbh %[ftmp4], %[ftmp1], %[ftmp0] \n\t"
"punpcklbh %[ftmp5], %[ftmp2], %[ftmp0] \n\t"
@@ -45,10 +45,10 @@ void ff_get_pixels_8_mmi(int16_t *av_restrict block, const uint8_t *pixels,
MMI_SDC1(%[ftmp4], %[block], 0x08)
MMI_SDC1(%[ftmp5], %[block], 0x10)
MMI_SDC1(%[ftmp6], %[block], 0x18)
- PTR_ADDU "%[pixels], %[pixels], %[line_size_x2] \n\t"
+ PTR_ADDU "%[pixels], %[pixels], %[stride_x2] \n\t"
MMI_LDC1(%[ftmp1], %[pixels], 0x00)
- MMI_LDXC1(%[ftmp2], %[pixels], %[line_size], 0x00)
+ MMI_LDXC1(%[ftmp2], %[pixels], %[stride], 0x00)
"punpcklbh %[ftmp3], %[ftmp1], %[ftmp0] \n\t"
"punpckhbh %[ftmp4], %[ftmp1], %[ftmp0] \n\t"
"punpcklbh %[ftmp5], %[ftmp2], %[ftmp0] \n\t"
@@ -57,10 +57,10 @@ void ff_get_pixels_8_mmi(int16_t *av_restrict block, const uint8_t *pixels,
MMI_SDC1(%[ftmp4], %[block], 0x28)
MMI_SDC1(%[ftmp5], %[block], 0x30)
MMI_SDC1(%[ftmp6], %[block], 0x38)
- PTR_ADDU "%[pixels], %[pixels], %[line_size_x2] \n\t"
+ PTR_ADDU "%[pixels], %[pixels], %[stride_x2] \n\t"
MMI_LDC1(%[ftmp1], %[pixels], 0x00)
- MMI_LDXC1(%[ftmp2], %[pixels], %[line_size], 0x00)
+ MMI_LDXC1(%[ftmp2], %[pixels], %[stride], 0x00)
"punpcklbh %[ftmp3], %[ftmp1], %[ftmp0] \n\t"
"punpckhbh %[ftmp4], %[ftmp1], %[ftmp0] \n\t"
"punpcklbh %[ftmp5], %[ftmp2], %[ftmp0] \n\t"
@@ -69,10 +69,10 @@ void ff_get_pixels_8_mmi(int16_t *av_restrict block, const uint8_t *pixels,
MMI_SDC1(%[ftmp4], %[block], 0x48)
MMI_SDC1(%[ftmp5], %[block], 0x50)
MMI_SDC1(%[ftmp6], %[block], 0x58)
- PTR_ADDU "%[pixels], %[pixels], %[line_size_x2] \n\t"
+ PTR_ADDU "%[pixels], %[pixels], %[stride_x2] \n\t"
MMI_LDC1(%[ftmp1], %[pixels], 0x00)
- MMI_LDXC1(%[ftmp2], %[pixels], %[line_size], 0x00)
+ MMI_LDXC1(%[ftmp2], %[pixels], %[stride], 0x00)
"punpcklbh %[ftmp3], %[ftmp1], %[ftmp0] \n\t"
"punpckhbh %[ftmp4], %[ftmp1], %[ftmp0] \n\t"
"punpcklbh %[ftmp5], %[ftmp2], %[ftmp0] \n\t"
@@ -88,14 +88,14 @@ void ff_get_pixels_8_mmi(int16_t *av_restrict block, const uint8_t *pixels,
RESTRICT_ASM_ALL64
RESTRICT_ASM_ADDRT
[pixels]"+&r"(pixels)
- : [block]"r"((mips_reg)block), [line_size]"r"((mips_reg)line_size),
- [line_size_x2]"r"((mips_reg)(line_size<<1))
+ : [block]"r"((mips_reg)block), [stride]"r"((mips_reg)stride),
+ [stride_x2]"r"((mips_reg)(stride<<1))
: "memory"
);
}
void ff_diff_pixels_mmi(int16_t *av_restrict block, const uint8_t *src1,
- const uint8_t *src2, int stride)
+ const uint8_t *src2, ptrdiff_t stride)
{
double ftmp[5];
mips_reg tmp[1];
diff --git a/libavcodec/mips/pixblockdsp_msa.c b/libavcodec/mips/pixblockdsp_msa.c
index 966e11a..86a4576 100644
--- a/libavcodec/mips/pixblockdsp_msa.c
+++ b/libavcodec/mips/pixblockdsp_msa.c
@@ -137,7 +137,7 @@ void ff_get_pixels_8_msa(int16_t *av_restrict dest, const uint8_t *src,
}
void ff_diff_pixels_msa(int16_t *av_restrict block, const uint8_t *src1,
- const uint8_t *src2, int stride)
+ const uint8_t *src2, ptrdiff_t stride)
{
diff_pixels_msa(block, src1, src2, stride);
}
diff --git a/libavcodec/pixblockdsp.c b/libavcodec/pixblockdsp.c
index f0883d3..417c944 100644
--- a/libavcodec/pixblockdsp.c
+++ b/libavcodec/pixblockdsp.c
@@ -25,20 +25,20 @@
#include "pixblockdsp.h"
static void get_pixels_16_c(int16_t *av_restrict block, const uint8_t *pixels,
- ptrdiff_t line_size)
+ ptrdiff_t stride)
{
- AV_COPY128U(block + 0 * 8, pixels + 0 * line_size);
- AV_COPY128U(block + 1 * 8, pixels + 1 * line_size);
- AV_COPY128U(block + 2 * 8, pixels + 2 * line_size);
- AV_COPY128U(block + 3 * 8, pixels + 3 * line_size);
- AV_COPY128U(block + 4 * 8, pixels + 4 * line_size);
- AV_COPY128U(block + 5 * 8, pixels + 5 * line_size);
- AV_COPY128U(block + 6 * 8, pixels + 6 * line_size);
- AV_COPY128U(block + 7 * 8, pixels + 7 * line_size);
+ AV_COPY128U(block + 0 * 8, pixels + 0 * stride);
+ AV_COPY128U(block + 1 * 8, pixels + 1 * stride);
+ AV_COPY128U(block + 2 * 8, pixels + 2 * stride);
+ AV_COPY128U(block + 3 * 8, pixels + 3 * stride);
+ AV_COPY128U(block + 4 * 8, pixels + 4 * stride);
+ AV_COPY128U(block + 5 * 8, pixels + 5 * stride);
+ AV_COPY128U(block + 6 * 8, pixels + 6 * stride);
+ AV_COPY128U(block + 7 * 8, pixels + 7 * stride);
}
static void get_pixels_8_c(int16_t *av_restrict block, const uint8_t *pixels,
- ptrdiff_t line_size)
+ ptrdiff_t stride)
{
int i;
@@ -52,13 +52,13 @@ static void get_pixels_8_c(int16_t *av_restrict block, const uint8_t *pixels,
block[5] = pixels[5];
block[6] = pixels[6];
block[7] = pixels[7];
- pixels += line_size;
+ pixels += stride;
block += 8;
}
}
static void diff_pixels_c(int16_t *av_restrict block, const uint8_t *s1,
- const uint8_t *s2, int stride)
+ const uint8_t *s2, ptrdiff_t stride)
{
int i;
diff --git a/libavcodec/pixblockdsp.h b/libavcodec/pixblockdsp.h
index 79ed86c..278accb 100644
--- a/libavcodec/pixblockdsp.h
+++ b/libavcodec/pixblockdsp.h
@@ -26,11 +26,11 @@
typedef struct PixblockDSPContext {
void (*get_pixels)(int16_t *block /* align 16 */,
const uint8_t *pixels /* align 8 */,
- ptrdiff_t line_size);
+ ptrdiff_t stride);
void (*diff_pixels)(int16_t *block /* align 16 */,
const uint8_t *s1 /* align 8 */,
const uint8_t *s2 /* align 8 */,
- int stride);
+ ptrdiff_t stride);
} PixblockDSPContext;
void ff_pixblockdsp_init(PixblockDSPContext *c, AVCodecContext *avctx);
diff --git a/libavcodec/ppc/pixblockdsp.c b/libavcodec/ppc/pixblockdsp.c
index f3a5050..f5ac850 100644
--- a/libavcodec/ppc/pixblockdsp.c
+++ b/libavcodec/ppc/pixblockdsp.c
@@ -37,7 +37,7 @@
#if HAVE_VSX
static void get_pixels_altivec(int16_t *restrict block, const uint8_t *pixels,
- ptrdiff_t line_size)
+ ptrdiff_t stride)
{
int i;
vector unsigned char perm =
@@ -59,12 +59,12 @@ static void get_pixels_altivec(int16_t *restrict block, const uint8_t *pixels,
// Save the data to the block, we assume the block is 16-byte aligned.
vec_vsx_st(shorts, i * 16, (vector signed short *) block);
- pixels += line_size;
+ pixels += stride;
}
}
#else
static void get_pixels_altivec(int16_t *restrict block, const uint8_t *pixels,
- ptrdiff_t line_size)
+ ptrdiff_t stride)
{
int i;
const vec_u8 zero = (const vec_u8)vec_splat_u8(0);
@@ -84,7 +84,7 @@ static void get_pixels_altivec(int16_t *restrict block, const uint8_t *pixels,
// Save the data to the block, we assume the block is 16-byte aligned.
vec_st(shorts, i * 16, (vec_s16 *)block);
- pixels += line_size;
+ pixels += stride;
}
}
@@ -92,7 +92,7 @@ static void get_pixels_altivec(int16_t *restrict block, const uint8_t *pixels,
#if HAVE_VSX
static void diff_pixels_altivec(int16_t *restrict block, const uint8_t *s1,
- const uint8_t *s2, int stride)
+ const uint8_t *s2, ptrdiff_t stride)
{
int i;
const vector unsigned char zero =
@@ -154,7 +154,7 @@ static void diff_pixels_altivec(int16_t *restrict block, const uint8_t *s1,
}
#else
static void diff_pixels_altivec(int16_t *restrict block, const uint8_t *s1,
- const uint8_t *s2, int stride)
+ const uint8_t *s2, ptrdiff_t stride)
{
int i;
vec_u8 perm;
@@ -233,7 +233,7 @@ static void diff_pixels_altivec(int16_t *restrict block, const uint8_t *s1,
#if HAVE_VSX
static void get_pixels_vsx(int16_t *restrict block, const uint8_t *pixels,
- ptrdiff_t line_size)
+ ptrdiff_t stride)
{
int i;
for (i = 0; i < 8; i++) {
@@ -241,12 +241,12 @@ static void get_pixels_vsx(int16_t *restrict block, const uint8_t *pixels,
vec_vsx_st(shorts, i * 16, block);
- pixels += line_size;
+ pixels += stride;
}
}
static void diff_pixels_vsx(int16_t *restrict block, const uint8_t *s1,
- const uint8_t *s2, int stride)
+ const uint8_t *s2, ptrdiff_t stride)
{
int i;
vec_s16 shorts1, shorts2;
diff --git a/libavcodec/x86/pixblockdsp.asm b/libavcodec/x86/pixblockdsp.asm
index 2864d0c..440fe29 100644
--- a/libavcodec/x86/pixblockdsp.asm
+++ b/libavcodec/x86/pixblockdsp.asm
@@ -26,7 +26,7 @@
SECTION .text
INIT_MMX mmx
-; void ff_get_pixels_mmx(int16_t *block, const uint8_t *pixels, ptrdiff_t line_size)
+; void ff_get_pixels_mmx(int16_t *block, const uint8_t *pixels, ptrdiff_t stride)
cglobal get_pixels, 3,4
add r0, 128
mov r3, -128
@@ -81,10 +81,9 @@ cglobal get_pixels, 3, 4, 5
RET
; void ff_diff_pixels_mmx(int16_t *block, const uint8_t *s1, const uint8_t *s2,
-; int stride);
+; ptrdiff_t stride);
%macro DIFF_PIXELS 0
cglobal diff_pixels, 4,5,5
- movsxdifnidn r3, r3d
pxor m4, m4
add r0, 128
mov r4, -128
diff --git a/libavcodec/x86/pixblockdsp_init.c b/libavcodec/x86/pixblockdsp_init.c
index 4d06a44..fa9578a 100644
--- a/libavcodec/x86/pixblockdsp_init.c
+++ b/libavcodec/x86/pixblockdsp_init.c
@@ -23,12 +23,12 @@
#include "libavutil/x86/cpu.h"
#include "libavcodec/pixblockdsp.h"
-void ff_get_pixels_mmx(int16_t *block, const uint8_t *pixels, ptrdiff_t line_size);
-void ff_get_pixels_sse2(int16_t *block, const uint8_t *pixels, ptrdiff_t line_size);
+void ff_get_pixels_mmx(int16_t *block, const uint8_t *pixels, ptrdiff_t stride);
+void ff_get_pixels_sse2(int16_t *block, const uint8_t *pixels, ptrdiff_t stride);
void ff_diff_pixels_mmx(int16_t *block, const uint8_t *s1, const uint8_t *s2,
- int stride);
+ ptrdiff_t stride);
void ff_diff_pixels_sse2(int16_t *block, const uint8_t *s1, const uint8_t *s2,
- int stride);
+ ptrdiff_t stride);
av_cold void ff_pixblockdsp_init_x86(PixblockDSPContext *c,
AVCodecContext *avctx,
diff --git a/tests/checkasm/pixblockdsp.c b/tests/checkasm/pixblockdsp.c
index 2b88e7d..e14b0a9 100644
--- a/tests/checkasm/pixblockdsp.c
+++ b/tests/checkasm/pixblockdsp.c
@@ -64,7 +64,7 @@
#define check_diff_pixels(type) \
do { \
int i; \
- declare_func_emms(AV_CPU_FLAG_MMX, void, int16_t *av_restrict block, const uint8_t *s1, const uint8_t *s2, int stride); \
+ declare_func_emms(AV_CPU_FLAG_MMX, void, int16_t *av_restrict block, const uint8_t *s1, const uint8_t *s2, ptrdiff_t stride); \
\
for (i = 0; i < BUF_UNITS; i++) { \
int src_offset = i * 64 * sizeof(type) + 8 * i; /* Test various alignments */ \
======================================================================
diff --cc libavcodec/alpha/pixblockdsp_alpha.c
index 866b762,0000000..c2f1a1d
mode 100644,000000..100644
--- a/libavcodec/alpha/pixblockdsp_alpha.c
+++ b/libavcodec/alpha/pixblockdsp_alpha.c
@@@ -1,78 -1,0 +1,79 @@@
+/*
+ * SIMD-optimized pixel operations
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include "libavutil/attributes.h"
+#include "libavcodec/pixblockdsp.h"
+#include "asm.h"
+
+static void get_pixels_mvi(int16_t *restrict block,
- const uint8_t *restrict pixels, ptrdiff_t line_size)
++ const uint8_t *restrict pixels, ptrdiff_t stride)
+{
+ int h = 8;
+
+ do {
+ uint64_t p;
+
+ p = ldq(pixels);
+ stq(unpkbw(p), block);
+ stq(unpkbw(p >> 32), block + 4);
+
- pixels += line_size;
++ pixels += stride;
+ block += 8;
+ } while (--h);
+}
+
+static void diff_pixels_mvi(int16_t *block, const uint8_t *s1, const uint8_t *s2,
- int stride) {
++ ptrdiff_t stride)
++{
+ int h = 8;
+ uint64_t mask = 0x4040;
+
+ mask |= mask << 16;
+ mask |= mask << 32;
+ do {
+ uint64_t x, y, c, d, a;
+ uint64_t signs;
+
+ x = ldq(s1);
+ y = ldq(s2);
+ c = cmpbge(x, y);
+ d = x - y;
+ a = zap(mask, c); /* We use 0x4040404040404040 here... */
+ d += 4 * a; /* ...so we can use s4addq here. */
+ signs = zap(-1, c);
+
+ stq(unpkbw(d) | (unpkbw(signs) << 8), block);
+ stq(unpkbw(d >> 32) | (unpkbw(signs >> 32) << 8), block + 4);
+
+ s1 += stride;
+ s2 += stride;
+ block += 8;
+ } while (--h);
+}
+
+av_cold void ff_pixblockdsp_init_alpha(PixblockDSPContext *c, AVCodecContext *avctx,
+ unsigned high_bit_depth)
+{
+ if (amask(AMASK_MVI) == 0) {
+ if (!high_bit_depth)
+ c->get_pixels = get_pixels_mvi;
+ c->diff_pixels = diff_pixels_mvi;
+ }
+}
diff --cc libavcodec/mips/pixblockdsp_mips.h
index 7f8cc96,0000000..a12b1a6
mode 100644,000000..100644
--- a/libavcodec/mips/pixblockdsp_mips.h
+++ b/libavcodec/mips/pixblockdsp_mips.h
@@@ -1,39 -1,0 +1,39 @@@
+/*
+ * Copyright (c) 2015 Shivraj Patil (Shivraj.Patil at imgtec.com)
+ * Zhou Xiaoyong <zhouxiaoyong at loongson.cn>
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#ifndef AVCODEC_MIPS_PIXBLOCKDSP_MIPS_H
+#define AVCODEC_MIPS_PIXBLOCKDSP_MIPS_H
+
+#include "../mpegvideo.h"
+
+void ff_diff_pixels_msa(int16_t *av_restrict block, const uint8_t *src1,
- const uint8_t *src2, int stride);
++ const uint8_t *src2, ptrdiff_t stride);
+void ff_get_pixels_16_msa(int16_t *restrict dst, const uint8_t *src,
+ ptrdiff_t stride);
+void ff_get_pixels_8_msa(int16_t *restrict dst, const uint8_t *src,
+ ptrdiff_t stride);
+
+void ff_get_pixels_8_mmi(int16_t *av_restrict block, const uint8_t *pixels,
- ptrdiff_t line_size);
++ ptrdiff_t stride);
+void ff_diff_pixels_mmi(int16_t *av_restrict block, const uint8_t *src1,
- const uint8_t *src2, int stride);
++ const uint8_t *src2, ptrdiff_t stride);
+
+#endif // #ifndef AVCODEC_MIPS_PIXBLOCKDSP_MIPS_H
diff --cc libavcodec/mips/pixblockdsp_mmi.c
index 9f2eac3,0000000..a915a3c
mode 100644,000000..100644
--- a/libavcodec/mips/pixblockdsp_mmi.c
+++ b/libavcodec/mips/pixblockdsp_mmi.c
@@@ -1,135 -1,0 +1,135 @@@
+/*
+ * Loongson SIMD optimized pixblockdsp
+ *
+ * Copyright (c) 2015 Loongson Technology Corporation Limited
+ * Copyright (c) 2015 Zhou Xiaoyong <zhouxiaoyong at loongson.cn>
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include "pixblockdsp_mips.h"
+#include "libavutil/mips/asmdefs.h"
+#include "libavutil/mips/mmiutils.h"
+
+void ff_get_pixels_8_mmi(int16_t *av_restrict block, const uint8_t *pixels,
- ptrdiff_t line_size)
++ ptrdiff_t stride)
+{
+ double ftmp[7];
+ DECLARE_VAR_ALL64;
+ DECLARE_VAR_ADDRT;
+
+ __asm__ volatile (
+ "xor %[ftmp0], %[ftmp0], %[ftmp0] \n\t"
+
+ MMI_LDC1(%[ftmp1], %[pixels], 0x00)
- MMI_LDXC1(%[ftmp2], %[pixels], %[line_size], 0x00)
++ MMI_LDXC1(%[ftmp2], %[pixels], %[stride], 0x00)
+ "punpcklbh %[ftmp3], %[ftmp1], %[ftmp0] \n\t"
+ "punpckhbh %[ftmp4], %[ftmp1], %[ftmp0] \n\t"
+ "punpcklbh %[ftmp5], %[ftmp2], %[ftmp0] \n\t"
+ "punpckhbh %[ftmp6], %[ftmp2], %[ftmp0] \n\t"
+ MMI_SDC1(%[ftmp3], %[block], 0x00)
+ MMI_SDC1(%[ftmp4], %[block], 0x08)
+ MMI_SDC1(%[ftmp5], %[block], 0x10)
+ MMI_SDC1(%[ftmp6], %[block], 0x18)
- PTR_ADDU "%[pixels], %[pixels], %[line_size_x2] \n\t"
++ PTR_ADDU "%[pixels], %[pixels], %[stride_x2] \n\t"
+
+ MMI_LDC1(%[ftmp1], %[pixels], 0x00)
- MMI_LDXC1(%[ftmp2], %[pixels], %[line_size], 0x00)
++ MMI_LDXC1(%[ftmp2], %[pixels], %[stride], 0x00)
+ "punpcklbh %[ftmp3], %[ftmp1], %[ftmp0] \n\t"
+ "punpckhbh %[ftmp4], %[ftmp1], %[ftmp0] \n\t"
+ "punpcklbh %[ftmp5], %[ftmp2], %[ftmp0] \n\t"
+ "punpckhbh %[ftmp6], %[ftmp2], %[ftmp0] \n\t"
+ MMI_SDC1(%[ftmp3], %[block], 0x20)
+ MMI_SDC1(%[ftmp4], %[block], 0x28)
+ MMI_SDC1(%[ftmp5], %[block], 0x30)
+ MMI_SDC1(%[ftmp6], %[block], 0x38)
- PTR_ADDU "%[pixels], %[pixels], %[line_size_x2] \n\t"
++ PTR_ADDU "%[pixels], %[pixels], %[stride_x2] \n\t"
+
+ MMI_LDC1(%[ftmp1], %[pixels], 0x00)
- MMI_LDXC1(%[ftmp2], %[pixels], %[line_size], 0x00)
++ MMI_LDXC1(%[ftmp2], %[pixels], %[stride], 0x00)
+ "punpcklbh %[ftmp3], %[ftmp1], %[ftmp0] \n\t"
+ "punpckhbh %[ftmp4], %[ftmp1], %[ftmp0] \n\t"
+ "punpcklbh %[ftmp5], %[ftmp2], %[ftmp0] \n\t"
+ "punpckhbh %[ftmp6], %[ftmp2], %[ftmp0] \n\t"
+ MMI_SDC1(%[ftmp3], %[block], 0x40)
+ MMI_SDC1(%[ftmp4], %[block], 0x48)
+ MMI_SDC1(%[ftmp5], %[block], 0x50)
+ MMI_SDC1(%[ftmp6], %[block], 0x58)
- PTR_ADDU "%[pixels], %[pixels], %[line_size_x2] \n\t"
++ PTR_ADDU "%[pixels], %[pixels], %[stride_x2] \n\t"
+
+ MMI_LDC1(%[ftmp1], %[pixels], 0x00)
- MMI_LDXC1(%[ftmp2], %[pixels], %[line_size], 0x00)
++ MMI_LDXC1(%[ftmp2], %[pixels], %[stride], 0x00)
+ "punpcklbh %[ftmp3], %[ftmp1], %[ftmp0] \n\t"
+ "punpckhbh %[ftmp4], %[ftmp1], %[ftmp0] \n\t"
+ "punpcklbh %[ftmp5], %[ftmp2], %[ftmp0] \n\t"
+ "punpckhbh %[ftmp6], %[ftmp2], %[ftmp0] \n\t"
+ MMI_SDC1(%[ftmp3], %[block], 0x60)
+ MMI_SDC1(%[ftmp4], %[block], 0x68)
+ MMI_SDC1(%[ftmp5], %[block], 0x70)
+ MMI_SDC1(%[ftmp6], %[block], 0x78)
+ : [ftmp0]"=&f"(ftmp[0]), [ftmp1]"=&f"(ftmp[1]),
+ [ftmp2]"=&f"(ftmp[2]), [ftmp3]"=&f"(ftmp[3]),
+ [ftmp4]"=&f"(ftmp[4]), [ftmp5]"=&f"(ftmp[5]),
+ [ftmp6]"=&f"(ftmp[6]),
+ RESTRICT_ASM_ALL64
+ RESTRICT_ASM_ADDRT
+ [pixels]"+&r"(pixels)
- : [block]"r"((mips_reg)block), [line_size]"r"((mips_reg)line_size),
- [line_size_x2]"r"((mips_reg)(line_size<<1))
++ : [block]"r"((mips_reg)block), [stride]"r"((mips_reg)stride),
++ [stride_x2]"r"((mips_reg)(stride<<1))
+ : "memory"
+ );
+}
+
+void ff_diff_pixels_mmi(int16_t *av_restrict block, const uint8_t *src1,
- const uint8_t *src2, int stride)
++ const uint8_t *src2, ptrdiff_t stride)
+{
+ double ftmp[5];
+ mips_reg tmp[1];
+ DECLARE_VAR_ALL64;
+
+ __asm__ volatile (
+ "li %[tmp0], 0x08 \n\t"
+ "xor %[ftmp4], %[ftmp4], %[ftmp4] \n\t"
+ "1: \n\t"
+ MMI_LDC1(%[ftmp0], %[src1], 0x00)
+ "or %[ftmp1], %[ftmp0], %[ftmp0] \n\t"
+ MMI_LDC1(%[ftmp2], %[src2], 0x00)
+ "or %[ftmp3], %[ftmp2], %[ftmp2] \n\t"
+ "punpcklbh %[ftmp0], %[ftmp0], %[ftmp4] \n\t"
+ "punpckhbh %[ftmp1], %[ftmp1], %[ftmp4] \n\t"
+ "punpcklbh %[ftmp2], %[ftmp2], %[ftmp4] \n\t"
+ "punpckhbh %[ftmp3], %[ftmp3], %[ftmp4] \n\t"
+ "psubh %[ftmp0], %[ftmp0], %[ftmp2] \n\t"
+ "psubh %[ftmp1], %[ftmp1], %[ftmp3] \n\t"
+ MMI_SDC1(%[ftmp0], %[block], 0x00)
+ MMI_SDC1(%[ftmp1], %[block], 0x08)
+ PTR_ADDI "%[tmp0], %[tmp0], -0x01 \n\t"
+ PTR_ADDIU "%[block], %[block], 0x10 \n\t"
+ PTR_ADDU "%[src1], %[src1], %[stride] \n\t"
+ PTR_ADDU "%[src2], %[src2], %[stride] \n\t"
+ "bgtz %[tmp0], 1b \n\t"
+ : [ftmp0]"=&f"(ftmp[0]), [ftmp1]"=&f"(ftmp[1]),
+ [ftmp2]"=&f"(ftmp[2]), [ftmp3]"=&f"(ftmp[3]),
+ [ftmp4]"=&f"(ftmp[4]),
+ [tmp0]"=&r"(tmp[0]),
+ RESTRICT_ASM_ALL64
+ [block]"+&r"(block), [src1]"+&r"(src1),
+ [src2]"+&r"(src2)
+ : [stride]"r"((mips_reg)stride)
+ : "memory"
+ );
+}
diff --cc libavcodec/mips/pixblockdsp_msa.c
index 966e11a,0000000..86a4576
mode 100644,000000..100644
--- a/libavcodec/mips/pixblockdsp_msa.c
+++ b/libavcodec/mips/pixblockdsp_msa.c
@@@ -1,143 -1,0 +1,143 @@@
+/*
+ * Copyright (c) 2015 Shivraj Patil (Shivraj.Patil at imgtec.com)
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include "libavutil/mips/generic_macros_msa.h"
+#include "pixblockdsp_mips.h"
+
+static void diff_pixels_msa(int16_t *block, const uint8_t *src1,
+ const uint8_t *src2, int32_t stride)
+{
+ v16u8 in10, in11, in12, in13, in14, in15, in16, in17;
+ v16u8 in20, in21, in22, in23, in24, in25, in26, in27;
+ v8i16 out0, out1, out2, out3, out4, out5, out6, out7;
+
+ LD_UB8(src1, stride, in10, in11, in12, in13, in14, in15, in16, in17);
+ LD_UB8(src2, stride, in20, in21, in22, in23, in24, in25, in26, in27);
+ ILVR_B4_SH(in10, in20, in11, in21, in12, in22, in13, in23,
+ out0, out1, out2, out3);
+ ILVR_B4_SH(in14, in24, in15, in25, in16, in26, in17, in27,
+ out4, out5, out6, out7);
+ HSUB_UB4_SH(out0, out1, out2, out3, out0, out1, out2, out3);
+ HSUB_UB4_SH(out4, out5, out6, out7, out4, out5, out6, out7);
+ ST_SH8(out0, out1, out2, out3, out4, out5, out6, out7, block, 8);
+}
+
+static void copy_8bit_to_16bit_width8_msa(const uint8_t *src, int32_t src_stride,
+ int16_t *dst, int32_t dst_stride,
+ int32_t height)
+{
+ uint8_t *dst_ptr;
+ int32_t cnt;
+ v16u8 src0, src1, src2, src3;
+ v16i8 zero = { 0 };
+
+ dst_ptr = (uint8_t *) dst;
+
+ for (cnt = (height >> 2); cnt--;) {
+ LD_UB4(src, src_stride, src0, src1, src2, src3);
+ src += (4 * src_stride);
+
+ ILVR_B4_UB(zero, src0, zero, src1, zero, src2, zero, src3,
+ src0, src1, src2, src3);
+
+ ST_UB4(src0, src1, src2, src3, dst_ptr, (dst_stride * 2));
+ dst_ptr += (4 * 2 * dst_stride);
+ }
+}
+
+static void copy_16multx8mult_msa(const uint8_t *src, int32_t src_stride,
+ uint8_t *dst, int32_t dst_stride,
+ int32_t height, int32_t width)
+{
+ int32_t cnt, loop_cnt;
+ const uint8_t *src_tmp;
+ uint8_t *dst_tmp;
+ v16u8 src0, src1, src2, src3, src4, src5, src6, src7;
+
+ for (cnt = (width >> 4); cnt--;) {
+ src_tmp = src;
+ dst_tmp = dst;
+
+ for (loop_cnt = (height >> 3); loop_cnt--;) {
+ LD_UB8(src_tmp, src_stride,
+ src0, src1, src2, src3, src4, src5, src6, src7);
+ src_tmp += (8 * src_stride);
+
+ ST_UB8(src0, src1, src2, src3, src4, src5, src6, src7,
+ dst_tmp, dst_stride);
+ dst_tmp += (8 * dst_stride);
+ }
+
+ src += 16;
+ dst += 16;
+ }
+}
+
+static void copy_width16_msa(const uint8_t *src, int32_t src_stride,
+ uint8_t *dst, int32_t dst_stride,
+ int32_t height)
+{
+ int32_t cnt;
+ v16u8 src0, src1, src2, src3, src4, src5, src6, src7;
+
+ if (0 == height % 12) {
+ for (cnt = (height / 12); cnt--;) {
+ LD_UB8(src, src_stride,
+ src0, src1, src2, src3, src4, src5, src6, src7);
+ src += (8 * src_stride);
+ ST_UB8(src0, src1, src2, src3, src4, src5, src6, src7,
+ dst, dst_stride);
+ dst += (8 * dst_stride);
+
+ LD_UB4(src, src_stride, src0, src1, src2, src3);
+ src += (4 * src_stride);
+ ST_UB4(src0, src1, src2, src3, dst, dst_stride);
+ dst += (4 * dst_stride);
+ }
+ } else if (0 == height % 8) {
+ copy_16multx8mult_msa(src, src_stride, dst, dst_stride, height, 16);
+ } else if (0 == height % 4) {
+ for (cnt = (height >> 2); cnt--;) {
+ LD_UB4(src, src_stride, src0, src1, src2, src3);
+ src += (4 * src_stride);
+
+ ST_UB4(src0, src1, src2, src3, dst, dst_stride);
+ dst += (4 * dst_stride);
+ }
+ }
+}
+
+void ff_get_pixels_16_msa(int16_t *av_restrict dest, const uint8_t *src,
+ ptrdiff_t stride)
+{
+ copy_width16_msa(src, stride, (uint8_t *) dest, 16, 8);
+}
+
+void ff_get_pixels_8_msa(int16_t *av_restrict dest, const uint8_t *src,
+ ptrdiff_t stride)
+{
+ copy_8bit_to_16bit_width8_msa(src, stride, dest, 8, 8);
+}
+
+void ff_diff_pixels_msa(int16_t *av_restrict block, const uint8_t *src1,
- const uint8_t *src2, int stride)
++ const uint8_t *src2, ptrdiff_t stride)
+{
+ diff_pixels_msa(block, src1, src2, stride);
+}
diff --cc libavcodec/pixblockdsp.c
index f0883d3,9d68d26..417c944
--- a/libavcodec/pixblockdsp.c
+++ b/libavcodec/pixblockdsp.c
@@@ -24,41 -23,15 +24,41 @@@
#include "avcodec.h"
#include "pixblockdsp.h"
-#define BIT_DEPTH 16
-#include "pixblockdsp_template.c"
-#undef BIT_DEPTH
+static void get_pixels_16_c(int16_t *av_restrict block, const uint8_t *pixels,
- ptrdiff_t line_size)
++ ptrdiff_t stride)
+{
- AV_COPY128U(block + 0 * 8, pixels + 0 * line_size);
- AV_COPY128U(block + 1 * 8, pixels + 1 * line_size);
- AV_COPY128U(block + 2 * 8, pixels + 2 * line_size);
- AV_COPY128U(block + 3 * 8, pixels + 3 * line_size);
- AV_COPY128U(block + 4 * 8, pixels + 4 * line_size);
- AV_COPY128U(block + 5 * 8, pixels + 5 * line_size);
- AV_COPY128U(block + 6 * 8, pixels + 6 * line_size);
- AV_COPY128U(block + 7 * 8, pixels + 7 * line_size);
++ AV_COPY128U(block + 0 * 8, pixels + 0 * stride);
++ AV_COPY128U(block + 1 * 8, pixels + 1 * stride);
++ AV_COPY128U(block + 2 * 8, pixels + 2 * stride);
++ AV_COPY128U(block + 3 * 8, pixels + 3 * stride);
++ AV_COPY128U(block + 4 * 8, pixels + 4 * stride);
++ AV_COPY128U(block + 5 * 8, pixels + 5 * stride);
++ AV_COPY128U(block + 6 * 8, pixels + 6 * stride);
++ AV_COPY128U(block + 7 * 8, pixels + 7 * stride);
+}
+
+static void get_pixels_8_c(int16_t *av_restrict block, const uint8_t *pixels,
- ptrdiff_t line_size)
++ ptrdiff_t stride)
+{
+ int i;
-#define BIT_DEPTH 8
-#include "pixblockdsp_template.c"
+ /* read the pixels */
+ for (i = 0; i < 8; i++) {
+ block[0] = pixels[0];
+ block[1] = pixels[1];
+ block[2] = pixels[2];
+ block[3] = pixels[3];
+ block[4] = pixels[4];
+ block[5] = pixels[5];
+ block[6] = pixels[6];
+ block[7] = pixels[7];
- pixels += line_size;
++ pixels += stride;
+ block += 8;
+ }
+}
-static void diff_pixels_c(int16_t *restrict block, const uint8_t *s1,
+static void diff_pixels_c(int16_t *av_restrict block, const uint8_t *s1,
- const uint8_t *s2, int stride)
+ const uint8_t *s2, ptrdiff_t stride)
{
int i;
diff --cc libavcodec/ppc/pixblockdsp.c
index f3a5050,96e7024..f5ac850
--- a/libavcodec/ppc/pixblockdsp.c
+++ b/libavcodec/ppc/pixblockdsp.c
@@@ -33,40 -33,13 +33,40 @@@
#include "libavcodec/avcodec.h"
#include "libavcodec/pixblockdsp.h"
-#if HAVE_ALTIVEC && HAVE_BIGENDIAN
+#if HAVE_ALTIVEC
+#if HAVE_VSX
+static void get_pixels_altivec(int16_t *restrict block, const uint8_t *pixels,
- ptrdiff_t line_size)
++ ptrdiff_t stride)
+{
+ int i;
+ vector unsigned char perm =
+ (vector unsigned char) {0x00,0x10, 0x01,0x11,0x02,0x12,0x03,0x13,\
+ 0x04,0x14,0x05,0x15,0x06,0x16,0x07,0x17};
+ const vector unsigned char zero =
+ (const vector unsigned char) vec_splat_u8(0);
+
+ for (i = 0; i < 8; i++) {
+ /* Read potentially unaligned pixels.
+ * We're reading 16 pixels, and actually only want 8,
+ * but we simply ignore the extras. */
+ vector unsigned char bytes = vec_vsx_ld(0, pixels);
+
+ // Convert the bytes into shorts.
+ //vector signed short shorts = (vector signed short) vec_perm(zero, bytes, perm);
+ vector signed short shorts = (vector signed short) vec_perm(bytes, zero, perm);
+
+ // Save the data to the block, we assume the block is 16-byte aligned.
+ vec_vsx_st(shorts, i * 16, (vector signed short *) block);
+
- pixels += line_size;
++ pixels += stride;
+ }
+}
+#else
static void get_pixels_altivec(int16_t *restrict block, const uint8_t *pixels,
- ptrdiff_t line_size)
+ ptrdiff_t stride)
{
int i;
- vec_u8 perm = vec_lvsl(0, pixels);
const vec_u8 zero = (const vec_u8)vec_splat_u8(0);
for (i = 0; i < 8; i++) {
@@@ -88,76 -60,12 +88,76 @@@
}
}
+#endif /* HAVE_VSX */
+
+#if HAVE_VSX
+static void diff_pixels_altivec(int16_t *restrict block, const uint8_t *s1,
- const uint8_t *s2, int stride)
++ const uint8_t *s2, ptrdiff_t stride)
+{
+ int i;
+ const vector unsigned char zero =
+ (const vector unsigned char) vec_splat_u8(0);
+ vector signed short shorts1, shorts2;
+
+ for (i = 0; i < 4; i++) {
+ /* Read potentially unaligned pixels.
+ * We're reading 16 pixels, and actually only want 8,
+ * but we simply ignore the extras. */
+ vector unsigned char bytes = vec_vsx_ld(0, s1);
+
+ // Convert the bytes into shorts.
+ shorts1 = (vector signed short) vec_mergeh(bytes, zero);
+
+ // Do the same for the second block of pixels.
+ bytes =vec_vsx_ld(0, s2);
+
+ // Convert the bytes into shorts.
+ shorts2 = (vector signed short) vec_mergeh(bytes, zero);
+
+ // Do the subtraction.
+ shorts1 = vec_sub(shorts1, shorts2);
+
+ // Save the data to the block, we assume the block is 16-byte aligned.
+ vec_vsx_st(shorts1, 0, (vector signed short *) block);
+
+ s1 += stride;
+ s2 += stride;
+ block += 8;
+
+ /* The code below is a copy of the code above...
+ * This is a manual unroll. */
+
+ /* Read potentially unaligned pixels.
+ * We're reading 16 pixels, and actually only want 8,
+ * but we simply ignore the extras. */
+ bytes = vec_vsx_ld(0, s1);
+
+ // Convert the bytes into shorts.
+ shorts1 = (vector signed short) vec_mergeh(bytes, zero);
+
+ // Do the same for the second block of pixels.
+ bytes = vec_vsx_ld(0, s2);
+
+ // Convert the bytes into shorts.
+ shorts2 = (vector signed short) vec_mergeh(bytes, zero);
+
+ // Do the subtraction.
+ shorts1 = vec_sub(shorts1, shorts2);
+
+ // Save the data to the block, we assume the block is 16-byte aligned.
+ vec_vsx_st(shorts1, 0, (vector signed short *) block);
+
+ s1 += stride;
+ s2 += stride;
+ block += 8;
+ }
+}
+#else
static void diff_pixels_altivec(int16_t *restrict block, const uint8_t *s1,
- const uint8_t *s2, int stride)
+ const uint8_t *s2, ptrdiff_t stride)
{
int i;
- vec_u8 perm1 = vec_lvsl(0, s1);
- vec_u8 perm2 = vec_lvsl(0, s2);
+ vec_u8 perm;
const vec_u8 zero = (const vec_u8)vec_splat_u8(0);
vec_s16 shorts1, shorts2;
diff --cc libavcodec/x86/pixblockdsp.asm
index 2864d0c,8712442..440fe29
--- a/libavcodec/x86/pixblockdsp.asm
+++ b/libavcodec/x86/pixblockdsp.asm
@@@ -80,12 -80,11 +80,11 @@@ cglobal get_pixels, 3, 4,
mova [r0+0x70], m3
RET
-INIT_MMX mmx
; void ff_diff_pixels_mmx(int16_t *block, const uint8_t *s1, const uint8_t *s2,
- ; int stride);
+ ; ptrdiff_t stride);
-cglobal diff_pixels, 4,5
- pxor m7, m7
+%macro DIFF_PIXELS 0
+cglobal diff_pixels, 4,5,5
- movsxdifnidn r3, r3d
+ pxor m4, m4
add r0, 128
mov r4, -128
.loop:
diff --cc libavcodec/x86/pixblockdsp_init.c
index 4d06a44,faa5141..fa9578a
--- a/libavcodec/x86/pixblockdsp_init.c
+++ b/libavcodec/x86/pixblockdsp_init.c
@@@ -23,12 -23,10 +23,12 @@@
#include "libavutil/x86/cpu.h"
#include "libavcodec/pixblockdsp.h"
- void ff_get_pixels_mmx(int16_t *block, const uint8_t *pixels, ptrdiff_t line_size);
- void ff_get_pixels_sse2(int16_t *block, const uint8_t *pixels, ptrdiff_t line_size);
+ void ff_get_pixels_mmx(int16_t *block, const uint8_t *pixels, ptrdiff_t stride);
+ void ff_get_pixels_sse2(int16_t *block, const uint8_t *pixels, ptrdiff_t stride);
void ff_diff_pixels_mmx(int16_t *block, const uint8_t *s1, const uint8_t *s2,
- int stride);
+ ptrdiff_t stride);
+void ff_diff_pixels_sse2(int16_t *block, const uint8_t *s1, const uint8_t *s2,
- int stride);
++ ptrdiff_t stride);
av_cold void ff_pixblockdsp_init_x86(PixblockDSPContext *c,
AVCodecContext *avctx,
diff --cc tests/checkasm/pixblockdsp.c
index 2b88e7d,0000000..e14b0a9
mode 100644,000000..100644
--- a/tests/checkasm/pixblockdsp.c
+++ b/tests/checkasm/pixblockdsp.c
@@@ -1,107 -1,0 +1,107 @@@
+/*
+ * Copyright (c) 2015 Tiancheng "Timothy" Gu
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with FFmpeg; if not, write to the Free Software Foundation, Inc.,
+ * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+ */
+
+#include <string.h>
+#include "checkasm.h"
+#include "libavcodec/pixblockdsp.h"
+#include "libavutil/common.h"
+#include "libavutil/internal.h"
+#include "libavutil/intreadwrite.h"
+
+#define BUF_UNITS 8
+#define BUF_SIZE (BUF_UNITS * 128 + 8 * BUF_UNITS)
+
+#define randomize_buffers() \
+ do { \
+ int i; \
+ for (i = 0; i < BUF_SIZE; i += 4) { \
+ uint32_t r = rnd(); \
+ AV_WN32A(src10 + i, r); \
+ AV_WN32A(src11 + i, r); \
+ r = rnd(); \
+ AV_WN32A(src20 + i, r); \
+ AV_WN32A(src21 + i, r); \
+ r = rnd(); \
+ AV_WN32A(dst0_ + i, r); \
+ AV_WN32A(dst1_ + i, r); \
+ } \
+ } while (0)
+
+#define check_get_pixels(type) \
+ do { \
+ int i; \
+ declare_func_emms(AV_CPU_FLAG_MMX, void, int16_t *block, const uint8_t *pixels, ptrdiff_t line_size); \
+ \
+ for (i = 0; i < BUF_UNITS; i++) { \
+ int src_offset = i * 64 * sizeof(type) + 8 * i; /* Test various alignments */ \
+ int dst_offset = i * 64; /* dst must be aligned */ \
+ randomize_buffers(); \
+ call_ref(dst0 + dst_offset, src10 + src_offset, 8); \
+ call_new(dst1 + dst_offset, src11 + src_offset, 8); \
+ if (memcmp(src10, src11, BUF_SIZE)|| memcmp(dst0, dst1, BUF_SIZE)) \
+ fail(); \
+ bench_new(dst1 + dst_offset, src11 + src_offset, 8); \
+ } \
+ } while (0)
+
+#define check_diff_pixels(type) \
+ do { \
+ int i; \
- declare_func_emms(AV_CPU_FLAG_MMX, void, int16_t *av_restrict block, const uint8_t *s1, const uint8_t *s2, int stride); \
++ declare_func_emms(AV_CPU_FLAG_MMX, void, int16_t *av_restrict block, const uint8_t *s1, const uint8_t *s2, ptrdiff_t stride); \
+ \
+ for (i = 0; i < BUF_UNITS; i++) { \
+ int src_offset = i * 64 * sizeof(type) + 8 * i; /* Test various alignments */ \
+ int dst_offset = i * 64; /* dst must be aligned */ \
+ randomize_buffers(); \
+ call_ref(dst0 + dst_offset, src10 + src_offset, src20 + src_offset, 8); \
+ call_new(dst1 + dst_offset, src11 + src_offset, src21 + src_offset, 8); \
+ if (memcmp(src10, src11, BUF_SIZE) || memcmp(src20, src21, BUF_SIZE) || memcmp(dst0, dst1, BUF_SIZE)) \
+ fail(); \
+ bench_new(dst1 + dst_offset, src11 + src_offset, src21 + src_offset, 8); \
+ } \
+ } while (0)
+
+void checkasm_check_pixblockdsp(void)
+{
+ LOCAL_ALIGNED_16(uint8_t, src10, [BUF_SIZE]);
+ LOCAL_ALIGNED_16(uint8_t, src11, [BUF_SIZE]);
+ LOCAL_ALIGNED_16(uint8_t, src20, [BUF_SIZE]);
+ LOCAL_ALIGNED_16(uint8_t, src21, [BUF_SIZE]);
+ LOCAL_ALIGNED_16(uint8_t, dst0_, [BUF_SIZE]);
+ LOCAL_ALIGNED_16(uint8_t, dst1_, [BUF_SIZE]);
+ uint16_t *dst0 = (uint16_t *)dst0_;
+ uint16_t *dst1 = (uint16_t *)dst1_;
+ PixblockDSPContext h;
+ AVCodecContext avctx = {
+ .bits_per_raw_sample = 8,
+ };
+
+ ff_pixblockdsp_init(&h, &avctx);
+
+ if (check_func(h.get_pixels, "get_pixels"))
+ check_get_pixels(uint8_t);
+
+ report("get_pixels");
+
+ if (check_func(h.diff_pixels, "diff_pixels"))
+ check_diff_pixels(uint8_t);
+
+ report("diff_pixels");
+}
More information about the ffmpeg-cvslog mailing list