[FFmpeg-cvslog] avcodec/x86/lossless_videodsp: add diff_int16_mmx/sse2

Michael Niedermayer git at videolan.org
Wed Jan 22 19:47:06 CET 2014


ffmpeg | branch: master | Michael Niedermayer <michaelni at gmx.at> | Wed Jan 22 19:41:21 2014 +0100| [631939bde6e29e29131a0ca389e5e8dea4c3d038] | committer: Michael Niedermayer

avcodec/x86/lossless_videodsp: add diff_int16_mmx/sse2

Signed-off-by: Michael Niedermayer <michaelni at gmx.at>

> http://git.videolan.org/gitweb.cgi/ffmpeg.git/?a=commit;h=631939bde6e29e29131a0ca389e5e8dea4c3d038
---

 libavcodec/x86/lossless_videodsp.asm    |   66 +++++++++++++++++++++++++++++++
 libavcodec/x86/lossless_videodsp_init.c |    4 ++
 2 files changed, 70 insertions(+)

diff --git a/libavcodec/x86/lossless_videodsp.asm b/libavcodec/x86/lossless_videodsp.asm
index e496c80..37663d7 100644
--- a/libavcodec/x86/lossless_videodsp.asm
+++ b/libavcodec/x86/lossless_videodsp.asm
@@ -93,6 +93,72 @@ cglobal add_int16, 4,4,5, dst, src, mask, w
 .unaligned:
     ADD_INT16_LOOP 0
 
+%macro DIFF_INT16_LOOP 1 ; %1 = is_aligned (1: mova, 0: movu)
+    ; dst[i] = (src1[i] - src2[i]) & mask for w 16-bit words; the macro ends in RET.
+    movd    m4, maskd
+    SPLATW  m4, m4              ; mask copied into every 16-bit lane of m4
+    add     wd, wd              ; words -> bytes; 32-bit op because w is an int (upper register half undefined on x86-64)
+    test    wq, 2*mmsize - 1
+    jz %%.tomainloop
+    ; Scalar tail: peel single words off the end until the byte count is a multiple of 2*mmsize.
+    push    r5q                 ; scratch must not be ax: eax is r0 (dst) in x86inc's x86-32 mapping
+%%.wordloop:
+    sub     wq, 2
+    mov     r5w, [src1q+wq]
+    sub     r5w, [src2q+wq]
+    and     r5w, maskw
+    mov     [dstq+wq], r5w
+    test    wq, 2*mmsize - 1
+    jnz %%.wordloop
+    pop     r5q
+%%.tomainloop:
+    ; Point all three pointers at the end of the vector part and count wq up from -bytes to 0.
+    add     src1q, wq
+    add     src2q, wq
+    add     dstq, wq
+    neg     wq
+    jz      %%.end              ; nothing left for the vector loop
+    ; Pick the vector move once instead of duplicating the load and store sequences.
+%if %1
+    %define DIFF_INT16_MOV mova
+%else
+    %define DIFF_INT16_MOV movu
+%endif
+%%.loop:
+    DIFF_INT16_MOV m0, [src1q+wq]
+    DIFF_INT16_MOV m1, [src2q+wq]
+    DIFF_INT16_MOV m2, [src1q+wq+mmsize]
+    DIFF_INT16_MOV m3, [src2q+wq+mmsize]
+    psubw   m0, m1
+    psubw   m2, m3
+    pand    m0, m4
+    pand    m2, m4
+    DIFF_INT16_MOV [dstq+wq]       , m0
+    DIFF_INT16_MOV [dstq+wq+mmsize], m2
+    add     wq, 2*mmsize
+    jl %%.loop
+    %undef DIFF_INT16_MOV
+%%.end:
+    RET
+%endmacro
+
+INIT_MMX mmx
+cglobal diff_int16, 5,5,5, dst, src1, src2, mask, w ; MMX variant; declared in C as ff_diff_int16_mmx(dst, src1, src2, mask, w)
+    DIFF_INT16_LOOP 1 ; no alignment test here; NOTE(review): presumably fine because mova has no alignment requirement for MMX - confirm in x86inc
+
+INIT_XMM sse2
+cglobal diff_int16, 5,5,5, dst, src1, src2, mask, w ; SSE2 variant; declared in C as ff_diff_int16_sse2(dst, src1, src2, mask, w)
+    test src1q, mmsize-1 ; any low bit set means src1 is not mmsize-aligned
+    jnz .unaligned
+    test src2q, mmsize-1 ; same check for src2
+    jnz .unaligned
+    test dstq, mmsize-1 ; same check for dst
+    jnz .unaligned
+    DIFF_INT16_LOOP 1 ; all three pointers aligned: mova path; the macro ends in RET, so this does not fall through
+.unaligned:
+    DIFF_INT16_LOOP 0 ; at least one pointer misaligned: movu path
+
+
 %macro ADD_HFYU_LEFT_LOOP_INT16 2 ; %1 = dst_is_aligned, %2 = src_is_aligned
     add     wq, wq
     add     srcq, wq
diff --git a/libavcodec/x86/lossless_videodsp_init.c b/libavcodec/x86/lossless_videodsp_init.c
index 88424ba..9927ca3 100644
--- a/libavcodec/x86/lossless_videodsp_init.c
+++ b/libavcodec/x86/lossless_videodsp_init.c
@@ -23,6 +23,8 @@
 
 void ff_add_int16_mmx(uint16_t *dst, const uint16_t *src, unsigned mask, int w);
 void ff_add_int16_sse2(uint16_t *dst, const uint16_t *src, unsigned mask, int w);
+void ff_diff_int16_mmx (uint16_t *dst, const uint16_t *src1, const uint16_t *src2, unsigned mask, int w);
+void ff_diff_int16_sse2(uint16_t *dst, const uint16_t *src1, const uint16_t *src2, unsigned mask, int w);
 int ff_add_hfyu_left_prediction_int16_ssse3(uint16_t *dst, const uint16_t *src, unsigned mask, int w, int acc);
 int ff_add_hfyu_left_prediction_int16_sse4(uint16_t *dst, const uint16_t *src, unsigned mask, int w, int acc);
 
@@ -32,10 +34,12 @@ void ff_llviddsp_init_x86(LLVidDSPContext *c)
 
     if (EXTERNAL_MMX(cpu_flags)) {
         c->add_int16 = ff_add_int16_mmx;
+        c->diff_int16 = ff_diff_int16_mmx;
     }
 
     if (EXTERNAL_SSE2(cpu_flags)) {
         c->add_int16 = ff_add_int16_sse2;
+        c->diff_int16 = ff_diff_int16_sse2;
     }
 
     if (EXTERNAL_SSSE3(cpu_flags)) {



More information about the ffmpeg-cvslog mailing list