[FFmpeg-cvslog] vp56: port x86 simd to cpuflags.

Ronald S. Bultje git at videolan.org
Sun Jul 29 02:26:18 CEST 2012


ffmpeg | branch: master | Ronald S. Bultje <rsbultje at gmail.com> | Thu Jul 26 22:07:29 2012 -0700| [2734ba787b4a2cbc44bbc6499ae82013c790f453] | committer: Ronald S. Bultje

vp56: port x86 simd to cpuflags.

> http://git.videolan.org/gitweb.cgi/ffmpeg.git/?a=commit;h=2734ba787b4a2cbc44bbc6499ae82013c790f453
---

 libavcodec/x86/vp56dsp.asm |   34 +++++++++++++++-------------------
 1 file changed, 15 insertions(+), 19 deletions(-)

diff --git a/libavcodec/x86/vp56dsp.asm b/libavcodec/x86/vp56dsp.asm
index 66a97f1..27a82bc 100644
--- a/libavcodec/x86/vp56dsp.asm
+++ b/libavcodec/x86/vp56dsp.asm
@@ -27,7 +27,8 @@ cextern pw_64
 
 SECTION .text
 
-%macro DIAG4_MMX 6
+%macro DIAG4 6
+%if mmsize == 8
     movq          m0, [%1+%2]
     movq          m1, [%1+%3]
     movq          m3, m0
@@ -64,9 +65,7 @@ SECTION .text
     psraw         m3, 7
     packuswb      m0, m3
     movq        [%6], m0
-%endmacro
-
-%macro DIAG4_SSE2 6
+%else ; mmsize == 16
     movq          m0, [%1+%2]
     movq          m1, [%1+%3]
     punpcklbw     m0, m7
@@ -86,9 +85,11 @@ SECTION .text
     psraw         m0, 7
     packuswb      m0, m0
     movq        [%6], m0
+%endif ; mmsize == 8/16
 %endmacro
 
-%macro SPLAT4REGS_MMX 0
+%macro SPLAT4REGS 0
+%if mmsize == 8
     movq         m5, m3
     punpcklwd    m3, m3
     movq         m4, m3
@@ -102,9 +103,7 @@ SECTION .text
     movq [rsp+8*12], m4
     movq [rsp+8*13], m5
     movq [rsp+8*14], m2
-%endmacro
-
-%macro SPLAT4REGS_SSE2 0
+%else ; mmsize == 16
     pshuflw      m4, m3, 0x0
     pshuflw      m5, m3, 0x55
     pshuflw      m6, m3, 0xAA
@@ -113,15 +112,16 @@ SECTION .text
     punpcklqdq   m5, m5
     punpcklqdq   m6, m6
     punpcklqdq   m3, m3
+%endif ; mmsize == 8/16
 %endmacro
 
-%macro vp6_filter_diag4 2
+%macro vp6_filter_diag4 0
 ; void ff_vp6_filter_diag4_<opt>(uint8_t *dst, uint8_t *src, int stride,
 ;                                const int16_t h_weight[4], const int16_t v_weights[4])
-cglobal vp6_filter_diag4_%1, 5, 7, %2
+cglobal vp6_filter_diag4, 5, 7, 8
     mov          r5, rsp         ; backup stack pointer
     and         rsp, ~(mmsize-1) ; align stack
-%ifidn %1, sse2
+%if mmsize == 16
     sub         rsp, 8*11
 %else
     sub         rsp, 8*15
@@ -162,12 +162,8 @@ cglobal vp6_filter_diag4_%1, 5, 7, %2
     RET
 %endmacro
 
-INIT_MMX
-%define DIAG4      DIAG4_MMX
-%define SPLAT4REGS SPLAT4REGS_MMX
-vp6_filter_diag4 mmx,  0
+INIT_MMX mmx
+vp6_filter_diag4
 
-INIT_XMM
-%define DIAG4      DIAG4_SSE2
-%define SPLAT4REGS SPLAT4REGS_SSE2
-vp6_filter_diag4 sse2, 8
+INIT_XMM sse2
+vp6_filter_diag4



More information about the ffmpeg-cvslog mailing list