[FFmpeg-cvslog] x86: dct32: port to cpuflags

Diego Biurrun git at videolan.org
Sat Aug 4 23:54:13 CEST 2012


ffmpeg | branch: master | Diego Biurrun <diego at biurrun.de> | Wed Aug  1 19:28:08 2012 +0200| [0c3ff1982c5da0abfb27a7d2328d742a37257698] | committer: Diego Biurrun

x86: dct32: port to cpuflags

> http://git.videolan.org/gitweb.cgi/ffmpeg.git/?a=commit;h=0c3ff1982c5da0abfb27a7d2328d742a37257698
---

 libavcodec/x86/dct32_sse.asm |   74 ++++++++++++++----------------------------
 1 file changed, 25 insertions(+), 49 deletions(-)

diff --git a/libavcodec/x86/dct32_sse.asm b/libavcodec/x86/dct32_sse.asm
index e3c8a45..9d6169c 100644
--- a/libavcodec/x86/dct32_sse.asm
+++ b/libavcodec/x86/dct32_sse.asm
@@ -42,39 +42,24 @@ ps_cos_vec: dd   0.500603,  0.505471,  0.515447,  0.531043
 align 32
 ps_p1p1m1m1: dd 0, 0, 0x80000000, 0x80000000, 0, 0, 0x80000000, 0x80000000
 
-%macro BUTTERFLY_SSE 4
-    movaps %4, %1
-    subps  %1, %2
-    addps  %2, %4
-    mulps  %1, %3
-%endmacro
-
-%macro BUTTERFLY_AVX 4
-    vsubps  %4, %1, %2
-    vaddps  %2, %2, %1
-    vmulps  %1, %4, %3
-%endmacro
-
-%macro BUTTERFLY0_SSE 5
-    movaps %4, %1
-    shufps %1, %1, %5
-    xorps  %4, %2
-    addps  %1, %4
-    mulps  %1, %3
+%macro BUTTERFLY 4
+    subps  %4, %1, %2
+    addps  %2, %2, %1
+    mulps  %1, %4, %3
 %endmacro
 
-%macro BUTTERFLY0_SSE2 5
+%macro BUTTERFLY0 5
+%if cpuflag(sse2) && notcpuflag(avx)
     pshufd %4, %1, %5
     xorps  %1, %2
     addps  %1, %4
     mulps  %1, %3
-%endmacro
-
-%macro BUTTERFLY0_AVX 5
-    vshufps %4, %1, %1, %5
-    vxorps  %1, %1, %2
-    vaddps  %4, %4, %1
-    vmulps  %1, %4, %3
+%else
+    shufps %4, %1, %1, %5
+    xorps  %1, %1, %2
+    addps  %4, %4, %1
+    mulps  %1, %4, %3
+%endif
 %endmacro
 
 %macro BUTTERFLY2 4
@@ -206,14 +191,11 @@ ps_p1p1m1m1: dd 0, 0, 0x80000000, 0x80000000, 0, 0, 0x80000000, 0x80000000
     movss [outq+116], m6
 %endmacro
 
-%define BUTTERFLY  BUTTERFLY_AVX
-%define BUTTERFLY0 BUTTERFLY0_AVX
-
-INIT_YMM
+INIT_YMM avx
 SECTION_TEXT
 %if HAVE_AVX
 ; void ff_dct32_float_avx(FFTSample *out, const FFTSample *in)
-cglobal dct32_float_avx, 2,3,8, out, in, tmp
+cglobal dct32_float, 2,3,8, out, in, tmp
     ; pass 1
     vmovaps     m4, [inq+0]
     vinsertf128 m5, m5, [inq+96], 1
@@ -286,9 +268,6 @@ INIT_XMM
     RET
 %endif
 
-%define BUTTERFLY  BUTTERFLY_SSE
-%define BUTTERFLY0 BUTTERFLY0_SSE
-
 %if ARCH_X86_64
 %define SPILL SWAP
 %define UNSPILL SWAP
@@ -411,10 +390,9 @@ INIT_XMM
 %endif
 
 
-INIT_XMM
-%macro DCT32_FUNC 1
 ; void ff_dct32_float_sse(FFTSample *out, const FFTSample *in)
-cglobal dct32_float_%1, 2,3,16, out, in, tmp
+%macro DCT32_FUNC 0
+cglobal dct32_float, 2, 3, 16, out, in, tmp
     ; pass 1
 
     movaps      m0, [inq+0]
@@ -498,18 +476,16 @@ cglobal dct32_float_%1, 2,3,16, out, in, tmp
     RET
 %endmacro
 
-%macro LOAD_INV_SSE 2
+%macro LOAD_INV 2
+%if cpuflag(sse2)
+    pshufd      %1, %2, 0x1b
+%elif cpuflag(sse)
     movaps      %1, %2
     shufps      %1, %1, 0x1b
+%endif
 %endmacro
 
-%define LOAD_INV LOAD_INV_SSE
-DCT32_FUNC sse
-
-%macro LOAD_INV_SSE2 2
-    pshufd      %1, %2, 0x1b
-%endmacro
-
-%define LOAD_INV LOAD_INV_SSE2
-%define BUTTERFLY0 BUTTERFLY0_SSE2
-DCT32_FUNC sse2
+INIT_XMM sse
+DCT32_FUNC
+INIT_XMM sse2
+DCT32_FUNC



More information about the ffmpeg-cvslog mailing list