[FFmpeg-cvslog] x86: Refactor PSWAPD fallback implementations and port to cpuflags

Diego Biurrun git at videolan.org
Sat Nov 3 14:35:37 CET 2012


ffmpeg | branch: master | Diego Biurrun <diego at biurrun.de> | Thu Aug  2 00:55:34 2012 +0200| [0a7a94f2e53bcdb8ac5857eb8c67c16f6f1d0f2f] | committer: Diego Biurrun

x86: Refactor PSWAPD fallback implementations and port to cpuflags

> http://git.videolan.org/gitweb.cgi/ffmpeg.git/?a=commit;h=0a7a94f2e53bcdb8ac5857eb8c67c16f6f1d0f2f
---

 libavcodec/x86/fft.asm        |   16 ++--------------
 libavcodec/x86/fmtconvert.asm |   17 ++---------------
 libavutil/x86/x86util.asm     |   12 ++++++++++++
 3 files changed, 16 insertions(+), 29 deletions(-)

diff --git a/libavcodec/x86/fft.asm b/libavcodec/x86/fft.asm
index 8c69f1f..111f322 100644
--- a/libavcodec/x86/fft.asm
+++ b/libavcodec/x86/fft.asm
@@ -105,7 +105,8 @@ SECTION_TEXT
     pfadd    %5, %4 ; {t6,t5}
     pxor     %3, [ps_m1p1] ; {t8,t7}
     mova     %6, %1
-    PSWAPD   %3, %3
+    movd [r0+12], %3
+    punpckhdq %3, [r0+8]
     pfadd    %1, %5 ; {r0,i0}
     pfsub    %6, %5 ; {r2,i2}
     mova     %4, %2
@@ -498,19 +499,6 @@ fft8 %+ SUFFIX:
 %endmacro
 
 %if ARCH_X86_32
-%macro PSWAPD 2
-%if cpuflag(3dnowext)
-    pswapd %1, %2
-%elifidn %1, %2
-    movd [r0+12], %1
-    punpckhdq %1, [r0+8]
-%else
-    movq  %1, %2
-    psrlq %1, 32
-    punpckldq %1, %2
-%endif
-%endmacro
-
 INIT_MMX 3dnowext
 FFT48_3DNOW
 
diff --git a/libavcodec/x86/fmtconvert.asm b/libavcodec/x86/fmtconvert.asm
index fb183ce..77b8bd7 100644
--- a/libavcodec/x86/fmtconvert.asm
+++ b/libavcodec/x86/fmtconvert.asm
@@ -246,16 +246,6 @@ FLOAT_TO_INT16_INTERLEAVE2
 INIT_XMM sse2
 FLOAT_TO_INT16_INTERLEAVE2
 
-
-%macro PSWAPD_SSE 2
-    pshufw %1, %2, 0x4e
-%endmacro
-%macro PSWAPD_3DNOW 2
-    movq  %1, %2
-    psrlq %1, 32
-    punpckldq %1, %2
-%endmacro
-
 %macro FLOAT_TO_INT16_INTERLEAVE6 0
 ; void float_to_int16_interleave6_sse(int16_t *dst, const float **src, int len)
 cglobal float_to_int16_interleave6, 2, 8, 0, dst, src, src1, src2, src3, src4, src5, len
@@ -285,11 +275,11 @@ cglobal float_to_int16_interleave6, 2, 8, 0, dst, src, src1, src2, src3, src4, s
     packssdw   mm0, mm3
     packssdw   mm1, mm4
     packssdw   mm2, mm5
-    pswapd     mm3, mm0
+    PSWAPD     mm3, mm0
     punpcklwd  mm0, mm1
     punpckhwd  mm1, mm2
     punpcklwd  mm2, mm3
-    pswapd     mm3, mm0
+    PSWAPD     mm3, mm0
     punpckldq  mm0, mm2
     punpckhdq  mm2, mm1
     punpckldq  mm1, mm3
@@ -305,12 +295,9 @@ cglobal float_to_int16_interleave6, 2, 8, 0, dst, src, src1, src2, src3, src4, s
 %endmacro ; FLOAT_TO_INT16_INTERLEAVE6
 
 INIT_MMX sse
-%define pswapd PSWAPD_SSE
 FLOAT_TO_INT16_INTERLEAVE6
 INIT_MMX 3dnow
-%define pswapd PSWAPD_3DNOW
 FLOAT_TO_INT16_INTERLEAVE6
-%undef pswapd
 INIT_MMX 3dnowext
 FLOAT_TO_INT16_INTERLEAVE6
 
diff --git a/libavutil/x86/x86util.asm b/libavutil/x86/x86util.asm
index ca0041a..9183d38 100644
--- a/libavutil/x86/x86util.asm
+++ b/libavutil/x86/x86util.asm
@@ -319,6 +319,18 @@
     %endif
 %endmacro
 
+%macro PSWAPD 2 ; dst, src: dst = src with its low and high dwords exchanged
+%if cpuflag(mmxext)
+    pshufw    %1, %2, q1032 ; word shuffle equivalent to a dword swap (the replaced PSWAPD_SSE used imm 0x4e)
+%elif cpuflag(3dnowext)
+    pswapd    %1, %2 ; native dword-swap instruction
+%elif cpuflag(3dnow)
+    movq      %1, %2 ; NOTE: %1 is overwritten before %2 is read again, so this path needs %1 != %2
+    psrlq     %1, 32 ; low dword of %1 = high dword of src, high dword cleared
+    punpckldq %1, %2 ; high dword of %1 = low dword of src
+%endif
+%endmacro
+
 %macro DEINTB 5 ; mask, reg1, mask, reg2, optional src to fill masks from
 %ifnum %5
     pand   m%3, m%5, m%4 ; src .. y6 .. y4



More information about the ffmpeg-cvslog mailing list