[FFmpeg-cvslog] x86: Refactor PSWAPD fallback implementations and port to cpuflags
Diego Biurrun
git at videolan.org
Sat Nov 3 14:35:37 CET 2012
ffmpeg | branch: master | Diego Biurrun <diego at biurrun.de> | Thu Aug 2 00:55:34 2012 +0200| [0a7a94f2e53bcdb8ac5857eb8c67c16f6f1d0f2f] | committer: Diego Biurrun
x86: Refactor PSWAPD fallback implementations and port to cpuflags
> http://git.videolan.org/gitweb.cgi/ffmpeg.git/?a=commit;h=0a7a94f2e53bcdb8ac5857eb8c67c16f6f1d0f2f
---
libavcodec/x86/fft.asm | 16 ++--------------
libavcodec/x86/fmtconvert.asm | 17 ++---------------
libavutil/x86/x86util.asm | 12 ++++++++++++
3 files changed, 16 insertions(+), 29 deletions(-)
diff --git a/libavcodec/x86/fft.asm b/libavcodec/x86/fft.asm
index 8c69f1f..111f322 100644
--- a/libavcodec/x86/fft.asm
+++ b/libavcodec/x86/fft.asm
@@ -105,7 +105,8 @@ SECTION_TEXT
pfadd %5, %4 ; {t6,t5}
pxor %3, [ps_m1p1] ; {t8,t7}
mova %6, %1
- PSWAPD %3, %3
+ movd [r0+12], %3
+ punpckhdq %3, [r0+8]
pfadd %1, %5 ; {r0,i0}
pfsub %6, %5 ; {r2,i2}
mova %4, %2
@@ -498,19 +499,6 @@ fft8 %+ SUFFIX:
%endmacro
%if ARCH_X86_32
-%macro PSWAPD 2
-%if cpuflag(3dnowext)
- pswapd %1, %2
-%elifidn %1, %2
- movd [r0+12], %1
- punpckhdq %1, [r0+8]
-%else
- movq %1, %2
- psrlq %1, 32
- punpckldq %1, %2
-%endif
-%endmacro
-
INIT_MMX 3dnowext
FFT48_3DNOW
diff --git a/libavcodec/x86/fmtconvert.asm b/libavcodec/x86/fmtconvert.asm
index fb183ce..77b8bd7 100644
--- a/libavcodec/x86/fmtconvert.asm
+++ b/libavcodec/x86/fmtconvert.asm
@@ -246,16 +246,6 @@ FLOAT_TO_INT16_INTERLEAVE2
INIT_XMM sse2
FLOAT_TO_INT16_INTERLEAVE2
-
-%macro PSWAPD_SSE 2
- pshufw %1, %2, 0x4e
-%endmacro
-%macro PSWAPD_3DNOW 2
- movq %1, %2
- psrlq %1, 32
- punpckldq %1, %2
-%endmacro
-
%macro FLOAT_TO_INT16_INTERLEAVE6 0
; void float_to_int16_interleave6_sse(int16_t *dst, const float **src, int len)
cglobal float_to_int16_interleave6, 2, 8, 0, dst, src, src1, src2, src3, src4, src5, len
@@ -285,11 +275,11 @@ cglobal float_to_int16_interleave6, 2, 8, 0, dst, src, src1, src2, src3, src4, s
packssdw mm0, mm3
packssdw mm1, mm4
packssdw mm2, mm5
- pswapd mm3, mm0
+ PSWAPD mm3, mm0
punpcklwd mm0, mm1
punpckhwd mm1, mm2
punpcklwd mm2, mm3
- pswapd mm3, mm0
+ PSWAPD mm3, mm0
punpckldq mm0, mm2
punpckhdq mm2, mm1
punpckldq mm1, mm3
@@ -305,12 +295,9 @@ cglobal float_to_int16_interleave6, 2, 8, 0, dst, src, src1, src2, src3, src4, s
%endmacro ; FLOAT_TO_INT16_INTERLEAVE6
INIT_MMX sse
-%define pswapd PSWAPD_SSE
FLOAT_TO_INT16_INTERLEAVE6
INIT_MMX 3dnow
-%define pswapd PSWAPD_3DNOW
FLOAT_TO_INT16_INTERLEAVE6
-%undef pswapd
INIT_MMX 3dnowext
FLOAT_TO_INT16_INTERLEAVE6
diff --git a/libavutil/x86/x86util.asm b/libavutil/x86/x86util.asm
index ca0041a..9183d38 100644
--- a/libavutil/x86/x86util.asm
+++ b/libavutil/x86/x86util.asm
@@ -319,6 +319,18 @@
%endif
%endmacro
+%macro PSWAPD 2
+%if cpuflag(mmxext)
+ pshufw %1, %2, q1032
+%elif cpuflag(3dnowext)
+ pswapd %1, %2
+%elif cpuflag(3dnow)
+ movq %1, %2
+ psrlq %1, 32
+ punpckldq %1, %2
+%endif
+%endmacro
+
%macro DEINTB 5 ; mask, reg1, mask, reg2, optional src to fill masks from
%ifnum %5
pand m%3, m%5, m%4 ; src .. y6 .. y4
More information about the ffmpeg-cvslog mailing list