[FFmpeg-devel] [PATCH] x86: move XOP emulation code back to x86inc
James Almer
jamrial at gmail.com
Mon Aug 3 08:41:42 CEST 2015
Only two functions that use xop multiply-accumulate instructions where the
first operand is the same as the fourth actually took advantage of the macros.
This further reduces differences with x264's x86inc.
Signed-off-by: James Almer <jamrial at gmail.com>
---
libavcodec/x86/flacdsp.asm | 9 +++++++++
libavutil/x86/x86inc.asm | 16 ++++++++++++++++
libavutil/x86/x86util.asm | 19 -------------------
libswresample/x86/resample.asm | 7 ++++++-
4 files changed, 31 insertions(+), 20 deletions(-)
diff --git a/libavcodec/x86/flacdsp.asm b/libavcodec/x86/flacdsp.asm
index 901c440..7138611 100644
--- a/libavcodec/x86/flacdsp.asm
+++ b/libavcodec/x86/flacdsp.asm
@@ -25,6 +25,15 @@
SECTION .text
+%macro PMACSDQL 5 ; multiply-accumulate into %1 using %2 and %3; %4/%5 are accepted but never referenced (NOTE(review): presumably kept so existing 5-argument call sites need no change - confirm)
+%if cpuflag(xop)
+ pmacsdql %1, %2, %3, %1 ; XOP: one instruction, %1 is both the destination and the addend
+%else
+ pmuldq %2, %3 ; emulation: the product overwrites %2 (so %2 is clobbered on this path only)
+ paddq %1, %2 ; then the product is added into %1
+%endif
+%endmacro
+
%macro LPC_32 1
INIT_XMM %1
cglobal flac_lpc_32, 5,6,5, decoded, coeffs, pred_order, qlevel, len, j
diff --git a/libavutil/x86/x86inc.asm b/libavutil/x86/x86inc.asm
index d4c5e69..28a2d87 100644
--- a/libavutil/x86/x86inc.asm
+++ b/libavutil/x86/x86inc.asm
@@ -1427,6 +1427,30 @@ AVX_INSTR pfmul, 3dnow, 1, 0, 1
%undef i
%undef j
+; FMA_INSTR name, mul, add
+; Defines a macro `name dst, src1, src2, acc` for an XOP multiply-accumulate.
+; With XOP the native v<name> instruction is emitted. Otherwise it is emulated
+; as `mul dst, src1, src2` followed by `add dst, acc`.
+; The emulation writes dst before acc is read, so dst and acc must differ;
+; passing the same operand for both is rejected at assembly time.
+%macro FMA_INSTR 3
+ %macro %1 4-7 %1, %2, %3
+ %if cpuflag(xop)
+ v%5 %1, %2, %3, %4
+ %elifnidn %1, %4
+ %6 %1, %2, %3
+ %7 %1, %4
+ %else
+ %error non-xop emulation of ``%5 %1, %2, %3, %4'' is not supported
+ %endif
+ %endmacro
+%endmacro
+
+FMA_INSTR pmacsww, pmullw, paddw
+FMA_INSTR pmacsdd, pmulld, paddd ; sse4 emulation
+FMA_INSTR pmacsdql, pmuldq, paddq ; sse4 emulation
+FMA_INSTR pmadcswd, pmaddwd, paddd
+
; tzcnt is equivalent to "rep bsf" and is backwards-compatible with bsf.
; This lets us use tzcnt without bumping the yasm version requirement yet.
%define tzcnt rep bsf
diff --git a/libavutil/x86/x86util.asm b/libavutil/x86/x86util.asm
index d6702c1..bf64d17 100644
--- a/libavutil/x86/x86util.asm
+++ b/libavutil/x86/x86util.asm
@@ -765,25 +765,6 @@
%endif
%endmacro
-%macro PMA_EMU 4
- %macro %1 5-8 %2, %3, %4
- %if cpuflag(xop)
- v%6 %1, %2, %3, %4
- %elifidn %1, %4
- %7 %5, %2, %3
- %8 %1, %4, %5
- %else
- %7 %1, %2, %3
- %8 %1, %4
- %endif
- %endmacro
-%endmacro
-
-PMA_EMU PMACSWW, pmacsww, pmullw, paddw
-PMA_EMU PMACSDD, pmacsdd, pmulld, paddd ; sse4 emulation
-PMA_EMU PMACSDQL, pmacsdql, pmuldq, paddq ; sse4 emulation
-PMA_EMU PMADCSWD, pmadcswd, pmaddwd, paddd
-
; Wrapper for non-FMA version of fmaddps
%macro FMULADD_PS 5
%if cpuflag(fma3) || cpuflag(fma4)
diff --git a/libswresample/x86/resample.asm b/libswresample/x86/resample.asm
index a57ff37..4989aa6 100644
--- a/libswresample/x86/resample.asm
+++ b/libswresample/x86/resample.asm
@@ -176,7 +176,12 @@ cglobal resample_common_%1, 1, 7, 2, ctx, phase_shift, dst, frac, \
.inner_loop:
movu m1, [srcq+min_filter_count_x4q*1]
%ifidn %1, int16
- PMADCSWD m0, m1, [filterq+min_filter_count_x4q*1], m0, m1
+%if cpuflag(xop)
+ vpmadcswd m0, m1, [filterq+min_filter_count_x4q*1], m0
+%else
+ pmaddwd m1, [filterq+min_filter_count_x4q*1]
+ paddd m0, m1
+%endif
%else ; float/double
%if cpuflag(fma4) || cpuflag(fma3)
fmaddp%4 m0, m1, [filterq+min_filter_count_x4q*1], m0
--
2.5.0
More information about the ffmpeg-devel
mailing list