[FFmpeg-cvslog] x86inc: Remove our FMA4 support

Derek Buitenhuis git at videolan.org
Mon Oct 14 16:05:44 CEST 2013


ffmpeg | branch: master | Derek Buitenhuis <derek.buitenhuis at gmail.com> | Wed Oct  9 08:54:59 2013 -0400| [206895708ea2b464755d340e44501daf9a07c310] | committer: Derek Buitenhuis

x86inc: Remove our FMA4 support

This is so we can sync to x264's version of FMA4 support.

This partially reverts commit 79687079a97a039c325ab79d7a95920d800b791f.

Signed-off-by: Derek Buitenhuis <derek.buitenhuis at gmail.com>

> http://git.videolan.org/gitweb.cgi/ffmpeg.git/?a=commit;h=206895708ea2b464755d340e44501daf9a07c310
---

 libavresample/x86/audio_mix.asm |   18 +++++++++---------
 libavutil/x86/x86inc.asm        |   16 +++++-----------
 libavutil/x86/x86util.asm       |   11 +++++++++++
 3 files changed, 25 insertions(+), 20 deletions(-)

diff --git a/libavresample/x86/audio_mix.asm b/libavresample/x86/audio_mix.asm
index 4710bb1..2c657b5 100644
--- a/libavresample/x86/audio_mix.asm
+++ b/libavresample/x86/audio_mix.asm
@@ -384,10 +384,10 @@ cglobal mix_%1_to_%2_%3_flt, 3,in_channels+2,needed_mmregs+matrix_elements_mm, n
     S16_TO_S32_SX   4, 5
     cvtdq2ps       m4, m4
     cvtdq2ps       m5, m5
-    fmaddps        m2, m4, mx_1_ %+ %%i, m2, m6
-    fmaddps        m3, m5, mx_1_ %+ %%i, m3, m6
-    fmaddps        m0, m4, mx_0_ %+ %%i, m0, m4
-    fmaddps        m1, m5, mx_0_ %+ %%i, m1, m5
+    FMULADD_PS     m2, m4, mx_1_ %+ %%i, m2, m6
+    FMULADD_PS     m3, m5, mx_1_ %+ %%i, m3, m6
+    FMULADD_PS     m0, m4, mx_0_ %+ %%i, m0, m4
+    FMULADD_PS     m1, m5, mx_0_ %+ %%i, m1, m5
     %else
     %if copy_src_from_stack
     mov       src_ptr, src %+ %%i %+ m
@@ -396,8 +396,8 @@ cglobal mix_%1_to_%2_%3_flt, 3,in_channels+2,needed_mmregs+matrix_elements_mm, n
     S16_TO_S32_SX   2, 3
     cvtdq2ps       m2, m2
     cvtdq2ps       m3, m3
-    fmaddps        m0, m2, mx_0_ %+ %%i, m0, m4
-    fmaddps        m1, m3, mx_0_ %+ %%i, m1, m4
+    FMULADD_PS     m0, m2, mx_0_ %+ %%i, m0, m4
+    FMULADD_PS     m1, m3, mx_0_ %+ %%i, m1, m4
     %endif
     %assign %%i %%i+1
 %endrep
@@ -437,12 +437,12 @@ cglobal mix_%1_to_%2_%3_flt, 3,in_channels+2,needed_mmregs+matrix_elements_mm, n
     mova           m2, [src_ptr+lenq]
     %endif
     %if stereo
-    fmaddps        m1, m2, mx_1_ %+ %%i, m1, m3
+    FMULADD_PS     m1, m2, mx_1_ %+ %%i, m1, m3
     %endif
     %if stereo || mx_stack_0_ %+ %%i
-    fmaddps        m0, m2, mx_0_ %+ %%i, m0, m2
+    FMULADD_PS     m0, m2, mx_0_ %+ %%i, m0, m2
     %else
-    fmaddps        m0, mx_0_ %+ %%i, [src_ptr+lenq], m0, m1
+    FMULADD_PS     m0, mx_0_ %+ %%i, [src_ptr+lenq], m0, m1
     %endif
     %assign %%i %%i+1
 %endrep
diff --git a/libavutil/x86/x86inc.asm b/libavutil/x86/x86inc.asm
index b2b67c5..051ac58 100644
--- a/libavutil/x86/x86inc.asm
+++ b/libavutil/x86/x86inc.asm
@@ -1400,22 +1400,16 @@ AVX_INSTR pfmul, 1, 0, 1
 %undef j
 
 %macro FMA_INSTR 3
-    %macro %1 5-8 %1, %2, %3
-        %if cpuflag(xop) || cpuflag(fma4)
-            v%6 %1, %2, %3, %4
+    %macro %1 4-7 %1, %2, %3
+        %if cpuflag(xop)
+            v%5 %1, %2, %3, %4
         %else
-            %ifidn %1, %4
-                %7 %5, %2, %3
-                %8 %1, %4, %5
-            %else
-                %7 %1, %2, %3
-                %8 %1, %4
-            %endif
+            %6 %1, %2, %3
+            %7 %1, %4
         %endif
     %endmacro
 %endmacro
 
-FMA_INSTR  fmaddps,   mulps, addps
 FMA_INSTR  pmacsdd,  pmulld, paddd
 FMA_INSTR  pmacsww,  pmullw, paddw
 FMA_INSTR pmadcswd, pmaddwd, paddd
diff --git a/libavutil/x86/x86util.asm b/libavutil/x86/x86util.asm
index 79a023f..bfe7a23 100644
--- a/libavutil/x86/x86util.asm
+++ b/libavutil/x86/x86util.asm
@@ -665,3 +665,14 @@
     psrad        %1, 16
 %endif
 %endmacro
+
+; Wrapper for non-FMA version of fmaddps
+%macro FMULADD_PS 5
+    %ifidn %1, %4
+        mulps   %5, %2, %3
+        addps   %1, %4, %5
+    %else
+        mulps   %1, %2, %3
+        addps   %1, %4
+    %endif
+%endmacro



More information about the ffmpeg-cvslog mailing list