[FFmpeg-cvslog] avutil/x86util: move haddps sse emulation from showcqt

James Almer git at videolan.org
Wed Jun 8 19:24:08 CEST 2016


ffmpeg | branch: master | James Almer <jamrial at gmail.com> | Wed Jun  8 14:18:00 2016 -0300| [99b899483e10d68358f8018cf120c42f370235e6] | committer: James Almer

avutil/x86util: move haddps sse emulation from showcqt

Signed-off-by: James Almer <jamrial at gmail.com>

> http://git.videolan.org/gitweb.cgi/ffmpeg.git/?a=commit;h=99b899483e10d68358f8018cf120c42f370235e6
---

 libavfilter/x86/avf_showcqt.asm |   21 +++++----------------
 libavutil/x86/x86util.asm       |   11 +++++++++++
 2 files changed, 16 insertions(+), 16 deletions(-)

diff --git a/libavfilter/x86/avf_showcqt.asm b/libavfilter/x86/avf_showcqt.asm
index 6dac0a7..36400fb 100644
--- a/libavfilter/x86/avf_showcqt.asm
+++ b/libavfilter/x86/avf_showcqt.asm
@@ -35,17 +35,6 @@ struc Coeffs
     .sizeof:
 endstruc
 
-%macro EMULATE_HADDPS 3 ; dst, src, tmp
-%if cpuflag(sse3)
-    haddps  %1, %2
-%else
-    movaps  %3, %1
-    shufps  %1, %2, q2020
-    shufps  %3, %2, q3131
-    addps   %1, %3
-%endif
-%endmacro ; EMULATE_HADDPS
-
 %macro EMULATE_FMADDPS 5 ; dst, src1, src2, src3, tmp
 %if cpuflag(fma3) || cpuflag(fma4)
     fmaddps %1, %2, %3, %4
@@ -85,9 +74,9 @@ endstruc
     subps   m%6, m%3, m%1
     addps   m%1, m%3
     subps   m%2, m%4
-    EMULATE_HADDPS m%5, m%6, m%3
-    EMULATE_HADDPS m%1, m%2, m%3
-    EMULATE_HADDPS m%1, m%5, m%2
+    HADDPS  m%5, m%6, m%3
+    HADDPS  m%1, m%2, m%3
+    HADDPS  m%1, m%5, m%2
     %if mmsize == 32
     vextractf128 xmm%2, m%1, 1
     addps   xmm%1, xmm%2
@@ -141,7 +130,7 @@ cglobal showcqt_cqt_calc, 5, 10, 12, dst, src, coeffs, len, fft_len, x, coeffs_v
         CQT_SEPARATE 8, 9, 10, 11, 4, 5
         mulps   xmm0, xmm0
         mulps   xmm8, xmm8
-        EMULATE_HADDPS xmm0, xmm8, xmm1
+        HADDPS  xmm0, xmm8, xmm1
         movaps  [dstq], xmm0
         sub     lend, 2
         lea     dstq, [dstq + 16]
@@ -183,7 +172,7 @@ cglobal showcqt_cqt_calc, 4, 7, 8, dst, src, coeffs, len, x, coeffs_val, i
             jb      .loop_x
         CQT_SEPARATE 0, 1, 2, 3, 4, 5
         mulps   xmm0, xmm0
-        EMULATE_HADDPS xmm0, xmm0, xmm1
+        HADDPS  xmm0, xmm0, xmm1
         .store:
         movlps  [dstq], xmm0
         sub     lend, 1
diff --git a/libavutil/x86/x86util.asm b/libavutil/x86/x86util.asm
index b09fa81..50b7b50 100644
--- a/libavutil/x86/x86util.asm
+++ b/libavutil/x86/x86util.asm
@@ -357,6 +357,17 @@
 %endif
 %endmacro
 
+%macro HADDPS 3 ; dst, src, tmp (tmp is written only on the non-SSE3 path)
+%if cpuflag(sse3)
+    haddps  %1, %2
+%else
+    movaps  %3, %1 ; copy dst into tmp before the next shufps overwrites dst
+    shufps  %1, %2, q2020 ; dst = {dst[0], dst[2], src[0], src[2]} (assumes qDCBA lists selectors high lane first - confirm against the q#### definitions)
+    shufps  %3, %2, q3131 ; tmp = {old dst[1], old dst[3], src[1], src[3]}; NOTE(review): if src is the same register as dst it was already shuffled above, so only the low two lanes then match haddps
+    addps   %1, %3 ; dst = element-wise sum of the two shuffles, i.e. sums of adjacent pairs
+%endif
+%endmacro
+
 %macro PALIGNR 4-5
 %if cpuflag(ssse3)
 %if %0==5



More information about the ffmpeg-cvslog mailing list