[FFmpeg-devel] [PATCH 4/4] swr: add ff_resample_{common, linear}_int16_xop

James Almer jamrial at gmail.com
Mon Jun 30 01:19:06 CEST 2014


Signed-off-by: James Almer <jamrial at gmail.com>
---
 libswresample/x86/resample.asm       | 36 +++++++++++++++++++++---------------
 libswresample/x86/resample_x86_dsp.c |  5 +++++
 2 files changed, 26 insertions(+), 15 deletions(-)

diff --git a/libswresample/x86/resample.asm b/libswresample/x86/resample.asm
index 2cb656e..cd9eb96 100644
--- a/libswresample/x86/resample.asm
+++ b/libswresample/x86/resample.asm
@@ -176,8 +176,7 @@ cglobal resample_common_%1, 1, 7, 2, ctx, phase_shift, dst, frac, \
 .inner_loop:
     movu                          m1, [srcq+min_filter_count_x4q*1]
 %ifidn %1, int16
-    pmaddwd                       m1, [filterq+min_filter_count_x4q*1]
-    paddd                         m0, m1
+    PMADCSWD                      m0, m1, [filterq+min_filter_count_x4q*1], m0, m1
 %else ; float/double
 %if cpuflag(fma4) || cpuflag(fma3)
     fmaddp%4                      m0, m1, [filterq+min_filter_count_x4q*1], m0
@@ -195,14 +194,7 @@ cglobal resample_common_%1, 1, 7, 2, ctx, phase_shift, dst, frac, \
 %endif
 
 %ifidn %1, int16
-%if mmsize == 16
-    pshufd                        m1, m0, q0032
-    paddd                         m0, m1
-    pshufd                        m1, m0, q0001
-%else ; mmsize == 8
-    pshufw                        m1, m0, q0032
-%endif
-    paddd                         m0, m1
+    HADDD                         m0, m1
     psrad                         m0, 15
     add                        fracd, dst_incr_modd
     packssdw                      m0, m0
@@ -428,10 +420,15 @@ cglobal resample_linear_%1, 1, 7, 5, ctx, min_filter_length_x4, filter2, \
 .inner_loop:
     movu                          m1, [srcq+min_filter_count_x4q*1]
 %ifidn %1, int16
+%if cpuflag(xop)
+    vpmadcswd                     m2, m1, [filter2q+min_filter_count_x4q*1], m2
+    vpmadcswd                     m0, m1, [filter1q+min_filter_count_x4q*1], m0
+%else
     pmaddwd                       m3, m1, [filter2q+min_filter_count_x4q*1]
     pmaddwd                       m1, [filter1q+min_filter_count_x4q*1]
     paddd                         m2, m3
     paddd                         m0, m1
+%endif ; cpuflag
 %else ; float/double
 %if cpuflag(fma4) || cpuflag(fma3)
     fmaddp%4                      m2, m1, [filter2q+min_filter_count_x4q*1], m2
@@ -455,18 +452,21 @@ cglobal resample_linear_%1, 1, 7, 5, ctx, min_filter_length_x4, filter2, \
 
 %ifidn %1, int16
 %if mmsize == 16
+%if cpuflag(xop)
+    vphadddq                      m2, m2
+    vphadddq                      m0, m0
+%endif
     pshufd                        m3, m2, q0032
     pshufd                        m1, m0, q0032
     paddd                         m2, m3
     paddd                         m0, m1
-    pshufd                        m3, m2, q0001
-    pshufd                        m1, m0, q0001
-%else ; mmsize == 8
-    pshufw                        m3, m2, q0032
-    pshufw                        m1, m0, q0032
 %endif
+%if notcpuflag(xop) || mmsize == 8
+    PSHUFLW                       m3, m2, q0032
+    PSHUFLW                       m1, m0, q0032
     paddd                         m2, m3
     paddd                         m0, m1
+%endif
     psubd                         m2, m0
     ; This is probably a really bad idea on atom and other machines with a
     ; long transfer latency between GPRs and XMMs (atom). However, it does
@@ -593,4 +593,10 @@ RESAMPLE_FNS int16, 2, 1, , pd_0x4000
 
 INIT_XMM sse2
 RESAMPLE_FNS int16, 2, 1, , pd_0x4000
+%if HAVE_XOP_EXTERNAL
+INIT_XMM xop
+RESAMPLE_FNS int16, 2, 1, , pd_0x4000
+%endif
+
+INIT_XMM sse2
 RESAMPLE_FNS double, 8, 3, d, pdbl_1
diff --git a/libswresample/x86/resample_x86_dsp.c b/libswresample/x86/resample_x86_dsp.c
index c3d8578..491b223 100644
--- a/libswresample/x86/resample_x86_dsp.c
+++ b/libswresample/x86/resample_x86_dsp.c
@@ -35,6 +35,7 @@ int ff_resample_linear_##type##_##opt(ResampleContext *c, uint8_t *dst, \
 
 RESAMPLE_FUNCS(int16, mmxext);
 RESAMPLE_FUNCS(int16, sse2);
+RESAMPLE_FUNCS(int16, xop);
 RESAMPLE_FUNCS(float, sse);
 RESAMPLE_FUNCS(float, avx);
 RESAMPLE_FUNCS(float, fma3);
@@ -73,4 +74,8 @@ void swresample_dsp_x86_init(ResampleContext *c)
         c->dsp.resample_common[FNIDX(FLTP)] = ff_resample_common_float_fma4;
         c->dsp.resample_linear[FNIDX(FLTP)] = ff_resample_linear_float_fma4;
     }
+    if (HAVE_XOP_EXTERNAL && mm_flags & AV_CPU_FLAG_XOP) {
+        c->dsp.resample_common[FNIDX(S16P)] = ff_resample_common_int16_xop;
+        c->dsp.resample_linear[FNIDX(S16P)] = ff_resample_linear_int16_xop;
+    }
 }
-- 
1.9.4.msysgit.0




More information about the ffmpeg-devel mailing list