[FFmpeg-devel] [PATCH 2/2] swresample/x86/resample: add ff_resample_scalarproduct_float_sse

James Almer jamrial at gmail.com
Wed Mar 19 06:49:34 CET 2014


At least two times faster than the C version.

Signed-off-by: James Almer <jamrial at gmail.com>
---
 libswresample/x86/resample.asm     | 21 +++++++++++++++++++++
 libswresample/x86/swresample_x86.c |  4 ++++
 2 files changed, 25 insertions(+)

diff --git a/libswresample/x86/resample.asm b/libswresample/x86/resample.asm
index 0204387..0bcd17d 100644
--- a/libswresample/x86/resample.asm
+++ b/libswresample/x86/resample.asm
@@ -62,3 +62,24 @@ INIT_MMX mmxext
 RESAMPLE_SCALARPRODUCT_INT16
 INIT_XMM sse2
 RESAMPLE_SCALARPRODUCT_INT16
+
+INIT_XMM sse
+cglobal resample_scalarproduct_float, 4,4,2, src, dst, filter, len ; *dst = sum(src[i] * filter[i]) for i in [0, len), 32-bit floats
+    shl      lend, 2                    ; len is a C int: write the 32-bit reg so lenq is zero-extended; len *= 4 (bytes)
+    neg      lenq                       ; lenq = -byte count, counts up towards zero
+    sub      srcq, lenq                 ; srcq    = one past the end of src
+    sub      filterq, lenq              ; filterq = one past the end of filter
+    xorps    m0, m0                     ; m0 = four running partial sums, start at zero
+.loop:                                  ; body runs at least once; assumes len is a positive multiple of 4 - TODO confirm with caller
+    movu     m1, [srcq + lenq]          ; unaligned load of 4 source samples
+    mulps    m1, [filterq + lenq]       ; SSE memory operand: filter must be 16-byte aligned here
+    addps    m0, m1
+    add      lenq, mmsize
+    js       .loop                      ; keep going while the offset is still negative
+    movhlps  m1, m0                     ; m1[0..1] = m0[2..3]
+    addps    m0, m1                     ; m0[0] = s0+s2, m0[1] = s1+s3
+    movss    m1, m0                     ; m1[0] = s0+s2
+    shufps   m0, m0, 1                  ; m0[0] = s1+s3
+    addss    m0, m1                     ; m0[0] = s0+s1+s2+s3
+    movss    [dstq], m0                 ; store the single float result
+    RET
diff --git a/libswresample/x86/swresample_x86.c b/libswresample/x86/swresample_x86.c
index f38b069..b8ff682 100644
--- a/libswresample/x86/swresample_x86.c
+++ b/libswresample/x86/swresample_x86.c
@@ -202,6 +202,7 @@ av_cold void swri_rematrix_init_x86(struct SwrContext *s){
 
 void ff_resample_scalarproduct_int16_mmxext(const void *src, void *dst, void *filter, int length);
 void ff_resample_scalarproduct_int16_sse2  (const void *src, void *dst, void *filter, int length);
+void ff_resample_scalarproduct_float_sse   (const void *src, void *dst, void *filter, int length);
 
 void swri_audio_resample_init_x86(ResampleContext *c)
 {
@@ -212,5 +213,8 @@ void swri_audio_resample_init_x86(ResampleContext *c)
             c->scalarproduct = ff_resample_scalarproduct_int16_mmxext;
         if (EXTERNAL_SSE2(cpuflags))
             c->scalarproduct = ff_resample_scalarproduct_int16_sse2;
+    } else if (c->format == AV_SAMPLE_FMT_FLTP) {
+        if (EXTERNAL_SSE(cpuflags))
+            c->scalarproduct = ff_resample_scalarproduct_float_sse;
     }
 }
-- 
1.8.3.2



More information about the ffmpeg-devel mailing list