[FFmpeg-devel] [RFC] avx2 and scalarproduct

Fri Oct 9 22:08:21 CEST 2015

diff --git a/libavcodec/x86/audiodsp.asm b/libavcodec/x86/audiodsp.asm
index 3ffb27f..246e945 100644
--- a/libavcodec/x86/audiodsp.asm
+++ b/libavcodec/x86/audiodsp.asm
@@ -41,7 +41,14 @@ cglobal scalarproduct_int16, 3,3,3, v1, v2, order
     add     orderq, mmsize*2
     jl .loop
     HADDD   m2, m0
+%if cpuflag(avx2)
+    movd   eax, xm2          ; return value = low dword of xm2 after HADDD
+%if mmsize > 16
+    vzeroupper               ; ymm regs were used: clear upper lanes; eax is untouched
+%endif
+%else
     movd   eax, m2
+%endif
 %if mmsize == 8
     emms
 %endif
@@ -52,6 +59,8 @@ INIT_MMX mmxext
 SCALARPRODUCT
 INIT_XMM sse2
 SCALARPRODUCT
+INIT_YMM avx2
+SCALARPRODUCT
 
 
 ;-----------------------------------------------------------------------------

diff --git a/libavcodec/x86/audiodsp_init.c b/libavcodec/x86/audiodsp_init.c
index a2ce231..b99a129 100644
--- a/libavcodec/x86/audiodsp_init.c
+++ b/libavcodec/x86/audiodsp_init.c
@@ -29,6 +29,8 @@ int32_t ff_scalarproduct_int16_mmxext(const int16_t *v1, const int16_t *v2,
                                       int order);
 int32_t ff_scalarproduct_int16_sse2(const int16_t *v1, const int16_t *v2,
                                     int order);
+int32_t ff_scalarproduct_int16_avx2(const int16_t *v1, const int16_t *v2,
+                                    int order);
 
 void ff_vector_clip_int32_mmx(int32_t *dst, const int32_t *src,
                               int32_t min, int32_t max, unsigned int len);
@@ -64,4 +66,8 @@ av_cold void ff_audiodsp_init_x86(AudioDSPContext *c)
 
     if (EXTERNAL_SSE4(cpu_flags))
         c->vector_clip_int32 = ff_vector_clip_int32_sse4;
+
+    if (EXTERNAL_AVX2(cpu_flags))
+        c->scalarproduct_int16 = ff_scalarproduct_int16_avx2;
+
 }
-- 
1.9.1