[FFmpeg-devel] [RFC] avx2 and scalarproduct
Paul B Mahol
onemda at gmail.com
Fri Oct 9 22:08:21 CEST 2015
diff --git a/libavcodec/x86/audiodsp.asm b/libavcodec/x86/audiodsp.asm
index 3ffb27f..246e945 100644
--- a/libavcodec/x86/audiodsp.asm
+++ b/libavcodec/x86/audiodsp.asm
@@ -41,7 +41,12 @@ cglobal scalarproduct_int16, 3,3,3, v1, v2, order
add orderq, mmsize*2
jl .loop
HADDD m2, m0
+%if cpuflag(avx2)
+ ; movd has no ymm form, so read the low dword through the xmm alias
+ movd eax, xm2
+%else
movd eax, m2
+%endif
%if mmsize == 8
emms
%endif
@@ -52,6 +59,8 @@ INIT_MMX mmxext
SCALARPRODUCT
INIT_XMM sse2
SCALARPRODUCT
+INIT_YMM avx2
+SCALARPRODUCT
;-----------------------------------------------------------------------------
diff --git a/libavcodec/x86/audiodsp_init.c b/libavcodec/x86/audiodsp_init.c
index a2ce231..b99a129 100644
--- a/libavcodec/x86/audiodsp_init.c
+++ b/libavcodec/x86/audiodsp_init.c
@@ -29,6 +29,8 @@ int32_t ff_scalarproduct_int16_mmxext(const int16_t *v1, const int16_t *v2,
int order);
int32_t ff_scalarproduct_int16_sse2(const int16_t *v1, const int16_t *v2,
int order);
+int32_t ff_scalarproduct_int16_avx2(const int16_t *v1, const int16_t *v2,
+ int order);
void ff_vector_clip_int32_mmx(int32_t *dst, const int32_t *src,
int32_t min, int32_t max, unsigned int len);
@@ -64,4 +66,8 @@ av_cold void ff_audiodsp_init_x86(AudioDSPContext *c)
if (EXTERNAL_SSE4(cpu_flags))
c->vector_clip_int32 = ff_vector_clip_int32_sse4;
+
+ if (EXTERNAL_AVX2(cpu_flags))
+ c->scalarproduct_int16 = ff_scalarproduct_int16_avx2;
+
}
--
1.9.1
More information about the ffmpeg-devel
mailing list