[FFmpeg-devel] [PATCH 3/4] avfilter/x86/scene_sad: add AVX512 implementation
Niklas Haas
ffmpeg at haasn.xyz
Sat Jul 12 12:22:42 EEST 2025
From: Niklas Haas <git at haasn.dev>
Trivial to add, but a lot faster than the AVX2 version (on my machine):
scene_sad8_c: 114476.4 ( 1.00x)
scene_sad8_sse2: 8644.3 (13.24x)
scene_sad8_avx2: 4520.1 (25.33x)
scene_sad8_avx512: 3153.0 (36.31x)
---
libavfilter/x86/scene_sad.asm | 7 +++++++
libavfilter/x86/scene_sad_init.c | 7 +++++++
2 files changed, 14 insertions(+)
diff --git a/libavfilter/x86/scene_sad.asm b/libavfilter/x86/scene_sad.asm
index bf7236b3a3..2cd9dddb5c 100644
--- a/libavfilter/x86/scene_sad.asm
+++ b/libavfilter/x86/scene_sad.asm
@@ -72,3 +72,10 @@ INIT_YMM avx2
SAD_FRAMES
%endif
+
+%if HAVE_AVX512_EXTERNAL
+
+INIT_ZMM avx512
+SAD_FRAMES
+
+%endif
diff --git a/libavfilter/x86/scene_sad_init.c b/libavfilter/x86/scene_sad_init.c
index 4a4c40195f..2d631b376a 100644
--- a/libavfilter/x86/scene_sad_init.c
+++ b/libavfilter/x86/scene_sad_init.c
@@ -41,6 +41,9 @@ SCENE_SAD_FUNC(scene_sad_sse2, ff_scene_sad_sse2, 16)
#if HAVE_AVX2_EXTERNAL
SCENE_SAD_FUNC(scene_sad_avx2, ff_scene_sad_avx2, 32)
#endif
+#if HAVE_AVX512_EXTERNAL
+SCENE_SAD_FUNC(scene_sad_avx512, ff_scene_sad_avx512, 64)
+#endif
#endif
ff_scene_sad_fn ff_scene_sad_get_fn_x86(int depth)
@@ -48,6 +51,10 @@ ff_scene_sad_fn ff_scene_sad_get_fn_x86(int depth)
#if HAVE_X86ASM
int cpu_flags = av_get_cpu_flags();
if (depth <= 8) {
+#if HAVE_AVX512_EXTERNAL
+ if (EXTERNAL_AVX512(cpu_flags))
+ return scene_sad_avx512;
+#endif
#if HAVE_AVX2_EXTERNAL
if (EXTERNAL_AVX2_FAST(cpu_flags))
return scene_sad_avx2;
--
2.49.0
More information about the ffmpeg-devel mailing list