[FFmpeg-devel] [PATCH 3/4] avfilter/x86/scene_sad: add AVX512 implementation

Niklas Haas ffmpeg at haasn.xyz
Sat Jul 12 12:22:42 EEST 2025


From: Niklas Haas <git at haasn.dev>

Trivial to add, and a lot faster than the AVX2 version (about 1.4x on my machine). Benchmark results:

scene_sad8_c:                                       114476.4 ( 1.00x)
scene_sad8_sse2:                                      8644.3 (13.24x)
scene_sad8_avx2:                                      4520.1 (25.33x)
scene_sad8_avx512:                                    3153.0 (36.31x)
---
 libavfilter/x86/scene_sad.asm    | 7 +++++++
 libavfilter/x86/scene_sad_init.c | 7 +++++++
 2 files changed, 14 insertions(+)

diff --git a/libavfilter/x86/scene_sad.asm b/libavfilter/x86/scene_sad.asm
index bf7236b3a3..2cd9dddb5c 100644
--- a/libavfilter/x86/scene_sad.asm
+++ b/libavfilter/x86/scene_sad.asm
@@ -72,3 +72,10 @@ INIT_YMM avx2
 SAD_FRAMES
 
 %endif
+
+%if HAVE_AVX512_EXTERNAL
+
+INIT_ZMM avx512
+SAD_FRAMES
+
+%endif
diff --git a/libavfilter/x86/scene_sad_init.c b/libavfilter/x86/scene_sad_init.c
index 4a4c40195f..2d631b376a 100644
--- a/libavfilter/x86/scene_sad_init.c
+++ b/libavfilter/x86/scene_sad_init.c
@@ -41,6 +41,9 @@ SCENE_SAD_FUNC(scene_sad_sse2, ff_scene_sad_sse2, 16)
 #if HAVE_AVX2_EXTERNAL
 SCENE_SAD_FUNC(scene_sad_avx2, ff_scene_sad_avx2, 32)
 #endif
+#if HAVE_AVX512_EXTERNAL
+SCENE_SAD_FUNC(scene_sad_avx512, ff_scene_sad_avx512, 64)
+#endif
 #endif
 
 ff_scene_sad_fn ff_scene_sad_get_fn_x86(int depth)
@@ -48,6 +51,10 @@ ff_scene_sad_fn ff_scene_sad_get_fn_x86(int depth)
 #if HAVE_X86ASM
     int cpu_flags = av_get_cpu_flags();
     if (depth <= 8) {
+#if HAVE_AVX512_EXTERNAL
+        if (EXTERNAL_AVX512(cpu_flags))
+            return scene_sad_avx512;
+#endif
 #if HAVE_AVX2_EXTERNAL
         if (EXTERNAL_AVX2_FAST(cpu_flags))
             return scene_sad_avx2;
-- 
2.49.0



More information about the ffmpeg-devel mailing list