[FFmpeg-devel] [PATCH v2 1/5] libavfilter/x86/vf_gblur: add ff_postscale_slice_avx512()

Wu Jianhua jianhua.wu at intel.com
Wed Aug 4 05:06:12 EEST 2021


Co-authored-by: Cheng Yanfei <yanfei.cheng at intel.com>
Co-authored-by: Jin Jun <jun.i.jin at intel.com>
Signed-off-by: Wu Jianhua <jianhua.wu at intel.com>
---
 libavfilter/x86/vf_gblur.asm    | 21 ++++++++++++---------
 libavfilter/x86/vf_gblur_init.c |  4 ++++
 2 files changed, 16 insertions(+), 9 deletions(-)

diff --git a/libavfilter/x86/vf_gblur.asm b/libavfilter/x86/vf_gblur.asm
index 4d84e6d011..276fe347f5 100644
--- a/libavfilter/x86/vf_gblur.asm
+++ b/libavfilter/x86/vf_gblur.asm
@@ -194,19 +194,17 @@ cglobal postscale_slice, 2, 2, 4, ptr, length, postscale, min, max
     VBROADCASTSS m1, minm
     VBROADCASTSS m2, maxm
 %elif WIN64
-    SWAP 0, 2
-    SWAP 1, 3
-    VBROADCASTSS m0, xm0
-    VBROADCASTSS m1, xm1
+    VBROADCASTSS m0, xmm2
+    VBROADCASTSS m1, xmm3
     VBROADCASTSS m2, maxm
-%else ; UNIX64
-    VBROADCASTSS m0, xm0
-    VBROADCASTSS m1, xm1
-    VBROADCASTSS m2, xm2
+%else ; UNIX
+    VBROADCASTSS m0, xmm0
+    VBROADCASTSS m1, xmm1
+    VBROADCASTSS m2, xmm2
 %endif
 
     .loop:
-%if cpuflag(avx2)
+%if cpuflag(avx2) || cpuflag(avx512)
     mulps         m3, m0, [ptrq + lengthq]
 %else
     movu          m3, [ptrq + lengthq]
@@ -229,3 +227,8 @@ POSTSCALE_SLICE
 INIT_YMM avx2
 POSTSCALE_SLICE
 %endif
+
+%if HAVE_AVX512_EXTERNAL
+INIT_ZMM avx512
+POSTSCALE_SLICE
+%endif
diff --git a/libavfilter/x86/vf_gblur_init.c b/libavfilter/x86/vf_gblur_init.c
index d80fb46fe4..34aba4ca6e 100644
--- a/libavfilter/x86/vf_gblur_init.c
+++ b/libavfilter/x86/vf_gblur_init.c
@@ -29,6 +29,7 @@ void ff_horiz_slice_avx2(float *ptr, int width, int height, int steps, float nu,
 
 void ff_postscale_slice_sse(float *ptr, int length, float postscale, float min, float max);
 void ff_postscale_slice_avx2(float *ptr, int length, float postscale, float min, float max);
+void ff_postscale_slice_avx512(float *ptr, int length, float postscale, float min, float max);
 
 av_cold void ff_gblur_init_x86(GBlurContext *s)
 {
@@ -47,5 +48,8 @@ av_cold void ff_gblur_init_x86(GBlurContext *s)
     if (EXTERNAL_AVX2(cpu_flags)) {
         s->horiz_slice = ff_horiz_slice_avx2;
     }
+    if (EXTERNAL_AVX512(cpu_flags)) {
+        s->postscale_slice = ff_postscale_slice_avx512;
+    }
 #endif
 }
-- 
2.17.1



More information about the ffmpeg-devel mailing list