[FFmpeg-devel] [PATCH 2/2] avfilter/x86/vf_gblur: add postscale SIMD

Paul B Mahol onemda at gmail.com
Sat Feb 13 13:10:38 EET 2021


Signed-off-by: Paul B Mahol <onemda at gmail.com>
---
 libavfilter/x86/vf_gblur.asm    | 46 +++++++++++++++++++++++++++++++++
 libavfilter/x86/vf_gblur_init.c | 11 ++++++--
 2 files changed, 55 insertions(+), 2 deletions(-)

diff --git a/libavfilter/x86/vf_gblur.asm b/libavfilter/x86/vf_gblur.asm
index a25b1659f5..8fea6d2a61 100644
--- a/libavfilter/x86/vf_gblur.asm
+++ b/libavfilter/x86/vf_gblur.asm
@@ -183,3 +183,49 @@ HORIZ_SLICE
 INIT_XMM avx2
 HORIZ_SLICE
 %endif
+
+%macro POSTSCALE_SLICE 0 ; in place: ptr[i] = min(max(ptr[i] * postscale, min), max)
+%if UNIX64 ; SysV: the three float arguments arrive in xmm0, xmm1, xmm2
+cglobal postscale_slice, 2, 6, 4, ptr, length, postscale, min, max, x
+%else
+cglobal postscale_slice, 5, 6, 4, ptr, length, postscale, min, max, x
+%endif
+    shl lengthd, 2                 ; element count -> byte count (4-byte floats)
+%if WIN64 ; Win64 argument slots are positional: postscale = xmm2, min = xmm3
+    SWAP 0, 2                      ; m0 now names the register holding postscale
+    SWAP 1, 3                      ; m1 now names the register holding min
+    movss    xm2, maxm             ; max is the 5th argument: it lives on the stack, not in xmm4
+%endif
+    shufps   xm0, xm0, 0           ; broadcast each scalar across the low 128 bits
+    shufps   xm1, xm1, 0
+    shufps   xm2, xm2, 0
+%if cpuflag(avx2)
+    vinsertf128  m0, m0, xm0, 1    ; copy the low 128 bits into the upper ymm half
+    vinsertf128  m1, m1, xm1, 1
+    vinsertf128  m2, m2, xm2, 1
+%endif
+    xor      xq, xq                ; x = byte offset into ptr
+
+    .loop:                         ; runs at least once; every pass handles one full vector
+    movu          m3, [ptrq + xq]
+    mulps         m3, m0
+    maxps         m3, m1           ; clamp from below with min
+    minps         m3, m2           ; clamp from above with max
+    movu   [ptrq+xq], m3
+
+    add xq, mmsize
+    cmp xd, lengthd                ; NOTE(review): no scalar tail - caller must tolerate mmsize-rounded access
+    jl .loop
+
+    RET
+%endmacro
+
+%if ARCH_X86_64 ; the C init code only installs these functions on x86-64
+INIT_XMM sse4
+POSTSCALE_SLICE
+
+%if HAVE_AVX2_EXTERNAL ; guard the avx2 build with the AVX2 flag, matching EXTERNAL_AVX2() in the C init
+INIT_YMM avx2
+POSTSCALE_SLICE
+%endif
+%endif
diff --git a/libavfilter/x86/vf_gblur_init.c b/libavfilter/x86/vf_gblur_init.c
index e63e59fe23..7a9b40b0ad 100644
--- a/libavfilter/x86/vf_gblur_init.c
+++ b/libavfilter/x86/vf_gblur_init.c
@@ -27,14 +27,21 @@
 void ff_horiz_slice_sse4(float *ptr, int width, int height, int steps, float nu, float bscale);
 void ff_horiz_slice_avx2(float *ptr, int width, int height, int steps, float nu, float bscale);
 
+void ff_postscale_slice_sse4(float *ptr, int length, float postscale, float min, float max); /* in place: ptr[i] = clamp(ptr[i] * postscale, min, max); length counts floats */
+void ff_postscale_slice_avx2(float *ptr, int length, float postscale, float min, float max); /* same operation, 256-bit vectors */
+
 av_cold void ff_gblur_init_x86(GBlurContext *s)
 {
 #if ARCH_X86_64
     int cpu_flags = av_get_cpu_flags();
 
-    if (EXTERNAL_SSE4(cpu_flags))
+    if (EXTERNAL_SSE4(cpu_flags)) { /* braces needed now that two pointers are set per CPU level */
         s->horiz_slice = ff_horiz_slice_sse4;
-    if (EXTERNAL_AVX2(cpu_flags))
+        s->postscale_slice = ff_postscale_slice_sse4; /* NOTE(review): member not declared in this patch - presumably added by patch 1/2; confirm */
+    }
+    if (EXTERNAL_AVX2(cpu_flags)) { /* tested after SSE4, so the AVX2 versions override when available */
         s->horiz_slice = ff_horiz_slice_avx2;
+        s->postscale_slice = ff_postscale_slice_avx2;
+    }
 #endif
 }
-- 
2.17.1



More information about the ffmpeg-devel mailing list