[FFmpeg-devel] [PATCH v2 1/5] libavfilter/x86/vf_gblur: add ff_postscale_slice_avx512()

Wu, Jianhua jianhua.wu at intel.com
Wed Aug 11 04:13:40 EEST 2021


Ping

> -----Original Message-----
> From: Wu, Jianhua <jianhua.wu at intel.com>
> Sent: Wednesday, August 4, 2021 10:06 AM
> To: ffmpeg-devel at ffmpeg.org
> Cc: Wu, Jianhua <jianhua.wu at intel.com>
> Subject: [PATCH v2 1/5] libavfilter/x86/vf_gblur: add ff_postscale_slice_avx512()
> 
> Co-authored-by: Cheng Yanfei <yanfei.cheng at intel.com>
> Co-authored-by: Jin Jun <jun.i.jin at intel.com>
> Signed-off-by: Wu Jianhua <jianhua.wu at intel.com>
> ---
>  libavfilter/x86/vf_gblur.asm    | 21 ++++++++++++---------
>  libavfilter/x86/vf_gblur_init.c |  4 ++++
>  2 files changed, 16 insertions(+), 9 deletions(-)
> 
> diff --git a/libavfilter/x86/vf_gblur.asm b/libavfilter/x86/vf_gblur.asm
> index 4d84e6d011..276fe347f5 100644
> --- a/libavfilter/x86/vf_gblur.asm
> +++ b/libavfilter/x86/vf_gblur.asm
> @@ -194,19 +194,17 @@ cglobal postscale_slice, 2, 2, 4, ptr, length, postscale, min, max
>      VBROADCASTSS m1, minm
>      VBROADCASTSS m2, maxm
>  %elif WIN64
> -    SWAP 0, 2
> -    SWAP 1, 3
> -    VBROADCASTSS m0, xm0
> -    VBROADCASTSS m1, xm1
> +    VBROADCASTSS m0, xmm2
> +    VBROADCASTSS m1, xmm3
>      VBROADCASTSS m2, maxm
> -%else ; UNIX64
> -    VBROADCASTSS m0, xm0
> -    VBROADCASTSS m1, xm1
> -    VBROADCASTSS m2, xm2
> +%else ; UNIX
> +    VBROADCASTSS m0, xmm0
> +    VBROADCASTSS m1, xmm1
> +    VBROADCASTSS m2, xmm2
>  %endif
> 
>      .loop:
> -%if cpuflag(avx2)
> +%if cpuflag(avx2) || cpuflag(avx512)
>      mulps         m3, m0, [ptrq + lengthq]
>  %else
>      movu          m3, [ptrq + lengthq]
> @@ -229,3 +227,8 @@ POSTSCALE_SLICE
>  INIT_YMM avx2
>  POSTSCALE_SLICE
>  %endif
> +
> +%if HAVE_AVX512_EXTERNAL
> +INIT_ZMM avx512
> +POSTSCALE_SLICE
> +%endif
> diff --git a/libavfilter/x86/vf_gblur_init.c b/libavfilter/x86/vf_gblur_init.c
> index d80fb46fe4..34aba4ca6e 100644
> --- a/libavfilter/x86/vf_gblur_init.c
> +++ b/libavfilter/x86/vf_gblur_init.c
> @@ -29,6 +29,7 @@ void ff_horiz_slice_avx2(float *ptr, int width, int height, int steps, float nu,
> 
>  void ff_postscale_slice_sse(float *ptr, int length, float postscale, float min, float max);
>  void ff_postscale_slice_avx2(float *ptr, int length, float postscale, float min, float max);
> +void ff_postscale_slice_avx512(float *ptr, int length, float postscale, float min, float max);
> 
>  av_cold void ff_gblur_init_x86(GBlurContext *s)
>  {
> @@ -47,5 +48,8 @@ av_cold void ff_gblur_init_x86(GBlurContext *s)
>      if (EXTERNAL_AVX2(cpu_flags)) {
>          s->horiz_slice = ff_horiz_slice_avx2;
>      }
> +    if (EXTERNAL_AVX512(cpu_flags)) {
> +        s->postscale_slice = ff_postscale_slice_avx512;
> +    }
>  #endif
>  }
> --
> 2.17.1



More information about the ffmpeg-devel mailing list