[FFmpeg-devel] [PATCH 1/6] avutil/cpu: add AVX512 Icelake flag

Wu, Jianhua jianhua.wu at intel.com
Wed Mar 9 09:38:05 EET 2022


Ping.
> From: Wu, Jianhua
> Sent: Wednesday, March 2, 2022 1:34 PM
> To: ffmpeg-devel at ffmpeg.org
> Subject: RE: [PATCH 1/6] avutil/cpu: add AVX512 Icelake flag
> 
> Ping.
> > From: Wu, Jianhua <jianhua.wu at intel.com>
> > Sent: Wednesday, February 23, 2022 4:58 PM
> > To: ffmpeg-devel at ffmpeg.org
> > Cc: Wu, Jianhua <jianhua.wu at intel.com>
> > Subject: [PATCH 1/6] avutil/cpu: add AVX512 Icelake flag
> >
> > From: Wu Jianhua <jianhua.wu at intel.com>
> >
> > Signed-off-by: Wu Jianhua <jianhua.wu at intel.com>
> > ---
> >  configure                 | 13 +++++++---
> >  libavutil/cpu.c           |  1 +
> >  libavutil/cpu.h           |  1 +
> >  libavutil/x86/cpu.c       |  8 ++++--
> >  libavutil/x86/cpu.h       |  1 +
> >  libavutil/x86/x86inc.asm  | 53
> > ++++++++++++++++++++-------------------
> >  tests/checkasm/checkasm.c | 35 +++++++++++++-------------
> >  7 files changed, 63 insertions(+), 49 deletions(-)
> >
> > diff --git a/configure b/configure
> > index 1535dc3c5b..d88c2ae979 100755
> > --- a/configure
> > +++ b/configure
> > @@ -444,6 +444,7 @@ Optimization options (experts only):
> >    --disable-fma4           disable FMA4 optimizations
> >    --disable-avx2           disable AVX2 optimizations
> >    --disable-avx512         disable AVX-512 optimizations
> > +  --disable-avx512icl      disable AVX-512ICL optimizations
> >    --disable-aesni          disable AESNI optimizations
> >    --disable-armv5te        disable armv5te optimizations
> >    --disable-armv6          disable armv6 optimizations
> > @@ -2098,6 +2099,7 @@ ARCH_EXT_LIST_X86_SIMD="
> >      avx
> >      avx2
> >      avx512
> > +    avx512icl
> >      fma3
> >      fma4
> >      mmx
> > @@ -2666,6 +2668,7 @@ fma3_deps="avx"
> >  fma4_deps="avx"
> >  avx2_deps="avx"
> >  avx512_deps="avx2"
> > +avx512icl_deps="avx512"
> >
> >  mmx_external_deps="x86asm"
> >  mmx_inline_deps="inline_asm x86"
> > @@ -6128,10 +6131,11 @@ EOF
> >              elf*) enabled debug && append X86ASMFLAGS $x86asm_debug ;;
> >          esac
> >
> > -        enabled avx512 && check_x86asm avx512_external "vmovdqa32 [eax]{k1}{z}, zmm0"
> > -        enabled avx2   && check_x86asm avx2_external   "vextracti128 xmm0, ymm0, 0"
> > -        enabled xop    && check_x86asm xop_external    "vpmacsdd xmm0, xmm1, xmm2, xmm3"
> > -        enabled fma4   && check_x86asm fma4_external   "vfmaddps ymm0, ymm1, ymm2, ymm3"
> > +        enabled avx512    && check_x86asm avx512_external    "vmovdqa32 [eax]{k1}{z}, zmm0"
> > +        enabled avx512icl && check_x86asm avx512icl_external "vpdpwssds zmm31{k1}{z}, zmm29, zmm28"
> > +        enabled avx2      && check_x86asm avx2_external      "vextracti128 xmm0, ymm0, 0"
> > +        enabled xop       && check_x86asm xop_external       "vpmacsdd xmm0, xmm1, xmm2, xmm3"
> > +        enabled fma4      && check_x86asm fma4_external      "vfmaddps ymm0, ymm1, ymm2, ymm3"
> >          check_x86asm cpunop          "CPU amdnop"
> >      fi
> >
> > @@ -7471,6 +7475,7 @@ if enabled x86; then
> >      echo "AVX enabled               ${avx-no}"
> >      echo "AVX2 enabled              ${avx2-no}"
> >      echo "AVX-512 enabled           ${avx512-no}"
> > +    echo "AVX-512ICL enabled        ${avx512icl-no}"
> >      echo "XOP enabled               ${xop-no}"
> >      echo "FMA3 enabled              ${fma3-no}"
> >      echo "FMA4 enabled              ${fma4-no}"
> > diff --git a/libavutil/cpu.c b/libavutil/cpu.c index
> > 1368502245..833c220192
> > 100644
> > --- a/libavutil/cpu.c
> > +++ b/libavutil/cpu.c
> > @@ -137,6 +137,7 @@ int av_parse_cpu_caps(unsigned *flags, const char
> *s)
> >          { "cmov",     NULL, 0, AV_OPT_TYPE_CONST, { .i64 =
> > AV_CPU_FLAG_CMOV     },    .unit = "flags" },
> >          { "aesni",    NULL, 0, AV_OPT_TYPE_CONST, { .i64 =
> > AV_CPU_FLAG_AESNI    },    .unit = "flags" },
> >          { "avx512"  , NULL, 0, AV_OPT_TYPE_CONST, { .i64 =
> > AV_CPU_FLAG_AVX512   },    .unit = "flags" },
> > +        { "avx512icl",  NULL, 0, AV_OPT_TYPE_CONST, { .i64 =
> > AV_CPU_FLAG_AVX512ICL   }, .unit = "flags" },
> >          { "slowgather", NULL, 0, AV_OPT_TYPE_CONST, { .i64 =
> > AV_CPU_FLAG_SLOW_GATHER }, .unit = "flags" },
> >
> >  #define CPU_FLAG_P2 AV_CPU_FLAG_CMOV | AV_CPU_FLAG_MMX
> > diff --git a/libavutil/cpu.h b/libavutil/cpu.h
> > index ce9bf14bf7..9711e574c5 100644
> > --- a/libavutil/cpu.h
> > +++ b/libavutil/cpu.h
> > @@ -54,6 +54,7 @@
> >  #define AV_CPU_FLAG_BMI1        0x20000 ///< Bit Manipulation
> Instruction
> > Set 1
> >  #define AV_CPU_FLAG_BMI2        0x40000 ///< Bit Manipulation
> Instruction
> > Set 2
> >  #define AV_CPU_FLAG_AVX512     0x100000 ///< AVX-512 functions:
> > requires OS support even if YMM/ZMM registers aren't used
> > +#define AV_CPU_FLAG_AVX512ICL  0x200000 ///< F/CD/BW/DQ/VL/VNNI/IFMA/VBMI/VBMI2/VPOPCNTDQ/BITALG/GFNI/VAES/VPCLMULQDQ
> >  #define AV_CPU_FLAG_SLOW_GATHER  0x2000000 ///< CPU has slow
> gathers.
> >
> >  #define AV_CPU_FLAG_ALTIVEC      0x0001 ///< standard
> > diff --git a/libavutil/x86/cpu.c b/libavutil/x86/cpu.c index
> > 7b13fcae91..d6cd4fab9c 100644
> > --- a/libavutil/x86/cpu.c
> > +++ b/libavutil/x86/cpu.c
> > @@ -150,9 +150,13 @@ int ff_get_cpu_flags_x86(void)
> >              rval |= AV_CPU_FLAG_AVX2;
> >  #if HAVE_AVX512 /* F, CD, BW, DQ, VL */
> >          if ((xcr0_lo & 0xe0) == 0xe0) { /* OPMASK/ZMM state */
> > -            if ((rval & AV_CPU_FLAG_AVX2) && (ebx & 0xd0030000) == 0xd0030000)
> > +            if ((rval & AV_CPU_FLAG_AVX2) && (ebx & 0xd0030000) == 0xd0030000) {
> >                  rval |= AV_CPU_FLAG_AVX512;
> > -
> > +#if HAVE_AVX512ICL
> > +                if ((ebx & 0xd0200000) == 0xd0200000 && (ecx & 0x5f42) == 0x5f42)
> > +                    rval |= AV_CPU_FLAG_AVX512ICL;
> > +#endif /* HAVE_AVX512ICL */
> > +            }
> >          }
> >  #endif /* HAVE_AVX512 */
> >  #endif /* HAVE_AVX2 */
> > diff --git a/libavutil/x86/cpu.h b/libavutil/x86/cpu.h index
> > 937c697fa0..40a1eef0ab 100644
> > --- a/libavutil/x86/cpu.h
> > +++ b/libavutil/x86/cpu.h
> > @@ -80,6 +80,7 @@
> >  #define EXTERNAL_AVX2_SLOW(flags)   CPUEXT_SUFFIX_SLOW2(flags,
> > _EXTERNAL, AVX2, AVX)
> >  #define EXTERNAL_AESNI(flags)       CPUEXT_SUFFIX(flags, _EXTERNAL,
> > AESNI)
> >  #define EXTERNAL_AVX512(flags)      CPUEXT_SUFFIX(flags, _EXTERNAL,
> > AVX512)
> > +#define EXTERNAL_AVX512ICL(flags)   CPUEXT_SUFFIX(flags, _EXTERNAL,
> > AVX512ICL)
> >
> >  #define INLINE_AMD3DNOW(flags)      CPUEXT_SUFFIX(flags, _INLINE,
> > AMD3DNOW)
> >  #define INLINE_AMD3DNOWEXT(flags)   CPUEXT_SUFFIX(flags, _INLINE,
> > AMD3DNOWEXT)
> > diff --git a/libavutil/x86/x86inc.asm b/libavutil/x86/x86inc.asm index
> > 01c35e3a4b..251ee797de 100644
> > --- a/libavutil/x86/x86inc.asm
> > +++ b/libavutil/x86/x86inc.asm
> > @@ -817,32 +817,33 @@ BRANCH_INSTR jz, je, jnz, jne, jl, jle, jnl,
> > jnle, jg, jge, jng, jnge, ja, jae,
> >
> >  ; cpuflags
> >
> > -%assign cpuflags_mmx      (1<<0)
> > -%assign cpuflags_mmx2     (1<<1) | cpuflags_mmx
> > -%assign cpuflags_3dnow    (1<<2) | cpuflags_mmx
> > -%assign cpuflags_3dnowext (1<<3) | cpuflags_3dnow
> > -%assign cpuflags_sse      (1<<4) | cpuflags_mmx2
> > -%assign cpuflags_sse2     (1<<5) | cpuflags_sse
> > -%assign cpuflags_sse2slow (1<<6) | cpuflags_sse2
> > -%assign cpuflags_lzcnt    (1<<7) | cpuflags_sse2
> > -%assign cpuflags_sse3     (1<<8) | cpuflags_sse2
> > -%assign cpuflags_ssse3    (1<<9) | cpuflags_sse3
> > -%assign cpuflags_sse4     (1<<10)| cpuflags_ssse3
> > -%assign cpuflags_sse42    (1<<11)| cpuflags_sse4
> > -%assign cpuflags_aesni    (1<<12)| cpuflags_sse42
> > -%assign cpuflags_avx      (1<<13)| cpuflags_sse42
> > -%assign cpuflags_xop      (1<<14)| cpuflags_avx
> > -%assign cpuflags_fma4     (1<<15)| cpuflags_avx
> > -%assign cpuflags_fma3     (1<<16)| cpuflags_avx
> > -%assign cpuflags_bmi1     (1<<17)| cpuflags_avx|cpuflags_lzcnt
> > -%assign cpuflags_bmi2     (1<<18)| cpuflags_bmi1
> > -%assign cpuflags_avx2     (1<<19)| cpuflags_fma3|cpuflags_bmi2
> > -%assign cpuflags_avx512   (1<<20)| cpuflags_avx2 ; F, CD, BW, DQ, VL
> > -
> > -%assign cpuflags_cache32  (1<<21)
> > -%assign cpuflags_cache64  (1<<22)
> > -%assign cpuflags_aligned  (1<<23) ; not a cpu feature, but a function
> variant
> > -%assign cpuflags_atom     (1<<24)
> > +%assign cpuflags_mmx       (1<<0)
> > +%assign cpuflags_mmx2      (1<<1) | cpuflags_mmx
> > +%assign cpuflags_3dnow     (1<<2) | cpuflags_mmx
> > +%assign cpuflags_3dnowext  (1<<3) | cpuflags_3dnow
> > +%assign cpuflags_sse       (1<<4) | cpuflags_mmx2
> > +%assign cpuflags_sse2      (1<<5) | cpuflags_sse
> > +%assign cpuflags_sse2slow  (1<<6) | cpuflags_sse2
> > +%assign cpuflags_lzcnt     (1<<7) | cpuflags_sse2
> > +%assign cpuflags_sse3      (1<<8) | cpuflags_sse2
> > +%assign cpuflags_ssse3     (1<<9) | cpuflags_sse3
> > +%assign cpuflags_sse4      (1<<10)| cpuflags_ssse3
> > +%assign cpuflags_sse42     (1<<11)| cpuflags_sse4
> > +%assign cpuflags_aesni     (1<<12)| cpuflags_sse42
> > +%assign cpuflags_avx       (1<<13)| cpuflags_sse42
> > +%assign cpuflags_xop       (1<<14)| cpuflags_avx
> > +%assign cpuflags_fma4      (1<<15)| cpuflags_avx
> > +%assign cpuflags_fma3      (1<<16)| cpuflags_avx
> > +%assign cpuflags_bmi1      (1<<17)| cpuflags_avx|cpuflags_lzcnt
> > +%assign cpuflags_bmi2      (1<<18)| cpuflags_bmi1
> > +%assign cpuflags_avx2      (1<<19)| cpuflags_fma3|cpuflags_bmi2
> > +%assign cpuflags_avx512    (1<<20)| cpuflags_avx2 ; F, CD, BW, DQ, VL
> > +%assign cpuflags_avx512icl (1<<25)| cpuflags_avx512
> > +
> > +%assign cpuflags_cache32   (1<<21)
> > +%assign cpuflags_cache64   (1<<22)
> > +%assign cpuflags_aligned   (1<<23) ; not a cpu feature, but a function
> variant
> > +%assign cpuflags_atom      (1<<24)
> >
> >  ; Returns a boolean value expressing whether or not the specified
> > cpuflag is enabled.
> >  %define    cpuflag(x) (((((cpuflags & (cpuflags_ %+ x)) ^ (cpuflags_ %+ x)) -
> > 1) >> 31) & 1)
> > diff --git a/tests/checkasm/checkasm.c b/tests/checkasm/checkasm.c
> > index f74125e810..e77b4ec20f 100644
> > --- a/tests/checkasm/checkasm.c
> > +++ b/tests/checkasm/checkasm.c
> > @@ -220,23 +220,24 @@ static const struct {
> >      { "MMI",      "mmi",      AV_CPU_FLAG_MMI },
> >      { "MSA",      "msa",      AV_CPU_FLAG_MSA },
> >  #elif ARCH_X86
> > -    { "MMX",      "mmx",      AV_CPU_FLAG_MMX|AV_CPU_FLAG_CMOV },
> > -    { "MMXEXT",   "mmxext",   AV_CPU_FLAG_MMXEXT },
> > -    { "3DNOW",    "3dnow",    AV_CPU_FLAG_3DNOW },
> > -    { "3DNOWEXT", "3dnowext", AV_CPU_FLAG_3DNOWEXT },
> > -    { "SSE",      "sse",      AV_CPU_FLAG_SSE },
> > -    { "SSE2",     "sse2",     AV_CPU_FLAG_SSE2|AV_CPU_FLAG_SSE2SLOW },
> > -    { "SSE3",     "sse3",     AV_CPU_FLAG_SSE3|AV_CPU_FLAG_SSE3SLOW },
> > -    { "SSSE3",    "ssse3",    AV_CPU_FLAG_SSSE3|AV_CPU_FLAG_ATOM },
> > -    { "SSE4.1",   "sse4",     AV_CPU_FLAG_SSE4 },
> > -    { "SSE4.2",   "sse42",    AV_CPU_FLAG_SSE42 },
> > -    { "AES-NI",   "aesni",    AV_CPU_FLAG_AESNI },
> > -    { "AVX",      "avx",      AV_CPU_FLAG_AVX },
> > -    { "XOP",      "xop",      AV_CPU_FLAG_XOP },
> > -    { "FMA3",     "fma3",     AV_CPU_FLAG_FMA3 },
> > -    { "FMA4",     "fma4",     AV_CPU_FLAG_FMA4 },
> > -    { "AVX2",     "avx2",     AV_CPU_FLAG_AVX2 },
> > -    { "AVX-512",  "avx512",   AV_CPU_FLAG_AVX512 },
> > +    { "MMX",        "mmx",       AV_CPU_FLAG_MMX|AV_CPU_FLAG_CMOV },
> > +    { "MMXEXT",     "mmxext",    AV_CPU_FLAG_MMXEXT },
> > +    { "3DNOW",      "3dnow",     AV_CPU_FLAG_3DNOW },
> > +    { "3DNOWEXT",   "3dnowext",  AV_CPU_FLAG_3DNOWEXT },
> > +    { "SSE",        "sse",       AV_CPU_FLAG_SSE },
> > +    { "SSE2",       "sse2",
> AV_CPU_FLAG_SSE2|AV_CPU_FLAG_SSE2SLOW },
> > +    { "SSE3",       "sse3",
> AV_CPU_FLAG_SSE3|AV_CPU_FLAG_SSE3SLOW },
> > +    { "SSSE3",      "ssse3",     AV_CPU_FLAG_SSSE3|AV_CPU_FLAG_ATOM },
> > +    { "SSE4.1",     "sse4",      AV_CPU_FLAG_SSE4 },
> > +    { "SSE4.2",     "sse42",     AV_CPU_FLAG_SSE42 },
> > +    { "AES-NI",     "aesni",     AV_CPU_FLAG_AESNI },
> > +    { "AVX",        "avx",       AV_CPU_FLAG_AVX },
> > +    { "XOP",        "xop",       AV_CPU_FLAG_XOP },
> > +    { "FMA3",       "fma3",      AV_CPU_FLAG_FMA3 },
> > +    { "FMA4",       "fma4",      AV_CPU_FLAG_FMA4 },
> > +    { "AVX2",       "avx2",      AV_CPU_FLAG_AVX2 },
> > +    { "AVX-512",    "avx512",    AV_CPU_FLAG_AVX512 },
> > +    { "AVX-512ICL", "avx512icl", AV_CPU_FLAG_AVX512ICL },
> >  #elif ARCH_LOONGARCH
> >      { "LSX",      "lsx",      AV_CPU_FLAG_LSX },
> >      { "LASX",     "lasx",     AV_CPU_FLAG_LASX },
> > --
> > 2.17.1

Hi there,

These patches were sent two weeks ago but have received no response so far. Could the
maintainers of the CPU flags and native HEVC decoding code help review this patchset?

Thanks,
Jianhua



More information about the ffmpeg-devel mailing list