[FFmpeg-devel] [PATCH 1/2] libavutil/cpu: Adds fast gather detection.
Alan Kelly
alankelly at google.com
Fri Jul 16 16:44:53 EEST 2021
Intel CPUs from Broadwell onwards and AMD CPUs from Zen 3 onwards have fast gather instructions.
---
Haswell is now excluded from EXTERNAL_AVX2_FAST as discussed in the
email thread.
libavutil/cpu.h | 1 +
libavutil/x86/cpu.c | 11 ++++++++++-
2 files changed, 11 insertions(+), 1 deletion(-)
diff --git a/libavutil/cpu.h b/libavutil/cpu.h
index c069076439..ec3073d021 100644
--- a/libavutil/cpu.h
+++ b/libavutil/cpu.h
@@ -113,6 +113,7 @@ void av_force_cpu_count(int count);
* av_set_cpu_flags_mask(), then this function will behave as if AVX is not
* present.
*/
+
size_t av_cpu_max_align(void);
#endif /* AVUTIL_CPU_H */
diff --git a/libavutil/x86/cpu.c b/libavutil/x86/cpu.c
index bcd41a50a2..158e2170c4 100644
--- a/libavutil/x86/cpu.c
+++ b/libavutil/x86/cpu.c
@@ -146,8 +146,17 @@ int ff_get_cpu_flags_x86(void)
if (max_std_level >= 7) {
cpuid(7, eax, ebx, ecx, edx);
#if HAVE_AVX2
- if ((rval & AV_CPU_FLAG_AVX) && (ebx & 0x00000020))
+ if ((rval & AV_CPU_FLAG_AVX) && (ebx & 0x00000020)){
rval |= AV_CPU_FLAG_AVX2;
+
+ cpuid(1, eax, ebx, ecx, std_caps);
+ family = ((eax >> 8) & 0xf) + ((eax >> 20) & 0xff);
+ model = ((eax >> 4) & 0xf) + ((eax >> 12) & 0xf0);
+ // Haswell and earlier have slow gather
+ if(family == 6 && model < 70)
+ rval |= AV_CPU_FLAG_AVXSLOW;
+ }
+
#if HAVE_AVX512 /* F, CD, BW, DQ, VL */
if ((xcr0_lo & 0xe0) == 0xe0) { /* OPMASK/ZMM state */
if ((rval & AV_CPU_FLAG_AVX2) && (ebx & 0xd0030000) == 0xd0030000)
--
2.32.0.402.g57bb445576-goog
More information about the ffmpeg-devel
mailing list