[FFmpeg-devel] [PATCH 1/2] libavutil/cpu: Add fast gather detection.

Alan Kelly alankelly at google.com
Fri Jul 16 16:44:53 EEST 2021


Broadwell and later, and Zen 3 and later, have fast gather instructions.
---
 Haswell is now excluded from EXTERNAL_AVX2_FAST as discussed in the
 email thread.
 libavutil/cpu.h     |  1 +
 libavutil/x86/cpu.c | 11 ++++++++++-
 2 files changed, 11 insertions(+), 1 deletion(-)

diff --git a/libavutil/cpu.h b/libavutil/cpu.h
index c069076439..ec3073d021 100644
--- a/libavutil/cpu.h
+++ b/libavutil/cpu.h
@@ -113,6 +113,7 @@ void av_force_cpu_count(int count);
  *  av_set_cpu_flags_mask(), then this function will behave as if AVX is not
  *  present.
  */
+
 size_t av_cpu_max_align(void);
 
 #endif /* AVUTIL_CPU_H */
diff --git a/libavutil/x86/cpu.c b/libavutil/x86/cpu.c
index bcd41a50a2..158e2170c4 100644
--- a/libavutil/x86/cpu.c
+++ b/libavutil/x86/cpu.c
@@ -146,8 +146,17 @@ int ff_get_cpu_flags_x86(void)
     if (max_std_level >= 7) {
         cpuid(7, eax, ebx, ecx, edx);
 #if HAVE_AVX2
-        if ((rval & AV_CPU_FLAG_AVX) && (ebx & 0x00000020))
+        if ((rval & AV_CPU_FLAG_AVX) && (ebx & 0x00000020)){
             rval |= AV_CPU_FLAG_AVX2;
+
+            cpuid(1, eax, ebx, ecx, std_caps);
+            family = ((eax >> 8) & 0xf) + ((eax >> 20) & 0xff);
+            model  = ((eax >> 4) & 0xf) + ((eax >> 12) & 0xf0);
+            // Haswell and earlier have slow gather
+            if(family == 6 && model < 70)
+                rval |= AV_CPU_FLAG_AVXSLOW;
+        }
+
 #if HAVE_AVX512 /* F, CD, BW, DQ, VL */
         if ((xcr0_lo & 0xe0) == 0xe0) { /* OPMASK/ZMM state */
             if ((rval & AV_CPU_FLAG_AVX2) && (ebx & 0xd0030000) == 0xd0030000)
-- 
2.32.0.402.g57bb445576-goog



More information about the ffmpeg-devel mailing list