[FFmpeg-devel] [PATCH v2 2/5] aarch64: Add cpu flags for the dotprod and i8mm extensions

Martin Storsjö martin at martin.st
Tue May 30 15:30:40 EEST 2023


Report these flags as available when the compiler unconditionally
enables the corresponding extensions for the build target.
---
Fixed the name of the __ARM_FEATURE define used for detecting i8mm.
---
 libavutil/aarch64/cpu.c   | 15 ++++++++++++---
 libavutil/aarch64/cpu.h   |  2 ++
 libavutil/cpu.c           |  2 ++
 libavutil/cpu.h           |  2 ++
 libavutil/tests/cpu.c     |  2 ++
 tests/checkasm/checkasm.c |  2 ++
 6 files changed, 22 insertions(+), 3 deletions(-)

diff --git a/libavutil/aarch64/cpu.c b/libavutil/aarch64/cpu.c
index cc641da576..0c76f5ad15 100644
--- a/libavutil/aarch64/cpu.c
+++ b/libavutil/aarch64/cpu.c
@@ -22,9 +22,18 @@
 
 int ff_get_cpu_flags_aarch64(void)
 {
-    return AV_CPU_FLAG_ARMV8 * HAVE_ARMV8 |
-           AV_CPU_FLAG_NEON  * HAVE_NEON  |
-           AV_CPU_FLAG_VFP   * HAVE_VFP;
+    int flags = AV_CPU_FLAG_ARMV8 * HAVE_ARMV8 |
+                AV_CPU_FLAG_NEON  * HAVE_NEON  |
+                AV_CPU_FLAG_VFP   * HAVE_VFP;
+
+#ifdef __ARM_FEATURE_DOTPROD
+    flags |= AV_CPU_FLAG_DOTPROD;
+#endif
+#ifdef __ARM_FEATURE_MATMUL_INT8
+    flags |= AV_CPU_FLAG_I8MM;
+#endif
+
+    return flags;
 }
 
 size_t ff_get_cpu_max_align_aarch64(void)
diff --git a/libavutil/aarch64/cpu.h b/libavutil/aarch64/cpu.h
index 2ee3f9323a..64d703be37 100644
--- a/libavutil/aarch64/cpu.h
+++ b/libavutil/aarch64/cpu.h
@@ -25,5 +25,7 @@
 #define have_armv8(flags) CPUEXT(flags, ARMV8)
 #define have_neon(flags) CPUEXT(flags, NEON)
 #define have_vfp(flags)  CPUEXT(flags, VFP)
+#define have_dotprod(flags) CPUEXT(flags, DOTPROD)
+#define have_i8mm(flags)    CPUEXT(flags, I8MM)
 
 #endif /* AVUTIL_AARCH64_CPU_H */
diff --git a/libavutil/cpu.c b/libavutil/cpu.c
index 2c5f7f4958..2ffc3986aa 100644
--- a/libavutil/cpu.c
+++ b/libavutil/cpu.c
@@ -174,6 +174,8 @@ int av_parse_cpu_caps(unsigned *flags, const char *s)
         { "armv8",    NULL, 0, AV_OPT_TYPE_CONST, { .i64 = AV_CPU_FLAG_ARMV8    },    .unit = "flags" },
         { "neon",     NULL, 0, AV_OPT_TYPE_CONST, { .i64 = AV_CPU_FLAG_NEON     },    .unit = "flags" },
         { "vfp",      NULL, 0, AV_OPT_TYPE_CONST, { .i64 = AV_CPU_FLAG_VFP      },    .unit = "flags" },
+        { "dotprod",  NULL, 0, AV_OPT_TYPE_CONST, { .i64 = AV_CPU_FLAG_DOTPROD  },    .unit = "flags" },
+        { "i8mm",     NULL, 0, AV_OPT_TYPE_CONST, { .i64 = AV_CPU_FLAG_I8MM     },    .unit = "flags" },
 #elif ARCH_MIPS
         { "mmi",      NULL, 0, AV_OPT_TYPE_CONST, { .i64 = AV_CPU_FLAG_MMI      },    .unit = "flags" },
         { "msa",      NULL, 0, AV_OPT_TYPE_CONST, { .i64 = AV_CPU_FLAG_MSA      },    .unit = "flags" },
diff --git a/libavutil/cpu.h b/libavutil/cpu.h
index 8fa5ea9199..da486f9c7a 100644
--- a/libavutil/cpu.h
+++ b/libavutil/cpu.h
@@ -69,6 +69,8 @@
 #define AV_CPU_FLAG_NEON         (1 << 5)
 #define AV_CPU_FLAG_ARMV8        (1 << 6)
 #define AV_CPU_FLAG_VFP_VM       (1 << 7) ///< VFPv2 vector mode, deprecated in ARMv7-A and unavailable in various CPUs implementations
+#define AV_CPU_FLAG_DOTPROD      (1 << 8)
+#define AV_CPU_FLAG_I8MM         (1 << 9)
 #define AV_CPU_FLAG_SETEND       (1 <<16)
 
 #define AV_CPU_FLAG_MMI          (1 << 0)
diff --git a/libavutil/tests/cpu.c b/libavutil/tests/cpu.c
index dadadb31dc..a52637339d 100644
--- a/libavutil/tests/cpu.c
+++ b/libavutil/tests/cpu.c
@@ -38,6 +38,8 @@ static const struct {
     { AV_CPU_FLAG_ARMV8,     "armv8"      },
     { AV_CPU_FLAG_NEON,      "neon"       },
     { AV_CPU_FLAG_VFP,       "vfp"        },
+    { AV_CPU_FLAG_DOTPROD,   "dotprod"    },
+    { AV_CPU_FLAG_I8MM,      "i8mm"       },
 #elif ARCH_ARM
     { AV_CPU_FLAG_ARMV5TE,   "armv5te"    },
     { AV_CPU_FLAG_ARMV6,     "armv6"      },
diff --git a/tests/checkasm/checkasm.c b/tests/checkasm/checkasm.c
index 7389ebaee9..4311a8ffcb 100644
--- a/tests/checkasm/checkasm.c
+++ b/tests/checkasm/checkasm.c
@@ -230,6 +230,8 @@ static const struct {
 #if   ARCH_AARCH64
     { "ARMV8",    "armv8",    AV_CPU_FLAG_ARMV8 },
     { "NEON",     "neon",     AV_CPU_FLAG_NEON },
+    { "DOTPROD",  "dotprod",  AV_CPU_FLAG_DOTPROD },
+    { "I8MM",     "i8mm",     AV_CPU_FLAG_I8MM },
 #elif ARCH_ARM
     { "ARMV5TE",  "armv5te",  AV_CPU_FLAG_ARMV5TE },
     { "ARMV6",    "armv6",    AV_CPU_FLAG_ARMV6 },
-- 
2.37.1 (Apple Git-137.1)



More information about the ffmpeg-devel mailing list