[FFmpeg-cvslog] avutil/cpu: split flag checks per arch in av_cpu_max_align()
James Almer
git at videolan.org
Thu Sep 28 05:15:37 EEST 2017
ffmpeg | branch: master | James Almer <jamrial at gmail.com> | Wed Sep 27 23:10:09 2017 -0300| [3b345d389be2d67017f904caa21713f53a8e8c90] | committer: James Almer
avutil/cpu: split flag checks per arch in av_cpu_max_align()
Signed-off-by: James Almer <jamrial at gmail.com>
> http://git.videolan.org/gitweb.cgi/ffmpeg.git/?a=commit;h=3b345d389be2d67017f904caa21713f53a8e8c90
---
libavutil/aarch64/cpu.c | 10 ++++++++++
libavutil/arm/cpu.c | 10 ++++++++++
libavutil/cpu.c | 39 ++++++++-------------------------------
libavutil/cpu_internal.h | 5 +++++
libavutil/ppc/cpu.c | 12 ++++++++++++
libavutil/x86/cpu.c | 27 +++++++++++++++++++++++++++
6 files changed, 72 insertions(+), 31 deletions(-)
diff --git a/libavutil/aarch64/cpu.c b/libavutil/aarch64/cpu.c
index 8ef077aaea..cc641da576 100644
--- a/libavutil/aarch64/cpu.c
+++ b/libavutil/aarch64/cpu.c
@@ -26,3 +26,13 @@ int ff_get_cpu_flags_aarch64(void)
AV_CPU_FLAG_NEON * HAVE_NEON |
AV_CPU_FLAG_VFP * HAVE_VFP;
}
+
+size_t ff_get_cpu_max_align_aarch64(void)
+{
+ int flags = av_get_cpu_flags();
+
+ if (flags & AV_CPU_FLAG_NEON)
+ return 16;
+
+ return 8;
+}
diff --git a/libavutil/arm/cpu.c b/libavutil/arm/cpu.c
index 3889ef011c..81e85e2525 100644
--- a/libavutil/arm/cpu.c
+++ b/libavutil/arm/cpu.c
@@ -158,3 +158,13 @@ int ff_get_cpu_flags_arm(void)
}
#endif
+
+size_t ff_get_cpu_max_align_arm(void)
+{
+ int flags = av_get_cpu_flags();
+
+ if (flags & AV_CPU_FLAG_NEON)
+ return 16;
+
+ return 8;
+}
diff --git a/libavutil/cpu.c b/libavutil/cpu.c
index ab04494acf..c8401b8258 100644
--- a/libavutil/cpu.c
+++ b/libavutil/cpu.c
@@ -304,37 +304,14 @@ int av_cpu_count(void)
size_t av_cpu_max_align(void)
{
- int av_unused flags = av_get_cpu_flags();
-
-#if ARCH_ARM || ARCH_AARCH64
- if (flags & AV_CPU_FLAG_NEON)
- return 16;
-#elif ARCH_PPC
- if (flags & (AV_CPU_FLAG_ALTIVEC |
- AV_CPU_FLAG_VSX |
- AV_CPU_FLAG_POWER8))
- return 16;
-#elif ARCH_X86
- if (flags & (AV_CPU_FLAG_AVX2 |
- AV_CPU_FLAG_AVX |
- AV_CPU_FLAG_XOP |
- AV_CPU_FLAG_FMA4 |
- AV_CPU_FLAG_FMA3 |
- AV_CPU_FLAG_AVXSLOW))
- return 32;
- if (flags & (AV_CPU_FLAG_AESNI |
- AV_CPU_FLAG_SSE42 |
- AV_CPU_FLAG_SSE4 |
- AV_CPU_FLAG_SSSE3 |
- AV_CPU_FLAG_SSE3 |
- AV_CPU_FLAG_SSE2 |
- AV_CPU_FLAG_SSE |
- AV_CPU_FLAG_ATOM |
- AV_CPU_FLAG_SSSE3SLOW |
- AV_CPU_FLAG_SSE3SLOW |
- AV_CPU_FLAG_SSE2SLOW))
- return 16;
-#endif
+ if (ARCH_AARCH64)
+ return ff_get_cpu_max_align_aarch64();
+ if (ARCH_ARM)
+ return ff_get_cpu_max_align_arm();
+ if (ARCH_PPC)
+ return ff_get_cpu_max_align_ppc();
+ if (ARCH_X86)
+ return ff_get_cpu_max_align_x86();
return 8;
}
diff --git a/libavutil/cpu_internal.h b/libavutil/cpu_internal.h
index 6c352abe1b..b8bf1e5396 100644
--- a/libavutil/cpu_internal.h
+++ b/libavutil/cpu_internal.h
@@ -44,4 +44,9 @@ int ff_get_cpu_flags_arm(void);
int ff_get_cpu_flags_ppc(void);
int ff_get_cpu_flags_x86(void);
+size_t ff_get_cpu_max_align_aarch64(void);
+size_t ff_get_cpu_max_align_arm(void);
+size_t ff_get_cpu_max_align_ppc(void);
+size_t ff_get_cpu_max_align_x86(void);
+
#endif /* AVUTIL_CPU_INTERNAL_H */
diff --git a/libavutil/ppc/cpu.c b/libavutil/ppc/cpu.c
index 0f1e982624..7bb7cd813c 100644
--- a/libavutil/ppc/cpu.c
+++ b/libavutil/ppc/cpu.c
@@ -148,3 +148,15 @@ out:
#endif /* HAVE_ALTIVEC */
return 0;
}
+
+size_t ff_get_cpu_max_align_ppc(void)
+{
+ int flags = av_get_cpu_flags();
+
+ if (flags & (AV_CPU_FLAG_ALTIVEC |
+ AV_CPU_FLAG_VSX |
+ AV_CPU_FLAG_POWER8))
+ return 16;
+
+ return 8;
+}
diff --git a/libavutil/x86/cpu.c b/libavutil/x86/cpu.c
index 3800a11ad8..f33088c8c7 100644
--- a/libavutil/x86/cpu.c
+++ b/libavutil/x86/cpu.c
@@ -233,3 +233,30 @@ int ff_get_cpu_flags_x86(void)
return rval;
}
+
+size_t ff_get_cpu_max_align_x86(void)
+{
+ int flags = av_get_cpu_flags();
+
+ if (flags & (AV_CPU_FLAG_AVX2 |
+ AV_CPU_FLAG_AVX |
+ AV_CPU_FLAG_XOP |
+ AV_CPU_FLAG_FMA4 |
+ AV_CPU_FLAG_FMA3 |
+ AV_CPU_FLAG_AVXSLOW))
+ return 32;
+ if (flags & (AV_CPU_FLAG_AESNI |
+ AV_CPU_FLAG_SSE42 |
+ AV_CPU_FLAG_SSE4 |
+ AV_CPU_FLAG_SSSE3 |
+ AV_CPU_FLAG_SSE3 |
+ AV_CPU_FLAG_SSE2 |
+ AV_CPU_FLAG_SSE |
+ AV_CPU_FLAG_ATOM |
+ AV_CPU_FLAG_SSSE3SLOW |
+ AV_CPU_FLAG_SSE3SLOW |
+ AV_CPU_FLAG_SSE2SLOW))
+ return 16;
+
+ return 8;
+}
More information about the ffmpeg-cvslog mailing list