[FFmpeg-cvslog] swscale/x86/rgb2rgb: Remove obsolete MMX, 3dnow functions
Andreas Rheinhardt
git at videolan.org
Wed Jun 22 15:29:05 EEST 2022
ffmpeg | branch: master | Andreas Rheinhardt <andreas.rheinhardt at outlook.com> | Sat Jun 11 01:07:57 2022 +0200| [608319a311a31f7d85333a7b08286c00be38eab6] | committer: Andreas Rheinhardt
swscale/x86/rgb2rgb: Remove obsolete MMX, 3dnow functions
x64 always has MMX, MMXEXT, SSE and SSE2 and this means
that some functions for MMX, MMXEXT and 3dnow are always
overridden by other functions (unless one e.g. explicitly
disables SSE2) for x64. So given that the only systems that
benefit from these functions are truly ancient 32-bit x86s,
they are removed.
Signed-off-by: Andreas Rheinhardt <andreas.rheinhardt at outlook.com>
> http://git.videolan.org/gitweb.cgi/ffmpeg.git/?a=commit;h=608319a311a31f7d85333a7b08286c00be38eab6
---
libswscale/x86/rgb2rgb.c | 26 --------
libswscale/x86/rgb2rgb_template.c | 123 +++++---------------------------------
2 files changed, 15 insertions(+), 134 deletions(-)
diff --git a/libswscale/x86/rgb2rgb.c b/libswscale/x86/rgb2rgb.c
index 0ab139aca4..b325e5dbd5 100644
--- a/libswscale/x86/rgb2rgb.c
+++ b/libswscale/x86/rgb2rgb.c
@@ -85,20 +85,11 @@ DECLARE_ALIGNED(8, extern const uint64_t, ff_bgr2UVOffset);
// Note: We have C, MMX, MMXEXT, 3DNOW versions, there is no 3DNOW + MMXEXT one.
-#define COMPILE_TEMPLATE_MMXEXT 0
-#define COMPILE_TEMPLATE_AMD3DNOW 0
#define COMPILE_TEMPLATE_SSE2 0
#define COMPILE_TEMPLATE_AVX 0
-//MMX versions
-#undef RENAME
-#define RENAME(a) a ## _mmx
-#include "rgb2rgb_template.c"
-
// MMXEXT versions
#undef RENAME
-#undef COMPILE_TEMPLATE_MMXEXT
-#define COMPILE_TEMPLATE_MMXEXT 1
#define RENAME(a) a ## _mmxext
#include "rgb2rgb_template.c"
@@ -116,19 +107,6 @@ DECLARE_ALIGNED(8, extern const uint64_t, ff_bgr2UVOffset);
#define RENAME(a) a ## _avx
#include "rgb2rgb_template.c"
-//3DNOW versions
-#undef RENAME
-#undef COMPILE_TEMPLATE_MMXEXT
-#undef COMPILE_TEMPLATE_SSE2
-#undef COMPILE_TEMPLATE_AVX
-#undef COMPILE_TEMPLATE_AMD3DNOW
-#define COMPILE_TEMPLATE_MMXEXT 0
-#define COMPILE_TEMPLATE_SSE2 0
-#define COMPILE_TEMPLATE_AVX 0
-#define COMPILE_TEMPLATE_AMD3DNOW 1
-#define RENAME(a) a ## _3dnow
-#include "rgb2rgb_template.c"
-
/*
RGB15->RGB16 original by Strepto/Astral
ported to gcc & bugfixed : A'rpi
@@ -165,10 +143,6 @@ av_cold void rgb2rgb_init_x86(void)
int cpu_flags = av_get_cpu_flags();
#if HAVE_INLINE_ASM
- if (INLINE_MMX(cpu_flags))
- rgb2rgb_init_mmx();
- if (INLINE_AMD3DNOW(cpu_flags))
- rgb2rgb_init_3dnow();
if (INLINE_MMXEXT(cpu_flags))
rgb2rgb_init_mmxext();
if (INLINE_SSE2(cpu_flags))
diff --git a/libswscale/x86/rgb2rgb_template.c b/libswscale/x86/rgb2rgb_template.c
index ae2469e663..4aba25dd51 100644
--- a/libswscale/x86/rgb2rgb_template.c
+++ b/libswscale/x86/rgb2rgb_template.c
@@ -36,34 +36,14 @@
#undef SFENCE
#undef PAVGB
-#if COMPILE_TEMPLATE_AMD3DNOW
-#define PREFETCH "prefetch"
-#define PAVGB "pavgusb"
-#elif COMPILE_TEMPLATE_MMXEXT
#define PREFETCH "prefetchnta"
#define PAVGB "pavgb"
-#else
-#define PREFETCH " # nop"
-#endif
-
-#if COMPILE_TEMPLATE_AMD3DNOW
-/* On K6 femms is faster than emms. On K7 femms is directly mapped to emms. */
-#define EMMS "femms"
-#else
-#define EMMS "emms"
-#endif
-
-#if COMPILE_TEMPLATE_MMXEXT
#define MOVNTQ "movntq"
#define SFENCE "sfence"
-#else
-#define MOVNTQ "movq"
-#define SFENCE " # nop"
-#endif
-#if !COMPILE_TEMPLATE_SSE2
+#define EMMS "emms"
-#if !COMPILE_TEMPLATE_AMD3DNOW
+#if !COMPILE_TEMPLATE_SSE2
static inline void RENAME(rgb24tobgr32)(const uint8_t *src, uint8_t *dst, int src_size)
{
@@ -1353,9 +1333,7 @@ static inline void RENAME(yuy2toyv12)(const uint8_t *src, uint8_t *ydst, uint8_t
SFENCE" \n\t"
:::"memory");
}
-#endif /* !COMPILE_TEMPLATE_AMD3DNOW */
-#if COMPILE_TEMPLATE_MMXEXT || COMPILE_TEMPLATE_AMD3DNOW
static inline void RENAME(planar2x)(const uint8_t *src, uint8_t *dst, int srcWidth, int srcHeight, int srcStride, int dstStride)
{
int x,y;
@@ -1453,9 +1431,7 @@ static inline void RENAME(planar2x)(const uint8_t *src, uint8_t *dst, int srcWid
SFENCE" \n\t"
:::"memory");
}
-#endif /* COMPILE_TEMPLATE_MMXEXT || COMPILE_TEMPLATE_AMD3DNOW */
-#if !COMPILE_TEMPLATE_AMD3DNOW
/**
* Height should be a multiple of 2 and width should be a multiple of 16.
* (If this is a problem for anyone then tell me, and I will fix it.)
@@ -1559,7 +1535,6 @@ static inline void RENAME(uyvytoyv12)(const uint8_t *src, uint8_t *ydst, uint8_t
SFENCE" \n\t"
:::"memory");
}
-#endif /* !COMPILE_TEMPLATE_AMD3DNOW */
/**
* Height should be a multiple of 2 and width should be a multiple of 2.
@@ -1673,7 +1648,6 @@ static inline void RENAME(rgb24toyv12)(const uint8_t *src, uint8_t *ydst, uint8_
"1: \n\t"
PREFETCH" 64(%0, %%"FF_REG_d") \n\t"
PREFETCH" 64(%1, %%"FF_REG_d") \n\t"
-#if COMPILE_TEMPLATE_MMXEXT || COMPILE_TEMPLATE_AMD3DNOW
"movq (%0, %%"FF_REG_d"), %%mm0 \n\t"
"movq (%1, %%"FF_REG_d"), %%mm1 \n\t"
"movq 6(%0, %%"FF_REG_d"), %%mm2 \n\t"
@@ -1688,32 +1662,6 @@ static inline void RENAME(rgb24toyv12)(const uint8_t *src, uint8_t *ydst, uint8_
PAVGB" %%mm3, %%mm2 \n\t"
"punpcklbw %%mm7, %%mm0 \n\t"
"punpcklbw %%mm7, %%mm2 \n\t"
-#else
- "movd (%0, %%"FF_REG_d"), %%mm0 \n\t"
- "movd (%1, %%"FF_REG_d"), %%mm1 \n\t"
- "movd 3(%0, %%"FF_REG_d"), %%mm2 \n\t"
- "movd 3(%1, %%"FF_REG_d"), %%mm3 \n\t"
- "punpcklbw %%mm7, %%mm0 \n\t"
- "punpcklbw %%mm7, %%mm1 \n\t"
- "punpcklbw %%mm7, %%mm2 \n\t"
- "punpcklbw %%mm7, %%mm3 \n\t"
- "paddw %%mm1, %%mm0 \n\t"
- "paddw %%mm3, %%mm2 \n\t"
- "paddw %%mm2, %%mm0 \n\t"
- "movd 6(%0, %%"FF_REG_d"), %%mm4 \n\t"
- "movd 6(%1, %%"FF_REG_d"), %%mm1 \n\t"
- "movd 9(%0, %%"FF_REG_d"), %%mm2 \n\t"
- "movd 9(%1, %%"FF_REG_d"), %%mm3 \n\t"
- "punpcklbw %%mm7, %%mm4 \n\t"
- "punpcklbw %%mm7, %%mm1 \n\t"
- "punpcklbw %%mm7, %%mm2 \n\t"
- "punpcklbw %%mm7, %%mm3 \n\t"
- "paddw %%mm1, %%mm4 \n\t"
- "paddw %%mm3, %%mm2 \n\t"
- "paddw %%mm4, %%mm2 \n\t"
- "psrlw $2, %%mm0 \n\t"
- "psrlw $2, %%mm2 \n\t"
-#endif
"movq "BGR2V_IDX"(%5), %%mm1 \n\t"
"movq "BGR2V_IDX"(%5), %%mm3 \n\t"
@@ -1732,7 +1680,6 @@ static inline void RENAME(rgb24toyv12)(const uint8_t *src, uint8_t *ydst, uint8_
"packssdw %%mm1, %%mm0 \n\t" // V1 V0 U1 U0
"psraw $7, %%mm0 \n\t"
-#if COMPILE_TEMPLATE_MMXEXT || COMPILE_TEMPLATE_AMD3DNOW
"movq 12(%0, %%"FF_REG_d"), %%mm4 \n\t"
"movq 12(%1, %%"FF_REG_d"), %%mm1 \n\t"
"movq 18(%0, %%"FF_REG_d"), %%mm2 \n\t"
@@ -1747,33 +1694,6 @@ static inline void RENAME(rgb24toyv12)(const uint8_t *src, uint8_t *ydst, uint8_
PAVGB" %%mm3, %%mm2 \n\t"
"punpcklbw %%mm7, %%mm4 \n\t"
"punpcklbw %%mm7, %%mm2 \n\t"
-#else
- "movd 12(%0, %%"FF_REG_d"), %%mm4 \n\t"
- "movd 12(%1, %%"FF_REG_d"), %%mm1 \n\t"
- "movd 15(%0, %%"FF_REG_d"), %%mm2 \n\t"
- "movd 15(%1, %%"FF_REG_d"), %%mm3 \n\t"
- "punpcklbw %%mm7, %%mm4 \n\t"
- "punpcklbw %%mm7, %%mm1 \n\t"
- "punpcklbw %%mm7, %%mm2 \n\t"
- "punpcklbw %%mm7, %%mm3 \n\t"
- "paddw %%mm1, %%mm4 \n\t"
- "paddw %%mm3, %%mm2 \n\t"
- "paddw %%mm2, %%mm4 \n\t"
- "movd 18(%0, %%"FF_REG_d"), %%mm5 \n\t"
- "movd 18(%1, %%"FF_REG_d"), %%mm1 \n\t"
- "movd 21(%0, %%"FF_REG_d"), %%mm2 \n\t"
- "movd 21(%1, %%"FF_REG_d"), %%mm3 \n\t"
- "punpcklbw %%mm7, %%mm5 \n\t"
- "punpcklbw %%mm7, %%mm1 \n\t"
- "punpcklbw %%mm7, %%mm2 \n\t"
- "punpcklbw %%mm7, %%mm3 \n\t"
- "paddw %%mm1, %%mm5 \n\t"
- "paddw %%mm3, %%mm2 \n\t"
- "paddw %%mm5, %%mm2 \n\t"
- "movq "MANGLE(ff_w1111)", %%mm5 \n\t"
- "psrlw $2, %%mm4 \n\t"
- "psrlw $2, %%mm2 \n\t"
-#endif
"movq "BGR2V_IDX"(%5), %%mm1 \n\t"
"movq "BGR2V_IDX"(%5), %%mm3 \n\t"
@@ -1822,7 +1742,7 @@ static inline void RENAME(rgb24toyv12)(const uint8_t *src, uint8_t *ydst, uint8_
#endif /* HAVE_7REGS */
#endif /* !COMPILE_TEMPLATE_SSE2 */
-#if !COMPILE_TEMPLATE_AMD3DNOW && !COMPILE_TEMPLATE_AVX
+#if !COMPILE_TEMPLATE_AVX && COMPILE_TEMPLATE_SSE2
static void RENAME(interleaveBytes)(const uint8_t *src1, const uint8_t *src2, uint8_t *dest,
int width, int height, int src1Stride,
int src2Stride, int dstStride)
@@ -1833,7 +1753,6 @@ static void RENAME(interleaveBytes)(const uint8_t *src1, const uint8_t *src2, ui
int w;
if (width >= 16) {
-#if COMPILE_TEMPLATE_SSE2
if (!((((intptr_t)src1) | ((intptr_t)src2) | ((intptr_t)dest))&15)) {
__asm__(
"xor %%"FF_REG_a", %%"FF_REG_a" \n\t"
@@ -1854,7 +1773,6 @@ static void RENAME(interleaveBytes)(const uint8_t *src1, const uint8_t *src2, ui
: "memory", XMM_CLOBBERS("xmm0", "xmm1", "xmm2",) "%"FF_REG_a
);
} else
-#endif
__asm__(
"xor %%"FF_REG_a", %%"FF_REG_a" \n\t"
"1: \n\t"
@@ -1896,10 +1814,10 @@ static void RENAME(interleaveBytes)(const uint8_t *src1, const uint8_t *src2, ui
::: "memory"
);
}
-#endif /* !COMPILE_TEMPLATE_AMD3DNOW && !COMPILE_TEMPLATE_AVX */
+#endif /* !COMPILE_TEMPLATE_AVX && COMPILE_TEMPLATE_SSE2 */
#if !COMPILE_TEMPLATE_AVX || HAVE_AVX_EXTERNAL
-#if !COMPILE_TEMPLATE_AMD3DNOW && (ARCH_X86_32 || COMPILE_TEMPLATE_SSE2) && COMPILE_TEMPLATE_MMXEXT == COMPILE_TEMPLATE_SSE2 && HAVE_X86ASM
+#if COMPILE_TEMPLATE_SSE2 && HAVE_X86ASM
void RENAME(ff_nv12ToUV)(uint8_t *dstU, uint8_t *dstV,
const uint8_t *unused,
const uint8_t *src1,
@@ -1919,18 +1837,14 @@ static void RENAME(deinterleaveBytes)(const uint8_t *src, uint8_t *dst1, uint8_t
dst2 += dst2Stride;
}
__asm__(
-#if !COMPILE_TEMPLATE_SSE2
- EMMS" \n\t"
-#endif
SFENCE" \n\t"
::: "memory"
);
}
-#endif /* !COMPILE_TEMPLATE_AMD3DNOW */
+#endif /* COMPILE_TEMPLATE_SSE2 && HAVE_X86ASM */
#endif /* !COMPILE_TEMPLATE_AVX || HAVE_AVX_EXTERNAL */
#if !COMPILE_TEMPLATE_SSE2
-#if !COMPILE_TEMPLATE_AMD3DNOW
static inline void RENAME(vu9_to_vu12)(const uint8_t *src1, const uint8_t *src2,
uint8_t *dst1, uint8_t *dst2,
int width, int height,
@@ -2108,7 +2022,6 @@ static inline void RENAME(yvu9_to_yuy2)(const uint8_t *src1, const uint8_t *src2
::: "memory"
);
}
-#endif /* !COMPILE_TEMPLATE_AMD3DNOW */
static void RENAME(extract_even)(const uint8_t *src, uint8_t *dst, x86_reg count)
{
@@ -2185,7 +2098,7 @@ static void RENAME(extract_odd)(const uint8_t *src, uint8_t *dst, x86_reg count)
}
}
-#if !COMPILE_TEMPLATE_AMD3DNOW
+#if ARCH_X86_32
static void RENAME(extract_even2)(const uint8_t *src, uint8_t *dst0, uint8_t *dst1, x86_reg count)
{
dst0+= count;
@@ -2231,7 +2144,7 @@ static void RENAME(extract_even2)(const uint8_t *src, uint8_t *dst0, uint8_t *ds
count++;
}
}
-#endif /* !COMPILE_TEMPLATE_AMD3DNOW */
+#endif /* ARCH_X86_32 */
static void RENAME(extract_even2avg)(const uint8_t *src0, const uint8_t *src1, uint8_t *dst0, uint8_t *dst1, x86_reg count)
{
@@ -2286,7 +2199,6 @@ static void RENAME(extract_even2avg)(const uint8_t *src0, const uint8_t *src1, u
}
}
-#if !COMPILE_TEMPLATE_AMD3DNOW
static void RENAME(extract_odd2)(const uint8_t *src, uint8_t *dst0, uint8_t *dst1, x86_reg count)
{
dst0+= count;
@@ -2333,7 +2245,6 @@ static void RENAME(extract_odd2)(const uint8_t *src, uint8_t *dst0, uint8_t *dst
count++;
}
}
-#endif /* !COMPILE_TEMPLATE_AMD3DNOW */
static void RENAME(extract_odd2avg)(const uint8_t *src0, const uint8_t *src1, uint8_t *dst0, uint8_t *dst1, x86_reg count)
{
@@ -2415,7 +2326,6 @@ static void RENAME(yuyvtoyuv420)(uint8_t *ydst, uint8_t *udst, uint8_t *vdst, co
);
}
-#if !COMPILE_TEMPLATE_AMD3DNOW
static void RENAME(yuyvtoyuv422)(uint8_t *ydst, uint8_t *udst, uint8_t *vdst, const uint8_t *src,
int width, int height,
int lumStride, int chromStride, int srcStride)
@@ -2438,7 +2348,6 @@ static void RENAME(yuyvtoyuv422)(uint8_t *ydst, uint8_t *udst, uint8_t *vdst, co
::: "memory"
);
}
-#endif /* !COMPILE_TEMPLATE_AMD3DNOW */
static void RENAME(uyvytoyuv420)(uint8_t *ydst, uint8_t *udst, uint8_t *vdst, const uint8_t *src,
int width, int height,
@@ -2465,7 +2374,7 @@ static void RENAME(uyvytoyuv420)(uint8_t *ydst, uint8_t *udst, uint8_t *vdst, co
);
}
-#if !COMPILE_TEMPLATE_AMD3DNOW
+#if ARCH_X86_32
static void RENAME(uyvytoyuv422)(uint8_t *ydst, uint8_t *udst, uint8_t *vdst, const uint8_t *src,
int width, int height,
int lumStride, int chromStride, int srcStride)
@@ -2488,13 +2397,12 @@ static void RENAME(uyvytoyuv422)(uint8_t *ydst, uint8_t *udst, uint8_t *vdst, co
::: "memory"
);
}
-#endif /* !COMPILE_TEMPLATE_AMD3DNOW */
+#endif /* ARCH_X86_32 */
#endif /* !COMPILE_TEMPLATE_SSE2 */
static av_cold void RENAME(rgb2rgb_init)(void)
{
#if !COMPILE_TEMPLATE_SSE2
-#if !COMPILE_TEMPLATE_AMD3DNOW
rgb15to16 = RENAME(rgb15to16);
rgb15tobgr24 = RENAME(rgb15tobgr24);
rgb15to32 = RENAME(rgb15to32);
@@ -2519,13 +2427,12 @@ static av_cold void RENAME(rgb2rgb_init)(void)
yuy2toyv12 = RENAME(yuy2toyv12);
vu9_to_vu12 = RENAME(vu9_to_vu12);
yvu9_to_yuy2 = RENAME(yvu9_to_yuy2);
+#if ARCH_X86_32
uyvytoyuv422 = RENAME(uyvytoyuv422);
+#endif
yuyvtoyuv422 = RENAME(yuyvtoyuv422);
-#endif /* !COMPILE_TEMPLATE_AMD3DNOW */
-#if COMPILE_TEMPLATE_MMXEXT || COMPILE_TEMPLATE_AMD3DNOW
planar2x = RENAME(planar2x);
-#endif /* COMPILE_TEMPLATE_MMXEXT || COMPILE_TEMPLATE_AMD3DNOW */
#if HAVE_7REGS
ff_rgb24toyv12 = RENAME(rgb24toyv12);
#endif /* HAVE_7REGS */
@@ -2534,11 +2441,11 @@ static av_cold void RENAME(rgb2rgb_init)(void)
uyvytoyuv420 = RENAME(uyvytoyuv420);
#endif /* !COMPILE_TEMPLATE_SSE2 */
-#if !COMPILE_TEMPLATE_AMD3DNOW && !COMPILE_TEMPLATE_AVX
+#if !COMPILE_TEMPLATE_AVX && COMPILE_TEMPLATE_SSE2
interleaveBytes = RENAME(interleaveBytes);
-#endif /* !COMPILE_TEMPLATE_AMD3DNOW && !COMPILE_TEMPLATE_AVX */
+#endif /* !COMPILE_TEMPLATE_AVX && COMPILE_TEMPLATE_SSE2 */
#if !COMPILE_TEMPLATE_AVX || HAVE_AVX_EXTERNAL
-#if !COMPILE_TEMPLATE_AMD3DNOW && (ARCH_X86_32 || COMPILE_TEMPLATE_SSE2) && COMPILE_TEMPLATE_MMXEXT == COMPILE_TEMPLATE_SSE2 && HAVE_X86ASM
+#if COMPILE_TEMPLATE_SSE2 && HAVE_X86ASM
deinterleaveBytes = RENAME(deinterleaveBytes);
#endif
#endif
More information about the ffmpeg-cvslog
mailing list