[FFmpeg-cvslog] swscale/x86/rgb2rgb: Remove obsolete MMX, 3dnow functions

Andreas Rheinhardt git at videolan.org
Wed Jun 22 15:29:05 EEST 2022


ffmpeg | branch: master | Andreas Rheinhardt <andreas.rheinhardt at outlook.com> | Sat Jun 11 01:07:57 2022 +0200| [608319a311a31f7d85333a7b08286c00be38eab6] | committer: Andreas Rheinhardt

swscale/x86/rgb2rgb: Remove obsolete MMX, 3dnow functions

x64 always has MMX, MMXEXT, SSE and SSE2 and this means
that some functions for MMX, MMXEXT and 3dnow are always
overridden by other functions (unless one e.g. explicitly
disables SSE2) for x64. So given that the only systems that
benefit from these functions are truely ancient 32bit x86s
they are removed.

Signed-off-by: Andreas Rheinhardt <andreas.rheinhardt at outlook.com>

> http://git.videolan.org/gitweb.cgi/ffmpeg.git/?a=commit;h=608319a311a31f7d85333a7b08286c00be38eab6
---

 libswscale/x86/rgb2rgb.c          |  26 --------
 libswscale/x86/rgb2rgb_template.c | 123 +++++---------------------------------
 2 files changed, 15 insertions(+), 134 deletions(-)

diff --git a/libswscale/x86/rgb2rgb.c b/libswscale/x86/rgb2rgb.c
index 0ab139aca4..b325e5dbd5 100644
--- a/libswscale/x86/rgb2rgb.c
+++ b/libswscale/x86/rgb2rgb.c
@@ -85,20 +85,11 @@ DECLARE_ALIGNED(8, extern const uint64_t, ff_bgr2UVOffset);
 
 // Note: We have C, MMX, MMXEXT, 3DNOW versions, there is no 3DNOW + MMXEXT one.
 
-#define COMPILE_TEMPLATE_MMXEXT 0
-#define COMPILE_TEMPLATE_AMD3DNOW 0
 #define COMPILE_TEMPLATE_SSE2 0
 #define COMPILE_TEMPLATE_AVX 0
 
-//MMX versions
-#undef RENAME
-#define RENAME(a) a ## _mmx
-#include "rgb2rgb_template.c"
-
 // MMXEXT versions
 #undef RENAME
-#undef COMPILE_TEMPLATE_MMXEXT
-#define COMPILE_TEMPLATE_MMXEXT 1
 #define RENAME(a) a ## _mmxext
 #include "rgb2rgb_template.c"
 
@@ -116,19 +107,6 @@ DECLARE_ALIGNED(8, extern const uint64_t, ff_bgr2UVOffset);
 #define RENAME(a) a ## _avx
 #include "rgb2rgb_template.c"
 
-//3DNOW versions
-#undef RENAME
-#undef COMPILE_TEMPLATE_MMXEXT
-#undef COMPILE_TEMPLATE_SSE2
-#undef COMPILE_TEMPLATE_AVX
-#undef COMPILE_TEMPLATE_AMD3DNOW
-#define COMPILE_TEMPLATE_MMXEXT 0
-#define COMPILE_TEMPLATE_SSE2 0
-#define COMPILE_TEMPLATE_AVX 0
-#define COMPILE_TEMPLATE_AMD3DNOW 1
-#define RENAME(a) a ## _3dnow
-#include "rgb2rgb_template.c"
-
 /*
  RGB15->RGB16 original by Strepto/Astral
  ported to gcc & bugfixed : A'rpi
@@ -165,10 +143,6 @@ av_cold void rgb2rgb_init_x86(void)
     int cpu_flags = av_get_cpu_flags();
 
 #if HAVE_INLINE_ASM
-    if (INLINE_MMX(cpu_flags))
-        rgb2rgb_init_mmx();
-    if (INLINE_AMD3DNOW(cpu_flags))
-        rgb2rgb_init_3dnow();
     if (INLINE_MMXEXT(cpu_flags))
         rgb2rgb_init_mmxext();
     if (INLINE_SSE2(cpu_flags))
diff --git a/libswscale/x86/rgb2rgb_template.c b/libswscale/x86/rgb2rgb_template.c
index ae2469e663..4aba25dd51 100644
--- a/libswscale/x86/rgb2rgb_template.c
+++ b/libswscale/x86/rgb2rgb_template.c
@@ -36,34 +36,14 @@
 #undef SFENCE
 #undef PAVGB
 
-#if COMPILE_TEMPLATE_AMD3DNOW
-#define PREFETCH  "prefetch"
-#define PAVGB     "pavgusb"
-#elif COMPILE_TEMPLATE_MMXEXT
 #define PREFETCH "prefetchnta"
 #define PAVGB     "pavgb"
-#else
-#define PREFETCH  " # nop"
-#endif
-
-#if COMPILE_TEMPLATE_AMD3DNOW
-/* On K6 femms is faster than emms. On K7 femms is directly mapped to emms. */
-#define EMMS     "femms"
-#else
-#define EMMS     "emms"
-#endif
-
-#if COMPILE_TEMPLATE_MMXEXT
 #define MOVNTQ "movntq"
 #define SFENCE "sfence"
-#else
-#define MOVNTQ "movq"
-#define SFENCE " # nop"
-#endif
 
-#if !COMPILE_TEMPLATE_SSE2
+#define EMMS     "emms"
 
-#if !COMPILE_TEMPLATE_AMD3DNOW
+#if !COMPILE_TEMPLATE_SSE2
 
 static inline void RENAME(rgb24tobgr32)(const uint8_t *src, uint8_t *dst, int src_size)
 {
@@ -1353,9 +1333,7 @@ static inline void RENAME(yuy2toyv12)(const uint8_t *src, uint8_t *ydst, uint8_t
                      SFENCE"     \n\t"
                      :::"memory");
 }
-#endif /* !COMPILE_TEMPLATE_AMD3DNOW */
 
-#if COMPILE_TEMPLATE_MMXEXT || COMPILE_TEMPLATE_AMD3DNOW
 static inline void RENAME(planar2x)(const uint8_t *src, uint8_t *dst, int srcWidth, int srcHeight, int srcStride, int dstStride)
 {
     int x,y;
@@ -1453,9 +1431,7 @@ static inline void RENAME(planar2x)(const uint8_t *src, uint8_t *dst, int srcWid
                      SFENCE"     \n\t"
                      :::"memory");
 }
-#endif /* COMPILE_TEMPLATE_MMXEXT || COMPILE_TEMPLATE_AMD3DNOW */
 
-#if !COMPILE_TEMPLATE_AMD3DNOW
 /**
  * Height should be a multiple of 2 and width should be a multiple of 16.
  * (If this is a problem for anyone then tell me, and I will fix it.)
@@ -1559,7 +1535,6 @@ static inline void RENAME(uyvytoyv12)(const uint8_t *src, uint8_t *ydst, uint8_t
                      SFENCE"     \n\t"
                      :::"memory");
 }
-#endif /* !COMPILE_TEMPLATE_AMD3DNOW */
 
 /**
  * Height should be a multiple of 2 and width should be a multiple of 2.
@@ -1673,7 +1648,6 @@ static inline void RENAME(rgb24toyv12)(const uint8_t *src, uint8_t *ydst, uint8_
             "1:                                         \n\t"
             PREFETCH" 64(%0, %%"FF_REG_d")              \n\t"
             PREFETCH" 64(%1, %%"FF_REG_d")              \n\t"
-#if COMPILE_TEMPLATE_MMXEXT || COMPILE_TEMPLATE_AMD3DNOW
             "movq       (%0, %%"FF_REG_d"), %%mm0       \n\t"
             "movq       (%1, %%"FF_REG_d"), %%mm1       \n\t"
             "movq      6(%0, %%"FF_REG_d"), %%mm2       \n\t"
@@ -1688,32 +1662,6 @@ static inline void RENAME(rgb24toyv12)(const uint8_t *src, uint8_t *ydst, uint8_
             PAVGB"                   %%mm3, %%mm2       \n\t"
             "punpcklbw               %%mm7, %%mm0       \n\t"
             "punpcklbw               %%mm7, %%mm2       \n\t"
-#else
-            "movd       (%0, %%"FF_REG_d"), %%mm0       \n\t"
-            "movd       (%1, %%"FF_REG_d"), %%mm1       \n\t"
-            "movd      3(%0, %%"FF_REG_d"), %%mm2       \n\t"
-            "movd      3(%1, %%"FF_REG_d"), %%mm3       \n\t"
-            "punpcklbw               %%mm7, %%mm0       \n\t"
-            "punpcklbw               %%mm7, %%mm1       \n\t"
-            "punpcklbw               %%mm7, %%mm2       \n\t"
-            "punpcklbw               %%mm7, %%mm3       \n\t"
-            "paddw                   %%mm1, %%mm0       \n\t"
-            "paddw                   %%mm3, %%mm2       \n\t"
-            "paddw                   %%mm2, %%mm0       \n\t"
-            "movd      6(%0, %%"FF_REG_d"), %%mm4       \n\t"
-            "movd      6(%1, %%"FF_REG_d"), %%mm1       \n\t"
-            "movd      9(%0, %%"FF_REG_d"), %%mm2       \n\t"
-            "movd      9(%1, %%"FF_REG_d"), %%mm3       \n\t"
-            "punpcklbw               %%mm7, %%mm4       \n\t"
-            "punpcklbw               %%mm7, %%mm1       \n\t"
-            "punpcklbw               %%mm7, %%mm2       \n\t"
-            "punpcklbw               %%mm7, %%mm3       \n\t"
-            "paddw                   %%mm1, %%mm4       \n\t"
-            "paddw                   %%mm3, %%mm2       \n\t"
-            "paddw                   %%mm4, %%mm2       \n\t"
-            "psrlw                      $2, %%mm0       \n\t"
-            "psrlw                      $2, %%mm2       \n\t"
-#endif
             "movq          "BGR2V_IDX"(%5), %%mm1       \n\t"
             "movq          "BGR2V_IDX"(%5), %%mm3       \n\t"
 
@@ -1732,7 +1680,6 @@ static inline void RENAME(rgb24toyv12)(const uint8_t *src, uint8_t *ydst, uint8_
             "packssdw                %%mm1, %%mm0       \n\t" // V1 V0 U1 U0
             "psraw                      $7, %%mm0       \n\t"
 
-#if COMPILE_TEMPLATE_MMXEXT || COMPILE_TEMPLATE_AMD3DNOW
             "movq     12(%0, %%"FF_REG_d"), %%mm4       \n\t"
             "movq     12(%1, %%"FF_REG_d"), %%mm1       \n\t"
             "movq     18(%0, %%"FF_REG_d"), %%mm2       \n\t"
@@ -1747,33 +1694,6 @@ static inline void RENAME(rgb24toyv12)(const uint8_t *src, uint8_t *ydst, uint8_
             PAVGB"                   %%mm3, %%mm2       \n\t"
             "punpcklbw               %%mm7, %%mm4       \n\t"
             "punpcklbw               %%mm7, %%mm2       \n\t"
-#else
-            "movd     12(%0, %%"FF_REG_d"), %%mm4       \n\t"
-            "movd     12(%1, %%"FF_REG_d"), %%mm1       \n\t"
-            "movd     15(%0, %%"FF_REG_d"), %%mm2       \n\t"
-            "movd     15(%1, %%"FF_REG_d"), %%mm3       \n\t"
-            "punpcklbw               %%mm7, %%mm4       \n\t"
-            "punpcklbw               %%mm7, %%mm1       \n\t"
-            "punpcklbw               %%mm7, %%mm2       \n\t"
-            "punpcklbw               %%mm7, %%mm3       \n\t"
-            "paddw                   %%mm1, %%mm4       \n\t"
-            "paddw                   %%mm3, %%mm2       \n\t"
-            "paddw                   %%mm2, %%mm4       \n\t"
-            "movd     18(%0, %%"FF_REG_d"), %%mm5       \n\t"
-            "movd     18(%1, %%"FF_REG_d"), %%mm1       \n\t"
-            "movd     21(%0, %%"FF_REG_d"), %%mm2       \n\t"
-            "movd     21(%1, %%"FF_REG_d"), %%mm3       \n\t"
-            "punpcklbw               %%mm7, %%mm5       \n\t"
-            "punpcklbw               %%mm7, %%mm1       \n\t"
-            "punpcklbw               %%mm7, %%mm2       \n\t"
-            "punpcklbw               %%mm7, %%mm3       \n\t"
-            "paddw                   %%mm1, %%mm5       \n\t"
-            "paddw                   %%mm3, %%mm2       \n\t"
-            "paddw                   %%mm5, %%mm2       \n\t"
-            "movq       "MANGLE(ff_w1111)", %%mm5       \n\t"
-            "psrlw                      $2, %%mm4       \n\t"
-            "psrlw                      $2, %%mm2       \n\t"
-#endif
             "movq          "BGR2V_IDX"(%5), %%mm1       \n\t"
             "movq          "BGR2V_IDX"(%5), %%mm3       \n\t"
 
@@ -1822,7 +1742,7 @@ static inline void RENAME(rgb24toyv12)(const uint8_t *src, uint8_t *ydst, uint8_
 #endif /* HAVE_7REGS */
 #endif /* !COMPILE_TEMPLATE_SSE2 */
 
-#if !COMPILE_TEMPLATE_AMD3DNOW && !COMPILE_TEMPLATE_AVX
+#if !COMPILE_TEMPLATE_AVX && COMPILE_TEMPLATE_SSE2
 static void RENAME(interleaveBytes)(const uint8_t *src1, const uint8_t *src2, uint8_t *dest,
                                     int width, int height, int src1Stride,
                                     int src2Stride, int dstStride)
@@ -1833,7 +1753,6 @@ static void RENAME(interleaveBytes)(const uint8_t *src1, const uint8_t *src2, ui
         int w;
 
         if (width >= 16) {
-#if COMPILE_TEMPLATE_SSE2
             if (!((((intptr_t)src1) | ((intptr_t)src2) | ((intptr_t)dest))&15)) {
         __asm__(
             "xor              %%"FF_REG_a", %%"FF_REG_a"  \n\t"
@@ -1854,7 +1773,6 @@ static void RENAME(interleaveBytes)(const uint8_t *src1, const uint8_t *src2, ui
             : "memory", XMM_CLOBBERS("xmm0", "xmm1", "xmm2",) "%"FF_REG_a
         );
             } else
-#endif
         __asm__(
             "xor %%"FF_REG_a", %%"FF_REG_a"         \n\t"
             "1:                                     \n\t"
@@ -1896,10 +1814,10 @@ static void RENAME(interleaveBytes)(const uint8_t *src1, const uint8_t *src2, ui
             ::: "memory"
             );
 }
-#endif /* !COMPILE_TEMPLATE_AMD3DNOW && !COMPILE_TEMPLATE_AVX */
+#endif /* !COMPILE_TEMPLATE_AVX && COMPILE_TEMPLATE_SSE2 */
 
 #if !COMPILE_TEMPLATE_AVX || HAVE_AVX_EXTERNAL
-#if !COMPILE_TEMPLATE_AMD3DNOW && (ARCH_X86_32 || COMPILE_TEMPLATE_SSE2) && COMPILE_TEMPLATE_MMXEXT == COMPILE_TEMPLATE_SSE2 && HAVE_X86ASM
+#if COMPILE_TEMPLATE_SSE2 && HAVE_X86ASM
 void RENAME(ff_nv12ToUV)(uint8_t *dstU, uint8_t *dstV,
                          const uint8_t *unused,
                          const uint8_t *src1,
@@ -1919,18 +1837,14 @@ static void RENAME(deinterleaveBytes)(const uint8_t *src, uint8_t *dst1, uint8_t
         dst2 += dst2Stride;
     }
     __asm__(
-#if !COMPILE_TEMPLATE_SSE2
-            EMMS"       \n\t"
-#endif
             SFENCE"     \n\t"
             ::: "memory"
             );
 }
-#endif /* !COMPILE_TEMPLATE_AMD3DNOW */
+#endif /* COMPILE_TEMPLATE_SSE2 && HAVE_X86ASM */
 #endif /* !COMPILE_TEMPLATE_AVX || HAVE_AVX_EXTERNAL */
 
 #if !COMPILE_TEMPLATE_SSE2
-#if !COMPILE_TEMPLATE_AMD3DNOW
 static inline void RENAME(vu9_to_vu12)(const uint8_t *src1, const uint8_t *src2,
                                        uint8_t *dst1, uint8_t *dst2,
                                        int width, int height,
@@ -2108,7 +2022,6 @@ static inline void RENAME(yvu9_to_yuy2)(const uint8_t *src1, const uint8_t *src2
             ::: "memory"
         );
 }
-#endif /* !COMPILE_TEMPLATE_AMD3DNOW */
 
 static void RENAME(extract_even)(const uint8_t *src, uint8_t *dst, x86_reg count)
 {
@@ -2185,7 +2098,7 @@ static void RENAME(extract_odd)(const uint8_t *src, uint8_t *dst, x86_reg count)
     }
 }
 
-#if !COMPILE_TEMPLATE_AMD3DNOW
+#if ARCH_X86_32
 static void RENAME(extract_even2)(const uint8_t *src, uint8_t *dst0, uint8_t *dst1, x86_reg count)
 {
     dst0+=   count;
@@ -2231,7 +2144,7 @@ static void RENAME(extract_even2)(const uint8_t *src, uint8_t *dst0, uint8_t *ds
         count++;
     }
 }
-#endif /* !COMPILE_TEMPLATE_AMD3DNOW */
+#endif /* ARCH_X86_32 */
 
 static void RENAME(extract_even2avg)(const uint8_t *src0, const uint8_t *src1, uint8_t *dst0, uint8_t *dst1, x86_reg count)
 {
@@ -2286,7 +2199,6 @@ static void RENAME(extract_even2avg)(const uint8_t *src0, const uint8_t *src1, u
     }
 }
 
-#if !COMPILE_TEMPLATE_AMD3DNOW
 static void RENAME(extract_odd2)(const uint8_t *src, uint8_t *dst0, uint8_t *dst1, x86_reg count)
 {
     dst0+=   count;
@@ -2333,7 +2245,6 @@ static void RENAME(extract_odd2)(const uint8_t *src, uint8_t *dst0, uint8_t *dst
         count++;
     }
 }
-#endif /* !COMPILE_TEMPLATE_AMD3DNOW */
 
 static void RENAME(extract_odd2avg)(const uint8_t *src0, const uint8_t *src1, uint8_t *dst0, uint8_t *dst1, x86_reg count)
 {
@@ -2415,7 +2326,6 @@ static void RENAME(yuyvtoyuv420)(uint8_t *ydst, uint8_t *udst, uint8_t *vdst, co
         );
 }
 
-#if !COMPILE_TEMPLATE_AMD3DNOW
 static void RENAME(yuyvtoyuv422)(uint8_t *ydst, uint8_t *udst, uint8_t *vdst, const uint8_t *src,
                                  int width, int height,
                                  int lumStride, int chromStride, int srcStride)
@@ -2438,7 +2348,6 @@ static void RENAME(yuyvtoyuv422)(uint8_t *ydst, uint8_t *udst, uint8_t *vdst, co
             ::: "memory"
         );
 }
-#endif /* !COMPILE_TEMPLATE_AMD3DNOW */
 
 static void RENAME(uyvytoyuv420)(uint8_t *ydst, uint8_t *udst, uint8_t *vdst, const uint8_t *src,
                                  int width, int height,
@@ -2465,7 +2374,7 @@ static void RENAME(uyvytoyuv420)(uint8_t *ydst, uint8_t *udst, uint8_t *vdst, co
         );
 }
 
-#if !COMPILE_TEMPLATE_AMD3DNOW
+#if ARCH_X86_32
 static void RENAME(uyvytoyuv422)(uint8_t *ydst, uint8_t *udst, uint8_t *vdst, const uint8_t *src,
                                  int width, int height,
                                  int lumStride, int chromStride, int srcStride)
@@ -2488,13 +2397,12 @@ static void RENAME(uyvytoyuv422)(uint8_t *ydst, uint8_t *udst, uint8_t *vdst, co
             ::: "memory"
         );
 }
-#endif /* !COMPILE_TEMPLATE_AMD3DNOW */
+#endif /* ARCH_X86_32 */
 #endif /* !COMPILE_TEMPLATE_SSE2 */
 
 static av_cold void RENAME(rgb2rgb_init)(void)
 {
 #if !COMPILE_TEMPLATE_SSE2
-#if !COMPILE_TEMPLATE_AMD3DNOW
     rgb15to16          = RENAME(rgb15to16);
     rgb15tobgr24       = RENAME(rgb15tobgr24);
     rgb15to32          = RENAME(rgb15to32);
@@ -2519,13 +2427,12 @@ static av_cold void RENAME(rgb2rgb_init)(void)
     yuy2toyv12         = RENAME(yuy2toyv12);
     vu9_to_vu12        = RENAME(vu9_to_vu12);
     yvu9_to_yuy2       = RENAME(yvu9_to_yuy2);
+#if ARCH_X86_32
     uyvytoyuv422       = RENAME(uyvytoyuv422);
+#endif
     yuyvtoyuv422       = RENAME(yuyvtoyuv422);
-#endif /* !COMPILE_TEMPLATE_AMD3DNOW */
 
-#if COMPILE_TEMPLATE_MMXEXT || COMPILE_TEMPLATE_AMD3DNOW
     planar2x           = RENAME(planar2x);
-#endif /* COMPILE_TEMPLATE_MMXEXT || COMPILE_TEMPLATE_AMD3DNOW */
 #if HAVE_7REGS
     ff_rgb24toyv12     = RENAME(rgb24toyv12);
 #endif /* HAVE_7REGS */
@@ -2534,11 +2441,11 @@ static av_cold void RENAME(rgb2rgb_init)(void)
     uyvytoyuv420       = RENAME(uyvytoyuv420);
 #endif /* !COMPILE_TEMPLATE_SSE2 */
 
-#if !COMPILE_TEMPLATE_AMD3DNOW && !COMPILE_TEMPLATE_AVX
+#if !COMPILE_TEMPLATE_AVX && COMPILE_TEMPLATE_SSE2
     interleaveBytes    = RENAME(interleaveBytes);
-#endif /* !COMPILE_TEMPLATE_AMD3DNOW && !COMPILE_TEMPLATE_AVX */
+#endif /* !COMPILE_TEMPLATE_AVX && COMPILE_TEMPLATE_SSE2 */
 #if !COMPILE_TEMPLATE_AVX || HAVE_AVX_EXTERNAL
-#if !COMPILE_TEMPLATE_AMD3DNOW && (ARCH_X86_32 || COMPILE_TEMPLATE_SSE2) && COMPILE_TEMPLATE_MMXEXT == COMPILE_TEMPLATE_SSE2 && HAVE_X86ASM
+#if COMPILE_TEMPLATE_SSE2 && HAVE_X86ASM
     deinterleaveBytes  = RENAME(deinterleaveBytes);
 #endif
 #endif



More information about the ffmpeg-cvslog mailing list