[FFmpeg-cvslog] avcodec/x86/idctdsp: Remove obsolete MMX(EXT) functions

Andreas Rheinhardt git at videolan.org
Wed Jun 22 15:28:44 EEST 2022


ffmpeg | branch: master | Andreas Rheinhardt <andreas.rheinhardt at outlook.com> | Fri Jun 10 20:28:06 2022 +0200| [bfb28b5ce89f3e950214b67ea95b45e3355c2caf] | committer: Andreas Rheinhardt

avcodec/x86/idctdsp: Remove obsolete MMX(EXT) functions

x64 always has MMX, MMXEXT, SSE and SSE2 and this means
that some functions for MMX, MMXEXT and 3dnow are always
overridden by other functions (unless one e.g. explicitly
disables SSE2) for x64. So given that the only systems that
benefit from these functions are truly ancient 32-bit x86s,
they are removed.

Signed-off-by: Andreas Rheinhardt <andreas.rheinhardt at outlook.com>

> http://git.videolan.org/gitweb.cgi/ffmpeg.git/?a=commit;h=bfb28b5ce89f3e950214b67ea95b45e3355c2caf
---

 libavcodec/tests/x86/dct.c     |  3 --
 libavcodec/x86/idctdsp.asm     | 79 +++---------------------------------------
 libavcodec/x86/idctdsp.h       |  6 ----
 libavcodec/x86/idctdsp_init.c  | 11 +++---
 libavcodec/x86/simple_idct.asm | 22 ++----------
 5 files changed, 10 insertions(+), 111 deletions(-)

diff --git a/libavcodec/tests/x86/dct.c b/libavcodec/tests/x86/dct.c
index 207a2bcb36..ef0662ae37 100644
--- a/libavcodec/tests/x86/dct.c
+++ b/libavcodec/tests/x86/dct.c
@@ -65,9 +65,6 @@ static const struct algo fdct_tab_arch[] = {
 };
 
 static const struct algo idct_tab_arch[] = {
-#if HAVE_MMX_EXTERNAL
-    { "SIMPLE-MMX",  ff_simple_idct_mmx,  FF_IDCT_PERM_SIMPLE, AV_CPU_FLAG_MMX },
-#endif
 #if CONFIG_MPEG4_DECODER && HAVE_X86ASM
 #if HAVE_SSE2_EXTERNAL
     { "XVID-SSE2",   ff_xvid_idct_sse2,   FF_IDCT_PERM_SSE2,   AV_CPU_FLAG_SSE2,   1 },
diff --git a/libavcodec/x86/idctdsp.asm b/libavcodec/x86/idctdsp.asm
index 089425a9ab..1cfdb5419d 100644
--- a/libavcodec/x86/idctdsp.asm
+++ b/libavcodec/x86/idctdsp.asm
@@ -37,47 +37,24 @@ SECTION .text
 %macro PUT_SIGNED_PIXELS_CLAMPED_HALF 1
     mova     m1, [blockq+mmsize*0+%1]
     mova     m2, [blockq+mmsize*2+%1]
-%if mmsize == 8
-    mova     m3, [blockq+mmsize*4+%1]
-    mova     m4, [blockq+mmsize*6+%1]
-%endif
     packsswb m1, [blockq+mmsize*1+%1]
     packsswb m2, [blockq+mmsize*3+%1]
-%if mmsize == 8
-    packsswb m3, [blockq+mmsize*5+%1]
-    packsswb m4, [blockq+mmsize*7+%1]
-%endif
     paddb    m1, m0
     paddb    m2, m0
-%if mmsize == 8
-    paddb    m3, m0
-    paddb    m4, m0
-    movq     [pixelsq+lsizeq*0], m1
-    movq     [pixelsq+lsizeq*1], m2
-    movq     [pixelsq+lsizeq*2], m3
-    movq     [pixelsq+lsize3q ], m4
-%else
     movq     [pixelsq+lsizeq*0], m1
     movhps   [pixelsq+lsizeq*1], m1
     movq     [pixelsq+lsizeq*2], m2
     movhps   [pixelsq+lsize3q ], m2
-%endif
 %endmacro
 
-%macro PUT_SIGNED_PIXELS_CLAMPED 1
-cglobal put_signed_pixels_clamped, 3, 4, %1, block, pixels, lsize, lsize3
+INIT_XMM sse2
+cglobal put_signed_pixels_clamped, 3, 4, 3, block, pixels, lsize, lsize3
     mova     m0, [pb_80]
     lea      lsize3q, [lsizeq*3]
     PUT_SIGNED_PIXELS_CLAMPED_HALF 0
     lea      pixelsq, [pixelsq+lsizeq*4]
     PUT_SIGNED_PIXELS_CLAMPED_HALF 64
     RET
-%endmacro
-
-INIT_MMX mmx
-PUT_SIGNED_PIXELS_CLAMPED 0
-INIT_XMM sse2
-PUT_SIGNED_PIXELS_CLAMPED 3
 
 ;--------------------------------------------------------------------------
 ; void ff_put_pixels_clamped(const int16_t *block, uint8_t *pixels,
@@ -87,40 +64,21 @@ PUT_SIGNED_PIXELS_CLAMPED 3
 %macro PUT_PIXELS_CLAMPED_HALF 1
     mova     m0, [blockq+mmsize*0+%1]
     mova     m1, [blockq+mmsize*2+%1]
-%if mmsize == 8
-    mova     m2, [blockq+mmsize*4+%1]
-    mova     m3, [blockq+mmsize*6+%1]
-%endif
     packuswb m0, [blockq+mmsize*1+%1]
     packuswb m1, [blockq+mmsize*3+%1]
-%if mmsize == 8
-    packuswb m2, [blockq+mmsize*5+%1]
-    packuswb m3, [blockq+mmsize*7+%1]
-    movq           [pixelsq], m0
-    movq    [lsizeq+pixelsq], m1
-    movq  [2*lsizeq+pixelsq], m2
-    movq   [lsize3q+pixelsq], m3
-%else
     movq           [pixelsq], m0
     movhps  [lsizeq+pixelsq], m0
     movq  [2*lsizeq+pixelsq], m1
     movhps [lsize3q+pixelsq], m1
-%endif
 %endmacro
 
-%macro PUT_PIXELS_CLAMPED 0
+INIT_XMM sse2
 cglobal put_pixels_clamped, 3, 4, 2, block, pixels, lsize, lsize3
     lea lsize3q, [lsizeq*3]
     PUT_PIXELS_CLAMPED_HALF 0
     lea pixelsq, [pixelsq+lsizeq*4]
     PUT_PIXELS_CLAMPED_HALF 64
     RET
-%endmacro
-
-INIT_MMX mmx
-PUT_PIXELS_CLAMPED
-INIT_XMM sse2
-PUT_PIXELS_CLAMPED
 
 ;--------------------------------------------------------------------------
 ; void ff_add_pixels_clamped(const int16_t *block, uint8_t *pixels,
@@ -130,41 +88,18 @@ PUT_PIXELS_CLAMPED
 %macro ADD_PIXELS_CLAMPED 1
     mova       m0, [blockq+mmsize*0+%1]
     mova       m1, [blockq+mmsize*1+%1]
-%if mmsize == 8
-    mova       m5, [blockq+mmsize*2+%1]
-    mova       m6, [blockq+mmsize*3+%1]
-%endif
     movq       m2, [pixelsq]
     movq       m3, [pixelsq+lsizeq]
-%if mmsize == 8
-    mova       m7, m2
-    punpcklbw  m2, m4
-    punpckhbw  m7, m4
-    paddsw     m0, m2
-    paddsw     m1, m7
-    mova       m7, m3
-    punpcklbw  m3, m4
-    punpckhbw  m7, m4
-    paddsw     m5, m3
-    paddsw     m6, m7
-%else
     punpcklbw  m2, m4
     punpcklbw  m3, m4
     paddsw     m0, m2
     paddsw     m1, m3
-%endif
     packuswb   m0, m1
-%if mmsize == 8
-    packuswb   m5, m6
-    movq       [pixelsq], m0
-    movq       [pixelsq+lsizeq], m5
-%else
     movq       [pixelsq], m0
     movhps     [pixelsq+lsizeq], m0
-%endif
 %endmacro
 
-%macro ADD_PIXELS_CLAMPED 0
+INIT_XMM sse2
 cglobal add_pixels_clamped, 3, 3, 5, block, pixels, lsize
     pxor       m4, m4
     ADD_PIXELS_CLAMPED 0
@@ -175,9 +110,3 @@ cglobal add_pixels_clamped, 3, 3, 5, block, pixels, lsize
     lea        pixelsq, [pixelsq+lsizeq*2]
     ADD_PIXELS_CLAMPED 96
     RET
-%endmacro
-
-INIT_MMX mmx
-ADD_PIXELS_CLAMPED
-INIT_XMM sse2
-ADD_PIXELS_CLAMPED
diff --git a/libavcodec/x86/idctdsp.h b/libavcodec/x86/idctdsp.h
index 0d0bdb5f57..738e4e36e4 100644
--- a/libavcodec/x86/idctdsp.h
+++ b/libavcodec/x86/idctdsp.h
@@ -22,16 +22,10 @@
 #include <stddef.h>
 #include <stdint.h>
 
-void ff_add_pixels_clamped_mmx(const int16_t *block, uint8_t *pixels,
-                               ptrdiff_t line_size);
 void ff_add_pixels_clamped_sse2(const int16_t *block, uint8_t *pixels,
                                 ptrdiff_t line_size);
-void ff_put_pixels_clamped_mmx(const int16_t *block, uint8_t *pixels,
-                               ptrdiff_t line_size);
 void ff_put_pixels_clamped_sse2(const int16_t *block, uint8_t *pixels,
                                 ptrdiff_t line_size);
-void ff_put_signed_pixels_clamped_mmx(const int16_t *block, uint8_t *pixels,
-                                      ptrdiff_t line_size);
 void ff_put_signed_pixels_clamped_sse2(const int16_t *block, uint8_t *pixels,
                                        ptrdiff_t line_size);
 
diff --git a/libavcodec/x86/idctdsp_init.c b/libavcodec/x86/idctdsp_init.c
index 9103b92ce7..f28a1ad744 100644
--- a/libavcodec/x86/idctdsp_init.c
+++ b/libavcodec/x86/idctdsp_init.c
@@ -63,28 +63,24 @@ av_cold void ff_idctdsp_init_x86(IDCTDSPContext *c, AVCodecContext *avctx,
 {
     int cpu_flags = av_get_cpu_flags();
 
+#if ARCH_X86_32
     if (EXTERNAL_MMX(cpu_flags)) {
-        c->put_signed_pixels_clamped = ff_put_signed_pixels_clamped_mmx;
-        c->put_pixels_clamped        = ff_put_pixels_clamped_mmx;
-        c->add_pixels_clamped        = ff_add_pixels_clamped_mmx;
-
         if (!high_bit_depth &&
             avctx->lowres == 0 &&
             (avctx->idct_algo == FF_IDCT_AUTO ||
                 avctx->idct_algo == FF_IDCT_SIMPLEAUTO ||
                 avctx->idct_algo == FF_IDCT_SIMPLEMMX)) {
-                c->idct_put  = ff_simple_idct_put_mmx;
-                c->idct_add  = ff_simple_idct_add_mmx;
                 c->idct      = ff_simple_idct_mmx;
-                c->perm_type = FF_IDCT_PERM_SIMPLE;
         }
     }
+#endif
 
     if (EXTERNAL_SSE2(cpu_flags)) {
         c->put_signed_pixels_clamped = ff_put_signed_pixels_clamped_sse2;
         c->put_pixels_clamped        = ff_put_pixels_clamped_sse2;
         c->add_pixels_clamped        = ff_add_pixels_clamped_sse2;
 
+#if ARCH_X86_32
         if (!high_bit_depth &&
             avctx->lowres == 0 &&
             (avctx->idct_algo == FF_IDCT_AUTO ||
@@ -94,6 +90,7 @@ av_cold void ff_idctdsp_init_x86(IDCTDSPContext *c, AVCodecContext *avctx,
                 c->idct_add  = ff_simple_idct_add_sse2;
                 c->perm_type = FF_IDCT_PERM_SIMPLE;
         }
+#endif
 
         if (ARCH_X86_64 &&
             !high_bit_depth &&
diff --git a/libavcodec/x86/simple_idct.asm b/libavcodec/x86/simple_idct.asm
index 6fedbb5784..dcf0da6df1 100644
--- a/libavcodec/x86/simple_idct.asm
+++ b/libavcodec/x86/simple_idct.asm
@@ -25,6 +25,7 @@
 
 %include "libavutil/x86/x86util.asm"
 
+%if ARCH_X86_32
 SECTION_RODATA
 
 cextern pb_80
@@ -846,26 +847,6 @@ cglobal simple_idct, 1, 2, 8, 128, block, t0
     IDCT
 RET
 
-cglobal simple_idct_put, 3, 5, 8, 128, pixels, lsize, block, lsize3, t0
-    IDCT
-    lea lsize3q, [lsizeq*3]
-    PUT_PIXELS_CLAMPED_HALF 0
-    lea pixelsq, [pixelsq+lsizeq*4]
-    PUT_PIXELS_CLAMPED_HALF 64
-RET
-
-cglobal simple_idct_add, 3, 4, 8, 128, pixels, lsize, block, t0
-    IDCT
-    pxor       m4, m4
-    ADD_PIXELS_CLAMPED 0
-    lea        pixelsq, [pixelsq+lsizeq*2]
-    ADD_PIXELS_CLAMPED 32
-    lea        pixelsq, [pixelsq+lsizeq*2]
-    ADD_PIXELS_CLAMPED 64
-    lea        pixelsq, [pixelsq+lsizeq*2]
-    ADD_PIXELS_CLAMPED 96
-RET
-
 INIT_XMM sse2
 
 cglobal simple_idct_put, 3, 5, 8, 128, pixels, lsize, block, lsize3, t0
@@ -887,3 +868,4 @@ cglobal simple_idct_add, 3, 4, 8, 128, pixels, lsize, block, t0
     lea        pixelsq, [pixelsq+lsizeq*2]
     ADD_PIXELS_CLAMPED 96
 RET
+%endif



More information about the ffmpeg-cvslog mailing list