[FFmpeg-cvslog] avcodec/x86/idctdsp: Remove obsolete MMX(EXT) functions
Andreas Rheinhardt
git at videolan.org
Wed Jun 22 15:28:44 EEST 2022
ffmpeg | branch: master | Andreas Rheinhardt <andreas.rheinhardt at outlook.com> | Fri Jun 10 20:28:06 2022 +0200| [bfb28b5ce89f3e950214b67ea95b45e3355c2caf] | committer: Andreas Rheinhardt
avcodec/x86/idctdsp: Remove obsolete MMX(EXT) functions
x64 always has MMX, MMXEXT, SSE and SSE2, which means
that some functions for MMX, MMXEXT and 3dnow are always
overridden by other functions on x64 (unless one e.g. explicitly
disables SSE2). Given that the only systems that
benefit from these functions are truly ancient 32-bit x86s,
they are removed.
Signed-off-by: Andreas Rheinhardt <andreas.rheinhardt at outlook.com>
> http://git.videolan.org/gitweb.cgi/ffmpeg.git/?a=commit;h=bfb28b5ce89f3e950214b67ea95b45e3355c2caf
---
libavcodec/tests/x86/dct.c | 3 --
libavcodec/x86/idctdsp.asm | 79 +++---------------------------------------
libavcodec/x86/idctdsp.h | 6 ----
libavcodec/x86/idctdsp_init.c | 11 +++---
libavcodec/x86/simple_idct.asm | 22 ++----------
5 files changed, 10 insertions(+), 111 deletions(-)
diff --git a/libavcodec/tests/x86/dct.c b/libavcodec/tests/x86/dct.c
index 207a2bcb36..ef0662ae37 100644
--- a/libavcodec/tests/x86/dct.c
+++ b/libavcodec/tests/x86/dct.c
@@ -65,9 +65,6 @@ static const struct algo fdct_tab_arch[] = {
};
static const struct algo idct_tab_arch[] = {
-#if HAVE_MMX_EXTERNAL
- { "SIMPLE-MMX", ff_simple_idct_mmx, FF_IDCT_PERM_SIMPLE, AV_CPU_FLAG_MMX },
-#endif
#if CONFIG_MPEG4_DECODER && HAVE_X86ASM
#if HAVE_SSE2_EXTERNAL
{ "XVID-SSE2", ff_xvid_idct_sse2, FF_IDCT_PERM_SSE2, AV_CPU_FLAG_SSE2, 1 },
diff --git a/libavcodec/x86/idctdsp.asm b/libavcodec/x86/idctdsp.asm
index 089425a9ab..1cfdb5419d 100644
--- a/libavcodec/x86/idctdsp.asm
+++ b/libavcodec/x86/idctdsp.asm
@@ -37,47 +37,24 @@ SECTION .text
%macro PUT_SIGNED_PIXELS_CLAMPED_HALF 1
mova m1, [blockq+mmsize*0+%1]
mova m2, [blockq+mmsize*2+%1]
-%if mmsize == 8
- mova m3, [blockq+mmsize*4+%1]
- mova m4, [blockq+mmsize*6+%1]
-%endif
packsswb m1, [blockq+mmsize*1+%1]
packsswb m2, [blockq+mmsize*3+%1]
-%if mmsize == 8
- packsswb m3, [blockq+mmsize*5+%1]
- packsswb m4, [blockq+mmsize*7+%1]
-%endif
paddb m1, m0
paddb m2, m0
-%if mmsize == 8
- paddb m3, m0
- paddb m4, m0
- movq [pixelsq+lsizeq*0], m1
- movq [pixelsq+lsizeq*1], m2
- movq [pixelsq+lsizeq*2], m3
- movq [pixelsq+lsize3q ], m4
-%else
movq [pixelsq+lsizeq*0], m1
movhps [pixelsq+lsizeq*1], m1
movq [pixelsq+lsizeq*2], m2
movhps [pixelsq+lsize3q ], m2
-%endif
%endmacro
-%macro PUT_SIGNED_PIXELS_CLAMPED 1
-cglobal put_signed_pixels_clamped, 3, 4, %1, block, pixels, lsize, lsize3
+INIT_XMM sse2
+cglobal put_signed_pixels_clamped, 3, 4, 3, block, pixels, lsize, lsize3
mova m0, [pb_80]
lea lsize3q, [lsizeq*3]
PUT_SIGNED_PIXELS_CLAMPED_HALF 0
lea pixelsq, [pixelsq+lsizeq*4]
PUT_SIGNED_PIXELS_CLAMPED_HALF 64
RET
-%endmacro
-
-INIT_MMX mmx
-PUT_SIGNED_PIXELS_CLAMPED 0
-INIT_XMM sse2
-PUT_SIGNED_PIXELS_CLAMPED 3
;--------------------------------------------------------------------------
; void ff_put_pixels_clamped(const int16_t *block, uint8_t *pixels,
@@ -87,40 +64,21 @@ PUT_SIGNED_PIXELS_CLAMPED 3
%macro PUT_PIXELS_CLAMPED_HALF 1
mova m0, [blockq+mmsize*0+%1]
mova m1, [blockq+mmsize*2+%1]
-%if mmsize == 8
- mova m2, [blockq+mmsize*4+%1]
- mova m3, [blockq+mmsize*6+%1]
-%endif
packuswb m0, [blockq+mmsize*1+%1]
packuswb m1, [blockq+mmsize*3+%1]
-%if mmsize == 8
- packuswb m2, [blockq+mmsize*5+%1]
- packuswb m3, [blockq+mmsize*7+%1]
- movq [pixelsq], m0
- movq [lsizeq+pixelsq], m1
- movq [2*lsizeq+pixelsq], m2
- movq [lsize3q+pixelsq], m3
-%else
movq [pixelsq], m0
movhps [lsizeq+pixelsq], m0
movq [2*lsizeq+pixelsq], m1
movhps [lsize3q+pixelsq], m1
-%endif
%endmacro
-%macro PUT_PIXELS_CLAMPED 0
+INIT_XMM sse2
cglobal put_pixels_clamped, 3, 4, 2, block, pixels, lsize, lsize3
lea lsize3q, [lsizeq*3]
PUT_PIXELS_CLAMPED_HALF 0
lea pixelsq, [pixelsq+lsizeq*4]
PUT_PIXELS_CLAMPED_HALF 64
RET
-%endmacro
-
-INIT_MMX mmx
-PUT_PIXELS_CLAMPED
-INIT_XMM sse2
-PUT_PIXELS_CLAMPED
;--------------------------------------------------------------------------
; void ff_add_pixels_clamped(const int16_t *block, uint8_t *pixels,
@@ -130,41 +88,18 @@ PUT_PIXELS_CLAMPED
%macro ADD_PIXELS_CLAMPED 1
mova m0, [blockq+mmsize*0+%1]
mova m1, [blockq+mmsize*1+%1]
-%if mmsize == 8
- mova m5, [blockq+mmsize*2+%1]
- mova m6, [blockq+mmsize*3+%1]
-%endif
movq m2, [pixelsq]
movq m3, [pixelsq+lsizeq]
-%if mmsize == 8
- mova m7, m2
- punpcklbw m2, m4
- punpckhbw m7, m4
- paddsw m0, m2
- paddsw m1, m7
- mova m7, m3
- punpcklbw m3, m4
- punpckhbw m7, m4
- paddsw m5, m3
- paddsw m6, m7
-%else
punpcklbw m2, m4
punpcklbw m3, m4
paddsw m0, m2
paddsw m1, m3
-%endif
packuswb m0, m1
-%if mmsize == 8
- packuswb m5, m6
- movq [pixelsq], m0
- movq [pixelsq+lsizeq], m5
-%else
movq [pixelsq], m0
movhps [pixelsq+lsizeq], m0
-%endif
%endmacro
-%macro ADD_PIXELS_CLAMPED 0
+INIT_XMM sse2
cglobal add_pixels_clamped, 3, 3, 5, block, pixels, lsize
pxor m4, m4
ADD_PIXELS_CLAMPED 0
@@ -175,9 +110,3 @@ cglobal add_pixels_clamped, 3, 3, 5, block, pixels, lsize
lea pixelsq, [pixelsq+lsizeq*2]
ADD_PIXELS_CLAMPED 96
RET
-%endmacro
-
-INIT_MMX mmx
-ADD_PIXELS_CLAMPED
-INIT_XMM sse2
-ADD_PIXELS_CLAMPED
diff --git a/libavcodec/x86/idctdsp.h b/libavcodec/x86/idctdsp.h
index 0d0bdb5f57..738e4e36e4 100644
--- a/libavcodec/x86/idctdsp.h
+++ b/libavcodec/x86/idctdsp.h
@@ -22,16 +22,10 @@
#include <stddef.h>
#include <stdint.h>
-void ff_add_pixels_clamped_mmx(const int16_t *block, uint8_t *pixels,
- ptrdiff_t line_size);
void ff_add_pixels_clamped_sse2(const int16_t *block, uint8_t *pixels,
ptrdiff_t line_size);
-void ff_put_pixels_clamped_mmx(const int16_t *block, uint8_t *pixels,
- ptrdiff_t line_size);
void ff_put_pixels_clamped_sse2(const int16_t *block, uint8_t *pixels,
ptrdiff_t line_size);
-void ff_put_signed_pixels_clamped_mmx(const int16_t *block, uint8_t *pixels,
- ptrdiff_t line_size);
void ff_put_signed_pixels_clamped_sse2(const int16_t *block, uint8_t *pixels,
ptrdiff_t line_size);
diff --git a/libavcodec/x86/idctdsp_init.c b/libavcodec/x86/idctdsp_init.c
index 9103b92ce7..f28a1ad744 100644
--- a/libavcodec/x86/idctdsp_init.c
+++ b/libavcodec/x86/idctdsp_init.c
@@ -63,28 +63,24 @@ av_cold void ff_idctdsp_init_x86(IDCTDSPContext *c, AVCodecContext *avctx,
{
int cpu_flags = av_get_cpu_flags();
+#if ARCH_X86_32
if (EXTERNAL_MMX(cpu_flags)) {
- c->put_signed_pixels_clamped = ff_put_signed_pixels_clamped_mmx;
- c->put_pixels_clamped = ff_put_pixels_clamped_mmx;
- c->add_pixels_clamped = ff_add_pixels_clamped_mmx;
-
if (!high_bit_depth &&
avctx->lowres == 0 &&
(avctx->idct_algo == FF_IDCT_AUTO ||
avctx->idct_algo == FF_IDCT_SIMPLEAUTO ||
avctx->idct_algo == FF_IDCT_SIMPLEMMX)) {
- c->idct_put = ff_simple_idct_put_mmx;
- c->idct_add = ff_simple_idct_add_mmx;
c->idct = ff_simple_idct_mmx;
- c->perm_type = FF_IDCT_PERM_SIMPLE;
}
}
+#endif
if (EXTERNAL_SSE2(cpu_flags)) {
c->put_signed_pixels_clamped = ff_put_signed_pixels_clamped_sse2;
c->put_pixels_clamped = ff_put_pixels_clamped_sse2;
c->add_pixels_clamped = ff_add_pixels_clamped_sse2;
+#if ARCH_X86_32
if (!high_bit_depth &&
avctx->lowres == 0 &&
(avctx->idct_algo == FF_IDCT_AUTO ||
@@ -94,6 +90,7 @@ av_cold void ff_idctdsp_init_x86(IDCTDSPContext *c, AVCodecContext *avctx,
c->idct_add = ff_simple_idct_add_sse2;
c->perm_type = FF_IDCT_PERM_SIMPLE;
}
+#endif
if (ARCH_X86_64 &&
!high_bit_depth &&
diff --git a/libavcodec/x86/simple_idct.asm b/libavcodec/x86/simple_idct.asm
index 6fedbb5784..dcf0da6df1 100644
--- a/libavcodec/x86/simple_idct.asm
+++ b/libavcodec/x86/simple_idct.asm
@@ -25,6 +25,7 @@
%include "libavutil/x86/x86util.asm"
+%if ARCH_X86_32
SECTION_RODATA
cextern pb_80
@@ -846,26 +847,6 @@ cglobal simple_idct, 1, 2, 8, 128, block, t0
IDCT
RET
-cglobal simple_idct_put, 3, 5, 8, 128, pixels, lsize, block, lsize3, t0
- IDCT
- lea lsize3q, [lsizeq*3]
- PUT_PIXELS_CLAMPED_HALF 0
- lea pixelsq, [pixelsq+lsizeq*4]
- PUT_PIXELS_CLAMPED_HALF 64
-RET
-
-cglobal simple_idct_add, 3, 4, 8, 128, pixels, lsize, block, t0
- IDCT
- pxor m4, m4
- ADD_PIXELS_CLAMPED 0
- lea pixelsq, [pixelsq+lsizeq*2]
- ADD_PIXELS_CLAMPED 32
- lea pixelsq, [pixelsq+lsizeq*2]
- ADD_PIXELS_CLAMPED 64
- lea pixelsq, [pixelsq+lsizeq*2]
- ADD_PIXELS_CLAMPED 96
-RET
-
INIT_XMM sse2
cglobal simple_idct_put, 3, 5, 8, 128, pixels, lsize, block, lsize3, t0
@@ -887,3 +868,4 @@ cglobal simple_idct_add, 3, 4, 8, 128, pixels, lsize, block, t0
lea pixelsq, [pixelsq+lsizeq*2]
ADD_PIXELS_CLAMPED 96
RET
+%endif
More information about the ffmpeg-cvslog
mailing list