[FFmpeg-devel] [PATCH 09/41] avcodec/x86/vc1dsp_init: Disable overridden functions on x64

Andreas Rheinhardt andreas.rheinhardt at outlook.com
Fri Jun 10 02:54:51 EEST 2022


x64 always has MMX, MMXEXT, SSE and SSE2, which means
that some of the functions for MMX, MMXEXT and 3dnow are always
overridden by other functions (unless one e.g. explicitly
disables SSE2). This commit therefore disables these functions
at compile time on x64.

Signed-off-by: Andreas Rheinhardt <andreas.rheinhardt at outlook.com>
---
 libavcodec/x86/h264_chromamc.asm     |  2 ++
 libavcodec/x86/vc1dsp_init.c         | 41 +++++++++++++++++++---------
 libavcodec/x86/vc1dsp_loopfilter.asm |  2 ++
 3 files changed, 32 insertions(+), 13 deletions(-)

diff --git a/libavcodec/x86/h264_chromamc.asm b/libavcodec/x86/h264_chromamc.asm
index b5a78b537d..0421fa8695 100644
--- a/libavcodec/x86/h264_chromamc.asm
+++ b/libavcodec/x86/h264_chromamc.asm
@@ -448,7 +448,9 @@ chroma_mc2_mmx_func avg, h264
 
 INIT_MMX 3dnow
 chroma_mc8_mmx_func avg, h264, _rnd
+%if ARCH_X86_32
 chroma_mc8_mmx_func avg, vc1,  _nornd
+%endif
 chroma_mc8_mmx_func avg, rv40
 chroma_mc4_mmx_func avg, h264
 chroma_mc4_mmx_func avg, rv40
diff --git a/libavcodec/x86/vc1dsp_init.c b/libavcodec/x86/vc1dsp_init.c
index 2fbf0b3a74..66d894061c 100644
--- a/libavcodec/x86/vc1dsp_init.c
+++ b/libavcodec/x86/vc1dsp_init.c
@@ -33,9 +33,10 @@
 #include "vc1dsp.h"
 #include "config.h"
 
-#define LOOP_FILTER(EXT) \
+#define LOOP_FILTER4(EXT) \
 void ff_vc1_v_loop_filter4_ ## EXT(uint8_t *src, ptrdiff_t stride, int pq); \
-void ff_vc1_h_loop_filter4_ ## EXT(uint8_t *src, ptrdiff_t stride, int pq); \
+void ff_vc1_h_loop_filter4_ ## EXT(uint8_t *src, ptrdiff_t stride, int pq);
+#define LOOP_FILTER816(EXT) \
 void ff_vc1_v_loop_filter8_ ## EXT(uint8_t *src, ptrdiff_t stride, int pq); \
 void ff_vc1_h_loop_filter8_ ## EXT(uint8_t *src, ptrdiff_t stride, int pq); \
 \
@@ -52,9 +53,13 @@ static void vc1_h_loop_filter16_ ## EXT(uint8_t *src, ptrdiff_t stride, int pq)
 }
 
 #if HAVE_X86ASM
-LOOP_FILTER(mmxext)
-LOOP_FILTER(sse2)
-LOOP_FILTER(ssse3)
+LOOP_FILTER4(mmxext)
+#if ARCH_X86_32
+LOOP_FILTER816(mmxext)
+#endif
+LOOP_FILTER816(sse2)
+LOOP_FILTER4(ssse3)
+LOOP_FILTER816(ssse3)
 
 void ff_vc1_h_loop_filter8_sse4(uint8_t *src, ptrdiff_t stride, int pq);
 
@@ -71,12 +76,14 @@ static void vc1_h_loop_filter16_sse4(uint8_t *src, ptrdiff_t stride, int pq)
         ff_ ## OP ## pixels ## DEPTH ## INSN(dst, src, stride, DEPTH);     \
     }
 
-DECLARE_FUNCTION(put_,  8, _mmx)
+#if ARCH_X86_32
 DECLARE_FUNCTION(put_, 16, _mmx)
 DECLARE_FUNCTION(avg_,  8, _mmx)
 DECLARE_FUNCTION(avg_, 16, _mmx)
-DECLARE_FUNCTION(avg_,  8, _mmxext)
 DECLARE_FUNCTION(avg_, 16, _mmxext)
+#endif
+DECLARE_FUNCTION(put_,  8, _mmx)
+DECLARE_FUNCTION(avg_,  8, _mmxext)
 DECLARE_FUNCTION(put_, 16, _sse2)
 DECLARE_FUNCTION(avg_, 16, _sse2)
 
@@ -114,9 +121,10 @@ av_cold void ff_vc1dsp_init_x86(VC1DSPContext *dsp)
         if (EXTERNAL_MMXEXT(cpu_flags))
         ff_vc1dsp_init_mmxext(dsp);
 
-#define ASSIGN_LF(EXT) \
+#define ASSIGN_LF4(EXT) \
         dsp->vc1_v_loop_filter4  = ff_vc1_v_loop_filter4_ ## EXT; \
-        dsp->vc1_h_loop_filter4  = ff_vc1_h_loop_filter4_ ## EXT; \
+        dsp->vc1_h_loop_filter4  = ff_vc1_h_loop_filter4_ ## EXT
+#define ASSIGN_LF816(EXT) \
         dsp->vc1_v_loop_filter8  = ff_vc1_v_loop_filter8_ ## EXT; \
         dsp->vc1_h_loop_filter8  = ff_vc1_h_loop_filter8_ ## EXT; \
         dsp->vc1_v_loop_filter16 = vc1_v_loop_filter16_ ## EXT; \
@@ -127,19 +135,25 @@ av_cold void ff_vc1dsp_init_x86(VC1DSPContext *dsp)
         dsp->put_no_rnd_vc1_chroma_pixels_tab[0] = ff_put_vc1_chroma_mc8_nornd_mmx;
 
         dsp->put_vc1_mspel_pixels_tab[1][0]      = put_vc1_mspel_mc00_8_mmx;
+#if ARCH_X86_32
         dsp->put_vc1_mspel_pixels_tab[0][0]      = put_vc1_mspel_mc00_16_mmx;
         dsp->avg_vc1_mspel_pixels_tab[1][0]      = avg_vc1_mspel_mc00_8_mmx;
         dsp->avg_vc1_mspel_pixels_tab[0][0]      = avg_vc1_mspel_mc00_16_mmx;
     }
     if (EXTERNAL_AMD3DNOW(cpu_flags)) {
         dsp->avg_no_rnd_vc1_chroma_pixels_tab[0] = ff_avg_vc1_chroma_mc8_nornd_3dnow;
+#endif
     }
     if (EXTERNAL_MMXEXT(cpu_flags)) {
-        ASSIGN_LF(mmxext);
-        dsp->avg_no_rnd_vc1_chroma_pixels_tab[0] = ff_avg_vc1_chroma_mc8_nornd_mmxext;
+        ASSIGN_LF4(mmxext);
+#if ARCH_X86_32
+        ASSIGN_LF816(mmxext);
 
-        dsp->avg_vc1_mspel_pixels_tab[1][0]      = avg_vc1_mspel_mc00_8_mmxext;
         dsp->avg_vc1_mspel_pixels_tab[0][0]      = avg_vc1_mspel_mc00_16_mmxext;
+#endif
+        dsp->avg_vc1_mspel_pixels_tab[1][0]      = avg_vc1_mspel_mc00_8_mmxext;
+
+        dsp->avg_no_rnd_vc1_chroma_pixels_tab[0] = ff_avg_vc1_chroma_mc8_nornd_mmxext;
 
         dsp->vc1_inv_trans_8x8_dc                = ff_vc1_inv_trans_8x8_dc_mmxext;
         dsp->vc1_inv_trans_4x8_dc                = ff_vc1_inv_trans_4x8_dc_mmxext;
@@ -156,7 +170,8 @@ av_cold void ff_vc1dsp_init_x86(VC1DSPContext *dsp)
         dsp->avg_vc1_mspel_pixels_tab[0][0]      = avg_vc1_mspel_mc00_16_sse2;
     }
     if (EXTERNAL_SSSE3(cpu_flags)) {
-        ASSIGN_LF(ssse3);
+        ASSIGN_LF4(ssse3);
+        ASSIGN_LF816(ssse3);
         dsp->put_no_rnd_vc1_chroma_pixels_tab[0] = ff_put_vc1_chroma_mc8_nornd_ssse3;
         dsp->avg_no_rnd_vc1_chroma_pixels_tab[0] = ff_avg_vc1_chroma_mc8_nornd_ssse3;
     }
diff --git a/libavcodec/x86/vc1dsp_loopfilter.asm b/libavcodec/x86/vc1dsp_loopfilter.asm
index 74360949dc..3475a682b3 100644
--- a/libavcodec/x86/vc1dsp_loopfilter.asm
+++ b/libavcodec/x86/vc1dsp_loopfilter.asm
@@ -249,6 +249,7 @@ cglobal vc1_h_loop_filter4, 3,5,0
     call vc1_h_loop_filter_internal
     RET
 
+%if ARCH_X86_32
 ; void ff_vc1_v_loop_filter8_mmxext(uint8_t *src, ptrdiff_t stride, int pq)
 cglobal vc1_v_loop_filter8, 3,5,0
     START_V_FILTER
@@ -265,6 +266,7 @@ cglobal vc1_h_loop_filter8, 3,5,0
     lea  r0, [r0+4*r1]
     call vc1_h_loop_filter_internal
     RET
+%endif
 %endmacro
 
 INIT_MMX mmxext
-- 
2.34.1



More information about the ffmpeg-devel mailing list