[FFmpeg-devel] [PATCH 18/41] avcodec/x86/h264_intrapred_init: Disable overridden functions on x64
Andreas Rheinhardt
andreas.rheinhardt at outlook.com
Fri Jun 10 02:55:00 EEST 2022
x64 always has MMX, MMXEXT, SSE and SSE2, which means
that some functions for MMX, MMXEXT, SSE and 3dnow are always
overridden by other functions (unless one explicitly
disables SSE2, for example). This commit therefore disables such
H.264-intrapred-dsp functions at compile-time.
Signed-off-by: Andreas Rheinhardt <andreas.rheinhardt at outlook.com>
---
libavcodec/x86/h264_intrapred.asm | 26 +++++++++++++++++++++++++
libavcodec/x86/h264_intrapred_10bit.asm | 16 +++++++++++++++
libavcodec/x86/h264_intrapred_init.c | 20 +++++++++++++++----
3 files changed, 58 insertions(+), 4 deletions(-)
diff --git a/libavcodec/x86/h264_intrapred.asm b/libavcodec/x86/h264_intrapred.asm
index b36c198fbb..9426598a63 100644
--- a/libavcodec/x86/h264_intrapred.asm
+++ b/libavcodec/x86/h264_intrapred.asm
@@ -48,6 +48,7 @@ cextern pw_8
; void ff_pred16x16_vertical_8(uint8_t *src, ptrdiff_t stride)
;-----------------------------------------------------------------------------
+%if ARCH_X86_32
INIT_MMX mmx
cglobal pred16x16_vertical_8, 2,3
sub r0, r1
@@ -63,6 +64,7 @@ cglobal pred16x16_vertical_8, 2,3
dec r2
jg .loop
REP_RET
+%endif
INIT_XMM sse
cglobal pred16x16_vertical_8, 2,3
@@ -114,8 +116,10 @@ cglobal pred16x16_horizontal_8, 2,3
REP_RET
%endmacro
+%if ARCH_X86_32
INIT_MMX mmx
PRED16x16_H
+%endif
INIT_MMX mmxext
PRED16x16_H
INIT_XMM ssse3
@@ -176,8 +180,10 @@ cglobal pred16x16_dc_8, 2,7
REP_RET
%endmacro
+%if ARCH_X86_32
INIT_MMX mmxext
PRED16x16_DC
+%endif
INIT_XMM sse2
PRED16x16_DC
INIT_XMM ssse3
@@ -187,6 +193,7 @@ PRED16x16_DC
; void ff_pred16x16_tm_vp8_8(uint8_t *src, ptrdiff_t stride)
;-----------------------------------------------------------------------------
+%if ARCH_X86_32
%macro PRED16x16_TM 0
cglobal pred16x16_tm_vp8_8, 2,5
sub r0, r1
@@ -227,6 +234,7 @@ INIT_MMX mmx
PRED16x16_TM
INIT_MMX mmxext
PRED16x16_TM
+%endif
INIT_XMM sse2
cglobal pred16x16_tm_vp8_8, 2,6,6
@@ -565,6 +573,7 @@ cglobal pred16x16_plane_%1_8, 2,9,7
REP_RET
%endmacro
+%if ARCH_X86_32
INIT_MMX mmx
H264_PRED16x16_PLANE h264
H264_PRED16x16_PLANE rv40
@@ -573,6 +582,7 @@ INIT_MMX mmxext
H264_PRED16x16_PLANE h264
H264_PRED16x16_PLANE rv40
H264_PRED16x16_PLANE svq3
+%endif
INIT_XMM sse2
H264_PRED16x16_PLANE h264
H264_PRED16x16_PLANE rv40
@@ -747,10 +757,12 @@ ALIGN 16
REP_RET
%endmacro
+%if ARCH_X86_32
INIT_MMX mmx
H264_PRED8x8_PLANE
INIT_MMX mmxext
H264_PRED8x8_PLANE
+%endif
INIT_XMM sse2
H264_PRED8x8_PLANE
INIT_XMM ssse3
@@ -794,8 +806,10 @@ cglobal pred8x8_horizontal_8, 2,3
REP_RET
%endmacro
+%if ARCH_X86_32
INIT_MMX mmx
PRED8x8_H
+%endif
INIT_MMX mmxext
PRED8x8_H
INIT_MMX ssse3
@@ -937,6 +951,7 @@ cglobal pred8x8_dc_rv40_8, 2,7
; void ff_pred8x8_tm_vp8_8(uint8_t *src, ptrdiff_t stride)
;-----------------------------------------------------------------------------
+%if ARCH_X86_32
%macro PRED8x8_TM 0
cglobal pred8x8_tm_vp8_8, 2,6
sub r0, r1
@@ -976,6 +991,7 @@ INIT_MMX mmx
PRED8x8_TM
INIT_MMX mmxext
PRED8x8_TM
+%endif
INIT_XMM sse2
cglobal pred8x8_tm_vp8_8, 2,6,4
@@ -1333,6 +1349,7 @@ PRED8x8L_VERTICAL
; int has_topright, ptrdiff_t stride)
;-----------------------------------------------------------------------------
+%if ARCH_X86_32
INIT_MMX mmxext
cglobal pred8x8l_down_left_8, 4,5
sub r0, r3
@@ -1440,6 +1457,7 @@ cglobal pred8x8l_down_left_8, 4,5
por mm1, mm0
movq [r0+r3*1], mm1
RET
+%endif
%macro PRED8x8L_DOWN_LEFT 0
cglobal pred8x8l_down_left_8, 4,4
@@ -1534,6 +1552,7 @@ PRED8x8L_DOWN_LEFT
; int has_topright, ptrdiff_t stride)
;-----------------------------------------------------------------------------
+%if ARCH_X86_32
INIT_MMX mmxext
cglobal pred8x8l_down_right_8, 4,5
sub r0, r3
@@ -1665,6 +1684,7 @@ cglobal pred8x8l_down_right_8, 4,5
por mm0, mm1
movq [r0+r3*1], mm0
RET
+%endif
%macro PRED8x8L_DOWN_RIGHT 0
cglobal pred8x8l_down_right_8, 4,5
@@ -1786,6 +1806,7 @@ PRED8x8L_DOWN_RIGHT
; int has_topright, ptrdiff_t stride)
;-----------------------------------------------------------------------------
+%if ARCH_X86_32
INIT_MMX mmxext
cglobal pred8x8l_vertical_right_8, 4,5
sub r0, r3
@@ -1892,6 +1913,7 @@ cglobal pred8x8l_vertical_right_8, 4,5
PALIGNR mm5, mm0, 7, mm1
movq [r4+r3*2], mm5
RET
+%endif
%macro PRED8x8L_VERTICAL_RIGHT 0
cglobal pred8x8l_vertical_right_8, 4,5,7
@@ -2192,6 +2214,7 @@ PRED8x8L_HORIZONTAL_UP
; int has_topright, ptrdiff_t stride)
;-----------------------------------------------------------------------------
+%if ARCH_X86_32
INIT_MMX mmxext
cglobal pred8x8l_horizontal_down_8, 4,5
sub r0, r3
@@ -2306,6 +2329,7 @@ cglobal pred8x8l_horizontal_down_8, 4,5
PALIGNR mm3, mm4, 6, mm4
movq [r0+r3*1], mm3
RET
+%endif
%macro PRED8x8L_HORIZONTAL_DOWN 0
cglobal pred8x8l_horizontal_down_8, 4,5
@@ -2508,8 +2532,10 @@ cglobal pred4x4_tm_vp8_8, 3,6
REP_RET
%endmacro
+%if ARCH_X86_32
INIT_MMX mmx
PRED4x4_TM
+%endif
INIT_MMX mmxext
PRED4x4_TM
diff --git a/libavcodec/x86/h264_intrapred_10bit.asm b/libavcodec/x86/h264_intrapred_10bit.asm
index 629e0a72e3..e978d91ff1 100644
--- a/libavcodec/x86/h264_intrapred_10bit.asm
+++ b/libavcodec/x86/h264_intrapred_10bit.asm
@@ -411,8 +411,10 @@ cglobal pred8x8_dc_10, 2, 6
RET
%endmacro
+%if ARCH_X86_32
INIT_MMX mmxext
PRED8x8_DC pshufw
+%endif
INIT_XMM sse2
PRED8x8_DC pshuflw
@@ -526,8 +528,10 @@ cglobal pred8x8l_128_dc_10, 4, 4
RET
%endmacro
+%if ARCH_X86_32
INIT_MMX mmxext
PRED8x8L_128_DC
+%endif
INIT_XMM sse2
PRED8x8L_128_DC
@@ -1033,8 +1037,10 @@ cglobal pred16x16_vertical_10, 2, 3
REP_RET
%endmacro
+%if ARCH_X86_32
INIT_MMX mmxext
PRED16x16_VERTICAL
+%endif
INIT_XMM sse2
PRED16x16_VERTICAL
@@ -1057,8 +1063,10 @@ cglobal pred16x16_horizontal_10, 2, 3
REP_RET
%endmacro
+%if ARCH_X86_32
INIT_MMX mmxext
PRED16x16_HORIZONTAL
+%endif
INIT_XMM sse2
PRED16x16_HORIZONTAL
@@ -1103,8 +1111,10 @@ cglobal pred16x16_dc_10, 2, 6
REP_RET
%endmacro
+%if ARCH_X86_32
INIT_MMX mmxext
PRED16x16_DC
+%endif
INIT_XMM sse2
PRED16x16_DC
@@ -1135,8 +1145,10 @@ cglobal pred16x16_top_dc_10, 2, 3
REP_RET
%endmacro
+%if ARCH_X86_32
INIT_MMX mmxext
PRED16x16_TOP_DC
+%endif
INIT_XMM sse2
PRED16x16_TOP_DC
@@ -1172,8 +1184,10 @@ cglobal pred16x16_left_dc_10, 2, 6
REP_RET
%endmacro
+%if ARCH_X86_32
INIT_MMX mmxext
PRED16x16_LEFT_DC
+%endif
INIT_XMM sse2
PRED16x16_LEFT_DC
@@ -1193,7 +1207,9 @@ cglobal pred16x16_128_dc_10, 2,3
REP_RET
%endmacro
+%if ARCH_X86_32
INIT_MMX mmxext
PRED16x16_128_DC
+%endif
INIT_XMM sse2
PRED16x16_128_DC
diff --git a/libavcodec/x86/h264_intrapred_init.c b/libavcodec/x86/h264_intrapred_init.c
index a95cfbca55..b4b04beff5 100644
--- a/libavcodec/x86/h264_intrapred_init.c
+++ b/libavcodec/x86/h264_intrapred_init.c
@@ -193,10 +193,13 @@ av_cold void ff_h264_pred_init_x86(H264PredContext *h, int codec_id,
if (bit_depth == 8) {
if (EXTERNAL_MMX(cpu_flags)) {
+#if ARCH_X86_32
h->pred16x16[VERT_PRED8x8 ] = ff_pred16x16_vertical_8_mmx;
h->pred16x16[HOR_PRED8x8 ] = ff_pred16x16_horizontal_8_mmx;
+#endif
if (chroma_format_idc <= 1) {
h->pred8x8 [VERT_PRED8x8 ] = ff_pred8x8_vertical_8_mmx;
+#if ARCH_X86_32
h->pred8x8 [HOR_PRED8x8 ] = ff_pred8x8_horizontal_8_mmx;
}
if (codec_id == AV_CODEC_ID_VP7 || codec_id == AV_CODEC_ID_VP8) {
@@ -214,23 +217,28 @@ av_cold void ff_h264_pred_init_x86(H264PredContext *h, int codec_id,
} else {
h->pred16x16[PLANE_PRED8x8] = ff_pred16x16_plane_h264_8_mmx;
}
+#endif
}
}
if (EXTERNAL_MMXEXT(cpu_flags)) {
h->pred16x16[HOR_PRED8x8 ] = ff_pred16x16_horizontal_8_mmxext;
+#if ARCH_X86_32
h->pred16x16[DC_PRED8x8 ] = ff_pred16x16_dc_8_mmxext;
+#endif
if (chroma_format_idc <= 1)
h->pred8x8[HOR_PRED8x8 ] = ff_pred8x8_horizontal_8_mmxext;
h->pred8x8l [TOP_DC_PRED ] = ff_pred8x8l_top_dc_8_mmxext;
h->pred8x8l [DC_PRED ] = ff_pred8x8l_dc_8_mmxext;
h->pred8x8l [HOR_PRED ] = ff_pred8x8l_horizontal_8_mmxext;
h->pred8x8l [VERT_PRED ] = ff_pred8x8l_vertical_8_mmxext;
- h->pred8x8l [DIAG_DOWN_RIGHT_PRED ] = ff_pred8x8l_down_right_8_mmxext;
- h->pred8x8l [VERT_RIGHT_PRED ] = ff_pred8x8l_vertical_right_8_mmxext;
h->pred8x8l [HOR_UP_PRED ] = ff_pred8x8l_horizontal_up_8_mmxext;
+#if ARCH_X86_32
h->pred8x8l [DIAG_DOWN_LEFT_PRED ] = ff_pred8x8l_down_left_8_mmxext;
+ h->pred8x8l [DIAG_DOWN_RIGHT_PRED ] = ff_pred8x8l_down_right_8_mmxext;
+ h->pred8x8l [VERT_RIGHT_PRED ] = ff_pred8x8l_vertical_right_8_mmxext;
h->pred8x8l [HOR_DOWN_PRED ] = ff_pred8x8l_horizontal_down_8_mmxext;
+#endif
h->pred4x4 [DIAG_DOWN_RIGHT_PRED ] = ff_pred4x4_down_right_8_mmxext;
h->pred4x4 [VERT_RIGHT_PRED ] = ff_pred4x4_vertical_right_8_mmxext;
h->pred4x4 [HOR_DOWN_PRED ] = ff_pred4x4_horizontal_down_8_mmxext;
@@ -252,11 +260,12 @@ av_cold void ff_h264_pred_init_x86(H264PredContext *h, int codec_id,
}
}
if (codec_id == AV_CODEC_ID_VP7 || codec_id == AV_CODEC_ID_VP8) {
- h->pred16x16[PLANE_PRED8x8 ] = ff_pred16x16_tm_vp8_8_mmxext;
h->pred8x8 [DC_PRED8x8 ] = ff_pred8x8_dc_rv40_8_mmxext;
- h->pred8x8 [PLANE_PRED8x8 ] = ff_pred8x8_tm_vp8_8_mmxext;
h->pred4x4 [TM_VP8_PRED ] = ff_pred4x4_tm_vp8_8_mmxext;
h->pred4x4 [VERT_PRED ] = ff_pred4x4_vertical_vp8_8_mmxext;
+#if ARCH_X86_32
+ h->pred16x16[PLANE_PRED8x8 ] = ff_pred16x16_tm_vp8_8_mmxext;
+ h->pred8x8 [PLANE_PRED8x8 ] = ff_pred8x8_tm_vp8_8_mmxext;
} else {
if (chroma_format_idc <= 1)
h->pred8x8 [PLANE_PRED8x8] = ff_pred8x8_plane_8_mmxext;
@@ -267,6 +276,7 @@ av_cold void ff_h264_pred_init_x86(H264PredContext *h, int codec_id,
} else {
h->pred16x16[PLANE_PRED8x8 ] = ff_pred16x16_plane_h264_8_mmxext;
}
+#endif
}
}
@@ -338,6 +348,7 @@ av_cold void ff_h264_pred_init_x86(H264PredContext *h, int codec_id,
h->pred4x4[DC_PRED ] = ff_pred4x4_dc_10_mmxext;
h->pred4x4[HOR_UP_PRED ] = ff_pred4x4_horizontal_up_10_mmxext;
+#if ARCH_X86_32
if (chroma_format_idc <= 1)
h->pred8x8[DC_PRED8x8 ] = ff_pred8x8_dc_10_mmxext;
@@ -349,6 +360,7 @@ av_cold void ff_h264_pred_init_x86(H264PredContext *h, int codec_id,
h->pred16x16[LEFT_DC_PRED8x8 ] = ff_pred16x16_left_dc_10_mmxext;
h->pred16x16[VERT_PRED8x8 ] = ff_pred16x16_vertical_10_mmxext;
h->pred16x16[HOR_PRED8x8 ] = ff_pred16x16_horizontal_10_mmxext;
+#endif
}
if (EXTERNAL_SSE2(cpu_flags)) {
h->pred4x4[DIAG_DOWN_LEFT_PRED ] = ff_pred4x4_down_left_10_sse2;
--
2.34.1
More information about the ffmpeg-devel
mailing list