[FFmpeg-devel] [PATCH] x86/hevc_deblock: add ff_hevc_[hv]_loop_filter_luma_{8, 10}_sse2
James Almer
jamrial at gmail.com
Sun Jul 13 08:00:50 CEST 2014
Signed-off-by: James Almer <jamrial at gmail.com>
---
libavcodec/x86/hevc_deblock.asm | 8 +++++++-
libavcodec/x86/hevcdsp_init.c | 31 ++++++++++++++++++++-----------
2 files changed, 27 insertions(+), 12 deletions(-)
diff --git a/libavcodec/x86/hevc_deblock.asm b/libavcodec/x86/hevc_deblock.asm
index d23cac7..c035668 100644
--- a/libavcodec/x86/hevc_deblock.asm
+++ b/libavcodec/x86/hevc_deblock.asm
@@ -728,7 +728,7 @@ cglobal hevc_h_loop_filter_chroma_10, 3, 4, 7, pix, stride, tc, pix0
RET
%if ARCH_X86_64
-INIT_XMM ssse3
+%macro LOOP_FILTER_LUMA 0
;-----------------------------------------------------------------------------
; void ff_hevc_v_loop_filter_luma(uint8_t *_pix, ptrdiff_t _stride, int *_beta, int *_tc, uint8_t *_no_p, uint8_t *_no_q);
;-----------------------------------------------------------------------------
@@ -828,4 +828,10 @@ cglobal hevc_h_loop_filter_luma_10, 4, 15, 16, pix, stride, beta, tc, count, pix
movdqu [pixq+2*strideq], m6; q2
.bypassluma:
RET
+%endmacro
+
+INIT_XMM sse2
+LOOP_FILTER_LUMA
+INIT_XMM ssse3
+LOOP_FILTER_LUMA
%endif
diff --git a/libavcodec/x86/hevcdsp_init.c b/libavcodec/x86/hevcdsp_init.c
index cad236d..2c76766 100644
--- a/libavcodec/x86/hevcdsp_init.c
+++ b/libavcodec/x86/hevcdsp_init.c
@@ -36,18 +36,20 @@ void ff_hevc_ ## DIR ## _loop_filter_chroma_ ## DEPTH ## _ ## OPT(uint8_t *_pix,
void ff_hevc_ ## DIR ## _loop_filter_luma_ ## DEPTH ## _ ## OPT(uint8_t *_pix, ptrdiff_t stride, int *_beta, int *_tc, \
uint8_t *_no_p, uint8_t *_no_q);
-#define LFC_FUNCS(type, depth) \
-LFC_FUNC(h, depth, sse2) \
-LFC_FUNC(v, depth, sse2)
+#define LFC_FUNCS(type, depth, opt) \
+LFC_FUNC(h, depth, opt) \
+LFC_FUNC(v, depth, opt)
-#define LFL_FUNCS(type, depth) \
-LFL_FUNC(h, depth, ssse3) \
-LFL_FUNC(v, depth, ssse3)
+#define LFL_FUNCS(type, depth, opt) \
+LFL_FUNC(h, depth, opt) \
+LFL_FUNC(v, depth, opt)
-LFC_FUNCS(uint8_t, 8)
-LFC_FUNCS(uint8_t, 10)
-LFL_FUNCS(uint8_t, 8)
-LFL_FUNCS(uint8_t, 10)
+LFC_FUNCS(uint8_t, 8, sse2)
+LFC_FUNCS(uint8_t, 10, sse2)
+LFL_FUNCS(uint8_t, 8, sse2)
+LFL_FUNCS(uint8_t, 10, sse2)
+LFL_FUNCS(uint8_t, 8, ssse3)
+LFL_FUNCS(uint8_t, 10, ssse3)
#if HAVE_SSE2_EXTERNAL
void ff_hevc_idct32_dc_add_8_sse2(uint8_t *dst, int16_t *coeffs, ptrdiff_t stride)
@@ -429,6 +431,10 @@ void ff_hevcdsp_init_x86(HEVCDSPContext *c, const int bit_depth)
if (EXTERNAL_SSE2(mm_flags)) {
c->hevc_v_loop_filter_chroma = ff_hevc_v_loop_filter_chroma_8_sse2;
c->hevc_h_loop_filter_chroma = ff_hevc_h_loop_filter_chroma_8_sse2;
+ if (ARCH_X86_64) {
+ c->hevc_v_loop_filter_luma = ff_hevc_v_loop_filter_luma_8_sse2;
+ c->hevc_h_loop_filter_luma = ff_hevc_h_loop_filter_luma_8_sse2;
+ }
c->transform_dc_add[2] = ff_hevc_idct16_dc_add_8_sse2;
c->transform_dc_add[3] = ff_hevc_idct32_dc_add_8_sse2;
@@ -460,7 +466,10 @@ void ff_hevcdsp_init_x86(HEVCDSPContext *c, const int bit_depth)
if (EXTERNAL_SSE2(mm_flags)) {
c->hevc_v_loop_filter_chroma = ff_hevc_v_loop_filter_chroma_10_sse2;
c->hevc_h_loop_filter_chroma = ff_hevc_h_loop_filter_chroma_10_sse2;
-
+ if (ARCH_X86_64) {
+ c->hevc_v_loop_filter_luma = ff_hevc_v_loop_filter_luma_10_sse2;
+ c->hevc_h_loop_filter_luma = ff_hevc_h_loop_filter_luma_10_sse2;
+ }
c->transform_dc_add[1] = ff_hevc_idct8_dc_add_10_sse2;
c->transform_dc_add[2] = ff_hevc_idct16_dc_add_10_sse2;
--
1.8.5.5
More information about the ffmpeg-devel
mailing list