[FFmpeg-devel] [PATCH 07/10] h264_idct8_dc_add

James Darnley jdarnley at obe.tv
Fri Mar 17 15:18:42 EET 2017


1.12x faster (638±12.7 vs. 568±4.3 decicycles) compared with mmxext
---
 libavcodec/x86/h264_idct.asm  | 11 +++++++++++
 libavcodec/x86/h264dsp_init.c |  2 ++
 2 files changed, 13 insertions(+)

diff --git a/libavcodec/x86/h264_idct.asm b/libavcodec/x86/h264_idct.asm
index c4b6e55..a74e095 100644
--- a/libavcodec/x86/h264_idct.asm
+++ b/libavcodec/x86/h264_idct.asm
@@ -1188,3 +1188,14 @@ cglobal h264_idct_dc_add_8, 3, 4, 0, dst_, block_, stride_
     DC_ADD_INIT r3
     DC_ADD_MMXEXT_OP movd, dst_q, stride_q, r3
 RET
+
+; 8-bit 8x8 DC-only IDCT add. NOTE(review): commit message measures ~1.12x over mmxext; the old "not any faster" remark looked stale - confirm.
+cglobal h264_idct8_dc_add_8, 3, 4, 0, dst_, block_, stride_
+    movsxdifnidn stride_q, stride_d             ; widen the 32-bit stride to pointer size where the ABI needs it
+    movsx             r3d, word [block_q]       ; r3d = sign-extended first 16-bit coefficient of the block
+    mov   dword [block_q], 0                    ; clear the first 4 bytes of the block once that coefficient is loaded
+    DC_ADD_INIT r3                              ; macro defined elsewhere in this file; presumably builds the DC add/sub vectors from r3
+    DC_ADD_MMXEXT_OP movq, dst_q, stride_q, r3  ; first group of rows, 8 bytes each via movq (presumably 4 rows, given the advance below)
+    lea          dst_q, [dst_q + stride_q*4]    ; dst += 4 * stride
+    DC_ADD_MMXEXT_OP movq, dst_q, stride_q, r3  ; same operation on the rows starting at the advanced dst
+RET
diff --git a/libavcodec/x86/h264dsp_init.c b/libavcodec/x86/h264dsp_init.c
index 1aa66a8..de7becf 100644
--- a/libavcodec/x86/h264dsp_init.c
+++ b/libavcodec/x86/h264dsp_init.c
@@ -38,6 +38,7 @@ IDCT_ADD_FUNC(_dc, 8, mmxext)
 IDCT_ADD_FUNC(_dc, 8, avx)
 IDCT_ADD_FUNC(_dc, 10, mmxext)
 IDCT_ADD_FUNC(8_dc, 8, mmxext)
+IDCT_ADD_FUNC(8_dc, 8, avx)
 IDCT_ADD_FUNC(8_dc, 10, sse2)
 IDCT_ADD_FUNC(8, 8, mmx)
 IDCT_ADD_FUNC(8, 8, sse2)
@@ -344,6 +345,7 @@ av_cold void ff_h264dsp_init_x86(H264DSPContext *c, const int bit_depth,
             c->h264_idct_add        = ff_h264_idct_add_8_avx;
             c->h264_idct8_add       = ff_h264_idct8_add_8_avx;
             c->h264_idct_dc_add     = ff_h264_idct_dc_add_8_avx;
+            c->h264_idct8_dc_add    = ff_h264_idct8_dc_add_8_avx;
         }
     } else if (bit_depth == 10) {
         if (EXTERNAL_MMXEXT(cpu_flags)) {
-- 
2.8.3



More information about the ffmpeg-devel mailing list