[FFmpeg-devel] [PATCH 5/6] avcodec/h264: add avx 8-bit h264_idct_dc_add

James Darnley jdarnley at obe.tv
Sat Apr 15 04:46:17 EEST 2017


Haswell:
 - 1.02x faster (405±0.7 vs. 397±0.8 decicycles) compared with mmxext

Skylake-U:
 - 1.06x faster (498±1.8 vs. 470±1.3 decicycles) compared with mmxext
---
 libavcodec/x86/h264_idct.asm  | 20 ++++++++++++++++++++
 libavcodec/x86/h264dsp_init.c |  2 ++
 2 files changed, 22 insertions(+)

diff --git a/libavcodec/x86/h264_idct.asm b/libavcodec/x86/h264_idct.asm
index 24fb4d2..43f7791 100644
--- a/libavcodec/x86/h264_idct.asm
+++ b/libavcodec/x86/h264_idct.asm
@@ -1158,7 +1158,27 @@ INIT_XMM avx
     movd  [%7+%8], %4
 %endmacro
 
+%macro DC_ADD_INIT 1
+    add      %1d, 32
+    sar      %1d, 6
+    movd     m0, %1d
+    pshuflw  m0, m0, 0
+    lea      %1, [3*stride_q]
+    pxor     m1, m1
+    psubw    m1, m0
+    packuswb m0, m0
+    packuswb m1, m1
+%endmacro
+
 cglobal h264_idct_add_8, 3, 3, 8, dst_, block_, stride_
     movsxdifnidn stride_q, stride_d
     IDCT4_ADD    dst_q, block_q, stride_q
 RET
+
+cglobal h264_idct_dc_add_8, 3, 4, 6, dst_, block_, stride_
+    movsxdifnidn stride_q, stride_d
+    movsx             r3d, word [block_q]
+    mov   dword [block_q], 0
+    DC_ADD_INIT r3
+    DC_ADD_MMXEXT_OP movd, dst_q, stride_q, r3
+RET
diff --git a/libavcodec/x86/h264dsp_init.c b/libavcodec/x86/h264dsp_init.c
index 8ba085f..bf74937 100644
--- a/libavcodec/x86/h264dsp_init.c
+++ b/libavcodec/x86/h264dsp_init.c
@@ -35,6 +35,7 @@ IDCT_ADD_FUNC(, 8, mmx)
 IDCT_ADD_FUNC(, 8, avx)
 IDCT_ADD_FUNC(, 10, sse2)
 IDCT_ADD_FUNC(_dc, 8, mmxext)
+IDCT_ADD_FUNC(_dc, 8, avx)
 IDCT_ADD_FUNC(_dc, 10, mmxext)
 IDCT_ADD_FUNC(8_dc, 8, mmxext)
 IDCT_ADD_FUNC(8_dc, 10, sse2)
@@ -340,6 +341,7 @@ av_cold void ff_h264dsp_init_x86(H264DSPContext *c, const int bit_depth,
             }
 
             c->h264_idct_add        = ff_h264_idct_add_8_avx;
+            c->h264_idct_dc_add     = ff_h264_idct_dc_add_8_avx;
         }
     } else if (bit_depth == 10) {
         if (EXTERNAL_MMXEXT(cpu_flags)) {
-- 
2.8.3



More information about the ffmpeg-devel mailing list