[FFmpeg-devel] [PATCH] vp9/x86: iwht4x4 (lossless) mmx.

Ronald S. Bultje rsbultje at gmail.com
Tue Jan 21 02:05:13 CET 2014


---
 libavcodec/x86/vp9dsp_init.c |  5 +++++
 libavcodec/x86/vp9itxfm.asm  | 43 +++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 48 insertions(+)

diff --git a/libavcodec/x86/vp9dsp_init.c b/libavcodec/x86/vp9dsp_init.c
index 9c322c1..9e4bc93 100644
--- a/libavcodec/x86/vp9dsp_init.c
+++ b/libavcodec/x86/vp9dsp_init.c
@@ -173,6 +173,7 @@ itxfm_funcs(16, ssse3);
 itxfm_funcs(16, avx);
 itxfm_func(idct, idct, 32, ssse3);
 itxfm_func(idct, idct, 32, avx);
+itxfm_func(iwht, iwht, 4, mmx);
 
 #undef itxfm_func
 #undef itxfm_funcs
@@ -223,6 +224,10 @@ av_cold void ff_vp9dsp_init_x86(VP9DSPContext *dsp)
     if (EXTERNAL_MMX(cpu_flags)) {
         init_fpel(4, 0,  4, put, mmx);
         init_fpel(3, 0,  8, put, mmx);
+        dsp->itxfm_add[4 /* lossless */][DCT_DCT] =
+        dsp->itxfm_add[4 /* lossless */][ADST_DCT] =
+        dsp->itxfm_add[4 /* lossless */][DCT_ADST] =
+        dsp->itxfm_add[4 /* lossless */][ADST_ADST] = ff_vp9_iwht_iwht_4x4_add_mmx;
     }
 
     if (EXTERNAL_SSE(cpu_flags)) {
diff --git a/libavcodec/x86/vp9itxfm.asm b/libavcodec/x86/vp9itxfm.asm
index fe9f99a..3279b53 100644
--- a/libavcodec/x86/vp9itxfm.asm
+++ b/libavcodec/x86/vp9itxfm.asm
@@ -152,6 +152,49 @@ SECTION .text
 %endmacro
 
 ;-------------------------------------------------------------------------------------------
+; void vp9_iwht_iwht_4x4_add_<opt>(uint8_t *dst, ptrdiff_t stride, int16_t *block, int eob);
+;-------------------------------------------------------------------------------------------
+
+%macro VP9_IWHT4_1D 0 ; one 1-D IWHT4 pass over m0-m3 (in place); m4/m5 are temporaries
+    SWAP                 1, 2 ; reorder the names m1-m3 before the arithmetic
+    SWAP                 2, 3 ; (assumes x86inc SWAP only renames registers - TODO confirm)
+    paddw               m0, m2 ; m0 += m2
+    psubw               m3, m1 ; m3 -= m1
+    psubw               m4, m0, m3 ; m4 = m0 - m3
+    psraw               m4, 1 ; m4 >>= 1, arithmetic shift on each 16-bit word
+    psubw               m5, m4, m1 ; m5 = m4 - m1
+    SWAP                 5, 1 ; from here on that difference is referred to as m1
+    psubw               m4, m2 ; m4 -= m2
+    SWAP                 4, 2 ; from here on that difference is referred to as m2
+    psubw               m0, m1 ; m0 -= new m1
+    paddw               m3, m2 ; m3 += new m2
+    SWAP                 2, 3 ; apply the two opening renames again, in reverse order,
+    SWAP                 1, 2 ; so the results end up under the names the caller reads
+%endmacro
+
+INIT_MMX mmx ; presumably selects MMX registers and the _mmx symbol suffix (x86inc)
+cglobal vp9_iwht_iwht_4x4_add, 3, 3, 0, dst, stride, block, eob ; eob is named but never referenced below
+    mova                m0, [blockq+0*8] ; m0 = block[0..3]   (four int16 coefficients)
+    mova                m1, [blockq+1*8] ; m1 = block[4..7]
+    mova                m2, [blockq+2*8] ; m2 = block[8..11]
+    mova                m3, [blockq+3*8] ; m3 = block[12..15]
+    psraw               m0, 2 ; arithmetic >> 2 on every coefficient before the first pass
+    psraw               m1, 2
+    psraw               m2, 2
+    psraw               m3, 2
+
+    VP9_IWHT4_1D ; first 1-D pass
+    TRANSPOSE4x4W        0, 1, 2, 3, 4 ; transpose the 4x4 words in m0-m3 (m4 presumably scratch)
+    VP9_IWHT4_1D ; second 1-D pass, along the other dimension
+
+    pxor                m4, m4 ; m4 = 0
+    VP9_STORE_2X         0, 1, 5, 6, 4 ; presumably adds m0/m1 to two dst rows (macro not in this hunk)
+    lea               dstq, [dstq+strideq*2] ; dst += 2 * stride
+    VP9_STORE_2X         2, 3, 5, 6, 4 ; same for m2/m3 on the next two rows
+    ZERO_BLOCK      blockq, 8, 4, m4 ; presumably clears the coefficient block with the zero in m4
+    RET
+
+;-------------------------------------------------------------------------------------------
 ; void vp9_idct_idct_4x4_add_<opt>(uint8_t *dst, ptrdiff_t stride, int16_t *block, int eob);
 ;-------------------------------------------------------------------------------------------
 
-- 
1.8.4



More information about the ffmpeg-devel mailing list