[FFmpeg-devel] [PATCH] vp9/x86: iwht4x4 (lossless) mmx.

Clément Bœsch u at pkh.me
Wed Jan 22 07:48:57 CET 2014


On Mon, Jan 20, 2014 at 08:05:13PM -0500, Ronald S. Bultje wrote:
> ---
>  libavcodec/x86/vp9dsp_init.c |  5 +++++
>  libavcodec/x86/vp9itxfm.asm  | 43 +++++++++++++++++++++++++++++++++++++++++++
>  2 files changed, 48 insertions(+)
> 
> diff --git a/libavcodec/x86/vp9dsp_init.c b/libavcodec/x86/vp9dsp_init.c
> index 9c322c1..9e4bc93 100644
> --- a/libavcodec/x86/vp9dsp_init.c
> +++ b/libavcodec/x86/vp9dsp_init.c
> @@ -173,6 +173,7 @@ itxfm_funcs(16, ssse3);
>  itxfm_funcs(16, avx);
>  itxfm_func(idct, idct, 32, ssse3);
>  itxfm_func(idct, idct, 32, avx);
> +itxfm_func(iwht, iwht, 4, mmx);
>  
>  #undef itxfm_func
>  #undef itxfm_funcs
> @@ -223,6 +224,10 @@ av_cold void ff_vp9dsp_init_x86(VP9DSPContext *dsp)
>      if (EXTERNAL_MMX(cpu_flags)) {
>          init_fpel(4, 0,  4, put, mmx);
>          init_fpel(3, 0,  8, put, mmx);
> +        dsp->itxfm_add[4 /* lossless */][DCT_DCT] =
> +        dsp->itxfm_add[4 /* lossless */][ADST_DCT] =
> +        dsp->itxfm_add[4 /* lossless */][DCT_ADST] =
> +        dsp->itxfm_add[4 /* lossless */][ADST_ADST] = ff_vp9_iwht_iwht_4x4_add_mmx;
>      }
>  
>      if (EXTERNAL_SSE(cpu_flags)) {
> diff --git a/libavcodec/x86/vp9itxfm.asm b/libavcodec/x86/vp9itxfm.asm
> index fe9f99a..3279b53 100644
> --- a/libavcodec/x86/vp9itxfm.asm
> +++ b/libavcodec/x86/vp9itxfm.asm
> @@ -152,6 +152,49 @@ SECTION .text
>  %endmacro
>  
>  ;-------------------------------------------------------------------------------------------
> +; void vp9_iwht_iwht_4x4_add_<opt>(uint8_t *dst, ptrdiff_t stride, int16_t *block, int eob);
> +;-------------------------------------------------------------------------------------------
> +
> +%macro VP9_IWHT4_1D 0
> +    SWAP                 1, 2
> +    SWAP                 2, 3

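Again, here and below, each pair of two-argument SWAPs can probably be merged into a single multi-argument one: SWAP 1, 2, 3 at the start of the macro and SWAP 3, 2, 1 at the end.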

> +    paddw               m0, m2
> +    psubw               m3, m1
> +    psubw               m4, m0, m3
> +    psraw               m4, 1
> +    psubw               m5, m4, m1
> +    SWAP                 5, 1
> +    psubw               m4, m2
> +    SWAP                 4, 2
> +    psubw               m0, m1
> +    paddw               m3, m2
> +    SWAP                 2, 3
> +    SWAP                 1, 2
> +%endmacro
> +
> +INIT_MMX mmx
> +cglobal vp9_iwht_iwht_4x4_add, 3, 3, 0, dst, stride, block, eob
> +    mova                m0, [blockq+0*8]
> +    mova                m1, [blockq+1*8]
> +    mova                m2, [blockq+2*8]
> +    mova                m3, [blockq+3*8]
> +    psraw               m0, 2
> +    psraw               m1, 2
> +    psraw               m2, 2
> +    psraw               m3, 2
> +
> +    VP9_IWHT4_1D
> +    TRANSPOSE4x4W        0, 1, 2, 3, 4
> +    VP9_IWHT4_1D
> +
> +    pxor                m4, m4
> +    VP9_STORE_2X         0, 1, 5, 6, 4
> +    lea               dstq, [dstq+strideq*2]
> +    VP9_STORE_2X         2, 3, 5, 6, 4
> +    ZERO_BLOCK      blockq, 8, 4, m4
> +    RET
> +

The rest is probably OK :)

-- 
Clément B.
-------------- next part --------------
A non-text attachment was scrubbed...
Name: not available
Type: application/pgp-signature
Size: 490 bytes
Desc: not available
URL: <http://ffmpeg.org/pipermail/ffmpeg-devel/attachments/20140122/d581a389/attachment.asc>


More information about the ffmpeg-devel mailing list