[FFmpeg-devel] [PATCH 01/15] vp9/x86: save one register in loopfilter surface coverage.

Clément Bœsch u at pkh.me
Sat Dec 27 17:24:36 CET 2014


On Sat, Dec 27, 2014 at 11:02:36AM -0500, Ronald S. Bultje wrote:
> ---
>  libavcodec/x86/vp9lpf.asm | 56 +++++++++++++++++++++++------------------------
>  1 file changed, 28 insertions(+), 28 deletions(-)
> 
> diff --git a/libavcodec/x86/vp9lpf.asm b/libavcodec/x86/vp9lpf.asm
> index 416f08f..e0f7386 100644
> --- a/libavcodec/x86/vp9lpf.asm
> +++ b/libavcodec/x86/vp9lpf.asm
> @@ -278,22 +278,22 @@ SECTION .text
>  %endmacro
>  
>  %macro DEFINE_REAL_P7_TO_Q7 0-1 0
> -%define P7 dst1q + 2*mstrideq  + %1
> -%define P6 dst1q +   mstrideq  + %1
> -%define P5 dst1q               + %1
> -%define P4 dst1q +    strideq  + %1
> -%define P3 dstq  + 4*mstrideq  + %1
> -%define P2 dstq  +   mstride3q + %1
> -%define P1 dstq  + 2*mstrideq  + %1
> -%define P0 dstq  +   mstrideq  + %1
> -%define Q0 dstq                + %1
> -%define Q1 dstq  +   strideq   + %1
> -%define Q2 dstq  + 2*strideq   + %1
> -%define Q3 dstq  +   stride3q  + %1
> -%define Q4 dstq  + 4*strideq   + %1
> -%define Q5 dst2q + mstrideq    + %1
> -%define Q6 dst2q               + %1
> -%define Q7 dst2q +  strideq    + %1
> +%define P7 dstq  + 4*mstrideq  + %1
> +%define P6 dstq  +   mstride3q + %1
> +%define P5 dstq  + 2*mstrideq  + %1
> +%define P4 dstq  +   mstrideq  + %1
> +%define P3 dstq                + %1
> +%define P2 dstq  +    strideq  + %1
> +%define P1 dstq  + 2* strideq  + %1
> +%define P0 dstq  +    stride3q + %1
> +%define Q0 dstq  + 4* strideq  + %1
> +%define Q1 dst2q +   mstride3q + %1
> +%define Q2 dst2q + 2*mstrideq  + %1
> +%define Q3 dst2q +   mstrideq  + %1
> +%define Q4 dst2q               + %1
> +%define Q5 dst2q +    strideq  + %1
> +%define Q6 dst2q + 2* strideq  + %1
> +%define Q7 dst2q +    stride3q + %1
>  %endmacro
>  
>  ; ..............AB -> AAAAAAAABBBBBBBB
> @@ -308,26 +308,26 @@ SECTION .text
>  %endmacro
>  
>  %macro LOOPFILTER 2 ; %1=v/h %2=size1
> -    lea mstrideq, [strideq]
> -    neg mstrideq
> +    mov               mstrideq, strideq
> +    neg               mstrideq
>  
> -    lea stride3q, [strideq+2*strideq]
> -    mov mstride3q, stride3q
> -    neg mstride3q
> +    lea               stride3q, [strideq*3]
> +    lea              mstride3q, [mstrideq*3]
>  
>  %ifidn %1, h
>  %if %2 > 16
>  %define movx movh
> -    lea dstq, [dstq + 8*strideq - 4]
> +    lea                   dstq, [dstq + 4*strideq - 4]
>  %else
>  %define movx movu
> -    lea dstq, [dstq + 8*strideq - 8] ; go from top center (h pos) to center left (v pos)
> +    lea                   dstq, [dstq + 4*strideq - 8] ; go from top center (h pos) to center left (v pos)
>  %endif
> +    lea                  dst2q, [dstq + 8*strideq]
> +%else
> +    lea                   dstq, [dstq + 4*mstrideq]
> +    lea                  dst2q, [dstq + 8*strideq]
>  %endif
>  
> -    lea dst1q, [dstq + 2*mstride3q]                         ; dst1q = &dst[stride * -6]
> -    lea dst2q, [dstq + 2* stride3q]                         ; dst2q = &dst[stride * +6]
> -
>      DEFINE_REAL_P7_TO_Q7
>  
>  %ifidn %1, h
> @@ -795,9 +795,9 @@ SECTION .text
>  
>  %macro LPF_16_VH 2
>  INIT_XMM %2
> -cglobal vp9_loop_filter_v_%1_16, 5,10,16,      dst, stride, E, I, H, mstride, dst1, dst2, stride3, mstride3
> +cglobal vp9_loop_filter_v_%1_16, 5,10,16,      dst, stride, E, I, H, mstride, dst2, stride3, mstride3

Since you drop one register (dst1), the register count in the cglobal line should probably go from 10 to 9, i.e. become 5,9,16.

>      LOOPFILTER v, %1
> -cglobal vp9_loop_filter_h_%1_16, 5,10,16, 256, dst, stride, E, I, H, mstride, dst1, dst2, stride3, mstride3
> +cglobal vp9_loop_filter_h_%1_16, 5,10,16, 256, dst, stride, E, I, H, mstride, dst2, stride3, mstride3

Ditto here: with dst1 dropped, this should probably become 5,9,16, 256.

LGTM otherwise

-- 
Clément B.
-------------- next part --------------
A non-text attachment was scrubbed...
Name: not available
Type: application/pgp-signature
Size: 473 bytes
Desc: not available
URL: <https://ffmpeg.org/pipermail/ffmpeg-devel/attachments/20141227/58729372/attachment.asc>


More information about the ffmpeg-devel mailing list