[FFmpeg-devel] [PATCH 2/3] x86/hevcdsp: add ff_hevc_sao_edge_filter_8_{ssse3, avx2}

James Almer jamrial at gmail.com
Fri Feb 6 16:41:51 CET 2015


On 06/02/15 9:49 AM, Christophe Gisquet wrote:
> diff --git a/libavcodec/x86/hevc_sao.asm b/libavcodec/x86/hevc_sao.asm
> index 5136121..8619716 100644
> --- a/libavcodec/x86/hevc_sao.asm
> +++ b/libavcodec/x86/hevc_sao.asm
> @@ -296,14 +296,16 @@ HEVC_SAO_BAND_FILTER_16 12, 64, 2
>  %if WIN64
>  cglobal hevc_sao_edge_filter_%1_8, 4, 8, 8, dst, src, dststride, offset, a_stride, b_stride, height, tmp
>  %define  eoq heightq
> -    movsxd           eoq, dword r4m
> -    movsx      a_strideq, byte [pb_eo+eoq*4+1]
> -    movsx      b_strideq, byte [pb_eo+eoq*4+3]
> +    movsxd     b_strideq, dword r4m
> +    lea             tmpq, [pb_eo]
> +    lea              eoq, [tmpq+4*b_strideq]
> +    movsx      a_strideq, byte [eoq+1]
> +    movsx      b_strideq, byte [eoq+3]
>      imul       a_strideq, EDGE_SRCSTRIDE
>      imul       b_strideq, EDGE_SRCSTRIDE
> -    movsx           tmpq, byte [pb_eo+eoq*4]
> +    movsx           tmpq, byte [eoq]
>      add        a_strideq, tmpq
> -    movsx           tmpq, byte [pb_eo+eoq*4+2]
> +    movsx           tmpq, byte [eoq+2]
>      add        b_strideq, tmpq
>      mov          heightd, r6m
>  
> @@ -442,14 +444,16 @@ INIT_YMM cpuname
>  %if WIN64
>  cglobal hevc_sao_edge_filter_%2_%1, 4, 8, 16, dst, src, dststride, offset, a_stride, b_stride, height, tmp
>  %define  eoq heightq
> -    movsxd           eoq, dword r4m
> -    movsx      a_strideq, byte [pb_eo+eoq*4+1]
> -    movsx      b_strideq, byte [pb_eo+eoq*4+3]
> +    movsxd     b_strideq, dword r4m
> +    lea             tmpq, [pb_eo]
> +    lea              eoq, [tmpq+4*b_strideq]
> +    movsx      a_strideq, byte [eoq+1]
> +    movsx      b_strideq, byte [eoq+3]
>      imul       a_strideq, EDGE_SRCSTRIDE>>1
>      imul       b_strideq, EDGE_SRCSTRIDE>>1
> -    movsx           tmpq, byte [pb_eo+eoq*4]
> +    movsx           tmpq, byte [eoq]
>      add        a_strideq, tmpq
> -    movsx           tmpq, byte [pb_eo+eoq*4+2]
> +    movsx           tmpq, byte [eoq+2]
>      add        b_strideq, tmpq
>      mov          heightd, r6m
>      add        a_strideq, a_strideq

Wouldn't it be better to just use the same code as UNIX64 instead? Now that we're going to load 
the address of the table into a register, there's no point in having a whole separate init path for 
WIN64.
Having one init path for X86_64 and one for X86_32 (where applicable) is much cleaner.


More information about the ffmpeg-devel mailing list