[FFmpeg-devel] [PATCH 04/11] x86: dcadsp: implement SSE lfe_dir

Loren Merritt lorenm at u.washington.edu
Fri Feb 7 03:12:55 CET 2014


On Thu, 6 Feb 2014, Christophe Gisquet wrote:

> diff --git a/libavcodec/x86/dcadsp.asm b/libavcodec/x86/dcadsp.asm
> index 03593ce..4a682be 100644
> --- a/libavcodec/x86/dcadsp.asm
> +++ b/libavcodec/x86/dcadsp.asm
> @@ -88,3 +88,108 @@ INT8X8_FMUL_INT32  3
>
>  INIT_XMM sse4
>  INT8X8_FMUL_INT32  3
> +
> +; %1=v0/v1  %2=in1  %3=in2
> +%macro FIR_LOOP 2-3
> +.loop%1:
> +%define va          m1
> +%define vb          m2
> +%if %1
> +%define OFFSET      0
> +%else
> +%define OFFSET      NUM_COEF*count
> +%endif
> +; for v0, incrementing and for v1, decrementing
> +    mova        va, [cf0q + OFFSET]
> +    mova        vb, [cf0q + OFFSET + 4*NUM_COEF]
> +%if %0 == 3
> +    mova        m4, [cf0q + OFFSET + mmsize]
> +    mova     SCALE, [cf0q + OFFSET + 4*NUM_COEF + mmsize]
> +%endif
> +    mulps       va, %2
> +    mulps       vb, %2
> +%if %0 == 3
> +    mulps       m4, %3
> +    mulps    SCALE, %3
> +    addps       va, m4
> +    addps       vb, SCALE
> +%endif
> +    ; va = va1 va2 va3 va4
> +    ; vb = vb1 vb2 vb3 vb4
> +%if %1
> +%define   O1    vb
> +%define   O2    va
> +%else
> +%define   O1    va
> +%define   O2    vb
> +%endif

Can this be simplified with
%if %1
SWAP va, vb
%endif
and no O1, O2 variables?

> +    mova        m4, O1
> +    unpcklps    O1, O2 ; va3 vb3 va4 vb4
> +    unpckhps    m4, O2 ; va1 vb1 va2 vb2
> +    addps       m4, O1 ; va1+3 vb1+3 va2+4 vb2+4
> +    movhlps     O2, m4 ; va1+3  vb1+3
> +    addps       O2, m4 ; va0..4 vb0..4
> +%if %1
> +    movh    [outq + count], O2
> +    sub       cf0q, 8*NUM_COEF
> +%else
> +    movh    [outq + count], O2

The movh store is identical in both branches, so factor it out of the %if.

> +%endif
> +    add      count, 8
> +    jl   .loop%1
> +%endmacro

--Loren Merritt


More information about the ffmpeg-devel mailing list