[FFmpeg-devel] [PATCH] avfilter: add hflip x86 SIMD

Paul B Mahol onemda at gmail.com
Sun Dec 3 21:57:20 EET 2017


On 12/3/17, Martin Vignali <martin.vignali at gmail.com> wrote:
> Maybe the problem comes from the skip part:
>
> +INIT_XMM ssse3
>> +cglobal hflip_byte, 3, 5, 3, src, dst, w, x, v
>> +    mova    m0, [pb_flip_byte]
>> +    mov     xq, 0
>> +    mov     wd, dword wm
>> +    sub     wq, 2 * mmsize
>> +    cmp     wq, mmsize
>> +    jl .skip
>> +
>> +    .loop0:
>> +        neg     xq
>> +        movu    m1, [srcq + xq -     mmsize + 1]
>> +        movu    m2, [srcq + xq - 2 * mmsize + 1]
>> +        pshufb  m1, m0
>> +        pshufb  m2, m0
>> +        neg     xq
>> +        movu    [dstq + xq         ], m1
>> +        movu    [dstq + xq + mmsize], m2
>> +        add     xq, mmsize * 2
>> +        cmp     xq, wq
>> +        jl .loop0
>> +
>> +.skip:
>> +    add     wq, 2 * mmsize
>>
>
> ==> use xq instead of wq ?

Nope.

>
>
>> +    .loop1:
>> +        neg    xq
>> +        mov    vb, [srcq + xq]
>> +        neg    xq
>> +        mov    [dstq + xq], vb
>> +        add    xq, 1
>> +        cmp    xq, wq
>> +        jl .loop1
>> +RET
>> +
>> +cglobal hflip_short, 3, 5, 3, src, dst, w, x, v
>> +    mova    m0, [pb_flip_short]
>> +    mov     xq, 0
>> +    mov     wd, dword wm
>> +    add     wq, wq
>> +    sub     wq, 2 * mmsize
>> +    cmp     wq, mmsize
>> +    jl .skip
>> +
>> +    .loop0:
>> +        neg     xq
>> +        movu    m1, [srcq + xq -     mmsize + 2]
>> +        movu    m2, [srcq + xq - 2 * mmsize + 2]
>> +        pshufb  m1, m0
>> +        pshufb  m2, m0
>> +        neg     xq
>> +        movu    [dstq + xq         ], m1
>> +        movu    [dstq + xq + mmsize], m2
>> +        add     xq, mmsize
>> +        cmp     xq, wq
>> +        jl .loop0
>> +
>> +.skip:
>> +    add     wq, 2 * mmsize
>>
>
>
> ==> same here ?

Nope. This is for the case when the width is not a multiple of mmsize.


More information about the ffmpeg-devel mailing list