[FFmpeg-devel] [PATCH] avfilter: add hflip x86 SIMD

Martin Vignali martin.vignali at gmail.com
Sun Dec 3 22:15:14 EET 2017


I modified the checkasm test to check a range of widths:

/* Compare the reference and the new implementation for every width in
 * [1, w), then benchmark the new implementation at the full WIDTH. */
if (check_func(s.flip_line[0], "hflip_%s", report_name)) {
    for (i = 1; i < w; i++) {
        /* The same src is used for every width and the whole WIDTH bytes
         * are compared below, so bytes left in the buffers by an earlier
         * call could hide output bytes that this call fails to write.
         * Start each width from identical, known buffer contents. */
        memset(dst_ref, 0, WIDTH);
        memset(dst_new, 0, WIDTH);
        call_ref(src, dst_ref, i);
        call_new(src, dst_new, i);
        /* Comparing WIDTH bytes (not just i) also flags writes past i. */
        if (memcmp(dst_ref, dst_new, WIDTH)) {
            printf("FAIL : W = %d\n", i);
            fail();
        }
    }
    bench_new(src, dst_new, WIDTH);
}


This asm seems to be OK (the hflip_short version follows the same idea). Benchmark results:
hflip_byte_c: 28.4
hflip_byte_ssse3: 23.7
hflip_short_c: 275.9
hflip_short_ssse3: 65.2


;------------------------------------------------------------------------------
; hflip_byte(src, dst, w)
;
; Writes dst[i] = src[-i] for i in [0, w): src is read at non-positive
; offsets, dst is written at non-negative offsets.
;
; pb_flip_byte is defined outside this block; presumably it is the pshufb
; mask that reverses the bytes of one XMM register -- confirm.
;
; Named registers:
;   x = output offset, i.e. number of bytes already written
;   v = one-byte scratch for the scalar tail. It is deliberately r3 (not r4)
;       so that a byte-sized form should exist on x86-32 as well -- confirm
;       against the x86inc register mapping.
;------------------------------------------------------------------------------
INIT_XMM ssse3
cglobal hflip_byte, 3, 5, 3, src, dst, w, v, x
    mova    m0, [pb_flip_byte]
    xor     xd, xd                      ; x = 0 (32-bit write clears the upper half)
    movsxdifnidn wq, wd                 ; w is a C int: sign-extend to pointer width
    sub     wq, 2 * mmsize              ; wq = largest x at which a full block fits
    jl .tail                            ; w < 2*mmsize: scalar only

    ; Vector body: 2*mmsize bytes per iteration.
    ; Invariant on entry to each iteration: x + 2*mmsize <= w.
    .loop0:
        neg     xq
        movu    m1, [srcq + xq -     mmsize + 1]   ; src[-x-mmsize+1 .. -x]
        movu    m2, [srcq + xq - 2 * mmsize + 1]   ; the mmsize bytes before that
        pshufb  m1, m0
        pshufb  m2, m0
        neg     xq
        movu    [dstq + xq         ], m1
        movu    [dstq + xq + mmsize], m2
        add     xq, mmsize * 2
        cmp     xq, wq
        jle .loop0                      ; jle, not jl: x == wq means one more full block

.tail:
    add     wq, 2 * mmsize              ; restore wq = w
    cmp     xq, wq
    jge .end                            ; nothing left (also covers w <= 0)

    ; Scalar tail: continues at the current x, one byte per iteration.
    .loop1:
        neg    xq
        mov    vb, [srcq + xq]
        neg    xq
        mov    [dstq + xq], vb
        add    xq, 1
        cmp    xq, wq
        jl .loop1
.end:
    RET


More information about the ffmpeg-devel mailing list