[FFmpeg-devel] [PATCH] avfilter: add hflip x86 SIMD
Martin Vignali
martin.vignali at gmail.com
Sun Dec 3 22:15:14 EET 2017
I modified the checkasm test to exercise various widths:
/*
 * Compare the reference and the optimised line flip for every width in
 * [1, w), then benchmark the optimised version at the full WIDTH.
 */
if (check_func(s.flip_line[0], "hflip_%s", report_name)) {
    for (i = 1; i < w; i++) {
        /*
         * Reset both destinations before every width. Without this, bytes
         * left behind by an earlier call can hide a short (or excess) write
         * at the current width. The memcmp() below already reads WIDTH
         * bytes from each buffer, so clearing WIDTH bytes stays in bounds.
         */
        memset(dst_ref, 0, WIDTH);
        memset(dst_new, 0, WIDTH);
        call_ref(src, dst_ref, i);
        call_new(src, dst_new, i);
        /* Compare the whole buffer so writes past the first i bytes show up too. */
        if (memcmp(dst_ref, dst_new, WIDTH)) {
            printf("FAIL : W = %d\n", i);
            fail();
        }
    }
    bench_new(src, dst_new, WIDTH);
}
This asm seems to be OK (the same idea applies to the hflip_short version):
hflip_byte_c: 28.4
hflip_byte_ssse3: 23.7
hflip_short_c: 275.9
hflip_short_ssse3: 65.2
;------------------------------------------------------------------------------
; hflip_byte(src, dst, w)   -- x86inc syntax, SSSE3
;
; Copies w bytes so that dst[x] = src[-x] for x in [0, w): src is read
; backwards starting at srcq, dst is written forwards starting at dstq.
;
; In:    srcq = address of the first byte to read (reads go downwards)
;        dstq = address of the first byte to write
;        wd   = number of bytes to copy (w <= 0 copies nothing)
; Regs:  xq = forward index into dst, vb = scalar byte temporary,
;        m0 = shuffle mask, m1/m2 = data
; NOTE(review): pb_flip_byte is defined outside this excerpt; it is presumably
;        the 16-byte byte-reversal mask for pshufb -- confirm.
; NOTE(review): v is declared before x so that the byte temporary lands in the
;        4th GPR, which has a byte-addressable form on x86-32 builds; the 5th
;        one should not -- confirm against x86inc's DECLARE_REG tables.
;------------------------------------------------------------------------------
INIT_XMM ssse3
cglobal hflip_byte, 3, 5, 3, src, dst, w, v, x
    mova    m0, [pb_flip_byte]
    xor     xq, xq                      ; x = 0
    mov     wd, dword wm                ; 32-bit write: clears the upper half of wq on x86-64
    sub     wq, 2 * mmsize              ; wq = w - 2*mmsize = last x where a full block still fits
    jl .skip                            ; less than one full block: scalar loop only

.loop0:                                 ; invariant: x + 2*mmsize <= w
    neg     xq                          ; index src backwards
    movu    m1, [srcq + xq - mmsize + 1]      ; src[-x-(mmsize-1) .. -x]
    movu    m2, [srcq + xq - 2 * mmsize + 1]  ; the mmsize bytes below that
    pshufb  m1, m0
    pshufb  m2, m0
    neg     xq                          ; back to the forward dst index
    movu    [dstq + xq], m1
    movu    [dstq + xq + mmsize], m2
    add     xq, 2 * mmsize
    cmp     xq, wq
    jle .loop0                          ; x == w - 2*mmsize still leaves one full block

.skip:
    add     wq, 2 * mmsize              ; restore wq = w for the scalar tail
    cmp     xq, wq
    jge .end                            ; nothing left (also covers w <= 0)

.loop1:                                 ; invariant: x < w
    neg     xq
    mov     vb, [srcq + xq]
    neg     xq
    mov     [dstq + xq], vb
    add     xq, 1
    cmp     xq, wq
    jl .loop1

.end:
    RET
More information about the ffmpeg-devel
mailing list