[FFmpeg-devel] [PATCH] ac3enc: Add x86-optimized function to speed up log2_tab().
Loren Merritt
lorenm
Sun Feb 13 23:49:11 CET 2011
>+cglobal ac3_max_msb_abs_int16_%1, 2,2,5, src, len
>+ pxor m2, m2
>+ pxor m3, m3
>+.loop:
>+%ifidn %2, min_max
>+ mova m0, [srcq]
>+ mova m1, [srcq+mmsize]
>+ pminsw m2, m0
>+ pminsw m2, m1
>+ pmaxsw m3, m0
>+ pmaxsw m3, m1
>+%else ; or_abs
>+%ifidn %1, mmx
>+ mova m0, [srcq]
>+ mova m1, [srcq+mmsize]
>+ ABS2 m0, m1, m3, m4
>+%else ; ssse3
>+ ; using memory args is faster for ssse3
>+ pabsw m0, [srcq]
>+ pabsw m1, [srcq+mmsize]
>+%endif
>+ por m2, m0
>+ por m2, m1
>+%endif
>+ add srcq, mmsize*2
>+ sub lend, mmsize
>+ ja .loop
>+%ifidn %2, min_max
>+ ABS2 m2, m3, m0, m1
>+ por m2, m3
>+%endif
>+%ifidn mmsize, 16
>+ mova m0, m2
>+ punpckhqdq m0, m0
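Use movhlps here: "movhlps m0, m2" copies the high 64 bits of m2 into the low 64 bits of m0 in a single instruction, replacing the mova + punpckhqdq pair (the upper half of m0 is left unmodified, which is fine as long as only the low half is used afterwards).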
--Loren Merritt
More information about the ffmpeg-devel mailing list