[FFmpeg-devel] [PATCH 08/15] vp9/x86: save one register in SIGN_ADD/SUB.
Clément Bœsch
u at pkh.me
Sat Dec 27 19:00:47 CET 2014
On Sat, Dec 27, 2014 at 11:02:43AM -0500, Ronald S. Bultje wrote:
> ---
> libavcodec/x86/vp9lpf.asm | 28 ++++++++++++++--------------
> 1 file changed, 14 insertions(+), 14 deletions(-)
>
> diff --git a/libavcodec/x86/vp9lpf.asm b/libavcodec/x86/vp9lpf.asm
> index 15d4159..a1c8ddd 100644
> --- a/libavcodec/x86/vp9lpf.asm
> +++ b/libavcodec/x86/vp9lpf.asm
> @@ -141,17 +141,17 @@ SECTION .text
> %endmacro
>
> ; clip_u8(u8 + i8)
> -%macro SIGN_ADD 5 ; dst, u8, i8, tmp1, tmp2
> - EXTRACT_POS_NEG %3, %4, %5
> - psubusb %1, %2, %4 ; sub the negatives
> - paddusb %1, %5 ; add the positives
> +%macro SIGN_ADD 4 ; dst, u8, i8, tmp1
> + EXTRACT_POS_NEG %3, %4, %1
> + paddusb %1, %2 ; add the positives
> + psubusb %1, %4 ; sub the negatives
> %endmacro
>
> ; clip_u8(u8 - i8)
> -%macro SIGN_SUB 5 ; dst, u8, i8, tmp1, tmp2
> - EXTRACT_POS_NEG %3, %4, %5
> - psubusb %1, %2, %5 ; sub the positives
> - paddusb %1, %4 ; add the negatives
> +%macro SIGN_SUB 4 ; dst, u8, i8, tmp1
> + EXTRACT_POS_NEG %3, %1, %4
> + paddusb %1, %2 ; add the negatives
> + psubusb %1, %4 ; sub the positives
> %endmacro
>
> %macro FILTER6_INIT 4 ; %1=dst %2=h/l %3=cache, %4=stack_off
> @@ -577,8 +577,8 @@ cglobal vp9_loop_filter_%1_%2_16, 2, 6, 16, %3 + %4, dst, stride, mstride, dst2,
> paddsb m4, [pb_3] ; m4: f2 = clip(f + 3, 127)
> mova m14, [pb_10] ; will be reused in filter4()
> SRSHIFT3B_2X m6, m4, m14, m7 ; f1 and f2 sign byte shift by 3
> - SIGN_SUB m7, m12, m6, m5, m9 ; m7 = q0 - f1
> - SIGN_ADD m8, m11, m4, m5, m9 ; m8 = p0 + f2
> + SIGN_SUB m7, m12, m6, m5 ; m7 = q0 - f1
> + SIGN_ADD m8, m11, m4, m5 ; m8 = p0 + f2%if %2 != 44
Typo in the comment? A stray "%if %2 != 44" seems to have been appended to it.
> %if %2 != 44
> pandn m6, m2, m3 ; ~mask(in) & mask(fm)
> pand m6, m0 ; (~mask(in) & mask(fm)) & mask(hev)
> @@ -606,18 +606,18 @@ cglobal vp9_loop_filter_%1_%2_16, 2, 6, 16, %3 + %4, dst, stride, mstride, dst2,
> %define q0tmp m2
> pandn m0, m3
> %endif
> - SIGN_SUB q0tmp, m12, m6, m4, m14 ; q0 - f1
> + SIGN_SUB q0tmp, m12, m6, m4 ; q0 - f1
> MASK_APPLY q0tmp, m7, m0, m5 ; filter4(q0) & mask
> mova [Q0], q0tmp
> - SIGN_ADD p0tmp, m11, m15, m4, m14 ; p0 + f2
> + SIGN_ADD p0tmp, m11, m15, m4 ; p0 + f2
> MASK_APPLY p0tmp, m8, m0, m5 ; filter4(p0) & mask
> mova [P0], p0tmp
> paddb m6, [pb_80] ;
> pxor m8, m8 ; f=(f1+1)>>1
> pavgb m6, m8 ;
> psubb m6, [pb_40] ;
> - SIGN_ADD m7, m10, m6, m8, m9 ; p1 + f
> - SIGN_SUB m4, m13, m6, m8, m9 ; q1 - f
> + SIGN_ADD m7, m10, m6, m8 ; p1 + f
> + SIGN_SUB m4, m13, m6, m8 ; q1 - f
> MASK_APPLY m7, m10, m0, m14 ; m7 = filter4(p1)
> MASK_APPLY m4, m13, m0, m14 ; m4 = filter4(q1)
> mova [P1], m7
LGTM, cool.
--
Clément B.
-------------- next part --------------
A non-text attachment was scrubbed...
Name: not available
Type: application/pgp-signature
Size: 473 bytes
Desc: not available
URL: <https://ffmpeg.org/pipermail/ffmpeg-devel/attachments/20141227/ad8da28f/attachment.asc>
More information about the ffmpeg-devel mailing list