[FFmpeg-cvslog] vp9/x86: save one register on 32bit idct32x32.
Ronald S. Bultje
git at videolan.org
Tue Dec 16 03:17:59 CET 2014
ffmpeg | branch: master | Ronald S. Bultje <rsbultje at gmail.com> | Mon Dec 15 20:40:48 2014 -0500| [0a7964dca5e52536c05a72987c3d7dbb12add942] | committer: Michael Niedermayer
vp9/x86: save one register on 32bit idct32x32.
Fixes build on win32.
Signed-off-by: Michael Niedermayer <michaelni at gmx.at>
> http://git.videolan.org/gitweb.cgi/ffmpeg.git/?a=commit;h=0a7964dca5e52536c05a72987c3d7dbb12add942
---
libavcodec/x86/vp9itxfm.asm | 51 ++++++++++++++++++++++++++++++++++++++++---
1 file changed, 48 insertions(+), 3 deletions(-)
diff --git a/libavcodec/x86/vp9itxfm.asm b/libavcodec/x86/vp9itxfm.asm
index 908040c..64859a0 100644
--- a/libavcodec/x86/vp9itxfm.asm
+++ b/libavcodec/x86/vp9itxfm.asm
@@ -2526,7 +2526,8 @@ IADST16_FN iadst, IADST16, iadst, IADST16, avx
%macro VP9_IDCT_IDCT_32x32_ADD_XMM 1
INIT_XMM %1
-cglobal vp9_idct_idct_32x32_add, 4, 7 + ARCH_X86_64 * 2, 16, 2048, dst, stride, block, eob
+cglobal vp9_idct_idct_32x32_add, 0, 6 + ARCH_X86_64 * 3, 16, 2048, dst, stride, block, eob
+ movifnidn eobd, dword eobm
%if cpuflag(ssse3)
cmp eobd, 135
jg .idctfull
@@ -2540,6 +2541,9 @@ cglobal vp9_idct_idct_32x32_add, 4, 7 + ARCH_X86_64 * 2, 16, 2048, dst, stride,
%endif
; dc-only case
+ movifnidn blockq, blockmp
+ movifnidn dstq, dstmp
+ movifnidn strideq, stridemp
%if cpuflag(ssse3)
movd m0, [blockq]
mova m1, [pw_11585x2]
@@ -2572,15 +2576,22 @@ cglobal vp9_idct_idct_32x32_add, 4, 7 + ARCH_X86_64 * 2, 16, 2048, dst, stride,
%if ARCH_X86_64
DEFINE_ARGS dst_bak, stride, block, cnt, dst, stride30, dst_end, stride2, tmp
%else
- DEFINE_ARGS dst, stride, block, stride30, dst_end, stride2, tmp
-%define cntd dword r4m
%define dst_bakq r0mp
%endif
%if cpuflag(ssse3)
.idct8x8:
+%if ARCH_X86_32
+ DEFINE_ARGS block, u1, u2, u3, u4, tmp
+ mov blockq, r2mp
+%endif
mov tmpq, rsp
VP9_IDCT32_1D blockq, 1, 8
+%if ARCH_X86_32
+ DEFINE_ARGS dst, stride, stride30, dst_end, stride2, tmp
+ mov strideq, r1mp
+%define cntd dword r3m
+%endif
mov stride30q, strideq ; stride
lea stride2q, [strideq*2] ; stride*2
shl stride30q, 5 ; stride*32
@@ -2597,10 +2608,18 @@ cglobal vp9_idct_idct_32x32_add, 4, 7 + ARCH_X86_64 * 2, 16, 2048, dst, stride,
; at the end of the loop, m7 should still be zero
; use that to zero out block coefficients
+%if ARCH_X86_32
+ DEFINE_ARGS block
+ mov blockq, r2mp
+%endif
ZERO_BLOCK blockq, 64, 8, m1
RET
.idct16x16:
+%if ARCH_X86_32
+ DEFINE_ARGS block, tmp, cnt
+ mov blockq, r2mp
+%endif
mov cntd, 2
mov tmpq, rsp
.loop1_16x16:
@@ -2609,7 +2628,14 @@ cglobal vp9_idct_idct_32x32_add, 4, 7 + ARCH_X86_64 * 2, 16, 2048, dst, stride,
add tmpq, 512
dec cntd
jg .loop1_16x16
+
+%if ARCH_X86_64
sub blockq, 32
+%else
+ DEFINE_ARGS dst, stride, stride30, dst_end, stride2, tmp
+ mov strideq, r1mp
+%define cntd dword r3m
+%endif
mov stride30q, strideq ; stride
lea stride2q, [strideq*2] ; stride*2
@@ -2628,11 +2654,19 @@ cglobal vp9_idct_idct_32x32_add, 4, 7 + ARCH_X86_64 * 2, 16, 2048, dst, stride,
; at the end of the loop, m7 should still be zero
; use that to zero out block coefficients
+%if ARCH_X86_32
+ DEFINE_ARGS block
+ mov blockq, r2mp
+%endif
ZERO_BLOCK blockq, 64, 16, m1
RET
%endif
.idctfull:
+%if ARCH_X86_32
+ DEFINE_ARGS block, tmp, cnt
+ mov blockq, r2mp
+%endif
mov cntd, 4
mov tmpq, rsp
.loop1_full:
@@ -2641,7 +2675,14 @@ cglobal vp9_idct_idct_32x32_add, 4, 7 + ARCH_X86_64 * 2, 16, 2048, dst, stride,
add tmpq, 512
dec cntd
jg .loop1_full
+
+%if ARCH_X86_64
sub blockq, 64
+%else
+ DEFINE_ARGS dst, stride, stride30, dst_end, stride2, tmp
+ mov strideq, r1mp
+%define cntd dword r3m
+%endif
mov stride30q, strideq ; stride
lea stride2q, [strideq*2] ; stride*2
@@ -2660,6 +2701,10 @@ cglobal vp9_idct_idct_32x32_add, 4, 7 + ARCH_X86_64 * 2, 16, 2048, dst, stride,
; at the end of the loop, m7 should still be zero
; use that to zero out block coefficients
+%if ARCH_X86_32
+ DEFINE_ARGS block
+ mov blockq, r2mp
+%endif
ZERO_BLOCK blockq, 64, 32, m1
RET
%endmacro
More information about the ffmpeg-cvslog mailing list