[FFmpeg-cvslog] x86: fft: fix imdct_half() for AVX

Thu Aug 2 23:39:53 CEST 2012

ffmpeg | branch: master | Ronald S. Bultje <rsbultje at gmail.com> | Thu Aug  2 12:15:46 2012 -0500| [c728518b3cbb0daf0d0a65ba7adfcb48c5629b93] | committer: Justin Ruggles

x86: fft: fix imdct_half() for AVX

Some calculations were changed in b6a3849 to use mmsize. That is not correct
for the AVX version, because it uses INIT_YMM and therefore has mmsize == 32.

Fixes Bug 341.

Signed-off-by: Justin Ruggles <justin.ruggles at gmail.com>

> http://git.videolan.org/gitweb.cgi/ffmpeg.git/?a=commit;h=c728518b3cbb0daf0d0a65ba7adfcb48c5629b93
---

 libavcodec/x86/fft_mmx.asm |   13 ++++++++++---
 1 file changed, 10 insertions(+), 3 deletions(-)

diff --git a/libavcodec/x86/fft_mmx.asm b/libavcodec/x86/fft_mmx.asm
index 81e4411..ac53296 100644
--- a/libavcodec/x86/fft_mmx.asm
+++ b/libavcodec/x86/fft_mmx.asm
@@ -1009,7 +1009,11 @@ cglobal imdct_half, 3,12,8; FFTContext *s, FFTSample *output, const FFTSample *i
     push  rrevtab
 %endif
 
-    sub   r3, mmsize/4
+%if mmsize == 8
+    sub   r3, 2
+%else
+    sub   r3, 4
+%endif
 %if ARCH_X86_64 || mmsize == 8
     xor   r4, r4
     sub   r4, r3
@@ -1036,7 +1040,9 @@ cglobal imdct_half, 3,12,8; FFTContext *s, FFTSample *output, const FFTSample *i
     mova [r1+r5*8], m0
     mova [r1+r6*8], m2
     add    r4, 2
+    sub    r3, 2                ; r3 is the counter tested by "jns .pre"
+%else
+%if ARCH_X86_64
     movzx  r5,  word [rrevtab+r4-4]
     movzx  r6,  word [rrevtab+r4-2]
     movzx  r10, word [rrevtab+r3]
@@ -1057,7 +1063,8 @@ cglobal imdct_half, 3,12,8; FFTContext *s, FFTSample *output, const FFTSample *i
     movlps [r1+r5*8], xmm1
     movhps [r1+r4*8], xmm1
 %endif
-    sub    r3, mmsize/4
+    sub    r3, 4
+%endif
     jns    .pre
 
     mov  r5, r0