[FFmpeg-devel] r9017 breaks WMA decoding on Intel Macs
Guillaume POIRIER
poirierg
Thu May 31 00:22:26 CEST 2007
Hi,
On 5/30/07, Michael Niedermayer <michaelni at gmx.at> wrote:
> On Wed, May 30, 2007 at 02:07:19PM +0200, Guillaume POIRIER wrote:
> > Ok, fine with me. Michael, do you think that the patch I posted
> > earlier (100% based on Trent's, only fixing minor issues) should be
> > applied?
>
> well, after actually reading the code ... the loops should be written
> in asm not by using for() / while() this will make the code faster
> and it will make the n+%m code naturally disappear
Well, after getting a hint from Apple guys, here's an updated patch
that gets rid of all assembler warnings without touching the code as
much as Trent had done.
Now, maybe writing the loop in ASM ourselves will help, but at least
this patch does no harm and works on all supported platforms!
Guillaume
--
Y'a pas de gonzesse hooligan,
Imbécile et meurtrière
Y'en a pas même en grande Bretagne
A part bien sûr Madame Thatcher
-- Renaud (sur "Miss Maggie")
-------------- next part --------------
Index: libavcodec/i386/fft_sse.c
===================================================================
--- libavcodec/i386/fft_sse.c (revision 9152)
+++ libavcodec/i386/fft_sse.c (working copy)
@@ -170,12 +170,12 @@
asm volatile (
"movaps %0, %%xmm0 \n\t" // xmm0 = r0 X r1 X : in2
"movaps %1, %%xmm3 \n\t" // xmm3 = X i1 X i0: in1
- "movaps -16+%0, %%xmm4 \n\t" // xmm4 = r0 X r1 X : in2
- "movaps 16+%1, %%xmm7 \n\t" // xmm7 = X i1 X i0: in1
+ "movaps -16%0, %%xmm4 \n\t" // xmm4 = r0 X r1 X : in2
+ "movaps 16%1, %%xmm7 \n\t" // xmm7 = X i1 X i0: in1
"movlps %2, %%xmm1 \n\t" // xmm1 = X X R1 R0: tcos
"movlps %3, %%xmm2 \n\t" // xmm2 = X X I1 I0: tsin
- "movlps 8+%2, %%xmm5 \n\t" // xmm5 = X X R1 R0: tcos
- "movlps 8+%3, %%xmm6 \n\t" // xmm6 = X X I1 I0: tsin
+ "movlps 8%2, %%xmm5 \n\t" // xmm5 = X X R1 R0: tcos
+ "movlps 8%3, %%xmm6 \n\t" // xmm6 = X X I1 I0: tsin
"shufps $95, %%xmm0, %%xmm0 \n\t" // xmm0 = r1 r1 r0 r0
"shufps $160,%%xmm3, %%xmm3 \n\t" // xmm3 = i1 i1 i0 i0
"shufps $95, %%xmm4, %%xmm4 \n\t" // xmm4 = r1 r1 r0 r0
@@ -222,13 +222,13 @@
for (k = 0; k < n4; k += 4) {
asm (
"movaps %0, %%xmm0 \n\t" // xmm0 = i1 r1 i0 r0: z
- "movaps 16+%0, %%xmm4 \n\t" // xmm4 = i1 r1 i0 r0: z
+ "movaps 16%0, %%xmm4 \n\t" // xmm4 = i1 r1 i0 r0: z
"movlps %1, %%xmm1 \n\t" // xmm1 = X X R1 R0: tcos
- "movlps 8+%1, %%xmm5 \n\t" // xmm5 = X X R1 R0: tcos
+ "movlps 8%1, %%xmm5 \n\t" // xmm5 = X X R1 R0: tcos
"movaps %%xmm0, %%xmm3 \n\t" // xmm3 = i1 r1 i0 r0
"movaps %%xmm4, %%xmm7 \n\t" // xmm7 = i1 r1 i0 r0
"movlps %2, %%xmm2 \n\t" // xmm2 = X X I1 I0: tsin
- "movlps 8+%2, %%xmm6 \n\t" // xmm6 = X X I1 I0: tsin
+ "movlps 8%2, %%xmm6 \n\t" // xmm6 = X X I1 I0: tsin
"shufps $160,%%xmm0, %%xmm0 \n\t" // xmm0 = r1 r1 r0 r0
"shufps $245,%%xmm3, %%xmm3 \n\t" // xmm3 = i1 i1 i0 i0
"shufps $160,%%xmm4, %%xmm4 \n\t" // xmm4 = r1 r1 r0 r0
@@ -248,7 +248,7 @@
"addps %%xmm3, %%xmm0 \n\t" // xmm0 = result
"addps %%xmm7, %%xmm4 \n\t" // xmm4 = result
"movaps %%xmm0, %0 \n\t"
- "movaps %%xmm4, 16+%0 \n\t"
+ "movaps %%xmm4, 16%0 \n\t"
:"+m"(z[k])
:"m"(tcos[k]), "m"(tsin[k])
#ifndef ARCH_X86_64
More information about the ffmpeg-devel
mailing list