[Ffmpeg-devel] fft_3dn2.c compile error
Reimar Doeffinger
Reimar.Doeffinger
Wed Aug 9 12:49:54 CEST 2006
Hello,
On Tue, Aug 08, 2006 at 11:47:00PM -0700, Loren Merritt wrote:
> Fixed sorta. Does anyone see a workaround that isn't slower?
No, not really. But just to collect ideas, I attached the variant I
first thought of for the second part. I didn't test it at all, and it
is certainly incorrect if n is not divisible by 2.
Greetings,
Reimar Doeffinger
-------------- next part --------------
Index: libavcodec/i386/fft_3dn2.c
===================================================================
--- libavcodec/i386/fft_3dn2.c (revision 5966)
+++ libavcodec/i386/fft_3dn2.c (working copy)
@@ -199,9 +199,6 @@
asm volatile(
"movq %0, %%mm0 \n\t"
"pswapd %1, %%mm1 \n\t"
- ::"m"(z[k]), "m"(z[-1-k])
- );
- asm volatile(
"movq %%mm0, %%mm2 \n\t"
"pxor %%mm7, %%mm2 \n\t"
"punpckldq %%mm1, %%mm2 \n\t"
@@ -210,13 +207,12 @@
"pswapd %%mm0, %%mm4 \n\t"
"pxor %%mm7, %%mm0 \n\t"
"pxor %%mm7, %%mm4 \n\t"
- "movq %%mm0, %0 \n\t" // { -z[n8+k].im, z[n8-1-k].re }
- "movq %%mm4, %1 \n\t" // { -z[n8-1-k].re, z[n8+k].im }
- "movq %%mm2, %2 \n\t" // { -z[n8+k].re, z[n8-1-k].im }
- "movq %%mm3, %3 \n\t" // { z[n8-1-k].im, -z[n8+k].re }
- :"=m"(output[2*k]), "=m"(output[n2-2-2*k]),
- "=m"(output[n2+2*k]), "=m"(output[n-2-2*k])
- ::"memory"
+ "movq %%mm0, (%2) \n\t" // { -z[n8+k].im, z[n8-1-k].re }
+ "movq %%mm4, (%3) \n\t" // { -z[n8-1-k].re, z[n8+k].im }
+ "movq %%mm2, (%2, %4)\n\t" // { -z[n8+k].re, z[n8-1-k].im }
+ "movq %%mm3, (%3, %4)\n\t" // { z[n8-1-k].im, -z[n8+k].re }
+ ::"m"(z[k]), "m"(z[-1-k]), "r"(&output[2*k]), "r"(&output[n2-2-2*k]), "r"(n2)
+ :"memory"
);
}
asm volatile("femms");
More information about the ffmpeg-devel
mailing list