[Ffmpeg-cvslog] r6312 - trunk/libavcodec/i386/fft_3dn2.c
lorenm
subversion
Thu Sep 21 19:42:24 CEST 2006
Author: lorenm
Date: Thu Sep 21 19:42:23 2006
New Revision: 6312
Modified:
trunk/libavcodec/i386/fft_3dn2.c
Log:
tweak ff_imdct_calc_3dn2: fold the final C for-loop over k (and its two asm statements) into a single inline-asm loop
Modified: trunk/libavcodec/i386/fft_3dn2.c
==============================================================================
--- trunk/libavcodec/i386/fft_3dn2.c (original)
+++ trunk/libavcodec/i386/fft_3dn2.c Thu Sep 21 19:42:23 2006
@@ -177,15 +177,13 @@
);
}
- z += n8;
+ k = n-8;
asm volatile("movd %0, %%mm7" ::"r"(1<<31));
- for(k = 0; k < n8; k++) {
- asm volatile(
- "movq %0, %%mm0 \n\t"
- "pswapd %1, %%mm1 \n\t"
- ::"m"(z[k]), "m"(z[-1-k])
- );
- asm volatile(
+ asm volatile(
+ "1: \n\t"
+ "movq (%4,%0), %%mm0 \n\t" // z[n8+k]
+ "neg %0 \n\t"
+ "pswapd -8(%4,%0), %%mm1 \n\t" // z[n8-1-k]
"movq %%mm0, %%mm2 \n\t"
"pxor %%mm7, %%mm2 \n\t"
"punpckldq %%mm1, %%mm2 \n\t"
@@ -194,15 +192,17 @@
"pswapd %%mm0, %%mm4 \n\t"
"pxor %%mm7, %%mm0 \n\t"
"pxor %%mm7, %%mm4 \n\t"
- "movq %%mm0, %0 \n\t" // { -z[n8+k].im, z[n8-1-k].re }
- "movq %%mm4, %1 \n\t" // { -z[n8-1-k].re, z[n8+k].im }
- "movq %%mm2, %2 \n\t" // { -z[n8+k].re, z[n8-1-k].im }
- "movq %%mm3, %3 \n\t" // { z[n8-1-k].im, -z[n8+k].re }
- :"=m"(output[2*k]), "=m"(output[n2-2-2*k]),
- "=m"(output[n2+2*k]), "=m"(output[n-2-2*k])
- ::"memory"
- );
- }
+ "movq %%mm3, -8(%3,%0) \n\t" // output[n-2-2*k] = { z[n8-1-k].im, -z[n8+k].re }
+ "movq %%mm4, -8(%2,%0) \n\t" // output[n2-2-2*k]= { -z[n8-1-k].re, z[n8+k].im }
+ "neg %0 \n\t"
+ "movq %%mm0, (%1,%0) \n\t" // output[2*k] = { -z[n8+k].im, z[n8-1-k].re }
+ "movq %%mm2, (%2,%0) \n\t" // output[n2+2*k] = { -z[n8+k].re, z[n8-1-k].im }
+ "sub $8, %0 \n\t"
+ "jge 1b \n\t"
+ :"+r"(k)
+ :"r"(output), "r"(output+n2), "r"(output+n), "r"(z+n8)
+ :"memory"
+ );
asm volatile("femms");
}
More information about the ffmpeg-cvslog mailing list