Index: liba52/imdct.c
===================================================================
RCS file: /cvsroot/mplayer/main/liba52/imdct.c,v
retrieving revision 1.25
diff -u -r1.25 imdct.c
--- liba52/imdct.c 26 Apr 2004 19:47:50 -0000 1.25
+++ liba52/imdct.c 2 May 2004 20:33:14 -0000
@@ -932,37 +932,46 @@
 
     /* 4-7. iterations */
     for (m=3; m < 7; m++) {
-        two_m = (1 << m);
-        two_m_plus_one = two_m<<1;
-        asm volatile(
-            "movl %0, %%esi \n\t"
-            ".balign 16 \n\t"
-            "1: \n\t"
-            "xorl %%edi, %%edi \n\t" // k
-            "leal (%%esi, %3), %%edx \n\t"
-            "2: \n\t"
-            "movaps (%%edx, %%edi), %%xmm1 \n\t"
-            "movaps (%4, %%edi, 2), %%xmm2 \n\t"
-            "mulps %%xmm1, %%xmm2 \n\t"
-            "shufps $0xB1, %%xmm1, %%xmm1 \n\t"
-            "mulps 16(%4, %%edi, 2), %%xmm1 \n\t"
-            "movaps (%%esi, %%edi), %%xmm0 \n\t"
-            "addps %%xmm2, %%xmm1 \n\t"
-            "movaps %%xmm1, %%xmm2 \n\t"
-            "addps %%xmm0, %%xmm1 \n\t"
-            "subps %%xmm2, %%xmm0 \n\t"
-            "movaps %%xmm1, (%%esi, %%edi) \n\t"
-            "movaps %%xmm0, (%%edx, %%edi) \n\t"
-            "addl $16, %%edi \n\t"
-            "cmpl %3, %%edi \n\t" //FIXME (opt) count against 0
-            " jb 2b \n\t"
-            "addl %2, %%esi \n\t"
-            "cmpl %1, %%esi \n\t"
-            " jb 1b \n\t"
-            :: "g" (buf), "m" (buf+128), "m" (two_m_plus_one<<3), "r" (two_m<<3),
-            "r" (sseW[m])
-            : "%esi", "%edi", "%edx"
-        );
+        /* Byte offset between the two operands of each butterfly;
+         * same value as the two_m<<3 the old asm loop received. */
+        int two_m8 = (8 << m);
+        unsigned long i = 0;
+        /* Outer loop: step through buf 2*two_m8 bytes at a time, up to 8*128 bytes. */
+        do {
+            char *esi = (char*) buf + i;
+            char *edx = esi + two_m8;
+            unsigned long k = 0;
+            /* Inner loop: one 16-byte vector per pass; sseW[m] is read
+             * at twice that stride (byte offsets 2*k and 2*k+16). */
+            do {
+                /* %0 and %1 are loaded and then stored back, so they must be
+                 * read-write ("+m") operands, not inputs. The operands are
+                 * typed as single chars while movaps moves 16 bytes, hence
+                 * the additional "memory" clobber.
+                 * NOTE(review): xmm0-xmm2 are overwritten but not listed as
+                 * clobbers - confirm no SSE value is live across this asm. */
+                asm volatile(
+                    "movaps %0, %%xmm1 \n\t"
+                    "movaps %2, %%xmm2 \n\t"
+                    "mulps %%xmm1, %%xmm2 \n\t"
+                    "shufps $0xB1, %%xmm1, %%xmm1 \n\t"
+                    "mulps %3, %%xmm1 \n\t"
+                    "movaps %1, %%xmm0 \n\t"
+                    "addps %%xmm2, %%xmm1 \n\t"
+                    "movaps %%xmm1, %%xmm2 \n\t"
+                    "addps %%xmm0, %%xmm1 \n\t"
+                    "subps %%xmm2, %%xmm0 \n\t"
+                    "movaps %%xmm1, %1 \n\t"
+                    "movaps %%xmm0, %0 \n\t"
+                    : "+m" (edx[k]), "+m" (esi[k])
+                    : "m" (((char*)sseW[m])[2*k]), "m" (((char*)sseW[m])[2*k+16])
+                    : "memory"
+                );
+                k += 16;
+            } while (k < two_m8);
+            i += 2 * two_m8;
+        } while (i < 8*128);
+
     }
 
     /* Post IFFT complex multiply plus IFFT complex conjugate*/