[Mplayer-cvslog] CVS: main/libac3/mmx srfft_3dnow.c,1.3,1.4 imdct_3dnow.c,1.1,1.2
Nick Kurshev
nickols_k at users.sourceforge.net
Thu May 24 11:43:51 CEST 2001
Update of /cvsroot/mplayer/main/libac3/mmx
In directory usw-pr-cvs1:/tmp/cvs-serv1565/main/libac3/mmx
Modified Files:
srfft_3dnow.c imdct_3dnow.c
Log Message:
3DNow! expansion
Index: srfft_3dnow.c
===================================================================
RCS file: /cvsroot/mplayer/main/libac3/mmx/srfft_3dnow.c,v
retrieving revision 1.3
retrieving revision 1.4
diff -C2 -r1.3 -r1.4
*** srfft_3dnow.c 2001/05/23 09:48:26 1.3
--- srfft_3dnow.c 2001/05/24 09:43:49 1.4
***************
*** 146,168 ****
asm volatile(
! "movq 8(%0), %%mm0\n\t"
! "movq 24(%0), %%mm1\n\t"
! "movq %%mm0, %1\n\t" /* wT1 = x[1]; */
! "movq %%mm1, %2\n\t" /* wB1 = x[3]; */
! :"=r"(x),"=m"(wT1), "=m"(wB1)
! :"0"(x)
:"memory");
asm volatile(
! "movq 16(%0), %%mm3\n\t"
! "movq 32(%0), %%mm4\n\t"
! "movq 48(%0), %%mm5\n\t"
! "movq %%mm3, 8(%0)\n\t" /* x[1] = x[2]; */
! "movq %%mm4, 16(%0)\n\t" /* x[2] = x[4]; */
! "movq %%mm5, 24(%0)\n\t" /* x[3] = x[6]; */
:"=r"(x)
:"0"(x)
:"memory");
! asm volatile("femms":::"memory");
fft_4(&x[0]);
--- 146,168 ----
asm volatile(
! "movq 8(%2), %%mm0\n\t"
! "movq 24(%2), %%mm1\n\t"
! "movq %%mm0, %0\n\t" /* wT1 = x[1]; */
! "movq %%mm1, %1\n\t" /* wB1 = x[3]; */
! :"=m"(wT1), "=m"(wB1)
! :"r"(x)
:"memory");
asm volatile(
! "movq 16(%0), %%mm2\n\t"
! "movq 32(%0), %%mm3\n\t"
! "movq 48(%0), %%mm4\n\t"
! "movq %%mm2, 8(%0)\n\t" /* x[1] = x[2]; */
! "movq %%mm3, 16(%0)\n\t" /* x[2] = x[4]; */
! "movq %%mm4, 24(%0)\n\t" /* x[3] = x[6]; */
:"=r"(x)
:"0"(x)
:"memory");
!
fft_4(&x[0]);
Index: imdct_3dnow.c
===================================================================
RCS file: /cvsroot/mplayer/main/libac3/mmx/imdct_3dnow.c,v
retrieving revision 1.1
retrieving revision 1.2
diff -C2 -r1.1 -r1.2
*** imdct_3dnow.c 2001/05/23 08:20:16 1.1
--- imdct_3dnow.c 2001/05/24 09:43:49 1.2
***************
*** 31,99 ****
{
int i, j;
! float tmp_a_r, tmp_a_i;
float *data_ptr;
float *delay_ptr;
float *window_ptr;
// 512 IMDCT with source and dest data in 'data'
// Pre IFFT complex multiply plus IFFT complex conjugate
for( i=0; i < 128; i++) {
j = pm128[i];
! //a = (data[256-2*j-1] - data[2*j]) * (xcos1[j] + xsin1[j]);
! //c = data[2*j] * xcos1[j];
! //b = data[256-2*j-1] * xsin1[j];
! //buf1[i].re = a - b + c;
! //buf1[i].im = b + c;
! buf[i].re = (data[256-2*j-1] * xcos1[j]) - (data[2*j] * xsin1[j]);
! buf[i].im = -1.0 * (data[2*j] * xcos1[j] + data[256-2*j-1] * xsin1[j]);
}
fft_128p (&buf[0]);
// Post IFFT complex multiply plus IFFT complex conjugate
! asm volatile ("femms":::"memory");
#ifndef HAVE_3DNOWEX
! asm volatile ("movl $1, %%eax\n\t"
! "movd %%eax, %%mm4\n\t"
! "negl %%eax\n\t"
! "movd %%eax, %%mm5\n\t"
! "punpckldq %%mm5, %%mm4\n\t" /* 1.0 | -1.0 */
! "pi2fd %%mm4, %%mm4\n\t":::"eax","memory");
#endif
for (i=0; i < 128; i++) {
! asm volatile("movq %1, %%mm0\n\t" /* ac3_buf[i].re | ac3_buf[i].im */
! "movq %%mm0, %%mm1\n\t" /* ac3_buf[i].re | ac3_buf[i].im */
#ifndef HAVE_3DNOWEX
! "movq %%mm1, %%mm2\n\t"
! "psrlq $32, %%mm1\n\t"
! "punpckldq %%mm2, %%mm1\n\t"
#else
! "pswapd %%mm1, %%mm1\n\t" /* ac3_buf[i].re | ac3_buf[i].im */
#endif
! "movd %3, %%mm3\n\t" /* ac3_xsin[i] */
! "punpckldq %2, %%mm3\n\t" /* ac3_xsin[i] | ac3_xcos[i] */
! "pfmul %%mm3, %%mm0\n\t"
! "pfmul %%mm3, %%mm1\n\t"
#ifndef HAVE_3DNOWEX
! "pfmul %%mm4, %%mm0\n\t"
! "pfacc %%mm1, %%mm0\n\t"
! "movd %%mm0, 4%0\n\t"
! "psrlq $32, %%mm0\n\t"
! "movd %%mm0, %0\n\t"
! #else
! "pfpnacc %%mm1, %%mm0\n\t" /* mm0 = mm0[0] - mm0[1] | mm1[0] + mm1[1] */
! "pswapd %%mm0, %%mm0\n\t"
! "movq %%mm0, %0"
! #endif
! :"=m"(buf[i])
! :"0"(buf[i]),"m"(xcos1[i]),"m"(xsin1[i])
! :"memory");
! /*
ac3_buf[i].re =(tmp_a_r * ac3_xcos1[i]) + (tmp_a_i * ac3_xsin1[i]);
ac3_buf[i].im =(tmp_a_r * ac3_xsin1[i]) - (tmp_a_i * ac3_xcos1[i]);
! */
}
- asm volatile ("femms":::"memory");
data_ptr = data;
--- 31,140 ----
{
int i, j;
! // float tmp_a_r, tmp_a_i;
float *data_ptr;
float *delay_ptr;
float *window_ptr;
+ asm volatile ("femms":::"memory");
// 512 IMDCT with source and dest data in 'data'
// Pre IFFT complex multiply plus IFFT complex conjugate
+ asm volatile (
+ "movl $1, %%eax\n\t"
+ "movd %%eax, %%mm7\n\t"
+ "negl %%eax\n\t"
+ "movd %%eax, %%mm6\n\t"
+ "punpckldq %%mm6, %%mm7\n\t" /* 1.0 | -1.0 */
+ "pi2fd %%mm7, %%mm7\n\t"
+ :::"eax","memory");
for( i=0; i < 128; i++) {
j = pm128[i];
! asm volatile(
! "movd %1, %%mm0\n\t"
! "movd %3, %%mm1\n\t"
! "punpckldq %2, %%mm0\n\t" /* mm0 = data[256-2*j-1] | data[2*j]*/
! "punpckldq %4, %%mm1\n\t" /* mm1 = xcos[j] | xsin[j] */
! "movq %%mm0, %%mm2\n\t"
! "pfmul %%mm1, %%mm0\n\t"
! #ifdef HAVE_3DNOWEX
! "pswapd %%mm1, %%mm1\n\t"
! #else
! "movq %%mm1, %%mm5\n\t"
! "psrlq $32, %%mm1\n\t"
! "punpckldq %%mm5, %%mm1\n\t"
! #endif
! "pfmul %%mm1, %%mm2\n\t"
! #ifdef HAVE_3DNOWEX
! "pfpnacc %%mm2, %%mm0\n\t"
! #else
! "pfmul %%mm7, %%mm0\n\t"
! "pfacc %%mm2, %%mm0\n\t"
! #endif
! "pfmul %%mm7, %%mm0\n\t"
! "movq %%mm0, %0"
! :"=m"(buf[i])
! :"m"(data[256-2*j-1]), "m"(data[2*j]), "m"(xcos1[j]), "m"(xsin1[j])
! :"memory");
! /*
! buf[i].re = (data[256-2*j-1] * xcos1[j] - data[2*j] * xsin1[j]);
! buf[i].im = (data[256-2*j-1] * xsin1[j] + data[2*j] * xcos1[j])*(-1.0);
! */
}
+ asm volatile ("femms":::"memory");
fft_128p (&buf[0]);
+ asm volatile ("femms":::"memory");
// Post IFFT complex multiply plus IFFT complex conjugate
! asm volatile (
! "movl $1, %%eax\n\t"
! "movd %%eax, %%mm7\n\t"
! "negl %%eax\n\t"
! "movd %%eax, %%mm6\n\t"
#ifndef HAVE_3DNOWEX
! "punpckldq %%mm6, %%mm7\n\t" /* 1.0 | -1.0 */
! "punpckldq %%mm7, %%mm6\n\t" /* -1.0 | 1.0 */
! "pi2fd %%mm7, %%mm7\n\t"
! "pi2fd %%mm6, %%mm6\n\t"
! #else
! "punpckldq %%mm6, %%mm7\n\t" /* 1.0 | -1.0 */
! "pi2fd %%mm7, %%mm7\n\t"
! "pswapd %%mm7, %%mm6\n\t" /* -1.0 | 1.0 */
#endif
+ :::"eax","memory");
for (i=0; i < 128; i++) {
! asm volatile(
! "movq %1, %%mm0\n\t" /* ac3_buf[i].re | ac3_buf[i].im */
! "movq %%mm0, %%mm1\n\t" /* ac3_buf[i].re | ac3_buf[i].im */
#ifndef HAVE_3DNOWEX
! "movq %%mm1, %%mm2\n\t"
! "psrlq $32, %%mm1\n\t"
! "punpckldq %%mm2, %%mm1\n\t"
#else
! "pswapd %%mm1, %%mm1\n\t" /* ac3_buf[i].re | ac3_buf[i].im */
#endif
! "movd %3, %%mm3\n\t" /* ac3_xsin[i] */
! "punpckldq %2, %%mm3\n\t" /* ac3_xsin[i] | ac3_xcos[i] */
! "pfmul %%mm3, %%mm0\n\t"
! "pfmul %%mm3, %%mm1\n\t"
#ifndef HAVE_3DNOWEX
! "pfmul %%mm7, %%mm0\n\t"
! "pfacc %%mm1, %%mm0\n\t"
! "movd %%mm0, 4%0\n\t"
! "psrlq $32, %%mm0\n\t"
! "movd %%mm0, %0\n\t"
! #else
! "pfpnacc %%mm1, %%mm0\n\t" /* mm0 = mm0[0] - mm0[1] | mm1[0] + mm1[1] */
! "pswapd %%mm0, %%mm0\n\t"
! "movq %%mm0, %0"
! #endif
! :"=m"(buf[i])
! :"0"(buf[i]),"m"(xcos1[i]),"m"(xsin1[i])
! :"memory");
! /*
ac3_buf[i].re =(tmp_a_r * ac3_xcos1[i]) + (tmp_a_i * ac3_xsin1[i]);
ac3_buf[i].im =(tmp_a_r * ac3_xsin1[i]) - (tmp_a_i * ac3_xcos1[i]);
! */
}
data_ptr = data;
***************
*** 103,113 ****
--- 144,188 ----
// Window and convert to real valued signal
for (i=0; i< 64; i++) {
+
+ asm volatile(
+ "movd %1, %%mm0\n\t"
+ "punpckldq %2, %%mm0\n\t"
+ "pfmul %3, %%mm0\n\t"
+ "pfmul %%mm6, %%mm0\n\t"
+ "pfadd %4, %%mm0\n\t"
+ "movq %%mm0, %0"
+ :"=m"(*data_ptr)
+ :"m"(buf[64+i].im), "m"(buf[64-i-1].re), "m"(*window_ptr), "m"(*delay_ptr)
+ :"memory");
+ data_ptr += 2;
+ window_ptr += 2;
+ delay_ptr += 2;
+
+ /*
*data_ptr++ = -buf[64+i].im * *window_ptr++ + *delay_ptr++;
*data_ptr++ = buf[64-i-1].re * *window_ptr++ + *delay_ptr++;
+ */
}
for(i=0; i< 64; i++) {
+
+ asm volatile(
+ "movd %1, %%mm0\n\t"
+ "punpckldq %2, %%mm0\n\t"
+ "pfmul %3, %%mm0\n\t"
+ "pfmul %%mm6, %%mm0\n\t"
+ "pfadd %4, %%mm0\n\t"
+ "movq %%mm0, %0"
+ :"=m"(*data_ptr)
+ :"m"(buf[i].re), "m"(buf[128-i-1].im), "m"(*window_ptr), "m"(*delay_ptr)
+ :"memory");
+ data_ptr += 2;
+ window_ptr += 2;
+ delay_ptr += 2;
+
+ /*
*data_ptr++ = -buf[i].re * *window_ptr++ + *delay_ptr++;
*data_ptr++ = buf[128-i-1].im * *window_ptr++ + *delay_ptr++;
+ */
}
***************
*** 116,127 ****
for(i=0; i< 64; i++) {
! *delay_ptr++ = -buf[64+i].re * *--window_ptr;
! *delay_ptr++ = buf[64-i-1].im * *--window_ptr;
}
for(i=0; i<64; i++) {
*delay_ptr++ = buf[i].im * *--window_ptr;
*delay_ptr++ = -buf[128-i-1].re * *--window_ptr;
}
}
--- 191,247 ----
for(i=0; i< 64; i++) {
!
! window_ptr -=2;
! asm volatile(
! "movd %1, %%mm0\n\t"
! "punpckldq %2, %%mm0\n\t"
! #ifdef HAVE_3DNOWEX
! "pswapd %3, %%mm3\n\t"
! #else
! "movq %3, %%mm3\n\t"
! "psrlq $32, %%mm3\n\t"
! "punpckldq %3, %%mm3\n\t"
! #endif
! "pfmul %%mm3, %%mm0\n\t"
! "pfmul %%mm6, %%mm0\n\t"
! "movq %%mm0, %0"
! :"=m"(*delay_ptr)
! :"m"(buf[64+i].re), "m"(buf[64-i-1].im), "m"(*window_ptr)
! :"memory");
! delay_ptr += 2;
! /*
! window_ptr--;
! *delay_ptr++ = -buf[64+i].re * *window_ptr;
! window_ptr--;
! *delay_ptr++ = buf[64-i-1].im * *window_ptr;
! */
}
for(i=0; i<64; i++) {
+ window_ptr -= 2;
+ asm volatile(
+ "movd %1, %%mm0\n\t"
+ "punpckldq %2, %%mm0\n\t"
+ #ifdef HAVE_3DNOWEX
+ "pswapd %3, %%mm3\n\t"
+ #else
+ "movq %3, %%mm3\n\t"
+ "psrlq $32, %%mm3\n\t"
+ "punpckldq %3, %%mm3\n\t"
+ #endif
+ "pfmul %%mm3, %%mm0\n\t"
+ "pfmul %%mm7, %%mm0\n\t"
+ "movq %%mm0, %0"
+ :"=m"(*delay_ptr)
+ :"m"(buf[i].im), "m"(buf[128-i-1].re), "m"(*window_ptr)
+ :"memory");
+ delay_ptr += 2;
+
+ /*
*delay_ptr++ = buf[i].im * *--window_ptr;
*delay_ptr++ = -buf[128-i-1].re * *--window_ptr;
+ */
}
+ asm volatile ("femms":::"memory");
}
_______________________________________________
Mplayer-cvslog mailing list
Mplayer-cvslog at lists.sourceforge.net
http://lists.sourceforge.net/lists/listinfo/mplayer-cvslog
More information about the MPlayer-cvslog mailing list