[Mplayer-cvslog] CVS: main/libac3/mmx srfft_3dnow.c,1.3,1.4 imdct_3dnow.c,1.1,1.2

Nick Kurshev nickols_k at users.sourceforge.net
Thu May 24 11:43:51 CEST 2001


Update of /cvsroot/mplayer/main/libac3/mmx
In directory usw-pr-cvs1:/tmp/cvs-serv1565/main/libac3/mmx

Modified Files:
	srfft_3dnow.c imdct_3dnow.c 
Log Message:
3dnow! expansion

Index: srfft_3dnow.c
===================================================================
RCS file: /cvsroot/mplayer/main/libac3/mmx/srfft_3dnow.c,v
retrieving revision 1.3
retrieving revision 1.4
diff -C2 -r1.3 -r1.4
*** srfft_3dnow.c	2001/05/23 09:48:26	1.3
--- srfft_3dnow.c	2001/05/24 09:43:49	1.4
***************
*** 146,168 ****
    
    asm volatile(
! 	"movq	8(%0), %%mm0\n\t"
! 	"movq	24(%0), %%mm1\n\t"
! 	"movq	%%mm0, %1\n\t"  /* wT1 = x[1]; */
! 	"movq	%%mm1, %2\n\t" /* wB1 = x[3]; */
! 	:"=r"(x),"=m"(wT1), "=m"(wB1)
! 	:"0"(x)
  	:"memory");
  
    asm volatile(
! 	"movq	16(%0), %%mm3\n\t"
! 	"movq	32(%0), %%mm4\n\t"
! 	"movq	48(%0), %%mm5\n\t"
! 	"movq	%%mm3, 8(%0)\n\t"  /* x[1] = x[2]; */
! 	"movq	%%mm4, 16(%0)\n\t" /* x[2] = x[4]; */
! 	"movq	%%mm5, 24(%0)\n\t" /* x[3] = x[6]; */
  	:"=r"(x)
  	:"0"(x)
  	:"memory");
!   asm volatile("femms":::"memory");
    fft_4(&x[0]);
  
--- 146,168 ----
    
    asm volatile(
! 	"movq	8(%2), %%mm0\n\t"
! 	"movq	24(%2), %%mm1\n\t"
! 	"movq	%%mm0, %0\n\t"  /* wT1 = x[1]; */
! 	"movq	%%mm1, %1\n\t" /* wB1 = x[3]; */
! 	:"=m"(wT1), "=m"(wB1)
! 	:"r"(x)
  	:"memory");
  
    asm volatile(
! 	"movq	16(%0), %%mm2\n\t"
! 	"movq	32(%0), %%mm3\n\t"
! 	"movq	48(%0), %%mm4\n\t"
! 	"movq	%%mm2, 8(%0)\n\t"  /* x[1] = x[2]; */
! 	"movq	%%mm3, 16(%0)\n\t" /* x[2] = x[4]; */
! 	"movq	%%mm4, 24(%0)\n\t" /* x[3] = x[6]; */
  	:"=r"(x)
  	:"0"(x)
  	:"memory");
! 
    fft_4(&x[0]);
  

Index: imdct_3dnow.c
===================================================================
RCS file: /cvsroot/mplayer/main/libac3/mmx/imdct_3dnow.c,v
retrieving revision 1.1
retrieving revision 1.2
diff -C2 -r1.1 -r1.2
*** imdct_3dnow.c	2001/05/23 08:20:16	1.1
--- imdct_3dnow.c	2001/05/24 09:43:49	1.2
***************
*** 31,99 ****
  {
  	int i, j;
! 	float tmp_a_r, tmp_a_i;
  	float *data_ptr;
  	float *delay_ptr;
  	float *window_ptr;
  
  // 512 IMDCT with source and dest data in 'data'
  // Pre IFFT complex multiply plus IFFT complex conjugate
  
  	for( i=0; i < 128; i++) {
  		j = pm128[i];
! 		//a = (data[256-2*j-1] - data[2*j]) * (xcos1[j] + xsin1[j]);
! 		//c = data[2*j] * xcos1[j];
! 		//b = data[256-2*j-1] * xsin1[j];
! 		//buf1[i].re = a - b + c;
! 		//buf1[i].im = b + c;
! 		buf[i].re = (data[256-2*j-1] * xcos1[j]) - (data[2*j] * xsin1[j]);
! 		buf[i].im = -1.0 * (data[2*j] * xcos1[j] + data[256-2*j-1] * xsin1[j]);
  	}
  
  	fft_128p (&buf[0]);
  
  // Post IFFT complex multiply  plus IFFT complex conjugate
! 	asm volatile ("femms":::"memory");
  #ifndef HAVE_3DNOWEX
! 	asm volatile ("movl $1, %%eax\n\t"
! 		      "movd %%eax, %%mm4\n\t"
! 		      "negl %%eax\n\t"
! 		      "movd %%eax, %%mm5\n\t"
! 		      "punpckldq %%mm5, %%mm4\n\t" /* 1.0 | -1.0 */
! 		      "pi2fd %%mm4, %%mm4\n\t":::"eax","memory");
  #endif
  	for (i=0; i < 128; i++) {
! 	    asm volatile("movq %1, %%mm0\n\t" /* ac3_buf[i].re | ac3_buf[i].im */
! 		         "movq %%mm0, %%mm1\n\t" /* ac3_buf[i].re | ac3_buf[i].im */
  #ifndef HAVE_3DNOWEX
! 			 "movq %%mm1, %%mm2\n\t"
! 			 "psrlq $32, %%mm1\n\t"
! 			 "punpckldq %%mm2, %%mm1\n\t"
  #else			 
! 		         "pswapd %%mm1, %%mm1\n\t" /* ac3_buf[i].re | ac3_buf[i].im */
  #endif			 
! 			 "movd %3, %%mm3\n\t" /* ac3_xsin[i] */
! 			 "punpckldq %2, %%mm3\n\t" /* ac3_xsin[i] | ac3_xcos[i] */
! 			 "pfmul %%mm3, %%mm0\n\t"
! 			 "pfmul %%mm3, %%mm1\n\t"
  #ifndef HAVE_3DNOWEX
! 			 "pfmul %%mm4, %%mm0\n\t"
! 			 "pfacc %%mm1, %%mm0\n\t"
! 			 "movd %%mm0, 4%0\n\t"
! 			 "psrlq $32, %%mm0\n\t"
! 			 "movd %%mm0, %0\n\t"
! #else
! 			 "pfpnacc %%mm1, %%mm0\n\t" /* mm0 = mm0[0] - mm0[1] | mm1[0] + mm1[1] */
! 			 "pswapd %%mm0, %%mm0\n\t"
! 			 "movq %%mm0, %0"
! #endif
! 			 :"=m"(buf[i])
! 			 :"0"(buf[i]),"m"(xcos1[i]),"m"(xsin1[i])
! 			 :"memory");
! 			 /*
  		ac3_buf[i].re =(tmp_a_r * ac3_xcos1[i])  +  (tmp_a_i  * ac3_xsin1[i]);
  		ac3_buf[i].im =(tmp_a_r * ac3_xsin1[i])  -  (tmp_a_i  * ac3_xcos1[i]);
! 		*/
  	}
- 	asm volatile ("femms":::"memory");
  
  	data_ptr = data;
--- 31,140 ----
  {
  	int i, j;
! //	float tmp_a_r, tmp_a_i;
  	float *data_ptr;
  	float *delay_ptr;
  	float *window_ptr;
  
+   asm volatile ("femms":::"memory");
  // 512 IMDCT with source and dest data in 'data'
  // Pre IFFT complex multiply plus IFFT complex conjugate
  
+   asm volatile (
+ 	"movl $1, %%eax\n\t"
+ 	"movd %%eax, %%mm7\n\t"
+ 	"negl %%eax\n\t"
+ 	"movd %%eax, %%mm6\n\t"
+ 	"punpckldq %%mm6, %%mm7\n\t" /* 1.0 | -1.0 */
+ 	"pi2fd %%mm7, %%mm7\n\t"
+ 	:::"eax","memory");
  	for( i=0; i < 128; i++) {
  		j = pm128[i];
! 	asm volatile(
! 		"movd	%1, %%mm0\n\t"
! 		"movd	%3, %%mm1\n\t"
! 		"punpckldq %2, %%mm0\n\t" /* mm0 = data[256-2*j-1] | data[2*j]*/
! 		"punpckldq %4, %%mm1\n\t" /* mm1 = xcos[j] | xsin[j] */
! 		"movq	%%mm0, %%mm2\n\t"
! 		"pfmul	%%mm1, %%mm0\n\t"
! #ifdef HAVE_3DNOWEX
! 		"pswapd	%%mm1, %%mm1\n\t"
! #else
! 		"movq %%mm1, %%mm5\n\t"
! 		"psrlq $32, %%mm1\n\t"
! 		"punpckldq %%mm5, %%mm1\n\t"
! #endif
! 		"pfmul	%%mm1, %%mm2\n\t"
! #ifdef HAVE_3DNOWEX
! 		"pfpnacc %%mm2, %%mm0\n\t"
! #else
! 		"pfmul	%%mm7, %%mm0\n\t"
! 		"pfacc	%%mm2, %%mm0\n\t"
! #endif
! 		"pfmul	%%mm7, %%mm0\n\t"
! 		"movq	%%mm0, %0"
! 		:"=m"(buf[i])
! 		:"m"(data[256-2*j-1]), "m"(data[2*j]), "m"(xcos1[j]), "m"(xsin1[j])
! 		:"memory");
! /*
! 		buf[i].re = (data[256-2*j-1] * xcos1[j] - data[2*j] * xsin1[j]);
! 		buf[i].im = (data[256-2*j-1] * xsin1[j] + data[2*j] * xcos1[j])*(-1.0);
! */
  	}
  
+   asm volatile ("femms":::"memory");
  	fft_128p (&buf[0]);
+   asm volatile ("femms":::"memory");
  
  // Post IFFT complex multiply  plus IFFT complex conjugate
!   asm volatile (
! 	"movl $1, %%eax\n\t"
! 	"movd %%eax, %%mm7\n\t"
! 	"negl %%eax\n\t"
! 	"movd %%eax, %%mm6\n\t"
  #ifndef HAVE_3DNOWEX
! 	"punpckldq %%mm6, %%mm7\n\t" /* 1.0 | -1.0 */
! 	"punpckldq %%mm7, %%mm6\n\t" /* -1.0 | 1.0 */
! 	"pi2fd %%mm7, %%mm7\n\t"
! 	"pi2fd %%mm6, %%mm6\n\t"
! #else
! 	"punpckldq %%mm6, %%mm7\n\t" /* 1.0 | -1.0 */
! 	"pi2fd %%mm7, %%mm7\n\t"
! 	"pswapd %%mm7, %%mm6\n\t" /* -1.0 | 1.0 */
  #endif
+ 	:::"eax","memory");
  	for (i=0; i < 128; i++) {
! 	    asm volatile(
! 		"movq %1, %%mm0\n\t" /* ac3_buf[i].re | ac3_buf[i].im */
! 		"movq %%mm0, %%mm1\n\t" /* ac3_buf[i].re | ac3_buf[i].im */
  #ifndef HAVE_3DNOWEX
! 		"movq %%mm1, %%mm2\n\t"
! 		"psrlq $32, %%mm1\n\t"
! 		"punpckldq %%mm2, %%mm1\n\t"
  #else			 
! 		"pswapd %%mm1, %%mm1\n\t" /* ac3_buf[i].re | ac3_buf[i].im */
  #endif			 
! 		"movd %3, %%mm3\n\t" /* ac3_xsin[i] */
! 		"punpckldq %2, %%mm3\n\t" /* ac3_xsin[i] | ac3_xcos[i] */
! 		"pfmul %%mm3, %%mm0\n\t"
! 		"pfmul %%mm3, %%mm1\n\t"
  #ifndef HAVE_3DNOWEX
! 		"pfmul %%mm7, %%mm0\n\t"
! 		"pfacc %%mm1, %%mm0\n\t"
! 		"movd %%mm0, 4%0\n\t"
! 		"psrlq $32, %%mm0\n\t"
! 		"movd %%mm0, %0\n\t"
! #else
! 		"pfpnacc %%mm1, %%mm0\n\t" /* mm0 = mm0[0] - mm0[1] | mm1[0] + mm1[1] */
! 		"pswapd %%mm0, %%mm0\n\t"
! 		"movq %%mm0, %0"
! #endif
! 		:"=m"(buf[i])
! 		:"0"(buf[i]),"m"(xcos1[i]),"m"(xsin1[i])
! 		:"memory");
! /*
  		ac3_buf[i].re =(tmp_a_r * ac3_xcos1[i])  +  (tmp_a_i  * ac3_xsin1[i]);
  		ac3_buf[i].im =(tmp_a_r * ac3_xsin1[i])  -  (tmp_a_i  * ac3_xcos1[i]);
! */
  	}
  
  	data_ptr = data;
***************
*** 103,113 ****
--- 144,188 ----
  // Window and convert to real valued signal
  	for (i=0; i< 64; i++) {
+ 
+ 	asm volatile(
+ 		"movd	%1, %%mm0\n\t"
+ 		"punpckldq %2, %%mm0\n\t"
+ 		"pfmul	%3, %%mm0\n\t"
+ 		"pfmul	%%mm6, %%mm0\n\t"
+ 		"pfadd	%4, %%mm0\n\t"
+ 		"movq	%%mm0, %0"
+ 		:"=m"(*data_ptr)
+ 		:"m"(buf[64+i].im), "m"(buf[64-i-1].re), "m"(*window_ptr), "m"(*delay_ptr)
+ 		:"memory");
+ 		data_ptr += 2;
+ 		window_ptr += 2;
+ 		delay_ptr += 2;
+ 
+ /*              
  		*data_ptr++   = -buf[64+i].im   * *window_ptr++ + *delay_ptr++;
  		*data_ptr++   = buf[64-i-1].re * *window_ptr++ + *delay_ptr++;
+ */
  	}
  
  	for(i=0; i< 64; i++) {
+ 
+ 	asm volatile(
+ 		"movd	%1, %%mm0\n\t"
+ 		"punpckldq %2, %%mm0\n\t"
+ 		"pfmul	%3, %%mm0\n\t"
+ 		"pfmul	%%mm6, %%mm0\n\t"
+ 		"pfadd	%4, %%mm0\n\t"
+ 		"movq	%%mm0, %0"
+ 		:"=m"(*data_ptr)
+ 		:"m"(buf[i].re), "m"(buf[128-i-1].im), "m"(*window_ptr), "m"(*delay_ptr)
+ 		:"memory");
+ 		data_ptr += 2;
+ 		window_ptr += 2;
+ 		delay_ptr += 2;
+ 
+ /*
  		*data_ptr++  = -buf[i].re       * *window_ptr++ + *delay_ptr++;
  		*data_ptr++  = buf[128-i-1].im * *window_ptr++ + *delay_ptr++;
+ */
  	}
  
***************
*** 116,127 ****
  
  	for(i=0; i< 64; i++) {
! 		*delay_ptr++  = -buf[64+i].re   * *--window_ptr;
! 		*delay_ptr++  =  buf[64-i-1].im * *--window_ptr;
  	}
  
  	for(i=0; i<64; i++) {
  		*delay_ptr++  =  buf[i].im       * *--window_ptr;
  		*delay_ptr++  = -buf[128-i-1].re * *--window_ptr;
  	}
  }
  
--- 191,247 ----
  
  	for(i=0; i< 64; i++) {
! 
! 	    window_ptr -=2;
! 	    asm volatile(
! 		"movd	%1, %%mm0\n\t"
! 		"punpckldq %2, %%mm0\n\t"
! #ifdef HAVE_3DNOWEX
! 		"pswapd	%3, %%mm3\n\t"
! #else
! 		"movq	%3, %%mm3\n\t"
! 		"psrlq	$32, %%mm3\n\t"
! 		"punpckldq %3, %%mm3\n\t"
! #endif
! 		"pfmul	%%mm3, %%mm0\n\t"
! 		"pfmul	%%mm6, %%mm0\n\t"
! 		"movq	%%mm0, %0"
! 		:"=m"(*delay_ptr)
! 		:"m"(buf[64+i].re), "m"(buf[64-i-1].im), "m"(*window_ptr)
! 		:"memory");
! 		delay_ptr += 2;
! /*
! 		window_ptr--;
! 		*delay_ptr++  = -buf[64+i].re   * *window_ptr;
! 		window_ptr--;
! 		*delay_ptr++  =  buf[64-i-1].im * *window_ptr;
! */
  	}
  
  	for(i=0; i<64; i++) {
+ 	window_ptr -= 2;
+ 	asm volatile(
+ 		"movd	%1, %%mm0\n\t"
+ 		"punpckldq %2, %%mm0\n\t"
+ #ifdef HAVE_3DNOWEX
+ 		"pswapd	%3, %%mm3\n\t"
+ #else
+ 		"movq	%3, %%mm3\n\t"
+ 		"psrlq	$32, %%mm3\n\t"
+ 		"punpckldq %3, %%mm3\n\t"
+ #endif
+ 		"pfmul	%%mm3, %%mm0\n\t"
+ 		"pfmul	%%mm7, %%mm0\n\t"
+ 		"movq	%%mm0, %0"
+ 		:"=m"(*delay_ptr)
+ 		:"m"(buf[i].im), "m"(buf[128-i-1].re), "m"(*window_ptr)
+ 		:"memory");
+ 		delay_ptr += 2;
+ 
+ /*
  		*delay_ptr++  =  buf[i].im       * *--window_ptr;
  		*delay_ptr++  = -buf[128-i-1].re * *--window_ptr;
+ */
  	}
+   asm volatile ("femms":::"memory");
  }
  


_______________________________________________
Mplayer-cvslog mailing list
Mplayer-cvslog at lists.sourceforge.net
http://lists.sourceforge.net/lists/listinfo/mplayer-cvslog



More information about the MPlayer-cvslog mailing list