[Mplayer-cvslog] CVS: main/libac3/mmx srfft_3dnow.c,1.6,1.7 srfftp_3dnow.h,1.2,1.3

Nick Kurshev nickols_k at users.sourceforge.net
Sun May 27 16:13:07 CEST 2001


Update of /cvsroot/mplayer/main/libac3/mmx
In directory usw-pr-cvs1:/tmp/cvs-serv16094/main/libac3/mmx

Modified Files:
	srfft_3dnow.c srfftp_3dnow.h 
Log Message:
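Improvements: spell all inline asm as __asm__ __volatile__, compute the
x[0]/x[4]/x[2]/x[6] outputs with a 3DNow! inline-asm block instead of scalar C,
and store HSQRT2_3DNOW as an 8-byte-aligned complex_t loaded with a single movq.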

Index: srfft_3dnow.c
===================================================================
RCS file: /cvsroot/mplayer/main/libac3/mmx/srfft_3dnow.c,v
retrieving revision 1.6
retrieving revision 1.7
diff -C2 -r1.6 -r1.7
*** srfft_3dnow.c	2001/05/26 15:43:54	1.6
--- srfft_3dnow.c	2001/05/27 14:13:05	1.7
***************
*** 37,42 ****
    /* x[k] = sum_{i=0..3} x[i] * w^{i*k}, w=e^{-2*pi/4} 
     */
!   asm volatile("femms":::"memory");
!   asm volatile(
  	"movl	$-1, %%eax\n\t"
  	"movd	%%eax, %%mm6\n\t"
--- 37,42 ----
    /* x[k] = sum_{i=0..3} x[i] * w^{i*k}, w=e^{-2*pi/4} 
     */
!   __asm__ __volatile__("femms":::"memory");
!   __asm__ __volatile__(
  	"movl	$-1, %%eax\n\t"
  	"movd	%%eax, %%mm6\n\t"
***************
*** 88,92 ****
  	:"eax","memory");
  
!   asm volatile("femms":::"memory");
  }
  #if 0
--- 88,92 ----
  	:"eax","memory");
  
!   __asm__ __volatile__("femms":::"memory");
  }
  #if 0
***************
*** 146,150 ****
    complex_t wT1, wB1, wT2, wB2;
    
!   asm volatile(
  	"movq	8(%2), %%mm0\n\t"
  	"movq	24(%2), %%mm1\n\t"
--- 146,150 ----
    complex_t wT1, wB1, wT2, wB2;
    
!   __asm__ __volatile__(
  	"movq	8(%2), %%mm0\n\t"
  	"movq	24(%2), %%mm1\n\t"
***************
*** 155,159 ****
  	:"memory");
  
!   asm volatile(
  	"movq	16(%0), %%mm2\n\t"
  	"movq	32(%0), %%mm3\n\t"
--- 155,159 ----
  	:"memory");
  
!   __asm__ __volatile__(
  	"movq	16(%0), %%mm2\n\t"
  	"movq	32(%0), %%mm3\n\t"
***************
*** 167,223 ****
  
    fft_4(&x[0]);
- 
    
!   /* x[0] x[4] */
!   wT2.re = x[5].re;
!   wT2.re += x[7].re;
!   wT2.re += wT1.re;
!   wT2.re += wB1.re;
!   wT2.im = wT2.re;
!   wT2.re += x[0].re;
!   wT2.im = x[0].re - wT2.im;
!   x[0].re = wT2.re;
!   x[4].re = wT2.im;
! 
!   wT2.im = x[5].im;
!   wT2.im += x[7].im;
!   wT2.im += wT1.im;
!   wT2.im += wB1.im;
!   wT2.re = wT2.im;
!   wT2.re += x[0].im;
!   wT2.im = x[0].im - wT2.im;
!   x[0].im = wT2.re;
!   x[4].im = wT2.im;
    
!   /* x[2] x[6] */
!   wT2.re = x[5].im;
!   wT2.re -= x[7].im;
!   wT2.re += wT1.im;
!   wT2.re -= wB1.im;
!   wT2.im = wT2.re;
!   wT2.re += x[2].re;
!   wT2.im = x[2].re - wT2.im;
!   x[2].re = wT2.re;
!   x[6].re = wT2.im;
! 
!   wT2.im = x[5].re;
!   wT2.im -= x[7].re;
!   wT2.im += wT1.re;
!   wT2.im -= wB1.re;
!   wT2.re = wT2.im;
!   wT2.re += x[2].im;
!   wT2.im = x[2].im - wT2.im;
!   x[2].im = wT2.im;
!   x[6].im = wT2.re;
    
  
    /* x[1] x[5] */
    wT2.re = wT1.re;
-   wT2.re += wB1.im;
-   wT2.re -= x[5].re;
-   wT2.re -= x[7].im;
    wT2.im = wT1.im;
!   wT2.im -= wB1.re;
    wT2.im -= x[5].im;
    wT2.im += x[7].re;
  
--- 167,223 ----
  
    fft_4(&x[0]);
    
!   /* x[0] x[4] x[2] x[6] */
    
!   __asm__ __volatile__(
!       "movq	40(%1), %%mm0\n\t"
!       "movq	%%mm0,	%%mm3\n\t"
!       "movq	56(%1),	%%mm1\n\t"
!       "pfadd	%%mm1,	%%mm0\n\t"
!       "pfsub	%%mm1,	%%mm3\n\t"
!       "movq	(%2),	%%mm2\n\t"
!       "pfadd	%%mm2,	%%mm0\n\t"
!       "pfadd	%%mm2,	%%mm3\n\t"
!       "movq	(%3),	%%mm1\n\t"
!       "pfadd	%%mm1,	%%mm0\n\t"
!       "pfsub	%%mm1,	%%mm3\n\t"
!       "movq	(%1),	%%mm1\n\t"
!       "movq	16(%1),	%%mm4\n\t"
!       "movq	%%mm1,	%%mm2\n\t"
! #ifdef HAVE_3DNOWEX
!       "pswapd	%%mm3,	%%mm3\n\t"
! #else
!       "movq	%%mm3,	%%mm6\n\t"
!       "psrlq	$32,	%%mm3\n\t"
!       "punpckldq %%mm6,	%%mm3\n\t"
! #endif
!       "pfadd	%%mm0,	%%mm1\n\t"
!       "movq	%%mm4,	%%mm5\n\t"
!       "pfsub	%%mm0,	%%mm2\n\t"
!       "pfadd	%%mm3,	%%mm4\n\t"
!       "movq	%%mm1,	(%0)\n\t"
!       "pfsub	%%mm3,	%%mm5\n\t"
!       "movq	%%mm2,	32(%0)\n\t"
!       "movd	%%mm4,	16(%0)\n\t"
!       "movd	%%mm5,	48(%0)\n\t"
!       "psrlq	$32, %%mm4\n\t"
!       "psrlq	$32, %%mm5\n\t"
!       "movd	%%mm4,	52(%0)\n\t"
!       "movd	%%mm5,	20(%0)"
!       :"=r"(x)
!       :"0"(x), "r"(&wT1), "r"(&wB1)
!       :"memory");
    
+   __asm__ __volatile__("femms":::"memory");
  
    /* x[1] x[5] */
    wT2.re = wT1.re;
    wT2.im = wT1.im;
!   wT2.re -= x[5].re;
    wT2.im -= x[5].im;
+   
+   wT2.re += wB1.im;
+   wT2.im -= wB1.re;
+   wT2.re -= x[7].im;
    wT2.im += x[7].re;
  

Index: srfftp_3dnow.h
===================================================================
RCS file: /cvsroot/mplayer/main/libac3/mmx/srfftp_3dnow.h,v
retrieving revision 1.2
retrieving revision 1.3
diff -C2 -r1.2 -r1.3
*** srfftp_3dnow.h	2001/05/26 15:43:54	1.2
--- srfftp_3dnow.h	2001/05/27 14:13:05	1.3
***************
*** 34,42 ****
  #define SRFFTP_3DNOW_H__
  
! static float HSQRT2_3DNOW = 0.707106781188;
  
  #ifdef HAVE_3DNOWEX
  #define TRANS_FILL_MM6_MM7_3DNOW()\
!     asm(\
  	"movl	$-1, %%eax\n\t"\
  	"movd	%%eax, %%mm7\n\t"\
--- 34,42 ----
  #define SRFFTP_3DNOW_H__
  
! static complex_t HSQRT2_3DNOW __attribute__ ((aligned (8))) = { 0.707106781188, 0.707106781188 };
  
  #ifdef HAVE_3DNOWEX
  #define TRANS_FILL_MM6_MM7_3DNOW()\
!     __asm__ __volatile__(\
  	"movl	$-1, %%eax\n\t"\
  	"movd	%%eax, %%mm7\n\t"\
***************
*** 49,53 ****
  #else
  #define TRANS_FILL_MM6_MM7_3DNOW()\
!     asm(\
  	"movl	$-1, %%eax\n\t"\
  	"movd	%%eax, %%mm7\n\t"\
--- 49,53 ----
  #else
  #define TRANS_FILL_MM6_MM7_3DNOW()\
!     __asm__ __volatile__(\
  	"movl	$-1, %%eax\n\t"\
  	"movd	%%eax, %%mm7\n\t"\
***************
*** 72,78 ****
  #define TRANSZERO_3DNOW(A0,A4,A8,A12) \
  { \
!     asm volatile("femms":::"memory");\
      TRANS_FILL_MM6_MM7_3DNOW()\
!     asm(\
  	"movq	%4, %%mm0\n\t" /* mm0 = wTB[0]*/\
  	"movq	%5, %%mm1\n\t" /* mm1 = wTB[k*2]*/ \
--- 72,78 ----
  #define TRANSZERO_3DNOW(A0,A4,A8,A12) \
  { \
!     __asm__ __volatile__("femms":::"memory");\
      TRANS_FILL_MM6_MM7_3DNOW()\
!     __asm__ __volatile__(\
  	"movq	%4, %%mm0\n\t" /* mm0 = wTB[0]*/\
  	"movq	%5, %%mm1\n\t" /* mm1 = wTB[k*2]*/ \
***************
*** 99,110 ****
  	:"m"(wTB[0]), "m"(wTB[k*2]), "0"(A0), "2"(A4)\
  	:"memory");\
!     asm volatile("femms":::"memory");\
  }
  
  #define TRANSHALF_16_3DNOW(A2,A6,A10,A14)\
  {\
!     asm volatile("femms":::"memory");\
      TRANS_FILL_MM6_MM7_3DNOW()\
!     asm(\
  	"movq	%4, %%mm0\n\t"/*u.re = wTB[2].im + wTB[2].re;*/\
  	"movq	%%mm0, %%mm1\n\t"\
--- 99,110 ----
  	:"m"(wTB[0]), "m"(wTB[k*2]), "0"(A0), "2"(A4)\
  	:"memory");\
!     __asm__ __volatile__("femms":::"memory");\
  }
  
  #define TRANSHALF_16_3DNOW(A2,A6,A10,A14)\
  {\
!     __asm__ __volatile__("femms":::"memory");\
      TRANS_FILL_MM6_MM7_3DNOW()\
!     __asm__ __volatile__(\
  	"movq	%4, %%mm0\n\t"/*u.re = wTB[2].im + wTB[2].re;*/\
  	"movq	%%mm0, %%mm1\n\t"\
***************
*** 122,127 ****
  	"pfmul	%%mm6, %%mm1\n\t"/*u.re = u.re + a.re;*/\
  	"pfadd	%%mm1, %%mm0\n\t"/*u.im = u.im - a.im; mm0 = u*/\
! 	"movd	%8, %%mm2\n\t"\
! 	"punpckldq %8, %%mm2\n\t"\
  	"pfmul	%%mm2, %%mm3\n\t" /* v *= HSQRT2_3DNOW; */\
  	"pfmul	%%mm2, %%mm0\n\t" /* u *= HSQRT2_3DNOW; */\
--- 122,126 ----
  	"pfmul	%%mm6, %%mm1\n\t"/*u.re = u.re + a.re;*/\
  	"pfadd	%%mm1, %%mm0\n\t"/*u.im = u.im - a.im; mm0 = u*/\
! 	"movq	%8, %%mm2\n\t"\
  	"pfmul	%%mm2, %%mm3\n\t" /* v *= HSQRT2_3DNOW; */\
  	"pfmul	%%mm2, %%mm0\n\t" /* u *= HSQRT2_3DNOW; */\
***************
*** 144,148 ****
  	:"m"(wTB[2]), "m"(wTB[6]), "0"(A2), "2"(A6), "m"(HSQRT2_3DNOW)\
  	:"memory");\
!     asm volatile("femms":::"memory");\
  }
  
--- 143,147 ----
  	:"m"(wTB[2]), "m"(wTB[6]), "0"(A2), "2"(A6), "m"(HSQRT2_3DNOW)\
  	:"memory");\
!     __asm__ __volatile__("femms":::"memory");\
  }
  


_______________________________________________
Mplayer-cvslog mailing list
Mplayer-cvslog at lists.sourceforge.net
http://lists.sourceforge.net/lists/listinfo/mplayer-cvslog



More information about the MPlayer-cvslog mailing list