[Mplayer-cvslog] CVS: main/libac3/mmx srfft_3dnow.c,1.5,1.6 srfftp_3dnow.h,1.1,1.2

Nick Kurshev nickols_k at users.sourceforge.net
Sat May 26 17:43:57 CEST 2001


Update of /cvsroot/mplayer/main/libac3/mmx
In directory usw-pr-cvs1:/tmp/cvs-serv19399/main/libac3/mmx

Modified Files:
	srfft_3dnow.c srfftp_3dnow.h 
Log Message:
Improvements

Index: srfft_3dnow.c
===================================================================
RCS file: /cvsroot/mplayer/main/libac3/mmx/srfft_3dnow.c,v
retrieving revision 1.5
retrieving revision 1.6
diff -C2 -r1.5 -r1.6
*** srfft_3dnow.c	2001/05/26 10:29:41	1.5
--- srfft_3dnow.c	2001/05/26 15:43:54	1.6
***************
*** 427,431 ****
  
    /* transform x[2], x[10], x[6], x[14] */
!   TRANSHALF_16(x[2],x[6],x[10],x[14]);
  
    /* transform x[3], x[11], x[7], x[15] */
--- 427,431 ----
  
    /* transform x[2], x[10], x[6], x[14] */
!   TRANSHALF_16_3DNOW(x[2],x[6],x[10],x[14]);
  
    /* transform x[3], x[11], x[7], x[15] */

Index: srfftp_3dnow.h
===================================================================
RCS file: /cvsroot/mplayer/main/libac3/mmx/srfftp_3dnow.h,v
retrieving revision 1.1
retrieving revision 1.2
diff -C2 -r1.1 -r1.2
*** srfftp_3dnow.h	2001/05/26 10:29:41	1.1
--- srfftp_3dnow.h	2001/05/26 15:43:54	1.2
***************
*** 34,37 ****
--- 34,39 ----
  #define SRFFTP_3DNOW_H__
  
+ static float HSQRT2_3DNOW = 0.707106781188;
+ 
  #ifdef HAVE_3DNOWEX
  #define TRANS_FILL_MM6_MM7_3DNOW()\
***************
*** 60,67 ****
  
  #ifdef HAVE_3DNOWEX
! #define PSWAP_MM(mm_base,mm_hlp) "pswapd	"##mm_base","##mm_base" \n\t"
  #else
  #define PSWAP_MM(mm_base,mm_hlp)\
! 	"movq	"##mm_base","##mm_hlp" \n\t"\
  	"psrlq $32, "##mm_base"\n\t"\
  	"punpckldq "##mm_hlp","##mm_base"\n\t"
--- 62,69 ----
  
  #ifdef HAVE_3DNOWEX
! #define PSWAP_MM(mm_base,mm_hlp) "pswapd	"##mm_base","##mm_base"\n\t"
  #else
  #define PSWAP_MM(mm_base,mm_hlp)\
! 	"movq	"##mm_base","##mm_hlp"\n\t"\
  	"psrlq $32, "##mm_base"\n\t"\
  	"punpckldq "##mm_hlp","##mm_base"\n\t"
***************
*** 83,99 ****
  	PSWAP_MM("%%mm4","%%mm2")/* mm4 = v*/\
  	"movq	%6, %%mm0\n\t" /* a1 = A0;*/\
- 	"movq	%%mm0, %%mm1\n\t"\
- 	"pfadd	%%mm5, %%mm0\n\t" /*A0 = a1 + u;*/\
- 	"pfsub	%%mm5, %%mm1\n\t" /*A1 = a1 - u;*/\
- 	"movq	%%mm0, %0\n\t"\
- 	"movq	%%mm1, %1\n\t"\
  	"movq	%7, %%mm2\n\t" /* a1 = A4;*/\
  	"movq	%%mm2, %%mm3\n\t"\
  	"pfadd	%%mm4, %%mm2\n\t" /*A12 = a1 + v;*/\
  	"pfsub	%%mm4, %%mm3\n\t" /*A4  = a1 - v;*/\
  	"movq	%%mm2, %3\n\t"\
  	"movq	%%mm3, %2"\
  	:"=m"(A0), "=m"(A8), "=m"(A4), "=m"(A12)\
  	:"m"(wTB[0]), "m"(wTB[k*2]), "0"(A0), "2"(A4)\
  	:"memory");\
      asm volatile("femms":::"memory");\
--- 85,146 ----
  	PSWAP_MM("%%mm4","%%mm2")/* mm4 = v*/\
  	"movq	%6, %%mm0\n\t" /* a1 = A0;*/\
  	"movq	%7, %%mm2\n\t" /* a1 = A4;*/\
+ 	"movq	%%mm0, %%mm1\n\t"\
  	"movq	%%mm2, %%mm3\n\t"\
+ 	"pfadd	%%mm5, %%mm0\n\t" /*A0 = a1 + u;*/\
  	"pfadd	%%mm4, %%mm2\n\t" /*A12 = a1 + v;*/\
+ 	"pfsub	%%mm5, %%mm1\n\t" /*A8 = a1 - u;*/\
  	"pfsub	%%mm4, %%mm3\n\t" /*A4  = a1 - v;*/\
+ 	"movq	%%mm0, %0\n\t"\
  	"movq	%%mm2, %3\n\t"\
+ 	"movq	%%mm1, %1\n\t"\
  	"movq	%%mm3, %2"\
  	:"=m"(A0), "=m"(A8), "=m"(A4), "=m"(A12)\
  	:"m"(wTB[0]), "m"(wTB[k*2]), "0"(A0), "2"(A4)\
+ 	:"memory");\
+     asm volatile("femms":::"memory");\
+ }
+ 
+ #define TRANSHALF_16_3DNOW(A2,A6,A10,A14)\
+ {\
+     asm volatile("femms":::"memory");\
+     TRANS_FILL_MM6_MM7_3DNOW()\
+     asm(\
+ 	"movq	%4, %%mm0\n\t"/*u.re = wTB[2].im + wTB[2].re;*/\
+ 	"movq	%%mm0, %%mm1\n\t"\
+ 	"pfmul	%%mm7, %%mm1\n\t"\
+ 	"pfacc	%%mm1, %%mm0\n\t"/*u.im = wTB[2].im - wTB[2].re; mm0 = u*/\
+ 	"movq	%5, %%mm1\n\t"  /*a.re = wTB[6].im - wTB[6].re; */\
+ 	"movq	%%mm1, %%mm2\n\t"\
+ 	"pfmul	%%mm7, %%mm1\n\t"\
+ 	"pfacc	%%mm2, %%mm1\n\t"/*a.im = wTB[6].im + wTB[6].re;  mm1 = a*/\
+ 	"movq	%%mm1, %%mm2\n\t"\
+ 	"pfmul	%%mm7, %%mm2\n\t"/*v.im = u.re - a.re;*/\
+ 	"movq	%%mm0, %%mm3\n\t"/*v.re = u.im + a.im;*/\
+ 	"pfadd	%%mm2, %%mm3\n\t"\
+ 	PSWAP_MM("%%mm3","%%mm2")/*mm3 = v*/\
+ 	"pfmul	%%mm6, %%mm1\n\t"/*u.re = u.re + a.re;*/\
+ 	"pfadd	%%mm1, %%mm0\n\t"/*u.im = u.im - a.im; mm0 = u*/\
+ 	"movd	%8, %%mm2\n\t"\
+ 	"punpckldq %8, %%mm2\n\t"\
+ 	"pfmul	%%mm2, %%mm3\n\t" /* v *= HSQRT2_3DNOW; */\
+ 	"pfmul	%%mm2, %%mm0\n\t" /* u *= HSQRT2_3DNOW; */\
+ 	"movq	%6, %%mm1\n\t" /* a1 = A2;*/\
+ 	"movq	%%mm1, %%mm2\n\t"\
+ 	"pfadd	%%mm0, %%mm1\n\t" /*A2 = a1 + u;*/\
+ 	"pfsub	%%mm0, %%mm2\n\t" /*A10 = a1 - u;*/\
+ 	"movq	%%mm1, %0\n\t"\
+ 	"movq	%%mm2, %1\n\t"\
+ 	"movq	%7, %%mm1\n\t" /* a1 = A6;*/\
+ 	"movq	%%mm1, %%mm2\n\t"\
+ 	"movq	%%mm3, %%mm4\n\t"\
+ 	"pfmul	%%mm6, %%mm4\n\t"/*A6.re  = a1.re + v.re;*/\
+ 	"pfadd	%%mm4, %%mm1\n\t"/*A6.im  = a1.im - v.im;*/\
+ 	"pfmul	%%mm7, %%mm3\n\t"/*A14.re = a1.re - v.re;*/\
+ 	"pfadd	%%mm3, %%mm2\n\t"/*A14.im = a1.im + v.im;*/\
+ 	"movq	%%mm1, %2\n\t"\
+ 	"movq	%%mm2, %3"\
+ 	:"=m"(A2), "=m"(A10), "=m"(A6), "=m"(A14)\
+ 	:"m"(wTB[2]), "m"(wTB[6]), "0"(A2), "2"(A6), "m"(HSQRT2_3DNOW)\
  	:"memory");\
      asm volatile("femms":::"memory");\


_______________________________________________
Mplayer-cvslog mailing list
Mplayer-cvslog at lists.sourceforge.net
http://lists.sourceforge.net/lists/listinfo/mplayer-cvslog



More information about the MPlayer-cvslog mailing list