[Mplayer-cvslog] CVS: main/libac3/mmx imdct_3dnow.c,1.4,1.5 srfft_3dnow.c,1.8,1.9 srfftp_3dnow.h,1.3,1.4
Nick Kurshev
nickols_k at users.sourceforge.net
Thu May 31 19:58:59 CEST 2001
- Previous message: [Mplayer-cvslog] CVS: main/libac3 Makefile,1.5,1.6 imdct.c,1.2,1.3 srfftp.h,1.1.1.1,1.2
- Next message: [Mplayer-cvslog] CVS: main subreader.c,1.15,1.16 subreader.h,1.6,1.7
- Messages sorted by:
[ date ]
[ thread ]
[ subject ]
[ author ]
Update of /cvsroot/mplayer/main/libac3/mmx
In directory usw-pr-cvs1:/tmp/cvs-serv11371/main/libac3/mmx
Modified Files:
imdct_3dnow.c srfft_3dnow.c srfftp_3dnow.h
Log Message:
libac3 is now fully 3DNow! optimized
Index: imdct_3dnow.c
===================================================================
RCS file: /cvsroot/mplayer/main/libac3/mmx/imdct_3dnow.c,v
retrieving revision 1.4
retrieving revision 1.5
diff -C2 -r1.4 -r1.5
*** imdct_3dnow.c 2001/05/27 01:53:17 1.4
--- imdct_3dnow.c 2001/05/31 17:58:56 1.5
***************
*** 80,86 ****
}
- __asm__ __volatile__ ("femms":::"memory");
fft_128p (&buf[0]);
- __asm__ __volatile__ ("femms":::"memory");
// Post IFFT complex multiply plus IFFT complex conjugate
--- 80,84 ----
***************
*** 227,231 ****
--- 225,231 ----
}
+ __asm__ __volatile__ ("femms":::"memory");
fft_128p(&buf[0]);
+ __asm__ __volatile__ ("femms":::"memory");
/* Post IFFT complex multiply plus IFFT complex conjugate*/
***************
*** 309,314 ****
--- 309,316 ----
}
+ __asm__ __volatile__ ("femms":::"memory");
fft_64p(&buf1[0]);
fft_64p(&buf2[0]);
+ __asm__ __volatile__ ("femms":::"memory");
#ifdef DEBUG
***************
*** 404,409 ****
--- 406,413 ----
+ __asm__ __volatile__ ("femms":::"memory");
fft_64p(&buf1[0]);
fft_64p(&buf2[0]);
+ __asm__ __volatile__ ("femms":::"memory");
#ifdef DEBUG
Index: srfft_3dnow.c
===================================================================
RCS file: /cvsroot/mplayer/main/libac3/mmx/srfft_3dnow.c,v
retrieving revision 1.8
retrieving revision 1.9
diff -C2 -r1.8 -r1.9
*** srfft_3dnow.c 2001/05/27 16:54:02 1.8
--- srfft_3dnow.c 2001/05/31 17:58:56 1.9
***************
*** 37,41 ****
/* x[k] = sum_{i=0..3} x[i] * w^{i*k}, w=e^{-2*pi/4}
*/
- __asm__ __volatile__("femms":::"memory");
__asm__ __volatile__ (
"movl $1, %%eax\n\t"
--- 37,40 ----
***************
*** 90,95 ****
:"0"(x)
:"memory");
-
- __asm__ __volatile__("femms":::"memory");
}
#if 0
--- 89,92 ----
***************
*** 318,322 ****
:"m"(wT1), "m"(wB2), "m"(wB1), "0"(x), "m"(HSQRT2_3DNOW)
:"memory");
- __asm__ __volatile__("femms":::"memory");
}
#if 0
--- 315,318 ----
***************
*** 445,449 ****
--- 441,447 ----
register complex_t *x2k, *x3k, *x4k, *wB;
register float a_r, a_i, a1_r, a1_i, u_r, u_i, v_r, v_i;
+ complex_t a, a1, u, v;
+ TRANS_FILL_MM6_MM7_3DNOW();
x2k = x + 2 * k;
x3k = x2k + 2 * k;
***************
*** 452,461 ****
TRANSZERO_3DNOW(x[0],x2k[0],x3k[0],x4k[0]);
! TRANS(x[1],x2k[1],x3k[1],x4k[1],wTB[1],wB[1],d[1],d_3[1]);
--k;
for(;;) {
! TRANS(x[2],x2k[2],x3k[2],x4k[2],wTB[2],wB[2],d[2],d_3[2]);
! TRANS(x[3],x2k[3],x3k[3],x4k[3],wTB[3],wB[3],d[3],d_3[3]);
if (!--k) break;
x += 2;
--- 450,459 ----
TRANSZERO_3DNOW(x[0],x2k[0],x3k[0],x4k[0]);
! TRANS_3DNOW(x[1],x2k[1],x3k[1],x4k[1],wTB[1],wB[1],d[1],d_3[1]);
--k;
for(;;) {
! TRANS_3DNOW(x[2],x2k[2],x3k[2],x4k[2],wTB[2],wB[2],d[2],d_3[2]);
! TRANS_3DNOW(x[3],x2k[3],x3k[3],x4k[3],wTB[3],wB[3],d[3],d_3[3]);
if (!--k) break;
x += 2;
***************
*** 473,484 ****
void fft_asmb16(complex_t *x, complex_t *wTB)
{
! register float a_r, a_i, a1_r, a1_i, u_r, u_i, v_r, v_i;
int k = 2;
/* transform x[0], x[8], x[4], x[12] */
TRANSZERO_3DNOW(x[0],x[4],x[8],x[12]);
/* transform x[1], x[9], x[5], x[13] */
! TRANS(x[1],x[5],x[9],x[13],wTB[1],wTB[5],delta16[1],delta16_3[1]);
/* transform x[2], x[10], x[6], x[14] */
--- 471,483 ----
void fft_asmb16(complex_t *x, complex_t *wTB)
{
! complex_t a, a1, u, v;
int k = 2;
+ TRANS_FILL_MM6_MM7_3DNOW();
/* transform x[0], x[8], x[4], x[12] */
TRANSZERO_3DNOW(x[0],x[4],x[8],x[12]);
/* transform x[1], x[9], x[5], x[13] */
! TRANS_3DNOW(x[1],x[5],x[9],x[13],wTB[1],wTB[5],delta16[1],delta16_3[1]);
/* transform x[2], x[10], x[6], x[14] */
***************
*** 486,490 ****
/* transform x[3], x[11], x[7], x[15] */
! TRANS(x[3],x[7],x[11],x[15],wTB[3],wTB[7],delta16[3],delta16_3[3]);
}
--- 485,489 ----
/* transform x[3], x[11], x[7], x[15] */
! TRANS_3DNOW(x[3],x[7],x[11],x[15],wTB[3],wTB[7],delta16[3],delta16_3[3]);
}
Index: srfftp_3dnow.h
===================================================================
RCS file: /cvsroot/mplayer/main/libac3/mmx/srfftp_3dnow.h,v
retrieving revision 1.3
retrieving revision 1.4
diff -C2 -r1.3 -r1.4
*** srfftp_3dnow.h 2001/05/27 14:13:05 1.3
--- srfftp_3dnow.h 2001/05/31 17:58:56 1.4
***************
*** 69,77 ****
"punpckldq "##mm_hlp","##mm_base"\n\t"
#endif
#define TRANSZERO_3DNOW(A0,A4,A8,A12) \
{ \
- __asm__ __volatile__("femms":::"memory");\
- TRANS_FILL_MM6_MM7_3DNOW()\
__asm__ __volatile__(\
"movq %4, %%mm0\n\t" /* mm0 = wTB[0]*/\
--- 69,84 ----
"punpckldq "##mm_hlp","##mm_base"\n\t"
#endif
+ #ifdef HAVE_3DNOWEX
+ #define PFNACC_MM(mm_base,mm_hlp) "pfnacc "##mm_base","##mm_base"\n\t"
+ #else
+ #define PFNACC_MM(mm_base,mm_hlp)\
+ "movq "##mm_base","##mm_hlp"\n\t"\
+ "psrlq $32,"##mm_hlp"\n\t"\
+ "punpckldq "##mm_hlp","##mm_hlp"\n\t"\
+ "pfsub "##mm_hlp","##mm_base"\n\t"
+ #endif
#define TRANSZERO_3DNOW(A0,A4,A8,A12) \
{ \
__asm__ __volatile__(\
"movq %4, %%mm0\n\t" /* mm0 = wTB[0]*/\
***************
*** 99,109 ****
:"m"(wTB[0]), "m"(wTB[k*2]), "0"(A0), "2"(A4)\
:"memory");\
- __asm__ __volatile__("femms":::"memory");\
}
#define TRANSHALF_16_3DNOW(A2,A6,A10,A14)\
{\
- __asm__ __volatile__("femms":::"memory");\
- TRANS_FILL_MM6_MM7_3DNOW()\
__asm__ __volatile__(\
"movq %4, %%mm0\n\t"/*u.re = wTB[2].im + wTB[2].re;*/\
--- 106,113 ----
***************
*** 143,147 ****
:"m"(wTB[2]), "m"(wTB[6]), "0"(A2), "2"(A6), "m"(HSQRT2_3DNOW)\
:"memory");\
! __asm__ __volatile__("femms":::"memory");\
}
--- 147,200 ----
:"m"(wTB[2]), "m"(wTB[6]), "0"(A2), "2"(A6), "m"(HSQRT2_3DNOW)\
:"memory");\
! }
!
! #define TRANS_3DNOW(A1,A5,A9,A13,WT,WB,D,D3)\
! { \
! __asm__ __volatile__(\
! "movq %1, %%mm4\n\t"\
! "movq %%mm4, %%mm5\n\t"\
! "punpckldq %%mm4, %%mm4\n\t"/*mm4 = D.re | D.re */\
! "punpckhdq %%mm5, %%mm5\n\t"/*mm5 = D.im | D.im */\
! "movq %0, %%mm0\n\t"\
! "pfmul %%mm0, %%mm4\n\t"/* mm4 =u.re | u.im */\
! "pfmul %%mm0, %%mm5\n\t"/* mm5 = a.re | a.im */\
! PSWAP_MM("%%mm5","%%mm3")\
! "pfmul %%mm7, %%mm5\n\t"\
! "pfadd %%mm5, %%mm4\n\t"/* mm4 = u*/\
! "movq %3, %%mm1\n\t"\
! "movq %2, %%mm0\n\t"\
! PSWAP_MM("%%mm1","%%mm3")\
! "movq %%mm0, %%mm2\n\t"\
! "pfmul %%mm1, %%mm0\n\t"/* mm0 = a*/\
! "pfmul %3, %%mm2\n\t"/* mm2 = v*/\
! PFNACC_MM("%%mm2","%%mm3")\
! "pfacc %%mm0, %%mm0\n\t"\
! "punpckldq %%mm0,%%mm2\n\t"/*mm2 = v.re | a.re*/\
! "movq %%mm2, %%mm3\n\t"\
! "pfmul %%mm7, %%mm3\n\t"\
! "movq %%mm4, %%mm5\n\t"\
! "pfmul %%mm6, %%mm5\n\t"\
! "pfadd %%mm3, %%mm5\n\t"\
! PSWAP_MM("%%mm5","%%mm3")/* mm5 = v*/\
! "pfadd %%mm2, %%mm4\n\t"\
! :\
! :"m"(WT), "m"(D), "m"(WB), "m"(D3)\
! :"memory");\
! __asm__ __volatile__(\
! "movq %4, %%mm0\n\t"/* a1 = A1*/\
! "movq %%mm0, %%mm1\n\t"\
! "pfadd %%mm4, %%mm0\n\t"/*A1 = a1 + u*/\
! "pfsub %%mm4, %%mm1\n\t"/*A9 = a1 - u*/\
! "movq %%mm0, %0\n\t"\
! "movq %%mm1, %1\n\t"\
! "movq %5, %%mm2\n\t"/* a1 = A5*/\
! "movq %%mm2, %%mm3\n\t"\
! "pfsub %%mm5, %%mm2\n\t"/*A5 = a1 - v*/\
! "pfadd %%mm5, %%mm3\n\t"/*A9 = a1 + v*/\
! "movq %%mm2, %2\n\t"\
! "movq %%mm3, %3"\
! :"=m"(A1), "=m"(A9), "=m"(A5), "=m"(A13)\
! :"0"(A1), "2"(A5), "m"(u), "m"(v)\
! :"memory");\
}
_______________________________________________
Mplayer-cvslog mailing list
Mplayer-cvslog at lists.sourceforge.net
http://lists.sourceforge.net/lists/listinfo/mplayer-cvslog
- Previous message: [Mplayer-cvslog] CVS: main/libac3 Makefile,1.5,1.6 imdct.c,1.2,1.3 srfftp.h,1.1.1.1,1.2
- Next message: [Mplayer-cvslog] CVS: main subreader.c,1.15,1.16 subreader.h,1.6,1.7
- Messages sorted by:
[ date ]
[ thread ]
[ subject ]
[ author ]
More information about the MPlayer-cvslog
mailing list