[Mplayer-cvslog] CVS: main/libac3/mmx srfftp_3dnow.h,NONE,1.1 imdct_3dnow.c,1.2,1.3 srfft_3dnow.c,1.4,1.5
Nick Kurshev
nickols_k at users.sourceforge.net
Sat May 26 12:29:43 CEST 2001
- Previous message: [Mplayer-cvslog] CVS: main/DOCS/Hungarian DVD-FAQ,1.3,1.4
- Next message: [Mplayer-cvslog] CVS: main/DOCS/Spanish BUGREPORTS,NONE,1.1 TVout-G400,NONE,1.1 INSTALL,1.2,1.3 MPlayer-FAQ,1.17,1.18 OpenDivX,1.3,1.4 README,1.21,1.22 SOUNDCARDS,1.11,1.12 VIDEOCARDS,1.7,1.8 example.conf,1.10,1.11
- Messages sorted by:
[ date ]
[ thread ]
[ subject ]
[ author ]
Update of /cvsroot/mplayer/main/libac3/mmx
In directory usw-pr-cvs1:/tmp/cvs-serv1933/main/libac3/mmx
Modified Files:
imdct_3dnow.c srfft_3dnow.c
Added Files:
srfftp_3dnow.h
Log Message:
Improvements
--- NEW FILE ---
/*
* srfftp.h
*
* Copyright (C) Yuqing Deng <Yuqing_Deng at brown.edu> - April 2000
*
* 64 and 128 point split radix fft for ac3dec
*
* The algorithm is described in the book:
* "Computational Frameworks of the Fast Fourier Transform".
*
* The ideas and the organization of the code are borrowed from djbfft, written by
* D. J. Bernstein <djb at cr.yp.to>. djbfft can be found at
* http://cr.yp.to/djbfft.html.
*
* srfftp.h is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2, or (at your option)
* any later version.
*
* srfftp.h is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with GNU Make; see the file COPYING. If not, write to
* the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA.
*
* Modified for using AMD's 3DNow! - 3DNowEx(DSP)! SIMD operations
* by Nick Kurshev <nickols_k at mail.ru>
*/
#ifndef SRFFTP_3DNOW_H__
#define SRFFTP_3DNOW_H__
#ifdef HAVE_3DNOWEX
#define TRANS_FILL_MM6_MM7_3DNOW()\
asm(\
"movl $-1, %%eax\n\t"\
"movd %%eax, %%mm7\n\t"\
"negl %%eax\n\t"\
"movd %%eax, %%mm6\n\t"\
"punpckldq %%mm6, %%mm7\n\t" /* -1.0 | 1.0 */\
"pi2fd %%mm7, %%mm7\n\t"\
"pswapd %%mm7, %%mm6\n\t"/* 1.0 | -1.0 */\
:::"eax","memory");
#else
#define TRANS_FILL_MM6_MM7_3DNOW()\
asm(\
"movl $-1, %%eax\n\t"\
"movd %%eax, %%mm7\n\t"\
"negl %%eax\n\t"\
"movd %%eax, %%mm6\n\t"\
"punpckldq %%mm6, %%mm7\n\t" /* -1.0 | 1.0 */\
"punpckldq %%mm7, %%mm6\n\t" /* 1.0 | -1.0 */\
"pi2fd %%mm7, %%mm7\n\t"\
"pi2fd %%mm6, %%mm6\n\t"\
:::"eax","memory");
#endif
/*
 * PSWAP_MM(mm_base, mm_hlp) - asm template fragment that swaps the low and
 * high 32-bit halves of the MMX register mm_base.
 *
 * Both arguments are string literals naming registers as they must appear
 * inside an extended-asm template (e.g. "%%mm4"). The macro expands to a
 * string literal terminated by "\n\t", meant to be placed between the other
 * template strings of an asm() statement.
 *
 *  - With HAVE_3DNOWEX a single pswapd does the swap; mm_hlp is unused.
 *  - Otherwise mm_hlp is used as scratch and is overwritten with the
 *    original value of mm_base.
 *
 * The pieces are joined by ordinary adjacent-string-literal concatenation.
 * The '##' operator must not be used here: pasting two string literals does
 * not yield a single valid preprocessing token and is rejected by current
 * GCC versions.
 */
#ifdef HAVE_3DNOWEX
#define PSWAP_MM(mm_base,mm_hlp) "pswapd " mm_base "," mm_base " \n\t"
#else
#define PSWAP_MM(mm_base,mm_hlp)\
"movq " mm_base "," mm_hlp " \n\t"\
"psrlq $32, " mm_base "\n\t"\
"punpckldq " mm_hlp "," mm_base "\n\t"
#endif
/*
 * TRANSZERO_3DNOW(A0, A4, A8, A12) - 3DNow! version of the TRANSZERO step.
 *
 * A0, A4, A8 and A12 are lvalues, each accessed as one 64-bit memory
 * operand (per the inline comments: .re in the low half, .im in the high
 * half). In addition to its arguments the macro reads `wTB` and `k` from
 * the scope in which it is expanded.
 *
 * With
 *   u    = wTB[0] + wTB[k*2]
 *   v.re = wTB[k*2].im - wTB[0].im
 *   v.im = wTB[0].re  - wTB[k*2].re
 * it stores
 *   A8  = A0 - u      A0 = A0 + u
 *   A12 = A4 + v      A4 = A4 - v
 *
 * Operand map of the main asm statement:
 *   %0 = A0 (out)   %1 = A8 (out)   %2 = A4 (out)   %3 = A12 (out)
 *   %4 = wTB[0]     %5 = wTB[k*2]   %6 = A0 (in)    %7 = A4 (in)
 *
 * mm6/mm7 must hold the sign masks loaded by TRANS_FILL_MM6_MM7_3DNOW()
 * immediately before; mm0-mm5 are used as scratch. The sequence is
 * bracketed by femms on entry and exit.
 *
 * NOTE(review): the inputs %6/%7 are tied to "=m" outputs through "0"/"2"
 * matching constraints; confirm the compiler in use accepts this form for
 * memory operands.
 */
#define TRANSZERO_3DNOW(A0,A4,A8,A12) \
{ \
asm volatile("femms":::"memory");\
TRANS_FILL_MM6_MM7_3DNOW()\
asm(\
"movq %4, %%mm0\n\t" /* mm0 = wTB[0]*/\
"movq %5, %%mm1\n\t" /* mm1 = wTB[k*2]*/ \
"movq %%mm0, %%mm5\n\t"/*u.re = wTB[0].re + wTB[k*2].re;*/\
"pfadd %%mm1, %%mm5\n\t"/*u.im = wTB[0].im + wTB[k*2].im; mm5 = u*/\
"pfmul %%mm6, %%mm0\n\t"/*mm0 = wTB[0].re | -wTB[0].im */\
"pfmul %%mm7, %%mm1\n\t"/*mm1 = -wTB[k*2].re | wTB[k*2].im */\
"pfadd %%mm1, %%mm0\n\t"/*v.im = wTB[0].re - wTB[k*2].re;*/\
"movq %%mm0, %%mm4\n\t"/*v.re =-wTB[0].im + wTB[k*2].im;*/\
PSWAP_MM("%%mm4","%%mm2")/* swap halves: mm4 = v.re | v.im (mm2 is scratch)*/\
"movq %6, %%mm0\n\t" /* a1 = A0;*/\
"movq %%mm0, %%mm1\n\t"\
"pfadd %%mm5, %%mm0\n\t" /*A0 = a1 + u;*/\
"pfsub %%mm5, %%mm1\n\t" /*A8 = a1 - u;*/\
"movq %%mm0, %0\n\t"\
"movq %%mm1, %1\n\t"\
"movq %7, %%mm2\n\t" /* a1 = A4;*/\
"movq %%mm2, %%mm3\n\t"\
"pfadd %%mm4, %%mm2\n\t" /*A12 = a1 + v;*/\
"pfsub %%mm4, %%mm3\n\t" /*A4 = a1 - v;*/\
"movq %%mm2, %3\n\t"\
"movq %%mm3, %2"\
:"=m"(A0), "=m"(A8), "=m"(A4), "=m"(A12)\
:"m"(wTB[0]), "m"(wTB[k*2]), "0"(A0), "2"(A4)\
:"memory");\
asm volatile("femms":::"memory");\
}
#endif
Index: imdct_3dnow.c
===================================================================
RCS file: /cvsroot/mplayer/main/libac3/mmx/imdct_3dnow.c,v
retrieving revision 1.2
retrieving revision 1.3
diff -C2 -r1.2 -r1.3
*** imdct_3dnow.c 2001/05/24 09:43:49 1.2
--- imdct_3dnow.c 2001/05/26 10:29:41 1.3
***************
*** 31,35 ****
{
int i, j;
- // float tmp_a_r, tmp_a_i;
float *data_ptr;
float *delay_ptr;
--- 31,34 ----
***************
*** 76,83 ****
:"m"(data[256-2*j-1]), "m"(data[2*j]), "m"(xcos1[j]), "m"(xsin1[j])
:"memory");
! /*
! buf[i].re = (data[256-2*j-1] * xcos1[j] - data[2*j] * xsin1[j]);
! buf[i].im = (data[256-2*j-1] * xsin1[j] + data[2*j] * xcos1[j])*(-1.0);
! */
}
--- 75,80 ----
:"m"(data[256-2*j-1]), "m"(data[2*j]), "m"(xcos1[j]), "m"(xsin1[j])
:"memory");
! /* buf[i].re = (data[256-2*j-1] * xcos1[j] - data[2*j] * xsin1[j]);
! buf[i].im = (data[256-2*j-1] * xsin1[j] + data[2*j] * xcos1[j])*(-1.0);*/
}
***************
*** 132,139 ****
:"0"(buf[i]),"m"(xcos1[i]),"m"(xsin1[i])
:"memory");
! /*
! ac3_buf[i].re =(tmp_a_r * ac3_xcos1[i]) + (tmp_a_i * ac3_xsin1[i]);
! ac3_buf[i].im =(tmp_a_r * ac3_xsin1[i]) - (tmp_a_i * ac3_xcos1[i]);
! */
}
--- 129,134 ----
:"0"(buf[i]),"m"(xcos1[i]),"m"(xsin1[i])
:"memory");
! /* ac3_buf[i].re =(tmp_a_r * ac3_xcos1[i]) + (tmp_a_i * ac3_xsin1[i]);
! ac3_buf[i].im =(tmp_a_r * ac3_xsin1[i]) - (tmp_a_i * ac3_xcos1[i]);*/
}
***************
*** 141,245 ****
delay_ptr = delay;
window_ptr = window;
-
// Window and convert to real valued signal
for (i=0; i< 64; i++) {
!
asm volatile(
! "movd %1, %%mm0\n\t"
! "punpckldq %2, %%mm0\n\t"
! "pfmul %3, %%mm0\n\t"
! "pfmul %%mm6, %%mm0\n\t"
! "pfadd %4, %%mm0\n\t"
! "movq %%mm0, %0"
! :"=m"(*data_ptr)
! :"m"(buf[64+i].im), "m"(buf[64-i-1].re), "m"(*window_ptr), "m"(*delay_ptr)
:"memory");
data_ptr += 2;
window_ptr += 2;
delay_ptr += 2;
-
- /*
- *data_ptr++ = -buf[64+i].im * *window_ptr++ + *delay_ptr++;
- *data_ptr++ = buf[64-i-1].re * *window_ptr++ + *delay_ptr++;
- */
- }
-
- for(i=0; i< 64; i++) {
-
- asm volatile(
- "movd %1, %%mm0\n\t"
- "punpckldq %2, %%mm0\n\t"
- "pfmul %3, %%mm0\n\t"
- "pfmul %%mm6, %%mm0\n\t"
- "pfadd %4, %%mm0\n\t"
- "movq %%mm0, %0"
- :"=m"(*data_ptr)
- :"m"(buf[i].re), "m"(buf[128-i-1].im), "m"(*window_ptr), "m"(*delay_ptr)
- :"memory");
- data_ptr += 2;
- window_ptr += 2;
- delay_ptr += 2;
-
- /*
- *data_ptr++ = -buf[i].re * *window_ptr++ + *delay_ptr++;
- *data_ptr++ = buf[128-i-1].im * *window_ptr++ + *delay_ptr++;
- */
}
!
// The trailing edge of the window goes into the delay line
delay_ptr = delay;
-
for(i=0; i< 64; i++) {
!
window_ptr -=2;
asm volatile(
! "movd %1, %%mm0\n\t"
! "punpckldq %2, %%mm0\n\t"
#ifdef HAVE_3DNOWEX
! "pswapd %3, %%mm3\n\t"
#else
! "movq %3, %%mm3\n\t"
! "psrlq $32, %%mm3\n\t"
! "punpckldq %3, %%mm3\n\t"
#endif
"pfmul %%mm3, %%mm0\n\t"
"pfmul %%mm6, %%mm0\n\t"
! "movq %%mm0, %0"
! :"=m"(*delay_ptr)
! :"m"(buf[64+i].re), "m"(buf[64-i-1].im), "m"(*window_ptr)
! :"memory");
! delay_ptr += 2;
! /*
! window_ptr--;
! *delay_ptr++ = -buf[64+i].re * *window_ptr;
! window_ptr--;
! *delay_ptr++ = buf[64-i-1].im * *window_ptr;
! */
! }
!
! for(i=0; i<64; i++) {
! window_ptr -= 2;
! asm volatile(
! "movd %1, %%mm0\n\t"
! "punpckldq %2, %%mm0\n\t"
! #ifdef HAVE_3DNOWEX
! "pswapd %3, %%mm3\n\t"
! #else
! "movq %3, %%mm3\n\t"
! "psrlq $32, %%mm3\n\t"
! "punpckldq %3, %%mm3\n\t"
! #endif
! "pfmul %%mm3, %%mm0\n\t"
! "pfmul %%mm7, %%mm0\n\t"
! "movq %%mm0, %0"
! :"=m"(*delay_ptr)
! :"m"(buf[i].im), "m"(buf[128-i-1].re), "m"(*window_ptr)
:"memory");
delay_ptr += 2;
-
- /*
- *delay_ptr++ = buf[i].im * *--window_ptr;
- *delay_ptr++ = -buf[128-i-1].re * *--window_ptr;
- */
}
asm volatile ("femms":::"memory");
--- 136,194 ----
delay_ptr = delay;
window_ptr = window;
// Window and convert to real valued signal
for (i=0; i< 64; i++) {
! /* merge two loops in one to enable working of 2 decoders */
asm volatile(
! "movd 516(%1), %%mm0\n\t"
! "movd (%1), %%mm1\n\t" /**data_ptr++=-buf[64+i].im**window_ptr+++*delay_ptr++;*/
! "punpckldq (%2), %%mm0\n\t"/*data_ptr[128]=-buf[i].re*window_ptr[128]+delay_ptr[128];*/
! "punpckldq 516(%2), %%mm1\n\t"
! "pfmul (%3), %%mm0\n\t"/**data_ptr++=buf[64-i-1].re**window_ptr+++*delay_ptr++;*/
! "pfmul 512(%3), %%mm1\n\t"
! "pfmul %%mm6, %%mm0\n\t"/*data_ptr[128]=buf[128-i-1].im*window_ptr[128]+delay_ptr[128];*/
! "pfmul %%mm6, %%mm1\n\t"
! "pfadd (%4), %%mm0\n\t"
! "pfadd 512(%4), %%mm1\n\t"
! "movq %%mm0, (%0)\n\t"
! "movq %%mm1, 512(%0)"
! :"=r"(data_ptr)
! :"r"(&buf[i].re), "r"(&buf[64-i-1].re), "r"(window_ptr), "r"(delay_ptr), "0"(data_ptr)
:"memory");
data_ptr += 2;
window_ptr += 2;
delay_ptr += 2;
}
! window_ptr += 128;
// The trailing edge of the window goes into the delay line
delay_ptr = delay;
for(i=0; i< 64; i++) {
! /* merge two loops in one to enable working of 2 decoders */
window_ptr -=2;
asm volatile(
! "movd 508(%1), %%mm0\n\t"
! "movd (%1), %%mm1\n\t"
! "punpckldq (%2), %%mm0\n\t"
! "punpckldq 508(%2), %%mm1\n\t"
#ifdef HAVE_3DNOWEX
! "pswapd (%3), %%mm3\n\t"
! "pswapd -512(%3), %%mm4\n\t"
#else
! "movq (%3), %%mm3\n\t"/**delay_ptr++=-buf[64+i].re**--window_ptr;*/
! "movq -512(%3), %%mm4\n\t"
! "psrlq $32, %%mm3\n\t"/*delay_ptr[128]=buf[i].im**window_ptr[-512];*/
! "psrlq $32, %%mm4\n\t"/**delay_ptr++=buf[64-i-1].im**--window_ptr;*/
! "punpckldq (%3), %%mm3\n\t"/*delay_ptr[128]=-buf[128-i-1].re**window_ptr[-512];*/
! "punpckldq -512(%3), %%mm4\n\t"
#endif
"pfmul %%mm3, %%mm0\n\t"
+ "pfmul %%mm4, %%mm1\n\t"
"pfmul %%mm6, %%mm0\n\t"
! "pfmul %%mm7, %%mm1\n\t"
! "movq %%mm0, (%0)\n\t"
! "movq %%mm1, 512(%0)"
! :"=r"(delay_ptr)
! :"r"(&buf[i].im), "r"(&buf[64-i-1].im), "r"(window_ptr), "0"(delay_ptr)
:"memory");
delay_ptr += 2;
}
asm volatile ("femms":::"memory");
Index: srfft_3dnow.c
===================================================================
RCS file: /cvsroot/mplayer/main/libac3/mmx/srfft_3dnow.c,v
retrieving revision 1.4
retrieving revision 1.5
diff -C2 -r1.4 -r1.5
*** srfft_3dnow.c 2001/05/24 09:43:49 1.4
--- srfft_3dnow.c 2001/05/26 10:29:41 1.5
***************
*** 30,33 ****
--- 30,34 ----
* by Nick Kurshev <nickols_k at mail.ru>
*/
+ #include "mmx/srfftp_3dnow.h"
void fft_4(complex_t *x)
***************
*** 394,398 ****
wB = wTB + 2 * k;
! TRANSZERO(x[0],x2k[0],x3k[0],x4k[0]);
TRANS(x[1],x2k[1],x3k[1],x4k[1],wTB[1],wB[1],d[1],d_3[1]);
--- 395,399 ----
wB = wTB + 2 * k;
! TRANSZERO_3DNOW(x[0],x2k[0],x3k[0],x4k[0]);
TRANS(x[1],x2k[1],x3k[1],x4k[1],wTB[1],wB[1],d[1],d_3[1]);
***************
*** 420,424 ****
/* transform x[0], x[8], x[4], x[12] */
! TRANSZERO(x[0],x[4],x[8],x[12]);
/* transform x[1], x[9], x[5], x[13] */
--- 421,425 ----
/* transform x[0], x[8], x[4], x[12] */
! TRANSZERO_3DNOW(x[0],x[4],x[8],x[12]);
/* transform x[1], x[9], x[5], x[13] */
_______________________________________________
Mplayer-cvslog mailing list
Mplayer-cvslog at lists.sourceforge.net
http://lists.sourceforge.net/lists/listinfo/mplayer-cvslog
- Previous message: [Mplayer-cvslog] CVS: main/DOCS/Hungarian DVD-FAQ,1.3,1.4
- Next message: [Mplayer-cvslog] CVS: main/DOCS/Spanish BUGREPORTS,NONE,1.1 TVout-G400,NONE,1.1 INSTALL,1.2,1.3 MPlayer-FAQ,1.17,1.18 OpenDivX,1.3,1.4 README,1.21,1.22 SOUNDCARDS,1.11,1.12 VIDEOCARDS,1.7,1.8 example.conf,1.10,1.11
- Messages sorted by:
[ date ]
[ thread ]
[ subject ]
[ author ]
More information about the MPlayer-cvslog
mailing list