[Mplayer-cvslog] CVS: main/mp3lib dct64_MMX.s,NONE,1.1 decode_MMX.s,NONE,1.1 tabinit_MMX.s,NONE,1.1 Makefile,1.8,1.9 d_cpu.h,1.2,1.3 d_cpu.s,1.4,1.5 dct36.c,1.1.1.1,1.2 dct64_3dnow.s,1.1.1.1,1.2 dct64_k7.s,1.3,1.4 decod386.c,1.4,1.5 layer2.c,1.1.1.1,1.2 layer3.c,1.2,1.3 mpg123.h,1.3,1.4 sr1.c,1.7,1.8 tabinit.c,1.2,1.3 test2.c,1.2,1.3
Nick Kurshev
nick at mplayerhq.banki.hu
Fri Jun 29 19:53:56 CEST 2001
Update of /cvsroot/mplayer/main/mp3lib
In directory mplayerhq:/var/tmp.root/cvs-serv16908
Modified Files:
Makefile d_cpu.h d_cpu.s dct36.c dct64_3dnow.s dct64_k7.s
decod386.c layer2.c layer3.c mpg123.h sr1.c tabinit.c test2.c
Added Files:
dct64_MMX.s decode_MMX.s tabinit_MMX.s
Log Message:
Added the newest MMX-optimized decore, which speeds up decoding by at least 13% on any CPU.
--- NEW FILE ---
# This code was taken from http://www.mpg123.org
# See ChangeLog of mpg123-0.59s-pre.1 for detail
# Applied to mplayer by Nick Kurshev <nickols_k at mail.ru>
.data
.align 4
costab:
.long 1056974725
.long 1057056395
.long 1057223771
.long 1057485416
.long 1057855544
.long 1058356026
.long 1059019886
.long 1059897405
.long 1061067246
.long 1062657950
.long 1064892987
.long 1066774581
[...989 lines suppressed...]
flds 108(%edx)
fadds 124(%edx)
fld %st(0)
fadds 76(%edx)
fistp 288(%edi)
fadds 92(%edx)
fistp 352(%edi)
flds 124(%edx)
fist 480(%edi)
fadds 92(%edx)
fistp 416(%edi)
movsw
addl $256,%esp
popl %edi
popl %esi
popl %ebx
ret
--- NEW FILE ---
# this code comes under GPL
# This code was taken from http://www.mpg123.org
# See ChangeLog of mpg123-0.59s-pre.1 for detail
# Applied to mplayer by Nick Kurshev <nickols_k at mail.ru>
#
# TODO: Partially unroll the loops and remove the MOVW insn.
#
# ---------------------------------------------------------------------------
# synth_1to1_MMX_s -- MMX windowing stage that emits 32 16-bit samples.
#
# Syntax: AT&T / GAS, 32-bit x86, all arguments on the stack.
# C-side declaration (mpg123.h of this commit):
#   int synth_1to1_MMX_s(real *bandPtr, int channel, short *samples,
#                        short *buffs, int *bo);
#
# Stack layout after the four register pushes below:
#   20(%esp) = bandPtr   24(%esp) = channel   28(%esp) = samples
#   32(%esp) = buffs     36(%esp) = bo (pointer to the rolling offset)
#
# Saved/restored: %ebp, %edi, %esi, %ebx.  Clobbered: %eax, %ecx, %edx,
# %mm0-%mm3, flags.
# %eax is never loaded with an explicit return value; on exit it still holds
# the result of the last movd in the second loop.
# External symbols used: dct64_MMX_func (function pointer, called indirectly)
# and decwins (16-bit window table).
# The code after the indirect call keeps using %ebx, %ebp, %esi and %edi
# without reloading them, so the callee must preserve those registers.
# ---------------------------------------------------------------------------
.text
.globl synth_1to1_MMX_s
synth_1to1_MMX_s:
pushl %ebp
pushl %edi
pushl %esi
pushl %ebx
# ecx = channel, edi = samples, edx = bo, esi = buffs, ebx = 15 (offset mask)
movl 24(%esp),%ecx
movl 28(%esp),%edi
movl $15,%ebx
movl 36(%esp),%edx
# edi = samples + channel*2 bytes: first output slot for this channel
leal (%edi,%ecx,2),%edi
# ecx = channel - 1, so jecxz below is taken exactly when channel == 1
decl %ecx
movl 32(%esp),%esi
# eax = *bo
movl (%edx),%eax
jecxz .L1
# channel != 1: *bo = (*bo - 1) & 15, and use the buffer half that starts
# 1088 bytes (544 16-bit words) into buffs
decl %eax
andl %ebx,%eax
leal 1088(%esi),%esi
movl %eax,(%edx)
.L1:
# edx = buf + bo*2 bytes, ebp = bo
leal (%esi,%eax,2),%edx
movl %eax,%ebp
incl %eax
# third argument for the dct64 call: bandPtr (still at 20(%esp) here)
pushl 20(%esp)
# eax = (bo + 1) & 15 ; ecx = buf + 544 + eax*2 bytes
andl %ebx,%eax
leal 544(%esi,%eax,2),%ecx
# ebx = 16 from here on
incl %ebx
# If (bo + 1) & 15 is odd keep edx/ecx as computed; otherwise swap the two
# output pointers, use bo + 1 as the window offset and read from buf + 544.
testl $1, %eax
jnz .L2
xchgl %edx,%ecx
incl %ebp
leal 544(%esi),%esi
.L2:
# Leave MMX state before calling out.
emms
# Call (*dct64_MMX_func) with stack contents, top first: %ecx, %edx, bandPtr.
pushl %edx
pushl %ecx
call *dct64_MMX_func
# Drop the three 4-byte arguments.
addl $12,%esp
# ecx = 17 iterations for the first loop (ebx is 16 here)
leal 1(%ebx), %ecx
# ebx = 16 - ebp ; edx = decwins + 2*ebx bytes (window start for this offset)
subl %ebp,%ebx
leal decwins(%ebx,%ebx,1), %edx
# ---- First loop: 17 output samples --------------------------------------
# Per iteration: dot product of 16 signed 16-bit words at (%edx) with 16 at
# (%esi), arithmetic shift right by 13, saturate to 16 bits, store one word.
.L3:
movq (%edx),%mm0
pmaddwd (%esi),%mm0
movq 8(%edx),%mm1
pmaddwd 8(%esi),%mm1
movq 16(%edx),%mm2
pmaddwd 16(%esi),%mm2
movq 24(%edx),%mm3
pmaddwd 24(%esi),%mm3
# Accumulate the four partial results into the two dwords of mm0 ...
paddd %mm1,%mm0
paddd %mm2,%mm0
paddd %mm3,%mm0
# ... then add the high dword onto the low dword (horizontal sum).
movq %mm0,%mm1
psrlq $32,%mm1
paddd %mm1,%mm0
psrad $13,%mm0
# Signed saturation to 16 bits.
packssdw %mm0,%mm0
movd %mm0,%eax
movw %ax, (%edi)
# Advance: 32 bytes of buffer data, 64 bytes of window, 4 bytes of output
# (one 16-bit sample is written per 4-byte step).
leal 32(%esi),%esi
leal 64(%edx),%edx
leal 4(%edi),%edi
decl %ecx
jnz .L3
# ---- Second loop: 15 output samples -------------------------------------
# Step the buffer pointer back 64 bytes and then walk it downwards by 32
# bytes per sample while the window pointer keeps moving forward.
subl $64,%esi
movl $15,%ecx
.L4:
movq (%edx),%mm0
pmaddwd (%esi),%mm0
movq 8(%edx),%mm1
pmaddwd 8(%esi),%mm1
movq 16(%edx),%mm2
pmaddwd 16(%esi),%mm2
movq 24(%edx),%mm3
pmaddwd 24(%esi),%mm3
paddd %mm1,%mm0
paddd %mm2,%mm0
paddd %mm3,%mm0
# Horizontal sum ends up in mm1 this time.
movq %mm0,%mm1
psrlq $32,%mm1
paddd %mm0,%mm1
psrad $13,%mm1
packssdw %mm1,%mm1
# mm0 = 0, then mm0 = 0 - mm1 with signed word saturation: the stored
# sample is the negated, saturated sum.
psubd %mm0,%mm0
psubsw %mm1,%mm0
movd %mm0,%eax
movw %ax,(%edi)
subl $32,%esi
addl $64,%edx
leal 4(%edi),%edi
decl %ecx
jnz .L4
# Leave MMX state again before returning to the caller.
emms
popl %ebx
popl %esi
popl %edi
popl %ebp
ret
--- NEW FILE ---
# This code was taken from http://www.mpg123.org
# See ChangeLog of mpg123-0.59s-pre.1 for detail
# Applied to mplayer by Nick Kurshev <nickols_k at mail.ru>
# ---------------------------------------------------------------------------
# Storage and constant data used by make_decode_tables_MMX.
#
# decwin  : 2176 bytes; make_decode_tables_MMX stores 32-bit floats into it.
# decwins : 2176 bytes; make_decode_tables_MMX stores 16-bit words into it.
# Both are common symbols; the third .comm operand (32) is the requested
# alignment on ELF targets.
# ---------------------------------------------------------------------------
.bss
.align 8
.comm decwin,2176,32
.align 8
.comm decwins,2176,32
.data
.align 8
# intwinbase_MMX: 257 signed 16-bit window coefficients (32 rows of 8 + 1).
# Entries from index 222 onwards (byte offset 444, starting at the -26209
# after 31947) do not fit in 16 bits and are stored reduced by 60000;
# make_decode_tables_MMX adds 60000 back when it reads an entry at or beyond
# intwinbase_MMX+444.
intwinbase_MMX:
.value 0, -1, -1, -1, -1, -1, -1, -2
.value -2, -2, -2, -3, -3, -4, -4, -5
.value -5, -6, -7, -7, -8, -9, -10, -11
.value -13, -14, -16, -17, -19, -21, -24, -26
.value -29, -31, -35, -38, -41, -45, -49, -53
.value -58, -63, -68, -73, -79, -85, -91, -97
.value -104, -111, -117, -125, -132, -139, -147, -154
.value -161, -169, -176, -183, -190, -196, -202, -208
.value -213, -218, -222, -225, -227, -228, -228, -227
.value -224, -221, -215, -208, -200, -189, -177, -163
.value -146, -127, -106, -83, -57, -29, 2, 36
.value 72, 111, 153, 197, 244, 294, 347, 401
.value 459, 519, 581, 645, 711, 779, 848, 919
.value 991, 1064, 1137, 1210, 1283, 1356, 1428, 1498
.value 1567, 1634, 1698, 1759, 1817, 1870, 1919, 1962
.value 2001, 2032, 2057, 2075, 2085, 2087, 2080, 2063
.value 2037, 2000, 1952, 1893, 1822, 1739, 1644, 1535
.value 1414, 1280, 1131, 970, 794, 605, 402, 185
.value -45, -288, -545, -814, -1095, -1388, -1692, -2006
.value -2330, -2663, -3004, -3351, -3705, -4063, -4425, -4788
.value -5153, -5517, -5879, -6237, -6589, -6935, -7271, -7597
.value -7910, -8209, -8491, -8755, -8998, -9219, -9416, -9585
.value -9727, -9838, -9916, -9959, -9966, -9935, -9863, -9750
.value -9592, -9389, -9139, -8840, -8492, -8092, -7640, -7134
.value -6574, -5959, -5288, -4561, -3776, -2935, -2037, -1082
.value -70, 998, 2122, 3300, 4533, 5818, 7154, 8540
.value 9975, 11455, 12980, 14548, 16155, 17799, 19478, 21189
.value 22929, 24694, 26482, 28289, 30112, 31947,-26209,-24360
.value -22511,-20664,-18824,-16994,-15179,-13383,-11610, -9863
.value -8147, -6466, -4822, -3222, -1667, -162, 1289, 2684
.value 4019, 5290, 6494, 7629, 8692, 9679, 10590, 11420
.value 12169, 12835, 13415, 13908, 14313, 14630, 14856, 14992
.value 15038
# Single-precision divisor applied to each coefficient in the float pass.
intwindiv:
.long 0x47800000 # 65536.0
# ---------------------------------------------------------------------------
# make_decode_tables_MMX -- fill the decwin (float) and decwins (16-bit)
# window tables from intwinbase_MMX.
#
# Syntax: AT&T / GAS, 32-bit x86, argument on the stack.
# C-side declaration (mpg123.h of this commit):
#   void make_decode_tables_MMX(long scaleval);
#
# Saved/restored: %edi, %esi, %ebx.  Clobbered: %eax, %ecx, %edx, x87 stack
# top (balanced on exit), flags.
# The scaleval argument is negated in place in the caller's argument slot.
#
# Register roles in both passes:
#   %ecx = destination index      %ebx = iteration counter
#   %esi = 32 (constant)          %edi = pointer into intwinbase_MMX
#   (%esp) = current step through intwinbase_MMX in bytes (+2, later -2)
#
# Both passes walk intwinbase_MMX forward for 256 iterations, flip the step
# to -2, and stop once the pointer is back at intwinbase_MMX.
# ---------------------------------------------------------------------------
.text
.align 32
.globl make_decode_tables_MMX
make_decode_tables_MMX:
pushl %edi
pushl %esi
pushl %ebx
xorl %ecx,%ecx
xorl %ebx,%ebx
movl $32,%esi
movl $intwinbase_MMX,%edi
# 16(%esp) is the scaleval argument here (three pushes + return address).
negl 16(%esp) # scaleval
# After this push the step lives at (%esp) and scaleval moves to 20(%esp).
pushl $2 # intwinbase step
# ---- Pass 1: float table decwin ------------------------------------------
.L00:
# Skip the store when the destination index is 528 or more (unsigned).
cmpl $528,%ecx
jnc .L02
# eax = sign-extended 16-bit coefficient
movswl (%edi),%eax
# Entries at byte offset >= 444 are stored minus 60000; undo that.
cmpl $intwinbase_MMX+444,%edi
jc .L01
addl $60000,%eax
.L01:
# Convert via the stack: st0 = eax / 65536.0 * scaleval.
# scaleval is at 24(%esp) while the temporary is pushed.
pushl %eax
fildl (%esp)
fdivs intwindiv
fimull 24(%esp)
popl %eax
# Store the value at float index ecx and again 16 floats (64 bytes) later.
fsts decwin(,%ecx,4)
fstps decwin+64(,%ecx,4)
.L02:
# edx = counter & 31
leal -1(%esi),%edx
and %ebx,%edx
cmp $31,%edx
jnz .L03
# Every 32nd iteration: pull the index back by 1023 (net -991 after the
# +32 below) ...
addl $-1023,%ecx
# ... and, when bit 5 of the counter is also set (every 64th iteration),
# flip the sign of scaleval.
test %esi,%ebx
jz .L03
negl 20(%esp)
.L03:
# index += 32, table pointer += step, counter += 1
addl %esi,%ecx
addl (%esp),%edi
incl %ebx
# Finished once the pointer has walked back to the table start.
cmpl $intwinbase_MMX,%edi
jz .L04
# At counter == 256 reverse the walking direction.
cmp $256,%ebx
jnz .L00
negl (%esp)
jmp .L00
# ---- Pass 2: 16-bit table decwins ----------------------------------------
.L04:
# Discard the old step, reset index and counter, start again with step +2.
popl %eax
xorl %ecx,%ecx
xorl %ebx,%ebx
pushl $2
.L05:
cmpl $528,%ecx
jnc .L11
movswl (%edi),%eax
cmpl $intwinbase_MMX+444,%edi
jc .L06
addl $60000,%eax
.L06:
# cltd sign-extends eax into edx; the one-operand imull then replaces
# edx:eax with the 64-bit product eax * scaleval.
cltd
imull 20(%esp)
# eax = low 32 bits of (product >> 17)
shrdl $17,%edx,%eax
# Clamp eax to [-32767, 32767].  The movl between cmpl and jle does not
# change flags; it preloads edx = 1055 for the mirrored store below.
cmpl $32767,%eax
movl $1055,%edx
jle .L07
movl $32767,%eax
jmp .L08
.L07:
cmpl $-32767,%eax
jge .L08
movl $-32767,%eax
.L08:
# For index < 512 also store the value at word index 1055 - ecx and at
# 16 words (32 bytes) before that.
cmpl $512,%ecx
jnc .L09
subl %ecx,%edx
movw %ax,decwins(,%edx,2)
movw %ax,decwins-32(,%edx,2)
.L09:
# Even index: negate the value before the direct store; odd index: keep it.
testl $1,%ecx
jnz .L10
negl %eax
.L10:
# Store at word index ecx and again 16 words (32 bytes) later.
movw %ax,decwins(,%ecx,2)
movw %ax,decwins+32(,%ecx,2)
.L11:
# Same index/sign/direction stepping as in pass 1.
leal -1(%esi),%edx
and %ebx,%edx
cmp $31,%edx
jnz .L12
addl $-1023,%ecx
test %esi,%ebx
jz .L12
negl 20(%esp)
.L12:
addl %esi,%ecx
addl (%esp),%edi
incl %ebx
cmpl $intwinbase_MMX,%edi
jz .L13
cmp $256,%ebx
jnz .L05
negl (%esp)
jmp .L05
.L13:
# Drop the step slot, restore callee-saved registers.
popl %eax
popl %ebx
popl %esi
popl %edi
ret
Index: Makefile
===================================================================
RCS file: /cvsroot/mplayer/main/mp3lib/Makefile,v
retrieving revision 1.8
retrieving revision 1.9
diff -u -r1.8 -r1.9
--- Makefile 26 Jun 2001 23:15:58 -0000 1.8
+++ Makefile 29 Jun 2001 17:53:53 -0000 1.9
@@ -1,8 +1,10 @@
include config.mak
-SRCS = sr1.c d_cpu.s decode_i586.s $(OPTIONAL_SRCS)
-OBJS = sr1.o d_cpu.o decode_i586.o $(OPTIONAL_OBJS)
+SRCS = sr1.c d_cpu.s decode_i586.s dct64_MMX.s decode_MMX.s tabinit_MMX.s\
+dct36_3dnow.s dct64_3dnow.s dct36_k7.s dct64_k7.s
+OBJS = sr1.o d_cpu.o decode_i586.o dct64_MMX.o decode_MMX.o tabinit_MMX.o\
+dct36_3dnow.o dct64_3dnow.o dct36_k7.o dct64_k7.o
# OBJS = $(SRCS:.c,.s=.o)
CFLAGS = $(OPTFLAGS) $(EXTRA_INC)
Index: d_cpu.h
===================================================================
RCS file: /cvsroot/mplayer/main/mp3lib/d_cpu.h,v
retrieving revision 1.2
retrieving revision 1.3
diff -u -r1.2 -r1.3
--- d_cpu.h 7 Apr 2001 16:59:51 -0000 1.2
+++ d_cpu.h 29 Jun 2001 17:53:53 -0000 1.3
@@ -9,9 +9,12 @@
unsigned int _CpuID;
unsigned int _i586;
unsigned int _3dnow;
+unsigned int _isse;
+unsigned int _has_mmx;
extern unsigned long CpuDetect( void );
extern unsigned long ipentium( void );
+extern unsigned long isse( void );
extern unsigned long a3dnow( void );
#endif
Index: d_cpu.s
===================================================================
RCS file: /cvsroot/mplayer/main/mp3lib/d_cpu.s,v
retrieving revision 1.4
retrieving revision 1.5
diff -u -r1.4 -r1.5
--- d_cpu.s 9 May 2001 16:46:02 -0000 1.4
+++ d_cpu.s 29 Jun 2001 17:53:53 -0000 1.5
@@ -9,6 +9,7 @@
.globl CpuDetect
.globl ipentium
.globl a3dnow
+.globl isse
/ ---------------------------------------------------------------------------
/ in C: unsigned long CpuDetect( void );
@@ -45,7 +46,9 @@
/ ---------------------------------------------------------------------------
/ in C: unsigled long ipentium( void );
-/ return: 0 if the processor is not P5 or above else above 1.
+/ return: 0 if this processor i386 or i486
+/ 1 otherwise
+/ 2 if this cpu supports mmx
/ ---------------------------------------------------------------------------
ipentium:
pushl %ebx
@@ -63,10 +66,15 @@
jz no_cpuid
movl $1,%eax
cpuid
- shrl $8,%eax
- cmpl $5,%eax
- jb no_cpuid
- movl $1,%eax
+ movl %eax, %ecx
+ xorl %eax, %eax
+ shrl $8,%ecx
+ cmpl $5,%ecx
+ jb exit
+ incl %eax
+ test $0x00800000, %edx
+ jz exit
+ incl %eax
jmp exit
no_cpuid:
xorl %eax,%eax
@@ -109,6 +117,36 @@
inc %eax
exit2:
+ popl %ecx
+ popl %edx
+ popl %ebx
+ ret
+
+/ ---------------------------------------------------------------------------
+/ in C: unsigned long isse( void );
+/ return: 0 if this processor does not support sse
+/ 1 otherwise
+/ 2 if this cpu supports sse2 extension
+/ ---------------------------------------------------------------------------
+isse:
+ pushl %ebx
+ pushl %edx
+ pushl %ecx
+
+ call ipentium
+ testl %eax,%eax
+ jz exit3
+
+ movl $1,%eax
+ cpuid
+ xorl %eax, %eax
+ testl $0x02000000,%edx
+ jz exit3
+ incl %eax
+ testl $0x04000000,%edx
+ jz exit3
+ incl %eax
+exit3:
popl %ecx
popl %edx
popl %ebx
Index: dct36.c
===================================================================
RCS file: /cvsroot/mplayer/main/mp3lib/dct36.c,v
retrieving revision 1.1.1.1
retrieving revision 1.2
diff -u -r1.1.1.1 -r1.2
--- dct36.c 24 Feb 2001 20:31:08 -0000 1.1.1.1
+++ dct36.c 29 Jun 2001 17:53:53 -0000 1.2
@@ -193,7 +193,7 @@
sum1 = (tmp2b - tmp1b) * tfcos36[(v)]; \
MACRO0(v); }
- register const real *c = nCOS9;
+ register const real *c = COS9;
register real *out2 = o2;
register real *w = wintab;
register real *out1 = o1;
Index: dct64_3dnow.s
===================================================================
RCS file: /cvsroot/mplayer/main/mp3lib/dct64_3dnow.s,v
retrieving revision 1.1.1.1
retrieving revision 1.2
diff -u -r1.1.1.1 -r1.2
--- dct64_3dnow.s 24 Feb 2001 20:31:11 -0000 1.1.1.1
+++ dct64_3dnow.s 29 Jun 2001 17:53:53 -0000 1.2
@@ -1,706 +1,932 @@
-///
-/// Replacement of dct64() with AMD's 3DNow! SIMD operations support
-///
-/// Syuuhei Kashiyama <squash at mb.kcom.ne.jp>
-///
-/// The author of this program disclaim whole expressed or implied
-/// warranties with regard to this program, and in no event shall the
-/// author of this program liable to whatever resulted from the use of
-/// this program. Use it at your own risk.
-///
-
[...1606 lines suppressed...]
+ fadds 124(%edx)
+ fld %st(0)
+ fadds 76(%edx)
+ fistp 288(%edi)
+ fadds 92(%edx)
+ fistp 352(%edi)
+
+ flds 124(%edx)
+ fist 480(%edi)
+ fadds 92(%edx)
+ fistp 416(%edi)
+ movsw
+.L_bye:
+ addl $256,%esp
+ popl %edi
+ popl %esi
+ popl %ebx
+ ret
+
Index: dct64_k7.s
===================================================================
RCS file: /cvsroot/mplayer/main/mp3lib/dct64_k7.s,v
retrieving revision 1.3
retrieving revision 1.4
diff -u -r1.3 -r1.4
--- dct64_k7.s 20 Jun 2001 07:54:19 -0000 1.3
+++ dct64_k7.s 29 Jun 2001 17:53:53 -0000 1.4
@@ -1,677 +1,804 @@
-///
-/// Replacement of dct64() with AMD's 3DNowEx(DSP)! SIMD operations support
-///
-/// This code based 'dct64_3dnow.s' by Syuuhei Kashiyama
-/// <squash at mb.kcom.ne.jp>,only some types of changes have been made:
-///
-/// - added new opcodes PSWAPD, PFPNACC
-/// - decreased number of opcodes (as it was suggested by k7 manual)
-/// (using memory reference as operand of instructions)
-/// - Phase 6 is rewritten with mixing of cpu and mmx opcodes
-/// - change function name for support 3DNowEx! automatic detect
[...1442 lines suppressed...]
+ fadds 124(%edx)
+ fld %st(0)
+ fadds 76(%edx)
+ fistp 288(%edi)
+ fadds 92(%edx)
+ fistp 352(%edi)
+
+ flds 124(%edx)
+ fist 480(%edi)
+ fadds 92(%edx)
+ fistp 416(%edi)
+ movsw
+.L_bye:
+ addl $256,%esp
+ popl %edi
+ popl %esi
+ popl %ebx
+ ret
+
Index: decod386.c
===================================================================
RCS file: /cvsroot/mplayer/main/mp3lib/decod386.c,v
retrieving revision 1.4
retrieving revision 1.5
diff -u -r1.4 -r1.5
--- decod386.c 13 May 2001 18:30:18 -0000 1.4
+++ decod386.c 29 Jun 2001 17:53:53 -0000 1.5
@@ -105,6 +105,15 @@
}
#endif
+synth_func_t synth_func;
+
+int synth_1to1_MMX( real *bandPtr,int channel,short * samples)
+{
+ static short buffs[2][2][0x110];
+ static int bo = 1;
+ synth_1to1_MMX_s(bandPtr, channel, samples, (short *) buffs, &bo);
+ return 0;
+ }
static int synth_1to1(real *bandPtr,int channel,unsigned char *out,int *pnt)
{
@@ -117,39 +126,12 @@
int clip = 0;
int bo1;
- #ifdef HAVE_SSE_MP3
- //if ( _3dnow )
- {
- int ret;
- ret=synth_1to1_sse( bandPtr,channel,out+*pnt );
- *pnt+=128;
- return ret;
- }
- #endif
- #ifdef HAVE_3DNOWEX
- if ( _3dnow > 1 )
+ if ( synth_func )
{
int ret;
- ret=synth_1to1_3dnowex( bandPtr,channel,out+*pnt );
+ ret=(*synth_func)( bandPtr,channel,samples);
*pnt+=128;
return ret;
- }
- #endif
- #ifdef HAVE_3DNOW
- if ( _3dnow )
- {
- int ret;
- ret=synth_1to1_3dnow( bandPtr,channel,out+*pnt );
- *pnt+=128;
- return ret;
- }
- #endif
- if ( _i586 )
- {
- int ret;
- ret=synth_1to1_pent( bandPtr,channel,out+*pnt );
- *pnt+=128;
- return ret;
}
if(!channel) { /* channel=0 */
Index: layer2.c
===================================================================
RCS file: /cvsroot/mplayer/main/mp3lib/layer2.c,v
retrieving revision 1.1.1.1
retrieving revision 1.2
diff -u -r1.1.1.1 -r1.2
--- layer2.c 24 Feb 2001 20:31:08 -0000 1.1.1.1
+++ layer2.c 29 Jun 2001 17:53:53 -0000 1.2
@@ -50,8 +50,16 @@
{
double m=mulmul[k];
table = muls[k];
+ if(_has_mmx)
+ {
+ for(j=3,i=0;i<63;i++,j--)
+ *table++ = 16384 * m * pow(2.0,(double) j / 3.0);
+ }
+ else
for(j=3,i=0;i<63;i++,j--)
+ {
*table++ = m * pow(2.0,(double) j / 3.0);
+ }
*table++ = 0.0;
}
}
Index: layer3.c
===================================================================
RCS file: /cvsroot/mplayer/main/mp3lib/layer3.c,v
retrieving revision 1.2
retrieving revision 1.3
diff -u -r1.2 -r1.3
--- layer3.c 7 May 2001 01:59:58 -0000 1.2
+++ layer3.c 29 Jun 2001 17:53:53 -0000 1.3
@@ -22,9 +22,9 @@
#define GP2MAX (256+118+4)
static real gainpow2[GP2MAX];
-static real nCOS9[9];
+real COS9[9];
static real COS6_1,COS6_2;
-static real tfcos36[9];
+real tfcos36[9];
static real tfcos12[3];
#ifdef NEW_DCT9
static real cos9[3],cos18[3];
@@ -111,8 +111,12 @@
int i,j,k,l;
for(i=-256;i<118+4;i++)
- gainpow2[i+256] = pow((double)2.0,-0.25 * (double) (i+210) );
-
+ {
+ if(_has_mmx)
+ gainpow2[i+256] = 16384.0 * pow((double)2.0,-0.25 * (double) (i+210) );
+ else
+ gainpow2[i+256] = pow((double)2.0,-0.25 * (double) (i+210) );
+ }
for(i=0;i<8207;i++)
ispow[i] = pow((double)i,(double)4.0/3.0);
@@ -139,7 +143,7 @@
}
for(i=0;i<9;i++)
- nCOS9[i] = cos( M_PI / 18.0 * (double) i);
+ COS9[i] = cos( M_PI / 18.0 * (double) i);
for(i=0;i<9;i++)
tfcos36[i] = 0.5 / cos ( M_PI * (double) (i*2+1) / 36.0 );
@@ -1533,6 +1537,9 @@
/*
* III_hybrid
*/
+
+dct36_func_t dct36_func;
+
static void III_hybrid(real fsIn[SBLIMIT][SSLIMIT],real tsOut[SSLIMIT][SBLIMIT],
int ch,struct gr_info_s *gr_info)
{
@@ -1553,8 +1560,8 @@
if(gr_info->mixed_block_flag) {
sb = 2;
- dct36(fsIn[0],rawout1,rawout2,win[0],tspnt);
- dct36(fsIn[1],rawout1+18,rawout2+18,win1[0],tspnt+1);
+ (*dct36_func)(fsIn[0],rawout1,rawout2,win[0],tspnt);
+ (*dct36_func)(fsIn[1],rawout1+18,rawout2+18,win1[0],tspnt+1);
rawout1 += 36; rawout2 += 36; tspnt += 2;
}
@@ -1567,8 +1574,8 @@
}
else {
for (; sb<gr_info->maxb; sb+=2,tspnt+=2,rawout1+=36,rawout2+=36) {
- dct36(fsIn[sb],rawout1,rawout2,win[bt],tspnt);
- dct36(fsIn[sb+1],rawout1+18,rawout2+18,win1[bt],tspnt+1);
+ (*dct36_func)(fsIn[sb],rawout1,rawout2,win[bt],tspnt);
+ (*dct36_func)(fsIn[sb+1],rawout1+18,rawout2+18,win1[bt],tspnt+1);
}
}
Index: mpg123.h
===================================================================
RCS file: /cvsroot/mplayer/main/mp3lib/mpg123.h,v
retrieving revision 1.3
retrieving revision 1.4
diff -u -r1.3 -r1.4
--- mpg123.h 13 May 2001 18:30:18 -0000 1.3
+++ mpg123.h 29 Jun 2001 17:53:53 -0000 1.4
@@ -104,33 +104,22 @@
};
static long freqs[9];
-#ifdef HAVE_3DNOW
- real decwin[2*(512+32)];
-#else
- real decwin[512+32];
-#endif
- real *pnts[];
+extern real decwin[(512+32)];
+extern real *pnts[];
static int do_layer2(struct frame *fr,int single);
static int do_layer3(struct frame *fr,int single);
static int synth_1to1(real *bandPtr,int channel,unsigned char *out,int *pnt);
-extern int synth_1to1_pent( real *,int,unsigned char * );
+extern int synth_1to1_pent( real *,int,short * );
+extern void make_decode_tables_MMX(long scaleval);
+extern int synth_1to1_MMX( real *,int,short * );
+extern int synth_1to1_MMX_s(real *, int, short *, short *, int *);
extern void dct64(real *a,real *b,real *c);
-#ifdef HAVE_3DNOW
- extern void dct64_3dnow( real *,real *, real * );
- extern void dct36_3dnow(real *,real *,real *,real *,real *);
- extern int synth_1to1_3dnow( real *,int,unsigned char * );
-#endif
-#ifdef HAVE_3DNOWEX
- extern void dct64_3dnowex( real *,real *, real * );
- extern void dct36_3dnowex(real *,real *,real *,real *,real *);
- extern int synth_1to1_3dnowex( real *,int,unsigned char * );
-#endif
-#ifdef HAVE_SSE_MP3
-// extern void dct64_3dnow( real *,real *, real * );
-// extern void dct36_3dnow(real *,real *,real *,real *,real *);
- extern int synth_1to1_sse( real *,int,unsigned char * );
-#endif
+extern void dct36_3dnow(real *,real *,real *,real *,real *);
+extern void dct36_3dnowex(real *,real *,real *,real *,real *);
+extern void dct36_sse(real *,real *,real *,real *,real *);
+typedef int (*synth_func_t)( real *,int,short * );
+typedef void (*dct36_func_t)(real *,real *,real *,real *,real *);
Index: sr1.c
===================================================================
RCS file: /cvsroot/mplayer/main/mp3lib/sr1.c,v
retrieving revision 1.7
retrieving revision 1.8
diff -u -r1.7 -r1.8
--- sr1.c 19 Jun 2001 22:07:19 -0000 1.7
+++ sr1.c 29 Jun 2001 17:53:53 -0000 1.8
@@ -343,6 +343,12 @@
static int tables_done_flag=0;
+/* It's hidden from gcc in assembler */
+extern void dct64_MMX( void );
+extern void dct64_MMX_3dnow( void );
+extern void dct64_MMX_3dnowex( void );
+void (*dct64_MMX_func)( void );
+
// Init decoder tables. Call first, once!
#ifdef USE_FAKE_MONO
void MP3_Init(int fakemono){
@@ -351,20 +357,41 @@
#endif
_CpuID=CpuDetect();
_i586=ipentium();
-#ifdef HAVE_3DNOW
+#ifndef HAVE_MMX
+ _i586 &= 1;
+#endif
_3dnow=a3dnow();
+#ifndef HAVE_3DNOW
+ _3dnow = 0;
#endif
-
- printf( "mp3lib: Processor ID: %x\n",_CpuID );
- printf( "mp3lib: i586 processor %sdetected.\n",(_i586?"":"not ") );
-#ifdef HAVE_3DNOW
- printf( "mp3lib: AMD 3dnow! extension %sdetected.\n",(_3dnow?"":"not ") );
+#ifndef HAVE_3DNOWEX
+ _3dnow &= 1;
+#endif
+ _isse=isse();
+#ifndef HAVE_SSE
+ _isse = 0;
#endif
-#ifdef HAVE_3DNOWEX
- printf( "mp3lib: AMD 3dnow-dsp! extension %sdetected.\n",(_3dnow>1?"":"not ") );
+#ifndef HAVE_SSE2
+ _isse &= 1;
#endif
+ _has_mmx=_i586>1||_3dnow||_isse;
+ printf( "mp3lib: Processor ID: %x\n",_CpuID );
+ if(_i586&&!_3dnow&&!_isse)
+ printf( "mp3lib: Using Pentium%s optimized decore.\n",(_i586>1?"-MMX":""));
+ else
+ if(_isse)
+ /*
+ Note: It's ok, Since K8 will have SSE2 support and will much faster
+ of P4 ;)
+ */
+ printf( "mp3lib: Using SSE%s! optimized decore.\n",(_isse>1?"2":""));
+ else
+ if(_3dnow)
+ printf( "mp3lib: Using AMD 3dnow%s! optimized decore.\n",(_3dnow>1?"-dsp(k7)":""));
- make_decode_tables(outscale);
+/* Use it for any MMX cpu */
+ if(_has_mmx) make_decode_tables_MMX(outscale);
+ else make_decode_tables(outscale);
#ifdef USE_FAKE_MONO
if (fakemono == 1)
fr.synth=synth_1to1_l;
@@ -381,6 +408,42 @@
init_layer2();
init_layer3(fr.down_sample_sblimit);
tables_done_flag=1;
+
+ dct36_func=dct36;
+ if(_isse)
+ {
+ synth_func=synth_1to1_MMX;
+ dct64_MMX_func=dct64_MMX;
+ }
+ else
+ if ( _3dnow > 1 )
+ {
+ synth_func=synth_1to1_MMX;
+ dct36_func=dct36_3dnowex;
+ dct64_MMX_func=dct64_MMX_3dnowex;
+ }
+ else
+ if ( _3dnow )
+ {
+ synth_func=synth_1to1_MMX;
+ dct36_func=dct36_3dnow;
+ dct64_MMX_func=dct64_MMX_3dnow;
+ }
+ else
+ if ( _i586 > 1)
+ {
+ synth_func=synth_1to1_MMX;
+ dct64_MMX_func=dct64_MMX;
+ }
+ else
+ if ( _i586 )
+ {
+ synth_func=synth_1to1_pent;
+ }
+ else
+ {
+ synth_func = NULL;
+ }
}
#if 0
Index: tabinit.c
===================================================================
RCS file: /cvsroot/mplayer/main/mp3lib/tabinit.c,v
retrieving revision 1.2
retrieving revision 1.3
diff -u -r1.2 -r1.3
--- tabinit.c 7 Jun 2001 09:08:32 -0000 1.2
+++ tabinit.c 29 Jun 2001 17:53:53 -0000 1.3
@@ -1,20 +1,7 @@
+real decwin[(512+32)], cos64[32], cos32[16], cos16[8], cos8[4], cos4[2];
+real *pnts[]={ cos64,cos32,cos16,cos8,cos4 };
-
-#ifdef HAVE_3DNOW
- real decwin[2*(512+32)] __attribute__((aligned(8)));
- real cos64[32] __attribute__((aligned(8)));
- real cos32[16] __attribute__((aligned(8)));
- real cos16[8] __attribute__((aligned(8)));
- real cos8[4] __attribute__((aligned(8)));
- real cos4[2] __attribute__((aligned(8)));
- real *pnts[]={ cos64,cos32,cos16,cos8,cos4 };
-#else
- real decwin[512+32];
- real cos64[16],cos32[8],cos16[4],cos8[2],cos4[1];
- real *pnts[] = { cos64,cos32,cos16,cos8,cos4 };
-#endif
-
-long intwinbase[] = {
+static long intwinbase[] = {
0, -1, -1, -1, -1, -1, -1, -2, -2, -2,
-2, -3, -3, -4, -4, -5, -5, -6, -7, -7,
-8, -9, -10, -11, -13, -14, -16, -17, -19, -21,
@@ -42,7 +29,7 @@
64019, 65290, 66494, 67629, 68692, 69679, 70590, 71420, 72169, 72835,
73415, 73908, 74313, 74630, 74856, 74992, 75038 };
- void make_decode_tables(long scaleval)
+void make_decode_tables(long scaleval)
{
int i,j,k,kr,divv;
real *table,*costab;
@@ -53,17 +40,13 @@
kr=0x10>>i; divv=0x40>>i;
costab = pnts[i];
for(k=0;k<kr;k++) costab[k] = 1.0 / (2.0 * cos(M_PI * ((double) k * 2.0 + 1.0) / (double) divv));
- #ifdef HAVE_3DNOW
- if ( _3dnow ) for(k=0;k<kr;k++) costab[k+kr]=-costab[k];
- #endif
-
}
table = decwin;
scaleval = -scaleval;
for(i=0,j=0;i<256;i++,j++,table+=32)
{
- if(table < decwin+512+16)
+ if(table < decwin+512+16)
table[16] = table[0] = (double) intwinbase[j] / 65536.0 * (double) scaleval;
if(i % 32 == 31)
table -= 1023;
@@ -80,14 +63,6 @@
if(i % 64 == 63)
scaleval = - scaleval;
}
- #ifdef HAVE_3DNOW
- if ( _3dnow )
- for(i=0;i<512+32;i++)
- {
- decwin[512+31-i]*=65536.0; // allows faster clipping in 3dnow code
- decwin[512+32+i]=decwin[512+31-i];
- }
- #endif
}
Index: test2.c
===================================================================
RCS file: /cvsroot/mplayer/main/mp3lib/test2.c,v
retrieving revision 1.2
retrieving revision 1.3
diff -u -r1.2 -r1.3
--- test2.c 17 May 2001 18:20:14 -0000 1.2
+++ test2.c 29 Jun 2001 17:53:53 -0000 1.3
@@ -1,5 +1,5 @@
-// gcc test.c -I.. -L. -lMP3 -lm -o test2 -O4
+//gcc test2.c -O2 -I.. -L. ../libvo/aclib.c -lMP3 -lm -o test2
#include <stdio.h>
#include <stdlib.h>
_______________________________________________
Mplayer-cvslog mailing list
Mplayer-cvslog at lists.sourceforge.net
http://lists.sourceforge.net/lists/listinfo/mplayer-cvslog
More information about the MPlayer-cvslog
mailing list