[Mplayer-cvslog] CVS: main/mp3lib dct64_MMX.s,NONE,1.1 decode_MMX.s,NONE,1.1 tabinit_MMX.s,NONE,1.1 Makefile,1.8,1.9 d_cpu.h,1.2,1.3 d_cpu.s,1.4,1.5 dct36.c,1.1.1.1,1.2 dct64_3dnow.s,1.1.1.1,1.2 dct64_k7.s,1.3,1.4 decod386.c,1.4,1.5 layer2.c,1.1.1.1,1.2 layer3.c,1.2,1.3 mpg123.h,1.3,1.4 sr1.c,1.7,1.8 tabinit.c,1.2,1.3 test2.c,1.2,1.3

Nick Kurshev nick at mplayerhq.banki.hu
Fri Jun 29 19:53:56 CEST 2001


Update of /cvsroot/mplayer/main/mp3lib
In directory mplayerhq:/var/tmp.root/cvs-serv16908

Modified Files:
	Makefile d_cpu.h d_cpu.s dct36.c dct64_3dnow.s dct64_k7.s 
	decod386.c layer2.c layer3.c mpg123.h sr1.c tabinit.c test2.c 
Added Files:
	dct64_MMX.s decode_MMX.s tabinit_MMX.s 
Log Message:
Added the newest MMX-optimized decoder core, which speeds up decoding by at least 13% on any CPU.

--- NEW FILE ---
# This code was taken from http://www.mpg123.org
# See ChangeLog of mpg123-0.59s-pre.1 for detail
# Applied to mplayer by Nick Kurshev <nickols_k at mail.ru>

.data
	.align 4
costab:
	.long 1056974725
	.long 1057056395
	.long 1057223771
	.long 1057485416
	.long 1057855544
	.long 1058356026
	.long 1059019886
	.long 1059897405
	.long 1061067246
	.long 1062657950
	.long 1064892987
	.long 1066774581
[...989 lines suppressed...]
	flds  108(%edx)
	fadds 124(%edx)
	fld      %st(0)
	fadds  76(%edx)
	fistp 288(%edi)
	fadds  92(%edx)
	fistp 352(%edi)

	flds  124(%edx)
	fist  480(%edi)
	fadds  92(%edx)
	fistp 416(%edi)
	movsw
	addl $256,%esp
	popl %edi
	popl %esi
	popl %ebx
	ret
	


--- NEW FILE ---
# this code comes under GPL
# This code was taken from http://www.mpg123.org
# See ChangeLog of mpg123-0.59s-pre.1 for detail
# Applied to mplayer by Nick Kurshev <nickols_k at mail.ru>
#
# TODO: Partially unroll the loops and remove the MOVW instructions.
#

.text

.globl synth_1to1_MMX_s

# ---------------------------------------------------------------------------
# synth_1to1_MMX_s -- run the selected dct64 routine on one block of subband
# data, then window the result with the 16-bit table `decwins` using MMX
# multiply-accumulate, writing 32 16-bit output samples (17 in the first
# loop, 15 in the second).
#
# C prototype (from mpg123.h; argument names as used by the C caller):
#   int synth_1to1_MMX_s(real *bandPtr, int channel, short *samples,
#                        short *buffs, int *bo);
# ABI:   32-bit cdecl, all arguments on the stack.
# Stack after the four register pushes below:
#   20(%esp) = bandPtr   24(%esp) = channel   28(%esp) = samples
#   32(%esp) = buffs     36(%esp) = bo (pointer)
# Saves/restores: ebp, edi, esi, ebx.
# Clobbers: eax, ecx, edx, mm0-mm3, flags (plus whatever *dct64_MMX_func
#           clobbers).
# Return: EAX is never set to a dedicated return value; on exit it still
#         holds the last sample moved out of mm0.
# Output stride is 4 bytes per 16-bit sample and the start is offset by
# channel*2 bytes -- presumably interleaved stereo; confirm against caller.
# ---------------------------------------------------------------------------
synth_1to1_MMX_s:
        pushl %ebp
        pushl %edi
        pushl %esi
        pushl %ebx
        movl 24(%esp),%ecx              # ecx = channel
        movl 28(%esp),%edi              # edi = samples
        movl $15,%ebx
        movl 36(%esp),%edx              # edx = bo (pointer)
        # edi = samples + channel*2 bytes (one 16-bit slot per channel)
        leal (%edi,%ecx,2),%edi
	decl %ecx
        movl 32(%esp),%esi              # esi = buffs
        movl (%edx),%eax                # eax = *bo
        # ecx == 0 here means channel == 1: keep *bo and the buffs base as-is
        jecxz .L1
        # channel != 1: *bo = (*bo - 1) & 15 and use the area 1088 bytes
        # into buffs
        decl %eax
        andl %ebx,%eax                  
        leal 1088(%esi),%esi                            
        movl %eax,(%edx)                
.L1:
        # From here "bo" is the value in eax on entry to .L1.
        leal (%esi,%eax,2),%edx         # edx = esi + 2*bo
        movl %eax,%ebp                  # ebp = bo
        incl %eax                       
        pushl 20(%esp)                  # 3rd argument for dct64: bandPtr
        andl %ebx,%eax                  # eax = (bo + 1) & 15
        leal 544(%esi,%eax,2),%ecx      # ecx = esi + 544 + 2*eax
        incl %ebx                       # ebx = 16
        # Odd eax: keep ecx/edx as computed, ebp = bo, esi unchanged.
        # Even eax: swap the two pointers, ebp = bo + 1, esi += 544.
	testl $1, %eax
	jnz .L2                       
        xchgl %edx,%ecx
	incl %ebp
        leal 544(%esi),%esi           
.L2: 
        # Leave MMX state before the indirect call -- presumably because the
        # dct64 routine uses x87 instructions; TODO confirm.
	emms
        pushl %edx
        pushl %ecx
        # cdecl call: dct64_MMX_func(ecx, edx, bandPtr)
        call *dct64_MMX_func
        addl $12,%esp
        # ebx/ebp are callee-saved, so they still hold 16 and bo (or bo+1).
	leal 1(%ebx), %ecx
        subl %ebp,%ebx                

        # ecx = 17 (loop count), edx = decwins + 2*(16 - ebp)
	leal decwins(%ebx,%ebx,1), %edx
.L3: 
        # 16-term dot product of the 16-bit words at (%edx) and (%esi):
        # four pmaddwd give eight 32-bit partial sums, folded into one.
        movq  (%edx),%mm0
        pmaddwd (%esi),%mm0
        movq  8(%edx),%mm1
        pmaddwd 8(%esi),%mm1
        movq  16(%edx),%mm2
        pmaddwd 16(%esi),%mm2
        movq  24(%edx),%mm3
        pmaddwd 24(%esi),%mm3
        paddd %mm1,%mm0
        paddd %mm2,%mm0
        paddd %mm3,%mm0
        # add the high dword to the low dword (horizontal sum)
        movq  %mm0,%mm1
        psrlq $32,%mm1
        paddd %mm1,%mm0
        # scale down by 2^13 (arithmetic shift), saturate to signed 16 bit
        psrad $13,%mm0
        packssdw %mm0,%mm0
        movd %mm0,%eax
	movw %ax, (%edi)

        # advance: 32 bytes of input, 64 bytes of window, 4 bytes of output
        leal 32(%esi),%esi
        leal 64(%edx),%edx
        leal 4(%edi),%edi                
	decl %ecx
        jnz  .L3


        # Second pass: 15 more samples. esi steps backwards by 32 bytes per
        # sample (starting 64 bytes before where the first loop stopped),
        # edx keeps advancing by 64, and each result is negated.
        subl $64,%esi                    
        movl $15,%ecx
.L4: 
        movq  (%edx),%mm0
        pmaddwd (%esi),%mm0
        movq  8(%edx),%mm1
        pmaddwd 8(%esi),%mm1
        movq  16(%edx),%mm2
        pmaddwd 16(%esi),%mm2
        movq  24(%edx),%mm3
        pmaddwd 24(%esi),%mm3
        paddd %mm1,%mm0
        paddd %mm2,%mm0
        paddd %mm3,%mm0
        movq  %mm0,%mm1
        psrlq $32,%mm1
        paddd %mm0,%mm1
        psrad $13,%mm1
        packssdw %mm1,%mm1
        # mm0 = 0 - mm1 with signed 16-bit saturation (negated sample)
        psubd %mm0,%mm0
        psubsw %mm1,%mm0
        movd %mm0,%eax
	movw %ax,(%edi)

        subl $32,%esi
        addl $64,%edx
        leal 4(%edi),%edi                
        decl %ecx
	jnz  .L4
        # leave MMX state again before returning to the caller
	emms
        popl %ebx
        popl %esi
        popl %edi
        popl %ebp
        ret



--- NEW FILE ---
# This code was taken from http://www.mpg123.org
# See ChangeLog of mpg123-0.59s-pre.1 for detail
# Applied to mplayer by Nick Kurshev <nickols_k at mail.ru>
.bss
	.align 8
# decwin: 2176-byte common block; make_decode_tables_MMX fills it with
# 4-byte floats (2176 / 4 = 544 entries).
    	.comm	decwin,2176,32
	.align 8
# decwins: 2176-byte common block; make_decode_tables_MMX fills it with
# 16-bit words (2176 / 2 = 1088 entries) and synth_1to1_MMX_s reads it
# with pmaddwd.
	.comm	decwins,2176,32
.data
	.align 8
# intwinbase_MMX: 257 signed 16-bit window coefficients (32 rows of 8 plus
# one). Entries at byte offset 444 (index 222) and beyond are stored with
# 60000 subtracted so that they fit in 16 bits; make_decode_tables_MMX adds
# the 60000 back after sign-extending each word.
intwinbase_MMX:
	.value      0,    -1,    -1,    -1,    -1,    -1,    -1,    -2
	.value     -2,    -2,    -2,    -3,    -3,    -4,    -4,    -5
	.value     -5,    -6,    -7,    -7,    -8,    -9,   -10,   -11
	.value    -13,   -14,   -16,   -17,   -19,   -21,   -24,   -26
	.value    -29,   -31,   -35,   -38,   -41,   -45,   -49,   -53
	.value    -58,   -63,   -68,   -73,   -79,   -85,   -91,   -97
	.value   -104,  -111,  -117,  -125,  -132,  -139,  -147,  -154
	.value   -161,  -169,  -176,  -183,  -190,  -196,  -202,  -208
	.value   -213,  -218,  -222,  -225,  -227,  -228,  -228,  -227
	.value   -224,  -221,  -215,  -208,  -200,  -189,  -177,  -163
	.value   -146,  -127,  -106,   -83,   -57,   -29,     2,    36
	.value     72,   111,   153,   197,   244,   294,   347,   401
	.value    459,   519,   581,   645,   711,   779,   848,   919
	.value    991,  1064,  1137,  1210,  1283,  1356,  1428,  1498
	.value   1567,  1634,  1698,  1759,  1817,  1870,  1919,  1962
	.value   2001,  2032,  2057,  2075,  2085,  2087,  2080,  2063
	.value   2037,  2000,  1952,  1893,  1822,  1739,  1644,  1535
	.value   1414,  1280,  1131,   970,   794,   605,   402,   185
	.value    -45,  -288,  -545,  -814, -1095, -1388, -1692, -2006
	.value  -2330, -2663, -3004, -3351, -3705, -4063, -4425, -4788
	.value  -5153, -5517, -5879, -6237, -6589, -6935, -7271, -7597
	.value  -7910, -8209, -8491, -8755, -8998, -9219, -9416, -9585
	.value  -9727, -9838, -9916, -9959, -9966, -9935, -9863, -9750
	.value  -9592, -9389, -9139, -8840, -8492, -8092, -7640, -7134
	.value  -6574, -5959, -5288, -4561, -3776, -2935, -2037, -1082
	.value    -70,   998,  2122,  3300,  4533,  5818,  7154,  8540
	.value   9975, 11455, 12980, 14548, 16155, 17799, 19478, 21189
	.value  22929, 24694, 26482, 28289, 30112, 31947,-26209,-24360
	.value -22511,-20664,-18824,-16994,-15179,-13383,-11610, -9863
	.value  -8147, -6466, -4822, -3222, -1667,  -162,  1289,  2684
	.value   4019,  5290,  6494,  7629,  8692,  9679, 10590, 11420
	.value  12169, 12835, 13415, 13908, 14313, 14630, 14856, 14992
	.value  15038

# intwindiv: single-precision divisor applied to each coefficient when the
# float table decwin is built.
intwindiv:
	.long 0x47800000			# 65536.0
.text
	.align 32
# ---------------------------------------------------------------------------
# make_decode_tables_MMX -- build the float window table `decwin` and the
# 16-bit window table `decwins` from intwinbase_MMX.
#
# C prototype (from mpg123.h): void make_decode_tables_MMX(long scaleval);
# ABI:   32-bit cdecl. After the three register pushes scaleval is at
#        16(%esp); while the extra "step" word is on the stack it is at
#        20(%esp) (24(%esp) inside the short push/pop window at .L01).
# NOTE:  the scaleval argument slot on the caller's stack is negated in
#        place and toggled again during the loops.
# Register roles in both loops:
#   ecx = destination index, ebx = iteration counter,
#   esi = constant 32,       edi = read pointer into intwinbase_MMX,
#   (%esp) = read step in bytes (+2 going up the table, -2 coming back).
# Saves/restores: edi, esi, ebx.  Clobbers: eax, ecx, edx, x87 stack, flags.
# ---------------------------------------------------------------------------
.globl make_decode_tables_MMX
make_decode_tables_MMX:
	pushl %edi
	pushl %esi
	pushl %ebx

	xorl %ecx,%ecx
	xorl %ebx,%ebx
	movl $32,%esi
	movl $intwinbase_MMX,%edi
	negl 16(%esp)				# scaleval = -scaleval (in place)
	pushl $2				# intwinbase step
# ---- pass 1: float table decwin -------------------------------------------
.L00:
	# only indices below 528 are stored (unsigned compare)
	cmpl $528,%ecx
	jnc .L02
	movswl (%edi),%eax
	# entries from byte offset 444 on are stored biased by -60000
	cmpl $intwinbase_MMX+444,%edi
	jc .L01
	addl $60000,%eax
.L01:
	# decwin[ecx] = decwin[ecx+16] = coeff / 65536.0 * scaleval
	pushl %eax
	fildl (%esp)
	fdivs intwindiv
	fimull 24(%esp)				# scaleval (one extra push is live)
	popl %eax
	fsts  decwin(,%ecx,4)
	fstps decwin+64(,%ecx,4)
.L02:
	# when (ebx & 31) == 31: pull the index back by 1023, and if bit 5 of
	# ebx is also set (i.e. ebx & 63 == 63) flip the sign of scaleval
	leal -1(%esi),%edx
	and %ebx,%edx
	cmp $31,%edx
	jnz .L03
	addl $-1023,%ecx
	test %esi,%ebx
	jz  .L03
	negl 20(%esp)
.L03:
	addl %esi,%ecx				# index += 32
	addl (%esp),%edi			# advance the read pointer by step
	incl %ebx
	# done once the read pointer has walked back to the table start
	cmpl $intwinbase_MMX,%edi
	jz .L04
	# after 256 iterations reverse direction and walk the table backwards
	cmp $256,%ebx
	jnz .L00
	negl (%esp)
	jmp .L00
.L04:
	popl %eax				# drop the step word

# ---- pass 2: 16-bit table decwins -----------------------------------------
# edi is back at intwinbase_MMX here (that was the exit condition above).
	xorl %ecx,%ecx
	xorl %ebx,%ebx
	pushl $2				# step again, +2 bytes
.L05:
	cmpl $528,%ecx
	jnc .L11
	movswl (%edi),%eax
	cmpl $intwinbase_MMX+444,%edi
	jc .L06
	addl $60000,%eax
.L06:
	# edx:eax = coeff * scaleval (signed 64-bit), then eax = low 32 bits
	# of that product shifted right by 17
	cltd
	imull 20(%esp)
	shrdl $17,%edx,%eax
	# clamp eax to [-32767, 32767]; the movl between cmpl and jle does not
	# touch the flags, it only preloads edx for the mirrored store below
	cmpl $32767,%eax
	movl $1055,%edx
	jle .L07
	movl $32767,%eax
	jmp .L08
.L07:
	cmpl $-32767,%eax
	jge .L08
	movl $-32767,%eax
.L08:
	# for indices below 512 also store at word indices 1055-ecx and
	# 1055-ecx-16
	cmpl $512,%ecx
	jnc .L09
	subl %ecx,%edx
	movw %ax,decwins(,%edx,2)
	movw %ax,decwins-32(,%edx,2)
.L09:
	# even index: store the negated value; odd index: store it as is
	testl $1,%ecx
	jnz .L10
	negl %eax
.L10:
	# decwins[ecx] and decwins[ecx+16] (16-bit word indices)
	movw %ax,decwins(,%ecx,2)
	movw %ax,decwins+32(,%ecx,2)
.L11:
	# same index/sign/direction stepping as in pass 1
	leal -1(%esi),%edx
	and %ebx,%edx
	cmp $31,%edx
	jnz .L12
	addl $-1023,%ecx
	test %esi,%ebx
	jz  .L12
	negl 20(%esp)
.L12:
	addl %esi,%ecx
	addl (%esp),%edi
	incl %ebx
	cmpl $intwinbase_MMX,%edi
	jz .L13
	cmp $256,%ebx
	jnz .L05
	negl (%esp)
	jmp .L05
.L13:
	popl %eax				# drop the step word
	
	popl %ebx
	popl %esi
	popl %edi
	ret


Index: Makefile
===================================================================
RCS file: /cvsroot/mplayer/main/mp3lib/Makefile,v
retrieving revision 1.8
retrieving revision 1.9
diff -u -r1.8 -r1.9
--- Makefile	26 Jun 2001 23:15:58 -0000	1.8
+++ Makefile	29 Jun 2001 17:53:53 -0000	1.9
@@ -1,8 +1,10 @@
 
 include config.mak
 
-SRCS = sr1.c d_cpu.s decode_i586.s $(OPTIONAL_SRCS)
-OBJS = sr1.o d_cpu.o decode_i586.o $(OPTIONAL_OBJS)
+SRCS = sr1.c d_cpu.s decode_i586.s dct64_MMX.s decode_MMX.s tabinit_MMX.s\
+dct36_3dnow.s dct64_3dnow.s dct36_k7.s dct64_k7.s
+OBJS = sr1.o d_cpu.o decode_i586.o dct64_MMX.o decode_MMX.o tabinit_MMX.o\
+dct36_3dnow.o dct64_3dnow.o dct36_k7.o dct64_k7.o
 # OBJS = $(SRCS:.c,.s=.o)
 CFLAGS  = $(OPTFLAGS) $(EXTRA_INC)
 

Index: d_cpu.h
===================================================================
RCS file: /cvsroot/mplayer/main/mp3lib/d_cpu.h,v
retrieving revision 1.2
retrieving revision 1.3
diff -u -r1.2 -r1.3
--- d_cpu.h	7 Apr 2001 16:59:51 -0000	1.2
+++ d_cpu.h	29 Jun 2001 17:53:53 -0000	1.3
@@ -9,9 +9,12 @@
 unsigned int _CpuID;
 unsigned int _i586;
 unsigned int _3dnow;
+unsigned int _isse;
+unsigned int _has_mmx;
 
 extern unsigned long CpuDetect( void );
 extern unsigned long ipentium( void );
+extern unsigned long isse( void );
 extern unsigned long a3dnow( void );
 
 #endif

Index: d_cpu.s
===================================================================
RCS file: /cvsroot/mplayer/main/mp3lib/d_cpu.s,v
retrieving revision 1.4
retrieving revision 1.5
diff -u -r1.4 -r1.5
--- d_cpu.s	9 May 2001 16:46:02 -0000	1.4
+++ d_cpu.s	29 Jun 2001 17:53:53 -0000	1.5
@@ -9,6 +9,7 @@
 .globl CpuDetect
 .globl ipentium
 .globl a3dnow
+.globl isse
 
 / ---------------------------------------------------------------------------
 /  in C: unsigned long CpuDetect( void );
@@ -45,7 +46,9 @@
 
 / ---------------------------------------------------------------------------
 /  in C: unsigled long ipentium( void );
-/   return: 0 if the processor is not P5 or above else above 1.
+/  return: 0 if this processor i386 or i486
+/          1 otherwise
+/          2 if this cpu supports mmx
 / ---------------------------------------------------------------------------
 ipentium:
         pushl  %ebx
@@ -63,10 +66,15 @@
         jz     no_cpuid
         movl   $1,%eax
         cpuid
-        shrl   $8,%eax
-        cmpl   $5,%eax
-        jb     no_cpuid
-        movl   $1,%eax
+	movl   %eax, %ecx
+	xorl   %eax, %eax
+        shrl   $8,%ecx
+        cmpl   $5,%ecx
+        jb     exit
+        incl   %eax
+	test   $0x00800000, %edx
+	jz     exit
+	incl   %eax
         jmp    exit
 no_cpuid:
         xorl   %eax,%eax
@@ -109,6 +117,36 @@
         inc    %eax
 exit2:
 
+        popl   %ecx
+        popl   %edx
+        popl   %ebx
+        ret
+
+/ ---------------------------------------------------------------------------
+/  in C: unsigned long isse( void );
+/  return: 0 if this processor does not support sse
+/          1 otherwise
+/          2 if this cpu supports sse2 extension
+/ ---------------------------------------------------------------------------
+isse:
+        pushl  %ebx
+        pushl  %edx
+        pushl  %ecx
+
+        call   ipentium
+        testl  %eax,%eax
+        jz     exit3
+
+        movl   $1,%eax
+        cpuid
+	xorl   %eax, %eax
+        testl  $0x02000000,%edx
+        jz     exit3
+	incl   %eax
+        testl  $0x04000000,%edx
+        jz     exit3
+        incl   %eax
+exit3:
         popl   %ecx
         popl   %edx
         popl   %ebx

Index: dct36.c
===================================================================
RCS file: /cvsroot/mplayer/main/mp3lib/dct36.c,v
retrieving revision 1.1.1.1
retrieving revision 1.2
diff -u -r1.1.1.1 -r1.2
--- dct36.c	24 Feb 2001 20:31:08 -0000	1.1.1.1
+++ dct36.c	29 Jun 2001 17:53:53 -0000	1.2
@@ -193,7 +193,7 @@
     sum1 = (tmp2b - tmp1b) * tfcos36[(v)]; \
 	MACRO0(v); }
 
-    register const real *c = nCOS9;
+    register const real *c = COS9;
     register real *out2 = o2;
 	register real *w = wintab;
 	register real *out1 = o1;

Index: dct64_3dnow.s
===================================================================
RCS file: /cvsroot/mplayer/main/mp3lib/dct64_3dnow.s,v
retrieving revision 1.1.1.1
retrieving revision 1.2
diff -u -r1.1.1.1 -r1.2
--- dct64_3dnow.s	24 Feb 2001 20:31:11 -0000	1.1.1.1
+++ dct64_3dnow.s	29 Jun 2001 17:53:53 -0000	1.2
@@ -1,706 +1,932 @@
-///
-/// Replacement of dct64() with AMD's 3DNow! SIMD operations support
-///
-/// Syuuhei Kashiyama <squash at mb.kcom.ne.jp>
-///
-/// The author of this program disclaim whole expressed or implied
-/// warranties with regard to this program, and in no event shall the
-/// author of this program liable to whatever resulted from the use of
-/// this program. Use it at your own risk.
-///
-
[...1606 lines suppressed...]
+	fadds 124(%edx)
+	fld      %st(0)
+	fadds  76(%edx)
+	fistp 288(%edi)
+	fadds  92(%edx)
+	fistp 352(%edi)
+
+	flds  124(%edx)
+	fist  480(%edi)
+	fadds  92(%edx)
+	fistp 416(%edi)
+	movsw
+.L_bye:
+	addl $256,%esp
+	popl %edi
+	popl %esi
+	popl %ebx
+	ret
+	
 

Index: dct64_k7.s
===================================================================
RCS file: /cvsroot/mplayer/main/mp3lib/dct64_k7.s,v
retrieving revision 1.3
retrieving revision 1.4
diff -u -r1.3 -r1.4
--- dct64_k7.s	20 Jun 2001 07:54:19 -0000	1.3
+++ dct64_k7.s	29 Jun 2001 17:53:53 -0000	1.4
@@ -1,677 +1,804 @@
-///
-/// Replacement of dct64() with AMD's 3DNowEx(DSP)! SIMD operations support
-///
-/// This code based 'dct64_3dnow.s' by Syuuhei Kashiyama
-/// <squash at mb.kcom.ne.jp>,only some types of changes have been made:
-///
-///  - added new opcodes PSWAPD, PFPNACC
-///  - decreased number of opcodes (as it was suggested by k7 manual)
-///    (using memory reference as operand of instructions)
-///  - Phase 6 is rewritten with mixing of cpu and mmx opcodes
-///  - change function name for support 3DNowEx! automatic detect
[...1442 lines suppressed...]
+	fadds 124(%edx)
+	fld      %st(0)
+	fadds  76(%edx)
+	fistp 288(%edi)
+	fadds  92(%edx)
+	fistp 352(%edi)
+
+	flds  124(%edx)
+	fist  480(%edi)
+	fadds  92(%edx)
+	fistp 416(%edi)
+	movsw
+.L_bye:
+	addl $256,%esp
+	popl %edi
+	popl %esi
+	popl %ebx
+	ret
+	
 

Index: decod386.c
===================================================================
RCS file: /cvsroot/mplayer/main/mp3lib/decod386.c,v
retrieving revision 1.4
retrieving revision 1.5
diff -u -r1.4 -r1.5
--- decod386.c	13 May 2001 18:30:18 -0000	1.4
+++ decod386.c	29 Jun 2001 17:53:53 -0000	1.5
@@ -105,6 +105,15 @@
 }
 #endif
 
+synth_func_t synth_func;
+
+int synth_1to1_MMX( real *bandPtr,int channel,short * samples)
+{
+    static short buffs[2][2][0x110];
+    static int bo = 1;
+    synth_1to1_MMX_s(bandPtr, channel, samples, (short *) buffs, &bo); 
+    return 0;
+  } 
 
 static int synth_1to1(real *bandPtr,int channel,unsigned char *out,int *pnt)
 {
@@ -117,39 +126,12 @@
   int clip = 0;
   int bo1;
 
-  #ifdef HAVE_SSE_MP3
-  //if ( _3dnow )
-   {
-    int ret;
-    ret=synth_1to1_sse( bandPtr,channel,out+*pnt );
-    *pnt+=128;
-    return ret;
-   }
-  #endif
-  #ifdef HAVE_3DNOWEX
-  if ( _3dnow > 1 )
+  if ( synth_func )
    {
     int ret;
-    ret=synth_1to1_3dnowex( bandPtr,channel,out+*pnt );
+    ret=(*synth_func)( bandPtr,channel,samples);
     *pnt+=128;
     return ret;
-   }
-  #endif
-  #ifdef HAVE_3DNOW
-  if ( _3dnow )
-   {
-    int ret;
-    ret=synth_1to1_3dnow( bandPtr,channel,out+*pnt );
-    *pnt+=128;
-    return ret;
-   }
-  #endif
-  if ( _i586 )
-   {
-     int ret;
-     ret=synth_1to1_pent( bandPtr,channel,out+*pnt );
-     *pnt+=128;
-     return ret;
    }
 
   if(!channel) {     /* channel=0 */

Index: layer2.c
===================================================================
RCS file: /cvsroot/mplayer/main/mp3lib/layer2.c,v
retrieving revision 1.1.1.1
retrieving revision 1.2
diff -u -r1.1.1.1 -r1.2
--- layer2.c	24 Feb 2001 20:31:08 -0000	1.1.1.1
+++ layer2.c	29 Jun 2001 17:53:53 -0000	1.2
@@ -50,8 +50,16 @@
   {
     double m=mulmul[k];
     table = muls[k];
+    if(_has_mmx) 
+    {
+        for(j=3,i=0;i<63;i++,j--)
+	  *table++ = 16384 * m * pow(2.0,(double) j / 3.0);
+    }
+    else
     for(j=3,i=0;i<63;i++,j--)
+    {
       *table++ = m * pow(2.0,(double) j / 3.0);
+    }
     *table++ = 0.0;
   }
 }

Index: layer3.c
===================================================================
RCS file: /cvsroot/mplayer/main/mp3lib/layer3.c,v
retrieving revision 1.2
retrieving revision 1.3
diff -u -r1.2 -r1.3
--- layer3.c	7 May 2001 01:59:58 -0000	1.2
+++ layer3.c	29 Jun 2001 17:53:53 -0000	1.3
@@ -22,9 +22,9 @@
 #define GP2MAX (256+118+4)
 static real gainpow2[GP2MAX];
 
-static real nCOS9[9];
+real COS9[9];
 static real COS6_1,COS6_2;
-static real tfcos36[9];
+real tfcos36[9];
 static real tfcos12[3];
 #ifdef NEW_DCT9
 static real cos9[3],cos18[3];
@@ -111,8 +111,12 @@
   int i,j,k,l;
 
   for(i=-256;i<118+4;i++)
-    gainpow2[i+256] = pow((double)2.0,-0.25 * (double) (i+210) );
-
+  {
+    if(_has_mmx)
+      gainpow2[i+256] = 16384.0 * pow((double)2.0,-0.25 * (double) (i+210) );
+    else
+      gainpow2[i+256] = pow((double)2.0,-0.25 * (double) (i+210) );
+  }
   for(i=0;i<8207;i++)
     ispow[i] = pow((double)i,(double)4.0/3.0);
 
@@ -139,7 +143,7 @@
   }
 
   for(i=0;i<9;i++)
-    nCOS9[i] = cos( M_PI / 18.0 * (double) i);
+    COS9[i] = cos( M_PI / 18.0 * (double) i);
 
   for(i=0;i<9;i++)
     tfcos36[i] = 0.5 / cos ( M_PI * (double) (i*2+1) / 36.0 );
@@ -1533,6 +1537,9 @@
 /*
  * III_hybrid
  */
+ 
+dct36_func_t dct36_func;
+  
 static void III_hybrid(real fsIn[SBLIMIT][SSLIMIT],real tsOut[SSLIMIT][SBLIMIT],
    int ch,struct gr_info_s *gr_info)
 {
@@ -1553,8 +1560,8 @@
 
    if(gr_info->mixed_block_flag) {
      sb = 2;
-     dct36(fsIn[0],rawout1,rawout2,win[0],tspnt);
-     dct36(fsIn[1],rawout1+18,rawout2+18,win1[0],tspnt+1);
+     (*dct36_func)(fsIn[0],rawout1,rawout2,win[0],tspnt);
+     (*dct36_func)(fsIn[1],rawout1+18,rawout2+18,win1[0],tspnt+1);
      rawout1 += 36; rawout2 += 36; tspnt += 2;
    }
  
@@ -1567,8 +1574,8 @@
    }
    else {
      for (; sb<gr_info->maxb; sb+=2,tspnt+=2,rawout1+=36,rawout2+=36) {
-       dct36(fsIn[sb],rawout1,rawout2,win[bt],tspnt);
-       dct36(fsIn[sb+1],rawout1+18,rawout2+18,win1[bt],tspnt+1);
+       (*dct36_func)(fsIn[sb],rawout1,rawout2,win[bt],tspnt);
+       (*dct36_func)(fsIn[sb+1],rawout1+18,rawout2+18,win1[bt],tspnt+1);
      }
    }
 

Index: mpg123.h
===================================================================
RCS file: /cvsroot/mplayer/main/mp3lib/mpg123.h,v
retrieving revision 1.3
retrieving revision 1.4
diff -u -r1.3 -r1.4
--- mpg123.h	13 May 2001 18:30:18 -0000	1.3
+++ mpg123.h	29 Jun 2001 17:53:53 -0000	1.4
@@ -104,33 +104,22 @@
 };
 
 static long freqs[9];
-#ifdef HAVE_3DNOW
-        real decwin[2*(512+32)];
-#else
-        real decwin[512+32];
-#endif
-       real *pnts[];
+extern real decwin[(512+32)];
+extern real *pnts[];
 
 static int do_layer2(struct frame *fr,int single);
 static int do_layer3(struct frame *fr,int single);
 static int synth_1to1(real *bandPtr,int channel,unsigned char *out,int *pnt);
 
-extern int  synth_1to1_pent( real *,int,unsigned char * );
+extern int synth_1to1_pent( real *,int,short * );
+extern void make_decode_tables_MMX(long scaleval);
+extern int synth_1to1_MMX( real *,int,short * );
+extern int synth_1to1_MMX_s(real *, int, short *, short *, int *);
 extern void dct64(real *a,real *b,real *c);
 
-#ifdef HAVE_3DNOW
- extern void dct64_3dnow( real *,real *, real * );
- extern void dct36_3dnow(real *,real *,real *,real *,real *);
- extern int  synth_1to1_3dnow( real *,int,unsigned char * );
-#endif
-#ifdef HAVE_3DNOWEX
- extern void dct64_3dnowex( real *,real *, real * );
- extern void dct36_3dnowex(real *,real *,real *,real *,real *);
- extern int  synth_1to1_3dnowex( real *,int,unsigned char * );
-#endif
-#ifdef HAVE_SSE_MP3
-// extern void dct64_3dnow( real *,real *, real * );
-// extern void dct36_3dnow(real *,real *,real *,real *,real *);
- extern int  synth_1to1_sse( real *,int,unsigned char * );
-#endif
+extern void dct36_3dnow(real *,real *,real *,real *,real *);
+extern void dct36_3dnowex(real *,real *,real *,real *,real *);
+extern void dct36_sse(real *,real *,real *,real *,real *);
 
+typedef int (*synth_func_t)( real *,int,short * );
+typedef void (*dct36_func_t)(real *,real *,real *,real *,real *);

Index: sr1.c
===================================================================
RCS file: /cvsroot/mplayer/main/mp3lib/sr1.c,v
retrieving revision 1.7
retrieving revision 1.8
diff -u -r1.7 -r1.8
--- sr1.c	19 Jun 2001 22:07:19 -0000	1.7
+++ sr1.c	29 Jun 2001 17:53:53 -0000	1.8
@@ -343,6 +343,12 @@
 
 static int tables_done_flag=0;
 
+/* It's hidden from gcc in assembler */
+extern void dct64_MMX( void );
+extern void dct64_MMX_3dnow( void );
+extern void dct64_MMX_3dnowex( void );
+void (*dct64_MMX_func)( void );
+
 // Init decoder tables.  Call first, once!
 #ifdef USE_FAKE_MONO
 void MP3_Init(int fakemono){
@@ -351,20 +357,41 @@
 #endif
     _CpuID=CpuDetect();
     _i586=ipentium();
-#ifdef HAVE_3DNOW
+#ifndef HAVE_MMX
+    _i586 &= 1;
+#endif
     _3dnow=a3dnow();
+#ifndef HAVE_3DNOW
+    _3dnow = 0;
 #endif
-
-    printf( "mp3lib: Processor ID: %x\n",_CpuID );
-    printf( "mp3lib: i586 processor %sdetected.\n",(_i586?"":"not ") );
-#ifdef HAVE_3DNOW
-    printf( "mp3lib: AMD 3dnow! extension %sdetected.\n",(_3dnow?"":"not ") );
+#ifndef HAVE_3DNOWEX
+    _3dnow &= 1;
+#endif
+    _isse=isse();
+#ifndef HAVE_SSE
+    _isse = 0;
 #endif
-#ifdef HAVE_3DNOWEX
-    printf( "mp3lib: AMD 3dnow-dsp! extension %sdetected.\n",(_3dnow>1?"":"not ") );
+#ifndef HAVE_SSE2
+    _isse &= 1;
 #endif
+    _has_mmx=_i586>1||_3dnow||_isse;
+    printf( "mp3lib: Processor ID: %x\n",_CpuID );
+    if(_i586&&!_3dnow&&!_isse)
+      printf( "mp3lib: Using Pentium%s optimized decore.\n",(_i586>1?"-MMX":""));
+    else
+    if(_isse) 
+    /*
+       Note: It's ok, Since K8 will have SSE2 support and will much faster
+       of P4 ;) 
+     */
+      printf( "mp3lib: Using SSE%s! optimized decore.\n",(_isse>1?"2":""));
+    else
+    if(_3dnow)
+      printf( "mp3lib: Using AMD 3dnow%s! optimized decore.\n",(_3dnow>1?"-dsp(k7)":""));
 
-    make_decode_tables(outscale);
+/* Use it for any MMX cpu */
+   if(_has_mmx)	make_decode_tables_MMX(outscale);
+   else		make_decode_tables(outscale);
 #ifdef USE_FAKE_MONO
     if (fakemono == 1)
         fr.synth=synth_1to1_l;
@@ -381,6 +408,42 @@
     init_layer2();
     init_layer3(fr.down_sample_sblimit);
     tables_done_flag=1;
+
+    dct36_func=dct36;
+  if(_isse)
+  {
+    synth_func=synth_1to1_MMX;
+    dct64_MMX_func=dct64_MMX;
+  }    
+  else
+  if ( _3dnow > 1 )
+  {
+     synth_func=synth_1to1_MMX;
+     dct36_func=dct36_3dnowex;
+     dct64_MMX_func=dct64_MMX_3dnowex;
+  }
+  else
+  if ( _3dnow )
+  {
+    synth_func=synth_1to1_MMX;
+    dct36_func=dct36_3dnow;
+    dct64_MMX_func=dct64_MMX_3dnow;
+  }
+  else
+  if ( _i586 > 1)
+  {
+    synth_func=synth_1to1_MMX;
+    dct64_MMX_func=dct64_MMX;
+  }    
+  else
+  if ( _i586 )
+  {
+    synth_func=synth_1to1_pent;
+  }    
+  else
+  {
+    synth_func = NULL;
+  }
 }
 
 #if 0

Index: tabinit.c
===================================================================
RCS file: /cvsroot/mplayer/main/mp3lib/tabinit.c,v
retrieving revision 1.2
retrieving revision 1.3
diff -u -r1.2 -r1.3
--- tabinit.c	7 Jun 2001 09:08:32 -0000	1.2
+++ tabinit.c	29 Jun 2001 17:53:53 -0000	1.3
@@ -1,20 +1,7 @@
+real decwin[(512+32)], cos64[32], cos32[16], cos16[8], cos8[4], cos4[2];
+real *pnts[]={ cos64,cos32,cos16,cos8,cos4 };
 
-
-#ifdef HAVE_3DNOW
-        real decwin[2*(512+32)] __attribute__((aligned(8)));
-        real cos64[32] __attribute__((aligned(8)));
-	real cos32[16] __attribute__((aligned(8)));
-	real cos16[8] __attribute__((aligned(8)));
-	real cos8[4] __attribute__((aligned(8)));
-	real cos4[2] __attribute__((aligned(8)));
-        real *pnts[]={ cos64,cos32,cos16,cos8,cos4 };
-#else
-        real decwin[512+32];
-        real cos64[16],cos32[8],cos16[4],cos8[2],cos4[1];
-        real *pnts[] = { cos64,cos32,cos16,cos8,cos4 };
-#endif
-
-long intwinbase[] = {
+static long intwinbase[] = {
      0,    -1,    -1,    -1,    -1,    -1,    -1,    -2,    -2,    -2,
     -2,    -3,    -3,    -4,    -4,    -5,    -5,    -6,    -7,    -7,
     -8,    -9,   -10,   -11,   -13,   -14,   -16,   -17,   -19,   -21,
@@ -42,7 +29,7 @@
  64019, 65290, 66494, 67629, 68692, 69679, 70590, 71420, 72169, 72835,
  73415, 73908, 74313, 74630, 74856, 74992, 75038 };
 
-       void make_decode_tables(long scaleval)
+void make_decode_tables(long scaleval)
 {
   int i,j,k,kr,divv;
   real *table,*costab;
@@ -53,17 +40,13 @@
     kr=0x10>>i; divv=0x40>>i;
     costab = pnts[i];
     for(k=0;k<kr;k++) costab[k] = 1.0 / (2.0 * cos(M_PI * ((double) k * 2.0 + 1.0) / (double) divv));
-    #ifdef HAVE_3DNOW
-     if ( _3dnow ) for(k=0;k<kr;k++) costab[k+kr]=-costab[k];
-    #endif
-
   }
 
   table = decwin;
   scaleval = -scaleval;
   for(i=0,j=0;i<256;i++,j++,table+=32)
   {
-         if(table < decwin+512+16)
+    if(table < decwin+512+16)
       table[16] = table[0] = (double) intwinbase[j] / 65536.0 * (double) scaleval;
     if(i % 32 == 31)
       table -= 1023;
@@ -80,14 +63,6 @@
     if(i % 64 == 63)
       scaleval = - scaleval;
   }
-  #ifdef HAVE_3DNOW
-   if ( _3dnow )
-    for(i=0;i<512+32;i++)
-     {
-      decwin[512+31-i]*=65536.0; // allows faster clipping in 3dnow code
-      decwin[512+32+i]=decwin[512+31-i];
-     }
-  #endif
 }
 
 

Index: test2.c
===================================================================
RCS file: /cvsroot/mplayer/main/mp3lib/test2.c,v
retrieving revision 1.2
retrieving revision 1.3
diff -u -r1.2 -r1.3
--- test2.c	17 May 2001 18:20:14 -0000	1.2
+++ test2.c	29 Jun 2001 17:53:53 -0000	1.3
@@ -1,5 +1,5 @@
 
-// gcc test.c -I.. -L. -lMP3 -lm -o test2 -O4
+//gcc test2.c -O2 -I.. -L. ../libvo/aclib.c -lMP3 -lm -o test2
 
 #include <stdio.h>
 #include <stdlib.h>


_______________________________________________
Mplayer-cvslog mailing list
Mplayer-cvslog at lists.sourceforge.net
http://lists.sourceforge.net/lists/listinfo/mplayer-cvslog



More information about the MPlayer-cvslog mailing list