[MPlayer-dev-eng] [PATCH] Make mp3lib SIMD optimizations work on AMD64, the Finale

Zuxy Meng zuxy.meng at gmail.com
Wed Jun 6 11:01:29 CEST 2007


Hi,

2007/6/6, Diego Biurrun <diego at biurrun.de>:
> On Wed, Jun 06, 2007 at 04:34:57PM +0800, Zuxy Meng wrote:
> >
> > 2007/6/6, Diego Biurrun <diego at biurrun.de>:
> > > On Wed, Jun 06, 2007 at 02:33:28PM +0800, Zuxy Meng wrote:
> > > >
> > > > The attached patch modifies macros and the Makefile, effectively
> > > > turning everything on for AMD64. The result is 47% faster decoding on
> > > > a K8.
> > > >
> > > > --- mp3lib/Makefile   (revision 23483)
> > > > +++ mp3lib/Makefile   (working copy)
> > > > @@ -3,18 +3,23 @@
> > > >
> > > >  SRCS_COMMON = sr1.c
> > > > +ifeq ($(TARGET_ARCH_X86),yes)
> > > > +SRCS_COMMON-$(TARGET_MMX)     += decode_MMX.c
> > > > +SRCS_COMMON-$(TARGET_SSE)     += dct64_sse.c
> > > >  ifeq ($(TARGET_ARCH_X86_32),yes)
> > > >  SRCS_COMMON                   += decode_i586.c
> > > > -SRCS_COMMON-$(TARGET_MMX)     += decode_MMX.c dct64_MMX.c
> > > > +SRCS_COMMON-$(TARGET_MMX)     += dct64_MMX.c
> > > >  SRCS_COMMON-$(TARGET_3DNOW)   += dct36_3dnow.c dct64_3dnow.c
> > > >  SRCS_COMMON-$(TARGET_3DNOWEX) += dct36_k7.c dct64_k7.c
> > > > -SRCS_COMMON-$(TARGET_SSE)     += dct64_sse.c
> > > >  endif
> > > > +endif
> > > >  SRCS_COMMON-$(TARGET_ALTIVEC) += dct64_altivec.c
> > >
> > > I'd prefer it if you would avoid the nested conditionals.
> >
> > OK.
> >
> > > >  include ../mpcommon.mak
> > > >
> > > > +ifeq ($(TARGET_ARCH_X86_32),yes)
> > > >  decode_i586.o: CFLAGS += -fomit-frame-pointer
> > > > +endif
> > >
> > > This is ugly, move it into the conditional above.  Also, could you
> > > doublecheck it is still required?
> >
> > CFLAGS are defined in mpcommon.mak so this line can't be merged with
> > other parts.
>
> Right.
>
> > And this line must be disabled for AMD64 because
> > decode_i586 can't compile.
>
> Then it should be unnecessary, decode_i586 is not compiled at all on
> AMD64...

I didn't remember it clearly. Anyway, the revised patch is attached. If
anyone runs into problems compiling, we can just put that line back.

-- 
Zuxy
Beauty is truth,
While truth is beauty.
PGP KeyID: E8555ED6
-------------- next part --------------
Index: mp3lib/sr1.c
===================================================================
--- mp3lib/sr1.c	(revision 23483)
+++ mp3lib/sr1.c	(working copy)
@@ -32,8 +32,9 @@
 
 #include "libvo/fastmemcpy.h"
 
-#ifdef ARCH_X86_32
-#define CAN_COMPILE_X86_ASM
+#ifdef ARCH_X86_64
+#undef HAVE_3DNOW
+#undef HAVE_3DNOWEX
 #endif
 
 //static FILE* mp3_file=NULL;
@@ -137,7 +138,7 @@
 //  if(MP3_frames>=7741) printf("getbits_fast: bits=%d  bitsleft=%d  wordptr=%x\n",number_of_bits,bitsleft,wordpointer);
   if((bitsleft-=number_of_bits)<0) return 0;
   if(!number_of_bits) return 0;
-#if defined(CAN_COMPILE_X86_ASM)
+#ifdef ARCH_X86
   rval = bswap_16(*((uint16_t *)wordpointer));
 #else
   /*
@@ -180,7 +181,7 @@
 
 LOCAL int stream_head_read(unsigned char *hbuf,uint32_t *newhead){
   if(mp3_read(hbuf,4) != 4) return FALSE;
-#if defined(CAN_COMPILE_X86_ASM)
+#ifdef ARCH_X86
   *newhead = bswap_32(*((uint32_t*)hbuf));
 #else
   /*
@@ -415,8 +416,6 @@
 
     make_decode_tables(outscale);
 
-#ifdef CAN_COMPILE_X86_ASM
-
 #ifdef HAVE_MMX
     if (gCpuCaps.hasMMX)
     {
@@ -459,6 +458,7 @@
     }
     else
 #endif
+#ifdef ARCH_X86_32
     if (gCpuCaps.cpuType >= CPUTYPE_I586)
     {
 	synth_func = synth_1to1_pent;
Index: mp3lib/decod386.c
===================================================================
--- mp3lib/decod386.c	(revision 23483)
+++ mp3lib/decod386.c	(working copy)
@@ -102,7 +102,7 @@
 
 static synth_func_t synth_func;
 
-#if defined(CAN_COMPILE_X86_ASM) && defined(HAVE_MMX)
+#ifdef HAVE_MMX
 extern int synth_1to1_MMX( real *bandPtr,int channel,short * samples);
 #endif
 
@@ -125,7 +125,7 @@
   *pnt += 128;
 
 /* optimized for x86 */
-#if defined(CAN_COMPILE_X86_ASM)
+#ifdef ARCH_X86
   if ( synth_func )
    {
 //    printf("Calling %p, bandPtr=%p channel=%d samples=%p\n",synth_func,bandPtr,channel,samples);
Index: mp3lib/Makefile
===================================================================
--- mp3lib/Makefile	(revision 23483)
+++ mp3lib/Makefile	(working copy)
@@ -3,12 +3,15 @@
 LIBNAME_COMMON = libmp3.a
 
 SRCS_COMMON = sr1.c
+ifeq ($(TARGET_ARCH_X86),yes)
+SRCS_COMMON-$(TARGET_MMX)     += decode_MMX.c
+SRCS_COMMON-$(TARGET_SSE)     += dct64_sse.c
+endif
 ifeq ($(TARGET_ARCH_X86_32),yes)
 SRCS_COMMON                   += decode_i586.c
-SRCS_COMMON-$(TARGET_MMX)     += decode_MMX.c dct64_MMX.c
+SRCS_COMMON-$(TARGET_MMX)     += dct64_MMX.c
 SRCS_COMMON-$(TARGET_3DNOW)   += dct36_3dnow.c dct64_3dnow.c
 SRCS_COMMON-$(TARGET_3DNOWEX) += dct36_k7.c dct64_k7.c
-SRCS_COMMON-$(TARGET_SSE)     += dct64_sse.c
 endif
 SRCS_COMMON-$(TARGET_ALTIVEC) += dct64_altivec.c
 


More information about the MPlayer-dev-eng mailing list