[MPlayer-dev-eng] [PATCH] Make mp3lib SIMD optimizations work on AMD64, the Finale
Zuxy Meng
zuxy.meng at gmail.com
Wed Jun 6 11:01:29 CEST 2007
Hi,
2007/6/6, Diego Biurrun <diego at biurrun.de>:
> On Wed, Jun 06, 2007 at 04:34:57PM +0800, Zuxy Meng wrote:
> >
> > 2007/6/6, Diego Biurrun <diego at biurrun.de>:
> > > On Wed, Jun 06, 2007 at 02:33:28PM +0800, Zuxy Meng wrote:
> > > >
> > > > The attached patch modifies macros and the Makefile, effectively
> > > > turning everything on for AMD64. The result is 47% faster decoding on
> > > > a K8.
> > > >
> > > > --- mp3lib/Makefile (revision 23483)
> > > > +++ mp3lib/Makefile (working copy)
> > > > @@ -3,18 +3,23 @@
> > > >
> > > > SRCS_COMMON = sr1.c
> > > > +ifeq ($(TARGET_ARCH_X86),yes)
> > > > +SRCS_COMMON-$(TARGET_MMX) += decode_MMX.c
> > > > +SRCS_COMMON-$(TARGET_SSE) += dct64_sse.c
> > > > ifeq ($(TARGET_ARCH_X86_32),yes)
> > > > SRCS_COMMON += decode_i586.c
> > > > -SRCS_COMMON-$(TARGET_MMX) += decode_MMX.c dct64_MMX.c
> > > > +SRCS_COMMON-$(TARGET_MMX) += dct64_MMX.c
> > > > SRCS_COMMON-$(TARGET_3DNOW) += dct36_3dnow.c dct64_3dnow.c
> > > > SRCS_COMMON-$(TARGET_3DNOWEX) += dct36_k7.c dct64_k7.c
> > > > -SRCS_COMMON-$(TARGET_SSE) += dct64_sse.c
> > > > endif
> > > > +endif
> > > > SRCS_COMMON-$(TARGET_ALTIVEC) += dct64_altivec.c
> > >
> > > I'd prefer it if you would avoid the nested conditionals.
> >
> > OK.
> >
> > > > include ../mpcommon.mak
> > > >
> > > > +ifeq ($(TARGET_ARCH_X86_32),yes)
> > > > decode_i586.o: CFLAGS += -fomit-frame-pointer
> > > > +endif
> > >
> > > This is ugly; move it into the conditional above. Also, could you
> > > double-check that it is still required?
> >
> > CFLAGS are defined in mpcommon.mak so this line can't be merged with
> > other parts.
>
> Right.
>
> > And this line must be disabled for AMD64 because
> > decode_i586 can't compile.
>
> Then it should be unnecessary, decode_i586 is not compiled at all on
> AMD64...
I don't remember clearly. Anyway, the revised patch is attached. If
anyone runs into problems compiling, we can just put that line back.
--
Zuxy
Beauty is truth,
While truth is beauty.
PGP KeyID: E8555ED6
-------------- next part --------------
Index: mp3lib/sr1.c
===================================================================
--- mp3lib/sr1.c (revision 23483)
+++ mp3lib/sr1.c (working copy)
@@ -32,8 +32,9 @@
#include "libvo/fastmemcpy.h"
-#ifdef ARCH_X86_32
-#define CAN_COMPILE_X86_ASM
+#ifdef ARCH_X86_64
+#undef HAVE_3DNOW
+#undef HAVE_3DNOWEX
#endif
//static FILE* mp3_file=NULL;
@@ -137,7 +138,7 @@
// if(MP3_frames>=7741) printf("getbits_fast: bits=%d bitsleft=%d wordptr=%x\n",number_of_bits,bitsleft,wordpointer);
if((bitsleft-=number_of_bits)<0) return 0;
if(!number_of_bits) return 0;
-#if defined(CAN_COMPILE_X86_ASM)
+#ifdef ARCH_X86
rval = bswap_16(*((uint16_t *)wordpointer));
#else
/*
@@ -180,7 +181,7 @@
LOCAL int stream_head_read(unsigned char *hbuf,uint32_t *newhead){
if(mp3_read(hbuf,4) != 4) return FALSE;
-#if defined(CAN_COMPILE_X86_ASM)
+#ifdef ARCH_X86
*newhead = bswap_32(*((uint32_t*)hbuf));
#else
/*
@@ -415,8 +416,6 @@
make_decode_tables(outscale);
-#ifdef CAN_COMPILE_X86_ASM
-
#ifdef HAVE_MMX
if (gCpuCaps.hasMMX)
{
@@ -459,6 +458,7 @@
}
else
#endif
+#ifdef ARCH_X86_32
if (gCpuCaps.cpuType >= CPUTYPE_I586)
{
synth_func = synth_1to1_pent;
Index: mp3lib/decod386.c
===================================================================
--- mp3lib/decod386.c (revision 23483)
+++ mp3lib/decod386.c (working copy)
@@ -102,7 +102,7 @@
static synth_func_t synth_func;
-#if defined(CAN_COMPILE_X86_ASM) && defined(HAVE_MMX)
+#ifdef HAVE_MMX
extern int synth_1to1_MMX( real *bandPtr,int channel,short * samples);
#endif
@@ -125,7 +125,7 @@
*pnt += 128;
/* optimized for x86 */
-#if defined(CAN_COMPILE_X86_ASM)
+#ifdef ARCH_X86
if ( synth_func )
{
// printf("Calling %p, bandPtr=%p channel=%d samples=%p\n",synth_func,bandPtr,channel,samples);
Index: mp3lib/Makefile
===================================================================
--- mp3lib/Makefile (revision 23483)
+++ mp3lib/Makefile (working copy)
@@ -3,12 +3,15 @@
LIBNAME_COMMON = libmp3.a
SRCS_COMMON = sr1.c
+ifeq ($(TARGET_ARCH_X86),yes)
+SRCS_COMMON-$(TARGET_MMX) += decode_MMX.c
+SRCS_COMMON-$(TARGET_SSE) += dct64_sse.c
+endif
ifeq ($(TARGET_ARCH_X86_32),yes)
SRCS_COMMON += decode_i586.c
-SRCS_COMMON-$(TARGET_MMX) += decode_MMX.c dct64_MMX.c
+SRCS_COMMON-$(TARGET_MMX) += dct64_MMX.c
SRCS_COMMON-$(TARGET_3DNOW) += dct36_3dnow.c dct64_3dnow.c
SRCS_COMMON-$(TARGET_3DNOWEX) += dct36_k7.c dct64_k7.c
-SRCS_COMMON-$(TARGET_SSE) += dct64_sse.c
endif
SRCS_COMMON-$(TARGET_ALTIVEC) += dct64_altivec.c
More information about the MPlayer-dev-eng
mailing list