[MPlayer-dev-eng] [PATCH] Rewrite synth_1to1_MMX, paving the way toward x86-64

Zuxy Meng zuxy.meng at gmail.com
Fri May 18 05:07:23 CEST 2007


Hi,

2007/5/18, Guillaume POIRIER <poirierg at gmail.com>:
> Hi,
>
> On 5/17/07, Zuxy Meng <zuxy.meng at gmail.com> wrote:
> > Hi,
> >
> > The original version of synth_1to1_MMX hardcoded 32-bit pointers and
> > x86 ABI, making it difficult to port to x86-64. I rewrote the general
> > code in C and removed hardcoded 32-bit registers in the remaining MMX
> > code (therefore obsoleting my previous patch to align the stack
> > manually). The result is not only easier to port but also faster:
> >
> > (Tested with rdtsc on a Dothan)
> > before: 1019 cycles
> > after: 976 cycles
> >
> > Of course the decoded result is identical.
>
> I have no problem with the content of the patch; however, this patch
> has to be split into 2 parts, if that's manageable: the one that
> changes the ASM from hardcoded regs to %num, then the rest of the
> changes.

Hopefully this one is easier to review; I didn't touch hardcoded regs.

Apart from rewriting the generic code in C, I made the following
modifications to the MMX code:
1. Use add/sub instead of lea, when possible (IIRC add/sub may be shorter).
2. Remove an unnecessary conditional jump (I figured out that the jump
is never taken, and the test confirmed that).

-- 
Zuxy
Beauty is truth,
While truth is beauty.
PGP KeyID: E8555ED6
-------------- next part --------------
Index: mp3lib/decod386.c
===================================================================
--- mp3lib/decod386.c	(revision 23328)
+++ mp3lib/decod386.c	(working copy)
@@ -124,13 +124,7 @@
 static synth_func_t synth_func;
 
 #if defined(CAN_COMPILE_X86_ASM) && defined(HAVE_MMX)
-int synth_1to1_MMX( real *bandPtr,int channel,short * samples)
-{
-    static short buffs[2][2][0x110];
-    static int bo = 1;
-    synth_1to1_MMX_s(bandPtr, channel, samples, (short *) buffs, &bo); 
-    return 0;
-} 
+extern int synth_1to1_MMX( real *bandPtr,int channel,short * samples);
 #endif
 
 #ifdef HAVE_ALTIVEC
Index: mp3lib/decode_MMX.c
===================================================================
--- mp3lib/decode_MMX.c	(revision 23328)
+++ mp3lib/decode_MMX.c	(working copy)
@@ -11,6 +11,8 @@
 #include "mangle.h"
 #define real float /* ugly - but only way */
 
+extern short mp3lib_decwins[1088];
+extern void (*dct64_MMX_func)(short*, short*, real*);
 static unsigned long long attribute_used __attribute__((aligned(8))) null_one = 0x0000ffff0000ffffULL;
 static unsigned long long attribute_used __attribute__((aligned(8))) one_null = 0xffff0000ffff0000ULL;
 unsigned long __attribute__((aligned(16))) costab_mmx[] =
@@ -48,49 +50,38 @@
 	1060439283,
 };
 
-static int temp; // buggy gcc 3.x fails if this is moved into the function :(
-void synth_1to1_MMX_s(real *bandPtr, int channel, short *samples,
-                      short *buffs, int *bo)
+int synth_1to1_MMX(real *bandPtr, int channel, short *samples)
 {
+    static short buffs[2][2][0x110] __attribute__((aligned(8)));
+    static int bo = 1;
+    short *b0, (*buf)[0x110], *a, *b;
+    short* window;
+    int bo1, i = 8;
 
+    if (channel == 0) {
+	bo = (bo - 1) & 0xf;
+	buf = buffs[1];
+    } else {
+	samples++;
+	buf = buffs[0];
+    }
+
+    if (bo & 1) {
+	b0 = buf[1];
+	bo1 = bo + 1;
+       	a = buf[0] + bo;
+	b = buf[1] + ((bo + 1) & 0xf);
+    } else {
+	b0 = buf[0];
+	bo1 = bo;
+	b = buf[0] + bo;
+       	a = buf[1] + ((bo + 1) & 0xf);
+    }
+
+    dct64_MMX_func(a, b, bandPtr);
+    window = mp3lib_decwins + 16 - bo1;
+    //printf("DEBUG: channel %d, bo %d, off %d\n", channel, bo, 16 - bo1);
 __asm __volatile(
-        "movl %1,%%ecx\n\t"
-        "movl %2,%%edi\n\t"
-        "movl $15,%%ebx\n\t"
-        "movl %4,%%edx\n\t"
-        "leal (%%edi,%%ecx,2),%%edi\n\t"
-	"decl %%ecx\n\t"
-        "movl %3,%%esi\n\t"
-        "movl (%%edx),%%eax\n\t"
-        "jecxz .L01\n\t"
-        "decl %%eax\n\t"
-        "andl %%ebx,%%eax\n\t"
-        "leal 1088(%%esi),%%esi\n\t"
-        "movl %%eax,(%%edx)\n\t"
-".L01:\n\t"
-        "leal (%%esi,%%eax,2),%%edx\n\t"
-        "movl %%eax,%5\n\t"
-        "incl %%eax\n\t"
-        "andl %%ebx,%%eax\n\t"
-        "leal 544(%%esi,%%eax,2),%%ecx\n\t"
-	"incl %%ebx\n\t"
-	"testl $1, %%eax\n\t"
-	"jnz .L02\n\t"
-        "xchgl %%edx,%%ecx\n\t"
-	"incl %5\n\t"
-        "leal 544(%%esi),%%esi\n\t"
-".L02:\n\t"
-	"emms\n\t"
-        "pushl %0\n\t"
-        "pushl %%edx\n\t"
-        "pushl %%ecx\n\t"
-        "call *"MANGLE(dct64_MMX_func)"\n\t"
-	"addl $12, %%esp\n\t"
-	"leal 1(%%ebx), %%ecx\n\t"
-        "subl %5,%%ebx\n\t"
-	"pushl %%ecx\n\t"
-	"leal "MANGLE(mp3lib_decwins)"(%%ebx,%%ebx,1), %%edx\n\t"
-	"shrl $1, %%ecx\n\t"
 ASMALIGN(4)
 ".L03:\n\t"
         "movq  (%%edx),%%mm0\n\t"
@@ -133,17 +124,13 @@
 	"por    %%mm0, %%mm1\n\t"
 	"movq   %%mm1,(%%edi)\n\t"
 
-        "leal 64(%%esi),%%esi\n\t"
-        "leal 128(%%edx),%%edx\n\t"
-        "leal 8(%%edi),%%edi\n\t"
+        "add $64,%%esi\n\t"
+        "add $128,%%edx\n\t"
+        "add $8,%%edi\n\t"
 
 	"decl %%ecx\n\t"
         "jnz  .L03\n\t"
 
-	"popl %%ecx\n\t"
-	"andl $1, %%ecx\n\t"
-	"jecxz .next_loop\n\t"
-
         "movq  (%%edx),%%mm0\n\t"
         "pmaddwd (%%esi),%%mm0\n\t"
         "movq  8(%%edx),%%mm1\n\t"
@@ -162,12 +149,10 @@
         "packssdw %%mm0,%%mm0\n\t"
         "movd %%mm0,%%eax\n\t"
 	"movw %%ax, (%%edi)\n\t"
-        "leal 32(%%esi),%%esi\n\t"
-        "leal 64(%%edx),%%edx\n\t"
-        "leal 4(%%edi),%%edi\n\t"               
+        "sub $32,%%esi\n\t"
+        "add $64,%%edx\n\t"
+        "add $4,%%edi\n\t"               
 	
-".next_loop:\n\t"
-        "subl $64,%%esi\n\t"
         "movl $7,%%ecx\n\t"
 ASMALIGN(4)
 ".L04:\n\t"
@@ -215,9 +200,9 @@
 	"por    %%mm0, %%mm1\n\t"
 	"movq   %%mm1,(%%edi)\n\t"
 
-        "subl $64,%%esi\n\t"
-        "addl $128,%%edx\n\t"
-        "leal 8(%%edi),%%edi\n\t"
+        "sub $64,%%esi\n\t"
+        "add $128,%%edx\n\t"
+        "add $8,%%edi\n\t"
         "decl %%ecx\n\t"
 	"jnz  .L04\n\t"
 
@@ -242,7 +227,9 @@
         "movd %%mm0,%%eax\n\t"
 	"movw %%ax,(%%edi)\n\t"
 	"emms\n\t"
-        :
-	:"m"(bandPtr),"m"(channel),"m"(samples),"m"(buffs),"m"(bo), "m"(temp)
-	:"memory","%edi","%esi","%eax","%ebx","%ecx","%edx","%esp");
+	:"+c"(i), "+d"(window), "+S"(b0), "+D"(samples)
+	:
+	:"memory", "%eax");
+    return 0;
 }
+


More information about the MPlayer-dev-eng mailing list