[FFmpeg-devel] [PATCH] M68K: Optimized MUL64/MULH/MULL functions for 68060
ami_stuff
ami_stuff
Mon Aug 3 11:12:12 CEST 2009
> >> > :"d2", "d3", "d4", "d5");
> >>
> >> Avoid using hardcoded registers, and prefer explicitly declared temp
> >> variables.
> >
> > Hmm, I don't know how to do it
>
> int t1, t2, t3, t4;
> asm("..." : "=&d"(t1), "=&d"(t2), "=&d"(t3), "=&d"(t4));
>
> > and what code GCC will generate after this change.
>
> Try and see.
You mean something like this?
/*
 * MUL64: signed 32x32 -> 64-bit multiply of a and b, built from four
 * unsigned 16x16 mulu.w partial products (posted in this thread as part of
 * the 68060 optimisation patch).
 *
 * Asm operands:
 *   %0      in: a, out: x.hl[1] -- ends up holding the low 32 bits
 *   %1      in: b, out: x.hl[0] -- ends up holding the high 32 bits
 *   %2-%4   t1..t3, scratch data registers
 *   %5      t4: bit 31 carries the sign of the result (a ^ b), the low
 *           word carries the shift count 16
 *
 * t1..t4 are early-clobber ("=&d") outputs that the C code never reads;
 * they exist only so the compiler picks the scratch registers instead of
 * the asm hardcoding d2-d5, as requested in the quoted review.
 *
 * Outline:
 *   1. Replace a and b by their negations when negative and remember the
 *      sign of the product in bit 31 of %5.
 *   2. Split both magnitudes into 16-bit halves and form lo*lo, lo*hi,
 *      hi*lo and hi*hi.
 *   3. Sum the partial products into %1:%0, carrying through the X flag.
 *   4. Negate the 64-bit value %1:%0 if the sign bit in %5 is set.
 *
 * Returns x.x, the 64-bit view of the union. Because hl[0] receives the
 * high half and hl[1] the low half, the result is only laid out correctly
 * on a big-endian target such as m68k.
 *
 * NOTE(review): the asm has no clobber list although it modifies the
 * condition codes -- confirm GCC treats them as clobbered on m68k.
 * NOTE(review): the closing brace of the function is missing from this
 * excerpt.
 */
inline int64_t MUL64(int a, int b)
{
int t1, t2, t3, t4;
union { uint64_t x; unsigned hl[2]; } x;
__asm__(
"move.l %0, %5 \n\t" /* %5 = a, first half of the sign computation */
"move.l %0, %4 \n\t" /* %4 = a; this move also sets the flags for bge */
"bge.b 0f \n\t" /* a >= 0: keep %0 and %4 as they are */
"neg.l %0 \n\t" /* %0 = -a */
"neg.l %4 \n\t" /* %4 = -a */
"0: \n\t"
"eor.l %1, %5 \n\t" /* %5 = a ^ b: bit 31 is the sign of the product */
"move.l %1, %3 \n\t" /* %3 = b; sets the flags for the next bge */
"bge.b 1f \n\t" /* b >= 0: keep %1 and %3 as they are */
"neg.l %1 \n\t" /* %1 = -b */
"neg.l %3 \n\t" /* %3 = -b */
"1: \n\t"
/* move.w writes only the low word, so bit 31 of %5 keeps the sign while
 * the low word becomes the shift count. Register shift counts are taken
 * modulo 64 (M68000 PRM), so the upper bits do not change the count. */
"move.w #16, %5 \n\t"
"move.l %0, %2 \n\t" /* %2 = |a| */
"mulu.w %1,%0 \n\t" /* %0 = lo16(|a|) * lo16(|b|) */
"lsr.l %5, %3 \n\t" /* %3 = hi16(|b|) */
"lsr.l %5, %4 \n\t" /* %4 = hi16(|a|) */
"mulu.w %3, %2 \n\t" /* %2 = lo16(|a|) * hi16(|b|) */
"mulu.w %4, %1 \n\t" /* %1 = hi16(|a|) * lo16(|b|) */
"mulu.w %4, %3 \n\t" /* %3 = hi16(|a|) * hi16(|b|) */
"move.l %2, %4 \n\t" /* %4 = copy of the lo*hi cross product */
"lsr.l %5, %2 \n\t" /* %2 = upper 16 bits of the lo*hi cross product */
"add.w %1, %4 \n\t" /* add the low words of both cross products; carry -> X */
"addx.l %2, %3 \n\t" /* %3 = hi*hi + upper half of lo*hi + X */
"lsl.l %5, %4 \n\t" /* move the summed low words up to bits 16..31 */
"lsr.l %5, %1 \n\t" /* %1 = upper 16 bits of the hi*lo cross product */
"add.l %4, %0 \n\t" /* low 32 bits of the magnitude; carry -> X */
"addx.l %3, %1 \n\t" /* high 32 bits of the magnitude, including X */
"tst.l %5 \n\t" /* examine the saved sign in bit 31 */
"bpl.b 2f \n\t" /* signs equal: result stays positive */
"neg.l %0 \n\t" /* negate the low half; borrow -> X */
"negx.l %1 \n\t" /* negate the high half with the borrow */
"2: \n\t"
:"=&d"(x.hl[1]), "=&d"(x.hl[0]), "=&d"(t1), "=&d"(t2), "=&d"(t3), "=&d"(t4)
:"0"(a), "1"(b));
return x.x;
# GCC-generated m68k assembly for the MUL64 function above, as posted.
#
# Register assignment chosen by the compiler (compare with the inline asm):
#   %0 (a, low half of result)  -> d1
#   %1 (b, high half of result) -> d0
#   %2..%5 (t1..t4)             -> d2..d5
#
# Both movem.l masks select d2-d5: 15360 is 0x3C00 in the reversed bit
# order used by the predecrement form, and 60 is 0x3C in the normal order.
# Those four registers are saved on entry and restored before rts.
#
# With 16 bytes pushed, the two loads below read from offsets 20 and 24
# from sp; they feed the asm operands tied to a and b respectively.
#
# NOTE(review): GAS documents that comments are not stripped when a file
# begins with #NO_APP. With this header placed above it the listing no
# longer begins that way, so the annotations should be removed by the
# normal preprocessing pass -- confirm before assembling this listing.
#NO_APP
.text
.even
.globl _MUL64
_MUL64:
movem.l #15360,-(sp)
move.l 20(sp),d1
move.l 24(sp),d0
#APP
# d5 and d4 receive copies of a; a is negated in d1 and d4 when negative
move.l d1, d5
move.l d1, d4
bge.b 0f
neg.l d1
neg.l d4
0:
# d5 = a ^ b (sign in bit 31); b is negated in d0 and d3 when negative
eor.l d0, d5
move.l d0, d3
bge.b 1f
neg.l d0
neg.l d3
1:
# low word of d5 becomes the shift count, bit 31 still holds the sign
move.w #16, d5
# four unsigned 16x16 partial products
move.l d1, d2
mulu.w d0,d1
lsr.l d5, d3
lsr.l d5, d4
mulu.w d3, d2
mulu.w d4, d0
mulu.w d4, d3
# sum the partial products into d0:d1 (high:low), carrying via X
move.l d2, d4
lsr.l d5, d2
add.w d0, d4
addx.l d2, d3
lsl.l d5, d4
lsr.l d5, d0
add.l d4, d1
addx.l d3, d0
# negate the 64-bit value in d0:d1 when the saved sign bit is set
tst.l d5
bpl.b 2f
neg.l d1
negx.l d0
2:
#NO_APP
movem.l (sp)+,#60
rts
If so, I will modify MULH the same way.
More information about the ffmpeg-devel
mailing list