Update of /cvsroot/mplayer/main/mp3lib In directory usw-pr-cvs1:/tmp/cvs-serv6994/main/mp3lib Modified Files: decode_k7.s Log Message: Last minute improvements before release Index: decode_k7.s =================================================================== RCS file: /cvsroot/mplayer/main/mp3lib/decode_k7.s,v retrieving revision 1.5 retrieving revision 1.6 diff -C2 -r1.5 -r1.6 *** decode_k7.s 2001/05/24 09:42:18 1.5 --- decode_k7.s 2001/06/08 09:41:22 1.6 *************** *** 8,11 **** --- 8,15 ---- /// - decreased number of opcodes (as it was suggested by k7 manual) /// (using memory reference as operand of instructions) + /// - added PREFETCHW opcode. It has different semantic than k6-2 + /// and saves 15-25 cpu clocks for athlon. + /// - partial unrolling loops for removing slower MOVW insns. + /// (Note probably same operation should be done for decode_3dnow) /// - change function name for support 3DNowEx! automatic detect /// *************** *** 43,49 **** .comm buffs,4352,4 .data ! .align 4 ! bo: ! .long 1 .text /* int synth_1to1(real *bandPtr,int channel,unsigned char *out) */ --- 47,54 ---- .comm buffs,4352,4 .data ! .align 8 ! null_one: .long 0x0000ffff, 0x0000ffff ! one_null: .long 0xffff0000, 0xffff0000 ! bo: .long 1 .text /* int synth_1to1(real *bandPtr,int channel,unsigned char *out) */ *************** *** 101,105 **** movl %eax,%ecx subl %edx,%ecx ! movl $16,%ebp .L55: --- 106,111 ---- movl %eax,%ecx subl %edx,%ecx ! movl $8,%ebp ! prefetchw (%esi) .L55: *************** *** 107,150 **** movq (%ecx),%mm0 pfmul (%ebx),%mm0 movq 8(%ecx),%mm1 pfmul 8(%ebx),%mm1 pfadd %mm1,%mm0 movq 16(%ebx),%mm2 pfmul 16(%ecx),%mm2 pfadd %mm2,%mm0 movq 24(%ecx),%mm3 pfmul 24(%ebx),%mm3 pfadd %mm3,%mm0 ! movq 32(%ebx),%mm4 ! pfmul 32(%ecx),%mm4 ! pfadd %mm4,%mm0 ! ! movq 40(%ecx),%mm5 ! pfmul 40(%ebx),%mm5 ! pfadd %mm5,%mm0 ! ! movq 48(%ebx),%mm6 ! pfmul 48(%ecx),%mm6 ! pfadd %mm6,%mm0 ! ! movq 56(%ecx),%mm7 ! pfmul 56(%ebx),%mm7 ! pfadd %mm7,%mm0 ! ! pfnacc %mm0, %mm0 ! ! pf2id %mm0,%mm0 ! movd %mm0,%eax ! sar $16,%eax ! movw %ax,(%esi) ! addl $64,%ebx ! subl $-128,%ecx ! addl $4,%esi decl %ebp jnz .L55 --- 113,180 ---- movq (%ecx),%mm0 pfmul (%ebx),%mm0 + movq 128(%ecx),%mm4 + pfmul 64(%ebx),%mm4 movq 8(%ecx),%mm1 pfmul 8(%ebx),%mm1 pfadd %mm1,%mm0 + movq 136(%ecx),%mm5 + pfmul 72(%ebx),%mm5 + pfadd %mm5,%mm4 movq 16(%ebx),%mm2 pfmul 16(%ecx),%mm2 pfadd %mm2,%mm0 + movq 80(%ebx),%mm6 + pfmul 144(%ecx),%mm6 + pfadd %mm6,%mm4 movq 24(%ecx),%mm3 pfmul 24(%ebx),%mm3 pfadd %mm3,%mm0 + movq 152(%ecx),%mm7 + pfmul 88(%ebx),%mm7 + pfadd %mm7,%mm4 ! movq 32(%ebx),%mm1 ! pfmul 32(%ecx),%mm1 ! pfadd %mm1,%mm0 ! movq 96(%ebx),%mm5 ! pfmul 160(%ecx),%mm5 ! pfadd %mm5,%mm4 ! ! movq 40(%ecx),%mm2 ! pfmul 40(%ebx),%mm2 ! pfadd %mm2,%mm0 ! movq 168(%ecx),%mm6 ! pfmul 104(%ebx),%mm6 ! pfadd %mm6,%mm4 ! movq 48(%ebx),%mm3 ! pfmul 48(%ecx),%mm3 ! pfadd %mm3,%mm0 ! movq 112(%ebx),%mm7 ! pfmul 176(%ecx),%mm7 ! pfadd %mm7,%mm4 ! movq 56(%ecx),%mm1 ! pfmul 56(%ebx),%mm1 ! pfadd %mm1,%mm0 ! movq 184(%ecx),%mm5 ! pfmul 120(%ebx),%mm5 ! pfadd %mm5,%mm4 ! ! pfnacc %mm4, %mm0 ! movq (%esi), %mm1 ! pf2id %mm0, %mm0 ! pand one_null, %mm1 ! psrld $16,%mm0 ! pand null_one, %mm0 ! por %mm0, %mm1 ! movq %mm1,(%esi) ! ! addl $128,%ebx ! addl $256,%ecx ! addl $8,%esi decl %ebp jnz .L55 *************** *** 152,155 **** --- 182,187 ---- / --- end of loop 1 --- + prefetchw (%esi) /* prefetching for writing this block and next loop */ + movd (%ecx),%mm0 pfmul (%ebx),%mm0 *************** *** 190,213 **** movw %ax,(%esi) ! addl $-64,%ebx addl $4,%esi addl $256,%ecx ! movl $15,%ebp .L68: pxor %mm0, %mm0 movq (%ecx),%mm1 pfmul (%ebx),%mm1 pfsub %mm1,%mm0 movq 8(%ecx),%mm2 pfmul 8(%ebx),%mm2 pfsub %mm2,%mm0 movq 16(%ecx),%mm3 pfmul 16(%ebx),%mm3 pfsub %mm3,%mm0 movq 24(%ecx),%mm4 pfmul 24(%ebx),%mm4 --- 222,321 ---- movw %ax,(%esi) ! subl $64,%ebx addl $4,%esi addl $256,%ecx ! movl $7,%ebp .L68: pxor %mm0, %mm0 + pxor %mm4, %mm4 movq (%ecx),%mm1 pfmul (%ebx),%mm1 pfsub %mm1,%mm0 + movq 128(%ecx),%mm5 + pfmul -64(%ebx),%mm5 + pfsub %mm5,%mm4 movq 8(%ecx),%mm2 pfmul 8(%ebx),%mm2 pfsub %mm2,%mm0 + movq 136(%ecx),%mm6 + pfmul -56(%ebx),%mm6 + pfsub %mm6,%mm4 movq 16(%ecx),%mm3 pfmul 16(%ebx),%mm3 pfsub %mm3,%mm0 + movq 144(%ecx),%mm7 + pfmul -48(%ebx),%mm7 + pfsub %mm7,%mm4 + movq 24(%ecx),%mm1 + pfmul 24(%ebx),%mm1 + pfsub %mm1,%mm0 + movq 152(%ecx),%mm5 + pfmul -40(%ebx),%mm5 + pfsub %mm5,%mm4 + + movq 32(%ecx),%mm2 + pfmul 32(%ebx),%mm2 + pfsub %mm2,%mm0 + movq 160(%ecx),%mm6 + pfmul -32(%ebx),%mm6 + pfsub %mm6,%mm4 + + movq 40(%ecx),%mm3 + pfmul 40(%ebx),%mm3 + pfsub %mm3,%mm0 + movq 168(%ecx),%mm7 + pfmul -24(%ebx),%mm7 + pfsub %mm7,%mm4 + + movq 48(%ecx),%mm1 + pfmul 48(%ebx),%mm1 + pfsub %mm1,%mm0 + movq 176(%ecx),%mm5 + pfmul -16(%ebx),%mm5 + pfsub %mm5,%mm4 + + movq 56(%ecx),%mm2 + pfmul 56(%ebx),%mm2 + pfsub %mm2,%mm0 + movq 184(%ecx),%mm6 + pfmul -8(%ebx),%mm6 + pfsub %mm6,%mm4 + + pfacc %mm4,%mm0 + movq (%esi), %mm1 + pf2id %mm0, %mm0 + pand one_null, %mm1 + psrld $16,%mm0 + pand null_one, %mm0 + por %mm0, %mm1 + movq %mm1,(%esi) + + subl $128,%ebx + addl $256,%ecx + addl $8,%esi + decl %ebp + jnz .L68 + + / --- end of loop 2 + + pxor %mm0, %mm0 + + movq (%ecx),%mm1 + pfmul (%ebx),%mm1 + pfsub %mm1,%mm0 + + movq 8(%ecx),%mm2 + pfmul 8(%ebx),%mm2 + pfsub %mm2,%mm0 + + movq 16(%ecx),%mm3 + pfmul 16(%ebx),%mm3 + pfsub %mm3,%mm0 + movq 24(%ecx),%mm4 pfmul 24(%ebx),%mm4 *************** *** 238,249 **** movw %ax,(%esi) - - addl $-64,%ebx - subl $-128,%ecx - addl $4,%esi - decl %ebp - jnz .L68 - - / --- end of loop 2 femms --- 346,349 ---- _______________________________________________ Mplayer-cvslog mailing list Mplayer-cvslog@lists.sourceforge.net http://lists.sourceforge.net/lists/listinfo/mplayer-cvslog