[Mplayer-cvslog] CVS: main/mp3lib decode_MMX.s,1.1,1.2

Nick Kurshev nick at mplayerhq.banki.hu
Tue Jul 3 11:25:19 CEST 2001


Update of /cvsroot/mplayer/main/mp3lib
In directory mplayerhq:/var/tmp.root/cvs-serv17263/main/mp3lib

Modified Files:
	decode_MMX.s 
Log Message:
Partial loop unrolling

Index: decode_MMX.s
===================================================================
RCS file: /cvsroot/mplayer/main/mp3lib/decode_MMX.s,v
retrieving revision 1.1
retrieving revision 1.2
diff -u -r1.1 -r1.2
--- decode_MMX.s	29 Jun 2001 17:53:53 -0000	1.1
+++ decode_MMX.s	3 Jul 2001 09:25:16 -0000	1.2
@@ -3,9 +3,15 @@
 # See ChangeLog of mpg123-0.59s-pre.1 for detail
 # Applied to mplayer by Nick Kurshev <nickols_k at mail.ru>
 #
-# TODO: Partial loops unrolling and removing MOVW insn.
+# Local ChangeLog:
+# - Partial loops unrolling and removing MOVW insn from loops
 #
 
+.data
+.align 8
+null_one: .long 0x0000ffff, 0x0000ffff
+one_null: .long 0xffff0000, 0xffff0000
+
 .text
 
 .globl synth_1to1_MMX_s
@@ -49,64 +55,160 @@
         addl $12,%esp
 	leal 1(%ebx), %ecx
         subl %ebp,%ebx                
-
+	pushl %ecx
 	leal decwins(%ebx,%ebx,1), %edx
+	shrl $1, %ecx
+.align 16
 .L3: 
         movq  (%edx),%mm0
+        movq  64(%edx),%mm4
         pmaddwd (%esi),%mm0
+        pmaddwd 32(%esi),%mm4
         movq  8(%edx),%mm1
+        movq  72(%edx),%mm5
         pmaddwd 8(%esi),%mm1
+        pmaddwd 40(%esi),%mm5
         movq  16(%edx),%mm2
+        movq  80(%edx),%mm6
         pmaddwd 16(%esi),%mm2
+        pmaddwd 48(%esi),%mm6
         movq  24(%edx),%mm3
+        movq  88(%edx),%mm7
         pmaddwd 24(%esi),%mm3
+        pmaddwd 56(%esi),%mm7
         paddd %mm1,%mm0
+        paddd %mm5,%mm4
         paddd %mm2,%mm0
+        paddd %mm6,%mm4
         paddd %mm3,%mm0
+        paddd %mm7,%mm4
         movq  %mm0,%mm1
+        movq  %mm4,%mm5
         psrlq $32,%mm1
+        psrlq $32,%mm5
         paddd %mm1,%mm0
+        paddd %mm5,%mm4
         psrad $13,%mm0
+        psrad $13,%mm4
         packssdw %mm0,%mm0
-        movd %mm0,%eax
-	movw %ax, (%edi)
+        packssdw %mm4,%mm4
+
+	movq	(%edi), %mm1
+	punpckldq %mm4, %mm0
+	pand   one_null, %mm1
+	pand   null_one, %mm0
+	por    %mm0, %mm1
+	movq   %mm1,(%edi)
+
+        leal 64(%esi),%esi
+        leal 128(%edx),%edx
+        leal 8(%edi),%edi                
 
-        leal 32(%esi),%esi
-        leal 64(%edx),%edx
-        leal 4(%edi),%edi                
 	decl %ecx
         jnz  .L3
 
+	popl %ecx
+	andl $1, %ecx
+	jecxz .next_loop
 
+        movq  (%edx),%mm0
+        pmaddwd (%esi),%mm0
+        movq  8(%edx),%mm1
+        pmaddwd 8(%esi),%mm1
+        movq  16(%edx),%mm2
+        pmaddwd 16(%esi),%mm2
+        movq  24(%edx),%mm3
+        pmaddwd 24(%esi),%mm3
+        paddd %mm1,%mm0
+        paddd %mm2,%mm0
+        paddd %mm3,%mm0
+        movq  %mm0,%mm1
+        psrlq $32,%mm1
+        paddd %mm1,%mm0
+        psrad $13,%mm0
+        packssdw %mm0,%mm0
+        movd %mm0,%eax
+	movw %ax, (%edi)
+        leal 32(%esi),%esi
+        leal 64(%edx),%edx
+        leal 4(%edi),%edi                
+	
+.next_loop:
         subl $64,%esi                    
-        movl $15,%ecx
+        movl $7,%ecx
+.align 16
 .L4: 
         movq  (%edx),%mm0
+        movq  64(%edx),%mm4
         pmaddwd (%esi),%mm0
+        pmaddwd -32(%esi),%mm4
         movq  8(%edx),%mm1
+        movq  72(%edx),%mm5
         pmaddwd 8(%esi),%mm1
+        pmaddwd -24(%esi),%mm5
         movq  16(%edx),%mm2
+        movq  80(%edx),%mm6
         pmaddwd 16(%esi),%mm2
+        pmaddwd -16(%esi),%mm6
         movq  24(%edx),%mm3
+        movq  88(%edx),%mm7
         pmaddwd 24(%esi),%mm3
+        pmaddwd -8(%esi),%mm7
         paddd %mm1,%mm0
+        paddd %mm5,%mm4
         paddd %mm2,%mm0
+        paddd %mm6,%mm4
         paddd %mm3,%mm0
+        paddd %mm7,%mm4
         movq  %mm0,%mm1
+        movq  %mm4,%mm5
         psrlq $32,%mm1
+        psrlq $32,%mm5
         paddd %mm0,%mm1
+        paddd %mm4,%mm5
         psrad $13,%mm1
+        psrad $13,%mm5
         packssdw %mm1,%mm1
+        packssdw %mm5,%mm5
         psubd %mm0,%mm0
+        psubd %mm4,%mm4
         psubsw %mm1,%mm0
-        movd %mm0,%eax
-	movw %ax,(%edi)
+        psubsw %mm5,%mm4
 
-        subl $32,%esi
-        addl $64,%edx
-        leal 4(%edi),%edi                
+	movq	(%edi), %mm1
+	punpckldq %mm4, %mm0
+	pand   one_null, %mm1
+	pand   null_one, %mm0
+	por    %mm0, %mm1
+	movq   %mm1,(%edi)
+
+        subl $64,%esi
+        addl $128,%edx
+        leal 8(%edi),%edi                
         decl %ecx
 	jnz  .L4
+
+        movq  (%edx),%mm0
+        pmaddwd (%esi),%mm0
+        movq  8(%edx),%mm1
+        pmaddwd 8(%esi),%mm1
+        movq  16(%edx),%mm2
+        pmaddwd 16(%esi),%mm2
+        movq  24(%edx),%mm3
+        pmaddwd 24(%esi),%mm3
+        paddd %mm1,%mm0
+        paddd %mm2,%mm0
+        paddd %mm3,%mm0
+        movq  %mm0,%mm1
+        psrlq $32,%mm1
+        paddd %mm0,%mm1
+        psrad $13,%mm1
+        packssdw %mm1,%mm1
+        psubd %mm0,%mm0
+        psubsw %mm1,%mm0
+        movd %mm0,%eax
+	movw %ax,(%edi)
+
 	emms
         popl %ebx
         popl %esi


_______________________________________________
Mplayer-cvslog mailing list
Mplayer-cvslog at lists.sourceforge.net
http://lists.sourceforge.net/lists/listinfo/mplayer-cvslog



More information about the MPlayer-cvslog mailing list