[Mplayer-cvslog] CVS: main/libavcodec/i386 sad_mmx2.s,NONE,1.1 fdct_mmx.s,1.1,1.2 sad_mmx.s,1.2,1.3

Nick Kurshev nick at mplayer.dev.hu
Tue Jul 10 10:29:08 CEST 2001


Update of /cvsroot/mplayer/main/libavcodec/i386
In directory mplayer:/var/tmp.root/cvs-serv2386/i386

Modified Files:
	fdct_mmx.s sad_mmx.s 
Added Files:
	sad_mmx2.s 
Log Message:
Sync with mplayer's config semantic

--- NEW FILE ---
;  MMX2 optimized routines for SAD of 16*16 macroblocks
;	Copyright (C) Juan J. Sierralta P. <juanjo at atmlab.utfsm.cl>
;
;  dist1_* Original Copyright (C) 2000 Chris Atenasio <chris at crud.net>
;  Enhancements and rest Copyright (C) 2000 Andrew Stevens <as at comlab.ox.ac.uk>

;
;  This program is free software; you can redistribute it and/or
;  modify it under the terms of the GNU General Public License
;  as published by the Free Software Foundation; either version 2
;  of the License, or (at your option) any later version.
;
;  This program is distributed in the hope that it will be useful,
;  but WITHOUT ANY WARRANTY; without even the implied warranty of
;  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
;  GNU General Public License for more details.
;
;  You should have received a copy of the GNU General Public License
;  along with this program; if not, write to the Free Software
;  Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA  02111-1307, USA.
;

global pix_abs16x16_mmx2

;-----------------------------------------------------------------------
; int pix_abs16x16_mmx2(unsigned char *pix1, unsigned char *pix2,
;                       int lx, int h);
;
; Sum of absolute differences (SAD) between two blocks that are 16 bytes
; wide and h rows tall, computed with the MMX2 psadbw instruction.
;
; In:    [ebp+8]  = pix1, first block
;        [ebp+12] = pix2, second block
;        [ebp+16] = lx, byte distance between consecutive rows
;        [ebp+20] = h, number of rows.  Rows are consumed four at a
;                   time, so h is expected to be a positive multiple
;                   of 4; any other positive value is rounded up to the
;                   next group of four rows (reading past row h).
; Out:   eax = SAD, or 0 when h <= 0
; Saves: ecx, edx, esi, edi, ebp (ebx is never written)
; Clobb: mm0, mm1, mm4-mm7, flags
; Note:  emms is not executed here; presumably the caller issues it
;        before using the FPU -- confirm against the callers.
;
; Register roles inside the loop:
;   esi = current row of pix1        edi = current row of pix2
;   edx = lx                         ecx = rows left
;   mm0 = running SAD of bytes 0..7  of every row
;   mm1 = running SAD of bytes 8..15 of every row
;   mm4..mm7 = per-row temporaries
;-----------------------------------------------------------------------

; SAD_ROW16 tmp_lo, tmp_hi
; Adds the SAD of the current 16-byte row to mm0/mm1, then steps esi/edi
; to the next row.  %1 and %2 are MMX scratch registers; callers alternate
; two pairs so consecutive rows do not reuse the same temporaries.
%macro SAD_ROW16 2
	prefetchnta	[esi+edx]		; warm the row after this one
	prefetchnta	[edi+edx]
	movq		%1, [edi]		; pix2 bytes 0..7
	movq		%2, [edi+8]		; pix2 bytes 8..15
	psadbw		%1, [esi]		; sum |pix2 - pix1| over bytes 0..7
	psadbw		%2, [esi+8]		; sum |pix2 - pix1| over bytes 8..15
	paddd		mm0, %1			; psadbw clears bits 16..63, so dword
	paddd		mm1, %2			; adds are exact and cannot wrap at 65535
	add		esi, edx		; step both blocks to the next row
	add		edi, edx
%endmacro

align 16
pix_abs16x16_mmx2:
	push		ebp			; frame pointer gives fixed argument offsets
	mov		ebp, esp

	push		ecx			; kept intact for the caller
	push		edx
	push		esi
	push		edi

	pxor		mm0, mm0		; zero both accumulators
	pxor		mm1, mm1
	mov		esi, [ebp+8]		; pix1
	mov		edi, [ebp+12]		; pix2
	prefetchnta	[esi]
	prefetchnta	[edi]
	mov		edx, [ebp+16]		; lx
	mov		ecx, [ebp+20]		; h
	test		ecx, ecx
	jle		near .done		; no rows to compare: return 0
	jmp		.next4row		; hop over the alignment padding
align 16

.next4row:
	; Invariant: ecx > 0 rows remain, esi/edi point at the next unread row.
	SAD_ROW16	mm4, mm5
	SAD_ROW16	mm6, mm7
	SAD_ROW16	mm4, mm5
	SAD_ROW16	mm6, mm7

	sub		ecx, 4			; sets the flags the branch tests
	jg		near .next4row		; signed test: stops even if h % 4 != 0

.done:
	paddd		mm0, mm1		; combine left-half and right-half sums
	movd		eax, mm0		; return value

	pop		edi
	pop		esi
	pop		edx
	pop		ecx

	pop		ebp			; restore caller's frame pointer

	;emms					; left disabled, as in the original
	ret



Index: fdct_mmx.s
===================================================================
RCS file: /cvsroot/mplayer/main/libavcodec/i386/fdct_mmx.s,v
retrieving revision 1.1
retrieving revision 1.2
diff -u -r1.1 -r1.2
--- fdct_mmx.s	6 Jul 2001 03:32:40 -0000	1.1
+++ fdct_mmx.s	10 Jul 2001 08:29:06 -0000	1.2
@@ -106,7 +106,7 @@
 ;     //
 ;    
 
-align 32
+align 16
 fdct_mmx:
 	push ebp			; save stack pointer
 	mov ebp, esp		; link

Index: sad_mmx.s
===================================================================
RCS file: /cvsroot/mplayer/main/libavcodec/i386/sad_mmx.s,v
retrieving revision 1.2
retrieving revision 1.3
diff -u -r1.2 -r1.3
--- sad_mmx.s	9 Jul 2001 08:31:10 -0000	1.2
+++ sad_mmx.s	10 Jul 2001 08:29:06 -0000	1.3
@@ -1,4 +1,4 @@
-;  MMX/MMX2 optimized routines for SAD of 16*16 macroblocks
+;  MMX optimized routines for SAD of 16*16 macroblocks
 ;	Copyright (C) Juan J. Sierralta P. <juanjo at atmlab.utfsm.cl>
 ;
 ;  dist1_* Original Copyright (C) 2000 Chris Atenasio <chris at crud.net>
@@ -38,7 +38,7 @@
 ; mm7 = temp
 
 
-align 32
+align 16
 pix_abs16x16_mmx:
 	push				ebp					; save frame pointer
 	mov				ebp, esp
@@ -57,7 +57,7 @@
 	mov				edx, [ebp+16]		; get lx
 	mov				ecx, [ebp+20]		; get rowsleft
 	jmp				.nextrow
-align 32
+align 16
 
 .nextrow:
 	; First 8 bytes of the row
@@ -169,123 +169,6 @@
 	;emms								; clear mmx registers
 	ret								; return
 
-global pix_abs16x16_mmx2
-
-; int  pix_abs16x16_mmx(unsigned char *pix1,unsigned char *pix2, int lx, int h);
-; esi = p1 (init:		blk1)
-; edi = p2 (init:		blk2)
-; ecx = rowsleft (init:	 h)
-; edx = lx;
-
-; mm0 = distance accumulators (4 words)
-; mm1 = distance accumulators (4 words) 
-; mm2 = temp 
-; mm3 = temp
-; mm4 = temp
-; mm5 = temp 
-; mm6 = temp
-; mm7 = temp
-
-
-align 32
-pix_abs16x16_mmx2:
-	push				ebp					; save frame pointer
-	mov				ebp, esp
-
-	push				ebx					; Saves registers (called saves convention in
-	push				ecx					; x86 GCC it seems)
-	push				edx					; 
-	push				esi
-	push				edi
-		
-	pxor				mm0, mm0				; zero acculumators
-	pxor				mm1, mm1
-	mov				esi, [ebp+8]		; get pix1
-	mov				edi, [ebp+12]		; get pix2
-	prefetchnta			[esi]
-	prefetchnta			[edi]
-	mov				edx, [ebp+16]		; get lx
-	mov				ecx, [ebp+20]		; get rowsleft
-	jmp				.next4row
-align 32
-
-.next4row:
-	; First row
-	
-	prefetchnta			[esi+edx]
-	prefetchnta			[edi+edx]
-	movq				mm4, [edi]		; load first 8 bytes of pix2 row 
-	movq				mm5, [edi+8]	; load last 8 bytes of pix2 row
-	psadbw			mm4, [esi]		; SAD of first 8 bytes
-	psadbw			mm5, [esi+8]	; SAD of last 8 bytes
-	paddw				mm0, mm4			; Add to acumulators
-	paddw				mm1, mm5
-		
-	; Second row	
-
-	add				edi, edx;
-	add				esi, edx;
-	prefetchnta			[esi+edx]
-	prefetchnta			[edi+edx]
-	
-	movq				mm6, [edi]		; load first 8 bytes of pix2 row 
-	movq				mm7, [edi+8]	; load last 8 bytes of pix2 row
-	psadbw			mm6, [esi]		; SAD of first 8 bytes
-	psadbw			mm7, [esi+8]	; SAD of last 8 bytes
-	paddw				mm0, mm6			; Add to acumulators
-	paddw				mm1, mm7
-		
-	; Third row
-	
-	add				edi, edx;
-	add				esi, edx;
-	prefetchnta			[esi+edx]
-	prefetchnta			[edi+edx]
-	
-	movq				mm4, [edi]		; load first 8 bytes of pix2 row 
-	movq				mm5, [edi+8]	; load last 8 bytes of pix2 row
-	psadbw			mm4, [esi]		; SAD of first 8 bytes
-	psadbw			mm5, [esi+8]	; SAD of last 8 bytes
-	paddw				mm0, mm4			; Add to acumulators
-	paddw				mm1, mm5
-		
-	; Fourth row	
-	add				edi, edx;
-	add				esi, edx;
-	prefetchnta			[esi+edx]
-	prefetchnta			[edi+edx]
-	
-	movq				mm6, [edi]		; load first 8 bytes of pix2 row 
-	movq				mm7, [edi+8]	; load last 8 bytes of pix2 row
-	psadbw			mm6, [esi]		; SAD of first 8 bytes
-	psadbw			mm7, [esi+8]	; SAD of last 8 bytes
-	paddw				mm0, mm6			; Add to acumulators
-	paddw				mm1, mm7
-	
-	; Loop termination
-
-	add				esi, edx		; update pointers to next row
-	add				edi, edx		
-	prefetchnta			[esi+edx]
-	prefetchnta			[edi+edx]
-	sub				ecx,4
-	test				ecx, ecx		; check rowsleft
-	jnz				near .next4row
-	
-	paddd				mm0, mm1		; Sum acumulators
-	movd				eax, mm0		; Store return value
-
-	pop edi
-	pop esi	
-	pop edx			
-	pop ecx			
-	pop ebx			
-
-	pop ebp							; restore stack pointer
-
-	;emms								; clear mmx registers
-	ret								; return
-		
 global pix_abs16x16_x2_mmx
 
 ; int  pix_abs16x16_x2_mmx(unsigned char *pix1,unsigned char *pix2, int lx, int h);
@@ -304,7 +187,7 @@
 ; mm7 = temp
 
 
-align 32
+align 16
 pix_abs16x16_x2_mmx:
 	push				ebp					; save frame pointer
 	mov				ebp, esp
@@ -323,7 +206,7 @@
 	mov				edx, [ebp+16]		; get lx
 	mov				ecx, [ebp+20]		; get rowsleft
 	jmp				.nextrow_x2
-align 32
+align 16
 
 .nextrow_x2:
 	; First 8 bytes of the row
@@ -441,7 +324,7 @@
 ; mm7 = temp
 
 
-align 32
+align 16
 pix_abs16x16_y2_mmx:
 	push				ebp					; save frame pointer
 	mov				ebp, esp
@@ -462,7 +345,7 @@
 	mov				ebx, edi
 	add				ebx, edx
 	jmp				.nextrow_y2
-align 32
+align 16
 
 .nextrow_y2:
 	; First 8 bytes of the row
@@ -583,7 +466,7 @@
 ; mm7 = temp comparison bit mask p2,p1
 
 
-align 32
+align 16
 pix_abs16x16_xy2_mmx:
 	push ebp		; save stack pointer
 	mov ebp, esp	; so that we can do this
@@ -603,7 +486,7 @@
 	mov ebx, esi
     add ebx, edx		
 	jmp .nextrowmm11					; snap to it
-align 32
+align 16
 .nextrowmm11:
 
 		;; 




More information about the MPlayer-cvslog mailing list