[Mplayer-cvslog] CVS: main/libavcodec/i386 sad_mmx2.s,NONE,1.1 fdct_mmx.s,1.1,1.2 sad_mmx.s,1.2,1.3
Nick Kurshev
nick at mplayer.dev.hu
Tue Jul 10 10:29:08 CEST 2001
- Previous message: [Mplayer-cvslog] CVS: main/libac3 Makefile,1.11,1.12 imdct.c,1.7,1.8 srfft.c,1.2,1.3
- Next message: [Mplayer-cvslog] CVS: main/libavcodec dsputil.c,1.1,1.2 dsputil.h,1.3,1.4 h263.c,1.2,1.3 Makefile,1.2,1.3 motion_est.c,1.2,1.3 mpegvideo.c,1.2,1.3
- Messages sorted by:
[ date ]
[ thread ]
[ subject ]
[ author ]
Update of /cvsroot/mplayer/main/libavcodec/i386
In directory mplayer:/var/tmp.root/cvs-serv2386/i386
Modified Files:
fdct_mmx.s sad_mmx.s
Added Files:
sad_mmx2.s
Log Message:
Sync with mplayer's config semantic
--- NEW FILE ---
; MMX2 optimized routines for SAD of 16*16 macroblocks
; Copyright (C) Juan J. Sierralta P. <juanjo at atmlab.utfsm.cl>
;
; dist1_* Original Copyright (C) 2000 Chris Atenasio <chris at crud.net>
; Enhancements and rest Copyright (C) 2000 Andrew Stevens <as at comlab.ox.ac.uk>
;
; This program is free software; you can redistribute it and/or
; modify it under the terms of the GNU General Public License
; as published by the Free Software Foundation; either version 2
; of the License, or (at your option) any later version.
;
; This program is distributed in the hope that it will be useful,
; but WITHOUT ANY WARRANTY; without even the implied warranty of
; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
; GNU General Public License for more details.
;
; You should have received a copy of the GNU General Public License
; along with this program; if not, write to the Free Software
; Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
;
global pix_abs16x16_mmx2

;-----------------------------------------------------------------------
; int pix_abs16x16_mmx2(unsigned char *pix1, unsigned char *pix2,
;                       int lx, int h);
;
; Sum of absolute differences (SAD) between two 16-byte-wide blocks,
; computed with the MMX2 psadbw instruction, four rows per iteration.
;
; Syntax: NASM (Intel operand order), 32-bit code.
; In:    [ebp+8]  = pix1  first block
;        [ebp+12] = pix2  second block
;        [ebp+16] = lx    byte distance from one row to the next
;        [ebp+20] = h     number of rows; rows are consumed in groups
;                         of four, so a value that is not a multiple
;                         of 4 is effectively rounded up
; Out:   eax = SAD over the processed rows (0 when h <= 0)
; Saved: ebx, ecx, edx, esi, edi, ebp are pushed on entry and restored
; Clobb: mm0, mm1, mm4-mm7, flags
; Note:  no emms is executed here (it is commented out below), so the
;        MMX state is still live on return.
;        NOTE(review): presumably the caller issues emms - confirm.
;
; Register roles inside the loop:
;   esi = current row of pix1
;   edi = current row of pix2
;   ecx = rows left
;   edx = lx
;   mm0 = running SAD of bytes 0..7  of every row (low dword)
;   mm1 = running SAD of bytes 8..15 of every row (low dword)
;   mm4, mm5 = per-row temporaries (rows 1 and 3 of each group)
;   mm6, mm7 = per-row temporaries (rows 2 and 4 of each group)
;-----------------------------------------------------------------------

align 16
pix_abs16x16_mmx2:
        push    ebp                     ; set up frame pointer
        mov     ebp, esp

        push    ebx                     ; save every general register the
        push    ecx                     ; routine might touch, restored in
        push    edx                     ; reverse order before returning
        push    esi
        push    edi

        pxor    mm0, mm0                ; clear both accumulators
        pxor    mm1, mm1
        mov     esi, [ebp+8]            ; esi = pix1
        mov     edi, [ebp+12]           ; edi = pix2
        prefetchnta [esi]
        prefetchnta [edi]
        mov     edx, [ebp+16]           ; edx = lx
        mov     ecx, [ebp+20]           ; ecx = h
        test    ecx, ecx
        jle     near .done              ; nothing to do: return 0, read no pixels
        jmp     .next4row               ; hop over the alignment padding

align 16
.next4row:
        ; ---- row 1 of the group ----
        prefetchnta [esi+edx]           ; hint: start fetching the next row
        prefetchnta [edi+edx]
        movq    mm4, [edi]              ; pix2 bytes 0..7
        movq    mm5, [edi+8]            ; pix2 bytes 8..15
        psadbw  mm4, [esi]              ; mm4 = SAD against pix1 bytes 0..7
        psadbw  mm5, [esi+8]            ; mm5 = SAD against pix1 bytes 8..15
        paddd   mm0, mm4                ; psadbw zero-extends its 16-bit sum,
        paddd   mm1, mm5                ; so dword adds accumulate without wrap

        ; ---- row 2 ----
        add     edi, edx
        add     esi, edx
        prefetchnta [esi+edx]
        prefetchnta [edi+edx]
        movq    mm6, [edi]
        movq    mm7, [edi+8]
        psadbw  mm6, [esi]
        psadbw  mm7, [esi+8]
        paddd   mm0, mm6
        paddd   mm1, mm7

        ; ---- row 3 ----
        add     edi, edx
        add     esi, edx
        prefetchnta [esi+edx]
        prefetchnta [edi+edx]
        movq    mm4, [edi]
        movq    mm5, [edi+8]
        psadbw  mm4, [esi]
        psadbw  mm5, [esi+8]
        paddd   mm0, mm4
        paddd   mm1, mm5

        ; ---- row 4 ----
        add     edi, edx
        add     esi, edx
        prefetchnta [esi+edx]
        prefetchnta [edi+edx]
        movq    mm6, [edi]
        movq    mm7, [edi+8]
        psadbw  mm6, [esi]
        psadbw  mm7, [esi+8]
        paddd   mm0, mm6
        paddd   mm1, mm7

        ; ---- advance to the next group of four rows ----
        ; No prefetch here: the top of the loop issues it for exactly
        ; these addresses ([esi+edx], [edi+edx]) on the next pass.
        add     esi, edx
        add     edi, edx
        sub     ecx, 4                  ; sets the flags tested below
        jg      near .next4row          ; signed test: stops once ecx <= 0,
                                        ; even if h was not a multiple of 4

.done:
        paddd   mm0, mm1                ; combine left and right halves
        movd    eax, mm0                ; return value

        pop     edi
        pop     esi
        pop     edx
        pop     ecx
        pop     ebx

        pop     ebp                     ; restore caller's frame pointer

        ;emms                           ; clear mmx registers
        ret
Index: fdct_mmx.s
===================================================================
RCS file: /cvsroot/mplayer/main/libavcodec/i386/fdct_mmx.s,v
retrieving revision 1.1
retrieving revision 1.2
diff -u -r1.1 -r1.2
--- fdct_mmx.s 6 Jul 2001 03:32:40 -0000 1.1
+++ fdct_mmx.s 10 Jul 2001 08:29:06 -0000 1.2
@@ -106,7 +106,7 @@
; //
;
-align 32
+align 16
fdct_mmx:
push ebp ; save stack pointer
mov ebp, esp ; link
Index: sad_mmx.s
===================================================================
RCS file: /cvsroot/mplayer/main/libavcodec/i386/sad_mmx.s,v
retrieving revision 1.2
retrieving revision 1.3
diff -u -r1.2 -r1.3
--- sad_mmx.s 9 Jul 2001 08:31:10 -0000 1.2
+++ sad_mmx.s 10 Jul 2001 08:29:06 -0000 1.3
@@ -1,4 +1,4 @@
-; MMX/MMX2 optimized routines for SAD of 16*16 macroblocks
+; MMX optimized routines for SAD of 16*16 macroblocks
; Copyright (C) Juan J. Sierralta P. <juanjo at atmlab.utfsm.cl>
;
; dist1_* Original Copyright (C) 2000 Chris Atenasio <chris at crud.net>
@@ -38,7 +38,7 @@
; mm7 = temp
-align 32
+align 16
pix_abs16x16_mmx:
push ebp ; save frame pointer
mov ebp, esp
@@ -57,7 +57,7 @@
mov edx, [ebp+16] ; get lx
mov ecx, [ebp+20] ; get rowsleft
jmp .nextrow
-align 32
+align 16
.nextrow:
; First 8 bytes of the row
@@ -169,123 +169,6 @@
;emms ; clear mmx registers
ret ; return
-global pix_abs16x16_mmx2
-
-; int pix_abs16x16_mmx(unsigned char *pix1,unsigned char *pix2, int lx, int h);
-; esi = p1 (init: blk1)
-; edi = p2 (init: blk2)
-; ecx = rowsleft (init: h)
-; edx = lx;
-
-; mm0 = distance accumulators (4 words)
-; mm1 = distance accumulators (4 words)
-; mm2 = temp
-; mm3 = temp
-; mm4 = temp
-; mm5 = temp
-; mm6 = temp
-; mm7 = temp
-
-
-align 32
-pix_abs16x16_mmx2:
- push ebp ; save frame pointer
- mov ebp, esp
-
- push ebx ; Saves registers (called saves convention in
- push ecx ; x86 GCC it seems)
- push edx ;
- push esi
- push edi
-
- pxor mm0, mm0 ; zero acculumators
- pxor mm1, mm1
- mov esi, [ebp+8] ; get pix1
- mov edi, [ebp+12] ; get pix2
- prefetchnta [esi]
- prefetchnta [edi]
- mov edx, [ebp+16] ; get lx
- mov ecx, [ebp+20] ; get rowsleft
- jmp .next4row
-align 32
-
-.next4row:
- ; First row
-
- prefetchnta [esi+edx]
- prefetchnta [edi+edx]
- movq mm4, [edi] ; load first 8 bytes of pix2 row
- movq mm5, [edi+8] ; load last 8 bytes of pix2 row
- psadbw mm4, [esi] ; SAD of first 8 bytes
- psadbw mm5, [esi+8] ; SAD of last 8 bytes
- paddw mm0, mm4 ; Add to acumulators
- paddw mm1, mm5
-
- ; Second row
-
- add edi, edx;
- add esi, edx;
- prefetchnta [esi+edx]
- prefetchnta [edi+edx]
-
- movq mm6, [edi] ; load first 8 bytes of pix2 row
- movq mm7, [edi+8] ; load last 8 bytes of pix2 row
- psadbw mm6, [esi] ; SAD of first 8 bytes
- psadbw mm7, [esi+8] ; SAD of last 8 bytes
- paddw mm0, mm6 ; Add to acumulators
- paddw mm1, mm7
-
- ; Third row
-
- add edi, edx;
- add esi, edx;
- prefetchnta [esi+edx]
- prefetchnta [edi+edx]
-
- movq mm4, [edi] ; load first 8 bytes of pix2 row
- movq mm5, [edi+8] ; load last 8 bytes of pix2 row
- psadbw mm4, [esi] ; SAD of first 8 bytes
- psadbw mm5, [esi+8] ; SAD of last 8 bytes
- paddw mm0, mm4 ; Add to acumulators
- paddw mm1, mm5
-
- ; Fourth row
- add edi, edx;
- add esi, edx;
- prefetchnta [esi+edx]
- prefetchnta [edi+edx]
-
- movq mm6, [edi] ; load first 8 bytes of pix2 row
- movq mm7, [edi+8] ; load last 8 bytes of pix2 row
- psadbw mm6, [esi] ; SAD of first 8 bytes
- psadbw mm7, [esi+8] ; SAD of last 8 bytes
- paddw mm0, mm6 ; Add to acumulators
- paddw mm1, mm7
-
- ; Loop termination
-
- add esi, edx ; update pointers to next row
- add edi, edx
- prefetchnta [esi+edx]
- prefetchnta [edi+edx]
- sub ecx,4
- test ecx, ecx ; check rowsleft
- jnz near .next4row
-
- paddd mm0, mm1 ; Sum acumulators
- movd eax, mm0 ; Store return value
-
- pop edi
- pop esi
- pop edx
- pop ecx
- pop ebx
-
- pop ebp ; restore stack pointer
-
- ;emms ; clear mmx registers
- ret ; return
-
global pix_abs16x16_x2_mmx
; int pix_abs16x16_x2_mmx(unsigned char *pix1,unsigned char *pix2, int lx, int h);
@@ -304,7 +187,7 @@
; mm7 = temp
-align 32
+align 16
pix_abs16x16_x2_mmx:
push ebp ; save frame pointer
mov ebp, esp
@@ -323,7 +206,7 @@
mov edx, [ebp+16] ; get lx
mov ecx, [ebp+20] ; get rowsleft
jmp .nextrow_x2
-align 32
+align 16
.nextrow_x2:
; First 8 bytes of the row
@@ -441,7 +324,7 @@
; mm7 = temp
-align 32
+align 16
pix_abs16x16_y2_mmx:
push ebp ; save frame pointer
mov ebp, esp
@@ -462,7 +345,7 @@
mov ebx, edi
add ebx, edx
jmp .nextrow_y2
-align 32
+align 16
.nextrow_y2:
; First 8 bytes of the row
@@ -583,7 +466,7 @@
; mm7 = temp comparison bit mask p2,p1
-align 32
+align 16
pix_abs16x16_xy2_mmx:
push ebp ; save stack pointer
mov ebp, esp ; so that we can do this
@@ -603,7 +486,7 @@
mov ebx, esi
add ebx, edx
jmp .nextrowmm11 ; snap to it
-align 32
+align 16
.nextrowmm11:
;;
- Previous message: [Mplayer-cvslog] CVS: main/libac3 Makefile,1.11,1.12 imdct.c,1.7,1.8 srfft.c,1.2,1.3
- Next message: [Mplayer-cvslog] CVS: main/libavcodec dsputil.c,1.1,1.2 dsputil.h,1.3,1.4 h263.c,1.2,1.3 Makefile,1.2,1.3 motion_est.c,1.2,1.3 mpegvideo.c,1.2,1.3
- Messages sorted by:
[ date ]
[ thread ]
[ subject ]
[ author ]
More information about the MPlayer-cvslog
mailing list