CVS: main/libvo fastmemcpy.h,1.3,1.4
Update of /cvsroot/mplayer/main/libvo
In directory usw-pr-cvs1:/tmp/cvs-serv7666

Modified Files:
  fastmemcpy.h
Log Message:
P3 fixes...

Index: fastmemcpy.h
===================================================================
RCS file: /cvsroot/mplayer/main/libvo/fastmemcpy.h,v
retrieving revision 1.3
retrieving revision 1.4
diff -C2 -r1.3 -r1.4
*** fastmemcpy.h 2001/04/12 00:09:57 1.3
--- fastmemcpy.h 2001/04/12 14:40:10 1.4
***************
*** 3,31 ****
  for MMX2 instruction set. I have done it since linux uses page aligned
  blocks but mplayer uses weakly ordered data and original sources can not
! speedup their. Only using prefetch and movntq together have effect!
  If you have questions please contact with me: Nick Kurshev: nickols_k@mail.ru.
  */
! 
! #ifndef HAVE_MMX2
! //static inline void * __memcpy(void * to, const void * from, unsigned n)
! inline static void * fast_memcpy(void * to, const void * from, unsigned n)
! {
! int d0, d1, d2;
! __asm__ __volatile__(
! "rep ; movsl\n\t"
! "testb $2,%b4\n\t"
! "je 1f\n\t"
! "movsw\n"
! "1:\ttestb $1,%b4\n\t"
! "je 2f\n\t"
! "movsb\n"
! "2:"
! : "=&c" (d0), "=&D" (d1), "=&S" (d2)
! :"0" (n/4), "q" (n),"1" ((long) to),"2" ((long) from)
! : "memory");
! return (to);
  }
! #else
! //inline static void *__memcpy_mmx2(void *to, const void *from, unsigned len)
  inline static void * fast_memcpy(void * to, const void * from, unsigned len)
  {
--- 3,19 ----
  for MMX2 instruction set. I have done it since linux uses page aligned
  blocks but mplayer uses weakly ordered data and original sources can not
! speedup their. Only using prefetchnta and movntq together have effect!
  If you have questions please contact with me: Nick Kurshev: nickols_k@mail.ru.
  */
! #ifdef HAVE_MMX2
! /* for small memory blocks (<256 bytes) this version is faster */
! #define small_memcpy(to,from,n)\
! {\
! __asm__ __volatile__(\
! "rep ; movsb\n"\
! ::"D" (to), "S" (from),"c" (n)\
! : "memory");\
  }
! 
  inline static void * fast_memcpy(void * to, const void * from, unsigned len)
  {
***************
*** 37,46 ****
  p = to;
  i = len >> 6; /* len/64 */
  __asm__ __volatile__ (
! "1: prefetch (%0)\n" /* This set is 28 bytes */
! " prefetch 64(%0)\n"
! " prefetch 128(%0)\n"
! " prefetch 192(%0)\n"
! " prefetch 256(%0)\n"
  "2: \n"
  ".section .fixup, \"ax\"\n"
--- 25,37 ----
  p = to;
  i = len >> 6; /* len/64 */
+ len&=63;
+ 
  __asm__ __volatile__ (
! "1: prefetchnta (%0)\n" /* This set is 28 bytes */
! " prefetchnta 64(%0)\n"
! " prefetchnta 128(%0)\n"
! " prefetchnta 192(%0)\n"
! " prefetchnta 256(%0)\n"
! #if 0
  "2: \n"
  ".section .fixup, \"ax\"\n"
***************
*** 52,55 ****
--- 43,47 ----
  " .long 1b, 3b\n"
  ".previous"
+ #endif
  : : "r" (from) );
  
***************
*** 58,62 ****
  {
  __asm__ __volatile__ (
! "1: prefetch 320(%0)\n"
  "2: movq (%0), %%mm0\n"
  " movq 8(%0), %%mm1\n"
--- 50,54 ----
  {
  __asm__ __volatile__ (
! "1: prefetchnta 320(%0)\n"
  "2: movq (%0), %%mm0\n"
  " movq 8(%0), %%mm1\n"
***************
*** 75,78 ****
--- 67,71 ----
  " movntq %%mm2, 48(%1)\n"
  " movntq %%mm3, 56(%1)\n"
+ #if 0
  ".section .fixup, \"ax\"\n"
  "3: movw $0x05EB, 1b\n" /* jmp on 5 bytes */
***************
*** 83,86 ****
--- 76,80 ----
  " .long 1b, 3b\n"
  ".previous"
+ #endif
  : : "r" (from), "r" (to) : "memory");
  from+=64;
***************
*** 92,100 ****
  * Now do the tail of the block
  */
! memcpy(to, from, len&63);
  return p;
  }
  #endif
- #define memcpy(a,b,c) fast_memcpy(a,b,c)
  
  
--- 86,94 ----
  * Now do the tail of the block
  */
! small_memcpy(to, from, len);
  return p;
  }
+ #define memcpy(a,b,c) fast_memcpy(a,b,c)
  #endif
  
  

_______________________________________________
Mplayer-cvslog mailing list
Mplayer-cvslog@lists.sourceforge.net
http://lists.sourceforge.net/lists/listinfo/mplayer-cvslog
participants (1)
-
GEREOFFY