[Mplayer-users] [mplayer-20010411 PATCH] Fix MMX2 support

Nick Kurshev nick at radiotelcom.ru
Thu Apr 12 14:00:59 CEST 2001


Hello!

I'm sorry! In the previous patch I sent you a version that works only on K7 CPUs.
(The PREFETCH instruction exists only on K6-2 and K7 CPUs. Its full analog is PREFETCHNTA,
which exists on both K7 and P3 processors.) I've also slightly rewritten the small_memcpy version
to speed up mplayer. (The previous version of small_memcpy may be faster only for 4-byte
aligned data, but for misaligned data it's slower. Using MOVSB for small blocks is faster
for such data.)
Below is the patch for it:
diff -u -r -N main/libvo/fastmemcpy.h main.new/libvo/fastmemcpy.h
--- main/libvo/fastmemcpy.h	Thu Apr 12 04:09:57 2001
+++ main.new/libvo/fastmemcpy.h	Thu Apr 12 11:44:07 2001
@@ -2,31 +2,19 @@
  This part of code was taken by from Linux-2.4.3 and slightly modified
 for MMX2 instruction set. I have done it since linux uses page aligned
 blocks but mplayer uses weakly ordered data and original sources can not
-speedup their. Only using prefetch and movntq together have effect! 
+speedup their. Only using prefetchnta and movntq together have effect! 
 If you have questions please contact with me: Nick Kurshev: nickols_k at mail.ru.
 */
-
-#ifndef HAVE_MMX2
-//static inline void * __memcpy(void * to, const void * from, unsigned n)
-inline static void * fast_memcpy(void * to, const void * from, unsigned n)
-{
-int d0, d1, d2;
-__asm__ __volatile__(
-	"rep ; movsl\n\t"
-	"testb $2,%b4\n\t"
-	"je 1f\n\t"
-	"movsw\n"
-	"1:\ttestb $1,%b4\n\t"
-	"je 2f\n\t"
-	"movsb\n"
-	"2:"
-	: "=&c" (d0), "=&D" (d1), "=&S" (d2)
-	:"0" (n/4), "q" (n),"1" ((long) to),"2" ((long) from)
-	: "memory");
-return (to);
+#ifdef HAVE_MMX2
+/* for small memory blocks (<256 bytes) this version is faster */
+#define small_memcpy(to,from,n)\
+{\
+__asm__ __volatile__(\
+	"rep ; movsb\n"\
+	::"D" (to), "S" (from),"c" (n)\
+	: "memory");\
 }
-#else
-//inline static void *__memcpy_mmx2(void *to, const void *from, unsigned len)
+
 inline static void * fast_memcpy(void * to, const void * from, unsigned len)
 {
 	void *p;
@@ -37,11 +25,12 @@
   	  p = to;
 	  i = len >> 6; /* len/64 */
 	__asm__ __volatile__ (
-		"1: prefetch (%0)\n"		/* This set is 28 bytes */
-		"   prefetch 64(%0)\n"
-		"   prefetch 128(%0)\n"
-		"   prefetch 192(%0)\n"
-		"   prefetch 256(%0)\n"
+		"1: prefetchnta (%0)\n"		/* This set is 28 bytes */
+		"   prefetchnta 64(%0)\n"
+		"   prefetchnta 128(%0)\n"
+		"   prefetchnta 192(%0)\n"
+		"   prefetchnta 256(%0)\n"
+#if 0		
 		"2:  \n"
 		".section .fixup, \"ax\"\n"
 		"3: movw $0x1AEB, 1b\n"	/* jmp on 26 bytes */
@@ -51,13 +40,14 @@
 		"	.align 4\n"
 		"	.long 1b, 3b\n"
 		".previous"
+#endif		
 		: : "r" (from) );
 		
 	
 	for(; i>0; i--)
 	{
 		__asm__ __volatile__ (
-		"1:  prefetch 320(%0)\n"
+		"1:  prefetchnta 320(%0)\n"
 		"2:  movq (%0), %%mm0\n"
 		"  movq 8(%0), %%mm1\n"
 		"  movq 16(%0), %%mm2\n"
@@ -74,6 +64,7 @@
 		"  movntq %%mm1, 40(%1)\n"
 		"  movntq %%mm2, 48(%1)\n"
 		"  movntq %%mm3, 56(%1)\n"
+#if 0		
 		".section .fixup, \"ax\"\n"
 		"3: movw $0x05EB, 1b\n"	/* jmp on 5 bytes */
 		"   jmp 2b\n"
@@ -82,6 +73,7 @@
 		"	.align 4\n"
 		"	.long 1b, 3b\n"
 		".previous"
+#endif		
 		: : "r" (from), "r" (to) : "memory");
 		from+=64;
 		to+=64;
@@ -91,10 +83,10 @@
 	/*
 	 *	Now do the tail of the block
 	 */
-	memcpy(to, from, len&63);
+	small_memcpy(to, from, len&63);
 	return p;
 }
+#define memcpy(a,b,c) fast_memcpy(a,b,c)
 #endif
 
-#define memcpy(a,b,c) fast_memcpy(a,b,c)
 
I would also suggest that you don't put your own CFLAGS in config.mak. I export CFLAGS
from /etc/profile, and I think many people do the same:

diff -u -r -N main/configure main.new/configure
--- main/configure	Thu Apr 12 00:08:27 2001
+++ main.new/configure	Thu Apr 12 11:00:55 2001
@@ -659,6 +659,11 @@
  _lirclibs=''
 fi
 
+# checking for CFLAGS
+if test "$CFLAGS" = ""; then
+  CFLAGS="-O2 -fomit-frame-pointer -pipe -ffats-math"
+fi
+
 
 echo
 echo "Creating $MCONF"
@@ -669,7 +674,7 @@
 AR=ar
 CC=$_cc
 # OPTFLAGS=-O4 -march=$proc -mcpu=$proc -pipe -fomit-frame-pointer -ffast-math
-OPTFLAGS=-O4 -march=$proc -mcpu=$proc -pipe -ffast-math
+OPTFLAGS=$CFLAGS
 # LIBS=-L/usr/lib -L/usr/local/lib $_x11libdir $_gllib $_sdllib $_dgalib $_x11lib $_xvlib
 X_LIBS=$_x11libdir $_gllib $_sdllib $_dgalib $_x11lib $_xvlib $_vmlib $_svgalib
 TERMCAP_LIB=$_libtermcap


Best regards! Nick




_______________________________________________
Mplayer-users mailing list
Mplayer-users at lists.sourceforge.net
http://lists.sourceforge.net/lists/listinfo/mplayer-users



More information about the MPlayer-users mailing list