[MPlayer-dev-eng] [PATCH] negating unsigned in asm code

Reimar Döffinger Reimar.Doeffinger at stud.uni-karlsruhe.de
Tue Jun 28 12:39:31 CEST 2005


Hi,
rgb2rgb_template.c causes a lot of crashes on AMD64, because the current
code first negates unsigned (32-bit) variables and only then extends them
to 64 bits; the extension is a zero-extension, so the result is a very big
positive value instead of a negative one (e.g. with width == 8,
(long)-width is 0xFFFFFFF8 rather than -8).
One possibility would be to make all the unsigned variables signed.
The attached patch instead does the subtraction/negation in assembler,
which fixes this as well (at a small cost in readability, but not much
IMHO).
Is it okay to apply?

Greetings,
Reimar Döffinger
-------------- next part --------------
Index: rgb2rgb_template.c
===================================================================
RCS file: /cvsroot/mplayer/main/postproc/rgb2rgb_template.c,v
retrieving revision 1.73
diff -u -r1.73 rgb2rgb_template.c
--- rgb2rgb_template.c	22 Mar 2005 10:11:42 -0000	1.73
+++ rgb2rgb_template.c	28 Jun 2005 10:33:55 -0000
@@ -1343,6 +1343,7 @@
 #ifdef HAVE_MMX
 /* TODO: unroll this loop */
 	asm volatile (
+		"sub 7, %2			\n\t"
 		"xor %%"REG_a", %%"REG_a"	\n\t"
 		".balign 16			\n\t"
 		"1:				\n\t"
@@ -1361,7 +1362,7 @@
 		"add $8, %%"REG_a"		\n\t"
 		"cmp %2, %%"REG_a"		\n\t"
 		" jb 1b				\n\t"
-		:: "r" (src), "r"(dst), "r" ((long)src_size-7)
+		:: "r" (src), "r"(dst), "r" ((long)src_size)
 		: "%"REG_a
 	);
 
@@ -2106,6 +2107,7 @@
 		{
 			asm volatile(
 				"mov %2, %%"REG_a"		\n\t"
+				"neg %%"REG_a"			\n\t"
 				"movq "MANGLE(bgr2YCoeff)", %%mm6		\n\t"
 				"movq "MANGLE(w1111)", %%mm5		\n\t"
 				"pxor %%mm7, %%mm7		\n\t"
@@ -2170,7 +2172,7 @@
 				MOVNTQ" %%mm0, (%1, %%"REG_a")	\n\t"
 				"add $8, %%"REG_a"		\n\t"
 				" js 1b				\n\t"
-				: : "r" (src+width*3), "r" (ydst+width), "g" ((long)-width)
+				: : "r" (src+width*3), "r" (ydst+width), "g" ((long)width)
 				: "%"REG_a, "%"REG_b
 			);
 			ydst += lumStride;
@@ -2179,6 +2181,7 @@
 		src -= srcStride*2;
 		asm volatile(
 			"mov %4, %%"REG_a"		\n\t"
+			"neg %%"REG_a"			\n\t"
 			"movq "MANGLE(w1111)", %%mm5		\n\t"
 			"movq "MANGLE(bgr2UCoeff)", %%mm6		\n\t"
 			"pxor %%mm7, %%mm7		\n\t"
@@ -2322,7 +2325,7 @@
 			"movd %%mm0, (%3, %%"REG_a")	\n\t"
 			"add $4, %%"REG_a"		\n\t"
 			" js 1b				\n\t"
-			: : "r" (src+chromWidth*6), "r" (src+srcStride+chromWidth*6), "r" (udst+chromWidth), "r" (vdst+chromWidth), "g" ((long)-chromWidth)
+			: : "r" (src+chromWidth*6), "r" (src+srcStride+chromWidth*6), "r" (udst+chromWidth), "r" (vdst+chromWidth), "g" ((long)chromWidth)
 			: "%"REG_a, "%"REG_b
 		);
 
@@ -2400,6 +2403,7 @@
 #ifdef HAVE_MMX
 #ifdef HAVE_SSE2
 		asm(
+			"sub 15, %3			\n\t"
 			"xor %%"REG_a", %%"REG_a"	\n\t"
 			"1:				\n\t"
 			PREFETCH" 64(%1, %%"REG_a")	\n\t"
@@ -2414,11 +2418,12 @@
 			"add $16, %%"REG_a"		\n\t"
 			"cmp %3, %%"REG_a"		\n\t"
 			" jb 1b				\n\t"
-			::"r"(dest), "r"(src1), "r"(src2), "r" ((long)width-15)
+			::"r"(dest), "r"(src1), "r"(src2), "r" ((long)width)
 			: "memory", "%"REG_a""
 		);
 #else
 		asm(
+			"sub 15, %3			\n\t"
 			"xor %%"REG_a", %%"REG_a"	\n\t"
 			"1:				\n\t"
 			PREFETCH" 64(%1, %%"REG_a")	\n\t"
@@ -2440,7 +2445,7 @@
 			"add $16, %%"REG_a"		\n\t"
 			"cmp %3, %%"REG_a"		\n\t"
 			" jb 1b				\n\t"
-			::"r"(dest), "r"(src1), "r"(src2), "r" ((long)width-15)
+			::"r"(dest), "r"(src1), "r"(src2), "r" ((long)width)
 			: "memory", "%"REG_a
 		);
 #endif


More information about the MPlayer-dev-eng mailing list