[MPlayer-dev-eng] [PATCH] negating unsigned in asm code
Reimar Döffinger
Reimar.Doeffinger at stud.uni-karlsruhe.de
Tue Jun 28 12:39:31 CEST 2005
Hi,
rgb2rgb_template.c causes a lot of crashes on AMD64: the current
code first negates unsigned variables and only then extends them to 64 bits,
so the result is a very large positive value instead of a negative one.
One possibility would be to make all the unsigned variables signed.
The attached patch instead does the subtraction/negation in assembler,
which fixes this as well (at a slight cost in readability, but not much
IMHO).
Is it okay to apply?
Greetings,
Reimar Döffinger
-------------- next part --------------
Index: rgb2rgb_template.c
===================================================================
RCS file: /cvsroot/mplayer/main/postproc/rgb2rgb_template.c,v
retrieving revision 1.73
diff -u -r1.73 rgb2rgb_template.c
--- rgb2rgb_template.c 22 Mar 2005 10:11:42 -0000 1.73
+++ rgb2rgb_template.c 28 Jun 2005 10:33:55 -0000
@@ -1343,6 +1343,7 @@
#ifdef HAVE_MMX
/* TODO: unroll this loop */
asm volatile (
+ "sub 7, %2 \n\t"
"xor %%"REG_a", %%"REG_a" \n\t"
".balign 16 \n\t"
"1: \n\t"
@@ -1361,7 +1362,7 @@
"add $8, %%"REG_a" \n\t"
"cmp %2, %%"REG_a" \n\t"
" jb 1b \n\t"
- :: "r" (src), "r"(dst), "r" ((long)src_size-7)
+ :: "r" (src), "r"(dst), "r" ((long)src_size)
: "%"REG_a
);
@@ -2106,6 +2107,7 @@
{
asm volatile(
"mov %2, %%"REG_a" \n\t"
+ "neg %%"REG_a" \n\t"
"movq "MANGLE(bgr2YCoeff)", %%mm6 \n\t"
"movq "MANGLE(w1111)", %%mm5 \n\t"
"pxor %%mm7, %%mm7 \n\t"
@@ -2170,7 +2172,7 @@
MOVNTQ" %%mm0, (%1, %%"REG_a") \n\t"
"add $8, %%"REG_a" \n\t"
" js 1b \n\t"
- : : "r" (src+width*3), "r" (ydst+width), "g" ((long)-width)
+ : : "r" (src+width*3), "r" (ydst+width), "g" ((long)width)
: "%"REG_a, "%"REG_b
);
ydst += lumStride;
@@ -2179,6 +2181,7 @@
src -= srcStride*2;
asm volatile(
"mov %4, %%"REG_a" \n\t"
+ "neg %%"REG_a" \n\t"
"movq "MANGLE(w1111)", %%mm5 \n\t"
"movq "MANGLE(bgr2UCoeff)", %%mm6 \n\t"
"pxor %%mm7, %%mm7 \n\t"
@@ -2322,7 +2325,7 @@
"movd %%mm0, (%3, %%"REG_a") \n\t"
"add $4, %%"REG_a" \n\t"
" js 1b \n\t"
- : : "r" (src+chromWidth*6), "r" (src+srcStride+chromWidth*6), "r" (udst+chromWidth), "r" (vdst+chromWidth), "g" ((long)-chromWidth)
+ : : "r" (src+chromWidth*6), "r" (src+srcStride+chromWidth*6), "r" (udst+chromWidth), "r" (vdst+chromWidth), "g" ((long)chromWidth)
: "%"REG_a, "%"REG_b
);
@@ -2400,6 +2403,7 @@
#ifdef HAVE_MMX
#ifdef HAVE_SSE2
asm(
+ "sub 15, %3 \n\t"
"xor %%"REG_a", %%"REG_a" \n\t"
"1: \n\t"
PREFETCH" 64(%1, %%"REG_a") \n\t"
@@ -2414,11 +2418,12 @@
"add $16, %%"REG_a" \n\t"
"cmp %3, %%"REG_a" \n\t"
" jb 1b \n\t"
- ::"r"(dest), "r"(src1), "r"(src2), "r" ((long)width-15)
+ ::"r"(dest), "r"(src1), "r"(src2), "r" ((long)width)
: "memory", "%"REG_a""
);
#else
asm(
+ "sub 15, %3 \n\t"
"xor %%"REG_a", %%"REG_a" \n\t"
"1: \n\t"
PREFETCH" 64(%1, %%"REG_a") \n\t"
@@ -2440,7 +2445,7 @@
"add $16, %%"REG_a" \n\t"
"cmp %3, %%"REG_a" \n\t"
" jb 1b \n\t"
- ::"r"(dest), "r"(src1), "r"(src2), "r" ((long)width-15)
+ ::"r"(dest), "r"(src1), "r"(src2), "r" ((long)width)
: "memory", "%"REG_a
);
#endif
More information about the MPlayer-dev-eng
mailing list