[MPlayer-dev-eng] [PATCH] Fix packed YUV in dshow vo
Laurent
laurent.aml at gmail.com
Fri Oct 3 20:04:07 CEST 2008
On Fri, Oct 3, 2008 at 12:17 PM, compn <tempn at twmi.rr.com> wrote:
> On Fri, 3 Oct 2008 11:53:18 -0400, Laurent wrote:
>>MPlayer crashes on Intel-based GPU (eg 945) when the Packed YUV
>>colorspace is used.
>>
>>To reproduce the crash, start
>> mplayer.exe -vf format=YUY2 <yourvideo>
>
> got a gdb backtrace? (instructions in bugreports.html)
>
> -compn
> _______________________________________________
> MPlayer-dev-eng mailing list
> MPlayer-dev-eng at mplayerhq.hu
> https://lists.mplayerhq.hu/mailman/listinfo/mplayer-dev-eng
>
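Here is the trace.
The memory copy faults because it reads past the end of the source
buffer: due to different strides, the source buffer is smaller than the
destination buffer, and the copy length (len=245760 in frame #0) equals
the destination buffer size.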
In this case:
image size: 320x240
source stride: 640 (no padding)
source buffer size: 153600
destination stride: 1024
destination buffer size: 245760
#0 0x00500b74 in fast_memcpy (to=0x1210c000, from=0x911c158,
len=245760) at libvo/aclib_template.c:191
#1 0x00429e9d in control (request=13, data=0x867af00) at
libvo/vo_directx.c:1478
#2 0x004a86fa in put_image (vf=0x8650f70, mpi=0x867af00, pts=0) at
libmpcodecs/vf_vo.c:178
#3 0x00475129 in filter_video (sh_video=0x8626008, frame=0x8619bd8,
pts=0) at libmpcodecs/dec_video.c:416
#4 0x004071fc in main (argc=15, argv=0x3f4028) at mplayer.c:1776
Dump of assembler code for function fast_memcpy:
0x00500b00 <fast_memcpy+0>: push %ebp
0x00500b01 <fast_memcpy+1>: mov %esp,%ebp
0x00500b03 <fast_memcpy+3>: push %edi
0x00500b04 <fast_memcpy+4>: push %esi
0x00500b05 <fast_memcpy+5>: push %ebx
0x00500b06 <fast_memcpy+6>: sub $0x4,%esp
0x00500b09 <fast_memcpy+9>: mov 0xc(%ebp),%esi
0x00500b0c <fast_memcpy+12>: prefetchnta (%esi)
0x00500b0f <fast_memcpy+15>: prefetchnta 0x40(%esi)
0x00500b13 <fast_memcpy+19>: prefetchnta 0x80(%esi)
0x00500b1a <fast_memcpy+26>: prefetchnta 0xc0(%esi)
0x00500b21 <fast_memcpy+33>: prefetchnta 0x100(%esi)
0x00500b28 <fast_memcpy+40>: cmpl $0x3f,0x10(%ebp)
0x00500b2c <fast_memcpy+44>: mov 0x8(%ebp),%edi
0x00500b2f <fast_memcpy+47>: jbe 0x500ba4 <fast_memcpy+164>
0x00500b31 <fast_memcpy+49>: mov 0x8(%ebp),%edi
0x00500b34 <fast_memcpy+52>: mov %edi,%eax
0x00500b36 <fast_memcpy+54>: and $0xf,%eax
0x00500b39 <fast_memcpy+57>: jne 0x500c00 <fast_memcpy+256>
0x00500b3f <fast_memcpy+63>: mov 0x10(%ebp),%eax
0x00500b42 <fast_memcpy+66>: shr $0x6,%eax
0x00500b45 <fast_memcpy+69>: test $0xf,%esi
0x00500b4b <fast_memcpy+75>: mov %eax,-0x10(%ebp)
0x00500b4e <fast_memcpy+78>: je 0x500bc0 <fast_memcpy+192>
0x00500b50 <fast_memcpy+80>: mov -0x10(%ebp),%edx
0x00500b53 <fast_memcpy+83>: test %edx,%edx
0x00500b55 <fast_memcpy+85>: je 0x500b9d <fast_memcpy+157>
0x00500b57 <fast_memcpy+87>: mov -0x10(%ebp),%ebx
0x00500b5a <fast_memcpy+90>: xor %ecx,%ecx
0x00500b5c <fast_memcpy+92>: lea 0x0(%esi,%eiz,1),%esi
0x00500b60 <fast_memcpy+96>: lea (%esi,%ecx,1),%edx
0x00500b63 <fast_memcpy+99>: lea (%edi,%ecx,1),%eax
0x00500b66 <fast_memcpy+102>: prefetchnta 0x140(%edx)
0x00500b6d <fast_memcpy+109>: movups (%edx),%xmm0
0x00500b70 <fast_memcpy+112>: movups 0x10(%edx),%xmm1
0x00500b74 <fast_memcpy+116>: movups 0x20(%edx),%xmm2
0x00500b78 <fast_memcpy+120>: movups 0x30(%edx),%xmm3
0x00500b7c <fast_memcpy+124>: movntps %xmm0,(%eax)
0x00500b7f <fast_memcpy+127>: movntps %xmm1,0x10(%eax)
0x00500b83 <fast_memcpy+131>: movntps %xmm2,0x20(%eax)
0x00500b87 <fast_memcpy+135>: movntps %xmm3,0x30(%eax)
0x00500b8b <fast_memcpy+139>: add $0x40,%ecx
0x00500b8e <fast_memcpy+142>: sub $0x1,%ebx
0x00500b91 <fast_memcpy+145>: jne 0x500b60 <fast_memcpy+96>
0x00500b93 <fast_memcpy+147>: mov -0x10(%ebp),%eax
0x00500b96 <fast_memcpy+150>: shl $0x6,%eax
0x00500b99 <fast_memcpy+153>: add %eax,%edi
0x00500b9b <fast_memcpy+155>: add %eax,%esi
0x00500b9d <fast_memcpy+157>: andl $0x3f,0x10(%ebp)
0x00500ba1 <fast_memcpy+161>: sfence
0x00500ba4 <fast_memcpy+164>: mov 0x10(%ebp),%eax
0x00500ba7 <fast_memcpy+167>: test %eax,%eax
0x00500ba9 <fast_memcpy+169>: je 0x500bb0 <fast_memcpy+176>
0x00500bab <fast_memcpy+171>: mov 0x10(%ebp),%ecx
0x00500bae <fast_memcpy+174>: rep movsb %ds:(%esi),%es:(%edi)
0x00500bb0 <fast_memcpy+176>: mov 0x8(%ebp),%eax
0x00500bb3 <fast_memcpy+179>: add $0x4,%esp
0x00500bb6 <fast_memcpy+182>: pop %ebx
0x00500bb7 <fast_memcpy+183>: pop %esi
0x00500bb8 <fast_memcpy+184>: pop %edi
0x00500bb9 <fast_memcpy+185>: pop %ebp
0x00500bba <fast_memcpy+186>: ret
0x00500bbb <fast_memcpy+187>: nop
0x00500bbc <fast_memcpy+188>: lea 0x0(%esi,%eiz,1),%esi
0x00500bc0 <fast_memcpy+192>: xor %ecx,%ecx
0x00500bc2 <fast_memcpy+194>: test %eax,%eax
0x00500bc4 <fast_memcpy+196>: mov %eax,%ebx
0x00500bc6 <fast_memcpy+198>: je 0x500b9d <fast_memcpy+157>
0x00500bc8 <fast_memcpy+200>: lea (%esi,%ecx,1),%edx
0x00500bcb <fast_memcpy+203>: lea (%edi,%ecx,1),%eax
0x00500bce <fast_memcpy+206>: prefetchnta 0x140(%edx)
0x00500bd5 <fast_memcpy+213>: movaps (%edx),%xmm0
0x00500bd8 <fast_memcpy+216>: movaps 0x10(%edx),%xmm1
0x00500bdc <fast_memcpy+220>: movaps 0x20(%edx),%xmm2
0x00500be0 <fast_memcpy+224>: movaps 0x30(%edx),%xmm3
0x00500be4 <fast_memcpy+228>: movntps %xmm0,(%eax)
0x00500be7 <fast_memcpy+231>: movntps %xmm1,0x10(%eax)
0x00500beb <fast_memcpy+235>: movntps %xmm2,0x20(%eax)
0x00500bef <fast_memcpy+239>: movntps %xmm3,0x30(%eax)
0x00500bf3 <fast_memcpy+243>: add $0x40,%ecx
0x00500bf6 <fast_memcpy+246>: sub $0x1,%ebx
0x00500bf9 <fast_memcpy+249>: jne 0x500bc8 <fast_memcpy+200>
0x00500bfb <fast_memcpy+251>: jmp 0x500b93 <fast_memcpy+147>
0x00500bfd <fast_memcpy+253>: lea 0x0(%esi),%esi
0x00500c00 <fast_memcpy+256>: mov $0x10,%ecx
0x00500c05 <fast_memcpy+261>: mov 0x8(%ebp),%edi
0x00500c08 <fast_memcpy+264>: sub %eax,%ecx
0x00500c0a <fast_memcpy+266>: sub %ecx,0x10(%ebp)
0x00500c0d <fast_memcpy+269>: rep movsb %ds:(%esi),%es:(%edi)
0x00500c0f <fast_memcpy+271>: jmp 0x500b3f <fast_memcpy+63>
End of assembler dump.
eax 0x12132e80 303246976
ecx 0x26e80 159360
edx 0x9142fd8 152317912
ebx 0x546 1350
esp 0x22ec18 0x22ec18
ebp 0x22ec28 0x22ec28
esi 0x911c158 152158552
edi 0x1210c000 303087616
eip 0x500b74 0x500b74 <fast_memcpy+116>
eflags 0x210202 [ IF RF ID ]
cs 0x1b 27
ss 0x23 35
ds 0x23 35
es 0x23 35
fs 0x3b 59
gs 0x0 0
st0 -nan(0x4600438046004380) (raw 0xffff4600438046004380)
st1 -nan(0x400000000000) (raw 0xffff0000400000000000)
st2 -nan(0x8c008700860086) (raw 0xffff008c008700860086)
st3 -nan(0x23000000000000) (raw 0xffff0023000000000000)
st4 -nan(0x2300000021c000) (raw 0xffff002300000021c000)
st5 -nan(0x430040804000400) (raw 0xffff0430040804000400)
st6 0 (raw 0x00000000000000000000)
st7 0 (raw 0x00000000000000000000)
fctrl 0xffff037f -64641
fstat 0xffff0000 -65536
ftag 0xffffffff -1
fiseg 0x1b 27
fioff 0x4a86e7 4884199
foseg 0xffff0023 -65501
fooff 0x867b380 141013888
fop 0x518 1304
xmm0 {v4_float = {0x0, 0x0, 0x0, 0x0}, v2_double =
{0x8000000000000000, 0x8000000000000000}, v16_int8 = {0xee, 0xfe,
0xee, 0xfe, 0xee, 0xfe, 0xee, 0xfe, 0xee, 0xfe, 0xee, 0xfe, 0xee,
0xfe, 0xee, 0xfe}, v8_int16 = {0xfeee, 0xfeee, 0xfeee, 0xfeee, 0xfeee,
0xfeee, 0xfeee, 0xfeee}, v4_int32 = {0xfeeefeee, 0xfeeefeee,
0xfeeefeee, 0xfeeefeee}, v2_int64 = {0xfeeefeeefeeefeee,
0xfeeefeeefeeefeee}, uint128 = 0xfeeefeeefeeefeeefeeefeeefeeefeee}
xmm1 {v4_float = {0x0, 0x0, 0x0, 0x0}, v2_double =
{0x8000000000000000, 0x8000000000000000}, v16_int8 = {0xee, 0xfe,
0xee, 0xfe, 0xee, 0xfe, 0xee, 0xfe, 0xee, 0xfe, 0xee, 0xfe, 0xee,
0xfe, 0xee, 0xfe}, v8_int16 = {0xfeee, 0xfeee, 0xfeee, 0xfeee, 0xfeee,
0xfeee, 0xfeee, 0xfeee}, v4_int32 = {0xfeeefeee, 0xfeeefeee,
0xfeeefeee, 0xfeeefeee}, v2_int64 = {0xfeeefeeefeeefeee,
0xfeeefeeefeeefeee}, uint128 = 0xfeeefeeefeeefeeefeeefeeefeeefeee}
xmm2 {v4_float = {0x0, 0x0, 0x0, 0x0}, v2_double =
{0x8000000000000000, 0x8000000000000000}, v16_int8 = {0xee, 0xfe,
0xee, 0xfe, 0xee, 0xfe, 0xee, 0xfe, 0xee, 0xfe, 0xee, 0xfe, 0xee,
0xfe, 0xee, 0xfe}, v8_int16 = {0xfeee, 0xfeee, 0xfeee, 0xfeee, 0xfeee,
0xfeee, 0xfeee, 0xfeee}, v4_int32 = {0xfeeefeee, 0xfeeefeee,
0xfeeefeee, 0xfeeefeee}, v2_int64 = {0xfeeefeeefeeefeee,
0xfeeefeeefeeefeee}, uint128 = 0xfeeefeeefeeefeeefeeefeeefeeefeee}
xmm3 {v4_float = {0x0, 0x0, 0x0, 0x0}, v2_double =
{0x8000000000000000, 0x8000000000000000}, v16_int8 = {0xee, 0xfe,
0xee, 0xfe, 0xee, 0xfe, 0xee, 0xfe, 0xee, 0xfe, 0xee, 0xfe, 0xee,
0xfe, 0xee, 0xfe}, v8_int16 = {0xfeee, 0xfeee, 0xfeee, 0xfeee, 0xfeee,
0xfeee, 0xfeee, 0xfeee}, v4_int32 = {0xfeeefeee, 0xfeeefeee,
0xfeeefeee, 0xfeeefeee}, v2_int64 = {0xfeeefeeefeeefeee,
0xfeeefeeefeeefeee}, uint128 = 0xfeeefeeefeeefeeefeeefeeefeeefeee}
xmm4 {v4_float = {0x0, 0x0, 0x0, 0x0}, v2_double = {0x0,
0x0}, v16_int8 = {0x88, 0xe, 0x99, 0x5, 0x3, 0x0 <repeats 11 times>},
v8_int16 = {0xe88, 0x599, 0x3, 0x0, 0x0, 0x0, 0x0, 0x0}, v4_int32 =
{0x5990e88, 0x3, 0x0, 0x0}, v2_int64 = {0x305990e88, 0x0}, uint128 =
0x00000000000000000000000305990e88}
xmm5 {v4_float = {0x0, 0x0, 0x0, 0x0}, v2_double = {0x0,
0x0}, v16_int8 = {0xe2, 0xc6, 0x80, 0x6d, 0xd8, 0x66, 0xf5, 0xb, 0x0,
0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0}, v8_int16 = {0xc6e2, 0x6d80,
0x66d8, 0xbf5, 0x0, 0x0, 0x0, 0x0}, v4_int32 = {0x6d80c6e2, 0xbf566d8,
0x0, 0x0}, v2_int64 = {0xbf566d86d80c6e2, 0x0}, uint128 =
0x00000000000000000bf566d86d80c6e2}
xmm6 {v4_float = {0x0, 0x0, 0x0, 0x0}, v2_double = {0x0,
0x0}, v16_int8 = {0x0, 0xdc, 0x77, 0x5, 0x0, 0xdc, 0x77, 0x5, 0x0,
0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0}, v8_int16 = {0xdc00, 0x577, 0xdc00,
0x577, 0x0, 0x0, 0x0, 0x0}, v4_int32 = {0x577dc00, 0x577dc00, 0x0,
0x0}, v2_int64 = {0x577dc000577dc00, 0x0}, uint128 =
0x00000000000000000577dc000577dc00}
xmm7 {v4_float = {0x0, 0x0, 0x0, 0x0}, v2_double = {0x0,
0x0}, v16_int8 = {0x3c, 0xf5, 0xbd, 0x6, 0x58, 0xf4, 0xbd, 0x6, 0x0,
0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0}, v8_int16 = {0xf53c, 0x6bd, 0xf458,
0x6bd, 0x0, 0x0, 0x0, 0x0}, v4_int32 = {0x6bdf53c, 0x6bdf458, 0x0,
0x0}, v2_int64 = {0x6bdf45806bdf53c, 0x0}, uint128 =
0x000000000000000006bdf45806bdf53c}
mxcsr 0x1f80 [ IM DM ZM OM UM PM ]
mm0 {uint64 = 0x4600438046004380, v2_int32 = {0x46004380,
0x46004380}, v4_int16 = {0x4380, 0x4600, 0x4380, 0x4600}, v8_int8 =
{0x80, 0x43, 0x0, 0x46, 0x80, 0x43, 0x0, 0x46}}
mm1 {uint64 = 0x400000000000, v2_int32 = {0x0, 0x4000},
v4_int16 = {0x0, 0x0, 0x4000, 0x0}, v8_int8 = {0x0, 0x0, 0x0, 0x0,
0x0, 0x40, 0x0, 0x0}}
mm2 {uint64 = 0x8c008700860086, v2_int32 = {0x860086,
0x8c0087}, v4_int16 = {0x86, 0x86, 0x87, 0x8c}, v8_int8 = {0x86, 0x0,
0x86, 0x0, 0x87, 0x0, 0x8c, 0x0}}
mm3 {uint64 = 0x23000000000000, v2_int32 = {0x0, 0x230000},
v4_int16 = {0x0, 0x0, 0x0, 0x23}, v8_int8 = {0x0, 0x0, 0x0, 0x0, 0x0,
0x0, 0x23, 0x0}}
mm4 {uint64 = 0x2300000021c000, v2_int32 = {0x21c000,
0x230000}, v4_int16 = {0xc000, 0x21, 0x0, 0x23}, v8_int8 = {0x0, 0xc0,
0x21, 0x0, 0x0, 0x0, 0x23, 0x0}}
mm5 {uint64 = 0x430040804000400, v2_int32 = {0x4000400,
0x4300408}, v4_int16 = {0x400, 0x400, 0x408, 0x430}, v8_int8 = {0x0,
0x4, 0x0, 0x4, 0x8, 0x4, 0x30, 0x4}}
mm6 {uint64 = 0x0, v2_int32 = {0x0, 0x0}, v4_int16 = {0x0,
0x0, 0x0, 0x0}, v8_int8 = {0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0}}
mm7 {uint64 = 0x0, v2_int32 = {0x0, 0x0}, v4_int16 = {0x0,
0x0, 0x0, 0x0}, v8_int8 = {0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0}}
More information about the MPlayer-dev-eng
mailing list