[MPlayer-dev-eng] [PATCH] Fix packed YUV in dshow vo

Laurent laurent.aml at gmail.com
Fri Oct 3 20:04:07 CEST 2008


On Fri, Oct 3, 2008 at 12:17 PM, compn <tempn at twmi.rr.com> wrote:
> On Fri, 3 Oct 2008 11:53:18 -0400, Laurent wrote:
>>MPlayer crashes on Intel-based GPU (eg 945) when the Packed YUV
>>colorspace is used.
>>
>>To reproduce the crash, start
>>  mplayer.exe -vf format=YUY2 <yourvideo>
>
> got a gdb backtrace? (instructions in bugreports.html)
>
> -compn
> _______________________________________________
> MPlayer-dev-eng mailing list
> MPlayer-dev-eng at mplayerhq.hu
> https://lists.mplayerhq.hu/mailman/listinfo/mplayer-dev-eng
>

Here is the trace.
The memory fault is a read past the end of the source buffer during the
memory copy: the copy length is the destination buffer size (len=245760),
but the source buffer is smaller because the two buffers use different
strides.

In this case:
  image size:320x240
  source stride=640 (no padding)
  source buffer size: 153600
  destination stride=1024
  destination buffer size: 245760



#0  0x00500b74 in fast_memcpy (to=0x1210c000, from=0x911c158,
len=245760) at libvo/aclib_template.c:191
#1  0x00429e9d in control (request=13, data=0x867af00) at
libvo/vo_directx.c:1478
#2  0x004a86fa in put_image (vf=0x8650f70, mpi=0x867af00, pts=0) at
libmpcodecs/vf_vo.c:178
#3  0x00475129 in filter_video (sh_video=0x8626008, frame=0x8619bd8,
pts=0) at libmpcodecs/dec_video.c:416
#4  0x004071fc in main (argc=15, argv=0x3f4028) at mplayer.c:1776


Dump of assembler code for function fast_memcpy:
0x00500b00 <fast_memcpy+0>:	push   %ebp
0x00500b01 <fast_memcpy+1>:	mov    %esp,%ebp
0x00500b03 <fast_memcpy+3>:	push   %edi
0x00500b04 <fast_memcpy+4>:	push   %esi
0x00500b05 <fast_memcpy+5>:	push   %ebx
0x00500b06 <fast_memcpy+6>:	sub    $0x4,%esp
0x00500b09 <fast_memcpy+9>:	mov    0xc(%ebp),%esi
0x00500b0c <fast_memcpy+12>:	prefetchnta (%esi)
0x00500b0f <fast_memcpy+15>:	prefetchnta 0x40(%esi)
0x00500b13 <fast_memcpy+19>:	prefetchnta 0x80(%esi)
0x00500b1a <fast_memcpy+26>:	prefetchnta 0xc0(%esi)
0x00500b21 <fast_memcpy+33>:	prefetchnta 0x100(%esi)
0x00500b28 <fast_memcpy+40>:	cmpl   $0x3f,0x10(%ebp)
0x00500b2c <fast_memcpy+44>:	mov    0x8(%ebp),%edi
0x00500b2f <fast_memcpy+47>:	jbe    0x500ba4 <fast_memcpy+164>
0x00500b31 <fast_memcpy+49>:	mov    0x8(%ebp),%edi
0x00500b34 <fast_memcpy+52>:	mov    %edi,%eax
0x00500b36 <fast_memcpy+54>:	and    $0xf,%eax
0x00500b39 <fast_memcpy+57>:	jne    0x500c00 <fast_memcpy+256>
0x00500b3f <fast_memcpy+63>:	mov    0x10(%ebp),%eax
0x00500b42 <fast_memcpy+66>:	shr    $0x6,%eax
0x00500b45 <fast_memcpy+69>:	test   $0xf,%esi
0x00500b4b <fast_memcpy+75>:	mov    %eax,-0x10(%ebp)
0x00500b4e <fast_memcpy+78>:	je     0x500bc0 <fast_memcpy+192>
0x00500b50 <fast_memcpy+80>:	mov    -0x10(%ebp),%edx
0x00500b53 <fast_memcpy+83>:	test   %edx,%edx
0x00500b55 <fast_memcpy+85>:	je     0x500b9d <fast_memcpy+157>
0x00500b57 <fast_memcpy+87>:	mov    -0x10(%ebp),%ebx
0x00500b5a <fast_memcpy+90>:	xor    %ecx,%ecx
0x00500b5c <fast_memcpy+92>:	lea    0x0(%esi,%eiz,1),%esi
0x00500b60 <fast_memcpy+96>:	lea    (%esi,%ecx,1),%edx
0x00500b63 <fast_memcpy+99>:	lea    (%edi,%ecx,1),%eax
0x00500b66 <fast_memcpy+102>:	prefetchnta 0x140(%edx)
0x00500b6d <fast_memcpy+109>:	movups (%edx),%xmm0
0x00500b70 <fast_memcpy+112>:	movups 0x10(%edx),%xmm1
0x00500b74 <fast_memcpy+116>:	movups 0x20(%edx),%xmm2
0x00500b78 <fast_memcpy+120>:	movups 0x30(%edx),%xmm3
0x00500b7c <fast_memcpy+124>:	movntps %xmm0,(%eax)
0x00500b7f <fast_memcpy+127>:	movntps %xmm1,0x10(%eax)
0x00500b83 <fast_memcpy+131>:	movntps %xmm2,0x20(%eax)
0x00500b87 <fast_memcpy+135>:	movntps %xmm3,0x30(%eax)
0x00500b8b <fast_memcpy+139>:	add    $0x40,%ecx
0x00500b8e <fast_memcpy+142>:	sub    $0x1,%ebx
0x00500b91 <fast_memcpy+145>:	jne    0x500b60 <fast_memcpy+96>
0x00500b93 <fast_memcpy+147>:	mov    -0x10(%ebp),%eax
0x00500b96 <fast_memcpy+150>:	shl    $0x6,%eax
0x00500b99 <fast_memcpy+153>:	add    %eax,%edi
0x00500b9b <fast_memcpy+155>:	add    %eax,%esi
0x00500b9d <fast_memcpy+157>:	andl   $0x3f,0x10(%ebp)
0x00500ba1 <fast_memcpy+161>:	sfence
0x00500ba4 <fast_memcpy+164>:	mov    0x10(%ebp),%eax
0x00500ba7 <fast_memcpy+167>:	test   %eax,%eax
0x00500ba9 <fast_memcpy+169>:	je     0x500bb0 <fast_memcpy+176>
0x00500bab <fast_memcpy+171>:	mov    0x10(%ebp),%ecx
0x00500bae <fast_memcpy+174>:	rep movsb %ds:(%esi),%es:(%edi)
0x00500bb0 <fast_memcpy+176>:	mov    0x8(%ebp),%eax
0x00500bb3 <fast_memcpy+179>:	add    $0x4,%esp
0x00500bb6 <fast_memcpy+182>:	pop    %ebx
0x00500bb7 <fast_memcpy+183>:	pop    %esi
0x00500bb8 <fast_memcpy+184>:	pop    %edi
0x00500bb9 <fast_memcpy+185>:	pop    %ebp
0x00500bba <fast_memcpy+186>:	ret
0x00500bbb <fast_memcpy+187>:	nop
0x00500bbc <fast_memcpy+188>:	lea    0x0(%esi,%eiz,1),%esi
0x00500bc0 <fast_memcpy+192>:	xor    %ecx,%ecx
0x00500bc2 <fast_memcpy+194>:	test   %eax,%eax
0x00500bc4 <fast_memcpy+196>:	mov    %eax,%ebx
0x00500bc6 <fast_memcpy+198>:	je     0x500b9d <fast_memcpy+157>
0x00500bc8 <fast_memcpy+200>:	lea    (%esi,%ecx,1),%edx
0x00500bcb <fast_memcpy+203>:	lea    (%edi,%ecx,1),%eax
0x00500bce <fast_memcpy+206>:	prefetchnta 0x140(%edx)
0x00500bd5 <fast_memcpy+213>:	movaps (%edx),%xmm0
0x00500bd8 <fast_memcpy+216>:	movaps 0x10(%edx),%xmm1
0x00500bdc <fast_memcpy+220>:	movaps 0x20(%edx),%xmm2
0x00500be0 <fast_memcpy+224>:	movaps 0x30(%edx),%xmm3
0x00500be4 <fast_memcpy+228>:	movntps %xmm0,(%eax)
0x00500be7 <fast_memcpy+231>:	movntps %xmm1,0x10(%eax)
0x00500beb <fast_memcpy+235>:	movntps %xmm2,0x20(%eax)
0x00500bef <fast_memcpy+239>:	movntps %xmm3,0x30(%eax)
0x00500bf3 <fast_memcpy+243>:	add    $0x40,%ecx
0x00500bf6 <fast_memcpy+246>:	sub    $0x1,%ebx
0x00500bf9 <fast_memcpy+249>:	jne    0x500bc8 <fast_memcpy+200>
0x00500bfb <fast_memcpy+251>:	jmp    0x500b93 <fast_memcpy+147>
0x00500bfd <fast_memcpy+253>:	lea    0x0(%esi),%esi
0x00500c00 <fast_memcpy+256>:	mov    $0x10,%ecx
0x00500c05 <fast_memcpy+261>:	mov    0x8(%ebp),%edi
0x00500c08 <fast_memcpy+264>:	sub    %eax,%ecx
0x00500c0a <fast_memcpy+266>:	sub    %ecx,0x10(%ebp)
0x00500c0d <fast_memcpy+269>:	rep movsb %ds:(%esi),%es:(%edi)
0x00500c0f <fast_memcpy+271>:	jmp    0x500b3f <fast_memcpy+63>
End of assembler dump.


eax            0x12132e80	303246976
ecx            0x26e80	159360
edx            0x9142fd8	152317912
ebx            0x546	1350
esp            0x22ec18	0x22ec18
ebp            0x22ec28	0x22ec28
esi            0x911c158	152158552
edi            0x1210c000	303087616
eip            0x500b74	0x500b74 <fast_memcpy+116>
eflags         0x210202	[ IF RF ID ]
cs             0x1b	27
ss             0x23	35
ds             0x23	35
es             0x23	35
fs             0x3b	59
gs             0x0	0
st0            -nan(0x4600438046004380)	(raw 0xffff4600438046004380)
st1            -nan(0x400000000000)	(raw 0xffff0000400000000000)
st2            -nan(0x8c008700860086)	(raw 0xffff008c008700860086)
st3            -nan(0x23000000000000)	(raw 0xffff0023000000000000)
st4            -nan(0x2300000021c000)	(raw 0xffff002300000021c000)
st5            -nan(0x430040804000400)	(raw 0xffff0430040804000400)
st6            0	(raw 0x00000000000000000000)
st7            0	(raw 0x00000000000000000000)
fctrl          0xffff037f	-64641
fstat          0xffff0000	-65536
ftag           0xffffffff	-1
fiseg          0x1b	27
fioff          0x4a86e7	4884199
foseg          0xffff0023	-65501
fooff          0x867b380	141013888
fop            0x518	1304
xmm0           {v4_float = {0x0, 0x0, 0x0, 0x0}, v2_double =
{0x8000000000000000, 0x8000000000000000}, v16_int8 = {0xee, 0xfe,
0xee, 0xfe, 0xee, 0xfe, 0xee, 0xfe, 0xee, 0xfe, 0xee, 0xfe, 0xee,
0xfe, 0xee, 0xfe}, v8_int16 = {0xfeee, 0xfeee, 0xfeee, 0xfeee, 0xfeee,
0xfeee, 0xfeee, 0xfeee}, v4_int32 = {0xfeeefeee, 0xfeeefeee,
0xfeeefeee, 0xfeeefeee}, v2_int64 = {0xfeeefeeefeeefeee,
0xfeeefeeefeeefeee}, uint128 = 0xfeeefeeefeeefeeefeeefeeefeeefeee}
xmm1           {v4_float = {0x0, 0x0, 0x0, 0x0}, v2_double =
{0x8000000000000000, 0x8000000000000000}, v16_int8 = {0xee, 0xfe,
0xee, 0xfe, 0xee, 0xfe, 0xee, 0xfe, 0xee, 0xfe, 0xee, 0xfe, 0xee,
0xfe, 0xee, 0xfe}, v8_int16 = {0xfeee, 0xfeee, 0xfeee, 0xfeee, 0xfeee,
0xfeee, 0xfeee, 0xfeee}, v4_int32 = {0xfeeefeee, 0xfeeefeee,
0xfeeefeee, 0xfeeefeee}, v2_int64 = {0xfeeefeeefeeefeee,
0xfeeefeeefeeefeee}, uint128 = 0xfeeefeeefeeefeeefeeefeeefeeefeee}
xmm2           {v4_float = {0x0, 0x0, 0x0, 0x0}, v2_double =
{0x8000000000000000, 0x8000000000000000}, v16_int8 = {0xee, 0xfe,
0xee, 0xfe, 0xee, 0xfe, 0xee, 0xfe, 0xee, 0xfe, 0xee, 0xfe, 0xee,
0xfe, 0xee, 0xfe}, v8_int16 = {0xfeee, 0xfeee, 0xfeee, 0xfeee, 0xfeee,
0xfeee, 0xfeee, 0xfeee}, v4_int32 = {0xfeeefeee, 0xfeeefeee,
0xfeeefeee, 0xfeeefeee}, v2_int64 = {0xfeeefeeefeeefeee,
0xfeeefeeefeeefeee}, uint128 = 0xfeeefeeefeeefeeefeeefeeefeeefeee}
xmm3           {v4_float = {0x0, 0x0, 0x0, 0x0}, v2_double =
{0x8000000000000000, 0x8000000000000000}, v16_int8 = {0xee, 0xfe,
0xee, 0xfe, 0xee, 0xfe, 0xee, 0xfe, 0xee, 0xfe, 0xee, 0xfe, 0xee,
0xfe, 0xee, 0xfe}, v8_int16 = {0xfeee, 0xfeee, 0xfeee, 0xfeee, 0xfeee,
0xfeee, 0xfeee, 0xfeee}, v4_int32 = {0xfeeefeee, 0xfeeefeee,
0xfeeefeee, 0xfeeefeee}, v2_int64 = {0xfeeefeeefeeefeee,
0xfeeefeeefeeefeee}, uint128 = 0xfeeefeeefeeefeeefeeefeeefeeefeee}
xmm4           {v4_float = {0x0, 0x0, 0x0, 0x0}, v2_double = {0x0,
0x0}, v16_int8 = {0x88, 0xe, 0x99, 0x5, 0x3, 0x0 <repeats 11 times>},
v8_int16 = {0xe88, 0x599, 0x3, 0x0, 0x0, 0x0, 0x0, 0x0}, v4_int32 =
{0x5990e88, 0x3, 0x0, 0x0}, v2_int64 = {0x305990e88, 0x0}, uint128 =
0x00000000000000000000000305990e88}
xmm5           {v4_float = {0x0, 0x0, 0x0, 0x0}, v2_double = {0x0,
0x0}, v16_int8 = {0xe2, 0xc6, 0x80, 0x6d, 0xd8, 0x66, 0xf5, 0xb, 0x0,
0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0}, v8_int16 = {0xc6e2, 0x6d80,
0x66d8, 0xbf5, 0x0, 0x0, 0x0, 0x0}, v4_int32 = {0x6d80c6e2, 0xbf566d8,
0x0, 0x0}, v2_int64 = {0xbf566d86d80c6e2, 0x0}, uint128 =
0x00000000000000000bf566d86d80c6e2}
xmm6           {v4_float = {0x0, 0x0, 0x0, 0x0}, v2_double = {0x0,
0x0}, v16_int8 = {0x0, 0xdc, 0x77, 0x5, 0x0, 0xdc, 0x77, 0x5, 0x0,
0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0}, v8_int16 = {0xdc00, 0x577, 0xdc00,
0x577, 0x0, 0x0, 0x0, 0x0}, v4_int32 = {0x577dc00, 0x577dc00, 0x0,
0x0}, v2_int64 = {0x577dc000577dc00, 0x0}, uint128 =
0x00000000000000000577dc000577dc00}
xmm7           {v4_float = {0x0, 0x0, 0x0, 0x0}, v2_double = {0x0,
0x0}, v16_int8 = {0x3c, 0xf5, 0xbd, 0x6, 0x58, 0xf4, 0xbd, 0x6, 0x0,
0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0}, v8_int16 = {0xf53c, 0x6bd, 0xf458,
0x6bd, 0x0, 0x0, 0x0, 0x0}, v4_int32 = {0x6bdf53c, 0x6bdf458, 0x0,
0x0}, v2_int64 = {0x6bdf45806bdf53c, 0x0}, uint128 =
0x000000000000000006bdf45806bdf53c}
mxcsr          0x1f80	[ IM DM ZM OM UM PM ]
mm0            {uint64 = 0x4600438046004380, v2_int32 = {0x46004380,
0x46004380}, v4_int16 = {0x4380, 0x4600, 0x4380, 0x4600}, v8_int8 =
{0x80, 0x43, 0x0, 0x46, 0x80, 0x43, 0x0, 0x46}}
mm1            {uint64 = 0x400000000000, v2_int32 = {0x0, 0x4000},
v4_int16 = {0x0, 0x0, 0x4000, 0x0}, v8_int8 = {0x0, 0x0, 0x0, 0x0,
0x0, 0x40, 0x0, 0x0}}
mm2            {uint64 = 0x8c008700860086, v2_int32 = {0x860086,
0x8c0087}, v4_int16 = {0x86, 0x86, 0x87, 0x8c}, v8_int8 = {0x86, 0x0,
0x86, 0x0, 0x87, 0x0, 0x8c, 0x0}}
mm3            {uint64 = 0x23000000000000, v2_int32 = {0x0, 0x230000},
v4_int16 = {0x0, 0x0, 0x0, 0x23}, v8_int8 = {0x0, 0x0, 0x0, 0x0, 0x0,
0x0, 0x23, 0x0}}
mm4            {uint64 = 0x2300000021c000, v2_int32 = {0x21c000,
0x230000}, v4_int16 = {0xc000, 0x21, 0x0, 0x23}, v8_int8 = {0x0, 0xc0,
0x21, 0x0, 0x0, 0x0, 0x23, 0x0}}
mm5            {uint64 = 0x430040804000400, v2_int32 = {0x4000400,
0x4300408}, v4_int16 = {0x400, 0x400, 0x408, 0x430}, v8_int8 = {0x0,
0x4, 0x0, 0x4, 0x8, 0x4, 0x30, 0x4}}
mm6            {uint64 = 0x0, v2_int32 = {0x0, 0x0}, v4_int16 = {0x0,
0x0, 0x0, 0x0}, v8_int8 = {0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0}}
mm7            {uint64 = 0x0, v2_int32 = {0x0, 0x0}, v4_int16 = {0x0,
0x0, 0x0, 0x0}, v8_int8 = {0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0}}



More information about the MPlayer-dev-eng mailing list