[FFmpeg-cvslog] r12661 - in trunk/libavcodec/i386: dsputil_h264_template_mmx.c dsputil_h264_template_ssse3.c dsputil_mmx.c dsputil_mmx.h h264dsp_mmx.c
Mike Melanson
mike
Tue Apr 1 18:47:51 CEST 2008
Loren Merritt wrote:
> They all crashed on the first inter frame. Can you provide a backtrace and
> a disassembly of the chroma functions? (No icc here, and I'm not about to
> install something that requires registration for a per-user license, even
> if it is in portage.)
Certainly. I ran ffmpeg_g compiled with icc on the sample in this test case:
http://fate.multimedia.cx/index.php?test_spec=7
And this is running on a VMware-hosted 32-bit Ubuntu session on a Core 2
Duo CPU. What's the best way to get a disassembly of the chroma functions?
objdump on the object files?
$ gdb ./ffmpeg_g
GNU gdb 6.6-debian
Copyright (C) 2006 Free Software Foundation, Inc.
GDB is free software, covered by the GNU General Public License, and you are
welcome to change it and/or distribute copies of it under certain
conditions.
Type "show copying" to see the conditions.
There is absolutely no warranty for GDB. Type "show warranty" for details.
This GDB was configured as "i486-linux-gnu"...
Using host libthread_db library "/lib/tls/i686/cmov/libthread_db.so.1".
(gdb) r -f h264 -i /mnt/fate-suite/h264-conformance/AUD_MW_E.264 -f
framecrc -
Starting program: /home/melanson/ffmpeg/build-icc/ffmpeg_g -f h264 -i
/mnt/fate-suite/h264-conformance/AUD_MW_E.264 -f framecrc -
FFmpeg version SVN-r12665, Copyright (c) 2000-2008 Fabrice Bellard, et al.
configuration: --cc=ccache /opt/intel/cc/10.1.012/bin/icc
libavutil version: 49.6.0
libavcodec version: 51.54.0
libavformat version: 52.13.0
libavdevice version: 52.0.0
built on Apr 1 2008 07:44:12, gcc: Intel(R) C++ gcc 4.1 mode
Input #0, h264, from '/mnt/fate-suite/h264-conformance/AUD_MW_E.264':
Duration: N/A, bitrate: N/A
Stream #0.0: Video: h264, yuv420p, 176x144, 25.00 tb(r)
Output #0, framecrc, to 'pipe:':
Stream #0.0: Video: rawvideo, yuv420p, 176x144, q=2-31, 200 kb/s,
25.00 tb(c)
Stream mapping:
Stream #0.0 -> #0.0
Press [q] to stop encoding
0, 0, 38016, 0xa6d63b2e
Program received signal SIGSEGV, Segmentation fault.
0x080ed539 in put_h264_chroma_mc8_ssse3_rnd (
dst=0x86a9798 '\200' <repeats 88 times>, "yyu", 't' <repeats 13
times>, '\200' <repeats 88 times>, "xxtttttu"...,
src=0x8648281
"tttttttwwwwwwwwuuuuzzzz\177\177\177\177\177\177\177\177}}}}}}}}vvvvvvvv",
't' <repeats 32 times>, 'y' <repeats 12 times>, 't' <repeats 20 times>,
"wwwwwwwwuuuuzzzz\177\177\177\177\177\177\177\177}}}}}}}}vvvvvvvv", 't'
<repeats 32 times>, 'y' <repeats 12 times>, "ttttt"..., stride=104, h=4,
x=3, y=1)
at /home/melanson/ffmpeg/ffmpeg-main/libavcodec/i386/h264dsp_mmx.c:1981
1981 put_h264_chroma_mc8_ssse3(dst, src, stride, h, x, y, 1);
(gdb) bt
#0 0x080ed539 in put_h264_chroma_mc8_ssse3_rnd (
dst=0x86a9798 '\200' <repeats 88 times>, "yyu", 't' <repeats 13
times>, '\200' <repeats 88 times>, "xxtttttu"...,
src=0x8648281
"tttttttwwwwwwwwuuuuzzzz\177\177\177\177\177\177\177\177}}}}}}}}vvvvvvvv",
't' <repeats 32 times>, 'y' <repeats 12 times>, 't' <repeats 20 times>,
"wwwwwwwwuuuuzzzz\177\177\177\177\177\177\177\177}}}}}}}}vvvvvvvv", 't'
<repeats 32 times>, 'y' <repeats 12 times>, "ttttt"..., stride=104, h=4,
x=3, y=1)
at /home/melanson/ffmpeg/ffmpeg-main/libavcodec/i386/h264dsp_mmx.c:1981
#1 0x0825c0c4 in mc_part (h=0x68, n=4, square=5411, chroma_height=0,
delta=140868432, dest_y=0x825bc63 "?\a???\213\234$\214",
dest_cb=0x8657b64 "", dest_cr=0x8657b60 "", x_offset=1, y_offset=4,
qpix_put=0x8, chroma_put=0x86a1090, qpix_avg=0x86a95f8,
chroma_avg=0x86ab9f8, weight_op=0x0, weight_avg=0x4, list0=140872004,
list1=135189392)
at /home/melanson/ffmpeg/ffmpeg-main/libavcodec/h264.c:1848
#2 0x0825bc63 in hl_motion (h=0x68, dest_y=0x4 <Address 0x4 out of bounds>,
dest_cb=0x1523 <Address 0x1523 out of bounds>,
dest_cr=0x86ab9f8 "\203\203\203\203\203\203\203\203", '\200'
<repeats 80 times>, '\203' <repeats 24 times>, '\200' <repeats 80
times>, "\203\203\203\203\203\203\203\203"..., qpix_put=0x8658904,
chroma_put=0x86588e0,
qpix_avg=0x8658a04, chroma_avg=0x86588f8, weight_op=0x8658d04,
weight_avg=0x8658d2c)
at /home/melanson/ffmpeg/ffmpeg-main/libavcodec/h264.c:1890
#3 0x08256ec6 in hl_decode_mb_simple (h=0x68)
at /home/melanson/ffmpeg/ffmpeg-main/libavcodec/h264.c:2714
#4 0x0824b3d8 in decode_slice (avctx=0x68, h=0x4)
at /home/melanson/ffmpeg/ffmpeg-main/libavcodec/h264.c:6822
#5 0x0824b2dc in execute_decode_slices (h=0x68, context_count=4)
at /home/melanson/ffmpeg/ffmpeg-main/libavcodec/h264.c:7406
#6 0x0824b0bd in decode_nal_units (h=0x68,
buf=0x4 <Address 0x4 out of bounds>, buf_size=5411)
at /home/melanson/ffmpeg/ffmpeg-main/libavcodec/h264.c:7592
#7 0x0824a3d1 in decode_frame (avctx=0x862f3d0, data=0xbfce36f0,
data_size=0xbfce37b0, buf=0x8682dc0 "", buf_size=364)
at /home/melanson/ffmpeg/ffmpeg-main/libavcodec/h264.c:7722
#8 0x080e7e86 in avcodec_decode_video (avctx=0x16c, picture=0x8682dc0,
got_picture_ptr=0x868eb20, buf=0x16c <Address 0x16c out of bounds>,
buf_size=134601160)
at /home/melanson/ffmpeg/ffmpeg-main/libavcodec/utils.c:945
#9 0x0805d9c8 in output_packet (ist=0x68, ist_index=4, ost_table=0x1523,
nb_ostreams=1, pkt=0x868eb20)
at /home/melanson/ffmpeg/ffmpeg-main/ffmpeg.c:1112
#10 0x0805d014 in av_encode (output_files=0x68, nb_output_files=4,
input_files=0x1523, nb_input_files=1, stream_maps=0x8527ba0,
nb_stream_maps=0) at /home/melanson/ffmpeg/ffmpeg-main/ffmpeg.c:1992
#11 0x0805b63a in main (argc=8, argv=0xbfce3ef4)
at /home/melanson/ffmpeg/ffmpeg-main/ffmpeg.c:3933
(gdb) disass $pc-32 $pc+32
Dump of assembler code from 0x80ed519 to 0x80ed559:
0x080ed519 <put_h264_chroma_mc8_ssse3_rnd+393>: fiaddl 0x4e8d08c2(%ebx)
0x080ed51f <put_h264_chroma_mc8_ssse3_rnd+399>: or %cl,(%edi)
0x080ed521 <put_h264_chroma_mc8_ssse3_rnd+401>: scas %es:(%edi),%eax
0x080ed522 <put_h264_chroma_mc8_ssse3_rnd+402>: lret $0xc683
0x080ed525 <put_h264_chroma_mc8_ssse3_rnd+405>: or %cl,(%edi)
0x080ed527 <put_h264_chroma_mc8_ssse3_rnd+407>: scas %es:(%edi),%eax
0x080ed528 <put_h264_chroma_mc8_ssse3_rnd+408>: lock mov 0x28(%esp),%edx
0x080ed52d <put_h264_chroma_mc8_ssse3_rnd+413>: mov 0x24(%esp),%eax
0x080ed531 <put_h264_chroma_mc8_ssse3_rnd+417>: movd %ecx,%xmm7
0x080ed535 <put_h264_chroma_mc8_ssse3_rnd+421>: movd %esi,%xmm6
0x080ed539 <put_h264_chroma_mc8_ssse3_rnd+425>: movdqa (%esp),%xmm5
0x080ed53e <put_h264_chroma_mc8_ssse3_rnd+430>: pshuflw $0x0,%xmm7,%xmm7
0x080ed543 <put_h264_chroma_mc8_ssse3_rnd+435>: pshuflw $0x0,%xmm6,%xmm6
0x080ed548 <put_h264_chroma_mc8_ssse3_rnd+440>: movlhps %xmm7,%xmm7
0x080ed54b <put_h264_chroma_mc8_ssse3_rnd+443>: movlhps %xmm6,%xmm6
0x080ed54e <put_h264_chroma_mc8_ssse3_rnd+446>: mov 0x1c(%esp),%esi
0x080ed552 <put_h264_chroma_mc8_ssse3_rnd+450>: mov 0x20(%esp),%ecx
0x080ed556 <put_h264_chroma_mc8_ssse3_rnd+454>: movq (%ecx),%xmm0
End of assembler dump.
(gdb) info all-registers
eax 0x68 104
ecx 0x1523 5411
edx 0x4 4
ebx 0x0 0
esp 0xbfce3274 0xbfce3274
ebp 0x8 0x8
esi 0x305 773
edi 0x200020 2097184
eip 0x80ed539 0x80ed539
<put_h264_chroma_mc8_ssse3_rnd+425>
eflags 0x10216 [ PF AF IF RF ]
cs 0x73 115
ss 0x7b 123
ds 0x7b 123
es 0x7b 123
fs 0x0 0
gs 0x33 51
st0 -nan(0x8383838383838383) (raw 0xffff8383838383838383)
st1 -nan(0x8383838383838383) (raw 0xffff8383838383838383)
st2 -nan(0x20e020e020e020e0) (raw 0xffff20e020e020e020e0)
st3 -nan(0x8383838383838383) (raw 0xffff8383838383838383)
st4 -nan(0x8383838383838383) (raw 0xffff8383838383838383)
st5 -nan(0x20002000200020) (raw 0xffff0020002000200020)
st6 <invalid float value> (raw 0xffff0000000000000000)
st7 <invalid float value> (raw 0xffff0000000000000000)
fctrl 0x37f 895
fstat 0x120 288
ftag 0xaaaa 43690
fiseg 0x73 115
fioff 0x805ce8b 134598283
foseg 0x7b 123
fooff 0x0 0
fop 0x6d9 1753
xmm0 {v4_float = {0x0, 0x0, 0x0, 0x0}, v2_double = {0x0, 0x0},
v16_int8 = {0xdb, 0x0, 0xdb, 0x0, 0xdb, 0x0, 0xde, 0x0, 0xdd, 0x0, 0xda,
0x0, 0xd9, 0x0, 0xd9, 0x0}, v8_int16 = {0xdb, 0xdb, 0xdb, 0xde, 0xdd,
0xda, 0xd9, 0xd9}, v4_int32 = {0xdb00db, 0xde00db, 0xda00dd, 0xd900d9},
v2_int64 = {0xde00db00db00db, 0xd900d900da00dd},
uint128 = 0x00d900d900da00dd00de00db00db00db}
xmm1 {v4_float = {0x0, 0x0, 0x0, 0x0}, v2_double = {0x0, 0x0},
v16_int8 = {0xb6, 0x1, 0xb3, 0x1, 0xb3, 0x1, 0xb3, 0x1, 0xbb, 0x1, 0xba,
0x1, 0xb5, 0x1, 0xbb, 0x1}, v8_int16 = {0x1b6, 0x1b3, 0x1b3, 0x1b3,
0x1bb,
0x1ba, 0x1b5, 0x1bb}, v4_int32 = {0x1b301b6, 0x1b301b3, 0x1ba01bb,
0x1bb01b5}, v2_int64 = {0x1b301b301b301b6, 0x1bb01b501ba01bb},
uint128 = 0x01bb01b501ba01bb01b301b301b301b6}
xmm2 {v4_float = {0x80000000, 0x0, 0x0, 0x0}, v2_double = {
0x8000000000000000, 0x8000000000000000}, v16_int8 = {0xd9, 0xd9, 0xd9,
0xda, 0xda, 0xdb, 0xde, 0xdd, 0x6d, 0x6d, 0x6d, 0x6d, 0x6d, 0x6f, 0x72,
0x6f}, v8_int16 = {0xd9d9, 0xdad9, 0xdbda, 0xddde, 0x6d6d, 0x6d6d,
0x6f6d,
0x6f72}, v4_int32 = {0xdad9d9d9, 0xdddedbda, 0x6d6d6d6d, 0x6f726f6d},
v2_int64 = {0xdddedbdadad9d9d9, 0x6f726f6d6d6d6d6d},
uint128 = 0x6f726f6d6d6d6d6ddddedbdadad9d9d9}
xmm3 {v4_float = {0x80000000, 0x0, 0x0, 0x0}, v2_double = {
0x8000000000000000, 0x0}, v16_int8 = {0xd9, 0xd9, 0xd9, 0xda, 0xda,
0xd8,
0xd9, 0xdd, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0}, v8_int16 = {0xd9d9,
0xdad9, 0xd8da, 0xddd9, 0x0, 0x0, 0x0, 0x0}, v4_int32 = {0xdad9d9d9,
0xddd9d8da, 0x0, 0x0}, v2_int64 = {0xddd9d8dadad9d9d9, 0x0},
uint128 = 0x0000000000000000ddd9d8dadad9d9d9}
xmm4 {v4_float = {0x0, 0x0, 0x0, 0x0}, v2_double = {0x0, 0x0},
v16_int8 = {0xdd, 0x0, 0xda, 0x0, 0xd9, 0x0, 0xd9, 0x0, 0xd9, 0x0, 0xda,
0x0, 0xda, 0x0, 0xe2, 0x0}, v8_int16 = {0xdd, 0xda, 0xd9, 0xd9, 0xd9,
0xda, 0xda, 0xe2}, v4_int32 = {0xda00dd, 0xd900d9, 0xda00d9, 0xe200da},
v2_int64 = {0xd900d900da00dd, 0xe200da00da00d9},
uint128 = 0x00e200da00da00d900d900d900da00dd}
xmm5 {v4_float = {0x0, 0x0, 0x0, 0x0}, v2_double = {0x0, 0x0},
v16_int8 = {0xc7, 0x1, 0xc7, 0x1, 0xc4, 0x1, 0xcb, 0x1, 0xc9, 0x1, 0xc4,
0x1, 0xc3, 0x1, 0xbf, 0x1}, v8_int16 = {0x1c7, 0x1c7, 0x1c4, 0x1cb,
0x1c9,
0x1c4, 0x1c3, 0x1bf}, v4_int32 = {0x1c701c7, 0x1cb01c4, 0x1c401c9,
0x1bf01c3}, v2_int64 = {0x1cb01c401c701c7, 0x1bf01c301c401c9},
uint128 = 0x01bf01c301c401c901cb01c401c701c7}
xmm6 {v4_float = {0x0, 0x0, 0x0, 0x0}, v2_double = {0x0, 0x0},
v16_int8 = {0x5, 0x3, 0x0 <repeats 14 times>}, v8_int16 = {0x305, 0x0,
0x0,
0x0, 0x0, 0x0, 0x0, 0x0}, v4_int32 = {0x305, 0x0, 0x0, 0x0},
v2_int64 = {
0x305, 0x0}, uint128 = 0x00000000000000000000000000000305}
xmm7 {v4_float = {0x0, 0x0, 0x0, 0x0}, v2_double = {0x0, 0x0},
v16_int8 = {0x23, 0x15, 0x0 <repeats 14 times>}, v8_int16 = {0x1523, 0x0,
0x0, 0x0, 0x0, 0x0, 0x0, 0x0}, v4_int32 = {0x1523, 0x0, 0x0, 0x0},
v2_int64 = {0x1523, 0x0}, uint128 = 0x00000000000000000000000000001523}
mxcsr 0x9fe0 [ PE DAZ IM DM ZM OM UM PM FZ ]
mm0 {uint64 = 0x8383838383838383, v2_int32 = {0x83838383,
0x83838383}, v4_int16 = {0x8383, 0x8383, 0x8383, 0x8383}, v8_int8 =
{0x83,
0x83, 0x83, 0x83, 0x83, 0x83, 0x83, 0x83}}
mm1 {uint64 = 0x8383838383838383, v2_int32 = {0x83838383,
0x83838383}, v4_int16 = {0x8383, 0x8383, 0x8383, 0x8383}, v8_int8 =
{0x83,
0x83, 0x83, 0x83, 0x83, 0x83, 0x83, 0x83}}
mm2 {uint64 = 0x20e020e020e020e0, v2_int32 = {0x20e020e0,
0x20e020e0}, v4_int16 = {0x20e0, 0x20e0, 0x20e0, 0x20e0}, v8_int8 =
{0xe0,
0x20, 0xe0, 0x20, 0xe0, 0x20, 0xe0, 0x20}}
mm3 {uint64 = 0x8383838383838383, v2_int32 = {0x83838383,
0x83838383}, v4_int16 = {0x8383, 0x8383, 0x8383, 0x8383}, v8_int8 =
{0x83,
0x83, 0x83, 0x83, 0x83, 0x83, 0x83, 0x83}}
mm4 {uint64 = 0x8383838383838383, v2_int32 = {0x83838383,
0x83838383}, v4_int16 = {0x8383, 0x8383, 0x8383, 0x8383}, v8_int8 =
{0x83,
0x83, 0x83, 0x83, 0x83, 0x83, 0x83, 0x83}}
mm5 {uint64 = 0x20002000200020, v2_int32 = {0x200020, 0x200020},
v4_int16 = {0x20, 0x20, 0x20, 0x20}, v8_int8 = {0x20, 0x0, 0x20, 0x0,
0x20,
0x0, 0x20, 0x0}}
mm6 {uint64 = 0x0, v2_int32 = {0x0, 0x0}, v4_int16 = {0x0, 0x0,
0x0, 0x0}, v8_int8 = {0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0}}
mm7 {uint64 = 0x0, v2_int32 = {0x0, 0x0}, v4_int16 = {0x0, 0x0,
0x0, 0x0}, v8_int8 = {0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0}}
--
-Mike Melanson
More information about the ffmpeg-cvslog
mailing list