[FFmpeg-devel] [PATCH 3/3] x86: sbrdsp: implement SSE2 hf_apply_noise

Michael Niedermayer michaelni at gmx.at
Fri Apr 19 00:59:49 CEST 2013


On Mon, Apr 15, 2013 at 08:54:32PM +0200, Christophe Gisquet wrote:
> 2013/4/14 Michael Niedermayer <michaelni at gmx.at>:
> > Seems to crash on linux x86_64 shared
> 
> Indeed, noise vector was loaded (not shown in that stack trace)
> through r9 and thus r9 was overwritten.

The patch you attached is identical to the last one. Here's a more complete
disassembly of the code that crashes:

   0x00007ffff6cd9c80 <ff_sbr_hf_apply_noise_0_sse2+0>: movdqa 0x14fcf8(%rip),%xmm0        # 0x7ffff6e29980 <ps_noise0>
   0x00007ffff6cd9c88 <ff_sbr_hf_apply_noise_0_sse2+8>: jmp    0x7ffff6cd9cd5 <apply_noise_main>
   0x00007ffff6cd9c8a <ff_sbr_hf_apply_noise_0_sse2+10>:    nopw   0x0(%rax,%rax,1)
   0x00007ffff6cd9c90 <ff_sbr_hf_apply_noise_1_sse2+0>: and    $0x1,%r8
   0x00007ffff6cd9c94 <ff_sbr_hf_apply_noise_1_sse2+4>: shl    $0x4,%r8
   0x00007ffff6cd9c98 <ff_sbr_hf_apply_noise_1_sse2+8>: lea    0x14fd01(%rip),%r9        # 0x7ffff6e299a0 <ps_noise13>
   0x00007ffff6cd9c9f <ff_sbr_hf_apply_noise_1_sse2+15>:    movdqa (%r8,%r9,1),%xmm0
   0x00007ffff6cd9ca5 <ff_sbr_hf_apply_noise_1_sse2+21>:    jmp    0x7ffff6cd9cd5 <apply_noise_main>
   0x00007ffff6cd9ca7 <ff_sbr_hf_apply_noise_1_sse2+23>:    nopw   0x0(%rax,%rax,1)
   0x00007ffff6cd9cb0 <ff_sbr_hf_apply_noise_2_sse2+0>: movdqa 0x14fcd8(%rip),%xmm0        # 0x7ffff6e29990 <ps_noise2>
   0x00007ffff6cd9cb8 <ff_sbr_hf_apply_noise_2_sse2+8>: jmp    0x7ffff6cd9cd5 <apply_noise_main>
   0x00007ffff6cd9cba <ff_sbr_hf_apply_noise_2_sse2+10>:    nopw   0x0(%rax,%rax,1)
   0x00007ffff6cd9cc0 <ff_sbr_hf_apply_noise_3_sse2+0>: and    $0x1,%r8
   0x00007ffff6cd9cc4 <ff_sbr_hf_apply_noise_3_sse2+4>: shl    $0x4,%r8
   0x00007ffff6cd9cc8 <ff_sbr_hf_apply_noise_3_sse2+8>: lea    0x14fce1(%rip),%r9        # 0x7ffff6e299b0 <ps_noise13+16>
   0x00007ffff6cd9ccf <ff_sbr_hf_apply_noise_3_sse2+15>:    movdqa (%r8,%r9,1),%xmm0
   0x00007ffff6cd9cd5 <apply_noise_main+0>: dec    %rcx
   0x00007ffff6cd9cd8 <apply_noise_main+3>: shl    $0x2,%r9
   0x00007ffff6cd9cdc <apply_noise_main+7>: lea    0x1a9bd(%rip),%rax        # 0x7ffff6cf46a0 <ff_sbr_noise_table>
   0x00007ffff6cd9ce3 <apply_noise_main+14>:    lea    (%rdi,%r9,2),%rdi
   0x00007ffff6cd9ce7 <apply_noise_main+18>:    add    %r9,%rsi
   0x00007ffff6cd9cea <apply_noise_main+21>:    add    %r9,%rdx
   0x00007ffff6cd9ced <apply_noise_main+24>:    shl    $0x3,%rcx
   0x00007ffff6cd9cf1 <apply_noise_main+28>:    pxor   %xmm5,%xmm5
   0x00007ffff6cd9cf5 <apply_noise_main+32>:    neg    %r9
   0x00007ffff6cd9cf8 <apply_noise_main.loop+0>:    movdqa (%rdx,%r9,1),%xmm1
   0x00007ffff6cd9cfe <apply_noise_main.loop+6>:    movdqu 0x10(%rcx,%rax,1),%xmm3
   0x00007ffff6cd9d04 <apply_noise_main.loop+12>:   movdqu 0x20(%rcx,%rax,1),%xmm4
   0x00007ffff6cd9d0a <apply_noise_main.loop+18>:   add    $0x20,%rcx
   0x00007ffff6cd9d0e <apply_noise_main.loop+22>:   and    $0xff8,%rcx
   0x00007ffff6cd9d15 <apply_noise_main.loop+29>:   movdqa %xmm1,%xmm2
   0x00007ffff6cd9d19 <apply_noise_main.loop+33>:   punpckhdq %xmm1,%xmm2
   0x00007ffff6cd9d1d <apply_noise_main.loop+37>:   punpckldq %xmm1,%xmm1
   0x00007ffff6cd9d21 <apply_noise_main.loop+41>:   mulps  %xmm3,%xmm1
   0x00007ffff6cd9d24 <apply_noise_main.loop+44>:   mulps  %xmm4,%xmm2
=> 0x00007ffff6cd9d27 <apply_noise_main.loop+47>:   movdqa (%rsi,%r9,1),%xmm3
   0x00007ffff6cd9d2d <apply_noise_main.loop+53>:   movdqa %xmm3,%xmm4
   0x00007ffff6cd9d31 <apply_noise_main.loop+57>:   punpckhdq %xmm3,%xmm4
   0x00007ffff6cd9d35 <apply_noise_main.loop+61>:   punpckldq %xmm3,%xmm3
   0x00007ffff6cd9d39 <apply_noise_main.loop+65>:   movdqa %xmm3,%xmm6
   0x00007ffff6cd9d3d <apply_noise_main.loop+69>:   pcmpeqd %xmm5,%xmm6
   0x00007ffff6cd9d41 <apply_noise_main.loop+73>:   movdqa %xmm4,%xmm7
   0x00007ffff6cd9d45 <apply_noise_main.loop+77>:   pcmpeqd %xmm5,%xmm7
   0x00007ffff6cd9d49 <apply_noise_main.loop+81>:   mulps  %xmm0,%xmm3
   0x00007ffff6cd9d4c <apply_noise_main.loop+84>:   mulps  %xmm0,%xmm4
   0x00007ffff6cd9d4f <apply_noise_main.loop+87>:   pand   %xmm6,%xmm1
   0x00007ffff6cd9d53 <apply_noise_main.loop+91>:   pand   %xmm7,%xmm2
   0x00007ffff6cd9d57 <apply_noise_main.loop+95>:   movdqu (%rdi,%r9,2),%xmm6
   0x00007ffff6cd9d5d <apply_noise_main.loop+101>:  movdqu 0x10(%rdi,%r9,2),%xmm7
   0x00007ffff6cd9d64 <apply_noise_main.loop+108>:  addps  %xmm1,%xmm3
   0x00007ffff6cd9d67 <apply_noise_main.loop+111>:  addps  %xmm2,%xmm4
   0x00007ffff6cd9d6a <apply_noise_main.loop+114>:  addps  %xmm3,%xmm6
   0x00007ffff6cd9d6d <apply_noise_main.loop+117>:  addps  %xmm4,%xmm7
   0x00007ffff6cd9d70 <apply_noise_main.loop+120>:  movdqu %xmm6,(%rdi,%r9,2)
   0x00007ffff6cd9d76 <apply_noise_main.loop+126>:  movdqu %xmm7,0x10(%rdi,%r9,2)
   0x00007ffff6cd9d7d <apply_noise_main.loop+133>:  add    $0x10,%r9
   0x00007ffff6cd9d81 <apply_noise_main.loop+137>:  jl     0x7ffff6cd9cf8 <apply_noise_main.loop>
   0x00007ffff6cd9d87 <apply_noise_main.loop+143>:  retq

rax            0x7ffff6cf46a0   140737334167200
rbx            0x23 35
rcx            0x590    1424
rdx            0x27fffd3814f50  703686695276368
rsi            0x27fffd3854c50  703686695537744
rdi            0x47fffaf0b4bd0  1266636036983760
rbp            0x7fffffffd640   0x7fffffffd640
rsp            0x7fffffffd3f8   0x7fffffffd3f8
r8             0x0  0
r9             0xfffe0000247743b0   -562949341625424
r10            0x2b 43
r11            0x23 35
r12            0x7ffff7f5b720   140737353463584
r13            0x7ffff7f5b780   140737353463680
r14            0x1  1
r15            0x23 35
rip            0x7ffff6cd9d27   0x7ffff6cd9d27 <apply_noise_main.loop+47>


[...]

-- 
Michael     GnuPG fingerprint: 9FF2128B147EF6730BADF133611EC787040B0FAB

In a rich man's house there is no place to spit but his face.
-- Diogenes of Sinope
-------------- next part --------------
A non-text attachment was scrubbed...
Name: not available
Type: application/pgp-signature
Size: 198 bytes
Desc: Digital signature
URL: <http://ffmpeg.org/pipermail/ffmpeg-devel/attachments/20130419/467f2589/attachment.asc>


More information about the ffmpeg-devel mailing list