[FFmpeg-devel] [PATCH 3/3] x86: sbrdsp: implement SSE2 hf_apply_noise
Michael Niedermayer
michaelni at gmx.at
Sun Apr 14 05:04:43 CEST 2013
On Sat, Apr 13, 2013 at 06:02:43PM +0200, Christophe Gisquet wrote:
> 2013/4/13 Michael Niedermayer <michaelni at gmx.at>:
> >> +%define count kxq
> >> +%else
> >> +%define count m_maxq
> >> +%endif
> >> + dec noiseq
> >> + shl count, 2
> >> +%if NREGS
> >> + lea r5q, [sbr_noise_table]
> >
> > count and r5q end up being the same register here on x86_64 linux shared
>
> Fixed that in the fashion we discussed. Also replaced the cmpeqps by
> pcmpeqd for a 2 cycles gain.
Seems to crash on a Linux x86_64 shared build:
0x00007ffff722d0f5 <apply_noise_main+0>: dec %rcx
0x00007ffff722d0f8 <apply_noise_main+3>: shl $0x2,%r9
0x00007ffff722d0fc <apply_noise_main+7>: lea 0x1a95d(%rip),%rax # 0x7ffff7247a60 <ff_sbr_noise_table>
0x00007ffff722d103 <apply_noise_main+14>: lea (%rdi,%r9,2),%rdi
0x00007ffff722d107 <apply_noise_main+18>: add %r9,%rsi
0x00007ffff722d10a <apply_noise_main+21>: add %r9,%rdx
0x00007ffff722d10d <apply_noise_main+24>: shl $0x3,%rcx
0x00007ffff722d111 <apply_noise_main+28>: pxor %xmm5,%xmm5
0x00007ffff722d115 <apply_noise_main+32>: neg %r9
0x00007ffff722d118 <apply_noise_main.loop+0>: movdqa (%rdx,%r9,1),%xmm1
0x00007ffff722d11e <apply_noise_main.loop+6>: movdqu 0x10(%rcx,%rax,1),%xmm3
0x00007ffff722d124 <apply_noise_main.loop+12>: movdqu 0x20(%rcx,%rax,1),%xmm4
0x00007ffff722d12a <apply_noise_main.loop+18>: add $0x20,%rcx
0x00007ffff722d12e <apply_noise_main.loop+22>: and $0xff8,%rcx
0x00007ffff722d135 <apply_noise_main.loop+29>: movdqa %xmm1,%xmm2
0x00007ffff722d139 <apply_noise_main.loop+33>: punpckhdq %xmm1,%xmm2
0x00007ffff722d13d <apply_noise_main.loop+37>: punpckldq %xmm1,%xmm1
0x00007ffff722d141 <apply_noise_main.loop+41>: mulps %xmm3,%xmm1
0x00007ffff722d144 <apply_noise_main.loop+44>: mulps %xmm4,%xmm2
=> 0x00007ffff722d147 <apply_noise_main.loop+47>: movdqa (%rsi,%r9,1),%xmm3
0x00007ffff722d14d <apply_noise_main.loop+53>: movdqa %xmm3,%xmm4
0x00007ffff722d151 <apply_noise_main.loop+57>: punpckhdq %xmm3,%xmm4
0x00007ffff722d155 <apply_noise_main.loop+61>: punpckldq %xmm3,%xmm3
0x00007ffff722d159 <apply_noise_main.loop+65>: movdqa %xmm3,%xmm6
0x00007ffff722d15d <apply_noise_main.loop+69>: pcmpeqd %xmm5,%xmm6
0x00007ffff722d161 <apply_noise_main.loop+73>: movdqa %xmm4,%xmm7
0x00007ffff722d165 <apply_noise_main.loop+77>: pcmpeqd %xmm5,%xmm7
rax 0x7ffff7247a60 140737339751008
rbx 0x23 35
rcx 0x590 1424
rdx 0x27fffd4d61650 703686717609552
rsi 0x27fffd4da1350 703686717870928
rdi 0x47fffb1b4d9d0 1266636081650128
rbp 0x7fffffffd5b0 0x7fffffffd5b0
rsp 0x7fffffffd368 0x7fffffffd368
r8 0x0 0
r9 0xfffe000023227cb0 -562949363958608
r10 0x2b 43
r11 0x23 35
r12 0x7ffff7f5b720 140737353463584
r13 0x7ffff7f5b780 140737353463680
r14 0x1 1
r15 0x23 35
rip 0x7ffff722d147 0x7ffff722d147 <apply_noise_main.loop+47>
[...]
--
Michael GnuPG fingerprint: 9FF2128B147EF6730BADF133611EC787040B0FAB
Many that live deserve death. And some that die deserve life. Can you give
it to them? Then do not be too eager to deal out death in judgement. For
even the very wise cannot see all ends. -- Gandalf
-------------- next part --------------
A non-text attachment was scrubbed...
Name: not available
Type: application/pgp-signature
Size: 198 bytes
Desc: Digital signature
URL: <http://ffmpeg.org/pipermail/ffmpeg-devel/attachments/20130414/0f86b609/attachment.asc>
More information about the ffmpeg-devel mailing list