[Ffmpeg-devel] gcc4 support & MMX fixups (from Debian)
Paweł Sikora
pluto
Wed Feb 1 01:26:49 CET 2006
Dnia Wednesday, 1 of February 2006 01:08, Aurelien Jacobs napisał:
> On Wed, 1 Feb 2006 00:21:56 +0100
>
> Paweł Sikora <pluto at pld-linux.org> wrote:
> > Dnia Wednesday, 1 of February 2006 00:01, Aurelien Jacobs napisał:
> > > > orig: iters = 1000000000, dt = 7.92 [avg]
> > > > fixed: iters = 1000000000, dt = 7.35 [avg]
> > > >
> > > > we gain: ~7.2%
> Oh ! My bad... stupid me. I just forgot the -O3 when compiling !
> Now here are some better results :
>
> orig: iters = 1000000000, dt = 5.04
> fixed: iters = 1000000000, dt = 5.47
>
> So that's still worse for the fixed version, but that's much more
> reasonable.
>
> Here is the asm code resulting of fixed_transpose4x4:
> (...)
hmmm, the 4.1/4.0 fixed_transpose4x4 listings are equal, but the benchmarks differ.
Maybe orig_transpose4x4 has a different prologue?
[ 4.1 / -O2 ]
# ----------------------------------------------------------------------
# orig_transpose4x4 -- transpose a 4x4 block of bytes using MMX.
# Syntax: AT&T / GAS, x86-64 (compiler-generated listing).
#
# Register roles as used by the code below:
#   %rdi = base address of the four 4-byte stores (destination)
#   %rsi = base address of the four 4-byte loads  (source)
#   %edx = 32-bit signed byte stride between destination rows
#   %ecx = 32-bit signed byte stride between source rows
# NOTE(review): this matches a SysV AMD64 call of the shape
#   f(dst, src, int dst_stride, int src_stride) -- confirm against the
#   C prototype, which is not part of this listing.
#
# Clobbers: %rax, %rdx, %r8-%r11, %mm0-%mm3, flags.
# NOTE(review): no emms is executed before ret; presumably the caller
#   is responsible for leaving MMX state -- verify.
# ----------------------------------------------------------------------
orig_transpose4x4:
# Prologue: build sign-extended 64-bit row offsets (1x, 2x, 3x stride)
# from the two 32-bit strides so they can be used as index registers.
#   r11 = 1*edx, r10 = 2*edx, r9 = 3*edx   (store offsets)
#   r8  = 1*ecx, rdx = 2*ecx, rax = 3*ecx  (load offsets)
leal (%rdx,%rdx), %r9d
leal (%rcx,%rcx), %eax
movslq %edx,%r11
movslq %ecx,%r8
movslq %r9d,%r10
addl %edx, %r9d
# %rdx is overwritten here with 2*ecx; the original %edx stride has
# already been consumed by the instructions above.
movslq %eax,%rdx
addl %ecx, %eax
movslq %r9d,%r9
# cltq sign-extends %eax (3*ecx) into %rax.
cltq
# Everything between #APP and #NO_APP is the inline-asm body; the code
# outside these markers was generated by the compiler.
#APP
# Load the four source rows a, b, c, d (4 bytes each) into mm0..mm3.
movd (%rsi), %mm0
movd (%rsi,%r8), %mm1
movd (%rsi,%rdx), %mm2
movd (%rsi,%rax), %mm3
# Interleave bytes: mm0 = a0 b0 a1 b1 a2 b2 a3 b3
#                   mm2 = c0 d0 c1 d1 c2 d2 c3 d3
punpcklbw %mm1, %mm0
punpcklbw %mm3, %mm2
# Keep a copy of the a/b interleave for the high-word unpack.
movq %mm0, %mm1
# Interleave words: mm0 = a0 b0 c0 d0 | a1 b1 c1 d1  (columns 0 and 1)
#                   mm1 = a2 b2 c2 d2 | a3 b3 c3 d3  (columns 2 and 3)
punpcklwd %mm2, %mm0
punpckhwd %mm2, %mm1
# Store column 0, then bring column 1 into the low dword and store it.
movd %mm0, (%rdi)
punpckhdq %mm0, %mm0
movd %mm0, (%rdi,%r11)
# Store column 2, then bring column 3 into the low dword and store it.
movd %mm1, (%rdi,%r10)
punpckhdq %mm1, %mm1
movd %mm1, (%rdi,%r9)
#NO_APP
ret
--
to_be || !to_be == 1, to_be | ~to_be == -1
More information about the ffmpeg-devel
mailing list