[FFmpeg-devel] Amazing intrinsics improvements in gcc 4
Michael Niedermayer
michaelni
Wed Mar 19 19:07:03 CET 2008
Hi
On 2004-03-12 I submitted bug
14552 (compiled trivial vector intrinsic code is inefficient) to gcc
(http://gcc.gnu.org/bugzilla/show_bug.cgi?id=14552)
the source was:
------
/* 64-bit vector types declared with gcc's mode attribute:
   V4HI is four 16-bit elements, V2SI is two 32-bit elements. */
typedef short mmxw __attribute__ ((mode(V4HI)));
typedef int mmxdw __attribute__ ((mode(V2SI)));
/* Global operands; the assembly listings address them directly as `w` and `dw`. */
mmxdw dw;
mmxw w;
void test(){
/* Element-wise add of w to itself, i.e. each 16-bit element is doubled. */
w+=w;
/* Same-size vector cast: the 64 bits of w are copied to dw unchanged. */
dw= (mmxdw)w;
}
------
what gcc-3.4 -O3 -mtune=pentium3 -march=pentium3 -S generated was:
# gcc-3.4 output for test(); AT&T syntax (source first, destination second).
# Load the 64-bit vector w into MMX register mm1.
movq w, %mm1
# Frame-pointer setup that is undone by the popl before ebp is ever used.
pushl %ebp
movl %esp, %ebp
popl %ebp
# w += w: shifting every 16-bit word left by one bit doubles it.
psllw $1, %mm1
# Store the result back to w ...
movq %mm1, w
# ... then reload it from memory into mm0, although mm1 still holds the value.
movq w, %mm0
# dw = (mmxdw)w: plain 64-bit copy.
movq %mm0, dw
ret
what a human would generate:
# Hand-written equivalent of test(): one load, one packed add, two stores.
movq w, %mm1
# w += w as a packed 16-bit add of the register with itself.
paddw %mm1,%mm1
movq %mm1, w
# Store the same register to dw; w does not need to be reloaded.
movq %mm1,dw
ret
Now, finally, after 4 years, the bug has been closed (with WONTFIX, of course)
and gcc-4.3 -O3 -mtune=pentium3 -march=pentium3 -S now generates:
# gcc-4.3 output for test(): no MMX instructions are used; each 16-bit element
# is processed separately in a general-purpose register.
# Prologue: set up the frame and reserve 16 bytes of stack.
pushl %ebp
movl %esp, %ebp
subl $16, %esp
# Read w as two 32-bit halves.
movl w, %eax
movl w+4, %edx
# Save ebx and esi in the frame before they are used as scratch registers.
movl %ebx, -8(%ebp)
movl %esi, -4(%ebp)
# Spill the copy of w to the stack at -16(%ebp) and -12(%ebp).
movl %eax, -16(%ebp)
movl %edx, -12(%ebp)
# Reload the four 16-bit elements one at a time, sign-extended to 32 bits.
movswl -16(%ebp),%ecx
movswl -14(%ebp),%ebx
movswl -12(%ebp),%esi
movswl -10(%ebp),%eax
# Double each element (w += w).
addl %ecx, %ecx
addl %ebx, %ebx
addl %esi, %esi
addl %eax, %eax
# Write the low 16 bits of each result back to w with four separate stores;
# the saved ebx and esi are restored in between.
movw %bx, w+2
movl -8(%ebp), %ebx
movw %si, w+4
movl -4(%ebp), %esi
movw %ax, w+6
movw %cx, w
# dw = (mmxdw)w: reload w from memory and copy it to dw as two 32-bit halves.
movl w+4, %edx
movl w, %eax
movl %edx, dw+4
movl %eax, dw
# Epilogue: release the frame and return.
movl %ebp, %esp
popl %ebp
ret
I thought some people here would be interested, as there were various claims
about gcc's abilities and improvements posted here lately ...
--
Michael GnuPG fingerprint: 9FF2128B147EF6730BADF133611EC787040B0FAB
I am the wisest man alive, for I know one thing, and that is that I know
nothing. -- Socrates
-------------- next part --------------
A non-text attachment was scrubbed...
Name: not available
Type: application/pgp-signature
Size: 189 bytes
Desc: Digital signature
URL: <http://lists.mplayerhq.hu/pipermail/ffmpeg-devel/attachments/20080319/7f4a2121/attachment.pgp>
More information about the ffmpeg-devel
mailing list