[MPlayer-dev-eng] MPlayer and gcc ia32 intrinsics

Michael Niedermayer michaelni at gmx.at
Wed Nov 23 02:19:15 CET 2005


Hi

On Tue, Nov 22, 2005 at 11:09:42PM +0100, Aurelien Jacobs wrote:
> On Tue, 22 Nov 2005 16:54:52 -0500
> Jason Tackaberry <tack at sault.org> wrote:
> 
> > On Tue, 2005-11-22 at 22:34 +0200, Jan Knutar wrote:
> > > They don't work with gcc 2.95.3?
> > 
> > What if the SIMD code was simply disabled (fall back to C) for gcc < 3?
> 
> IIRC gcc is somewhat buggy about intrinsics and sometimes produces
> very slow code.

Yes, here's an example:
 typedef short mmxw  __attribute__ ((mode(V4HI)));
 typedef int   mmxdw __attribute__ ((mode(V2SI)));

mmxdw dw;
mmxw w;

void test(){
    w+=w;
    dw= (mmxdw)w;
}

gcc 3.4.0:
        movq    w, %mm1
        psllw   $1, %mm1
        movq    %mm1, w
        movq    w, %mm0
        movq    %mm0, dw
        ret

human: 
        movq w, %mm1 
        paddw %mm1,%mm1 
        movq %mm1, w 
        movq %mm1,dw 
        ret

gcc-4.1.0:
test:   subl    $20, %esp
        movl    w, %eax
        movl    w+4, %edx
        movl    %ebx, 8(%esp)
        movl    %esi, 12(%esp)
        movl    %eax, (%esp)
        movl    %edx, 4(%esp)
        movswl  (%esp),%esi
        movl    %edi, 16(%esp)
        movswl  4(%esp),%ecx
        movswl  2(%esp),%edi
        movswl  6(%esp),%ebx
        addl    %esi, %esi
        addl    %ecx, %ecx
        movzwl  %si, %esi
        sall    $17, %edi
        movzwl  %cx, %ecx
        sall    $17, %ebx
        movl    %edi, %eax
        movl    16(%esp), %edi
        movl    %ebx, %edx
        orl     %esi, %eax
        movl    8(%esp), %ebx
        orl     %ecx, %edx
        movl    12(%esp), %esi
        movl    %eax, w
        movl    %edx, w+4
        movl    w, %eax
        movl    w+4, %edx
        movl    %eax, dw
        movl    %edx, dw+4
        addl    $20, %esp
        ret


gcc 4.1.0/20051113 with x87/mmx mode switch patch produces:
test:   movq    w, %mm0
        paddw   %mm0, %mm0
        movq    %mm0, w
        movl    w, %eax
        movl    w+4, %edx
        movl    %eax, dw
        movl    %edx, dw+4
        emms
        ret
Note: in this case the 64-bit movq store to w is immediately followed by two
32-bit loads from w, which causes partial memory stalls; these are VERY slow,
about 10-20 CPU cycles.
I think this demonstrates the problem.

examples taken from 
http://gcc.gnu.org/bugzilla/show_bug.cgi?id=14552

[...]

-- 
Michael




More information about the MPlayer-dev-eng mailing list