[FFmpeg-devel] [PATCH] Huffyuv speed tweaks

Michael Niedermayer michaelni
Tue Aug 19 02:08:32 CEST 2008


On Fri, Aug 08, 2008 at 04:09:09PM -0600, Jason Garrett-Glaser wrote:
> $subj, faster decoding pixel prediction.
> 
> Dark Shikari

I've picked one hunk and tested it a little;
see:

[...]

> @@ -172,13 +181,15 @@
>      b= *blue;
> 
>      for(i=0; i<w; i++){
> -        b+= src[4*i+B];
> -        g+= src[4*i+G];
> -        r+= src[4*i+R];
> +        b+= src[B];
> +        g+= src[G];
> +        r+= src[R];
> 
> -        dst[4*i+B]= b;
> -        dst[4*i+G]= g;
> -        dst[4*i+R]= r;
> +        dst[B]= b;
> +        dst[G]= g;
> +        dst[R]= r;
> +        src+= 4;
> +        dst+= 4;
>      }

SVN GCC 4.3.1
.L594:
        movl    176(%esp), %ebx
        movzbl  -4(%ebx,%edx,4), %eax
        addl    %eax, %esi
        movzbl  -3(%ebx,%edx,4), %eax
        addl    %eax, %edi
        movzbl  -2(%ebx,%edx,4), %eax
        movl    %edi, %ebx
.LVL1061:
        movb    %bl, -3(%ecx,%edx,4)
        addl    %eax, %ebp
        movl    %esi, %eax
.LVL1062:
        movb    %al, -4(%ecx,%edx,4)
        movl    %ebp, %eax
.LVL1063:
        movb    %al, -2(%ecx,%edx,4)
        addl    $1, %edx
        cmpl    68(%esp), %edx
        jne     .L594

------------------
your patch, GCC 4.3.1
.L593:
        movzbl  (%ebx,%edx,4), %eax
        addl    %eax, %esi
        movzbl  1(%ebx,%edx,4), %eax
        addl    %eax, %edi
        movzbl  2(%ebx,%edx,4), %eax
        addl    %eax, %ebp
        movl    %esi, %eax
.LVL1066:
        movb    %al, (%ecx,%edx,4)
        movl    %edi, %eax
.LVL1067:
        movb    %al, 1(%ecx,%edx,4)
        movl    %ebp, %eax
.LVL1068:
        movb    %al, 2(%ecx,%edx,4)
        addl    $1, %edx
        cmpl    %edx, 64(%esp)
        jg      .L593
------
SVN gcc 3.4.6

.L71:
        movzbl  (%edi,%ecx,4), %eax
        addl    %eax, %esi
        movzbl  1(%edi,%ecx,4), %eax
        addl    %eax, %ebx
        movzbl  2(%edi,%ecx,4), %eax
        movb    %bl, 1(%ebp,%ecx,4)
        addl    %eax, %edx
        movl    %esi, %eax
        movb    %al, (%ebp,%ecx,4)
        movb    %dl, 2(%ebp,%ecx,4)
        incl    %ecx
.L70:
        cmpl    20(%esp), %ecx
        jl      .L71


------
your patch, gcc 3.4.6

.L71:
        movzbl  (%edx), %eax
        incl    %ebp
        addl    %eax, %edi
        movzbl  1(%edx), %eax
        addl    %eax, %esi
        movzbl  2(%edx), %eax
        addl    $4, %edx
        addl    %eax, %ebx
        movb    %bl, 2(%ecx)
        movl    %edi, %eax
        movb    %al, (%ecx)
        movl    %esi, %eax
        movb    %al, 1(%ecx)
        addl    $4, %ecx
.L70:
        cmpl    20(%esp), %ebp
        jl      .L71


The various code snippets were found by putting
asm volatile("LOOPSTART\n\t"); before the loop; I hope this did not
affect the generated code.

-- 
Michael     GnuPG fingerprint: 9FF2128B147EF6730BADF133611EC787040B0FAB

Why not whip the teacher when the pupil misbehaves? -- Diogenes of Sinope
-------------- next part --------------
A non-text attachment was scrubbed...
Name: not available
Type: application/pgp-signature
Size: 189 bytes
Desc: Digital signature
URL: <http://lists.mplayerhq.hu/pipermail/ffmpeg-devel/attachments/20080819/a07e9b11/attachment.pgp>



More information about the ffmpeg-devel mailing list