[FFmpeg-devel] [PATCH] Huffyuv speed tweaks
Michael Niedermayer
michaelni
Tue Aug 19 02:08:32 CEST 2008
On Fri, Aug 08, 2008 at 04:09:09PM -0600, Jason Garrett-Glaser wrote:
> $subj, faster decoding pixel prediction.
>
> Dark Shikari
I've picked one hunk and tested it a little;
see:
[...]
> @@ -172,13 +181,15 @@
> b= *blue;
>
> for(i=0; i<w; i++){
> - b+= src[4*i+B];
> - g+= src[4*i+G];
> - r+= src[4*i+R];
> + b+= src[B];
> + g+= src[G];
> + r+= src[R];
>
> - dst[4*i+B]= b;
> - dst[4*i+G]= g;
> - dst[4*i+R]= r;
> + dst[B]= b;
> + dst[G]= g;
> + dst[R]= r;
> + src+= 4;
> + dst+= 4;
> }
SVN GCC 4.3.1
.L594:
movl 176(%esp), %ebx
movzbl -4(%ebx,%edx,4), %eax
addl %eax, %esi
movzbl -3(%ebx,%edx,4), %eax
addl %eax, %edi
movzbl -2(%ebx,%edx,4), %eax
movl %edi, %ebx
.LVL1061:
movb %bl, -3(%ecx,%edx,4)
addl %eax, %ebp
movl %esi, %eax
.LVL1062:
movb %al, -4(%ecx,%edx,4)
movl %ebp, %eax
.LVL1063:
movb %al, -2(%ecx,%edx,4)
addl $1, %edx
cmpl 68(%esp), %edx
jne .L594
------------------
yours gcc 4.3.1
.L593:
movzbl (%ebx,%edx,4), %eax
addl %eax, %esi
movzbl 1(%ebx,%edx,4), %eax
addl %eax, %edi
movzbl 2(%ebx,%edx,4), %eax
addl %eax, %ebp
movl %esi, %eax
.LVL1066:
movb %al, (%ecx,%edx,4)
movl %edi, %eax
.LVL1067:
movb %al, 1(%ecx,%edx,4)
movl %ebp, %eax
.LVL1068:
movb %al, 2(%ecx,%edx,4)
addl $1, %edx
cmpl %edx, 64(%esp)
jg .L593
------
SVN gcc 3.4.6
.L71:
movzbl (%edi,%ecx,4), %eax
addl %eax, %esi
movzbl 1(%edi,%ecx,4), %eax
addl %eax, %ebx
movzbl 2(%edi,%ecx,4), %eax
movb %bl, 1(%ebp,%ecx,4)
addl %eax, %edx
movl %esi, %eax
movb %al, (%ebp,%ecx,4)
movb %dl, 2(%ebp,%ecx,4)
incl %ecx
.L70:
cmpl 20(%esp), %ecx
jl .L71
------
yours gcc 3.4.6
.L71:
movzbl (%edx), %eax
incl %ebp
addl %eax, %edi
movzbl 1(%edx), %eax
addl %eax, %esi
movzbl 2(%edx), %eax
addl $4, %edx
addl %eax, %ebx
movb %bl, 2(%ecx)
movl %edi, %eax
movb %al, (%ecx)
movl %esi, %eax
movb %al, 1(%ecx)
addl $4, %ecx
.L70:
cmpl 20(%esp), %ebp
jl .L71
The various code snippets were found by putting
asm volatile("LOOPSTART\n\t"); before the loop; I hope this did not
affect the generated code.
--
Michael GnuPG fingerprint: 9FF2128B147EF6730BADF133611EC787040B0FAB
Why not whip the teacher when the pupil misbehaves? -- Diogenes of Sinope
-------------- next part --------------
A non-text attachment was scrubbed...
Name: not available
Type: application/pgp-signature
Size: 189 bytes
Desc: Digital signature
URL: <http://lists.mplayerhq.hu/pipermail/ffmpeg-devel/attachments/20080819/a07e9b11/attachment.pgp>
More information about the ffmpeg-devel
mailing list