[Ffmpeg-devel] [PATCH] H264 cabac vlc reading code
Michael Niedermayer
michaelni
Sat Oct 14 13:17:58 CEST 2006
Hi
On Fri, Oct 13, 2006 at 03:43:46PM +0200, Michael Niedermayer wrote:
> Hi
>
> the attached patch contains some generic "non-binary"/"vlc" CABAC
> reading code; sadly it's slower, that's why I post it here instead of
> committing it ;)
>
> maybe it's useful for someone, or someone has an idea how to make it
> faster
Today's useless attached CABAC patch changes the hardcoded registers into
more flexible constraints. It's supposed to be better, as gcc should be able
to reuse registers and avoid a few loads/stores, but it isn't faster; instead
it's significantly slower. I've not looked at the asm code gcc generates, but
I've tried a few different gcc versions ...
[...]
--
Michael GnuPG fingerprint: 9FF2128B147EF6730BADF133611EC787040B0FAB
In the past you could go to a library and read, borrow or copy any book
Today you'd get arrested for mere telling someone where the library is
-------------- next part --------------
Index: libavcodec/cabac.h
===================================================================
--- libavcodec/cabac.h (revision 6683)
+++ libavcodec/cabac.h (working copy)
@@ -451,73 +451,66 @@
);
bit&=1;
#else /* BRANCHLESS_CABAC_DECODER */
+ int dummy;
asm volatile(
- "movzbl (%1), %%eax \n\t"
- "movl "RANGE "(%2), %%ebx \n\t"
- "movl "RANGE "(%2), %%edx \n\t"
- "shrl $23, %%ebx \n\t"
- "movzbl "MANGLE(ff_h264_lps_range)"(%%ebx, %%eax, 4), %%esi\n\t"
- "shll $17, %%esi \n\t"
- "movl "LOW "(%2), %%ebx \n\t"
+ "shrl $23, %4 \n\t"
+ "movzbl "MANGLE(ff_h264_lps_range)"(%4, %0, 4), %4\n\t"
+ "shll $17, %4 \n\t"
//eax:state ebx:low, edx:range, esi:RangeLPS
- "subl %%esi, %%edx \n\t"
+ "subl %4, %2 \n\t"
#ifdef CMOV_IS_FAST //FIXME actually define this somewhere
- "cmpl %%ebx, %%edx \n\t"
- "cmova %%edx, %%esi \n\t"
+ "cmpl %1, %2 \n\t"
+ "cmova %2, %4 \n\t"
"sbbl %%ecx, %%ecx \n\t"
- "andl %%ecx, %%edx \n\t"
- "subl %%edx, %%ebx \n\t"
- "xorl %%ecx, %%eax \n\t"
+ "andl %%ecx, %2 \n\t"
+ "subl %2, %1 \n\t"
+ "xorl %%ecx, %0 \n\t"
#else /* CMOV_IS_FAST */
- "movl %%edx, %%ecx \n\t"
- "subl %%ebx, %%edx \n\t"
- "sarl $31, %%edx \n\t" //lps_mask
- "subl %%ecx, %%esi \n\t" //RangeLPS - range
- "andl %%edx, %%esi \n\t" //(RangeLPS - range)&lps_mask
- "addl %%ecx, %%esi \n\t" //new range
- "andl %%edx, %%ecx \n\t"
- "subl %%ecx, %%ebx \n\t"
- "xorl %%edx, %%eax \n\t"
+ "movl %2, %%ecx \n\t"
+ "subl %1, %2 \n\t"
+ "sarl $31, %2 \n\t" //lps_mask
+ "subl %%ecx, %4 \n\t" //RangeLPS - range
+ "andl %2, %4 \n\t" //(RangeLPS - range)&lps_mask
+ "addl %%ecx, %4 \n\t" //new range
+ "andl %2, %%ecx \n\t"
+ "subl %%ecx, %1 \n\t"
+ "xorl %2, %0 \n\t"
#endif /* CMOV_IS_FAST */
//eax:state ebx:low edx:mask esi:range
- "movzbl "MANGLE(ff_h264_mlps_state)"+128(%%eax), %%ecx \n\t"
- "movb %%cl, (%1) \n\t"
+ "movzbl "MANGLE(ff_h264_mlps_state)"+128(%0), %k3 \n\t"
- "movl %%esi, %%edx \n\t"
+ "movl %4, %2 \n\t"
//eax:bit ebx:low edx:range esi:range
- "shr $19, %%esi \n\t"
- "movzbl " MANGLE(ff_h264_norm_shift) "(%%esi), %%ecx \n\t"
- "shll %%cl, %%edx \n\t"
- "movl %%edx, "RANGE "(%2) \n\t"
- "shll %%cl, %%ebx \n\t"
- "movl %%ebx, "LOW "(%2) \n\t"
- "test %%bx, %%bx \n\t"
+ "shr $19, %4 \n\t"
+ "movzbl " MANGLE(ff_h264_norm_shift) "(%4), %%ecx \n\t"
+ "shll %%cl, %2 \n\t"
+ "shll %%cl, %1 \n\t"
+ "test %w1, %w1 \n\t"
" jnz 1f \n\t"
- "movl "BYTE "(%2), %%ecx \n\t"
- "movzwl (%%ecx), %%esi \n\t"
- "bswap %%esi \n\t"
- "shrl $15, %%esi \n\t"
- "subl $0xFFFF, %%esi \n\t"
+ "movl %5, %%ecx \n\t"
+ "movzwl (%%ecx), %4 \n\t"
+ "bswap %4 \n\t"
+ "shrl $15, %4 \n\t"
+ "subl $0xFFFF, %4 \n\t"
"addl $2, %%ecx \n\t"
- "movl %%ecx, "BYTE "(%2) \n\t"
+ "movl %%ecx, %5 \n\t"
- "leal -1(%%ebx), %%ecx \n\t"
- "xorl %%ebx, %%ecx \n\t"
+ "leal -1(%1), %%ecx \n\t"
+ "xorl %1, %%ecx \n\t"
"shrl $17, %%ecx \n\t"
"movzbl " MANGLE(ff_h264_norm_shift) "(%%ecx), %%ecx \n\t"
"neg %%ecx \n\t"
"add $7, %%ecx \n\t"
- "shll %%cl , %%esi \n\t"
- "addl %%esi, %%ebx \n\t"
- "movl %%ebx, "LOW "(%2) \n\t"
+ "shll %%cl , %4 \n\t"
+ "addl %4, %1 \n\t"
"1: \n\t"
- :"=&a"(bit)
- :"r"(state), "r"(c)
- : "%ecx", "%ebx", "%edx", "%esi", "memory"
+ :"=&r"(bit), "+r"(c->low), "+r"(c->range), "=&r"(*state), "=&r"(dummy), "+m"(c->bytestream)
+ :"0"((int)*state), "4"(c->range)
+ : "%ecx"
);
bit&=1;
#endif /* BRANCHLESS_CABAC_DECODER */
More information about the ffmpeg-devel
mailing list