[FFmpeg-cvslog] x86: cabac: replace explicit memory references with "m" operands

Mans Rullgard git at videolan.org
Mon Dec 12 01:43:51 CET 2011


ffmpeg | branch: master | Mans Rullgard <mans at mansr.com> | Sun Dec 11 21:41:59 2011 +0000| [599b4c6efddaed33b1667c386b34b07729ba732b] | committer: Mans Rullgard

x86: cabac: replace explicit memory references with "m" operands

This replaces the explicit offset(reg) memory references with
"m" operands for the same locations.  As a result, one fewer
register operand is needed for these inline asm statements, so
the guards are relaxed from HAVE_7REGS to HAVE_6REGS.

Signed-off-by: Mans Rullgard <mans at mansr.com>

> http://git.videolan.org/gitweb.cgi/ffmpeg.git/?a=commit;h=599b4c6efddaed33b1667c386b34b07729ba732b
---

 libavcodec/x86/cabac.h     |   45 ++++++++++++++++++++-----------------------
 libavcodec/x86/h264_i386.h |   31 ++++++++++++++---------------
 2 files changed, 36 insertions(+), 40 deletions(-)

diff --git a/libavcodec/x86/cabac.h b/libavcodec/x86/cabac.h
index 5da421a..d0d1332 100644
--- a/libavcodec/x86/cabac.h
+++ b/libavcodec/x86/cabac.h
@@ -27,7 +27,7 @@
 #include "config.h"
 
 #if HAVE_FAST_CMOV
-#define BRANCHLESS_GET_CABAC_UPDATE(ret, cabac, statep, low, lowword, range, tmp)\
+#define BRANCHLESS_GET_CABAC_UPDATE(ret, statep, low, lowword, range, tmp)\
         "mov    "tmp"       , %%ecx     \n\t"\
         "shl    $17         , "tmp"     \n\t"\
         "cmp    "low"       , "tmp"     \n\t"\
@@ -37,7 +37,7 @@
         "xor    %%ecx       , "ret"     \n\t"\
         "sub    "tmp"       , "low"     \n\t"
 #else /* HAVE_FAST_CMOV */
-#define BRANCHLESS_GET_CABAC_UPDATE(ret, cabac, statep, low, lowword, range, tmp)\
+#define BRANCHLESS_GET_CABAC_UPDATE(ret, statep, low, lowword, range, tmp)\
         "mov    "tmp"       , %%ecx     \n\t"\
         "shl    $17         , "tmp"     \n\t"\
         "sub    "low"       , "tmp"     \n\t"\
@@ -51,14 +51,13 @@
         "xor    "tmp"       , "ret"     \n\t"
 #endif /* HAVE_FAST_CMOV */
 
-#define BRANCHLESS_GET_CABAC(ret, cabac, statep, low, lowword, range, tmp, tmpbyte, byte) \
+#define BRANCHLESS_GET_CABAC(ret, statep, low, lowword, range, tmp, tmpbyte, byte) \
         "movzbl "statep"    , "ret"                                     \n\t"\
         "mov    "range"     , "tmp"                                     \n\t"\
         "and    $0xC0       , "range"                                   \n\t"\
         "movzbl "MANGLE(ff_h264_lps_range)"("ret", "range", 2), "range" \n\t"\
         "sub    "range"     , "tmp"                                     \n\t"\
-        BRANCHLESS_GET_CABAC_UPDATE(ret, cabac, statep, low, lowword,        \
-                                    range, tmp)                              \
+        BRANCHLESS_GET_CABAC_UPDATE(ret, statep, low, lowword, range, tmp)   \
         "movzbl " MANGLE(ff_h264_norm_shift) "("range"), %%ecx          \n\t"\
         "shl    %%cl        , "range"                                   \n\t"\
         "movzbl "MANGLE(ff_h264_mlps_state)"+128("ret"), "tmp"          \n\t"\
@@ -66,8 +65,8 @@
         "mov    "tmpbyte"   , "statep"                                  \n\t"\
         "test   "lowword"   , "lowword"                                 \n\t"\
         " jnz   1f                                                      \n\t"\
-        "mov "byte"("cabac"), %%"REG_c"                                 \n\t"\
-        "add"OPSIZE" $2     , "byte    "("cabac")                       \n\t"\
+        "mov    "byte"      , %%"REG_c"                                 \n\t"\
+        "add"OPSIZE" $2     , "byte"                                    \n\t"\
         "movzwl (%%"REG_c")     , "tmp"                                 \n\t"\
         "lea    -1("low")   , %%ecx                                     \n\t"\
         "xor    "low"       , %%ecx                                     \n\t"\
@@ -82,7 +81,7 @@
         "add    "tmp"       , "low"                                     \n\t"\
         "1:                                                             \n\t"
 
-#if HAVE_7REGS && !defined(BROKEN_RELOCATIONS)
+#if HAVE_6REGS && !defined(BROKEN_RELOCATIONS)
 #define get_cabac_inline get_cabac_inline_x86
 static av_always_inline int get_cabac_inline_x86(CABACContext *c,
                                                  uint8_t *const state)
@@ -90,24 +89,24 @@ static av_always_inline int get_cabac_inline_x86(CABACContext *c,
     int bit, tmp;
 
     __asm__ volatile(
-        BRANCHLESS_GET_CABAC("%0", "%5", "(%4)", "%1", "%w1", "%2",
-                             "%3", "%b3", "%a6")
-        :"=&r"(bit), "+&r"(c->low), "+&r"(c->range), "=&q"(tmp)
-        :"r"(state), "r"(c),
-         "i"(offsetof(CABACContext, bytestream))
+        BRANCHLESS_GET_CABAC("%0", "(%5)", "%1", "%w1", "%2",
+                             "%3", "%b3", "%4")
+        :"=&r"(bit), "+&r"(c->low), "+&r"(c->range), "=&q"(tmp),
+         "+m"(c->bytestream)
+        :"r"(state)
         : "%"REG_c, "memory"
     );
     return bit & 1;
 }
-#endif /* HAVE_7REGS && !defined(BROKEN_RELOCATIONS) */
+#endif /* HAVE_6REGS && !defined(BROKEN_RELOCATIONS) */
 
 #define get_cabac_bypass_sign get_cabac_bypass_sign_x86
 static av_always_inline int get_cabac_bypass_sign_x86(CABACContext *c, int val)
 {
     x86_reg tmp;
     __asm__ volatile(
-        "movl %a3(%2), %k1                      \n\t"
-        "movl %a4(%2), %%eax                    \n\t"
+        "movl %4, %k1                           \n\t"
+        "movl %2, %%eax                         \n\t"
         "shl $17, %k1                           \n\t"
         "add %%eax, %%eax                       \n\t"
         "sub %k1, %%eax                         \n\t"
@@ -118,22 +117,20 @@ static av_always_inline int get_cabac_bypass_sign_x86(CABACContext *c, int val)
         "sub %%edx, %%ecx                       \n\t"
         "test %%ax, %%ax                        \n\t"
         " jnz 1f                                \n\t"
-        "mov  %a5(%2), %1                       \n\t"
+        "mov  %3, %1                            \n\t"
         "subl $0xFFFF, %%eax                    \n\t"
         "movzwl (%1), %%edx                     \n\t"
         "bswap %%edx                            \n\t"
         "shrl $15, %%edx                        \n\t"
         "add  $2, %1                            \n\t"
         "addl %%edx, %%eax                      \n\t"
-        "mov  %1, %a5(%2)                       \n\t"
+        "mov  %1, %3                            \n\t"
         "1:                                     \n\t"
-        "movl %%eax, %a4(%2)                    \n\t"
+        "movl %%eax, %2                         \n\t"
 
-        :"+c"(val), "=&r"(tmp)
-        :"r"(c),
-         "i"(offsetof(CABACContext, range)), "i"(offsetof(CABACContext, low)),
-         "i"(offsetof(CABACContext, bytestream))
-        : "%eax", "%edx", "memory"
+        :"+c"(val), "=&r"(tmp), "+m"(c->low), "+m"(c->bytestream)
+        :"m"(c->range)
+        : "%eax", "%edx"
     );
     return val;
 }
diff --git a/libavcodec/x86/h264_i386.h b/libavcodec/x86/h264_i386.h
index 7a361cd..50e80d0 100644
--- a/libavcodec/x86/h264_i386.h
+++ b/libavcodec/x86/h264_i386.h
@@ -36,7 +36,7 @@
 
 //FIXME use some macros to avoid duplicating get_cabac (cannot be done yet
 //as that would make optimization work hard)
-#if HAVE_7REGS && !defined(BROKEN_RELOCATIONS)
+#if HAVE_6REGS && !defined(BROKEN_RELOCATIONS)
 static int decode_significance_x86(CABACContext *c, int max_coeff,
                                    uint8_t *significant_coeff_ctx_base,
                                    int *index, x86_reg last_off){
@@ -48,15 +48,15 @@ static int decode_significance_x86(CABACContext *c, int max_coeff,
     __asm__ volatile(
         "2:                                     \n\t"
 
-        BRANCHLESS_GET_CABAC("%4", "%6", "(%1)", "%3",
-                             "%w3", "%5", "%k0", "%b0", "%a11")
+        BRANCHLESS_GET_CABAC("%4", "(%1)", "%3",
+                             "%w3", "%5", "%k0", "%b0", "%6")
 
         "test $1, %4                            \n\t"
         " jz 3f                                 \n\t"
         "add  %10, %1                           \n\t"
 
-        BRANCHLESS_GET_CABAC("%4", "%6", "(%1)", "%3",
-                             "%w3", "%5", "%k0", "%b0", "%a11")
+        BRANCHLESS_GET_CABAC("%4", "(%1)", "%3",
+                             "%w3", "%5", "%k0", "%b0", "%6")
 
         "sub  %10, %1                           \n\t"
         "mov  %2, %0                            \n\t"
@@ -81,9 +81,9 @@ static int decode_significance_x86(CABACContext *c, int max_coeff,
         "add  %9, %k0                           \n\t"
         "shr $2, %k0                            \n\t"
         :"=&q"(coeff_count), "+r"(significant_coeff_ctx_base), "+m"(index),
-         "+&r"(c->low), "=&r"(bit), "+&r"(c->range)
-        :"r"(c), "m"(minusstart), "m"(end), "m"(minusindex), "m"(last_off),
-         "i"(offsetof(CABACContext, bytestream))
+         "+&r"(c->low), "=&r"(bit), "+&r"(c->range),
+         "+m"(c->bytestream)
+        :"m"(minusstart), "m"(end), "m"(minusindex), "m"(last_off)
         : "%"REG_c, "memory"
     );
     return coeff_count;
@@ -105,8 +105,8 @@ static int decode_significance_8x8_x86(CABACContext *c,
         "movzbl (%0, %6), %k6                   \n\t"
         "add %9, %6                             \n\t"
 
-        BRANCHLESS_GET_CABAC("%4", "%7", "(%6)", "%3",
-                             "%w3", "%5", "%k0", "%b0", "%a12")
+        BRANCHLESS_GET_CABAC("%4", "(%6)", "%3",
+                             "%w3", "%5", "%k0", "%b0", "%7")
 
         "mov %1, %k6                            \n\t"
         "test $1, %4                            \n\t"
@@ -115,8 +115,8 @@ static int decode_significance_8x8_x86(CABACContext *c,
         "movzbl "MANGLE(last_coeff_flag_offset_8x8)"(%k6), %k6\n\t"
         "add %11, %6                            \n\t"
 
-        BRANCHLESS_GET_CABAC("%4", "%7", "(%6)", "%3",
-                             "%w3", "%5", "%k0", "%b0", "%a12")
+        BRANCHLESS_GET_CABAC("%4", "(%6)", "%3",
+                             "%w3", "%5", "%k0", "%b0", "%7")
 
         "mov %2, %0                             \n\t"
         "mov %1, %k6                            \n\t"
@@ -138,13 +138,12 @@ static int decode_significance_8x8_x86(CABACContext *c,
         "addl %8, %k0                           \n\t"
         "shr $2, %k0                            \n\t"
         :"=&q"(coeff_count),"+m"(last), "+m"(index), "+&r"(c->low), "=&r"(bit),
-         "+&r"(c->range), "=&r"(state)
-        :"r"(c), "m"(minusindex), "m"(significant_coeff_ctx_base), "m"(sig_off), "m"(last_coeff_ctx_base),
-         "i"(offsetof(CABACContext, bytestream))
+         "+&r"(c->range), "=&r"(state), "+m"(c->bytestream)
+        :"m"(minusindex), "m"(significant_coeff_ctx_base), "m"(sig_off), "m"(last_coeff_ctx_base)
         : "%"REG_c, "memory"
     );
     return coeff_count;
 }
-#endif /* HAVE_7REGS && !defined(BROKEN_RELOCATIONS) */
+#endif /* HAVE_6REGS && !defined(BROKEN_RELOCATIONS) */
 
 #endif /* AVCODEC_X86_H264_I386_H */



More information about the ffmpeg-cvslog mailing list