[FFmpeg-devel] [PATCH 3/4] h264: use one table instead of several for cabac functions
Roland Scheidegger
rscheidegger_lists at hispeed.ch
Fri Apr 27 22:12:19 CEST 2012
The reason is that a single table is easier to address from PIC code (in particular on Darwin).
Keep the old names as pointers (static in cabac_functions.h, so that gcc
knows they are just immediate offsets) so that the C code can stay the same
(alternatively, the offsets could be used directly in the functions that need the
tables). This should produce the same code as before for non-PIC builds and better
code (confirmed) for PIC builds.
The inline assembly uses the new table as well, but it still won't work in the PIC case.
---
libavcodec/cabac.c | 54 ++++++++++++++++++++++++------------------
libavcodec/cabac.h | 5 ++++
libavcodec/cabac_functions.h | 8 ++++--
libavcodec/h264_cabac.c | 9 +------
libavcodec/x86/cabac.h | 18 ++++++++-----
libavcodec/x86/h264_i386.h | 24 +++++++++++++-----
6 files changed, 70 insertions(+), 48 deletions(-)
diff --git a/libavcodec/cabac.c b/libavcodec/cabac.c
index 196e548..14ef30a 100644
--- a/libavcodec/cabac.c
+++ b/libavcodec/cabac.c
@@ -31,6 +31,29 @@
#include "cabac.h"
#include "cabac_functions.h"
+uint8_t ff_h264_cabac_tables[512 + 4*2*64 + 4*64 + 63] = {
+ 9,8,7,7,6,6,6,6,5,5,5,5,5,5,5,5,
+ 4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,
+ 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,
+ 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,
+ 2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,
+ 2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,
+ 2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,
+ 2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,
+ 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
+ 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
+ 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
+ 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
+ 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+ 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+ 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+ 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+ 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+ 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+ 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+ 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+};
+
static const uint8_t lps_range[64][4]= {
{128,176,208,240}, {128,167,197,227}, {128,158,187,216}, {123,150,178,205},
{116,142,169,195}, {111,135,160,185}, {105,128,152,175}, {100,122,144,166},
@@ -50,8 +73,6 @@ static const uint8_t lps_range[64][4]= {
{ 6, 8, 9, 11}, { 6, 7, 9, 10}, { 6, 7, 8, 9}, { 2, 2, 2, 2},
};
-uint8_t ff_h264_mlps_state[4*64];
-uint8_t ff_h264_lps_range[4*2*64];
static uint8_t h264_lps_state[2*64];
static uint8_t h264_mps_state[2*64];
@@ -77,27 +98,11 @@ static const uint8_t lps_state[64]= {
36,36,37,37,37,38,38,63,
};
-const uint8_t ff_h264_norm_shift[512]= {
- 9,8,7,7,6,6,6,6,5,5,5,5,5,5,5,5,
- 4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,
- 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,
- 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,
- 2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,
- 2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,
- 2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,
- 2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,
- 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
- 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
- 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
- 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
- 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
- 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
- 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
- 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
- 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
- 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
- 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
- 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+static const uint8_t last_coeff_flag_offset_8x8[63] = {
+ 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+ 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
+ 3, 3, 3, 3, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4,
+ 5, 5, 5, 5, 6, 6, 6, 6, 7, 7, 7, 7, 8, 8, 8
};
/**
@@ -158,6 +163,9 @@ void ff_init_cabac_states(CABACContext *c){
ff_h264_mlps_state[128-2*i-2]= 0;
}
}
+ for(i=0; i< 63; i++){
+ ff_h264_last_coeff_flag_offset_8x8[i] = last_coeff_flag_offset_8x8[i];
+ }
}
#ifdef TEST
diff --git a/libavcodec/cabac.h b/libavcodec/cabac.h
index 667489e..29535d0 100644
--- a/libavcodec/cabac.h
+++ b/libavcodec/cabac.h
@@ -31,6 +31,11 @@
#include "put_bits.h"
+#define H264_NORM_SHIFT_OFFSET 0
+#define H264_LPS_RANGE_OFFSET 512
+#define H264_MLPS_STATE_OFFSET 1024
+#define H264_LAST_COEFF_FLAG_OFFSET_8x8_OFFSET 1280
+
#define CABAC_BITS 16
#define CABAC_MASK ((1<<CABAC_BITS)-1)
diff --git a/libavcodec/cabac_functions.h b/libavcodec/cabac_functions.h
index 90af15c..ee70fcf 100644
--- a/libavcodec/cabac_functions.h
+++ b/libavcodec/cabac_functions.h
@@ -36,9 +36,11 @@
# include "x86/cabac.h"
#endif
-extern const uint8_t ff_h264_norm_shift[512];
-extern uint8_t ff_h264_mlps_state[4*64];
-extern uint8_t ff_h264_lps_range[4*2*64]; ///< rangeTabLPS
+extern uint8_t ff_h264_cabac_tables[512 + 4*2*64 + 4*64 + 63];
+static uint8_t * const ff_h264_norm_shift = ff_h264_cabac_tables + H264_NORM_SHIFT_OFFSET;
+static uint8_t * const ff_h264_lps_range = ff_h264_cabac_tables + H264_LPS_RANGE_OFFSET;
+static uint8_t * const ff_h264_mlps_state = ff_h264_cabac_tables + H264_MLPS_STATE_OFFSET;
+static uint8_t * const ff_h264_last_coeff_flag_offset_8x8 = ff_h264_cabac_tables + H264_LAST_COEFF_FLAG_OFFSET_8x8_OFFSET;
static void refill(CABACContext *c){
#if CABAC_BITS == 16
diff --git a/libavcodec/h264_cabac.c b/libavcodec/h264_cabac.c
index 65f2cb4..29dbd7a 100644
--- a/libavcodec/h264_cabac.c
+++ b/libavcodec/h264_cabac.c
@@ -1561,13 +1561,6 @@ static av_always_inline int get_cabac_cbf_ctx( H264Context *h, int cat, int idx,
return base_ctx[cat] + ctx;
}
-DECLARE_ASM_CONST(1, uint8_t, last_coeff_flag_offset_8x8)[63] = {
- 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
- 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
- 3, 3, 3, 3, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4,
- 5, 5, 5, 5, 6, 6, 6, 6, 7, 7, 7, 7, 8, 8, 8
-};
-
static av_always_inline void
decode_cabac_residual_internal(H264Context *h, DCTELEM *block,
int cat, int n, const uint8_t *scantable,
@@ -1670,7 +1663,7 @@ decode_cabac_residual_internal(H264Context *h, DCTELEM *block,
last_coeff_ctx_base-significant_coeff_ctx_base);
}
#else
- DECODE_SIGNIFICANCE( 63, sig_off[last], last_coeff_flag_offset_8x8[last] );
+ DECODE_SIGNIFICANCE( 63, sig_off[last], ff_h264_last_coeff_flag_offset_8x8[last] );
} else {
if (is_dc && chroma422) { // dc 422
DECODE_SIGNIFICANCE(7, sig_coeff_offset_dc[last], sig_coeff_offset_dc[last]);
diff --git a/libavcodec/x86/cabac.h b/libavcodec/x86/cabac.h
index fee63d3..f532be3 100644
--- a/libavcodec/x86/cabac.h
+++ b/libavcodec/x86/cabac.h
@@ -24,6 +24,7 @@
#include "libavcodec/cabac.h"
#include "libavutil/attributes.h"
#include "libavutil/x86_cpu.h"
+#include "libavutil/internal.h"
#include "config.h"
#if HAVE_FAST_CMOV
@@ -51,16 +52,16 @@
"xor "tmp" , "ret" \n\t"
#endif /* HAVE_FAST_CMOV */
-#define BRANCHLESS_GET_CABAC(ret, statep, low, lowword, range, tmp, tmpbyte, byte, end) \
+#define BRANCHLESS_GET_CABAC(ret, statep, low, lowword, range, tmp, tmpbyte, byte, end, norm_off, lps_off, mlps_off) \
"movzbl "statep" , "ret" \n\t"\
"mov "range" , "tmp" \n\t"\
"and $0xC0 , "range" \n\t"\
- "movzbl "MANGLE(ff_h264_lps_range)"("ret", "range", 2), "range" \n\t"\
+ "movzbl "MANGLE(ff_h264_cabac_tables)"+"lps_off"("ret", "range", 2), "range" \n\t"\
"sub "range" , "tmp" \n\t"\
BRANCHLESS_GET_CABAC_UPDATE(ret, low, range, tmp) \
- "movzbl " MANGLE(ff_h264_norm_shift) "("range"), %%ecx \n\t"\
+ "movzbl "MANGLE(ff_h264_cabac_tables)"+"norm_off"("range"), %%ecx \n\t"\
"shl %%cl , "range" \n\t"\
- "movzbl "MANGLE(ff_h264_mlps_state)"+128("ret"), "tmp" \n\t"\
+ "movzbl "MANGLE(ff_h264_cabac_tables)"+"mlps_off"+128("ret"), "tmp" \n\t"\
"shl %%cl , "low" \n\t"\
"mov "tmpbyte" , "statep" \n\t"\
"test "lowword" , "lowword" \n\t"\
@@ -73,7 +74,7 @@
"shr $15 , %%ecx \n\t"\
"bswap "tmp" \n\t"\
"shr $15 , "tmp" \n\t"\
- "movzbl " MANGLE(ff_h264_norm_shift) "(%%ecx), %%ecx \n\t"\
+ "movzbl "MANGLE(ff_h264_cabac_tables)"+"norm_off"(%%ecx), %%ecx \n\t"\
"sub $0xFFFF , "tmp" \n\t"\
"neg %%ecx \n\t"\
"add $7 , %%ecx \n\t"\
@@ -93,11 +94,14 @@ static av_always_inline int get_cabac_inline_x86(CABACContext *c,
__asm__ volatile(
BRANCHLESS_GET_CABAC("%0", "(%4)", "%1", "%w1",
"%2", "%3", "%b3",
- "%a6(%5)", "%a7(%5)")
+ "%a6(%5)", "%a7(%5)", "%a8", "%a9", "%a10")
: "=&r"(bit), "+&r"(c->low), "+&r"(c->range), "=&q"(tmp)
: "r"(state), "r"(c),
"i"(offsetof(CABACContext, bytestream)),
- "i"(offsetof(CABACContext, bytestream_end))
+ "i"(offsetof(CABACContext, bytestream_end)),
+ "i"(H264_NORM_SHIFT_OFFSET),
+ "i"(H264_LPS_RANGE_OFFSET),
+ "i"(H264_MLPS_STATE_OFFSET)
: "%"REG_c, "memory"
);
return bit & 1;
diff --git a/libavcodec/x86/h264_i386.h b/libavcodec/x86/h264_i386.h
index 6aa2d07..d278708 100644
--- a/libavcodec/x86/h264_i386.h
+++ b/libavcodec/x86/h264_i386.h
@@ -45,12 +45,13 @@ static int decode_significance_x86(CABACContext *c, int max_coeff,
int minusindex= 4-(intptr_t)index;
int bit;
x86_reg coeff_count;
+
__asm__ volatile(
"3: \n\t"
BRANCHLESS_GET_CABAC("%4", "(%1)", "%3", "%w3",
"%5", "%k0", "%b0",
- "%a11(%6)", "%a12(%6)")
+ "%a11(%6)", "%a12(%6)", "%a13", "%a14", "%a15")
"test $1, %4 \n\t"
" jz 4f \n\t"
@@ -58,7 +59,7 @@ static int decode_significance_x86(CABACContext *c, int max_coeff,
BRANCHLESS_GET_CABAC("%4", "(%1)", "%3", "%w3",
"%5", "%k0", "%b0",
- "%a11(%6)", "%a12(%6)")
+ "%a11(%6)", "%a12(%6)", "%a13", "%a14", "%a15")
"sub %10, %1 \n\t"
"mov %2, %0 \n\t"
@@ -86,7 +87,10 @@ static int decode_significance_x86(CABACContext *c, int max_coeff,
"+&r"(c->low), "=&r"(bit), "+&r"(c->range)
: "r"(c), "m"(minusstart), "m"(end), "m"(minusindex), "m"(last_off),
"i"(offsetof(CABACContext, bytestream)),
- "i"(offsetof(CABACContext, bytestream_end))
+ "i"(offsetof(CABACContext, bytestream_end)),
+ "i"(H264_NORM_SHIFT_OFFSET),
+ "i"(H264_LPS_RANGE_OFFSET),
+ "i"(H264_MLPS_STATE_OFFSET)
: "%"REG_c, "memory"
);
return coeff_count;
@@ -100,6 +104,7 @@ static int decode_significance_8x8_x86(CABACContext *c,
x86_reg coeff_count;
x86_reg last=0;
x86_reg state;
+
__asm__ volatile(
"mov %1, %6 \n\t"
"3: \n\t"
@@ -110,18 +115,19 @@ static int decode_significance_8x8_x86(CABACContext *c,
BRANCHLESS_GET_CABAC("%4", "(%6)", "%3", "%w3",
"%5", "%k0", "%b0",
- "%a12(%7)", "%a13(%7)")
+ "%a12(%7)", "%a13(%7)", "%a14", "%a15", "%a16")
"mov %1, %k6 \n\t"
"test $1, %4 \n\t"
" jz 4f \n\t"
- "movzbl "MANGLE(last_coeff_flag_offset_8x8)"(%k6), %k6\n\t"
+ "movzbl "MANGLE(ff_h264_cabac_tables)"+%a17(%k6), %k6\n\t"
+
"add %11, %6 \n\t"
BRANCHLESS_GET_CABAC("%4", "(%6)", "%3", "%w3",
"%5", "%k0", "%b0",
- "%a12(%7)", "%a13(%7)")
+ "%a12(%7)", "%a13(%7)", "%a14", "%a15", "%a16")
"mov %2, %0 \n\t"
"mov %1, %k6 \n\t"
@@ -147,7 +153,11 @@ static int decode_significance_8x8_x86(CABACContext *c,
: "r"(c), "m"(minusindex), "m"(significant_coeff_ctx_base),
"m"(sig_off), "m"(last_coeff_ctx_base),
"i"(offsetof(CABACContext, bytestream)),
- "i"(offsetof(CABACContext, bytestream_end))
+ "i"(offsetof(CABACContext, bytestream_end)),
+ "i"(H264_NORM_SHIFT_OFFSET),
+ "i"(H264_LPS_RANGE_OFFSET),
+ "i"(H264_MLPS_STATE_OFFSET),
+ "i"(H264_LAST_COEFF_FLAG_OFFSET_8x8_OFFSET)
: "%"REG_c, "memory"
);
return coeff_count;
--
1.7.1
More information about the ffmpeg-devel
mailing list