[FFmpeg-cvslog] VP8: optimize VP8Context struct ordering
Jason Garrett-Glaser
git
Sat Mar 12 23:02:21 CET 2011
ffmpeg | branch: master | Jason Garrett-Glaser <jason at x264.com> | Fri Mar 11 13:43:10 2011 -0800| [1eeca88691ad0fd232f110f3a389ebb494c0a6dc] | committer: Jason Garrett-Glaser
VP8: optimize VP8Context struct ordering
Shaves at least 3KB off code size on x86, should improve cache utilization.
This would probably be useful to do for other decoders/encoders as well.
> http://git.videolan.org/gitweb.cgi/ffmpeg.git/?a=commit;h=1eeca88691ad0fd232f110f3a389ebb494c0a6dc
---
libavcodec/vp8.c | 2 +-
libavcodec/vp8.h | 156 +++++++++++++++++++++++++++---------------------------
2 files changed, 79 insertions(+), 79 deletions(-)
diff --git a/libavcodec/vp8.c b/libavcodec/vp8.c
index de077c8..42f401d 100644
--- a/libavcodec/vp8.c
+++ b/libavcodec/vp8.c
@@ -474,7 +474,7 @@ void decode_mvs(VP8Context *s, VP8Macroblock *mb, int mb_x, int mb_y)
enum { EDGE_TOP, EDGE_LEFT, EDGE_TOPLEFT };
int idx = CNT_ZERO;
int cur_sign_bias = s->sign_bias[mb->ref_frame];
- int *sign_bias = s->sign_bias;
+ int8_t *sign_bias = s->sign_bias;
VP56mv near_mv[4];
uint8_t cnt[4] = { 0 };
VP56RangeCoder *c = &s->c;
diff --git a/libavcodec/vp8.h b/libavcodec/vp8.h
index b94d453..2db056f 100644
--- a/libavcodec/vp8.h
+++ b/libavcodec/vp8.h
@@ -85,83 +85,24 @@ typedef struct {
typedef struct {
AVCodecContext *avctx;
- DSPContext dsp;
- VP8DSPContext vp8dsp;
- H264PredContext hpc;
- vp8_mc_func put_pixels_tab[3][3][3];
- AVFrame frames[4];
AVFrame *framep[4];
uint8_t *edge_emu_buffer;
- VP56RangeCoder c; ///< header context, includes mb modes and motion vectors
- int profile;
- int mb_width; /* number of horizontal MB */
- int mb_height; /* number of vertical MB */
+ uint16_t mb_width; /* number of horizontal MB */
+ uint16_t mb_height; /* number of vertical MB */
int linesize;
int uvlinesize;
- int keyframe;
- int invisible;
- int update_last; ///< update VP56_FRAME_PREVIOUS with the current one
- int update_golden; ///< VP56_FRAME_NONE if not updated, or which frame to copy if so
- int update_altref;
- int deblock_filter;
-
- /**
- * If this flag is not set, all the probability updates
- * are discarded after this frame is decoded.
- */
- int update_probabilities;
-
- /**
- * All coefficients are contained in separate arith coding contexts.
- * There can be 1, 2, 4, or 8 of these after the header context.
- */
- int num_coeff_partitions;
- VP56RangeCoder coeff_partition[8];
-
- VP8Macroblock *macroblocks;
- VP8Macroblock *macroblocks_base;
- VP8FilterStrength *filter_strength;
-
- uint8_t *intra4x4_pred_mode_top;
- uint8_t intra4x4_pred_mode_left[4];
- uint8_t *segmentation_map;
-
- /**
- * Cache of the top row needed for intra prediction
- * 16 for luma, 8 for each chroma plane
- */
- uint8_t (*top_border)[16+8+8];
-
- /**
- * For coeff decode, we need to know whether the above block had non-zero
- * coefficients. This means for each macroblock, we need data for 4 luma
- * blocks, 2 u blocks, 2 v blocks, and the luma dc block, for a total of 9
- * per macroblock. We keep the last row in top_nnz.
- */
- uint8_t (*top_nnz)[9];
- DECLARE_ALIGNED(8, uint8_t, left_nnz)[9];
-
- /**
- * This is the index plus one of the last non-zero coeff
- * for each of the blocks in the current macroblock.
- * So, 0 -> no coeffs
- * 1 -> dc-only (special transform)
- * 2+-> full transform
- */
- DECLARE_ALIGNED(16, uint8_t, non_zero_count_cache)[6][4];
- DECLARE_ALIGNED(16, DCTELEM, block)[6][4][16];
- DECLARE_ALIGNED(16, DCTELEM, block_dc)[16];
- uint8_t intra4x4_pred_mode_mb[16];
-
- int chroma_pred_mode; ///< 8x8c pred mode of the current macroblock
- int segment; ///< segment of the current macroblock
+ uint8_t keyframe;
+ uint8_t deblock_filter;
+ uint8_t mbskip_enabled;
+ uint8_t segment; ///< segment of the current macroblock
+ uint8_t chroma_pred_mode; ///< 8x8c pred mode of the current macroblock
+ uint8_t profile;
VP56mv mv_min;
VP56mv mv_max;
- int mbskip_enabled;
- int sign_bias[4]; ///< one state [0, 1] per ref frame type
+ int8_t sign_bias[4]; ///< one state [0, 1] per ref frame type
int ref_count[3];
/**
@@ -170,13 +111,26 @@ typedef struct {
* a frame, since the values persist between interframes.
*/
struct {
- int enabled;
- int absolute_vals;
- int update_map;
+ uint8_t enabled;
+ uint8_t absolute_vals;
+ uint8_t update_map;
int8_t base_quant[4];
int8_t filter_level[4]; ///< base loop filter level
} segmentation;
+ struct {
+ uint8_t simple;
+ uint8_t level;
+ uint8_t sharpness;
+ } filter;
+
+ VP8Macroblock *macroblocks;
+ VP8FilterStrength *filter_strength;
+
+ uint8_t *intra4x4_pred_mode_top;
+ uint8_t intra4x4_pred_mode_left[4];
+ uint8_t *segmentation_map;
+
/**
* Macroblocks can have one of 4 different quants in a frame when
* segmentation is enabled.
@@ -190,13 +144,7 @@ typedef struct {
} qmat[4];
struct {
- int simple;
- int level;
- int sharpness;
- } filter;
-
- struct {
- int enabled; ///< whether each mb can have a different strength based on mode/ref
+ uint8_t enabled; ///< whether each mb can have a different strength based on mode/ref
/**
* filter strength adjustment for the following macroblock modes:
@@ -220,6 +168,34 @@ typedef struct {
} lf_delta;
/**
+ * Cache of the top row needed for intra prediction
+ * 16 for luma, 8 for each chroma plane
+ */
+ uint8_t (*top_border)[16+8+8];
+
+ /**
+ * For coeff decode, we need to know whether the above block had non-zero
+ * coefficients. This means for each macroblock, we need data for 4 luma
+ * blocks, 2 u blocks, 2 v blocks, and the luma dc block, for a total of 9
+ * per macroblock. We keep the last row in top_nnz.
+ */
+ uint8_t (*top_nnz)[9];
+ DECLARE_ALIGNED(8, uint8_t, left_nnz)[9];
+
+ /**
+ * This is the index plus one of the last non-zero coeff
+ * for each of the blocks in the current macroblock.
+ * So, 0 -> no coeffs
+ * 1 -> dc-only (special transform)
+ * 2+-> full transform
+ */
+ DECLARE_ALIGNED(16, uint8_t, non_zero_count_cache)[6][4];
+ VP56RangeCoder c; ///< header context, includes mb modes and motion vectors
+ DECLARE_ALIGNED(16, DCTELEM, block)[6][4][16];
+ DECLARE_ALIGNED(16, DCTELEM, block_dc)[16];
+ uint8_t intra4x4_pred_mode_mb[16];
+
+ /**
* These are all of the updatable probabilities for binary decisions.
* They are only implicitly reset on keyframes, making it quite likely
* for an interframe to desync if a prior frame's header was corrupt
@@ -236,6 +212,30 @@ typedef struct {
uint8_t token[4][16][3][NUM_DCT_TOKENS-1];
uint8_t mvc[2][19];
} prob[2];
+
+ VP8Macroblock *macroblocks_base;
+ int invisible;
+ int update_last; ///< update VP56_FRAME_PREVIOUS with the current one
+ int update_golden; ///< VP56_FRAME_NONE if not updated, or which frame to copy if so
+ int update_altref;
+
+ /**
+ * If this flag is not set, all the probability updates
+ * are discarded after this frame is decoded.
+ */
+ int update_probabilities;
+
+ /**
+ * All coefficients are contained in separate arith coding contexts.
+ * There can be 1, 2, 4, or 8 of these after the header context.
+ */
+ int num_coeff_partitions;
+ VP56RangeCoder coeff_partition[8];
+ DSPContext dsp;
+ VP8DSPContext vp8dsp;
+ H264PredContext hpc;
+ vp8_mc_func put_pixels_tab[3][3][3];
+ AVFrame frames[4];
} VP8Context;
#endif
More information about the ffmpeg-cvslog mailing list