[Ffmpeg-devel] [PATCH] Chinese AVS video decoder
Måns Rullgård
mru
Sun Jul 2 01:36:26 CEST 2006
Some comments based on discussions on IRC and other observations.
> +typedef struct {
> + int16_t x;
> + int16_t y;
> + int16_t dist;
> + int16_t ref;
> +} vector_t;
If this is marked with __attribute__((align(8))) gcc will be able to
read/write the entire thing with one instruction on 64-bit machines.
> Index: libavcodec/cavs.c
> ===================================================================
> --- libavcodec/cavs.c (revision 0)
> +++ libavcodec/cavs.c (revision 0)
[...]
> +static void filter_mb(AVSContext *h, enum mb_t mb_type) {
> + uint8_t bs[8];
[...]
> + *((uint64_t *)bs) = 0;
[...]
> + *((uint64_t *)bs) = 0x0202020202020202ULL;
[...]
> + if( *((uint64_t *)bs) ) {
These will cause alignment problems on non-x86 machines.
> +static void intra_pred_vert(uint8_t *d,uint8_t *top,uint8_t *left,int stride) {
> + int y;
> + uint64_t a = *((uint64_t *)(&top[1]));
> + for(y=0;y<8;y++) {
> + *((uint64_t *)(d+y*stride)) = a;
> + }
> +}
> +
> +static void intra_pred_horiz(uint8_t *d,uint8_t *top,uint8_t *left,int stride) {
> + int y;
> + uint64_t a;
> + for(y=0;y<8;y++) {
> + a = left[y+1] * 0x0101010101010101ULL;
> + *((uint64_t *)(d+y*stride)) = a;
> + }
> +}
> +
> +static void intra_pred_dc_128(uint8_t *d,uint8_t *top,uint8_t *left,int stride) {
> + int y;
> + uint64_t a = 0x8080808080808080ULL;
> + for(y=0;y<8;y++)
> + *((uint64_t *)(d+y*stride)) = a;
> +}
More alignment trouble.
> +static inline void modify_pred(const int8_t *mod_table, int *mode) {
> + int newmode = mod_table[(int)*mode];
Useless cast.
> + if(newmode < 0) {
> + av_log(NULL, AV_LOG_ERROR, "Illegal intra prediction mode\n");
> + *mode = 0;
> + } else {
> + *mode = newmode;
> + }
> +}
[...]
> +static inline void veccpy(vector_t *dst, vector_t *src) {
> + *((uint64_t *)dst) = *((uint64_t *)src);
> +}
This should be
static inline void veccpy(vector_t *dst, const vector_t *src) {
*dst = *src;
}
This avoids aligment/aliasing bugs (as we have seen).
> +/* kth-order exponential golomb code */
> +static inline int get_ue_code(GetBitContext *gb, int order) {
> + if(order)
> + return (get_ue_golomb(gb) << order) + get_bits(gb,order);
The order of evaluation of operands is unspecified, so that could read
the bits in the wrong order. The get_ue_golomb() and get_bits() calls
must be in different statements (well, strictly speaking, any sequence
point will do).
> + return get_ue_golomb(gb);
> +}
> +
> +static int decode_residual_block(AVSContext *h, GetBitContext *gb,
> + const residual_vlc_t *r, int esc_golomb_order,
> + int qp, uint8_t *dst, int stride) {
> + int i,pos = -1;
> + int level_code, esc_code, level, run, mask;
> + int level_buf[64];
> + int run_buf[64];
> + int dqm = dequant_mul[qp];
> + int dqs = dequant_shift[qp];
> + int dqa = 1 << (dqs - 1);
> + const uint8_t *scantab = ff_zigzag_direct;
> + DCTELEM block[64];
> +
> + memset(block,0,64*sizeof(DCTELEM));
Is this memset really needed?
> + for(i=0;i<65;i++) {
> + level_code = get_ue_code(gb,r->golomb_order);
> + if(level_code >= ESCAPE_CODE) {
> + run = (level_code - ESCAPE_CODE) >> 1;
> + esc_code = get_ue_code(gb,esc_golomb_order);
> + level = esc_code + (run > r->max_run ? 1 : r->level_add[run]);
> + while(level > r->inc_limit)
> + r++;
> + mask = -(level_code & 1);
> + level = (level^mask) - mask;
> + } else {
> + if(level_code < 0)
> + return -1;
> + level = r->rltab[level_code][0];
> + if(!level) //end of block signal
> + break;
> + run = r->rltab[level_code][1];
> + r += r->rltab[level_code][2];
> + }
> + level_buf[i] = level;
> + run_buf[i] = run;
> + }
> + /* inverse scan and dequantization */
> + for(i=i-1;i>=0;i--) {
That's an odd-looking for() construct. I'd write while(--i) instead.
> + pos += 1 + run_buf[i];
> + if(pos > 63) {
> + av_log(h->s.avctx, AV_LOG_ERROR,
> + "position out of block bounds at pic %d MB(%d,%d)\n",
> + h->picture.poc, h->mbx, h->mby);
> + return -1;
> + }
> + block[scantab[pos]] = (level_buf[i]*dqm + dqa) >> dqs;
> + }
> + h->s.dsp.cavs_idct8_add(dst,block,stride);
> + return 0;
> +}
[...]
> + veccpy(&h->mv[MV_FWD_B2],(vector_t *)&un_mv);
> + veccpy(&h->mv[MV_FWD_B3],(vector_t *)&un_mv);
> + veccpy(&h->mv[MV_BWD_B2],(vector_t *)&un_mv);
> + veccpy(&h->mv[MV_BWD_B3],(vector_t *)&un_mv);
Those casts (and others like them) remove a const qualifier. Make the
second parameter to veccpy const instead. Or just skip veccpy
entirely and write the assignment directly. I don't see why there
would ever be any need for a more complicated function.
> + modify_pred(left_modifier_l, &h->pred_mode_Y[4] );
> + modify_pred(left_modifier_l, &h->pred_mode_Y[7] );
> + modify_pred(left_modifier_c, &pred_mode_uv );
The left_modifier_[lc] arrays are declared as int_fast8_t, but
modify_pred() takes an int8_t* argument. One of them should be
changed to match.
> +static void init_top_lines(AVSContext *h) {
> + /* alloc top line of predictors */
> + h->top_qp = av_malloc( h->mb_width);
> + h->top_mv[0] = av_malloc((h->mb_width*2+1)*sizeof(vector_t));
> + h->top_mv[1] = av_malloc((h->mb_width*2+1)*sizeof(vector_t));
> + h->top_pred_Y = av_malloc( h->mb_width*2*sizeof(int));
I'd write h->top_pred_Y = av_malloc(h->mb_width*2*sizeof(*h->top_pred_Y),
just in case the declared type should ever change.
> +AVCodec cavs_decoder = {
> + "cavs",
> + CODEC_TYPE_VIDEO,
> + CODEC_ID_CAVS,
> + sizeof(AVSContext),
> + cavs_decode_init,
> + NULL,
> + cavs_decode_end,
> + cavs_decode_frame,
> + CODEC_CAP_TRUNCATED | CODEC_CAP_DELAY, //FIXME is this correct ?
CODEC_CAP_TRUNCATED should go. It's not supported.
--
M?ns Rullg?rd
mru at inprovide.com
More information about the ffmpeg-devel
mailing list