[Ffmpeg-devel] [PATCH] Chinese AVS video decoder

Sun Jul 2 01:36:26 CEST 2006

Some comments based on discussions on IRC and other observations.

> +typedef struct {
> +    int16_t x;
> +    int16_t y;
> +    int16_t dist;
> +    int16_t ref;
> +} vector_t;

If this is marked with __attribute__((align(8))) gcc will be able to
read/write the entire thing with one instruction on 64-bit machines.

> Index: libavcodec/cavs.c
> ===================================================================
> --- libavcodec/cavs.c	(revision 0)
> +++ libavcodec/cavs.c	(revision 0)
[...]
> +static void filter_mb(AVSContext *h, enum mb_t mb_type) {
> +    uint8_t bs[8];
[...]
> +        *((uint64_t *)bs) = 0;
[...]
> +            *((uint64_t *)bs) = 0x0202020202020202ULL;
[...]
> +        if( *((uint64_t *)bs) ) {

These will cause alignment problems on non-x86 machines.

> +static void intra_pred_vert(uint8_t *d,uint8_t *top,uint8_t *left,int stride) {
> +    int y;
> +    uint64_t a = *((uint64_t *)(&top[1]));
> +    for(y=0;y<8;y++) {
> +        *((uint64_t *)(d+y*stride)) = a;
> +    }
> +}
> +
> +static void intra_pred_horiz(uint8_t *d,uint8_t *top,uint8_t *left,int stride) {
> +    int y;
> +    uint64_t a;
> +    for(y=0;y<8;y++) {
> +        a = left[y+1] * 0x0101010101010101ULL;
> +        *((uint64_t *)(d+y*stride)) = a;
> +    }
> +}
> +
> +static void intra_pred_dc_128(uint8_t *d,uint8_t *top,uint8_t *left,int stride) {
> +    int y;
> +    uint64_t a = 0x8080808080808080ULL;
> +    for(y=0;y<8;y++)
> +        *((uint64_t *)(d+y*stride)) = a;
> +}

More alignment trouble.

> +static inline void modify_pred(const int8_t *mod_table, int *mode) {
> +    int newmode = mod_table[(int)*mode];

Useless cast.

> +    if(newmode < 0) {
> +        av_log(NULL, AV_LOG_ERROR, "Illegal intra prediction mode\n");
> +        *mode = 0;
> +    } else {
> +        *mode = newmode;
> +    }
> +}
[...]
> +static inline void veccpy(vector_t *dst, vector_t *src) {
> +    *((uint64_t *)dst) = *((uint64_t *)src);
> +}

This should be

static inline void veccpy(vector_t *dst, const vector_t *src) {
    *dst = *src;
}

This avoids aligment/aliasing bugs (as we have seen).

> +/* kth-order exponential golomb code */
> +static inline int get_ue_code(GetBitContext *gb, int order) {
> +    if(order)
> +        return (get_ue_golomb(gb) << order) + get_bits(gb,order);

The order of evaluation of operands is unspecified, so that could read
the bits in the wrong order.  The get_ue_golomb() and get_bits() calls
must be in different statements (well, strictly speaking, any sequence
point will do).

> +    return get_ue_golomb(gb);
> +}
> +
> +static int decode_residual_block(AVSContext *h, GetBitContext *gb,
> +                                 const residual_vlc_t *r, int esc_golomb_order,
> +                                 int qp, uint8_t *dst, int stride) {
> +    int i,pos = -1;
> +    int level_code, esc_code, level, run, mask;
> +    int level_buf[64];
> +    int run_buf[64];
> +    int dqm = dequant_mul[qp];
> +    int dqs = dequant_shift[qp];
> +    int dqa = 1 << (dqs - 1);
> +    const uint8_t *scantab = ff_zigzag_direct;
> +    DCTELEM block[64];
> +
> +    memset(block,0,64*sizeof(DCTELEM));

Is this memset really needed?

> +    for(i=0;i<65;i++) {
> +        level_code = get_ue_code(gb,r->golomb_order);
> +        if(level_code >= ESCAPE_CODE) {
> +            run = (level_code - ESCAPE_CODE) >> 1;
> +            esc_code = get_ue_code(gb,esc_golomb_order);
> +            level = esc_code + (run > r->max_run ? 1 : r->level_add[run]);
> +            while(level > r->inc_limit)
> +                r++;
> +            mask = -(level_code & 1);
> +            level = (level^mask) - mask;
> +        } else {
> +            if(level_code < 0)
> +                return -1;
> +            level = r->rltab[level_code][0];
> +            if(!level) //end of block signal
> +                break;
> +            run   = r->rltab[level_code][1];
> +            r += r->rltab[level_code][2];
> +        }
> +        level_buf[i] = level;
> +        run_buf[i] = run;
> +    }
> +    /* inverse scan and dequantization */
> +    for(i=i-1;i>=0;i--) {

That's an odd-looking for() construct.  I'd write while(--i) instead.

> +        pos += 1 + run_buf[i];
> +        if(pos > 63) {
> +            av_log(h->s.avctx, AV_LOG_ERROR,
> +                   "position out of block bounds at pic %d MB(%d,%d)\n",
> +                   h->picture.poc, h->mbx, h->mby);
> +            return -1;
> +        }
> +        block[scantab[pos]] = (level_buf[i]*dqm + dqa) >> dqs;
> +    }
> +    h->s.dsp.cavs_idct8_add(dst,block,stride);
> +    return 0;
> +}
[...]
> +        veccpy(&h->mv[MV_FWD_B2],(vector_t *)&un_mv);
> +        veccpy(&h->mv[MV_FWD_B3],(vector_t *)&un_mv);
> +        veccpy(&h->mv[MV_BWD_B2],(vector_t *)&un_mv);
> +        veccpy(&h->mv[MV_BWD_B3],(vector_t *)&un_mv);

Those casts (and others like them) remove a const qualifier.  Make the
second parameter to veccpy const instead.  Or just skip veccpy
entirely and write the assignment directly.  I don't see why there
would ever be any need for a more complicated function.

> +        modify_pred(left_modifier_l, &h->pred_mode_Y[4] );
> +        modify_pred(left_modifier_l, &h->pred_mode_Y[7] );
> +        modify_pred(left_modifier_c, &pred_mode_uv );

The left_modifier_[lc] arrays are declared as int_fast8_t, but
modify_pred() takes an int8_t* argument.  One of them should be
changed to match.

> +static void init_top_lines(AVSContext *h) {
> +    /* alloc top line of predictors */
> +    h->top_qp       = av_malloc( h->mb_width);
> +    h->top_mv[0]    = av_malloc((h->mb_width*2+1)*sizeof(vector_t));
> +    h->top_mv[1]    = av_malloc((h->mb_width*2+1)*sizeof(vector_t));
> +    h->top_pred_Y   = av_malloc( h->mb_width*2*sizeof(int));

I'd write h->top_pred_Y = av_malloc(h->mb_width*2*sizeof(*h->top_pred_Y),
just in case the declared type should ever change.

> +AVCodec cavs_decoder = {
> +    "cavs",
> +    CODEC_TYPE_VIDEO,
> +    CODEC_ID_CAVS,
> +    sizeof(AVSContext),
> +    cavs_decode_init,
> +    NULL,
> +    cavs_decode_end,
> +    cavs_decode_frame,
> +    CODEC_CAP_TRUNCATED | CODEC_CAP_DELAY, //FIXME is this correct ?

CODEC_CAP_TRUNCATED should go.  It's not supported.

-- 
M?ns Rullg?rd
mru at inprovide.com