[FFmpeg-devel] [PATCH] Native VP9 decoder.

Sun Sep 29 22:38:21 CEST 2013

Hi,

On Sun, Sep 29, 2013 at 2:53 PM, Derek Buitenhuis <
derek.buitenhuis at gmail.com> wrote:

> > Authors: Ronald S. Bultje <rsbultje gmail com>,
> >          Clement Boesch <u pkh me>
> > ---
>
> [...]
>
> > diff --git a/Changelog b/Changelog
> > index de863d1..8c21f22 100644
> > --- a/Changelog
> > +++ b/Changelog
> > @@ -29,6 +29,7 @@ version <next>
> >  - Lossless and alpha support for WebP decoder
> >  - Error Resilient AAC syntax (ER AAC LC) decoding
> >  - Low Delay AAC (ER AAC LD) decoding
> > +- native vp9 decoder
>
> Diego-nit: Capitalize.

'vp9' -> 'VP9' right? Done.

> > diff --git a/configure b/configure
> > index e0a6073..a3cce31 100755
> > --- a/configure
> > +++ b/configure
> > @@ -1905,6 +1905,7 @@ vp6_decoder_select="h264chroma hpeldsp huffman
> videodsp vp3dsp"
> >  vp6a_decoder_select="vp6_decoder"
> >  vp6f_decoder_select="vp6_decoder"
> >  vp8_decoder_select="h264pred videodsp"
> > +vp9_decoder_select="videodsp"
>
> Doesn't it rely on the vp8 decoder?
>

No. Maybe the bilinear filter will be shared but that can be done at the
object level and doesn't require a full decoder dependency.

> +enum CompPredMode {
> > +    PRED_SINGLEREF,
> > +    PRED_COMPREF,
> > +    PRED_SWITCHABLE,
> > +};
> > +
> > +enum BlockLevel {
> > +    BL_64X64,
> > +    BL_32X32,
> > +    BL_16X16,
> > +    BL_8X8,
> > +};
> > +
> > +enum BlockSize {
> > +    BS_64x64,
> > +    BS_64x32,
> > +    BS_32x64,
> > +    BS_32x32,
> > +    BS_32x16,
> > +    BS_16x32,
> > +    BS_16x16,
> > +    BS_16x8,
> > +    BS_8x16,
> > +    BS_8x8,
> > +    BS_8x4,
> > +    BS_4x8,
> > +    BS_4x4,
> > +    N_BS_SIZES,
> > +};
>
> These seems pretty generic -- do we not have any such thing existing?
>

Not that I can see... Note most decoders are limited to 16x16 sizes and
this is the first decoder to have 64x64 blocksizes. It's hard to imagine
some of this is shared between codecs.

> +struct mv_storage {
> > +    VP56mv mv[2];
> > +    int8_t ref[2];
> > +};
> > +
> > +struct VP9Filter {
> > +    uint8_t level[8 * 8];
> > +    uint8_t /* bit=col */ mask[2 /* 0=y, 1=uv */][2 /* 0=col, 1=row */]
> > +                              [8 /* rows */][4 /* 0=16, 1=8, 2=4,
> 3=inner4 */];
> > +};
>
> No typedefs?
>

Nah, I can but it wouldn't really make it easier IMO.

> > +
> > +typedef struct {
> > +    unsigned seg_id:3, intra:1, comp:1, ref[2], mode[4], uvmode, skip:1;
> > +    enum FilterMode filter;
> > +    VP56mv mv[4 /* b_idx */][2 /* ref */];
>
> My first time seening such inline comments -- bizarre ;).
>

I like this. If you prefer enums or so I can add them, but this is pretty
simple.

> > +    enum BlockSize bs;
> > +    enum TxfmMode tx, uvtx;
> > +
> > +    int row, row7, col, col7;
> > +    uint8_t *dst[3];
> > +    ptrdiff_t y_stride, uv_stride;
> > +} VP9Block;
>
> We stopped using anonymous structs a while ago. Dunno why,
> but this should be changed for consistency.
>

Done.

> > +    unsigned profile:2;
> > +    unsigned keyframe:1, last_keyframe:1;
> > +    unsigned invisible:1, last_invisible:1;
> > +    unsigned use_last_frame_mvs:1;
> > +    unsigned errorres:1;
> > +    unsigned colorspace:3;
> > +    unsigned fullrange:1;
> > +    unsigned intraonly:1;
> > +    unsigned resetctx:2;
> > +    unsigned refreshrefmask:8;
> > +    unsigned highprecisionmvs:1;
> > +    enum FilterMode filtermode:3;
> > +    unsigned allowcompinter:1;
> > +    unsigned fixcompref:2;
> > +    unsigned refreshctx:1;
> > +    unsigned parallelmode:1;
> > +    unsigned framectxid:2;
>
> Same comment as other reviewer about bitfields.
>

Removed.

> +    struct {
> > +        unsigned level:6;
> > +        int8_t sharpness;
> > +        uint8_t lim_lut[64];
> > +        uint8_t mblim_lut[64];
> > +    } filter;
> > +    struct {
> > +        unsigned enabled:1;
> > +        int8_t mode[2] /* :6+1 */;
> > +        int8_t ref[4] /* :6+1 */;
> > +    } lf_delta;
> > +    uint8_t yac_qi;
> > +    int8_t ydc_qdelta, uvdc_qdelta, uvac_qdelta;
> > +    unsigned lossless:1;
> > +    struct {
> > +        unsigned enabled:1;
> > +        unsigned temporal:1;
> > +        unsigned absolute_vals:1;
> > +        unsigned update_map:1;
> > +        struct {
> > +            unsigned q_enabled:1;
> > +            unsigned lf_enabled:1;
> > +            unsigned ref_enabled:1;
> > +            unsigned skip_enabled:1;
> > +            unsigned ref_val:2;
> > +            int16_t q_val /* :8+1 */;
> > +            int8_t lf_val /* :6+1 */;
> > +            int16_t qmul[2][2];
> > +            uint8_t lflvl[4][2];
> > +        } feat[8];
> > +    } segmentation;
>
> I cannot remember -- does c99conv handle embedded anonymous structs?
>

Yes.

> > +static int update_size(AVCodecContext *ctx, int w, int h)
>
> This only ever returns 0. Should be void type.
>
> > +{
> > +    VP9Context *s = ctx->priv_data;
> > +
> > +    if (s->above_partition_ctx && w == ctx->width && h == ctx->height)
> > +        return 0;
>
> Due to unchecked mallocs below, this can possibly be a NULL
> pointer on subsequent calls.
>

Fixed second comment, first now no longer applies.

> > +    av_free(s->above_partition_ctx);
>
> av_freep?
>
> > +    s->above_partition_ctx = av_malloc(s->sb_cols * 368);
>

It's assigned directly after so not necessary.

> [...]
>
> > +    s->mv[0] = av_malloc(sizeof(*s->mv[0]) * s->sb_cols * s->sb_rows *
> 64);
> > +    s->mv[1] = av_malloc(sizeof(*s->mv[1]) * s->sb_cols * s->sb_rows *
> 64);
> > +    s->lflvl = av_malloc(sizeof(*s->lflvl) * s->sb_cols);
>
> Unchecked mallocs. Not cool.
>

Fixed.

> > +static av_always_inline int inv_recenter_nonneg(int v, int m)
> > +{
> > +    return v > 2 * m ? v : v & 1 ? m - ((v + 1) >> 1) : m + (v >> 1);
>
> Did a code obfuscator generate this?
>

Heh :) It's a range-check. At the end, you're trying to accomplish that for
a given range 0-A, with current value B, you can code (in the bitstream) a
varlen-coded difference, where the length depends on how far off you are
from B. This difference is then specified as an absolute number to have
equal distribution on either side of B.

Example: you're a range 0-255 and your current value is 10. To code a
difference of -10 to 10 (output range 0-20) means the absolute difference
can have two sign values (- and +). However, beyond absolute difference 10,
the sign is known (it is always +) because otherwise the output would be
outside the valid range 0-255. So for absdiff 0-10, we have a signbit (so
coded difference values 0-20 mean 0-10 with the lowest bit being the sign).
Beyond codeddiff 20, we know the sign, so it's only a difference value
(where coded from 20 means absolute from 10).

This code and the branch in the caller basically do that.

> > +// differential forward probability updates
> > +static int update_prob(VP56RangeCoder *c, int p)
> > +{
> > +    static const int inv_map_table[254] = {
> > +          7,  20,  33,  46,  59,  72,  85,  98, 111, 124, 137, 150,
> 163, 176,
> > +        189, 202, 215, 228, 241, 254,   1,   2,   3,   4,   5,   6,
> 8,   9,
> > +         10,  11,  12,  13,  14,  15,  16,  17,  18,  19,  21,  22,
>  23,  24,
> > +         25,  26,  27,  28,  29,  30,  31,  32,  34,  35,  36,  37,
>  38,  39,
> > +         40,  41,  42,  43,  44,  45,  47,  48,  49,  50,  51,  52,
>  53,  54,
> > +         55,  56,  57,  58,  60,  61,  62,  63,  64,  65,  66,  67,
>  68,  69,
> > +         70,  71,  73,  74,  75,  76,  77,  78,  79,  80,  81,  82,
>  83,  84,
> > +         86,  87,  88,  89,  90,  91,  92,  93,  94,  95,  96,  97,
>  99, 100,
> > +        101, 102, 103, 104, 105, 106, 107, 108, 109, 110, 112, 113,
> 114, 115,
> > +        116, 117, 118, 119, 120, 121, 122, 123, 125, 126, 127, 128,
> 129, 130,
> > +        131, 132, 133, 134, 135, 136, 138, 139, 140, 141, 142, 143,
> 144, 145,
> > +        146, 147, 148, 149, 151, 152, 153, 154, 155, 156, 157, 158,
> 159, 160,
> > +        161, 162, 164, 165, 166, 167, 168, 169, 170, 171, 172, 173,
> 174, 175,
> > +        177, 178, 179, 180, 181, 182, 183, 184, 185, 186, 187, 188,
> 190, 191,
> > +        192, 193, 194, 195, 196, 197, 198, 199, 200, 201, 203, 204,
> 205, 206,
> > +        207, 208, 209, 210, 211, 212, 213, 214, 216, 217, 218, 219,
> 220, 221,
> > +        222, 223, 224, 225, 226, 227, 229, 230, 231, 232, 233, 234,
> 235, 236,
> > +        237, 238, 239, 240, 242, 243, 244, 245, 246, 247, 248, 249,
> 250, 251,
> > +        252, 253,
> > +    };
>
> Not sure of the perf impact of having this inside the function  -- I
> assume the
> compiler s not dumb enough to screw this up.
>

It's just to not clutter the namespace in the rest of the file.

> +#define VP9_SYNCCODE 0x498342
>
> Move to top and/or header?
>

Done

> +static int decode_frame_header(AVCodecContext *ctx,
> > +                               const uint8_t *data, int size)
> > +{
>
> I assume this is using the safe bitreader (due to lack of remaining
> bits check(s))?
>
> > +    if (get_bits1(&s->gb)) // reserved bit
> > +        return AVERROR_INVALIDDATA;
>
> Should this not be a warning? Perhaps use explode mode?
>
> > +    if (get_bits1(&s->gb)) {
> > +        int ref = get_bits(&s->gb, 3);
> > +        av_log(ctx, AV_LOG_ERROR, "Directly show frame %d\n", ref);
> > +        return -1;
> > +    }
>
> What is this checking and why is it returning -1 instead of an AVERROR?
>
> > +    s->last_keyframe = s->keyframe;
> > +    s->keyframe  = !get_bits1(&s->gb);
> > +    s->last_invisible = s->invisible;
> > +    s->invisible = !get_bits1(&s->gb);
> > +    s->errorres  = get_bits1(&s->gb);
>
> Diego-nit: align.
>
> > +    // FIXME disable this upon resolution change
> > +    s->use_last_frame_mvs = !s->errorres && !s->last_invisible;
> > +    if (s->keyframe) {
> > +        if (get_bits_long(&s->gb, 24) != VP9_SYNCCODE) // synccode
> > +            return AVERROR_INVALIDDATA;
>
> av_log(ctx, AV_LOG_ERROR, "Sync code not found.\n");
>
> > +        s->colorspace = get_bits(&s->gb, 3);
> > +        if (s->colorspace == 7) // RGB = profile 1
> > +            return AVERROR_INVALIDDATA;
>
> av_log(ctx, AV_LOG_ERROR, "Unsupported colorspace: <...>\n");
> return AVERROR_PATCHESWELCOME;
>
> > +    } else {
> > +        s->intraonly  = s->invisible ? get_bits1(&s->gb) : 0;
> > +        s->resetctx   = s->errorres ? 0 : get_bits(&s->gb, 2);
> > +        if (s->intraonly) {
> > +            if (get_bits_long(&s->gb, 24) != VP9_SYNCCODE) // synccode
> > +                return AVERROR_INVALIDDATA;
>
> av_log(ctx, AV_LOG_ERROR, "Sync code not found.\n");
>
> > +            s->refreshrefmask = get_bits(&s->gb, 8);
> > +            w = get_bits(&s->gb, 16) + 1;
> > +            h = get_bits(&s->gb, 16) + 1;
> > +            if (get_bits1(&s->gb)) // display size
> > +                skip_bits(&s->gb, 32);
> > +        } else {
> > +            s->refreshrefmask = get_bits(&s->gb, 8);
> > +            s->refidx[0]    = get_bits(&s->gb, 3);
> > +            s->signbias[0]  = get_bits1(&s->gb);
> > +            s->refidx[1]    = get_bits(&s->gb, 3);
> > +            s->signbias[1]  = get_bits1(&s->gb);
> > +            s->refidx[2]    = get_bits(&s->gb, 3);
> > +            s->signbias[2]  = get_bits1(&s->gb);
>
> Diego-nit: align. I won't note these anymore.
>
> > +            if (!s->refs[s->refidx[0]] || !s->refs[s->refidx[1]] ||
> > +                !s->refs[s->refidx[2]])
> > +                return AVERROR_INVALIDDATA;
>
> Add an av_log() for whatever this checks... which is unclear to me. No
> valid
> refs?
>
> > +    // next 16 bits is size of the rest of the header (arith-coded)
> > +    size2 = get_bits(&s->gb, 16);
> > +    data2 = align_get_bits(&s->gb);
> > +    if (size2 > size - (data2 - data))
> > +        return AVERROR_INVALIDDATA;
>
> av_log(ctx, AV_LOG_ERROR, "Invalid <...> header size: %d.\n", size2);
>
> > +    ff_vp56_init_range_decoder(&s->c, data2, size2);
> > +    if (vp56_rac_get_prob_branchy(&s->c, 128)) // marker bit
> > +        return AVERROR_INVALIDDATA;
>
> av_log(ctx, AV_LOG_ERROR, "Could not get RAC probability.\n");
>

All done.

> > +
> > +    if (s->keyframe || s->intraonly) {
> > +        memset(s->counts.coef, 0, sizeof(s->counts.coef) +
> sizeof(s->counts.eob));
>
> Is eob sanity checked anywhere?
>

What do you mean? It's a counter for updating next-frame probabilities to
the distribution in this (i.e. adaptivity).

> +    for (i = 0; i < 3; i++)
> > +        if (vp56_rac_get_prob_branchy(&s->c, 252)) {
> > +            s->prob.p.skip[i] = update_prob(&s->c, s->prob.p.skip[i]);
> > +        }
>
> Un-needed braces. I missed a whole lot of these, so no longer commenting
> on them.
>

Fixed.

> > +static const uint8_t bwh_tab[2][N_BS_SIZES][2] = {
> > +    {
> > +        { 16, 16 }, { 16, 8 }, { 8, 16 }, { 8, 8 }, { 8, 4 }, { 4, 8 },
> > +        { 4, 4 }, { 4, 2 }, { 2, 4 }, { 2, 2 }, { 2, 1 }, { 1, 2 }, {
> 1, 1 },
> > +    }, {
> > +        { 8, 8 }, { 8, 4 }, { 4, 8 }, { 4, 4 }, { 4, 2 }, { 2, 4 },
> > +        { 2, 2 }, { 2, 1 }, { 1, 2 }, { 1, 1 }, { 1, 1 }, { 1, 1 }, {
> 1, 1 },
> > +    }
> > +};
>
> Move to top?
>

Done.

> > +#if 0
> > +                // FIXME can this codeblob be replaced by some sort of
> LUT?
> > +                // ref=intra?0:ref+1, comp=comp?fixcompref+1:0
> > +                // x=lut[aref][acomp][lref][lcomp];
> > +                // then probably a separate 2d lut for the case where
> only a
> > +                // or l is available, and the fixed constant for
> neither a nor
> > +                // l, so you have c = have_a && have_l ? lut4[...] :
> > +                //                    have_a ? lut2[..] : have_l ?
> lut2[..] : X;
>
> Will this be looked into, or will this if 0 be removed?
>

Clement was working on this.

Ronald