[FFmpeg-devel] [PATCH] AAC Decoder - Round 2.

Sat Jun 21 17:28:23 CEST 2008

2008/6/20 Michael Niedermayer <michaelni at gmx.at>:
> [...]
>> +/**
>> + * Program config. This describes how channels are arranged.
>> + *
>> + * Either read from stream (ID_PCE) or created based on a default
>> + * fixed channel arrangement.
>> + */
>> +typedef struct {
>> +    int che_type[4][MAX_TAGID];   ///< Channel Element type with the first index the first 4 raw_data_block IDs
>> +
>> +    int mono_mixdown;         ///< The SCE tag to use if user requests mono output,   -1 if not available
>> +    int stereo_mixdown;       ///< The CPE tag to use if user requests stereo output, -1 if not available
>> +    int mixdown_coeff_index;  ///< 0-3
>> +    int pseudo_surround;      ///< Mix surround channels out of phase
>> +
>> +} program_config_struct;
>
> i would drop the _struct
> one can always write "struct program_config" or ProgramConfig
> other structs in lav* dont have struct in their names either ...

I agree. I don't like the _struct suffix on _every_ struct. Would you
prefer migration of all structs to 'ProgramConfig' style or maybe
'program_config_t' for typedef structs and 'program_config_s' for
non-typedeffed structs?

>> +    uint8_t group_len[8];
>> +    ltp_struct ltp;
>> +    ltp_struct ltp2;
>> +    const uint16_t *swb_offset;
>> +    int num_swb;
>> +    int num_windows;
>> +    int tns_max_bands;
>> +} ics_struct;
>> +
>> +/**
>> + * Temporal Noise Shaping
>> + */
>> +typedef struct {
>> +    int present;
>> +    int n_filt[8];
>> +    int length[8][4];
>> +    int direction[8][4];
>> +    int order[8][4];
>
>> +    const float *tmp2_map[8][4];
>
> maybe the question is silly but why tmp2 and not tmp ?

The tables to which these pointers point in aactab.h have tmp2
nomenclature. tns_decode_coef in the spec uses an array called tmp2[]
during its calculations. They're probably related but I'll deal with
other issues first.

> [...]
>> +/**
>> + * Dyanmic Range Control
>> + *
>> + * DRC is decoded from bitstream but not further processed.
>> + */
>> +typedef struct {
>> +    int pce_instance_tag;
>> +    int drc_tag_reserved_bits;
>> +    int dyn_rng_sgn[17];
>> +    int dyn_rng_ctl[17];
>> +    int exclude_mask[MAX_CHANNELS];
>> +    int additional_excluded_chns[MAX_CHANNELS];
>> +    int drc_band_incr;
>> +    int drc_interpolation_scheme;
>> +    int drc_band_top[17];
>> +    int prog_ref_level;
>> +    int prog_ref_level_reserved_bits;
>> +} drc_struct;
>
> does it make sense to prefix variables in a drc_struct with drc_ ?
> and dont forget to remove unused fields after it is "further processed"

I think the DRC data is currently just parsed and not actually used.
I'll remove the drc_ prefixes though.

>> +/**
>> + * Pulse tool
>> + */
>> +typedef struct {
>> +    int present;
>
>> +    int num_pulse_minus1;
>
> why minus1 ? I think its cleaner to store num_pulse
> also present and num_pulse_minus1 look redundant

This exact variable is called number_pulse in the spec so removing the
_minus1 suffix.

[...]
>> +
>> +// aux
>> +/**
>> + * Generate a sine Window.
>> + */
>> +static void sine_window_init(float *window, int n) {
>> +    const float alpha = M_PI / n;
>> +    int i;
>> +    for(i = 0; i < n/2; i++)
>> +        window[i] = sin((i + 0.5) * alpha);
>> +}
>
> greping for sine_window finds some matches in lavc, is this and the table
> duplicated?

It is used in nellymoser and imc as an MDCT sine window. Should this
function be moved to mdct.c and implemented in these two (and any
others as appropriate) codecs?

> [...]
>> +/**
>> + * Configure output channel order and optional mixing based on the current
>> + * program config element and user requested channels.
>> + *
>> + * \param nwepcs New program config struct. We only do somthing if it differs from the current one.
>
> nwepcs

Fixed, along with a number of other spelling errors.

>> +    return 0;
>> +}
>> +
>> +// Parsers implementation
>> +
>> +/**
>> + * Decode a data_stream_element
>> + * reference: Table 4.10
>> + */
>> +static int data_stream_element(AACContext * ac, GetBitContext * gb, int id) {
>> +    int byte_align = get_bits1(gb);
>
>> +    int count = get_bits(gb, 8);
>> +    if (count == 255)
>> +        count += get_bits(gb, 8);
>
> without looking at the spec shouldnt that be a while(==255) ?

Nope. According to the spec:

cnt = _count_;
if(cnt == 255)
    cnt += _esc_count_;

_count_ and _esc_count_ are both 8 bits to be read from the bitstream.

> [...]
>> +
>> +/**
>> + * Decode Individual Channel Stream info
>> + * reference: table 4.6
>> + */
>> +static int decode_ics_info(AACContext * ac, GetBitContext * gb, int common_window, ics_struct * ics) {
>> +    if (get_bits1(gb)) {
>> +        av_log(ac->avccontext, AV_LOG_ERROR, "Reserved bit set\n");
>> +        return -1;
>> +    }
>> +    ics->window_sequence = get_bits(gb, 2);
>
>> +    ics->window_shape_prev = ics->window_shape;
>> +    ics->window_shape = get_bits1(gb);
>> +    if (ics->window_shape_prev == -1)
>> +        ics->window_shape_prev = ics->window_shape;
>
> have i missed the place that sets it to -1 ?

Removed. I have no idea where that code came from... As the variable
is only one bit unsigned, I may as well change it to uint8_t rather
than int as well.

>> +    ics->num_window_groups = 1;
>> +    ics->group_len[0] = 1;
>> +    if (ics->window_sequence == EIGHT_SHORT_SEQUENCE) {
>> +        int i;
>> +        ics->max_sfb = get_bits(gb, 4);
>> +        ics->grouping = get_bits(gb, 7);
>> +        for (i = 0; i < 7; i++) {
>> +            if (ics->grouping & (1<<(6-i))) {
>> +                ics->group_len[ics->num_window_groups-1]++;
>> +            } else {
>> +                ics->num_window_groups++;
>> +                ics->group_len[ics->num_window_groups-1] = 1;
>> +            }
>> +        }
>> +        ics->swb_offset = swb_offset_128[ac->m4ac.sampling_index];
>> +        ics->num_swb = num_swb_128[ac->m4ac.sampling_index];

[...]

> also the 2 lines above could be vertically aligned

Done, plus the 2 lines below it and a similar group a few lines further down.

> [...]
>> +static void decode_tns_data(AACContext * ac, GetBitContext * gb, const ics_struct * ics, tns_struct * tns) {
>> +    int w, filt, i, coef_len, coef_res = 0, coef_compress;
>> +    for (w = 0; w < ics->num_windows; w++) {
>> +        tns->n_filt[w] = get_bits(gb, ics->window_sequence == EIGHT_SHORT_SEQUENCE ? 1 : 2);
>> +        if (tns->n_filt[w])
>> +            coef_res = get_bits1(gb) + 3;
>> +        for (filt = 0; filt < tns->n_filt[w]; filt++) {
>> +            tns->length[w][filt] = get_bits(gb, ics->window_sequence == EIGHT_SHORT_SEQUENCE ? 4 : 6);
>> +            if ((tns->order[w][filt] = get_bits(gb, ics->window_sequence == EIGHT_SHORT_SEQUENCE ? 3 : 5))) {
>> +                tns->direction[w][filt] = get_bits1(gb);
>> +                coef_compress = get_bits1(gb);
>> +                coef_len = coef_res - coef_compress;
>
>> +                tns->tmp2_map[w][filt] = tns_tmp2_map[(coef_compress << 1) + (coef_res - 3)];
>
> 2*coef_compress + coef_res - 3
> looks cleaner to me

Done.

> also some empty lines here ane there might help readability

I added a few between assignments and if() or for().

> [...]
>> +static void decode_ms_data(AACContext * ac, GetBitContext * gb, che_struct * cpe) {
>> +    ms_struct * ms = &cpe->ms;
>> +    int g, i;
>> +    ms->present = get_bits(gb, 2);
>> +    if (ms->present == 1) {
>> +        for (g = 0; g < cpe->ch[0].ics.num_window_groups; g++)
>> +            for (i = 0; i < cpe->ch[0].ics.max_sfb; i++)
>> +                ms->mask[g][i] = get_bits1(gb);// << i;
>> +    } else if (ms->present == 2) {
>> +        for (g = 0; g < cpe->ch[0].ics.num_window_groups; g++)
>
>> +            memset(ms->mask[g], 1, cpe->ch[0].ics.max_sfb);
>
> this is missing some sizeof()

I think it's because ms->mask[][] is uint8_t. I can add *
sizeof(ms->mask[g][0]) if you like.

>> +    int i, k, g;
>> +    const uint16_t * offsets = ics->swb_offset;
>> +
>> +    for (g = 0; g < ics->num_window_groups; g++) {
>> +        for (i = 0; i < ics->max_sfb; i++) {
>> +            const int cur_cb = cb[g][i];
>> +            const int dim = cur_cb >= FIRST_PAIR_HCB ? 2 : 4;
>> +            int group;
>> +            if (cur_cb == NOISE_HCB) {
>> +                for (group = 0; group < ics->group_len[g]; group++) {
>> +                    for (k = offsets[i]; k < offsets[i+1]; k++)
>> +                        icoef[group*128+k] = av_random(&ac->random_state) & 0x0000FFFF;
>> +                }
>> +            }else if (cur_cb == ZERO_HCB) {
>> +                for (group = 0; group < ics->group_len[g]; group++) {
>> +                    memset(icoef + group * 128 + offsets[i], 0, (offsets[i+1] - offsets[i])*sizeof(int));
>> +                }
>> +            }else if (cur_cb != INTENSITY_HCB2 && cur_cb != INTENSITY_HCB) {
>> +                for (group = 0; group < ics->group_len[g]; group++) {
>> +                    for (k = offsets[i]; k < offsets[i+1]; k += dim) {
>> +                        const int index = get_vlc2(gb, ac->books[cur_cb - 1].table, 6, 3);
>
>> +                        const int *ptr = &ac->vq[cur_cb - 1][index * dim], coef_idx = (group << 7) + k;
>
> vq_ptr maybe, ptr confused me for a moment until i saw the line above

I was going to use vq_ptr but went for ptr because that's what you
used in your comments. :) Changed.

>> +                        int j;
>> +                        if (index == -1) {
>> +                            av_log(ac->avccontext, AV_LOG_ERROR, "Error in spectral data\n");
>> +                            return -1;
>> +                        }
>
> can that happen at all?

Unless I misunderstand, it doesn't look like it as all the VLC tables
are unsigned. Remove?

There is more to come. :)

Rob