[FFmpeg-devel] AAC decoder round 7

Robert Swain robert.swain
Tue Aug 12 19:45:35 CEST 2008


2008/8/12 Michael Niedermayer <michaelni at gmx.at>:
> On Tue, Aug 12, 2008 at 03:08:09PM +0100, Robert Swain wrote:
>> 2008/8/11 Michael Niedermayer <michaelni at gmx.at>:
>> > On Mon, Aug 11, 2008 at 12:50:09PM +0100, Robert Swain wrote:
>> >> $subj
>> >>
>> >> Regards,
>> >> Rob
>> >
>> > [...]
>> >
>> >> Index: libavcodec/aactab.c
>> >> ===================================================================
>> >> --- libavcodec/aactab.c       (revision 14694)
>> >> +++ libavcodec/aactab.c       (working copy)
>> >> @@ -32,6 +32,11 @@
>> >>
>> >>  #include <stdint.h>
>> >>
>> >> +DECLARE_ALIGNED(16, float,  ff_aac_kbd_long_1024[1024]);
>> >> +DECLARE_ALIGNED(16, float,  ff_aac_kbd_short_128[128]);
>> >
>> >> +DECLARE_ALIGNED(16, float, ff_aac_sine_long_1024[1024]);
>> >> +DECLARE_ALIGNED(16, float, ff_aac_sine_short_128[128]);
>> >
>> > i think these can be shared with at least wma
>> > i dont remember what vorbis used?
>>
>> WMA has variable window sizes and I think it would be awkward for it
>> to use just these two arrays unless it has all its arrays declared in
>> a similar way.
>
> but it would not be awkward for aac to use 2 of the wma windows, if they
> where in a common c/h file and non static.

OK, I'll make a patch for wma. The 'long' and 'short' are somewhat
redundant and non-specific information considering the size of the
array is in the name so I'll probably drop that and make it
ff_sine_#[].

>> "Vorbis windows all use the slope function $y = \sin(.5*\pi \,
>> \sin^2((x+.5)/n*\pi))$."
>>
>> > [...]
>> >> @@ -380,7 +465,42 @@
>> >>      ics->use_kb_window[0] = get_bits1(gb);
>> >>      ics->num_window_groups = 1;
>> >>      ics->group_len[0] = 1;
>> >> +    if (ics->window_sequence[0] == EIGHT_SHORT_SEQUENCE) {
>> >> +        int i;
>> >> +        ics->max_sfb = get_bits(gb, 4);
>> >> +        for (i = 0; i < 7; i++) {
>> >> +            if (get_bits1(gb)) {
>> >> +                ics->group_len[ics->num_window_groups-1]++;
>> >> +            } else {
>> >> +                ics->num_window_groups++;
>> >> +                ics->group_len[ics->num_window_groups-1] = 1;
>> >> +            }
>> >> +        }
>> >
>> >> +        ics->swb_offset =     swb_offset_128[ac->m4ac.sampling_index];
>> >> +        ics->num_swb    = ff_aac_num_swb_128[ac->m4ac.sampling_index];
>> >> +        ics->num_windows   = 8;
>> >> +        ics->tns_max_bands = tns_max_bands_128[ac->m4ac.sampling_index];
>> >> +    } else {
>> >> +        ics->max_sfb    =                              get_bits(gb, 6);
>> >> +        ics->swb_offset =     swb_offset_1024[ac->m4ac.sampling_index];
>> >> +        ics->num_swb    = ff_aac_num_swb_1024[ac->m4ac.sampling_index];
>> >> +        ics->num_windows   = 1;
>> >> +        ics->tns_max_bands = tns_max_bands_1024[ac->m4ac.sampling_index];
>> >
>> > vertical align
>>
>> Would you prefer to have them aligned such that the array indices are
>> aligned as well? I think it looks better this way but you may not.
>
> yes i prefer them aligned as well

Done.

>> >>  /**
>> >> + * Decode spectral data; reference: table 4.50.
>> >> + *
>> >> + * @param   band_type   array of the used band type
>> >> + * @param   icoef       array of quantized spectral data
>> >> + *
>> >> + * @return  Returns error status. 0 - OK, !0 - error
>> >> + */
>> >> +static int decode_spectrum(AACContext * ac, int icoef[1024], GetBitContext * gb,
>> >> +        const IndividualChannelStream * ics, enum BandType band_type[120]) {
>> >> +    int i, k, g, idx = 0;
>> >> +    const uint16_t * offsets = ics->swb_offset;
>> >> +
>> >> +    for (g = 0; g < ics->num_window_groups; g++) {
>> >> +        for (i = 0; i < ics->max_sfb; i++, idx++) {
>> >> +            const int cur_band_type = band_type[idx];
>> >> +            const int dim = cur_band_type >= FIRST_PAIR_BT ? 2 : 4;
>> >> +            const int is_cb_unsigned = IS_CODEBOOK_UNSIGNED(cur_band_type);
>> >> +            int group;
>> >> +            if (cur_band_type == ZERO_BT) {
>> >> +                for (group = 0; group < ics->group_len[g]; group++) {
>> >> +                    memset(icoef + group * 128 + offsets[i], 0, (offsets[i+1] - offsets[i])*sizeof(int));
>> >> +                }
>> >> +            }else if (cur_band_type != NOISE_BT && cur_band_type != INTENSITY_BT2 && cur_band_type != INTENSITY_BT) {
>> >> +                for (group = 0; group < ics->group_len[g]; group++) {
>> >> +                    for (k = offsets[i]; k < offsets[i+1]; k += dim) {
>> >> +                        const int index = get_vlc2(gb, vlc_spectral[cur_band_type - 1].table, 6, 3);
>> >> +                        const int coef_idx = (group << 7) + k;
>> >> +                        const int8_t *vq_ptr;
>> >> +                        int j;
>> >> +                        if(index >= ff_aac_spectral_sizes[cur_band_type - 1]) {
>> >> +                            av_log(ac->avccontext, AV_LOG_ERROR,
>> >> +                                "Read beyond end of ff_aac_codebook_vectors[%d][]. index %d >= %d\n",
>> >> +                                cur_band_type - 1, index, ff_aac_spectral_sizes[cur_band_type - 1]);
>> >> +                            return -1;
>> >> +                        }
>> >> +                        vq_ptr = &ff_aac_codebook_vectors[cur_band_type - 1][index * dim];
>> >> +                        if (is_cb_unsigned) {
>> >> +                            for (j = 0; j < dim; j++)
>> >> +                                if (vq_ptr[j])
>> >> +                                    icoef[coef_idx + j] = 1 - 2*get_bits1(gb);
>> >> +                        }else {
>> >> +                            for (j = 0; j < dim; j++)
>> >> +                                icoef[coef_idx + j] = 1;
>> >> +                        }
>> >> +                        if (cur_band_type == ESC_BT) {
>> >> +                            for (j = 0; j < 2; j++) {
>> >> +                                if (vq_ptr[j] == 16) {
>> >> +                                    int n = 4;
>> >> +                                    /* The total length of escape_sequence must be < 22 bits according
>> >> +                                       to the specification (i.e. max is 11111111110xxxxxxxxxx). */
>> >> +                                    while (get_bits1(gb) && n < 15) n++;
>> >> +                                    if(n == 15) {
>> >> +                                        av_log(ac->avccontext, AV_LOG_ERROR, "error in spectral data, ESC overflow\n");
>> >> +                                        return -1;
>> >> +                                    }
>> >> +                                    icoef[coef_idx + j] *= (1<<n) + get_bits(gb, n);
>> >> +                                }else
>> >> +                                    icoef[coef_idx + j] *= vq_ptr[j];
>> >> +                            }
>> >> +                        }else
>> >> +                            for (j = 0; j < dim; j++)
>> >> +                                icoef[coef_idx + j] *= vq_ptr[j];
>> >> +                    }
>> >> +                }
>> >> +            }
>> >> +        }
>> >> +        icoef += ics->group_len[g]<<7;
>> >> +    }
>> >> +    return 0;
>> >> +}
>> >> +
>> >> +/**
>> >>   * Add pulses with particular amplitudes to the quantized spectral data; reference: 4.6.3.3.
>> >>   *
>> >>   * @param   pulse   pointer to pulse data struct
>> >> @@ -538,6 +780,46 @@
>> >>  }
>> >>
>> >>  /**
>> >> + * Dequantize and scale spectral data; reference: 4.6.3.3.
>> >> + *
>> >> + * @param   icoef       array of quantized spectral data
>> >> + * @param   band_type   array of the used band type
>> >> + * @param   sf          array of scalefactors or intensity stereo positions
>> >> + * @param   coef        array of dequantized, scaled spectral data
>> >> + */
>> >> +static void dequant(AACContext * ac, float coef[1024], const int icoef[1024], float sf[120],
>> >> +        const IndividualChannelStream * ics, enum BandType band_type[120]) {
>> >> +    const uint16_t * offsets = ics->swb_offset;
>> >> +    const int c = 1024/ics->num_windows;
>> >> +    int g, i, group, k, idx = 0;
>> >> +
>> >> +    for (g = 0; g < ics->num_windows; g++)
>> >> +        memset(coef + g * 128 + offsets[ics->max_sfb], 0, sizeof(float)*(c - offsets[ics->max_sfb]));
>> >> +
>> >> +    for (g = 0; g < ics->num_window_groups; g++) {
>> >> +        for (i = 0; i < ics->max_sfb; i++, idx++) {
>> >> +            if (band_type[idx] == NOISE_BT) {
>> >> +                const float scale = sf[idx] / ((offsets[i+1] - offsets[i]) * PNS_MEAN_ENERGY);
>> >> +                for (group = 0; group < ics->group_len[g]; group++) {
>> >> +                    for (k = offsets[i]; k < offsets[i+1]; k++) {
>> >> +                        ac->random_state  = lcg_random(ac->random_state);
>> >> +                        coef[group*128+k] = ac->random_state * scale;
>> >> +                    }
>> >> +                }
>> >> +            } else if (band_type[idx] != INTENSITY_BT && band_type[idx] != INTENSITY_BT2) {
>> >> +                for (group = 0; group < ics->group_len[g]; group++) {
>> >> +                    for (k = offsets[i]; k < offsets[i+1]; k++) {
>> >> +                        coef[group*128+k] = ivquant(icoef[group*128+k]) * sf[idx];
>> >> +                    }
>> >> +                }
>> >> +            }
>> >> +        }
>> >> +        coef  += ics->group_len[g]*128;
>> >> +        icoef += ics->group_len[g]*128;
>> >> +    }
>> >> +}
>> >> +
>> >> +/**
>> >
>> > dequant and decode_spectrum() can be merged, especially the VQ tables can
>> > already include ivquant().
>> > and yes i know that this means add_pulses will be more tricky
>>
>> See attached (20080812-1422-merge_decspec_dequant.diff). It's not that
>> nice to look at and svn diff makes it worse. Suggestions to improve it
>> very welcome. Making add_pulse() recursive seemed like a good solution
>> to avoid refactoring the main loops and conditions but maybe you have
>> a better idea.
>
> yes i do :)
> for(each pulse){
>    float c= coeff[ pos[i] ];
>    float ic= c / sqrtf(sqrtf(fabsf(c))) + amp[i];
>    coeff[ pos[i] ]= cbrtf(fabsf(ic)) * ic;
> }
>
> This idea is of course based on the assumtation that there are few pulses
> compared to the number of coefficients, which as far as i understand is true
> as there are max 4 pulses IIRC

You are correct. Good idea, though I don't know why I didn't think of
doing it like that. :) See attached.

>> > [...]
>> >> +
>> >> +/**
>> >> + * Convert spectral data to float samples, applying all supported tools as appropriate.
>> >> + */
>> >> +static void spectral_to_sample(AACContext * ac) {
>> >> +    int i, type;
>> >> +    for (i = 0; i < MAX_ELEM_ID; i++) {
>> >> +        for(type = 0; type < 4; type++) {
>> >> +            ChannelElement *che = ac->che[type][i];
>> >> +            if(che) {
>> >> +                if(type == TYPE_CCE && che->coup.coupling_point == BEFORE_TNS)
>> >> +                    apply_channel_coupling(ac, che, apply_dependent_coupling);
>> >> +                if(che->ch[0].tns.present)
>> >> +                    apply_tns(che->ch[0].coeffs, &che->ch[0].tns, &che->ch[0].ics, 1);
>> >> +                if(che->ch[1].tns.present)
>> >> +                    apply_tns(che->ch[1].coeffs, &che->ch[1].tns, &che->ch[1].ics, 1);
>> >> +                if(type == TYPE_CCE && che->coup.coupling_point == BETWEEN_TNS_AND_IMDCT)
>> >> +                    apply_channel_coupling(ac, che, apply_dependent_coupling);
>> >> +                imdct_and_windowing(ac, &che->ch[0]);
>> >> +                if(type == TYPE_CPE)
>> >> +                    imdct_and_windowing(ac, &che->ch[1]);
>> >> +                if(type == TYPE_CCE && che->coup.coupling_point == AFTER_IMDCT)
>> >> +                    apply_channel_coupling(ac, che, apply_independent_coupling);
>> >
>> > the 3 TYPE_CCE checks are unneeded when coupling_point matches neiter of the 3
>>
>> So if type != TYPE_CCE you want me to set coupling_point to the
>> 'invalid' value (2, IIRC)? Or do you want me to rework what the valid
>> values are such that the 'invalid' value is 0? Or something else?
>>
>> I don't really like using the 'invalid' value to remove these checks
>> as this is not speed critical code. But that's just my opinion.
>
> no i meant that you could use a value of 4 or -1 not the "invalid" one

OK.

Rob
-------------- next part --------------
A non-text attachment was scrubbed...
Name: 20080812-1834-merge_decspec_dequant.diff
Type: text/x-diff
Size: 10464 bytes
Desc: not available
URL: <http://lists.mplayerhq.hu/pipermail/ffmpeg-devel/attachments/20080812/0bc04197/attachment.diff>



More information about the ffmpeg-devel mailing list