[FFmpeg-devel] [PATCH] AAC Decoder round 3
Michael Niedermayer
michaelni
Sun Jul 6 05:19:17 CEST 2008
On Tue, Jul 01, 2008 at 12:37:11PM +0100, Robert Swain wrote:
> Hello,
>
> Another submission for full review, this time without both LTP and
> SSR. The code for LTP and SSR is still in the Summer of Code
> repository for anyone who is interested in working on either of those
> features in the future.
>
> See attached.
[...]
> +DECLARE_ALIGNED_16(static float, kbd_long_1024[1024]);
> +DECLARE_ALIGNED_16(static float, kbd_short_128[128]);
> +DECLARE_ALIGNED_16(static float, sine_long_1024[1024]);
> +DECLARE_ALIGNED_16(static float, sine_short_128[128]);
vertical align
[...]
> +/**
> + * special codebooks
> + */
> +enum {
> + ZERO_HCB = 0,
> + FIRST_PAIR_HCB = 5,
> + ESC_HCB = 11,
> + NOISE_HCB = 13,
> + INTENSITY_HCB2 = 14,
> + INTENSITY_HCB = 15,
> + ESC_FLAG = 16,
> +};
This should have a type and that type should be used for the variable
holding such enums
[...]
> +/**
> + * Program configuration - describes how channels are arranged. Either read from
> + * stream (ID_PCE) or created based on a default fixed channel arrangement.
> + */
> +typedef struct {
> + int che_type[4][MAX_TAGID]; ///< channel element type with the first index as the first 4 raw_data_block IDs
> +
> + int mono_mixdown; ///< The SCE tag to use if user requests mono output, -1 if not available.
> + int stereo_mixdown; ///< The CPE tag to use if user requests stereo output, -1 if not available.
maybe mono_mixdown_tag would be a better name ?
[...]
> +/**
> + * coupling parameters
> + */
> +typedef struct {
> + int ind_sw; ///< Set if independent coupling (i.e. after IMDCT).
is_indep_coup or another name one can make sense of without having to
look it up here.
[...]
> +/**
> + * Free a channel element.
> + */
> +static void che_freep(ChannelElement **s) {
> + if(!*s)
> + return;
> + av_freep(s);
> +}
am i too tired or is the if-return useless and this a senseless wraper
function?
> +
> +/**
> + * Configure output channel order and optional mixing based on the current
> + * program configuration element and user requested channels.
> + *
> + * \param newpcs New program configuration struct - we only do something if it differs from the current one.
> + */
> +static int output_configure(AACContext *ac, ProgramConfig *newpcs) {
> + AVCodecContext *avctx = ac->avccontext;
> + ProgramConfig * pcs = &ac->pcs;
> + int i, j, channels = 0;
> + float a, b;
> + ChannelElement *mixdown[3] = { NULL, NULL, NULL };
> +
> + static const float mixdowncoeff[4] = {
> + /* matrix mix-down coefficient, table 4.70 */
> + 1. / M_SQRT2,
> + 1. / 2.,
> + 1. / (2 * M_SQRT2),
> + 0
> + };
> +
> + if(!memcmp(&ac->pcs, newpcs, sizeof(ProgramConfig)))
> + return 0; /* no change */
> +
> + *pcs = *newpcs;
> +
> + /* Allocate or free elements depending on if they are in the
> + * current program configuration struct.
> + *
> + * Set up default 1:1 output mapping.
> + *
> + * For a 5.1 stream the output order will be:
> + * [ Front Left ] [ Front Right ] [ Center ] [ LFE ] [ Surround Left ] [ Surround Right ]
> + *
> + * Locate front, center and back channels for later matrix mix-down.
> + */
> +
> + for(i = 0; i < MAX_TAGID; i++) {
> + for(j = 0; j < 4; j++) {
> + if(pcs->che_type[j][i] && !ac->che[j][i]) {
> + ac->che[j][i] = av_mallocz(sizeof(ChannelElement));
> + if(j != ID_CCE) {
> + ac->output_data[channels++] = ac->che[j][i]->ch[0].ret;
> + ac->che[j][i]->ch[0].mixing_gain = 1.0f;
> + if(j == ID_CPE) {
> + ac->output_data[channels++] = ac->che[j][i]->ch[1].ret;
> + ac->che[j][i]->ch[1].mixing_gain = 1.0f;
> + if(!mixdown[MIXDOWN_FRONT] && pcs->che_type[j][i] == AAC_CHANNEL_FRONT)
> + mixdown[MIXDOWN_FRONT] = ac->che[j][i];
> + if(!mixdown[MIXDOWN_BACK ] && pcs->che_type[j][i] == AAC_CHANNEL_BACK)
> + mixdown[MIXDOWN_BACK ] = ac->che[j][i];
> + }
> + if(j == ID_SCE && !mixdown[MIXDOWN_CENTER] && pcs->che_type[j][i] == AAC_CHANNEL_FRONT)
> + mixdown[MIXDOWN_CENTER] = ac->che[j][i];
> + }
> + } else
> + che_freep(&ac->che[j][i]);
> + }
> + }
> +
> + ac->mm[MIXDOWN_FRONT] = ac->mm[MIXDOWN_BACK] = ac->mm[MIXDOWN_CENTER] = NULL;
> +
> + /* Check for matrix mix-down to mono or stereo. */
> +
> + if(avctx->request_channels && avctx->request_channels <= 2 &&
> + avctx->request_channels != channels) {
> +
> + if((avctx->request_channels == 1 && pcs->mono_mixdown != -1) ||
> + (avctx->request_channels == 2 && pcs->stereo_mixdown != -1)) {
> + /* Add support for this as soon as we get a sample so we can figure out
> + exactly how this is supposed to work. */
> + av_log(avctx, AV_LOG_ERROR,
> + "Mix-down using pre-mixed elements is not supported, please file a bug. "
> + "Reverting to matrix mix-down.\n");
> + }
> +
> + /* We need 'center + L + R + sL + sR' for matrix mix-down. */
> + if(mixdown[MIXDOWN_CENTER] && mixdown[MIXDOWN_FRONT] && mixdown[MIXDOWN_BACK]) {
> + a = mixdowncoeff[pcs->mixdown_coeff_index];
> +
> + if(avctx->request_channels == 2) {
> + b = 1. / (1. + (1. / M_SQRT2) + a * (pcs->pseudo_surround ? 2. : 1.));
> + mixdown[MIXDOWN_CENTER]->ch[0].mixing_gain = b / M_SQRT2;
> + } else {
> + b = 1. / (3. + 2. * a);
> + mixdown[MIXDOWN_CENTER]->ch[0].mixing_gain = b;
> + }
> + mixdown[MIXDOWN_FRONT]->ch[0].mixing_gain = b;
> + mixdown[MIXDOWN_FRONT]->ch[1].mixing_gain = b;
> + mixdown[MIXDOWN_BACK ]->ch[0].mixing_gain = b * a;
> + mixdown[MIXDOWN_BACK ]->ch[1].mixing_gain = b * a;
> + for(i = 0; i < 3; i++) ac->mm[i] = mixdown[i];
> +
> + channels = avctx->request_channels;
> + } else {
> + av_log(avctx, AV_LOG_WARNING, "Matrix mixing from %d to %d channels is not supported.\n",
> + channels, avctx->request_channels);
> + }
> + }
> +
> + avctx->channels = channels;
> + ac->interleaved_output = av_realloc(ac->interleaved_output, channels * 1024 * sizeof(float));
memleak on allocation failure
[...]
> +// parser implementation
?
[...]
> +/**
> + * Decode section_data payload; reference: table 4.46.
> + */
> +static int decode_section_data(AACContext * ac, GetBitContext * gb, IndividualChannelStream * ics, int cb[][64], int cb_run_end[][64]) {
> + int g;
> + for (g = 0; g < ics->num_window_groups; g++) {
> + int bits = (ics->window_sequence == EIGHT_SHORT_SEQUENCE) ? 3 : 5;
> + int k = 0;
> + while (k < ics->max_sfb) {
> + int sect_len = k;
> + int sect_len_incr;
> + int sect_cb = get_bits(gb, 4);
> + if (sect_cb == 12) {
> + av_log(ac->avccontext, AV_LOG_ERROR, "invalid codebook\n");
> + return -1;
> + }
> + while ((sect_len_incr = get_bits(gb, bits)) == (1 << bits)-1)
> + sect_len += sect_len_incr;
> + sect_len += sect_len_incr;
> + if (sect_len > ics->max_sfb) {
change the check to unsigned or check for < 0 please, it doesnt seeem to
do any harm but <0 is not correct
[...]
> +/**
> + * Decode scale_factor_data; reference: table 4.47.
> + */
> +static int decode_scale_factor_data(AACContext * ac, GetBitContext * gb, float mix_gain, unsigned int global_gain, IndividualChannelStream * ics, const int cb[][64], const int cb_run_end[][64], float sf[][64]) {
> + const int sf_offset = ac->sf_offset + (ics->window_sequence == EIGHT_SHORT_SEQUENCE ? 12 : 0);
> + int g, i;
> + int offset[3] = { global_gain, global_gain - 90, 100 };
> + int noise_flag = 1;
> + static const char *sf_str[3] = { "Global gain", "Noise gain", "Intensity stereo position" };
> + ics->intensity_present = 0;
> + for (g = 0; g < ics->num_window_groups; g++) {
> + for (i = 0; i < ics->max_sfb;) {
> + if (cb[g][i] == ZERO_HCB) {
> + int run_end = cb_run_end[g][i];
> + for(; i < run_end; i++)
> + sf[g][i] = 0.;
> + continue;
useless continue and this could use a memset(0)
> + }else if((cb[g][i] == INTENSITY_HCB) || (cb[g][i] == INTENSITY_HCB2)) {
> + int run_end = cb_run_end[g][i];
that can be factored out of the if/else
[...]
> +static void pulse_tool(AACContext * ac, const IndividualChannelStream * ics, const Pulse * pulse, int * icoef) {
> + int i, off = ics->swb_offset[pulse->start];
> + for (i = 0; i < pulse->num_pulse; i++) {
> + off += pulse->offset[i];
> + if (icoef[off] > 0)
> + icoef[off] += pulse->amp[i];
> + else
> + icoef[off] -= pulse->amp[i];
ic= icoef[off]>>31; or ic= (icoef[off]-1)>>31 (depending in 0)
icoef[off] += (pulse->amp[i]^ic)-ic;
(yes this shouldbe faster)
> + }
> +}
> +
> +static void quant_to_spec_tool(AACContext * ac, const IndividualChannelStream * ics, const int * icoef, const int cb[][64], const float sf[][64], float * coef) {
> + const uint16_t * offsets = ics->swb_offset;
> + const int c = 1024/ics->num_window_groups;
> + int g, i, group, k;
> +
> + for(g = 0; g < ics->num_window_groups; g++)
> + memset(coef + g * 128 + offsets[ics->max_sfb], 0, sizeof(float)*(c - offsets[ics->max_sfb]));
> +
> + for (g = 0; g < ics->num_window_groups; g++) {
cant these 2 loops be merged?
also the spaces sourrounding ( are inconsisent
> + for (i = 0; i < ics->max_sfb; i++) {
> + if (cb[g][i] == NOISE_HCB) {
> + for (group = 0; group < ics->group_len[g]; group++) {
> + float scale = sf[g][i] / ((offsets[i+1] - offsets[i]) * PNS_MEAN_ENERGY);
> + for (k = offsets[i]; k < offsets[i+1]; k++)
> + coef[group*128+k] = (int32_t)av_random(&ac->random_state) * scale;
> + }
> + } else if (cb[g][i] != INTENSITY_HCB && cb[g][i] != INTENSITY_HCB2) {
> + for (group = 0; group < ics->group_len[g]; group++) {
> + for (k = offsets[i]; k < offsets[i+1]; k++) {
> + coef[group*128+k] = ivquant(ac, icoef[group*128+k]) * sf[g][i];
> + }
> + }
> + }
> + }
> + coef += ics->group_len[g]*128;
> + icoef += ics->group_len[g]*128;
vertical align
[...]
> +static int dynamic_range_info(AACContext * ac, GetBitContext * gb, int cnt) {
> + int n = 1;
> + int drc_num_bands = 1;
> + int i;
> +
> + /* pce_tag_present? */
> + if(get_bits1(gb)) {
> + ac->che_drc.pce_instance_tag = get_bits(gb, 4);
> + ac->che_drc.tag_reserved_bits = get_bits(gb, 4);
vertical align
> + n++;
> + }
> +
> + /* excluded_chns_present? */
> + if(get_bits1(gb)) {
> + n += excluded_channels(ac, gb);
> + }
> +
> + /* drc_bands_present? */
> + if (get_bits1(gb)) {
> + ac->che_drc.band_incr = get_bits(gb, 4);
> + ac->che_drc.interpolation_scheme = get_bits(gb, 4);
> + n++;
> + drc_num_bands += ac->che_drc.band_incr;
> + for (i = 0; i < drc_num_bands; i++) {
> + ac->che_drc.band_top[i] = get_bits(gb, 8);
> + n++;
> + }
> + }
> +
> + /* prog_ref_level_present? */
> + if (get_bits1(gb)) {
> + ac->che_drc.prog_ref_level = get_bits(gb, 7);
> + ac->che_drc.prog_ref_level_reserved_bits = get_bits1(gb);
reserved bits? doesnt sound like that needs to be stored in the conext
but then i didnt check teh spec ...
[...]
> +static void coupling_tool(AACContext * ac, int independent, int domain) {
This as well as other functions could benefit from some documentation
what does it do, what is "independent" what "domain" ?
> + int i;
> + for (i = 0; i < MAX_TAGID; i++) {
> + ChannelElement * cc = ac->che[ID_CCE][i];
> + if (cc) {
> + if (cc->coup.ind_sw && independent) {
> + transform_coupling_tool(ac, cc, coupling_independent_trans);
> + } else if (!cc->coup.ind_sw && !independent && (cc->coup.domain == domain)) {
> + transform_coupling_tool(ac, cc, coupling_dependent_trans);
> + }
> + }
> + }
> +}
> +
> +static void transform_sce_tool(AACContext * ac, void (*sce_trans)(AACContext * ac, SingleChannelElement * sce)) {
iam still not happy with _tool() functions. Functions should be named
so that the names describes what the function does.
[...]
> +static void spec_to_sample(AACContext * ac) {
what is "spec to sample" ? the name does not say anythig to me
[...]
> + i = 1024 * avccontext->channels * sizeof(uint16_t);
> + if(*data_size < i)
> + return -1;
i wonder if this shouldnt be checked earlier (closer to where the number of
channels is set)
> + *data_size = i;
> +
> + ac->dsp.float_to_int16(data, ac->interleaved_output, 1024 * avccontext->channels);
> + return 0;
> +}
> +
> +
> +static int aac_decode_frame(AVCodecContext * avccontext, void * data, int * data_size, const uint8_t * buf, int buf_size) {
> + AACContext * ac = avccontext->priv_data;
> + GetBitContext gb;
> + int id, err, tag;
> +
> + init_get_bits(&gb, buf, buf_size*8);
> +
> + // parse
> + while ((id = get_bits(&gb, 3)) != ID_END) {
> + tag = get_bits(&gb, 4);
> + switch (id) {
> +
> + case ID_SCE:
> + if(!ac->che[ID_SCE][tag]) {
> + if(tag == 1 && ac->che[ID_LFE][0]) {
> + /* Some streams incorrectly code 5.1 audio as SCE[0] CPE[0] CPE[1] SCE[1]
> + instead of SCE[0] CPE[0] CPE[0] LFE[0].
> + If we seem to have encountered such a stream,
> + transfer the LFE[0] element to SCE[1] */
> + ac->che[ID_SCE][tag] = ac->che[ID_LFE][0];
> + ac->che[ID_LFE][0] = NULL;
> + } else {
> + err = 1;
> + break;
> + }
> + }
> + err = decode_ics(ac, &gb, 0, 0, &ac->che[ID_SCE][tag]->ch[0]);
> + break;
> +
> + case ID_CPE:
> + if (ac->che[ID_CPE][tag]) {
> + err = decode_cpe(ac, &gb, tag);
> + } else
> + err = -1;
> + break;
> +
> + case ID_FIL:
> + if (tag == 15)
> + tag += get_bits(&gb, 8) - 1;
> + while (tag > 0)
> + tag -= extension_payload(ac, &gb, tag);
> + err = 0; /* FIXME */
> + break;
> +
> + case ID_PCE:
> + err = program_config_element(ac, &gb);
> + break;
> +
> + case ID_DSE:
> + data_stream_element(ac, &gb, tag);
> + err = 0;
> + break;
> +
> + case ID_CCE:
> + if (ac->che[ID_CCE][tag]) {
> + err = decode_cce(ac, &gb, tag);
> + } else
> + err = -1;
> + break;
> +
> + case ID_LFE:
> + if (ac->che[ID_LFE][tag]) {
> + err = decode_ics(ac, &gb, 0, 0, &ac->che[ID_LFE][tag]->ch[0]);
> + } else
> + err = -1;
> + break;
> +
> + default:
> + err = -1; /* should not happen, but keeps compiler happy */
> + break;
> + }
by setting err= -1 before the switch the code can be simplified
> +
> + if(err)
> + return -1;
> + }
> +
> + spec_to_sample(ac);
> + output_samples(avccontext, data, data_size);
> +
> + return buf_size;
> +}
> +
> +static int aac_decode_close(AVCodecContext * avccontext) {
av_cold
[...]
--
Michael GnuPG fingerprint: 9FF2128B147EF6730BADF133611EC787040B0FAB
Thouse who are best at talking, realize last or never when they are wrong.
-------------- next part --------------
A non-text attachment was scrubbed...
Name: not available
Type: application/pgp-signature
Size: 189 bytes
Desc: Digital signature
URL: <http://lists.mplayerhq.hu/pipermail/ffmpeg-devel/attachments/20080706/a386a7e0/attachment.pgp>
More information about the ffmpeg-devel
mailing list