[FFmpeg-devel] AAC decoder round 5
Robert Swain
robert.swain
Thu Aug 7 17:04:44 CEST 2008
2008/8/6 Michael Niedermayer <michaelni at gmx.at>:
> On Wed, Aug 06, 2008 at 12:32:32AM +0100, Robert Swain wrote:
>> $subj
>>
>> Best regards,
>> Rob
>
>> Index: Changelog
>> ===================================================================
>> --- Changelog (revision 14623)
>> +++ Changelog (working copy)
>> @@ -128,6 +128,7 @@
>> - Motion Pixels MVI demuxer
>> - removed animated GIF decoder/demuxer
>> - D-Cinema audio muxer
>> +- AAC decoder
>>
>> version 0.4.9-pre1:
>>
>
> ok
You can ignore/I can not submit the build system and documentation
changes. I won't commit them until the whole thing has been committed.
> [...]
>
>> Index: libavcodec/aactab.c
>> ===================================================================
>> --- libavcodec/aactab.c (revision 14625)
>> +++ libavcodec/aactab.c (working copy)
[...]
>> +DECLARE_ALIGNED(16, float, ff_aac_kbd_long_1024[1024]);
>> +DECLARE_ALIGNED(16, float, ff_aac_kbd_short_128[128]);
>> +DECLARE_ALIGNED(16, float, ff_aac_sine_long_1024[1024]);
>> +DECLARE_ALIGNED(16, float, ff_aac_sine_short_128[128]);
>> +
>> +const uint8_t ff_aac_num_swb_1024[] = {
>> + 41, 41, 47, 49, 49, 51, 47, 47, 43, 43, 43, 40
>> +};
>> +
>> +const uint8_t ff_aac_num_swb_128[] = {
>> + 12, 12, 12, 14, 14, 14, 15, 15, 15, 15, 15, 15
>> +};
>> +
>> const uint32_t ff_aac_scalefactor_code[121] = {
>> 0x3ffe8, 0x3ffe6, 0x3ffe7, 0x3ffe5, 0x7fff5, 0x7fff1, 0x7ffed, 0x7fff6,
>> 0x7ffee, 0x7ffef, 0x7fff0, 0x7fffc, 0x7fffd, 0x7ffff, 0x7fffe, 0x7fff7,
>
>
>> @@ -795,4 +809,90 @@
>> 4064.0312908, 4074.6805676, 4085.3368071, 4096.0000000,
>> };
>>
>> +/* [ 0, 255] scale factor decoding when using C dsp.float_to_int16
>> + * [60, 315] scale factor decoding when using SIMD dsp.float_to_int16
>> + * [45, 300] intensity stereo position decoding mapped in reverse order i.e. 0->300, 1->299, ..., 254->46, 255->45
>> + */
>
> not doxygen compat, no description of what the table actually contains.
Comment quoted above improved. Does your statement apply to all of the
above or just the comment for pow2sf_tab[]?
> [...]
>
>> Index: libavcodec/aac.c
>> ===================================================================
>> --- libavcodec/aac.c (revision 14626)
>> +++ libavcodec/aac.c (working copy)
>
> [...]
>> +/**
>> + * Configure output channel order and optional mixing based on the current
>> + * program configuration element and user requested channels.
>> + *
>> + * \param newpcs New program configuration struct - we only do something if it differs from the current one.
>> + */
>> +static int output_configure(AACContext *ac, ProgramConfig *pcs, ProgramConfig *newpcs) {
>> + AVCodecContext *avctx = ac->avccontext;
>> + int i, j, channels = 0;
>> + float a, b;
>> + ChannelElement *mixdown[3] = { NULL, NULL, NULL };
>> +
>> + static const float mixdowncoeff[4] = {
>> + /* matrix mix-down coefficient, table 4.70 */
>> + 1. / M_SQRT2,
>> + 1. / 2.,
>> + 1. / (2 * M_SQRT2),
>> + 0
>> + };
>> +
>> + if(!memcmp(pcs, newpcs, sizeof(ProgramConfig)))
>> + return 0; /* no change */
>> +
>> + *pcs = *newpcs;
>> +
>> + /* Allocate or free elements depending on if they are in the
>> + * current program configuration struct.
>> + *
>> + * Set up default 1:1 output mapping.
>> + *
>> + * For a 5.1 stream the output order will be:
>> + * [ Front Left ] [ Front Right ] [ Center ] [ LFE ] [ Surround Left ] [ Surround Right ]
>> + *
>> + * Locate front, center and back channels for later matrix mix-down.
>> + */
>> +
>> + for(i = 0; i < MAX_TAGID; i++) {
>> + for(j = 0; j < 4; j++) {
>> + if(pcs->che_type[j][i]) {
>> + if(!ac->che[j][i] && !(ac->che[j][i] = av_mallocz(sizeof(ChannelElement))))
>> + return AVERROR(ENOMEM);
>> + if(j != ID_CCE) {
>> + ac->output_data[channels++] = ac->che[j][i]->ch[0].ret;
>> + ac->che[j][i]->ch[0].mixing_gain = 1.0f;
>> + if(j == ID_CPE) {
>> + ac->output_data[channels++] = ac->che[j][i]->ch[1].ret;
>> + ac->che[j][i]->ch[1].mixing_gain = 1.0f;
>> + if(!mixdown[MIXDOWN_FRONT] && pcs->che_type[j][i] == AAC_CHANNEL_FRONT)
>> + mixdown[MIXDOWN_FRONT] = ac->che[j][i];
>> + if(!mixdown[MIXDOWN_BACK ] && pcs->che_type[j][i] == AAC_CHANNEL_BACK)
>> + mixdown[MIXDOWN_BACK ] = ac->che[j][i];
>> + }
>> + if(j == ID_SCE && !mixdown[MIXDOWN_CENTER] && pcs->che_type[j][i] == AAC_CHANNEL_FRONT)
>> + mixdown[MIXDOWN_CENTER] = ac->che[j][i];
>> + }
>> + } else
>> + av_freep(&ac->che[j][i]);
>> + }
>> + }
>> +
>> + // allocate appropriately aligned buffer for interleaved output
>> + if(channels > avctx->channels)
>> + av_freep(&ac->interleaved_output);
>> + if(!ac->interleaved_output && !(ac->interleaved_output = av_malloc(channels * 1024 * sizeof(float))))
>> + return AVERROR(ENOMEM);
>> +
>> + ac->mm[MIXDOWN_FRONT] = ac->mm[MIXDOWN_BACK] = ac->mm[MIXDOWN_CENTER] = NULL;
>> +
>> + /* Check for matrix mix-down to mono or stereo. */
>> +
>> + if(avctx->request_channels && avctx->request_channels <= 2 &&
>> + avctx->request_channels != channels) {
>> +
>> + if((avctx->request_channels == 1 && pcs->mono_mixdown_tag != -1) ||
>> + (avctx->request_channels == 2 && pcs->stereo_mixdown_tag != -1)) {
>> + /* Add support for this as soon as we get a sample so we can figure out
>> + exactly how this is supposed to work. */
>> + av_log(avctx, AV_LOG_ERROR,
>> + "Mix-down using pre-mixed elements is not supported, please file a bug. "
>> + "Reverting to matrix mix-down.\n");
>> + }
>> +
>> + /* We need 'center + L + R + sL + sR' for matrix mix-down. */
>> + if(mixdown[MIXDOWN_CENTER] && mixdown[MIXDOWN_FRONT] && mixdown[MIXDOWN_BACK]) {
>
>> + a = mixdowncoeff[pcs->mixdown_coeff_index];
>> +
>> + if(avctx->request_channels == 2) {
>> + b = 1. / (1. + (1. / M_SQRT2) + a * (pcs->pseudo_surround ? 2. : 1.));
>> + mixdown[MIXDOWN_CENTER]->ch[0].mixing_gain = b / M_SQRT2;
>> + } else {
>> + b = 1. / (3. + 2. * a);
>> + mixdown[MIXDOWN_CENTER]->ch[0].mixing_gain = b;
>> + }
>> + mixdown[MIXDOWN_FRONT]->ch[0].mixing_gain = b;
>> + mixdown[MIXDOWN_FRONT]->ch[1].mixing_gain = b;
>> + mixdown[MIXDOWN_BACK ]->ch[0].mixing_gain = b * a;
>> + mixdown[MIXDOWN_BACK ]->ch[1].mixing_gain = b * a;
>
> this is totally obfuscated
> at least rename b to something that has something to do with normalization
> factor.
> And while it is written in the spec like above, i do not think it is correct.
> As it does not seem volume is kept constant in the convertion. Also AC3 has to
> do downmix too and the matrix init could be shared.
> also mixing_gain and global_gain could maybe be merged if the volume is
> slightly changed ... and as it does not seem to be correct as is anyway
> this might be worth a try ...
See later discussion of matrix mix-down marked ***
> [...]
>> +{
>> + /* Pre-mixed down-mix outputs are not available. */
>> + newpcs->mono_mixdown_tag = -1;
>> + newpcs->stereo_mixdown_tag = -1;
>> +
>> + if(channels < 1 || channels > 7) {
>> + av_log(ac->avccontext, AV_LOG_ERROR, "invalid default channel configuration (%d channels)\n",
>> + channels);
>> + return -1;
>> + }
>> +
>> + /* default channel configurations:
>> + *
>> + * 1ch : front center (mono)
>> + * 2ch : L + R (stereo)
>> + * 3ch : front center + L + R
>> + * 4ch : front center + L + R + back center
>> + * 5ch : front center + L + R + back stereo
>> + * 6ch : front center + L + R + back stereo + LFE
>
>> + * 7ch : front center + L + R + outer front left + outer front right + back stereo + LFE
>
> its still 8, so channels/num_channels is no longer an appropriate name for
> the vars used as its not 7 channels.
channel_configuration_index?
> [...]
>> @@ -213,6 +830,764 @@
>> }
>> }
>>
>
>> +/**
>> + * Dequantize and scale spectral data; reference: 4.6.3.3.
>> + *
>> + * @param icoef array of quantized spectral data
>> + * @param band_type array of the used band type
>> + * @param sf array of scalefactors or intensity stereo positions
>> + * @param coef array of dequantized, scaled spectral data
>
> why are the coeffs not dequantized in place, that is coef==icoef ?
icoef[] are integers. coef[] are floats. To be able to use
ivquant_tab[icoef[]], icoef[] have to be integers don't they?
> [...]
>> +/**
>> + * Decode an individual_channel_stream payload; reference: table 4.44.
>> + *
>> + * @param common_window Channels have independent [0], or shared [1], Individual Channel Stream information.
>> + * @param scale_flag scalable [1] or non-scalable [0] AAC (Unused until scalable AAC is implemented.)
>> + * @return Returns error status. 0 - OK, !0 - error
>> + */
>> +static int decode_ics(AACContext * ac, SingleChannelElement * sce, GetBitContext * gb, int common_window, int scale_flag) {
>> + int icoeffs[1024];
>> + Pulse pulse;
>> + TemporalNoiseShaping * tns = &sce->tns;
>> + IndividualChannelStream * ics = &sce->ics;
>> + float * out = sce->coeffs;
>> + int global_gain, pulse_present = 0;
>> +
>
>> + pulse.num_pulse = 0;
>> + pulse.start = 0;
>
> what effect does start have when num_pulse=0 ?
These are assigned only to silence a GCC warning about them possibly
being uninitialised when they always will be in the cases that they're
used. I could add a comment to this effect.
> [...]
>> +/**
>> + * intensity stereo decoding; reference: 4.6.8.2.3
>> + */
>> +static void apply_intensity_stereo(ChannelElement * cpe) {
>> + const IndividualChannelStream * ics = &cpe->ch[1].ics;
>> + SingleChannelElement * sce1 = &cpe->ch[1];
>> + float *coef0 = cpe->ch[0].coeffs, *coef1 = cpe->ch[1].coeffs;
>> + const uint16_t * offsets = ics->swb_offset;
>> + int g, gp, i, k;
>> + int c;
>> + float scale;
>> + for (g = 0; g < ics->num_window_groups; g++) {
>> + for (gp = 0; gp < ics->group_len[g]; gp++) {
>> + for (i = 0; i < ics->max_sfb;) {
>> + if (sce1->band_type[g][i] == INTENSITY_BT || sce1->band_type[g][i] == INTENSITY_BT2) {
>
> should the gp loop maybe be inside the if() so its not redone ?
Done for this and apply_mid_side_stereo().
> [...]
>> +
>> +/**
>> + * Parse whether channels are to be excluded from Dynamic Range Compression; reference: table 4.53.
>> + *
>> + * @return Returns number of bytes consumed.
>> + */
>> +static int decode_drc_channel_exclusions(DynamicRangeControl *che_drc, GetBitContext * gb) {
>> + int i;
>> + int n = 0;
>> + int num_excl_chan = 0;
>> +
>> + do {
>> + for (i = 0; i < 7; i++)
>> + che_drc->exclude_mask[num_excl_chan + i] = get_bits1(gb);
>> + n++;
>> + num_excl_chan += 7;
>> + } while (num_excl_chan < MAX_CHANNELS - 7 && (che_drc->additional_excluded_chns[n-1] = get_bits1(gb)));
>> +
>> + return n;
>> +}
>
> iam still not happy with this
> If i understand correctly this is a mask of channels, it should need just
> one array, not 2 (exclude_mask / additional_excluded_chns)
Indeed. I'll remove additional_excluded_chns[].
> [...]
>> +/**
>> + * Parse extension data (incomplete); reference: table 4.51.
>> + *
>> + * @param cnt length of ID_FIL syntactic element in bytes
>> + */
>
>> +static int extension_payload(AACContext * ac, GetBitContext * gb, int cnt) {
>
> parse/decode/read...
I'll change to decode_ for consistency, including the comment.
> [...]
>> +
>> +/**
>> + * channel coupling transformation interface
>> + *
>> + * @param index index into coupling gain array
>> + * @param apply_coupling_method pointer to (in)dependent coupling function
>> + */
>> +static void apply_channel_coupling(AACContext * ac, ChannelElement * cc,
>> + void (*apply_coupling_method)(AACContext * ac, SingleChannelElement * sce, ChannelElement * cc, int index))
>> +{
>> + int c;
>> + int index = 0;
>> + ChannelCoupling * coup = &cc->coup;
>> + for (c = 0; c <= coup->num_coupled; c++) {
>> + if ( !coup->is_cpe[c] && ac->che[ID_SCE][coup->tag_select[c]]) {
>> + apply_coupling_method(ac, &ac->che[ID_SCE][coup->tag_select[c]]->ch[0], cc, index++);
>> + } else if(coup->is_cpe[c] && ac->che[ID_CPE][coup->tag_select[c]]) {
>
>> + if (!coup->l[c] && !coup->r[c]) {
>> + apply_coupling_method(ac, &ac->che[ID_CPE][coup->tag_select[c]]->ch[0], cc, index);
>> + apply_coupling_method(ac, &ac->che[ID_CPE][coup->tag_select[c]]->ch[1], cc, index++);
>> + }
>
> The struct docs say r/l are "apply gain to r/l channel" this contradicts the
> 0/0 case. which is correct and which is wrong?
In the spec, for cc_l[] it says:
"one bit indicating that a list of gain_element values is applied to the left
channel of a channel pair."
The code that uses it implies that description isn't strictly true. In
the !l[] && !r[] case, there is a shared list of gain_element values.
So l[] and r[] indicate the presence of a channel-specific list of
gain_element values. I'll change the comments.
> [...]
>> +
>> +/**
>> + * Conduct matrix mix-down and float to int16 conversion.
>> + *
>> + * @param data pointer to output data
>> + * @param data_size output data size in bytes
>> + * @return Returns error status. 0 - OK, !0 - error
>> + */
>> +static int mixdown_and_convert_to_int16(AVCodecContext * avccontext, uint16_t * data, int * data_size) {
>> + AACContext * ac = avccontext->priv_data;
>> + int i;
>> + float *c, *l, *r, *sl, *sr, *out;
>> +
>> + if (!ac->is_saved) {
>> + ac->is_saved = 1;
>> + *data_size = 0;
>> + return 0;
>> + }
>> +
>> + i = 1024 * avccontext->channels * sizeof(int16_t);
>> + if(*data_size < i) {
>> + av_log(avccontext, AV_LOG_ERROR,
>> + "Output buffer too small (%d) or trying to output too many samples (%d) for this frame.\n",
>> + *data_size, i);
>> + return -1;
>> + }
>> + *data_size = i;
>> +
>> + if(ac->mm[MIXDOWN_CENTER]) {
>> + /* matrix mix-down */
>> + l = ac->mm[MIXDOWN_FRONT ]->ch[0].ret;
>> + r = ac->mm[MIXDOWN_FRONT ]->ch[1].ret;
>> + c = ac->mm[MIXDOWN_CENTER]->ch[0].ret;
>> + sl = ac->mm[MIXDOWN_BACK ]->ch[0].ret;
>> + sr = ac->mm[MIXDOWN_BACK ]->ch[1].ret;
>> + out = ac->interleaved_output;
>> +
>> + // XXX dsputil-ize
>> + if(avccontext->channels == 2) {
>> + if(ac->pcs.pseudo_surround) {
>> + for(i = 0; i < 1024; i++) {
>> + *out++ = *l++ + *c - *sl - *sr + ac->add_bias;
>> + *out++ = *r++ + *c++ + *sl++ + *sr++ - ac->add_bias * 3;
>> + }
>> + } else {
>> + for(i = 0; i < 1024; i++) {
>> + *out++ = *l++ + *c + *sl++ - ac->add_bias * 2;
>> + *out++ = *r++ + *c++ + *sr++ - ac->add_bias * 2;
>> + }
>> + }
>> +
>> + } else {
>> + assert(avccontext->channels == 1);
>> + for(i = 0; i < 1024; i++) {
>> + *out++ = *l++ + *r++ + *c++ + *sl++ + *sr++ - ac->add_bias * 4;
>> + }
>> + }
>> +
>> + ac->dsp.float_to_int16(data, ac->interleaved_output, 1024 * avccontext->channels);
>> + } else {
>> + ac->dsp.float_to_int16_interleave(data, (const float **)ac->output_data, 1024, avccontext->channels);
>> + }
>> +
>> + return 0;
>> +}
>
> mixdown should be done prior to the IMDCT when possible and the IMDCT skipped
> for channels that are not needed, or _ALL_ mixdown code should be removed
> from the AAC decoder, as in that case mixdown can be done outside of the
> decoder equally well and cleaner.
I think doing pre-IMDCT mix-down will be complicated because of the
different windows and their overlaps. As a consequence of these issues
the optimisation may or may not be worth it. I think the overlapping
simplifications I had planned when transitioning to using imdct_half()
may help so I should probably do those first.
*** In the spec they advise against using the matrix mix-down method
so I think all this matrix mix-down code should be dropped in favour
of generic channel mixing either pre-IMDCT or post decoding, unless
someone knows of a good reason why it should be kept. I think Andreas
implemented this code but I'm not sure.
>> +
>> +
>> +static int aac_decode_frame(AVCodecContext * avccontext, void * data, int * data_size, const uint8_t * buf, int buf_size) {
>> + AACContext * ac = avccontext->priv_data;
>> + GetBitContext gb;
>> + enum RawDataBlockID id;
>> + int err, tag;
>> +
>> + init_get_bits(&gb, buf, buf_size*8);
>> +
>> + // parse
>> + while ((id = get_bits(&gb, 3)) != ID_END) {
>> + tag = get_bits(&gb, 4);
>> + err = -1;
>> + switch (id) {
>> +
>
>> + case ID_SCE:
>> + if(!ac->che[ID_SCE][tag]) {
>
> this id / tag naming confuses the hell out of me ...
> Both values together identify the "thing". the id here is the
> type (channel pair / single channel / ...)
Why is it confusing? As you say, the id is the type and the tag is
(and I know you don't like this but I'm going to use it anyway because
I'm happy with it) the instance of that type.
> [...]
>> Index: libavcodec/aac.h
>> ===================================================================
>> --- libavcodec/aac.h (revision 14624)
>> +++ libavcodec/aac.h (working copy)
>> @@ -42,8 +44,63 @@
>> ff_aac_spectral_codes[num], sizeof(ff_aac_spectral_codes[num][0]), sizeof(ff_aac_spectral_codes[num][0]), \
>> size);
>>
>> +#define MAX_CHANNELS 64
>
> ok as long as its not causing huge arrays, like if there was a use like
> array[MAX_FRAME][MAX_TAGS][MAX_BANDS][MAX_CHANNELS] ...
Please define huge.
> [...]
>> @@ -60,24 +128,182 @@
>> };
>>
>> /**
>> + * mix-down channel types
>> + * MIXDOWN_CENTER is the index into the mix-down arrays for a Single Channel Element with AAC_CHANNEL_FRONT.
>> + * MIXDOWN_(BACK|FRONT) are the indices for Channel Pair Elements with AAC_CHANNEL_(BACK|FRONT).
>> + */
>> +enum {
>> + MIXDOWN_CENTER,
>> + MIXDOWN_FRONT,
>> + MIXDOWN_BACK,
>> +};
>
> the descriptions for each should be with each not clustered before all.
OK. I think I annotated it like that because of the grammar you used
when stating that their meaning should be added.
> [...]
>> +/**
>> + * M/S joint channel coding
>> + */
>> +typedef struct {
>> + int present;
>> + uint8_t mask[8][64];
>> +} MidSideStereo;
>
> cant this use the channel coupling struct and code? Its doing the same thing
> i think
"coupling channel elements provide two functionalities: First,
coupling channels may be used to implement generalized intensity
stereo coding where channel spectra can be shared across channel
boundaries. Second, coupling channels may be used to dynamically
perform a downmix of one sound object into the stereo image."
I don't think they quite do the same thing. mask[][] is the only array
left in this struct now though so I'll rename it ms_mask[][] and
remove the struct.
>> +
>> +/**
>> + * Dynamic Range Control - decoded from the bitstream but not processed further.
>> + */
>> +typedef struct {
>> + int pce_instance_tag; ///< Indicates with which program the DRC info is associated.
>> + int dyn_rng_sgn[17]; ///< DRC sign information; 0 - positive, 1 - negative
>> + int dyn_rng_ctl[17]; ///< DRC magnitude information
>> + int exclude_mask[MAX_CHANNELS]; ///< Channels to be excluded from DRC processing.
>> + int additional_excluded_chns[MAX_CHANNELS / 7]; /**< The exclude_mask bits are
>> + coded in groups of 7 with 1 bit preceeding each group (except the first)
>> + indicating that 7 more mask bits are coded. */
>> + int band_incr; ///< Number of DRC bands greater than 1 having DRC info.
>> + int interpolation_scheme; ///< Indicates the interpolation scheme used in the SBR QMF domain.
>> + int band_top[17]; ///< Indicates the top of the i-th DRC band in units of 4 spectral lines.
>> + int prog_ref_level; /**< A reference level for the long-term program audio level for all
>> + channels combined. */
>> +} DynamicRangeControl;
>
>> +
>> +/**
>> + * pulse tool
>> + */
>
> mee, i thought the function was called like that, the struct too??
Oops, I must have missed that one. I'll remove superfluous doxygen
comments on structs.
>> + int is_indep_coup; ///< Set if independent coupling (i.e. after IMDCT).
>> + int domain; ///< Controls if coupling is performed before (0) or after (1) the TNS decoding of the target channels.
>
> wouldnt a
> int or enum where= 0 (before TNS) 1(after TNS before IMDCT) 2(after IMDCT)
> be cleaner?
Done.
>> + int num_coupled; ///< number of target elements
>
>> + int is_cpe[9]; ///< Set if target is an CPE (otherwise it's an SCE).
>
> Maybe some code could be simplified if these where ID_CPE/ID_SCE ?
Do you mean if the array values were ID_CPE/ID_SCE? I don't think it
would allow any simplifications.
>> + int tag_select[9]; ///< element tag index
>> + int l[9]; ///< Apply gain to left channel of a CPE.
>> + int r[9]; ///< Apply gain to right channel of a CPE.
>
> do these arrays need 9 or 8 elements?
9
> [...]
>> Index: libavcodec/aacdectab.h
>> ===================================================================
>> --- libavcodec/aacdectab.h (revision 0)
>> +++ libavcodec/aacdectab.h (revision 0)
>> @@ -0,0 +1,189 @@
>> +/*
>> + * AAC decoder data
>> + * Copyright (c) 2005-2006 Oded Shimon ( ods15 ods15 dyndns org )
>> + * Copyright (c) 2006-2007 Maxim Gavrilov ( maxim.gavrilov gmail com )
>> + *
>> + * This file is part of FFmpeg.
>> + *
>> + * FFmpeg is free software; you can redistribute it and/or
>> + * modify it under the terms of the GNU Lesser General Public
>> + * License as published by the Free Software Foundation; either
>> + * version 2.1 of the License, or (at your option) any later version.
>> + *
>> + * FFmpeg is distributed in the hope that it will be useful,
>> + * but WITHOUT ANY WARRANTY; without even the implied warranty of
>> + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
>> + * Lesser General Public License for more details.
>> + *
>> + * You should have received a copy of the GNU Lesser General Public
>> + * License along with FFmpeg; if not, write to the Free Software
>> + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
>> + */
>> +
>> +/**
>> + * @file aacdectab.h
>> + * AAC decoder data
>> + * @author Oded Shimon ( ods15 ods15 dyndns org )
>> + * @author Maxim Gavrilov ( maxim.gavrilov gmail com )
>> + */
>> +
>> +#ifndef FFMPEG_AACDECTAB_H
>> +#define FFMPEG_AACDECTAB_H
>> +
>> +#include "aac.h"
>> +
>> +#include <stdint.h>
>> +
>> +static const uint16_t swb_offset_1024_96[] = {
>> + 0, 4, 8, 12, 16, 20, 24, 28,
>> + 32, 36, 40, 44, 48, 52, 56, 64,
>> + 72, 80, 88, 96, 108, 120, 132, 144,
>> + 156, 172, 188, 212, 240, 276, 320, 384,
>> + 448, 512, 576, 640, 704, 768, 832, 896,
>> + 960, 1024
>> +};
>> +
>> +static const uint16_t swb_offset_128_96[] = {
>> + 0, 4, 8, 12, 16, 20, 24, 32, 40, 48, 64, 92, 128
>> +};
>> +
>> +static const uint16_t swb_offset_1024_64[] = {
>> + 0, 4, 8, 12, 16, 20, 24, 28,
>> + 32, 36, 40, 44, 48, 52, 56, 64,
>> + 72, 80, 88, 100, 112, 124, 140, 156,
>> + 172, 192, 216, 240, 268, 304, 344, 384,
>> + 424, 464, 504, 544, 584, 624, 664, 704,
>> + 744, 784, 824, 864, 904, 944, 984, 1024
>> +};
>> +
>> +static const uint16_t swb_offset_1024_48[] = {
>> + 0, 4, 8, 12, 16, 20, 24, 28,
>> + 32, 36, 40, 48, 56, 64, 72, 80,
>> + 88, 96, 108, 120, 132, 144, 160, 176,
>> + 196, 216, 240, 264, 292, 320, 352, 384,
>> + 416, 448, 480, 512, 544, 576, 608, 640,
>> + 672, 704, 736, 768, 800, 832, 864, 896,
>> + 928, 1024
>> +};
>> +
>> +static const uint16_t swb_offset_128_48[] = {
>> + 0, 4, 8, 12, 16, 20, 28, 36,
>> + 44, 56, 68, 80, 96, 112, 128
>> +};
>> +
>> +static const uint16_t swb_offset_1024_32[] = {
>> + 0, 4, 8, 12, 16, 20, 24, 28,
>> + 32, 36, 40, 48, 56, 64, 72, 80,
>> + 88, 96, 108, 120, 132, 144, 160, 176,
>> + 196, 216, 240, 264, 292, 320, 352, 384,
>> + 416, 448, 480, 512, 544, 576, 608, 640,
>> + 672, 704, 736, 768, 800, 832, 864, 896,
>> + 928, 960, 992, 1024
>> +};
>> +
>> +static const uint16_t swb_offset_1024_24[] = {
>> + 0, 4, 8, 12, 16, 20, 24, 28,
>> + 32, 36, 40, 44, 52, 60, 68, 76,
>> + 84, 92, 100, 108, 116, 124, 136, 148,
>> + 160, 172, 188, 204, 220, 240, 260, 284,
>> + 308, 336, 364, 396, 432, 468, 508, 552,
>> + 600, 652, 704, 768, 832, 896, 960, 1024
>> +};
>> +
>> +static const uint16_t swb_offset_128_24[] = {
>> + 0, 4, 8, 12, 16, 20, 24, 28,
>> + 36, 44, 52, 64, 76, 92, 108, 128
>> +};
>> +
>> +static const uint16_t swb_offset_1024_16[] = {
>> + 0, 8, 16, 24, 32, 40, 48, 56,
>> + 64, 72, 80, 88, 100, 112, 124, 136,
>> + 148, 160, 172, 184, 196, 212, 228, 244,
>> + 260, 280, 300, 320, 344, 368, 396, 424,
>> + 456, 492, 532, 572, 616, 664, 716, 772,
>> + 832, 896, 960, 1024
>> +};
>> +
>> +static const uint16_t swb_offset_128_16[] = {
>> + 0, 4, 8, 12, 16, 20, 24, 28,
>> + 32, 40, 48, 60, 72, 88, 108, 128
>> +};
>> +
>> +static const uint16_t swb_offset_1024_8[] = {
>> + 0, 12, 24, 36, 48, 60, 72, 84,
>> + 96, 108, 120, 132, 144, 156, 172, 188,
>> + 204, 220, 236, 252, 268, 288, 308, 328,
>> + 348, 372, 396, 420, 448, 476, 508, 544,
>> + 580, 620, 664, 712, 764, 820, 880, 944,
>> + 1024
>> +};
>> +
>> +static const uint16_t swb_offset_128_8[] = {
>> + 0, 4, 8, 12, 16, 20, 24, 28,
>> + 36, 44, 52, 60, 72, 88, 108, 128
>> +};
>> +
>> +static const uint16_t *swb_offset_1024[] = {
>> + swb_offset_1024_96, swb_offset_1024_96, swb_offset_1024_64,
>> + swb_offset_1024_48, swb_offset_1024_48, swb_offset_1024_32,
>> + swb_offset_1024_24, swb_offset_1024_24, swb_offset_1024_16,
>> + swb_offset_1024_16, swb_offset_1024_16, swb_offset_1024_8
>> +};
>> +
>> +static const uint16_t *swb_offset_128[] = {
>> + /* The last entry on the following row is swb_offset_128_64 but is a
>> + duplicate of swb_offset_128_96. */
>> + swb_offset_128_96, swb_offset_128_96, swb_offset_128_96,
>> + swb_offset_128_48, swb_offset_128_48, swb_offset_128_48,
>> + swb_offset_128_24, swb_offset_128_24, swb_offset_128_16,
>> + swb_offset_128_16, swb_offset_128_16, swb_offset_128_8
>> +};
>
> Does storing band sizes instead of band offsets lead to simpler code?
> if not, the code above is ok
grepping for 'offset' in aac.c shows quite a few more lines of
for(i=offset[idx]; i<offset[idx+1]; i++) or just offset[idx] than
lines using offset[idx+1] - offset[idx] so I think it's better as it
is.
> Btw, what does swb stand for? it should be mentioned somewhere in a doxy
scalefactor window band - term for scalefactor bands within a window,
given in Table 4.110 to Table 4.128.
scalefactor band - term for scalefactor band within a group. In case
of EIGHT_SHORT_SEQUENCE and grouping a scalefactor band may contain
several scalefactor window bands of corresponding frequency. For all
other window_sequences scalefactor bands and scalefactor window bands
are identical.
I think all that information is useful but where would be a good place
to put it?
Regards,
Rob
More information about the ffmpeg-devel
mailing list