[FFmpeg-devel] [PATCH 8/8] aacdec: add a decoder for AAC USAC (xHE-AAC)

Andreas Rheinhardt andreas.rheinhardt at outlook.com
Thu May 16 13:26:29 EEST 2024


Lynne via ffmpeg-devel:
> This commit adds a decoder for the frequency-domain part of USAC.
> 
> What works:
>  - Mono
>  - Stereo (no prediction)
>  - Stereo (mid/side coding)
> 
> What doesn't:
>  - Preroll decoding (every single decoder seems faulty or weird?)
>  - Complex stereo prediction
> 
> Known issues:
>  - Spec incompliance (noise synthesis in particular)
>  - Lack of robustness
> ---


> diff --git a/libavcodec/aac/aacdec.h b/libavcodec/aac/aacdec.h
> index 20545a24d4..3e6592cf0e 100644
> --- a/libavcodec/aac/aacdec.h
> +++ b/libavcodec/aac/aacdec.h
> @@ -42,6 +42,8 @@
>  #include "libavcodec/avcodec.h"
>  #include "libavcodec/mpeg4audio.h"
>  
> +#include "aacdec_ac.h"
> +
>  typedef struct AACDecContext AACDecContext;
>  
>  /**
> @@ -69,6 +71,32 @@ enum CouplingPoint {
>      AFTER_IMDCT = 3,
>  };
>  
> +enum AACUsacElem {
> +    ID_USAC_SCE = 0,
> +    ID_USAC_CPE = 1,
> +    ID_USAC_LFE = 2,
> +    ID_USAC_EXT = 3,
> +};
> +
> +enum ExtensionHeaderType {
> +    ID_CONFIG_EXT_FILL = 0,
> +    ID_CONFIG_EXT_LOUDNESS_INFO = 2,
> +    ID_CONFIG_EXT_STREAM_ID = 7,
> +};
> +
> +enum AACUsacExtension {
> +    ID_EXT_ELE_FILL,
> +    ID_EXT_ELE_MPEGS,
> +    ID_EXT_ELE_SAOC,
> +    ID_EXT_ELE_AUDIOPREROLL,
> +    ID_EXT_ELE_UNI_DRC,
> +};
> +
> +enum AACUSACLoudnessExt {
> +    UNIDRCLOUDEXT_TERM = 0x0,
> +    UNIDRCLOUDEXT_EQ = 0x1,
> +};
> +
>  // Supposed to be equal to AAC_RENAME() in case of USE_FIXED.
>  #define RENAME_FIXED(name) name ## _fixed
>  
> @@ -93,6 +121,40 @@ typedef struct LongTermPrediction {
>      int8_t used[MAX_LTP_LONG_SFB];
>  } LongTermPrediction;
>  
> +/* Per channel core mode */
> +typedef struct AACUsacElemData {
> +    uint8_t core_mode;
> +    uint8_t scale_factor_grouping;
> +
> +    /* Timewarping ratio */
> +#define NUM_TW_NODES 16
> +    uint8_t tw_ratio[NUM_TW_NODES];
> +
> +    struct {
> +        uint8_t acelp_core_mode : 3;
> +        uint8_t lpd_mode : 5;
> +
> +        uint8_t bpf_control_info : 1;
> +        uint8_t core_mode_last : 1;
> +        uint8_t fac_data_present : 1;
> +
> +        int last_lpd_mode;
> +    } ldp;
> +
> +    struct {
> +        unsigned int seed;
> +        uint8_t level : 3;
> +        uint8_t offset : 5;
> +    } noise;
> +
> +    struct {
> +        uint8_t gain;
> +        uint32_t kv[8 /* (1024 / 16) / 8 */][8];
> +    } fac;
> +
> +    AACArithState ac;
> +} AACUsacElemData;
> +
>  /**
>   * Individual Channel Stream
>   */
> @@ -145,6 +207,7 @@ typedef struct ChannelCoupling {
>   */
>  typedef struct SingleChannelElement {
>      IndividualChannelStream ics;
> +    AACUsacElemData ue;                             ///< USAC element data
>      TemporalNoiseShaping tns;
>      enum BandType band_type[128];                   ///< band types
>      int sfo[128];                                   ///< scalefactor offsets
> @@ -163,25 +226,141 @@ typedef struct SingleChannelElement {
>      };
>  } SingleChannelElement;
>  
> +typedef struct AACUsacStereo {
> +    uint8_t common_window;
> +    uint8_t common_tw;
> +
> +    uint8_t ms_mask_mode;
> +    uint8_t config_idx;
> +
> +    struct {
> +        uint8_t use_prev_frame;
> +        uint8_t pred_dir;
> +        uint8_t delta_code_time;
> +        uint8_t pred_used[8][64];
> +
> +        AVComplexFloat pred[8][64];
> +    } cplx;
> +} AACUsacStereo;
> +
>  /**
>   * channel element - generic struct for SCE/CPE/CCE/LFE
>   */
>  typedef struct ChannelElement {
>      int present;
>      // CPE specific
> +    uint8_t max_sfb_ste;      ///< (USAC) Maximum of both max_sfb values
>      uint8_t ms_mask[128];     ///< Set if mid/side stereo is used for each scalefactor window band
>      // shared
>      SingleChannelElement ch[2];
>      // CCE specific
>      ChannelCoupling coup;
> +    // USAC stereo coupling data
> +    AACUsacStereo us;
>  } ChannelElement;
>  
> +typedef struct AACUSACLoudnessInfo {
> +    uint8_t drc_set_id : 6;
> +    uint8_t downmix_id : 7;
> +    struct {
> +        uint16_t lvl : 12;
> +        uint8_t present : 1;
> +    } sample_peak;
> +
> +    struct {
> +        uint16_t lvl : 12;
> +        uint8_t measurement : 4;
> +        uint8_t reliability : 2;
> +        uint8_t present : 1;
> +    } true_peak;
> +
> +    uint8_t nb_measurements : 4;
> +    struct {
> +        uint8_t method_def : 4;
> +        uint8_t method_val;
> +        uint8_t measurement : 4;
> +        uint8_t reliability : 2;
> +    } measurements[16];
> +} AACUSACLoudnessInfo;
> +
> +typedef struct AACUsacElemConfig {
> +    enum AACUsacElem type;
> +
> +    uint8_t tw_mdct : 1;
> +    uint8_t noise_fill : 1;
> +
> +    uint8_t stereo_config_index;
> +
> +    struct {
> +        int ratio;
> +
> +        uint8_t harmonic_sbr : 1; /* harmonicSBR */
> +        uint8_t bs_intertes : 1; /* bs_interTes */
> +        uint8_t bs_pvc : 1; /* bs_pvc */
> +
> +        struct {
> +            uint8_t start_freq; /* dflt_start_freq */
> +            uint8_t stop_freq; /* dflt_stop_freq */
> +
> +            uint8_t freq_scale; /* dflt_freq_scale */
> +            uint8_t alter_scale : 1; /* dflt_alter_scale */
> +            uint8_t noise_scale; /* dflt_noise_scale */
> +
> +            uint8_t limiter_bands; /* dflt_limiter_bands */
> +            uint8_t limiter_gains; /* dflt_limiter_gains */
> +            uint8_t interpol_freq : 1; /* dflt_interpol_freq */
> +            uint8_t smoothing_mode : 1; /* dflt_smoothing_mode */
> +        } dflt;
> +    } sbr;
> +
> +    struct {
> +        uint8_t freq_res; /* bsFreqRes */
> +        uint8_t fixed_gain; /* bsFixedGainDMX */
> +        uint8_t temp_shape_config; /* bsTempShapeConfig */
> +        uint8_t decorr_config; /* bsDecorrConfig */
> +        uint8_t high_rate_mode : 1; /* bsHighRateMode */
> +        uint8_t phase_coding : 1; /* bsPhaseCoding */
> +
> +        uint8_t otts_bands_phase; /* bsOttBandsPhase */
> +        uint8_t residual_coding; /* bsResidualCoding */
> +        uint8_t residual_bands; /* bsResidualBands */
> +        uint8_t pseudo_lr : 1; /* bsPseudoLr */
> +        uint8_t env_quant_mode : 1; /* bsEnvQuantMode */

Is using bitfields really worth it given that they force to use masking
for accesses?

> +    } mps;
> +
> +    struct {
> +        enum AACUsacExtension type;
> +        uint8_t payload_frag;
> +        uint32_t default_len;
> +        uint32_t pl_data_offset;
> +        uint8_t *pl_data;
> +    } ext;
> +} AACUsacElemConfig;
> +
> +typedef struct AACUSACConfig {
> +    uint8_t core_sbr_frame_len_idx; /* coreSbrFrameLengthIndex */
> +    uint8_t rate_idx;
> +    uint16_t core_frame_len;
> +    uint16_t stream_identifier;
> +
> +    AACUsacElemConfig elems[64];
> +    int nb_elems;
> +
> +    struct {
> +        uint8_t nb_album;
> +        AACUSACLoudnessInfo album_info[64];
> +        uint8_t nb_info;
> +        AACUSACLoudnessInfo info[64];
> +    } loudness;
> +} AACUSACConfig;
> +
>  typedef struct OutputConfiguration {
>      MPEG4AudioConfig m4ac;
>      uint8_t layout_map[MAX_ELEM_ID*4][3];
>      int layout_map_tags;
>      AVChannelLayout ch_layout;
>      enum OCStatus status;
> +    AACUSACConfig usac;
>  } OutputConfiguration;
>  
>  /**
> diff --git a/libavcodec/aac/aacdec_ac.c b/libavcodec/aac/aacdec_ac.c
> new file mode 100644
> index 0000000000..326d716bd3
> --- /dev/null
> +++ b/libavcodec/aac/aacdec_ac.c
> @@ -0,0 +1,224 @@
> +/*
> + * AAC definitions and structures
> + * Copyright (c) 2024 Lynne
> + *
> + * This file is part of FFmpeg.
> + *
> + * FFmpeg is free software; you can redistribute it and/or
> + * modify it under the terms of the GNU Lesser General Public
> + * License as published by the Free Software Foundation; either
> + * version 2.1 of the License, or (at your option) any later version.
> + *
> + * FFmpeg is distributed in the hope that it will be useful,
> + * but WITHOUT ANY WARRANTY; without even the implied warranty of
> + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
> + * Lesser General Public License for more details.
> + *
> + * You should have received a copy of the GNU Lesser General Public
> + * License along with FFmpeg; if not, write to the Free Software
> + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
> + */
> +
> +#include "libavcodec/aactab.h"
> +#include "aacdec_ac.h"
> +
> +uint32_t ff_aac_ac_map_process(AACArithState *state, int reset, int N)
> +{
> +    float ratio;
> +    if (reset) {
> +        memset(state->last, 0, sizeof(state->last));
> +        state->last_len = N;
> +        memset(state->cur, 0, sizeof(state->cur));
> +        state->cur[3] = 0;
> +        state->cur[2] = 0;
> +        state->cur[1] = 0;
> +        state->cur[0] = 1;
> +        state->last[0] = 0 << 12;
> +        state->state_pre = 0;
> +        return 0;
> +    } else if (state->last_len != N) {
> +        int i;
> +        uint8_t last[512 /* 2048 / 4 */];
> +        memcpy(last, state->last, sizeof(last));
> +
> +        ratio = state->last_len / (float)N;
> +        for (i = 0; i < N/2; i++) {
> +            int k = (int)(i * ratio);
> +            state->last[i] = last[k];
> +        }
> +
> +        for (; i < FF_ARRAY_ELEMS(state->last); i++)
> +            state->last[i] = 0;
> +
> +        state->last_len = N;
> +    }
> +
> +    memset(state->cur, 0, sizeof(state->cur));
> +    state->cur[3] = 0;
> +    state->cur[2] = 0;
> +    state->cur[1] = 0;
> +    state->cur[0] = 1;
> +
> +    state->state_pre = state->last[0] << 12;
> +    return state->last[0] << 12;
> +}
> +
> +extern int ec_debug;
> +
> +int trig = 0;
> +
> +uint32_t ff_aac_ac_get_context(AACArithState *state, uint32_t c, int i, int N)
> +{
> +    c = state->state_pre >> 8;
> +    c = c + (state->last[i + 1] << 8);
> +    c = (c << 4);
> +    c += state->cur[1];
> +
> +    state->state_pre = c;
> +
> +    if (i > 3 &&
> +        ((state->cur[3] + state->cur[2] + state->cur[1]) < 5))
> +        return c + 0x10000;
> +
> +    return c;
> +}
> +
> +uint32_t ff_aac_ac_get_pk(uint32_t c)
> +{
> +    int i_min = -1;
> +    int i, j;
> +    int i_max = FF_ARRAY_ELEMS(ff_aac_ac_lookup_m) - 1;
> +    while ((i_max - i_min) > 1) {
> +        i = i_min + ((i_max - i_min) / 2);
> +        j = ff_aac_ac_hash_m[i];
> +        if (c < (j >> 8))
> +            i_max = i;
> +        else if (c > (j >> 8))
> +            i_min = i;
> +        else
> +            return (j & 0xFF);
> +    }
> +    return ff_aac_ac_lookup_m[i_max];
> +}
> +
> +void ff_aac_ac_update_context(AACArithState *state, int idx,
> +                              uint16_t a, uint16_t b)
> +{
> +    state->cur[0] = a + b + 1;
> +    if (state->cur[0] > 0xF)
> +        state->cur[0] = 0xF;
> +
> +    state->cur[3] = state->cur[2];
> +    state->cur[2] = state->cur[1];
> +    state->cur[1] = state->cur[0];
> +
> +    state->last[idx] = state->cur[0];
> +}
> +
> +/* Initialize AC */
> +void ff_aac_ac_init(AACArith *ac, GetBitContext *gb)
> +{
> +    ac->low = 0;
> +    ac->high = UINT16_MAX;
> +    ac->val = get_bits(gb, 16);
> +}
> +
> +uint16_t ff_aac_ac_decode(AACArith *ac, GetBitContext *gb,
> +                          const uint16_t *cdf, uint16_t cdf_len)
> +{
> +    int val = ac->val;
> +    int low = ac->low;
> +    int high = ac->high;
> +
> +    int rng = high - low + 1;
> +    int c = ((((int)(val - low + 1)) << 14) - ((int)1));
> +
> +    /* Note: this could be done faster via heuristics, the total number of
> +     * configurations is low */
> +    const uint16_t *p = cdf - 1;
> +    const uint16_t *q;
> +
> +    switch (cdf_len) {
> +    case 2:
> +        if ((p[1] * rng) > c)
> +            p += 1;
> +        break;
> +    case 4:
> +        if ((p[2] * rng) > c)
> +            p += 2;
> +        if ((p[1] * rng) > c)
> +            p += 1;
> +        break;
> +    case 17:
> +        /* First check if the current probability is even met at all */
> +        if ((p[1] * rng) <= c)
> +            break;
> +        p += 1;
> +        for (int i = 8; i >= 1; i >>= 1)
> +            if ((p[i] * rng) > c)
> +                p += i;
> +        break;
> +    case 27:
> +        const uint16_t *p_24 = p + 24;
> +
> +        if ((p[16] * rng) > c)
> +            p += 16;
> +        if ((p[8] * rng) > c)
> +            p += 8;
> +        if (p != p_24)
> +            if ((p[4] * rng) > c)
> +                p += 4;
> +        if ((p[2] * rng) > c)
> +            p += 2;
> +
> +        if (p != &p_24[2])
> +            if ((p[1] * rng) > c)
> +                p += 1;
> +        break;
> +    default:
> +        /* This should never happen */
> +        av_assert2(0);
> +    }
> +
> +    int sym = (int)((ptrdiff_t)(p - cdf)) + 1;
> +    if (sym)
> +        high = low + ((rng * cdf[sym - 1]) >> 14) - 1;
> +    low += (rng * cdf[sym]) >> 14;
> +
> +    /* This loop could be done faster */
> +    while (1) {
> +        if (high < 32768) {
> +            ;
> +        } else if (low >= 32768) {
> +            val -= 32768;
> +            low -= 32768;
> +            high -= 32768;
> +        } else if (low >= 16384 && high < 49152) {
> +            val -= 16384;
> +            low -= 16384;
> +            high -= 16384;
> +        } else {
> +            break;
> +        }
> +        low += low;
> +        high += high + 1;
> +        val = (val << 1) | get_bits1(gb);
> +    };
> +
> +    ac->low = low;
> +    ac->high = high;
> +    ac->val = val;
> +
> +    return sym;
> +}
> +
> +void ff_aac_ac_finish(AACArithState *state, int offset, int N)
> +{
> +    int i;
> +
> +    for (i = offset; i < N/2; i++)
> +        state->last[i] = 1;
> +
> +    for (; i < FF_ARRAY_ELEMS(state->last); i++)
> +        state->last[i] = 0;
> +}
> diff --git a/libavcodec/aac/aacdec_ac.h b/libavcodec/aac/aacdec_ac.h
> new file mode 100644
> index 0000000000..ef96bed770
> --- /dev/null
> +++ b/libavcodec/aac/aacdec_ac.h
> @@ -0,0 +1,54 @@
> +/*
> + * AAC definitions and structures
> + * Copyright (c) 2024 Lynne
> + *
> + * This file is part of FFmpeg.
> + *
> + * FFmpeg is free software; you can redistribute it and/or
> + * modify it under the terms of the GNU Lesser General Public
> + * License as published by the Free Software Foundation; either
> + * version 2.1 of the License, or (at your option) any later version.
> + *
> + * FFmpeg is distributed in the hope that it will be useful,
> + * but WITHOUT ANY WARRANTY; without even the implied warranty of
> + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
> + * Lesser General Public License for more details.
> + *
> + * You should have received a copy of the GNU Lesser General Public
> + * License along with FFmpeg; if not, write to the Free Software
> + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
> + */
> +
> +#ifndef AVCODEC_AACDEC_AC_H
> +#define AVCODEC_AACDEC_AC_H
> +
> +#include "libavcodec/get_bits.h"
> +
> +typedef struct AACArithState {
> +    uint8_t last[512 /* 2048 / 4 */];
> +    int last_len;
> +    uint8_t cur[4];
> +    uint16_t state_pre;
> +} AACArithState;
> +
> +typedef struct AACArith {
> +    uint16_t low;
> +    uint16_t high;
> +    uint16_t val;
> +} AACArith;
> +
> +#define FF_AAC_AC_ESCAPE 16
> +
> +uint32_t ff_aac_ac_map_process(AACArithState *state, int reset, int len);
> +uint32_t ff_aac_ac_get_context(AACArithState *state, uint32_t old_c, int idx, int len);
> +uint32_t ff_aac_ac_get_pk(uint32_t c);
> +
> +void ff_aac_ac_update_context(AACArithState *state, int idx, uint16_t a, uint16_t b);
> +void ff_aac_ac_init(AACArith *ac, GetBitContext *gb);
> +
> +uint16_t ff_aac_ac_decode(AACArith *ac, GetBitContext *gb,
> +                          const uint16_t *cdf, uint16_t cdf_len);
> +
> +void ff_aac_ac_finish(AACArithState *state, int offset, int nb);
> +
> +#endif /* AVCODEC_AACDEC_AC_H */
> diff --git a/libavcodec/aac/aacdec_dsp_template.c b/libavcodec/aac/aacdec_dsp_template.c
> index 59a69d88f3..8d31af22f8 100644
> --- a/libavcodec/aac/aacdec_dsp_template.c
> +++ b/libavcodec/aac/aacdec_dsp_template.c
> @@ -88,8 +88,8 @@ static void AAC_RENAME(apply_mid_side_stereo)(AACDecContext *ac, ChannelElement
>      INTFLOAT *ch1 = cpe->ch[1].AAC_RENAME(coeffs);
>      const uint16_t *offsets = ics->swb_offset;
>      for (int g = 0; g < ics->num_window_groups; g++) {
> -        for (int sfb = 0; sfb < ics->max_sfb; sfb++) {
> -            const int idx = g*ics->max_sfb + sfb;
> +        for (int sfb = 0; sfb < cpe->max_sfb_ste; sfb++) {
> +            const int idx = g*cpe->max_sfb_ste + sfb;
>              if (cpe->ms_mask[idx] &&
>                  cpe->ch[0].band_type[idx] < NOISE_BT &&
>                  cpe->ch[1].band_type[idx] < NOISE_BT) {
> diff --git a/libavcodec/aac/aacdec_latm.h b/libavcodec/aac/aacdec_latm.h
> index e40a2fe1a7..047c11e0fb 100644
> --- a/libavcodec/aac/aacdec_latm.h
> +++ b/libavcodec/aac/aacdec_latm.h
> @@ -56,7 +56,8 @@ static int latm_decode_audio_specific_config(struct LATMContext *latmctx,
>  {
>      AACDecContext *ac     = &latmctx->aac_ctx;
>      AVCodecContext *avctx = ac->avctx;
> -    MPEG4AudioConfig m4ac = { 0 };
> +    OutputConfiguration oc = { 0 };
> +    MPEG4AudioConfig *m4ac = &oc.m4ac;
>      GetBitContext gbc;
>      int config_start_bit  = get_bits_count(gb);
>      int sync_extension    = 0;
> @@ -76,7 +77,7 @@ static int latm_decode_audio_specific_config(struct LATMContext *latmctx,
>      if (get_bits_left(gb) <= 0)
>          return AVERROR_INVALIDDATA;
>  
> -    bits_consumed = decode_audio_specific_config_gb(NULL, avctx, &m4ac,
> +    bits_consumed = decode_audio_specific_config_gb(NULL, avctx, &oc,
>                                                      &gbc, config_start_bit,
>                                                      sync_extension);
>  
> @@ -88,11 +89,12 @@ static int latm_decode_audio_specific_config(struct LATMContext *latmctx,
>        asclen = bits_consumed;
>  
>      if (!latmctx->initialized ||
> -        ac->oc[1].m4ac.sample_rate != m4ac.sample_rate ||
> -        ac->oc[1].m4ac.chan_config != m4ac.chan_config) {
> +        ac->oc[1].m4ac.sample_rate != m4ac->sample_rate ||
> +        ac->oc[1].m4ac.chan_config != m4ac->chan_config) {
>  
>          if (latmctx->initialized) {
> -            av_log(avctx, AV_LOG_INFO, "audio config changed (sample_rate=%d, chan_config=%d)\n", m4ac.sample_rate, m4ac.chan_config);
> +            av_log(avctx, AV_LOG_INFO, "audio config changed (sample_rate=%d, chan_config=%d)\n",
> +                   m4ac->sample_rate, m4ac->chan_config);
>          } else {
>              av_log(avctx, AV_LOG_DEBUG, "initializing latmctx\n");
>          }
> @@ -280,7 +282,7 @@ static int latm_decode_frame(AVCodecContext *avctx, AVFrame *out,
>          } else {
>              push_output_configuration(&latmctx->aac_ctx);
>              if ((err = decode_audio_specific_config(
> -                    &latmctx->aac_ctx, avctx, &latmctx->aac_ctx.oc[1].m4ac,
> +                    &latmctx->aac_ctx, avctx, &latmctx->aac_ctx.oc[1],
>                      avctx->extradata, avctx->extradata_size*8LL, 1)) < 0) {
>                  pop_output_configuration(&latmctx->aac_ctx);
>                  return err;
> diff --git a/libavcodec/aac/aacdec_lpd.c b/libavcodec/aac/aacdec_lpd.c
> new file mode 100644
> index 0000000000..be39e2c175
> --- /dev/null
> +++ b/libavcodec/aac/aacdec_lpd.c
> @@ -0,0 +1,192 @@
> +/*
> + * Copyright (c) 2024 Lynne <dev at lynne.ee>
> + *
> + * This file is part of FFmpeg.
> + *
> + * FFmpeg is free software; you can redistribute it and/or
> + * modify it under the terms of the GNU Lesser General Public
> + * License as published by the Free Software Foundation; either
> + * version 2.1 of the License, or (at your option) any later version.
> + *
> + * FFmpeg is distributed in the hope that it will be useful,
> + * but WITHOUT ANY WARRANTY; without even the implied warranty of
> + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
> + * Lesser General Public License for more details.
> + *
> + * You should have received a copy of the GNU Lesser General Public
> + * License along with FFmpeg; if not, write to the Free Software
> + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
> + */
> +
> +#include "aacdec_lpd.h"
> +#include "aacdec_usac.h"
> +
> +const uint8_t ff_aac_lpd_mode_tab[32][4] = {
> +    { 0, 0, 0, 0 },
> +    { 1, 0, 0, 0 },
> +    { 0, 1, 0, 0 },
> +    { 1, 1, 0, 0 },
> +    { 0, 0, 1, 0 },
> +    { 1, 0, 1, 0 },
> +    { 0, 1, 1, 0 },
> +    { 1, 1, 1, 0 },
> +    { 0, 0, 0, 1 },
> +    { 1, 0, 0, 1 },
> +    { 0, 1, 0, 1 },
> +    { 1, 1, 0, 1 },
> +    { 0, 0, 1, 1 },
> +    { 1, 0, 1, 1 },
> +    { 0, 1, 1, 1 },
> +    { 1, 1, 1, 1 },
> +    { 2, 2, 0, 0 },
> +    { 2, 2, 1, 0 },
> +    { 2, 2, 0, 1 },
> +    { 2, 2, 1, 1 },
> +    { 0, 0, 2, 2 },
> +    { 1, 0, 2, 2 },
> +    { 0, 1, 2, 2 },
> +    { 1, 1, 2, 2 },
> +    { 2, 2, 2, 2 },
> +    { 3, 3, 3, 3 },
> +    /* Larger values are reserved, but permit them for resilience */
> +    { 0, 0, 0, 0 },
> +    { 0, 0, 0, 0 },
> +    { 0, 0, 0, 0 },
> +    { 0, 0, 0, 0 },
> +    { 0, 0, 0, 0 },
> +    { 0, 0, 0, 0 },
> +};
> +
> +static void parse_qn(GetBitContext *gb, int *qn, int nk_mode, int no_qn)
> +{
> +    if (nk_mode == 1) {
> +        for (int k = 0; k < no_qn; k++) {
> +            qn[k] = ff_aac_get_vlclbf(gb);
> +            if (qn[k])
> +                qn[k]++;
> +        }
> +        return;
> +    }
> +
> +    for (int k = 0; k < no_qn; k++)
> +        qn[k] = get_bits(gb, 2) + 2;
> +
> +    if (nk_mode == 2) {
> +        for (int k = 0; k < no_qn; k++) {
> +            if (qn[k] > 4) {
> +                qn[k] = ff_aac_get_vlclbf(gb);
> +                if (qn[k])
> +                    qn[k] += 4;
> +            }
> +        }
> +        return;
> +    }
> +
> +    for (int k = 0; k < no_qn; k++) {
> +        if (qn[k] > 4) {
> +            int qn_ext = ff_aac_get_vlclbf(gb);
> +            switch (qn_ext) {
> +            case 0: qn[k] = 5; break;
> +            case 1: qn[k] = 6; break;
> +            case 2: qn[k] = 0; break;
> +            default: qn[k] = qn_ext + 4; break;
> +            }
> +        }
> +    }
> +}
> +
> +static int parse_codebook_idx(GetBitContext *gb, uint32_t *kv,
> +                              int nk_mode, int no_qn)
> +{
> +    int n, nk;
> +
> +    int qn[2];
> +    parse_qn(gb, qn, nk_mode, no_qn);
> +
> +    for (int k = 0; k < no_qn; k++) {
> +        if (qn[k] > 4) {
> +            nk = (qn[k] - 3) / 2;
> +            n = qn[k] - nk*2;
> +        } else {
> +            nk = 0;
> +            n = qn[k];
> +        }
> +    }
> +
> +    int idx = get_bits(gb, 4*n);
> +
> +    if (nk > 0)
> +        for (int i = 0; i < 8; i++)
> +            kv[i] = get_bits(gb, nk);
> +
> +    return 0;
> +}
> +
> +int ff_aac_parse_fac_data(AACUsacElemData *ce, GetBitContext *gb,
> +                          int use_gain, int len)
> +{
> +    int ret;
> +    if (use_gain)
> +        ce->fac.gain = get_bits(gb, 7);
> +
> +    for (int i = 0; i < len/8; i++) {
> +        ret = parse_codebook_idx(gb, ce->fac.kv[i], 1, 1);
> +        if (ret < 0)
> +            return ret;
> +    }
> +
> +    return 0;
> +}
> +
> +int ff_aac_ldp_parse_channel_stream(AACDecContext *ac, AACUSACConfig *usac,
> +                                    AACUsacElemData *ce, GetBitContext *gb)
> +{
> +    ce->ldp.acelp_core_mode = get_bits(gb, 3);
> +    ce->ldp.lpd_mode = get_bits(gb, 5);
> +
> +    ce->ldp.bpf_control_info = get_bits1(gb);
> +    ce->ldp.core_mode_last = get_bits1(gb);
> +    ce->ldp.fac_data_present = get_bits1(gb);
> +
> +    const uint8_t *mod = ff_aac_lpd_mode_tab[ce->ldp.lpd_mode];
> +
> +    int first_ldp_flag = !ce->ldp.core_mode_last;
> +    int first_tcx_flag = 1;
> +    if (first_ldp_flag)
> +        ce->ldp.last_lpd_mode = -1; /* last_ldp_mode is a **STATEFUL** value */
> +
> +    int k = 0;
> +    while (k < 0) {
> +        if (!k) {
> +            if (ce->ldp.core_mode_last && ce->ldp.fac_data_present)
> +                ff_aac_parse_fac_data(ce, gb, 0, usac->core_frame_len/8);
> +        } else {
> +            if (!ce->ldp.last_lpd_mode && mod[k] > 0 ||
> +                ce->ldp.last_lpd_mode && !mod[k])
> +                ff_aac_parse_fac_data(ce, gb, 0, usac->core_frame_len/8);
> +        }
> +        if (!mod[k]) {
> +//            parse_acelp_coding();
> +            ce->ldp.last_lpd_mode = 0;
> +            k++;
> +        } else {
> +//            parse_tcx_coding();
> +            ce->ldp.last_lpd_mode = mod[k];
> +            k += (1 << (mod[k] - 1));
> +            first_tcx_flag = 0;
> +        }
> +    }
> +
> +//    parse_lpc_data(first_lpd_flag);
> +
> +    if (!ce->ldp.core_mode_last && ce->ldp.fac_data_present) {
> +        uint16_t len_8 = usac->core_frame_len / 8;
> +        uint16_t len_16 = usac->core_frame_len / 16;
> +        uint16_t fac_len = get_bits1(gb) /* short_fac_flag */ ? len_8 : len_16;
> +        int ret = ff_aac_parse_fac_data(ce, gb, 1, fac_len);
> +        if (ret < 0)
> +            return ret;
> +    }
> +
> +    return 0;
> +}
> diff --git a/libavcodec/aac/aacdec_lpd.h b/libavcodec/aac/aacdec_lpd.h
> new file mode 100644
> index 0000000000..924ff75e52
> --- /dev/null
> +++ b/libavcodec/aac/aacdec_lpd.h
> @@ -0,0 +1,33 @@
> +/*
> + * Copyright (c) 2024 Lynne <dev at lynne.ee>
> + *
> + * This file is part of FFmpeg.
> + *
> + * FFmpeg is free software; you can redistribute it and/or
> + * modify it under the terms of the GNU Lesser General Public
> + * License as published by the Free Software Foundation; either
> + * version 2.1 of the License, or (at your option) any later version.
> + *
> + * FFmpeg is distributed in the hope that it will be useful,
> + * but WITHOUT ANY WARRANTY; without even the implied warranty of
> + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
> + * Lesser General Public License for more details.
> + *
> + * You should have received a copy of the GNU Lesser General Public
> + * License along with FFmpeg; if not, write to the Free Software
> + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
> + */
> +
> +#ifndef AVCODEC_AAC_AACDEC_LPD_H
> +#define AVCODEC_AAC_AACDEC_LPD_H
> +
> +#include "aacdec.h"
> +#include "libavcodec/get_bits.h"
> +
> +int ff_aac_parse_fac_data(AACUsacElemData *ce, GetBitContext *gb,
> +                          int use_gain, int len);
> +
> +int ff_aac_ldp_parse_channel_stream(AACDecContext *ac, AACUSACConfig *usac,
> +                                    AACUsacElemData *ce, GetBitContext *gb);
> +
> +#endif /* AVCODEC_AAC_AACDEC_LPD_H */
> diff --git a/libavcodec/aac/aacdec_usac.c b/libavcodec/aac/aacdec_usac.c
> new file mode 100644
> index 0000000000..4b48c4d6ca
> --- /dev/null
> +++ b/libavcodec/aac/aacdec_usac.c
> @@ -0,0 +1,1230 @@
> +/*
> + * Copyright (c) 2024 Lynne <dev at lynne.ee>
> + *
> + * This file is part of FFmpeg.
> + *
> + * FFmpeg is free software; you can redistribute it and/or
> + * modify it under the terms of the GNU Lesser General Public
> + * License as published by the Free Software Foundation; either
> + * version 2.1 of the License, or (at your option) any later version.
> + *
> + * FFmpeg is distributed in the hope that it will be useful,
> + * but WITHOUT ANY WARRANTY; without even the implied warranty of
> + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
> + * Lesser General Public License for more details.
> + *
> + * You should have received a copy of the GNU Lesser General Public
> + * License along with FFmpeg; if not, write to the Free Software
> + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
> + */
> +
> +#include "aacdec_usac.h"
> +#include "aacdec_tab.h"
> +#include "aacdec_lpd.h"
> +#include "aacdec_ac.h"
> +
> +#include "libavcodec/opusdsp.h"
> +#include "libavcodec/aactab.h"
> +#include "libavutil/mem.h"
> +#include "libavcodec/mpeg4audio.h"
> +
> +/* Number of scalefactor bands per complex prediction band, equal to 2. */
> +#define SFB_PER_PRED_BAND 2
> +
> +static inline uint32_t get_escaped_value(GetBitContext *gb, int nb1, int nb2, int nb3)
> +{
> +    uint32_t val = get_bits(gb, nb1);
> +    if (val < ((1 << nb1) - 1))
> +        return val;
> +
> +    val += get_bits(gb, nb2);
> +    if (val == ((1 << nb2) - 1))
> +        val += get_bits(gb, nb3);
> +
> +    return val;
> +}
> +
> +static int aac_usac_samplerate[] = {

Missing const

> +    96000, 88200, 64000, 48000, 44100, 32000, 24000, 22050,
> +    16000, 12000, 11025, 8000, 7350, -1, -1, 57600, 51200,
> +    40000, 38400, 34150, 28800, 25600, 20000, 19200, 17075, 14400, 12800, 9600, -1, -1, -1, -1,
> +};
> +

> +static int parse_ext_ele(AACDecContext *ac, AACUsacElemConfig *e,
> +                         GetBitContext *gb)
> +{
> +    if (get_bits1(gb)) { /* usacExtElementPresent */
> +        uint32_t len;
> +        if (get_bits1(gb)) { /* usacExtElementUseDefaultLength */
> +            len = e->ext.default_len;
> +        } else {
> +            len = get_bits(gb, 8); /* usacExtElementPayloadLength */
> +            if (len == 255)
> +                len += get_bits(gb, 16) - 2;
> +        }
> +
> +        if (len) {
> +            uint8_t *tmp;
> +            uint8_t pl_frag_start = 1;
> +            uint8_t pl_frag_end = 1;
> +            if (e->ext.payload_frag) {
> +                pl_frag_start = get_bits1(gb); /* usacExtElementStart */
> +                pl_frag_end = get_bits1(gb); /* usacExtElementStop */
> +            }
> +
> +            if (pl_frag_start)
> +                e->ext.pl_data_offset = 0;
> +
> +            tmp = av_realloc(e->ext.pl_data, e->ext.pl_data_offset + len);
> +            if (!tmp) {
> +                free(e->ext.pl_data);

Wrong deallocator.

> +                return AVERROR(ENOMEM);
> +            }
> +            e->ext.pl_data = tmp;
> +
> +            for (int i = 0; i < len; i++)
> +                e->ext.pl_data[e->ext.pl_data_offset + i] = get_bits(gb, 8);
> +
> +            if (pl_frag_end) {
> +                int ret;
> +                e->ext.pl_data_offset = 0;
> +                switch (e->ext.type) {
> +                case ID_EXT_ELE_FILL:
> +                    av_freep(&e->ext.pl_data);
> +                    break;
> +                case ID_EXT_ELE_AUDIOPREROLL:
> +                    ret = parse_audio_preroll(ac, e->ext.pl_data,
> +                                              e->ext.pl_data_offset);
> +                    if (ret < 0) {
> +                        av_freep(&e->ext.pl_data);
> +                        return ret;
> +                    }
> +                    break;
> +                default:
> +                    av_freep(&e->ext.pl_data);

Pointless if you abort in the next line

> +                    /* This should never happen */
> +                    av_assert0(0);
> +                }
> +            }
> +        }
> +    }
> +
> +    return 0;
> +}
> +


> +#include "libavcodec/opusdsp.h"
> +
> +#ifndef AVCODEC_AAC_AACDEC_USAC_H
> +#define AVCODEC_AAC_AACDEC_USAC_H
> +
> +#include "aacdec.h"
> +
> +#include "libavcodec/get_bits.h"
> +
> +static inline uint8_t ff_aac_get_vlclbf(GetBitContext *gb)
> +{
> +    uint8_t ret = 0;
> +    while (get_bits1(gb) && ret <= 36)
> +        ret++;
> +    return ret;
> +}

Look at unary.h

> +
> +int ff_aac_usac_config_decode(AACDecContext *ac, AVCodecContext *avctx,
> +                              GetBitContext *gb, OutputConfiguration *oc,
> +                              int channel_config);
> +
> +int ff_aac_usac_reset_state(AACDecContext *ac, OutputConfiguration *oc);
> +
> +int ff_aac_usac_decode_frame(AVCodecContext *avctx, AACDecContext *ac,
> +                             GetBitContext *gb, int *got_frame_ptr);
> +
> +#endif /* AVCODEC_AAC_AACDEC_USAC_H */



More information about the ffmpeg-devel mailing list