[FFmpeg-devel] [PATCH 8/8] aacdec: add a decoder for AAC USAC (xHE-AAC)

Lynne dev at lynne.ee
Thu May 16 18:00:25 EEST 2024


On 16/05/2024 12:26, Andreas Rheinhardt wrote:
> Lynne via ffmpeg-devel:
>> This commit adds a decoder for the frequency-domain part of USAC.
>>
>> What works:
>>   - Mono
>>   - Stereo (no prediction)
>>   - Stereo (mid/side coding)
>>
>> What doesn't:
>>   - Preroll decoding (every single decoder seems faulty or weird?)
>>   - Complex stereo prediction
>>
>> Known issues:
>>   - Spec non-compliance (noise synthesis in particular)
>>   - Lack of robustness
>> ---
> 
> 
>> diff --git a/libavcodec/aac/aacdec.h b/libavcodec/aac/aacdec.h
>> index 20545a24d4..3e6592cf0e 100644
>> --- a/libavcodec/aac/aacdec.h
>> +++ b/libavcodec/aac/aacdec.h
>> @@ -42,6 +42,8 @@
>>   #include "libavcodec/avcodec.h"
>>   #include "libavcodec/mpeg4audio.h"
>>   
>> +#include "aacdec_ac.h"
>> +
>>   typedef struct AACDecContext AACDecContext;
>>   
>>   /**
>> @@ -69,6 +71,32 @@ enum CouplingPoint {
>>       AFTER_IMDCT = 3,
>>   };
>>   
>> +enum AACUsacElem {
>> +    ID_USAC_SCE = 0,
>> +    ID_USAC_CPE = 1,
>> +    ID_USAC_LFE = 2,
>> +    ID_USAC_EXT = 3,
>> +};
>> +
>> +enum ExtensionHeaderType {
>> +    ID_CONFIG_EXT_FILL = 0,
>> +    ID_CONFIG_EXT_LOUDNESS_INFO = 2,
>> +    ID_CONFIG_EXT_STREAM_ID = 7,
>> +};
>> +
>> +enum AACUsacExtension {
>> +    ID_EXT_ELE_FILL,
>> +    ID_EXT_ELE_MPEGS,
>> +    ID_EXT_ELE_SAOC,
>> +    ID_EXT_ELE_AUDIOPREROLL,
>> +    ID_EXT_ELE_UNI_DRC,
>> +};
>> +
>> +enum AACUSACLoudnessExt {
>> +    UNIDRCLOUDEXT_TERM = 0x0,
>> +    UNIDRCLOUDEXT_EQ = 0x1,
>> +};
>> +
>>   // Supposed to be equal to AAC_RENAME() in case of USE_FIXED.
>>   #define RENAME_FIXED(name) name ## _fixed
>>   
>> @@ -93,6 +121,40 @@ typedef struct LongTermPrediction {
>>       int8_t used[MAX_LTP_LONG_SFB];
>>   } LongTermPrediction;
>>   
>> +/* Per channel core mode */
>> +typedef struct AACUsacElemData {
>> +    uint8_t core_mode;
>> +    uint8_t scale_factor_grouping;
>> +
>> +    /* Timewarping ratio */
>> +#define NUM_TW_NODES 16
>> +    uint8_t tw_ratio[NUM_TW_NODES];
>> +
>> +    struct {
>> +        uint8_t acelp_core_mode : 3;
>> +        uint8_t lpd_mode : 5;
>> +
>> +        uint8_t bpf_control_info : 1;
>> +        uint8_t core_mode_last : 1;
>> +        uint8_t fac_data_present : 1;
>> +
>> +        int last_lpd_mode;
>> +    } ldp;
>> +
>> +    struct {
>> +        unsigned int seed;
>> +        uint8_t level : 3;
>> +        uint8_t offset : 5;
>> +    } noise;
>> +
>> +    struct {
>> +        uint8_t gain;
>> +        uint32_t kv[8 /* (1024 / 16) / 8 */][8];
>> +    } fac;
>> +
>> +    AACArithState ac;
>> +} AACUsacElemData;
>> +
>>   /**
>>    * Individual Channel Stream
>>    */
>> @@ -145,6 +207,7 @@ typedef struct ChannelCoupling {
>>    */
>>   typedef struct SingleChannelElement {
>>       IndividualChannelStream ics;
>> +    AACUsacElemData ue;                             ///< USAC element data
>>       TemporalNoiseShaping tns;
>>       enum BandType band_type[128];                   ///< band types
>>       int sfo[128];                                   ///< scalefactor offsets
>> @@ -163,25 +226,141 @@ typedef struct SingleChannelElement {
>>       };
>>   } SingleChannelElement;
>>   
>> +typedef struct AACUsacStereo {
>> +    uint8_t common_window;
>> +    uint8_t common_tw;
>> +
>> +    uint8_t ms_mask_mode;
>> +    uint8_t config_idx;
>> +
>> +    struct {
>> +        uint8_t use_prev_frame;
>> +        uint8_t pred_dir;
>> +        uint8_t delta_code_time;
>> +        uint8_t pred_used[8][64];
>> +
>> +        AVComplexFloat pred[8][64];
>> +    } cplx;
>> +} AACUsacStereo;
>> +
>>   /**
>>    * channel element - generic struct for SCE/CPE/CCE/LFE
>>    */
>>   typedef struct ChannelElement {
>>       int present;
>>       // CPE specific
>> +    uint8_t max_sfb_ste;      ///< (USAC) Maximum of both max_sfb values
>>       uint8_t ms_mask[128];     ///< Set if mid/side stereo is used for each scalefactor window band
>>       // shared
>>       SingleChannelElement ch[2];
>>       // CCE specific
>>       ChannelCoupling coup;
>> +    // USAC stereo coupling data
>> +    AACUsacStereo us;
>>   } ChannelElement;
>>   
>> +typedef struct AACUSACLoudnessInfo {
>> +    uint8_t drc_set_id : 6;
>> +    uint8_t downmix_id : 7;
>> +    struct {
>> +        uint16_t lvl : 12;
>> +        uint8_t present : 1;
>> +    } sample_peak;
>> +
>> +    struct {
>> +        uint16_t lvl : 12;
>> +        uint8_t measurement : 4;
>> +        uint8_t reliability : 2;
>> +        uint8_t present : 1;
>> +    } true_peak;
>> +
>> +    uint8_t nb_measurements : 4;
>> +    struct {
>> +        uint8_t method_def : 4;
>> +        uint8_t method_val;
>> +        uint8_t measurement : 4;
>> +        uint8_t reliability : 2;
>> +    } measurements[16];
>> +} AACUSACLoudnessInfo;
>> +
>> +typedef struct AACUsacElemConfig {
>> +    enum AACUsacElem type;
>> +
>> +    uint8_t tw_mdct : 1;
>> +    uint8_t noise_fill : 1;
>> +
>> +    uint8_t stereo_config_index;
>> +
>> +    struct {
>> +        int ratio;
>> +
>> +        uint8_t harmonic_sbr : 1; /* harmonicSBR */
>> +        uint8_t bs_intertes : 1; /* bs_interTes */
>> +        uint8_t bs_pvc : 1; /* bs_pvc */
>> +
>> +        struct {
>> +            uint8_t start_freq; /* dflt_start_freq */
>> +            uint8_t stop_freq; /* dflt_stop_freq */
>> +
>> +            uint8_t freq_scale; /* dflt_freq_scale */
>> +            uint8_t alter_scale : 1; /* dflt_alter_scale */
>> +            uint8_t noise_scale; /* dflt_noise_scale */
>> +
>> +            uint8_t limiter_bands; /* dflt_limiter_bands */
>> +            uint8_t limiter_gains; /* dflt_limiter_gains */
>> +            uint8_t interpol_freq : 1; /* dflt_interpol_freq */
>> +            uint8_t smoothing_mode : 1; /* dflt_smoothing_mode */
>> +        } dflt;
>> +    } sbr;
>> +
>> +    struct {
>> +        uint8_t freq_res; /* bsFreqRes */
>> +        uint8_t fixed_gain; /* bsFixedGainDMX */
>> +        uint8_t temp_shape_config; /* bsTempShapeConfig */
>> +        uint8_t decorr_config; /* bsDecorrConfig */
>> +        uint8_t high_rate_mode : 1; /* bsHighRateMode */
>> +        uint8_t phase_coding : 1; /* bsPhaseCoding */
>> +
>> +        uint8_t otts_bands_phase; /* bsOttBandsPhase */
>> +        uint8_t residual_coding; /* bsResidualCoding */
>> +        uint8_t residual_bands; /* bsResidualBands */
>> +        uint8_t pseudo_lr : 1; /* bsPseudoLr */
>> +        uint8_t env_quant_mode : 1; /* bsEnvQuantMode */
> 
> Is using bitfields really worth it, given that they force the compiler to
> use masking for accesses?
> 
>> +    } mps;
>> +
>> +    struct {
>> +        enum AACUsacExtension type;
>> +        uint8_t payload_frag;
>> +        uint32_t default_len;
>> +        uint32_t pl_data_offset;
>> +        uint8_t *pl_data;
>> +    } ext;
>> +} AACUsacElemConfig;
>> +
>> +typedef struct AACUSACConfig {
>> +    uint8_t core_sbr_frame_len_idx; /* coreSbrFrameLengthIndex */
>> +    uint8_t rate_idx;
>> +    uint16_t core_frame_len;
>> +    uint16_t stream_identifier;
>> +
>> +    AACUsacElemConfig elems[64];
>> +    int nb_elems;
>> +
>> +    struct {
>> +        uint8_t nb_album;
>> +        AACUSACLoudnessInfo album_info[64];
>> +        uint8_t nb_info;
>> +        AACUSACLoudnessInfo info[64];
>> +    } loudness;
>> +} AACUSACConfig;
>> +
>>   typedef struct OutputConfiguration {
>>       MPEG4AudioConfig m4ac;
>>       uint8_t layout_map[MAX_ELEM_ID*4][3];
>>       int layout_map_tags;
>>       AVChannelLayout ch_layout;
>>       enum OCStatus status;
>> +    AACUSACConfig usac;
>>   } OutputConfiguration;
>>   
>>   /**
>> diff --git a/libavcodec/aac/aacdec_ac.c b/libavcodec/aac/aacdec_ac.c
>> new file mode 100644
>> index 0000000000..326d716bd3
>> --- /dev/null
>> +++ b/libavcodec/aac/aacdec_ac.c
>> @@ -0,0 +1,224 @@
>> +/*
>> + * AAC definitions and structures
>> + * Copyright (c) 2024 Lynne
>> + *
>> + * This file is part of FFmpeg.
>> + *
>> + * FFmpeg is free software; you can redistribute it and/or
>> + * modify it under the terms of the GNU Lesser General Public
>> + * License as published by the Free Software Foundation; either
>> + * version 2.1 of the License, or (at your option) any later version.
>> + *
>> + * FFmpeg is distributed in the hope that it will be useful,
>> + * but WITHOUT ANY WARRANTY; without even the implied warranty of
>> + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
>> + * Lesser General Public License for more details.
>> + *
>> + * You should have received a copy of the GNU Lesser General Public
>> + * License along with FFmpeg; if not, write to the Free Software
>> + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
>> + */
>> +
>> +#include "libavcodec/aactab.h"
>> +#include "aacdec_ac.h"
>> +
>> +uint32_t ff_aac_ac_map_process(AACArithState *state, int reset, int N)
>> +{
>> +    float ratio;
>> +    if (reset) {
>> +        memset(state->last, 0, sizeof(state->last));
>> +        state->last_len = N;
>> +        memset(state->cur, 0, sizeof(state->cur));
>> +        state->cur[3] = 0;
>> +        state->cur[2] = 0;
>> +        state->cur[1] = 0;
>> +        state->cur[0] = 1;
>> +        state->last[0] = 0 << 12;
>> +        state->state_pre = 0;
>> +        return 0;
>> +    } else if (state->last_len != N) {
>> +        int i;
>> +        uint8_t last[512 /* 2048 / 4 */];
>> +        memcpy(last, state->last, sizeof(last));
>> +
>> +        ratio = state->last_len / (float)N;
>> +        for (i = 0; i < N/2; i++) {
>> +            int k = (int)(i * ratio);
>> +            state->last[i] = last[k];
>> +        }
>> +
>> +        for (; i < FF_ARRAY_ELEMS(state->last); i++)
>> +            state->last[i] = 0;
>> +
>> +        state->last_len = N;
>> +    }
>> +
>> +    memset(state->cur, 0, sizeof(state->cur));
>> +    state->cur[3] = 0;
>> +    state->cur[2] = 0;
>> +    state->cur[1] = 0;
>> +    state->cur[0] = 1;
>> +
>> +    state->state_pre = state->last[0] << 12;
>> +    return state->last[0] << 12;
>> +}
>> +
>> +extern int ec_debug;
>> +
>> +int trig = 0;
>> +
>> +uint32_t ff_aac_ac_get_context(AACArithState *state, uint32_t c, int i, int N)
>> +{
>> +    c = state->state_pre >> 8;
>> +    c = c + (state->last[i + 1] << 8);
>> +    c = (c << 4);
>> +    c += state->cur[1];
>> +
>> +    state->state_pre = c;
>> +
>> +    if (i > 3 &&
>> +        ((state->cur[3] + state->cur[2] + state->cur[1]) < 5))
>> +        return c + 0x10000;
>> +
>> +    return c;
>> +}
>> +
>> +uint32_t ff_aac_ac_get_pk(uint32_t c)
>> +{
>> +    int i_min = -1;
>> +    int i, j;
>> +    int i_max = FF_ARRAY_ELEMS(ff_aac_ac_lookup_m) - 1;
>> +    while ((i_max - i_min) > 1) {
>> +        i = i_min + ((i_max - i_min) / 2);
>> +        j = ff_aac_ac_hash_m[i];
>> +        if (c < (j >> 8))
>> +            i_max = i;
>> +        else if (c > (j >> 8))
>> +            i_min = i;
>> +        else
>> +            return (j & 0xFF);
>> +    }
>> +    return ff_aac_ac_lookup_m[i_max];
>> +}
>> +
>> +void ff_aac_ac_update_context(AACArithState *state, int idx,
>> +                              uint16_t a, uint16_t b)
>> +{
>> +    state->cur[0] = a + b + 1;
>> +    if (state->cur[0] > 0xF)
>> +        state->cur[0] = 0xF;
>> +
>> +    state->cur[3] = state->cur[2];
>> +    state->cur[2] = state->cur[1];
>> +    state->cur[1] = state->cur[0];
>> +
>> +    state->last[idx] = state->cur[0];
>> +}
>> +
>> +/* Initialize AC */
>> +void ff_aac_ac_init(AACArith *ac, GetBitContext *gb)
>> +{
>> +    ac->low = 0;
>> +    ac->high = UINT16_MAX;
>> +    ac->val = get_bits(gb, 16);
>> +}
>> +
>> +uint16_t ff_aac_ac_decode(AACArith *ac, GetBitContext *gb,
>> +                          const uint16_t *cdf, uint16_t cdf_len)
>> +{
>> +    int val = ac->val;
>> +    int low = ac->low;
>> +    int high = ac->high;
>> +
>> +    int rng = high - low + 1;
>> +    int c = ((((int)(val - low + 1)) << 14) - ((int)1));
>> +
>> +    /* Note: this could be done faster via heuristics, the total number of
>> +     * configurations is low */
>> +    const uint16_t *p = cdf - 1;
>> +    const uint16_t *q;
>> +
>> +    switch (cdf_len) {
>> +    case 2:
>> +        if ((p[1] * rng) > c)
>> +            p += 1;
>> +        break;
>> +    case 4:
>> +        if ((p[2] * rng) > c)
>> +            p += 2;
>> +        if ((p[1] * rng) > c)
>> +            p += 1;
>> +        break;
>> +    case 17:
>> +        /* First check if the current probability is even met at all */
>> +        if ((p[1] * rng) <= c)
>> +            break;
>> +        p += 1;
>> +        for (int i = 8; i >= 1; i >>= 1)
>> +            if ((p[i] * rng) > c)
>> +                p += i;
>> +        break;
>> +    case 27:
>> +        const uint16_t *p_24 = p + 24;
>> +
>> +        if ((p[16] * rng) > c)
>> +            p += 16;
>> +        if ((p[8] * rng) > c)
>> +            p += 8;
>> +        if (p != p_24)
>> +            if ((p[4] * rng) > c)
>> +                p += 4;
>> +        if ((p[2] * rng) > c)
>> +            p += 2;
>> +
>> +        if (p != &p_24[2])
>> +            if ((p[1] * rng) > c)
>> +                p += 1;
>> +        break;
>> +    default:
>> +        /* This should never happen */
>> +        av_assert2(0);
>> +    }
>> +
>> +    int sym = (int)((ptrdiff_t)(p - cdf)) + 1;
>> +    if (sym)
>> +        high = low + ((rng * cdf[sym - 1]) >> 14) - 1;
>> +    low += (rng * cdf[sym]) >> 14;
>> +
>> +    /* This loop could be done faster */
>> +    while (1) {
>> +        if (high < 32768) {
>> +            ;
>> +        } else if (low >= 32768) {
>> +            val -= 32768;
>> +            low -= 32768;
>> +            high -= 32768;
>> +        } else if (low >= 16384 && high < 49152) {
>> +            val -= 16384;
>> +            low -= 16384;
>> +            high -= 16384;
>> +        } else {
>> +            break;
>> +        }
>> +        low += low;
>> +        high += high + 1;
>> +        val = (val << 1) | get_bits1(gb);
>> +    };
>> +
>> +    ac->low = low;
>> +    ac->high = high;
>> +    ac->val = val;
>> +
>> +    return sym;
>> +}
>> +
>> +void ff_aac_ac_finish(AACArithState *state, int offset, int N)
>> +{
>> +    int i;
>> +
>> +    for (i = offset; i < N/2; i++)
>> +        state->last[i] = 1;
>> +
>> +    for (; i < FF_ARRAY_ELEMS(state->last); i++)
>> +        state->last[i] = 0;
>> +}
>> diff --git a/libavcodec/aac/aacdec_ac.h b/libavcodec/aac/aacdec_ac.h
>> new file mode 100644
>> index 0000000000..ef96bed770
>> --- /dev/null
>> +++ b/libavcodec/aac/aacdec_ac.h
>> @@ -0,0 +1,54 @@
>> +/*
>> + * AAC definitions and structures
>> + * Copyright (c) 2024 Lynne
>> + *
>> + * This file is part of FFmpeg.
>> + *
>> + * FFmpeg is free software; you can redistribute it and/or
>> + * modify it under the terms of the GNU Lesser General Public
>> + * License as published by the Free Software Foundation; either
>> + * version 2.1 of the License, or (at your option) any later version.
>> + *
>> + * FFmpeg is distributed in the hope that it will be useful,
>> + * but WITHOUT ANY WARRANTY; without even the implied warranty of
>> + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
>> + * Lesser General Public License for more details.
>> + *
>> + * You should have received a copy of the GNU Lesser General Public
>> + * License along with FFmpeg; if not, write to the Free Software
>> + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
>> + */
>> +
>> +#ifndef AVCODEC_AACDEC_AC_H
>> +#define AVCODEC_AACDEC_AC_H
>> +
>> +#include "libavcodec/get_bits.h"
>> +
>> +typedef struct AACArithState {
>> +    uint8_t last[512 /* 2048 / 4 */];
>> +    int last_len;
>> +    uint8_t cur[4];
>> +    uint16_t state_pre;
>> +} AACArithState;
>> +
>> +typedef struct AACArith {
>> +    uint16_t low;
>> +    uint16_t high;
>> +    uint16_t val;
>> +} AACArith;
>> +
>> +#define FF_AAC_AC_ESCAPE 16
>> +
>> +uint32_t ff_aac_ac_map_process(AACArithState *state, int reset, int len);
>> +uint32_t ff_aac_ac_get_context(AACArithState *state, uint32_t old_c, int idx, int len);
>> +uint32_t ff_aac_ac_get_pk(uint32_t c);
>> +
>> +void ff_aac_ac_update_context(AACArithState *state, int idx, uint16_t a, uint16_t b);
>> +void ff_aac_ac_init(AACArith *ac, GetBitContext *gb);
>> +
>> +uint16_t ff_aac_ac_decode(AACArith *ac, GetBitContext *gb,
>> +                          const uint16_t *cdf, uint16_t cdf_len);
>> +
>> +void ff_aac_ac_finish(AACArithState *state, int offset, int nb);
>> +
>> +#endif /* AVCODEC_AACDEC_AC_H */
>> diff --git a/libavcodec/aac/aacdec_dsp_template.c b/libavcodec/aac/aacdec_dsp_template.c
>> index 59a69d88f3..8d31af22f8 100644
>> --- a/libavcodec/aac/aacdec_dsp_template.c
>> +++ b/libavcodec/aac/aacdec_dsp_template.c
>> @@ -88,8 +88,8 @@ static void AAC_RENAME(apply_mid_side_stereo)(AACDecContext *ac, ChannelElement
>>       INTFLOAT *ch1 = cpe->ch[1].AAC_RENAME(coeffs);
>>       const uint16_t *offsets = ics->swb_offset;
>>       for (int g = 0; g < ics->num_window_groups; g++) {
>> -        for (int sfb = 0; sfb < ics->max_sfb; sfb++) {
>> -            const int idx = g*ics->max_sfb + sfb;
>> +        for (int sfb = 0; sfb < cpe->max_sfb_ste; sfb++) {
>> +            const int idx = g*cpe->max_sfb_ste + sfb;
>>               if (cpe->ms_mask[idx] &&
>>                   cpe->ch[0].band_type[idx] < NOISE_BT &&
>>                   cpe->ch[1].band_type[idx] < NOISE_BT) {
>> diff --git a/libavcodec/aac/aacdec_latm.h b/libavcodec/aac/aacdec_latm.h
>> index e40a2fe1a7..047c11e0fb 100644
>> --- a/libavcodec/aac/aacdec_latm.h
>> +++ b/libavcodec/aac/aacdec_latm.h
>> @@ -56,7 +56,8 @@ static int latm_decode_audio_specific_config(struct LATMContext *latmctx,
>>   {
>>       AACDecContext *ac     = &latmctx->aac_ctx;
>>       AVCodecContext *avctx = ac->avctx;
>> -    MPEG4AudioConfig m4ac = { 0 };
>> +    OutputConfiguration oc = { 0 };
>> +    MPEG4AudioConfig *m4ac = &oc.m4ac;
>>       GetBitContext gbc;
>>       int config_start_bit  = get_bits_count(gb);
>>       int sync_extension    = 0;
>> @@ -76,7 +77,7 @@ static int latm_decode_audio_specific_config(struct LATMContext *latmctx,
>>       if (get_bits_left(gb) <= 0)
>>           return AVERROR_INVALIDDATA;
>>   
>> -    bits_consumed = decode_audio_specific_config_gb(NULL, avctx, &m4ac,
>> +    bits_consumed = decode_audio_specific_config_gb(NULL, avctx, &oc,
>>                                                       &gbc, config_start_bit,
>>                                                       sync_extension);
>>   
>> @@ -88,11 +89,12 @@ static int latm_decode_audio_specific_config(struct LATMContext *latmctx,
>>         asclen = bits_consumed;
>>   
>>       if (!latmctx->initialized ||
>> -        ac->oc[1].m4ac.sample_rate != m4ac.sample_rate ||
>> -        ac->oc[1].m4ac.chan_config != m4ac.chan_config) {
>> +        ac->oc[1].m4ac.sample_rate != m4ac->sample_rate ||
>> +        ac->oc[1].m4ac.chan_config != m4ac->chan_config) {
>>   
>>           if (latmctx->initialized) {
>> -            av_log(avctx, AV_LOG_INFO, "audio config changed (sample_rate=%d, chan_config=%d)\n", m4ac.sample_rate, m4ac.chan_config);
>> +            av_log(avctx, AV_LOG_INFO, "audio config changed (sample_rate=%d, chan_config=%d)\n",
>> +                   m4ac->sample_rate, m4ac->chan_config);
>>           } else {
>>               av_log(avctx, AV_LOG_DEBUG, "initializing latmctx\n");
>>           }
>> @@ -280,7 +282,7 @@ static int latm_decode_frame(AVCodecContext *avctx, AVFrame *out,
>>           } else {
>>               push_output_configuration(&latmctx->aac_ctx);
>>               if ((err = decode_audio_specific_config(
>> -                    &latmctx->aac_ctx, avctx, &latmctx->aac_ctx.oc[1].m4ac,
>> +                    &latmctx->aac_ctx, avctx, &latmctx->aac_ctx.oc[1],
>>                       avctx->extradata, avctx->extradata_size*8LL, 1)) < 0) {
>>                   pop_output_configuration(&latmctx->aac_ctx);
>>                   return err;
>> diff --git a/libavcodec/aac/aacdec_lpd.c b/libavcodec/aac/aacdec_lpd.c
>> new file mode 100644
>> index 0000000000..be39e2c175
>> --- /dev/null
>> +++ b/libavcodec/aac/aacdec_lpd.c
>> @@ -0,0 +1,192 @@
>> +/*
>> + * Copyright (c) 2024 Lynne <dev at lynne.ee>
>> + *
>> + * This file is part of FFmpeg.
>> + *
>> + * FFmpeg is free software; you can redistribute it and/or
>> + * modify it under the terms of the GNU Lesser General Public
>> + * License as published by the Free Software Foundation; either
>> + * version 2.1 of the License, or (at your option) any later version.
>> + *
>> + * FFmpeg is distributed in the hope that it will be useful,
>> + * but WITHOUT ANY WARRANTY; without even the implied warranty of
>> + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
>> + * Lesser General Public License for more details.
>> + *
>> + * You should have received a copy of the GNU Lesser General Public
>> + * License along with FFmpeg; if not, write to the Free Software
>> + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
>> + */
>> +
>> +#include "aacdec_lpd.h"
>> +#include "aacdec_usac.h"
>> +
>> +const uint8_t ff_aac_lpd_mode_tab[32][4] = {
>> +    { 0, 0, 0, 0 },
>> +    { 1, 0, 0, 0 },
>> +    { 0, 1, 0, 0 },
>> +    { 1, 1, 0, 0 },
>> +    { 0, 0, 1, 0 },
>> +    { 1, 0, 1, 0 },
>> +    { 0, 1, 1, 0 },
>> +    { 1, 1, 1, 0 },
>> +    { 0, 0, 0, 1 },
>> +    { 1, 0, 0, 1 },
>> +    { 0, 1, 0, 1 },
>> +    { 1, 1, 0, 1 },
>> +    { 0, 0, 1, 1 },
>> +    { 1, 0, 1, 1 },
>> +    { 0, 1, 1, 1 },
>> +    { 1, 1, 1, 1 },
>> +    { 2, 2, 0, 0 },
>> +    { 2, 2, 1, 0 },
>> +    { 2, 2, 0, 1 },
>> +    { 2, 2, 1, 1 },
>> +    { 0, 0, 2, 2 },
>> +    { 1, 0, 2, 2 },
>> +    { 0, 1, 2, 2 },
>> +    { 1, 1, 2, 2 },
>> +    { 2, 2, 2, 2 },
>> +    { 3, 3, 3, 3 },
>> +    /* Larger values are reserved, but permit them for resilience */
>> +    { 0, 0, 0, 0 },
>> +    { 0, 0, 0, 0 },
>> +    { 0, 0, 0, 0 },
>> +    { 0, 0, 0, 0 },
>> +    { 0, 0, 0, 0 },
>> +    { 0, 0, 0, 0 },
>> +};
>> +
>> +static void parse_qn(GetBitContext *gb, int *qn, int nk_mode, int no_qn)
>> +{
>> +    if (nk_mode == 1) {
>> +        for (int k = 0; k < no_qn; k++) {
>> +            qn[k] = ff_aac_get_vlclbf(gb);
>> +            if (qn[k])
>> +                qn[k]++;
>> +        }
>> +        return;
>> +    }
>> +
>> +    for (int k = 0; k < no_qn; k++)
>> +        qn[k] = get_bits(gb, 2) + 2;
>> +
>> +    if (nk_mode == 2) {
>> +        for (int k = 0; k < no_qn; k++) {
>> +            if (qn[k] > 4) {
>> +                qn[k] = ff_aac_get_vlclbf(gb);
>> +                if (qn[k])
>> +                    qn[k] += 4;
>> +            }
>> +        }
>> +        return;
>> +    }
>> +
>> +    for (int k = 0; k < no_qn; k++) {
>> +        if (qn[k] > 4) {
>> +            int qn_ext = ff_aac_get_vlclbf(gb);
>> +            switch (qn_ext) {
>> +            case 0: qn[k] = 5; break;
>> +            case 1: qn[k] = 6; break;
>> +            case 2: qn[k] = 0; break;
>> +            default: qn[k] = qn_ext + 4; break;
>> +            }
>> +        }
>> +    }
>> +}
>> +
>> +static int parse_codebook_idx(GetBitContext *gb, uint32_t *kv,
>> +                              int nk_mode, int no_qn)
>> +{
>> +    int n, nk;
>> +
>> +    int qn[2];
>> +    parse_qn(gb, qn, nk_mode, no_qn);
>> +
>> +    for (int k = 0; k < no_qn; k++) {
>> +        if (qn[k] > 4) {
>> +            nk = (qn[k] - 3) / 2;
>> +            n = qn[k] - nk*2;
>> +        } else {
>> +            nk = 0;
>> +            n = qn[k];
>> +        }
>> +    }
>> +
>> +    int idx = get_bits(gb, 4*n);
>> +
>> +    if (nk > 0)
>> +        for (int i = 0; i < 8; i++)
>> +            kv[i] = get_bits(gb, nk);
>> +
>> +    return 0;
>> +}
>> +
>> +int ff_aac_parse_fac_data(AACUsacElemData *ce, GetBitContext *gb,
>> +                          int use_gain, int len)
>> +{
>> +    int ret;
>> +    if (use_gain)
>> +        ce->fac.gain = get_bits(gb, 7);
>> +
>> +    for (int i = 0; i < len/8; i++) {
>> +        ret = parse_codebook_idx(gb, ce->fac.kv[i], 1, 1);
>> +        if (ret < 0)
>> +            return ret;
>> +    }
>> +
>> +    return 0;
>> +}
>> +
>> +int ff_aac_ldp_parse_channel_stream(AACDecContext *ac, AACUSACConfig *usac,
>> +                                    AACUsacElemData *ce, GetBitContext *gb)
>> +{
>> +    ce->ldp.acelp_core_mode = get_bits(gb, 3);
>> +    ce->ldp.lpd_mode = get_bits(gb, 5);
>> +
>> +    ce->ldp.bpf_control_info = get_bits1(gb);
>> +    ce->ldp.core_mode_last = get_bits1(gb);
>> +    ce->ldp.fac_data_present = get_bits1(gb);
>> +
>> +    const uint8_t *mod = ff_aac_lpd_mode_tab[ce->ldp.lpd_mode];
>> +
>> +    int first_ldp_flag = !ce->ldp.core_mode_last;
>> +    int first_tcx_flag = 1;
>> +    if (first_ldp_flag)
>> +        ce->ldp.last_lpd_mode = -1; /* last_ldp_mode is a **STATEFUL** value */
>> +
>> +    int k = 0;
>> +    while (k < 0) {
>> +        if (!k) {
>> +            if (ce->ldp.core_mode_last && ce->ldp.fac_data_present)
>> +                ff_aac_parse_fac_data(ce, gb, 0, usac->core_frame_len/8);
>> +        } else {
>> +            if (!ce->ldp.last_lpd_mode && mod[k] > 0 ||
>> +                ce->ldp.last_lpd_mode && !mod[k])
>> +                ff_aac_parse_fac_data(ce, gb, 0, usac->core_frame_len/8);
>> +        }
>> +        if (!mod[k]) {
>> +//            parse_acelp_coding();
>> +            ce->ldp.last_lpd_mode = 0;
>> +            k++;
>> +        } else {
>> +//            parse_tcx_coding();
>> +            ce->ldp.last_lpd_mode = mod[k];
>> +            k += (1 << (mod[k] - 1));
>> +            first_tcx_flag = 0;
>> +        }
>> +    }
>> +
>> +//    parse_lpc_data(first_lpd_flag);
>> +
>> +    if (!ce->ldp.core_mode_last && ce->ldp.fac_data_present) {
>> +        uint16_t len_8 = usac->core_frame_len / 8;
>> +        uint16_t len_16 = usac->core_frame_len / 16;
>> +        uint16_t fac_len = get_bits1(gb) /* short_fac_flag */ ? len_8 : len_16;
>> +        int ret = ff_aac_parse_fac_data(ce, gb, 1, fac_len);
>> +        if (ret < 0)
>> +            return ret;
>> +    }
>> +
>> +    return 0;
>> +}
>> diff --git a/libavcodec/aac/aacdec_lpd.h b/libavcodec/aac/aacdec_lpd.h
>> new file mode 100644
>> index 0000000000..924ff75e52
>> --- /dev/null
>> +++ b/libavcodec/aac/aacdec_lpd.h
>> @@ -0,0 +1,33 @@
>> +/*
>> + * Copyright (c) 2024 Lynne <dev at lynne.ee>
>> + *
>> + * This file is part of FFmpeg.
>> + *
>> + * FFmpeg is free software; you can redistribute it and/or
>> + * modify it under the terms of the GNU Lesser General Public
>> + * License as published by the Free Software Foundation; either
>> + * version 2.1 of the License, or (at your option) any later version.
>> + *
>> + * FFmpeg is distributed in the hope that it will be useful,
>> + * but WITHOUT ANY WARRANTY; without even the implied warranty of
>> + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
>> + * Lesser General Public License for more details.
>> + *
>> + * You should have received a copy of the GNU Lesser General Public
>> + * License along with FFmpeg; if not, write to the Free Software
>> + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
>> + */
>> +
>> +#ifndef AVCODEC_AAC_AACDEC_LPD_H
>> +#define AVCODEC_AAC_AACDEC_LPD_H
>> +
>> +#include "aacdec.h"
>> +#include "libavcodec/get_bits.h"
>> +
>> +int ff_aac_parse_fac_data(AACUsacElemData *ce, GetBitContext *gb,
>> +                          int use_gain, int len);
>> +
>> +int ff_aac_ldp_parse_channel_stream(AACDecContext *ac, AACUSACConfig *usac,
>> +                                    AACUsacElemData *ce, GetBitContext *gb);
>> +
>> +#endif /* AVCODEC_AAC_AACDEC_LPD_H */
>> diff --git a/libavcodec/aac/aacdec_usac.c b/libavcodec/aac/aacdec_usac.c
>> new file mode 100644
>> index 0000000000..4b48c4d6ca
>> --- /dev/null
>> +++ b/libavcodec/aac/aacdec_usac.c
>> @@ -0,0 +1,1230 @@
>> +/*
>> + * Copyright (c) 2024 Lynne <dev at lynne.ee>
>> + *
>> + * This file is part of FFmpeg.
>> + *
>> + * FFmpeg is free software; you can redistribute it and/or
>> + * modify it under the terms of the GNU Lesser General Public
>> + * License as published by the Free Software Foundation; either
>> + * version 2.1 of the License, or (at your option) any later version.
>> + *
>> + * FFmpeg is distributed in the hope that it will be useful,
>> + * but WITHOUT ANY WARRANTY; without even the implied warranty of
>> + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
>> + * Lesser General Public License for more details.
>> + *
>> + * You should have received a copy of the GNU Lesser General Public
>> + * License along with FFmpeg; if not, write to the Free Software
>> + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
>> + */
>> +
>> +#include "aacdec_usac.h"
>> +#include "aacdec_tab.h"
>> +#include "aacdec_lpd.h"
>> +#include "aacdec_ac.h"
>> +
>> +#include "libavcodec/opusdsp.h"
>> +#include "libavcodec/aactab.h"
>> +#include "libavutil/mem.h"
>> +#include "libavcodec/mpeg4audio.h"
>> +
>> +/* Number of scalefactor bands per complex prediction band, equal to 2. */
>> +#define SFB_PER_PRED_BAND 2
>> +
>> +static inline uint32_t get_escaped_value(GetBitContext *gb, int nb1, int nb2, int nb3)
>> +{
>> +    uint32_t val = get_bits(gb, nb1);
>> +    if (val < ((1 << nb1) - 1))
>> +        return val;
>> +
>> +    val += get_bits(gb, nb2);
>> +    if (val == ((1 << nb2) - 1))
>> +        val += get_bits(gb, nb3);
>> +
>> +    return val;
>> +}
>> +
>> +static int aac_usac_samplerate[] = {
> 
> Missing const
> 
>> +    96000, 88200, 64000, 48000, 44100, 32000, 24000, 22050,
>> +    16000, 12000, 11025, 8000, 7350, -1, -1, 57600, 51200,
>> +    40000, 38400, 34150, 28800, 25600, 20000, 19200, 17075, 14400, 12800, 9600, -1, -1, -1, -1,
>> +};
>> +
> 
>> +static int parse_ext_ele(AACDecContext *ac, AACUsacElemConfig *e,
>> +                         GetBitContext *gb)
>> +{
>> +    if (get_bits1(gb)) { /* usacExtElementPresent */
>> +        uint32_t len;
>> +        if (get_bits1(gb)) { /* usacExtElementUseDefaultLength */
>> +            len = e->ext.default_len;
>> +        } else {
>> +            len = get_bits(gb, 8); /* usacExtElementPayloadLength */
>> +            if (len == 255)
>> +                len += get_bits(gb, 16) - 2;
>> +        }
>> +
>> +        if (len) {
>> +            uint8_t *tmp;
>> +            uint8_t pl_frag_start = 1;
>> +            uint8_t pl_frag_end = 1;
>> +            if (e->ext.payload_frag) {
>> +                pl_frag_start = get_bits1(gb); /* usacExtElementStart */
>> +                pl_frag_end = get_bits1(gb); /* usacExtElementStop */
>> +            }
>> +
>> +            if (pl_frag_start)
>> +                e->ext.pl_data_offset = 0;
>> +
>> +            tmp = av_realloc(e->ext.pl_data, e->ext.pl_data_offset + len);
>> +            if (!tmp) {
>> +                free(e->ext.pl_data);
> 
> Wrong deallocator.
> 
>> +                return AVERROR(ENOMEM);
>> +            }
>> +            e->ext.pl_data = tmp;
>> +
>> +            for (int i = 0; i < len; i++)
>> +                e->ext.pl_data[e->ext.pl_data_offset + i] = get_bits(gb, 8);
>> +
>> +            if (pl_frag_end) {
>> +                int ret;
>> +                e->ext.pl_data_offset = 0;
>> +                switch (e->ext.type) {
>> +                case ID_EXT_ELE_FILL:
>> +                    av_freep(&e->ext.pl_data);
>> +                    break;
>> +                case ID_EXT_ELE_AUDIOPREROLL:
>> +                    ret = parse_audio_preroll(ac, e->ext.pl_data,
>> +                                              e->ext.pl_data_offset);
>> +                    if (ret < 0) {
>> +                        av_freep(&e->ext.pl_data);
>> +                        return ret;
>> +                    }
>> +                    break;
>> +                default:
>> +                    av_freep(&e->ext.pl_data);
> 
> Pointless if you abort in the next line
> 
>> +                    /* This should never happen */
>> +                    av_assert0(0);
>> +                }
>> +            }
>> +        }
>> +    }
>> +
>> +    return 0;
>> +}
>> +
> 
> 
>> +#include "libavcodec/opusdsp.h"
>> +
>> +#ifndef AVCODEC_AAC_AACDEC_USAC_H
>> +#define AVCODEC_AAC_AACDEC_USAC_H
>> +
>> +#include "aacdec.h"
>> +
>> +#include "libavcodec/get_bits.h"
>> +
>> +static inline uint8_t ff_aac_get_vlclbf(GetBitContext *gb)
>> +{
>> +    uint8_t ret = 0;
>> +    while (get_bits1(gb) && ret <= 36)
>> +        ret++;
>> +    return ret;
>> +}
> 
> Look at unary.h

That's convenient, thanks.

I've synced my changes to my repo at
https://github.com/cyanreg/FFmpeg/tree/xhe
though I'll likely post the patchset to the ML again tomorrow with
some fixes for preroll parsing and complex synth.
-------------- next part --------------
A non-text attachment was scrubbed...
Name: OpenPGP_0xA2FEA5F03F034464.asc
Type: application/pgp-keys
Size: 624 bytes
Desc: OpenPGP public key
URL: <https://ffmpeg.org/pipermail/ffmpeg-devel/attachments/20240516/b654cae8/attachment.key>
-------------- next part --------------
A non-text attachment was scrubbed...
Name: OpenPGP_signature.asc
Type: application/pgp-signature
Size: 236 bytes
Desc: OpenPGP digital signature
URL: <https://ffmpeg.org/pipermail/ffmpeg-devel/attachments/20240516/b654cae8/attachment.sig>


More information about the ffmpeg-devel mailing list