[FFmpeg-soc] [PATCH] AMR-WB Decoder

Thu Aug 26 23:55:07 CEST 2010

Marcelo Galvão Póvoa wrote:
> Fixed excitation array incorrect length.
> 
> Should now be applicable cleanly to the latest ffmpeg revision
> (fd151a5f8bd152c456a).

First look:

> diff --git a/libavcodec/Makefile b/libavcodec/Makefile
> index 1422b5c..9c68d72 100644
> --- a/libavcodec/Makefile
> +++ b/libavcodec/Makefile
> @@ -60,6 +60,7 @@ OBJS-$(CONFIG_AMRNB_DECODER)           += amrnbdec.o celp_filters.o   \
>                                            celp_math.o acelp_filters.o \
>                                            acelp_vectors.o             \
>                                            acelp_pitch_delay.o
> +OBJS-$(CONFIG_AMRWB_DECODER)           += amrwbdec.o

You are using a lot of functions (ff_acelp_interpolatef, ff_lsp2polyf,
etc) that are not in amrwbdec.c, so you need to add the dependencies of
everything you use. To see if you didn't forgot any try "./configure
--disable-decoders --enable-decoder=amr-wb".

> +static av_cold int amrwb_decode_init(AVCodecContext *avctx)
> +{
> +    AMRWBContext *ctx = avctx->priv_data;
> +    int i;
> +
> +    avctx->sample_fmt = SAMPLE_FMT_FLT;
> +
> +    av_lfg_init(&ctx->prng, 1);
> +
> +    ctx->excitation  = &ctx->excitation_buf[AMRWB_P_DELAY_MAX + LP_ORDER + 1];
> +    ctx->first_frame = 1;

> +    ctx->tilt_coef   = ctx->prev_tr_gain = 0.0;

not needed, ctx is already allocated with av_mallocz().

> +/**
> + * Parses a speech frame, storing data in the Context
> + *
> + * @param[in,out] ctx              The context
> + * @param[in] buf                  Pointer to the input buffer
> + * @param[in] buf_size             Size of the input buffer
> + *
> + * @return The frame mode
> + */
> +static enum Mode unpack_bitstream(AMRWBContext *ctx, const uint8_t *buf,
> +                                  int buf_size)
> +{
> +    GetBitContext gb;
> +    enum Mode mode;
> +    uint16_t *data;
> +
> +    init_get_bits(&gb, buf, buf_size * 8);
> +
> +    /* decode frame header (1st octet) */
> +    skip_bits(&gb, 1);  // padding bit
> +    ctx->fr_cur_mode  = get_bits(&gb, 4);

> +    mode              = ctx->fr_cur_mode;

Why the extra "mode" variable? You can just use ctx->fr_cur_mode directly...

> +    if (mode < MODE_SID) { /* Normal speech frame */
> +        const uint16_t *perm = amr_bit_orderings_by_mode[mode];
> +        int field_size;
> +
> +        while ((field_size = *perm++)) {
> +            int field = 0;
> +            int field_offset = *perm++;
> +            while (field_size--) {
> +               uint16_t bit_idx = *perm++;
> +               field <<= 1;
> +               /* The bit index inside the byte is reversed (MSB->LSB) */
> +               field |= BIT_POS(buf[bit_idx >> 3], 7 - (bit_idx & 7));
> +            }
> +            data[field_offset] = field;
> +        }
> +    }
> +    else if (mode == MODE_SID) { /* Comfort noise frame */
> +        /* not implemented */
> +    }

av_log_missing_feature(...);

> +/**
> + * Convert an ISF vector into an ISP vector
> + *
> + * @param[in] isf                  Isf vector
> + * @param[out] isp                 Output isp vector
> + * @param[in] size                 Isf/isp size
> + */
> +static void isf2isp(const float *isf, double *isp, int size)
> +{
> +    int i;
> +
> +    for (i = 0; i < size - 1; i++)
> +        isp[i] = cos(2.0 * M_PI * isf[i]);
> +
> +    isp[size - 1] = cos(4.0 * M_PI * isf[size - 1]);
> +}

Almost duplication of amrnbdec.c:lsf2lsp().

> +/**
> + * Decodes quantized ISF vectors using 36-bit indices (6K60 mode only)
> + *
> + * @param[in] ind                  Array of 5 indices
> + * @param[out] isf_q               Buffer for isf_q[LP_ORDER]
> + * @param[in] fr_q                 Frame quality (good frame == 1)
> + *
> + */
> +static void decode_isf_indices_36b(uint16_t *ind, float *isf_q, uint8_t fr_q) {
> +    int i;
> +
> +    if (fr_q == 1) {
> +        for (i = 0; i < 9; i++) {
> +            isf_q[i] = dico1_isf[ind[0]][i] / (float) (1 << 15);

isf_q[i] = dico1_isf[ind[0]][i] * (1.0f / (1 << 15));

It is faster and the compiler cannot do it for you (this applies to a
lot of code in your patch).

> +        for (i = 0; i < 7; i++) {
> +            isf_q[i + 9] = dico2_isf[ind[1]][i] / (float) (1 << 15);
> +        }
> +        for (i = 0; i < 5; i++) {
> +            isf_q[i] += dico21_isf_36b[ind[2]][i] / (float) (1 << 15);
> +        }
> +        for (i = 0; i < 4; i++) {
> +            isf_q[i + 5] += dico22_isf_36b[ind[3]][i] / (float) (1 << 15);
> +        }
> +        for (i = 0; i < 7; i++) {
> +            isf_q[i + 9] += dico23_isf_36b[ind[4]][i] / (float) (1 << 15);
> +        }
> +    }
> +    /* not implemented for bad frame */

Unless you plan to implement bad frame handling soon, I suggest you just
remove the function parameter. It serves to nothing as now.

> +/**
> + * Ensures a minimum distance between adjacent ISFs
> + *
> + * @param[in,out] isf              ISF vector
> + * @param[in] min_spacing          Minimum gap to keep
> + * @param[in] size                 ISF vector size
> + *
> + */
> +static void isf_set_min_dist(float *isf, float min_spacing, int size) {
> +    int i;
> +    float prev = 0.0;
> +
> +    for (i = 0; i < size - 1; i++) {
> +        isf[i] = FFMAX(isf[i], prev + min_spacing);
> +        prev = isf[i];
> +    }
> +}

Please do not duplicate lsp.c:ff_set_min_dist_lsf().

> +/**
> + * 16kHz version of ff_lsp2polyf for the high-band
> + */
> +static void lsp2polyf_16k(const double *lsp, double *f, int lp_half_order)
> +{
> +    int i, j;
> +
> +    f[0] = 0.25;
> +    f[1] = -0.5 * lsp[0];
> +    lsp -= 2;
> +    for(i = 2; i <= lp_half_order; i++)
> +    {
> +

nit:
for(i = 2; i <= lp_half_order; i++) {

> +        f[i] = val * f[i - 1] + 2 * f[i - 2];
> +        for(j = i - 1; j > 1; j--)
> +            f[j] += f[j - 1] * val + f[j - 2];
> +        f[1] += 0.25 * val;
> +    }
> +}

Hmm, can't this function be replaced by ff_lsp2polyf() with some rescaling?

> +/**
> + * Convert a ISP vector to LP coefficient domain {a_k}
> + * Equations from TS 26.190 section 5.2.4
> + *
> + * @param[in] isp                  ISP vector for a subframe
> + * @param[out] lp                  LP coefficients
> + * @param[in] lp_half_order        Half the number of LPs to construct
> + */
> +static void isp2lp(const double *isp, float *lp, int lp_half_order) {
> +    double pa[10 + 1], qa[10 + 1];
> +    double last_isp = isp[2 * lp_half_order - 1];
> +    double qa_old = 0.0;
> +    float *lp2 = &lp[2 * lp_half_order];
> +    int i;
> +
> +    if (lp_half_order > 8) { // high-band specific
> +        lsp2polyf_16k(isp,     pa, lp_half_order);
> +        lsp2polyf_16k(isp + 1, qa, lp_half_order - 1);
> +
> +        for (i = 0; i <= lp_half_order; i++)
> +            pa[i] *= 4.0;
> +        for (i = 0; i < lp_half_order; i++)
> +            qa[i] *= 4.0;
> +    } else {
> +        ff_lsp2polyf(isp,     pa, lp_half_order);
> +        ff_lsp2polyf(isp + 1, qa, lp_half_order - 1);
> +    }
> +
> +    for (i = 1; i < lp_half_order; i++) {
> +        double paf = (1 + last_isp) * pa[i];
> +        double qaf = (1 - last_isp) * (qa[i] - qa_old);
> +
> +        qa_old = qa[i - 1];
> +
> +        lp[i]   = 0.5 * (paf + qaf);
> +        lp2[-i] = 0.5 * (paf - qaf);
> +    }
> +
> +    lp[0] = 1.0;
> +    lp[lp_half_order] = 0.5 * (1 + last_isp) * pa[lp_half_order];
> +    lp2[0] = last_isp;
> +}

Please double-check if this is not a duplication of sipr.c:lsp2lpc_sipr().

> +static void decode_pitch_vector(AMRWBContext *ctx,
> +                                const AMRWBSubFrame *amr_subframe,
> +                                const int subframe)
> +{
> +    int pitch_lag_int, pitch_lag_frac;
> +    int i;
> +    float *exc     = ctx->excitation;
> +    enum Mode mode = ctx->fr_cur_mode;
> +
> +    if (mode <= MODE_8k85) {
> +        decode_pitch_lag_low(&pitch_lag_int, &pitch_lag_frac, amr_subframe->ada
> p,
> +                              &ctx->base_pitch_lag, subframe, mode);
> +    } else
> +        decode_pitch_lag_high(&pitch_lag_int, &pitch_lag_frac, amr_subframe->ad
> ap,
> +                              &ctx->base_pitch_lag, subframe);
> +
> +    ctx->pitch_lag_int = pitch_lag_int;
> +    pitch_lag_int     += (pitch_lag_frac < 0 ? -1 : 0) + (pitch_lag_frac ? 1 : 
> 0);
> +
> +
> +    /* Calculate the pitch vector by interpolating the past excitation at the
> +       pitch lag using a hamming windowed sinc function */
> +    ff_acelp_interpolatef(exc, exc + 1 - pitch_lag_int,
> +                          ac_inter, 4,
> +                          pitch_lag_frac + (pitch_lag_frac > 0 ? 0 : 4),
> +                          LP_ORDER, AMRWB_SFR_SIZE + 1);

ac_inter is yet another hamming function. Can you check if you cannot 
reuse acelp_vectors.c:ff_b60_sinc or sipr16kdata.h:sinc_win?

> +/**
> + * Reduce fixed vector sparseness by smoothing with one of three IR filters
> + * Also known as "adaptive phase dispersion"
> + *
> + * @param[in] ctx                  The context
> + * @param[in,out] fixed_vector     Unfiltered fixed vector
> + * @param[out] buf                 Space for modified vector if necessary
> + *
> + * @return The potentially overwritten filtered fixed vector address
> + */
> +static float *anti_sparseness(AMRWBContext *ctx,
> +                              float *fixed_vector, float *buf)

amrnbedec.c has a function with the same name. Any code can be reused?

> +/**
> + * Apply to synthesis a 2nd order high-pass filter
> + *
> + * @param[out] out                 Buffer for filtered output
> + * @param[in] hpf_coef             Filter coefficients as used below
> + * @param[in,out] mem              State from last filtering (updated)
> + * @param[in] in                   Input speech data
> + *
> + * @remark It is safe to pass the same array in in and out parameters
> + */
> +static void high_pass_filter(float *out, const float hpf_coef[2][3],
> +                             float mem[4], const float *in)
> +{
> +    int i;
> +    float *x = mem - 1, *y = mem + 2; // previous inputs and outputs
> +
> +    for (i = 0; i < AMRWB_SFR_SIZE; i++) {
> +        float x0 = in[i];
> +
> +        out[i] = hpf_coef[0][0] * x0   + hpf_coef[1][0] * y[0] +
> +                 hpf_coef[0][1] * x[1] + hpf_coef[1][1] * y[1] +
> +                 hpf_coef[0][2] * x[2];
> +
> +        y[1] = y[0];
> +        y[0] = out[i];
> +
> +        x[2] = x[1];
> +        x[1] = x0;
> +    }
> +}

acelp_filter.c:ff_acelp_apply_order_2_transfer_function()

> +/**
> + * Upsample a signal by 5/4 ratio (from 12.8kHz to 16kHz) using
> + * a FIR interpolation filter. Uses past data from before *in address
> + *
> + * @param[out] out                 Buffer for interpolated signal
> + * @param[in] in                   Current signal data (length 0.8*o_size)
> + * @param[in] o_size               Output signal length
> + */
> +static void upsample_5_4(float *out, const float *in, int o_size)
> +{
> +    const float *in0 = in - UPS_FIR_SIZE + 1;
> +    int i;
> +
> +    for (i = 0; i < o_size; i++) {
> +        int int_part  = (4 * i) / 5;
> +        int frac_part = (4 * i) - 5 * int_part;

You can break this loop in two to avoid the division:

i = 0;
for (j = 0; j < o_size/5; j++)
     for (k = 0; k < 5; k++) {
         ....
         i++;
     }

> +/**
> + * Generate the high-band excitation with the same energy from the lower
> + * one and scaled by the given gain
> + *
> + * @param[in] ctx                  The context
> + * @param[out] hb_exc              Buffer for the excitation
> + * @param[in] synth_exc            Low-band excitation used for synthesis
> + * @param[in] hb_gain              Wanted excitation gain
> + */
> +static void scaled_hb_excitation(AMRWBContext *ctx, float *hb_exc,
> +                                 const float *synth_exc, float hb_gain)
> +{
> +    int i;
> +    float energy = ff_dot_productf(synth_exc, synth_exc, AMRWB_SFR_SIZE);
> +
> +    /* Generate a white-noise excitation */
> +    for (i = 0; i < AMRWB_SFR_SIZE_16k; i++)
> +        hb_exc[i] = 32768.0 - (uint16_t) av_lfg_get(&ctx->prng);
> +
> +    ff_scale_vector_to_given_sum_of_squares(hb_exc, hb_exc, energy,
> +                                            AMRWB_SFR_SIZE_16k);
> +
> +    for (i = 0; i < AMRWB_SFR_SIZE_16k; i++)
> +        hb_exc[i] *= hb_gain;
> +}

Why are you scaling it twice?

> +/**
> + * Apply to high-band samples a 15th order filter
> + * The filter characteristic depends on the given coefficients
> + *
> + * @param[out] out                 Buffer for filtered output
> + * @param[in] fir_coef             Filter coefficients
> + * @param[in,out] mem              State from last filtering (updated)
> + * @param[in] cp_gain              Compensation gain (usually the filter gain)
> + * @param[in] in                   Input speech data (high-band)
> + *
> + * @remark It is safe to pass the same array in in and out parameters
> + */
> +static void hb_fir_filter(float *out, const float fir_coef[HB_FIR_SIZE + 1],
> +                          float mem[HB_FIR_SIZE], float cp_gain, const float *in)
> +{
> +    int i, j;
> +    float data[AMRWB_SFR_SIZE_16k + HB_FIR_SIZE]; // past and current samples
> +
> +    memcpy(data, mem, HB_FIR_SIZE * sizeof(float));
> +
> +    for (i = 0; i < AMRWB_SFR_SIZE_16k; i++)
> +        data[i + HB_FIR_SIZE] = in[i] / cp_gain;
> +
> +    for (i = 0; i < AMRWB_SFR_SIZE_16k; i++) {
> +        out[i] = 0.0;
> +        for (j = 0; j <= HB_FIR_SIZE; j++)
> +            out[i] += data[i + j] * fir_coef[j];
> +    }
> +
> +    memcpy(mem, data + AMRWB_SFR_SIZE_16k, HB_FIR_SIZE * sizeof(float));
> +}

I think it is cleaner (and more consistent) to do like the synthesis 
filter and get one single buffer for output and memory...

-Vitor