[FFmpeg-soc] [PATCH] AMR-WB Decoder
Vitor Sessak
vitor1001 at gmail.com
Thu Aug 26 23:55:07 CEST 2010
Marcelo Galvão Póvoa wrote:
> Fixed excitation array incorrect length.
>
> Should now be applicable cleanly to the latest ffmpeg revision
> (fd151a5f8bd152c456a).
First look:
> diff --git a/libavcodec/Makefile b/libavcodec/Makefile
> index 1422b5c..9c68d72 100644
> --- a/libavcodec/Makefile
> +++ b/libavcodec/Makefile
> @@ -60,6 +60,7 @@ OBJS-$(CONFIG_AMRNB_DECODER) += amrnbdec.o celp_filters.o \
> celp_math.o acelp_filters.o \
> acelp_vectors.o \
> acelp_pitch_delay.o
> +OBJS-$(CONFIG_AMRWB_DECODER) += amrwbdec.o
You are using a lot of functions (ff_acelp_interpolatef, ff_lsp2polyf,
etc) that are not in amrwbdec.c, so you need to add the dependencies of
everything you use. To see if you didn't forgot any try "./configure
--disable-decoders --enable-decoder=amr-wb".
> +static av_cold int amrwb_decode_init(AVCodecContext *avctx)
> +{
> + AMRWBContext *ctx = avctx->priv_data;
> + int i;
> +
> + avctx->sample_fmt = SAMPLE_FMT_FLT;
> +
> + av_lfg_init(&ctx->prng, 1);
> +
> + ctx->excitation = &ctx->excitation_buf[AMRWB_P_DELAY_MAX + LP_ORDER + 1];
> + ctx->first_frame = 1;
> + ctx->tilt_coef = ctx->prev_tr_gain = 0.0;
not needed, ctx is already allocated with av_mallocz().
> +/**
> + * Parses a speech frame, storing data in the Context
> + *
> + * @param[in,out] ctx The context
> + * @param[in] buf Pointer to the input buffer
> + * @param[in] buf_size Size of the input buffer
> + *
> + * @return The frame mode
> + */
> +static enum Mode unpack_bitstream(AMRWBContext *ctx, const uint8_t *buf,
> + int buf_size)
> +{
> + GetBitContext gb;
> + enum Mode mode;
> + uint16_t *data;
> +
> + init_get_bits(&gb, buf, buf_size * 8);
> +
> + /* decode frame header (1st octet) */
> + skip_bits(&gb, 1); // padding bit
> + ctx->fr_cur_mode = get_bits(&gb, 4);
> + mode = ctx->fr_cur_mode;
Why the extra "mode" variable? You can just use ctx->fr_cur_mode directly...
> + if (mode < MODE_SID) { /* Normal speech frame */
> + const uint16_t *perm = amr_bit_orderings_by_mode[mode];
> + int field_size;
> +
> + while ((field_size = *perm++)) {
> + int field = 0;
> + int field_offset = *perm++;
> + while (field_size--) {
> + uint16_t bit_idx = *perm++;
> + field <<= 1;
> + /* The bit index inside the byte is reversed (MSB->LSB) */
> + field |= BIT_POS(buf[bit_idx >> 3], 7 - (bit_idx & 7));
> + }
> + data[field_offset] = field;
> + }
> + }
> + else if (mode == MODE_SID) { /* Comfort noise frame */
> + /* not implemented */
> + }
av_log_missing_feature(...);
> +/**
> + * Convert an ISF vector into an ISP vector
> + *
> + * @param[in] isf Isf vector
> + * @param[out] isp Output isp vector
> + * @param[in] size Isf/isp size
> + */
> +static void isf2isp(const float *isf, double *isp, int size)
> +{
> + int i;
> +
> + for (i = 0; i < size - 1; i++)
> + isp[i] = cos(2.0 * M_PI * isf[i]);
> +
> + isp[size - 1] = cos(4.0 * M_PI * isf[size - 1]);
> +}
Almost duplication of amrnbdec.c:lsf2lsp().
> +/**
> + * Decodes quantized ISF vectors using 36-bit indices (6K60 mode only)
> + *
> + * @param[in] ind Array of 5 indices
> + * @param[out] isf_q Buffer for isf_q[LP_ORDER]
> + * @param[in] fr_q Frame quality (good frame == 1)
> + *
> + */
> +static void decode_isf_indices_36b(uint16_t *ind, float *isf_q, uint8_t fr_q) {
> + int i;
> +
> + if (fr_q == 1) {
> + for (i = 0; i < 9; i++) {
> + isf_q[i] = dico1_isf[ind[0]][i] / (float) (1 << 15);
isf_q[i] = dico1_isf[ind[0]][i] * (1.0f / (1 << 15));
It is faster and the compiler cannot do it for you (this applies to a
lot of code in your patch).
> + for (i = 0; i < 7; i++) {
> + isf_q[i + 9] = dico2_isf[ind[1]][i] / (float) (1 << 15);
> + }
> + for (i = 0; i < 5; i++) {
> + isf_q[i] += dico21_isf_36b[ind[2]][i] / (float) (1 << 15);
> + }
> + for (i = 0; i < 4; i++) {
> + isf_q[i + 5] += dico22_isf_36b[ind[3]][i] / (float) (1 << 15);
> + }
> + for (i = 0; i < 7; i++) {
> + isf_q[i + 9] += dico23_isf_36b[ind[4]][i] / (float) (1 << 15);
> + }
> + }
> + /* not implemented for bad frame */
Unless you plan to implement bad frame handling soon, I suggest you just
remove the function parameter. It serves to nothing as now.
> +/**
> + * Ensures a minimum distance between adjacent ISFs
> + *
> + * @param[in,out] isf ISF vector
> + * @param[in] min_spacing Minimum gap to keep
> + * @param[in] size ISF vector size
> + *
> + */
> +static void isf_set_min_dist(float *isf, float min_spacing, int size) {
> + int i;
> + float prev = 0.0;
> +
> + for (i = 0; i < size - 1; i++) {
> + isf[i] = FFMAX(isf[i], prev + min_spacing);
> + prev = isf[i];
> + }
> +}
Please do not duplicate lsp.c:ff_set_min_dist_lsf().
> +/**
> + * 16kHz version of ff_lsp2polyf for the high-band
> + */
> +static void lsp2polyf_16k(const double *lsp, double *f, int lp_half_order)
> +{
> + int i, j;
> +
> + f[0] = 0.25;
> + f[1] = -0.5 * lsp[0];
> + lsp -= 2;
> + for(i = 2; i <= lp_half_order; i++)
> + {
> +
nit:
for(i = 2; i <= lp_half_order; i++) {
> + f[i] = val * f[i - 1] + 2 * f[i - 2];
> + for(j = i - 1; j > 1; j--)
> + f[j] += f[j - 1] * val + f[j - 2];
> + f[1] += 0.25 * val;
> + }
> +}
Hmm, can't this function be replaced by ff_lsp2polyf() with some rescaling?
> +/**
> + * Convert a ISP vector to LP coefficient domain {a_k}
> + * Equations from TS 26.190 section 5.2.4
> + *
> + * @param[in] isp ISP vector for a subframe
> + * @param[out] lp LP coefficients
> + * @param[in] lp_half_order Half the number of LPs to construct
> + */
> +static void isp2lp(const double *isp, float *lp, int lp_half_order) {
> + double pa[10 + 1], qa[10 + 1];
> + double last_isp = isp[2 * lp_half_order - 1];
> + double qa_old = 0.0;
> + float *lp2 = &lp[2 * lp_half_order];
> + int i;
> +
> + if (lp_half_order > 8) { // high-band specific
> + lsp2polyf_16k(isp, pa, lp_half_order);
> + lsp2polyf_16k(isp + 1, qa, lp_half_order - 1);
> +
> + for (i = 0; i <= lp_half_order; i++)
> + pa[i] *= 4.0;
> + for (i = 0; i < lp_half_order; i++)
> + qa[i] *= 4.0;
> + } else {
> + ff_lsp2polyf(isp, pa, lp_half_order);
> + ff_lsp2polyf(isp + 1, qa, lp_half_order - 1);
> + }
> +
> + for (i = 1; i < lp_half_order; i++) {
> + double paf = (1 + last_isp) * pa[i];
> + double qaf = (1 - last_isp) * (qa[i] - qa_old);
> +
> + qa_old = qa[i - 1];
> +
> + lp[i] = 0.5 * (paf + qaf);
> + lp2[-i] = 0.5 * (paf - qaf);
> + }
> +
> + lp[0] = 1.0;
> + lp[lp_half_order] = 0.5 * (1 + last_isp) * pa[lp_half_order];
> + lp2[0] = last_isp;
> +}
Please double-check if this is not a duplication of sipr.c:lsp2lpc_sipr().
> +static void decode_pitch_vector(AMRWBContext *ctx,
> + const AMRWBSubFrame *amr_subframe,
> + const int subframe)
> +{
> + int pitch_lag_int, pitch_lag_frac;
> + int i;
> + float *exc = ctx->excitation;
> + enum Mode mode = ctx->fr_cur_mode;
> +
> + if (mode <= MODE_8k85) {
> + decode_pitch_lag_low(&pitch_lag_int, &pitch_lag_frac, amr_subframe->ada
> p,
> + &ctx->base_pitch_lag, subframe, mode);
> + } else
> + decode_pitch_lag_high(&pitch_lag_int, &pitch_lag_frac, amr_subframe->ad
> ap,
> + &ctx->base_pitch_lag, subframe);
> +
> + ctx->pitch_lag_int = pitch_lag_int;
> + pitch_lag_int += (pitch_lag_frac < 0 ? -1 : 0) + (pitch_lag_frac ? 1 :
> 0);
> +
> +
> + /* Calculate the pitch vector by interpolating the past excitation at the
> + pitch lag using a hamming windowed sinc function */
> + ff_acelp_interpolatef(exc, exc + 1 - pitch_lag_int,
> + ac_inter, 4,
> + pitch_lag_frac + (pitch_lag_frac > 0 ? 0 : 4),
> + LP_ORDER, AMRWB_SFR_SIZE + 1);
ac_inter is yet another hamming function. Can you check if you cannot
reuse acelp_vectors.c:ff_b60_sinc or sipr16kdata.h:sinc_win?
> +/**
> + * Reduce fixed vector sparseness by smoothing with one of three IR filters
> + * Also known as "adaptive phase dispersion"
> + *
> + * @param[in] ctx The context
> + * @param[in,out] fixed_vector Unfiltered fixed vector
> + * @param[out] buf Space for modified vector if necessary
> + *
> + * @return The potentially overwritten filtered fixed vector address
> + */
> +static float *anti_sparseness(AMRWBContext *ctx,
> + float *fixed_vector, float *buf)
amrnbedec.c has a function with the same name. Any code can be reused?
> +/**
> + * Apply to synthesis a 2nd order high-pass filter
> + *
> + * @param[out] out Buffer for filtered output
> + * @param[in] hpf_coef Filter coefficients as used below
> + * @param[in,out] mem State from last filtering (updated)
> + * @param[in] in Input speech data
> + *
> + * @remark It is safe to pass the same array in in and out parameters
> + */
> +static void high_pass_filter(float *out, const float hpf_coef[2][3],
> + float mem[4], const float *in)
> +{
> + int i;
> + float *x = mem - 1, *y = mem + 2; // previous inputs and outputs
> +
> + for (i = 0; i < AMRWB_SFR_SIZE; i++) {
> + float x0 = in[i];
> +
> + out[i] = hpf_coef[0][0] * x0 + hpf_coef[1][0] * y[0] +
> + hpf_coef[0][1] * x[1] + hpf_coef[1][1] * y[1] +
> + hpf_coef[0][2] * x[2];
> +
> + y[1] = y[0];
> + y[0] = out[i];
> +
> + x[2] = x[1];
> + x[1] = x0;
> + }
> +}
acelp_filter.c:ff_acelp_apply_order_2_transfer_function()
> +/**
> + * Upsample a signal by 5/4 ratio (from 12.8kHz to 16kHz) using
> + * a FIR interpolation filter. Uses past data from before *in address
> + *
> + * @param[out] out Buffer for interpolated signal
> + * @param[in] in Current signal data (length 0.8*o_size)
> + * @param[in] o_size Output signal length
> + */
> +static void upsample_5_4(float *out, const float *in, int o_size)
> +{
> + const float *in0 = in - UPS_FIR_SIZE + 1;
> + int i;
> +
> + for (i = 0; i < o_size; i++) {
> + int int_part = (4 * i) / 5;
> + int frac_part = (4 * i) - 5 * int_part;
You can break this loop in two to avoid the division:
i = 0;
for (j = 0; j < o_size/5; j++)
for (k = 0; k < 5; k++) {
....
i++;
}
> +/**
> + * Generate the high-band excitation with the same energy from the lower
> + * one and scaled by the given gain
> + *
> + * @param[in] ctx The context
> + * @param[out] hb_exc Buffer for the excitation
> + * @param[in] synth_exc Low-band excitation used for synthesis
> + * @param[in] hb_gain Wanted excitation gain
> + */
> +static void scaled_hb_excitation(AMRWBContext *ctx, float *hb_exc,
> + const float *synth_exc, float hb_gain)
> +{
> + int i;
> + float energy = ff_dot_productf(synth_exc, synth_exc, AMRWB_SFR_SIZE);
> +
> + /* Generate a white-noise excitation */
> + for (i = 0; i < AMRWB_SFR_SIZE_16k; i++)
> + hb_exc[i] = 32768.0 - (uint16_t) av_lfg_get(&ctx->prng);
> +
> + ff_scale_vector_to_given_sum_of_squares(hb_exc, hb_exc, energy,
> + AMRWB_SFR_SIZE_16k);
> +
> + for (i = 0; i < AMRWB_SFR_SIZE_16k; i++)
> + hb_exc[i] *= hb_gain;
> +}
Why are you scaling it twice?
> +/**
> + * Apply to high-band samples a 15th order filter
> + * The filter characteristic depends on the given coefficients
> + *
> + * @param[out] out Buffer for filtered output
> + * @param[in] fir_coef Filter coefficients
> + * @param[in,out] mem State from last filtering (updated)
> + * @param[in] cp_gain Compensation gain (usually the filter gain)
> + * @param[in] in Input speech data (high-band)
> + *
> + * @remark It is safe to pass the same array in in and out parameters
> + */
> +static void hb_fir_filter(float *out, const float fir_coef[HB_FIR_SIZE + 1],
> + float mem[HB_FIR_SIZE], float cp_gain, const float *in)
> +{
> + int i, j;
> + float data[AMRWB_SFR_SIZE_16k + HB_FIR_SIZE]; // past and current samples
> +
> + memcpy(data, mem, HB_FIR_SIZE * sizeof(float));
> +
> + for (i = 0; i < AMRWB_SFR_SIZE_16k; i++)
> + data[i + HB_FIR_SIZE] = in[i] / cp_gain;
> +
> + for (i = 0; i < AMRWB_SFR_SIZE_16k; i++) {
> + out[i] = 0.0;
> + for (j = 0; j <= HB_FIR_SIZE; j++)
> + out[i] += data[i + j] * fir_coef[j];
> + }
> +
> + memcpy(mem, data + AMRWB_SFR_SIZE_16k, HB_FIR_SIZE * sizeof(float));
> +}
I think it is cleaner (and more consistent) to do like the synthesis
filter and get one single buffer for output and memory...
-Vitor
More information about the FFmpeg-soc
mailing list