[FFmpeg-soc] [soc]AMR-WB decoder branch, master, updated.
Marcelo Póvoa
marspeoplester at gmail.com
Mon Aug 16 19:44:27 CEST 2010
This is an automated email from the git hooks/post-receive script. It was
generated because a ref change was pushed to the repository containing
the project "AMR-WB decoder".
The branch, master has been updated
via 374ad547c7eb604679d787ac1b1f1192af534d34 (commit)
from 43028b56f61f0d6443cca0f4887a74a1979d6c80 (commit)
Those revisions listed above that are new to this repository have
not appeared on any other notification email; so we list those
revisions in full, below.
- Log -----------------------------------------------------------------
commit 374ad547c7eb604679d787ac1b1f1192af534d34
Author: Marcelo Povoa <marspeoplester at gmail.com>
Date: Mon Aug 16 14:43:52 2010 -0300
Don't use AMRFixed anymore for simplicity and add more
cosmetic changes, preparing the code to be reviewed
diff --git a/libavcodec/acelp_vectors.h b/libavcodec/acelp_vectors.h
index 08ca212..ba3437f 100644
--- a/libavcodec/acelp_vectors.h
+++ b/libavcodec/acelp_vectors.h
@@ -28,8 +28,8 @@
/** Sparse representation for the algebraic codebook (fixed) vector */
typedef struct {
int n;
- int x[24];
- float y[24];
+ int x[10];
+ float y[10];
int no_repeat_mask;
int pitch_lag;
float pitch_fac;
diff --git a/libavcodec/amrwbdata.h b/libavcodec/amrwbdata.h
index 35dee28..b27eb17 100644
--- a/libavcodec/amrwbdata.h
+++ b/libavcodec/amrwbdata.h
@@ -36,12 +36,12 @@
#define ENERGY_MEAN 30.0 ///< mean innovation energy (dB) in all modes
#define PREEMPH_FAC 0.68 ///< factor used to de-emphasize synthesis
-#define AMRWB_SUBFRAME_SIZE 64 ///< samples per subframe at 12.8 kHz
-#define AMRWB_SFR_SIZE_OUT 80 ///< samples per subframe at 16 kHz
+#define AMRWB_SFR_SIZE 64 ///< samples per subframe at 12.8 kHz
+#define AMRWB_SFR_SIZE_16k 80 ///< samples per subframe at 16 kHz
#define AMRWB_P_DELAY_MAX 231 ///< maximum pitch delay value
#define AMRWB_P_DELAY_MIN 34
-/* Mode ordering is sensitive, do not change */
+/* Relative mode ordering is sensitive */
enum Mode {
MODE_6k60 = 0, ///< 6.60 kbit/s
MODE_8k85, ///< 8.85 kbit/s
diff --git a/libavcodec/amrwbdec.c b/libavcodec/amrwbdec.c
index 116531a..add0764 100644
--- a/libavcodec/amrwbdec.c
+++ b/libavcodec/amrwbdec.c
@@ -44,20 +44,20 @@ typedef struct {
uint8_t fr_quality; ///< frame quality index (FQI)
float isf_cur[LP_ORDER]; ///< working ISF vector from current frame
float isf_q_past[LP_ORDER]; ///< quantized ISF vector of the previous frame
- float isf_past_final[LP_ORDER]; ///< final processed ISF vector of the prev frame
+ float isf_past_final[LP_ORDER]; ///< final processed ISF vector of the previous frame
double isp[4][LP_ORDER]; ///< ISP vectors from current frame
double isp_sub4_past[LP_ORDER]; ///< ISP vector for the 4th subframe of the previous frame
float lp_coef[4][LP_ORDER + 1]; ///< Linear Prediction Coefficients from ISP vector
- uint8_t base_pitch_lag; ///< integer part of pitch lag for next relative subframe
+ uint8_t base_pitch_lag; ///< integer part of pitch lag for the next relative subframe
uint8_t pitch_lag_int; ///< integer part of pitch lag of the previous subframe
- float excitation_buf[AMRWB_P_DELAY_MAX + LP_ORDER + 1 + AMRWB_SUBFRAME_SIZE]; ///< current excitation and all necessary excitation history
+ float excitation_buf[AMRWB_P_DELAY_MAX + LP_ORDER + 1 + AMRWB_SFR_SIZE]; ///< current excitation and all necessary excitation history
float *excitation; ///< points to current excitation in excitation_buf[]
- float pitch_vector[AMRWB_SUBFRAME_SIZE]; ///< adaptive codebook (pitch) vector for current subframe
- float fixed_vector[AMRWB_SUBFRAME_SIZE]; ///< algebraic codebook (fixed) vector
+ float pitch_vector[AMRWB_SFR_SIZE]; ///< adaptive codebook (pitch) vector for current subframe
+ float fixed_vector[AMRWB_SFR_SIZE]; ///< algebraic codebook (fixed) vector for current subframe
float prediction_error[4]; ///< quantified prediction errors {20log10(^gamma_gc)} for previous four subframes
float pitch_gain[6]; ///< quantified pitch gains for the current and previous five subframes
@@ -67,11 +67,11 @@ typedef struct {
float prev_sparse_fixed_gain; ///< previous fixed gain; used by anti-sparseness to determine "onset"
uint8_t prev_ir_filter_nr; ///< previous impulse response filter "impNr": 0 - strong, 1 - medium, 2 - none
- float prev_tr_gain; ///< previous initial gain used by noise enhancer for thresold
+ float prev_tr_gain; ///< previous initial gain used by noise enhancer for threshold
- float samples_az[LP_ORDER + AMRWB_SUBFRAME_SIZE]; ///< lower band samples from synthesis at 12.8kHz
- float samples_up[UPS_MEM_SIZE + AMRWB_SUBFRAME_SIZE]; ///< lower band samples processed for upsampling
- float samples_hb[LP_ORDER_16k + AMRWB_SFR_SIZE_OUT]; ///< higher band samples from synthesis at 16kHz
+ float samples_az[LP_ORDER + AMRWB_SFR_SIZE]; ///< low-band samples and memory from synthesis at 12.8kHz
+ float samples_up[UPS_MEM_SIZE + AMRWB_SFR_SIZE]; ///< low-band samples and memory processed for upsampling
+ float samples_hb[LP_ORDER_16k + AMRWB_SFR_SIZE_16k]; ///< high-band samples and memory from synthesis at 16kHz
float hpf_31_mem[4], hpf_400_mem[4]; ///< previous values in the high pass filters
float demph_mem[1]; ///< previous value in the de-emphasis filter
@@ -111,7 +111,7 @@ static av_cold int amrwb_decode_init(AVCodecContext *avctx)
* @param[in] buf Pointer to the input buffer
* @param[in] buf_size Size of the input buffer
*
- * @return the frame mode
+ * @return The frame mode
*/
static enum Mode unpack_bitstream(AMRWBContext *ctx, const uint8_t *buf,
int buf_size)
@@ -127,7 +127,7 @@ static enum Mode unpack_bitstream(AMRWBContext *ctx, const uint8_t *buf,
ctx->fr_cur_mode = get_bits(&gb, 4);
mode = ctx->fr_cur_mode;
ctx->fr_quality = get_bits1(&gb);
- skip_bits(&gb, 2);
+ skip_bits(&gb, 2); // padding bits
// XXX: We are using only the "MIME/storage" format
// used by libopencore. This format is simpler and
@@ -147,7 +147,7 @@ static enum Mode unpack_bitstream(AMRWBContext *ctx, const uint8_t *buf,
while (field_size--) {
uint16_t bit_idx = *perm++;
field <<= 1;
- /* The bit index inside the byte is reversed */
+ /* The bit index inside the byte is reversed (MSB->LSB) */
field |= BIT_POS(buf[bit_idx >> 3], 7 - (bit_idx & 7));
}
data[field_offset] = field;
@@ -181,7 +181,7 @@ static void isf2isp(const float *isf, double *isp, int size)
* Decodes quantized ISF vectors using 36-bit indices (6K60 mode only)
*
* @param[in] ind Array of 5 indices
- * @param[out] isf_q Isf_q[LP_ORDER]
+ * @param[out] isf_q Buffer for isf_q[LP_ORDER]
* @param[in] fr_q Frame quality (good frame == 1)
*
*/
@@ -190,19 +190,19 @@ static void decode_isf_indices_36b(uint16_t *ind, float *isf_q, uint8_t fr_q) {
if (fr_q == 1) {
for (i = 0; i < 9; i++) {
- isf_q[i] = dico1_isf[ind[0]][i] / (float) (1<<15);
+ isf_q[i] = dico1_isf[ind[0]][i] / (float) (1 << 15);
}
for (i = 0; i < 7; i++) {
- isf_q[i + 9] = dico2_isf[ind[1]][i] / (float) (1<<15);
+ isf_q[i + 9] = dico2_isf[ind[1]][i] / (float) (1 << 15);
}
for (i = 0; i < 5; i++) {
- isf_q[i] = isf_q[i] + dico21_isf_36b[ind[2]][i] / (float) (1<<15);
+ isf_q[i] += dico21_isf_36b[ind[2]][i] / (float) (1 << 15);
}
for (i = 0; i < 4; i++) {
- isf_q[i + 5] = isf_q[i + 5] + dico22_isf_36b[ind[3]][i] / (float) (1<<15);
+ isf_q[i + 5] += dico22_isf_36b[ind[3]][i] / (float) (1 << 15);
}
for (i = 0; i < 7; i++) {
- isf_q[i + 9] = isf_q[i + 9] + dico23_isf_36b[ind[4]][i] / (float) (1<<15);
+ isf_q[i + 9] += dico23_isf_36b[ind[4]][i] / (float) (1 << 15);
}
}
/* not implemented for bad frame */
@@ -212,7 +212,7 @@ static void decode_isf_indices_36b(uint16_t *ind, float *isf_q, uint8_t fr_q) {
* Decodes quantized ISF vectors using 46-bit indices (except 6K60 mode)
*
* @param[in] ind Array of 7 indices
- * @param[out] isf_q Isf_q[LP_ORDER]
+ * @param[out] isf_q Buffer for isf_q[LP_ORDER]
* @param[in] fr_q Frame quality (good frame == 1)
*
*/
@@ -221,25 +221,25 @@ static void decode_isf_indices_46b(uint16_t *ind, float *isf_q, uint8_t fr_q) {
if (fr_q == 1) {
for (i = 0; i < 9; i++) {
- isf_q[i] = dico1_isf[ind[0]][i] / (float) (1<<15);
+ isf_q[i] = dico1_isf[ind[0]][i] / (float) (1 << 15);
}
for (i = 0; i < 7; i++) {
- isf_q[i + 9] = dico2_isf[ind[1]][i] / (float) (1<<15);
+ isf_q[i + 9] = dico2_isf[ind[1]][i] / (float) (1 << 15);
}
for (i = 0; i < 3; i++) {
- isf_q[i] = isf_q[i] + dico21_isf[ind[2]][i] / (float) (1<<15);
+ isf_q[i] += dico21_isf[ind[2]][i] / (float) (1 << 15);
}
for (i = 0; i < 3; i++) {
- isf_q[i + 3] = isf_q[i + 3] + dico22_isf[ind[3]][i] / (float) (1<<15);
+ isf_q[i + 3] += dico22_isf[ind[3]][i] / (float) (1 << 15);
}
for (i = 0; i < 3; i++) {
- isf_q[i + 6] = isf_q[i + 6] + dico23_isf[ind[4]][i] / (float) (1<<15);
+ isf_q[i + 6] += dico23_isf[ind[4]][i] / (float) (1 << 15);
}
for (i = 0; i < 3; i++) {
- isf_q[i + 9] = isf_q[i + 9] + dico24_isf[ind[5]][i] / (float) (1<<15);
+ isf_q[i + 9] += dico24_isf[ind[5]][i] / (float) (1 << 15);
}
for (i = 0; i < 4; i++) {
- isf_q[i + 12] = isf_q[i + 12] + dico25_isf[ind[6]][i] / (float) (1<<15);
+ isf_q[i + 12] += dico25_isf[ind[6]][i] / (float) (1 << 15);
}
}
/* not implemented for bad frame */
@@ -259,7 +259,7 @@ static void isf_add_mean_and_past(float *isf_q, float *isf_past) {
for (i = 0; i < LP_ORDER; i++) {
tmp = isf_q[i];
- isf_q[i] += isf_mean[i] / (float) (1<<15);
+ isf_q[i] += isf_mean[i] / (float) (1 << 15);
isf_q[i] += PRED_FACTOR * isf_past[i];
isf_past[i] = tmp;
}
@@ -284,7 +284,7 @@ static void isf_set_min_dist(float *isf, float min_spacing, int size) {
}
/**
- * Interpolate the fourth ISP vector from current and past frame
+ * Interpolate the fourth ISP vector from current and past frames
* to obtain a ISP vector for each subframe
*
* @param[in,out] isp_q ISPs for each subframe
@@ -331,9 +331,9 @@ static void lsp2polyf_16k(const double *lsp, double *f, int lp_half_order)
*/
static void isp2lp(const double *isp, float *lp, int lp_half_order) {
double pa[10 + 1], qa[10 + 1];
- float *lp2 = lp + (lp_half_order << 1);
double last_isp = isp[2 * lp_half_order - 1];
double qa_old = 0.0;
+ float *lp2 = &lp[2 * lp_half_order];
int i;
if (lp_half_order > 8) { // high-band specific
@@ -365,9 +365,9 @@ static void isp2lp(const double *isp, float *lp, int lp_half_order) {
}
/**
- * Decode a adaptive codebook index into pitch lag (except 6k60, 8k85 modes)
+ * Decode an adaptive codebook index into pitch lag (except 6k60, 8k85 modes)
* Calculate integer lag and fractional lag always using 1/4 resolution
- * In 1st and 3rd subframes index is relative to last subframe integer lag
+ * In 1st and 3rd subframes the index is relative to last subframe integer lag
*
* @param[out] lag_int Decoded integer pitch lag
* @param[out] lag_frac Decoded fractional pitch lag
@@ -391,13 +391,11 @@ static void decode_pitch_lag_high(int *lag_int, int *lag_frac, int pitch_index,
*lag_frac = 0;
}
/* minimum lag for next subframe */
- *base_lag_int = av_clip(*lag_int - 8 - (*lag_frac < 0 ? 1 : 0), AMRWB_P_DELAY_MIN,
- AMRWB_P_DELAY_MAX - 15);
- /* XXX: the spec states clearly that *base_lag_int should be
- * the nearest integer to *lag_int (minus 8), but the ref code
- * actually uses always its floor, causing the next frame integer
- * lag to be one less than mine when the nearest integer is
- * not equal to the floor */
+ *base_lag_int = av_clip(*lag_int - 8 - (*lag_frac < 0 ? 1 : 0),
+ AMRWB_P_DELAY_MIN, AMRWB_P_DELAY_MAX - 15);
+ // XXX: the spec states clearly that *base_lag_int should be
+ // the nearest integer to *lag_int (minus 8), but the ref code
+ // actually always uses its floor, I'm following the latter
} else {
*lag_int = (pitch_index + 1) >> 2;
*lag_frac = pitch_index - (*lag_int << 2);
@@ -421,9 +419,9 @@ static void decode_pitch_lag_low(int *lag_int, int *lag_frac, int pitch_index,
*lag_int = pitch_index - 24;
*lag_frac = 0;
}
- /* XXX: same problem as before */
- *base_lag_int = av_clip(*lag_int - 8 - (*lag_frac < 0 ? 1 : 0), AMRWB_P_DELAY_MIN,
- AMRWB_P_DELAY_MAX - 15);
+ // XXX: same problem as before
+ *base_lag_int = av_clip(*lag_int - 8 - (*lag_frac < 0 ? 1 : 0),
+ AMRWB_P_DELAY_MIN, AMRWB_P_DELAY_MAX - 15);
} else {
*lag_int = (pitch_index + 1) >> 1;
*lag_frac = (pitch_index - (*lag_int << 1)) << 1;
@@ -463,16 +461,17 @@ static void decode_pitch_vector(AMRWBContext *ctx,
ff_acelp_interpolatef(exc, exc + 1 - pitch_lag_int,
ac_inter, 4,
pitch_lag_frac + (pitch_lag_frac > 0 ? 0 : 4),
- LP_ORDER, AMRWB_SUBFRAME_SIZE + 1);
+ LP_ORDER, AMRWB_SFR_SIZE + 1);
/* Check which pitch signal path should be used
* 6k60 and 8k85 modes have the ltp flag set to 0 */
if (amr_subframe->ltp) {
- memcpy(ctx->pitch_vector, exc, AMRWB_SUBFRAME_SIZE * sizeof(float));
+ memcpy(ctx->pitch_vector, exc, AMRWB_SFR_SIZE * sizeof(float));
} else {
- for (i = 0; i < AMRWB_SUBFRAME_SIZE; i++)
- ctx->pitch_vector[i] = 0.18 * exc[i - 1] + 0.64 * exc[i] + 0.18 * exc[i + 1];
- memcpy(exc, ctx->pitch_vector, AMRWB_SUBFRAME_SIZE * sizeof(float));
+ for (i = 0; i < AMRWB_SFR_SIZE; i++)
+ ctx->pitch_vector[i] = 0.18 * exc[i - 1] + 0.64 * exc[i] +
+ 0.18 * exc[i + 1];
+ memcpy(exc, ctx->pitch_vector, AMRWB_SFR_SIZE * sizeof(float));
}
}
@@ -482,7 +481,7 @@ static void decode_pitch_vector(AMRWBContext *ctx,
* an encoded pulse indexing (TS 26.190 section 5.8.2)
*
* The results are given in out[], in which a negative number means
- * amplitude -1 and vice-versa (i.e., ampl = x/abs(x) )
+ * amplitude -1 and vice versa (i.e., ampl(x) = x / abs(x) )
*
* @param[out] out Output buffer (writes i elements)
* @param[in] code Pulse index (no. of bits varies, see below)
@@ -522,7 +521,7 @@ static void decode_4p_track(int *out, int code, int m, int off)
switch (BIT_STR(code, 4*m - 2, 2)) /* case ID (2 bits) */
{
- case 0: /* 0 pulses in A, 4 pulses in B or vice-versa */
+ case 0: /* 0 pulses in A, 4 pulses in B or vice versa */
half_4p = BIT_POS(code, 4*m - 3) << (m - 1); // which has 4 pulses
subhalf_2p = BIT_POS(code, 2*m - 3) << (m - 2);
@@ -571,19 +570,19 @@ static void decode_6p_track(int *out, int code, int m, int off)
switch (BIT_STR(code, 6*m - 4, 2)) /* case ID (2 bits) */
{
- case 0: /* 0 pulses in A, 6 pulses in B or vice-versa */
+ case 0: /* 0 pulses in A, 6 pulses in B or vice versa */
decode_1p_track(out, BIT_STR(code, 0, m),
m - 1, off + half_more);
decode_5p_track(out + 1, BIT_STR(code, m, 5*m - 5),
m - 1, off + half_more);
break;
- case 1: /* 1 pulse in A, 5 pulses in B or vice-versa */
+ case 1: /* 1 pulse in A, 5 pulses in B or vice versa */
decode_1p_track(out, BIT_STR(code, 0, m),
m - 1, off + half_other);
decode_5p_track(out + 1, BIT_STR(code, m, 5*m - 5),
m - 1, off + half_more);
break;
- case 2: /* 2 pulses in A, 4 pulses in B or vice-versa */
+ case 2: /* 2 pulses in A, 4 pulses in B or vice versa */
decode_2p_track(out, BIT_STR(code, 0, 2*m - 1),
m - 1, off + half_other);
decode_4p_track(out + 2, BIT_STR(code, 2*m - 1, 4*m - 4),
@@ -602,20 +601,17 @@ static void decode_6p_track(int *out, int code, int m, int off)
* Decode the algebraic codebook index to pulse positions and signs,
* then construct the algebraic codebook vector
*
- * @param[out] fixed_sparse Pointer to the algebraic codebook
+ * @param[out] fixed_vector Buffer for the fixed codebook excitation
* @param[in] pulse_hi MSBs part of the pulse index array (higher modes only)
* @param[in] pulse_lo LSBs part of the pulse index array
* @param[in] mode Mode of the current frame
*/
-// XXX: For now, uses the same AMRFixed struct from AMR-NB but
-// the maximum number of pulses in it was increased to 24
-static void decode_fixed_sparse(AMRFixed *fixed_sparse, const uint16_t *pulse_hi,
+static void decode_fixed_vector(float *fixed_vector, const uint16_t *pulse_hi,
const uint16_t *pulse_lo, const enum Mode mode)
{
- /* sig_pos stores for each track the decoded pulse position (1-based)
- * indexes multiplied by its corresponding amplitude (+1 or -1) */
+ /* sig_pos stores for each track the decoded pulse position indexes
+ * (1-based) multiplied by its corresponding amplitude (+1 or -1) */
int sig_pos[4][6];
- int pulses_nb = 0;
int spacing = (mode == MODE_6k60) ? 2 : 4;
int i, j;
@@ -663,15 +659,14 @@ static void decode_fixed_sparse(AMRFixed *fixed_sparse, const uint16_t *pulse_hi
break;
}
+ memset(fixed_vector, 0, sizeof(float) * AMRWB_SFR_SIZE);
+
for (i = 0; i < 4; i++)
for (j = 0; j < pulses_nb_per_mode_tr[mode][i]; j++) {
- int pos = sig_pos[i][j];
- fixed_sparse->x[pulses_nb] = (FFABS(pos) - 1) * spacing + i;
- fixed_sparse->y[pulses_nb] = pos < 0 ? -1.0 : 1.0;
- pulses_nb++;
- }
+ int pos = (FFABS(sig_pos[i][j]) - 1) * spacing + i;
- fixed_sparse->n = pulses_nb;
+ fixed_vector[pos] += sig_pos[i][j] < 0 ? -1.0 : 1.0;
+ }
}
/**
@@ -693,30 +688,23 @@ static void decode_gains(const uint8_t vq_gain, const enum Mode mode,
}
/**
- * Apply pitch sharpening filters to the fixed vector sparse
- * representation to output the fixed codebook excitation vector
+ * Apply pitch sharpening filters to the fixed codebook vector
*
* @param[in] ctx The context
- * @param[in] fixed_sparse Fixed codebook sparse
- * @param[out] fixed_vector Fixed codebook excitation
+ * @param[in,out] fixed_vector Fixed codebook excitation
*/
// XXX: Spec states this procedure should be applied when the pitch
// lag is less than 64, but this checking seems absent in reference and AMR-NB
-static void pitch_sharpening(AMRWBContext *ctx, AMRFixed *fixed_sparse,
- float *fixed_vector)
+static void pitch_sharpening(AMRWBContext *ctx, float *fixed_vector)
{
int i;
- fixed_sparse->pitch_lag = AMRWB_SUBFRAME_SIZE;
- ff_set_fixed_vector(fixed_vector, fixed_sparse, 1.0,
- AMRWB_SUBFRAME_SIZE);
-
/* Tilt part */
- for (i = AMRWB_SUBFRAME_SIZE - 1; i != 0; i--)
+ for (i = AMRWB_SFR_SIZE - 1; i != 0; i--)
fixed_vector[i] -= fixed_vector[i - 1] * ctx->tilt_coef;
/* Periodicity enhancement part */
- for (i = ctx->pitch_lag_int; i < AMRWB_SUBFRAME_SIZE; i++)
+ for (i = ctx->pitch_lag_int; i < AMRWB_SFR_SIZE; i++)
fixed_vector[i] += fixed_vector[i - ctx->pitch_lag_int] * 0.85;
}
@@ -732,9 +720,9 @@ static float voice_factor(float *p_vector, float p_gain,
float *f_vector, float f_gain)
{
double p_ener = (double) ff_dot_productf(p_vector, p_vector,
- AMRWB_SUBFRAME_SIZE) * p_gain * p_gain;
+ AMRWB_SFR_SIZE) * p_gain * p_gain;
double f_ener = (double) ff_dot_productf(f_vector, f_vector,
- AMRWB_SUBFRAME_SIZE) * f_gain * f_gain;
+ AMRWB_SFR_SIZE) * f_gain * f_gain;
return (p_ener - f_ener) / (p_ener + f_ener);
}
@@ -742,18 +730,19 @@ static float voice_factor(float *p_vector, float p_gain,
/**
* Reduce fixed vector sparseness by smoothing with one of three IR filters
* Also known as "adaptive phase dispersion"
- * Returns the overwritten filtered fixed vector address
*
* @param[in] ctx The context
* @param[in,out] fixed_vector Unfiltered fixed vector
- * @param[out] out Space for modified vector if necessary
+ * @param[out] buf Space for modified vector if necessary
+ *
+ * @return The potentially overwritten filtered fixed vector address
*/
static float *anti_sparseness(AMRWBContext *ctx,
- float *fixed_vector, float *out)
+ float *fixed_vector, float *buf)
{
int ir_filter_nr;
- if (ctx->fr_cur_mode > MODE_8k85) // no filtering in high modes
+ if (ctx->fr_cur_mode > MODE_8k85) // no filtering in higher modes
return fixed_vector;
if (ctx->pitch_gain[0] < 0.6) {
@@ -767,13 +756,13 @@ static float *anti_sparseness(AMRWBContext *ctx,
if (ctx->fixed_gain[0] > 3.0 * ctx->fixed_gain[1]) {
if (ir_filter_nr < 2)
ir_filter_nr++;
- } else
- {
+ } else {
int i, count = 0;
for (i = 0; i < 6; i++)
if (ctx->pitch_gain[i] < 0.6)
count++;
+
if (count > 2)
ir_filter_nr = 0;
@@ -797,18 +786,18 @@ static float *anti_sparseness(AMRWBContext *ctx,
* c2(n) = sum(i,0,len-1){ c(i) * coef( (n - i + len) % len ) }
*/
- memset(out, 0, sizeof(float) * AMRWB_SUBFRAME_SIZE);
- for (i = 0; i < AMRWB_SUBFRAME_SIZE; i++)
+ memset(buf, 0, sizeof(float) * AMRWB_SFR_SIZE);
+ for (i = 0; i < AMRWB_SFR_SIZE; i++)
if (fixed_vector[i]) {
- int li = AMRWB_SUBFRAME_SIZE - i;
+ int li = AMRWB_SFR_SIZE - i;
for (j = 0; j < li; j++)
- out[i + j] += fixed_vector[i] * coef[j];
+ buf[i + j] += fixed_vector[i] * coef[j];
for (j = 0; j < i; j++)
- out[j] += fixed_vector[i] * coef[j + li];
+ buf[j] += fixed_vector[i] * coef[j + li];
}
- fixed_vector = out;
+ fixed_vector = buf;
}
return fixed_vector;
@@ -827,18 +816,20 @@ static float stability_factor(const float *isf, const float *isf_past)
acc += (isf[i] - isf_past[i]) * (isf[i] - isf_past[i]);
// XXX: I could not understand well this part from ref code
- // it made more sense changing the "/ 256" to "* 512"
+ // the result is more accurate changing the "/ 256" to "* 512"
return FFMAX(0.0, 1.25 - acc * 0.8 * 512);
}
/**
* Apply a non-linear fixed gain smoothing in order to reduce
- * fluctuation in the energy of excitation. Returns smoothed gain
+ * fluctuation in the energy of excitation
*
* @param[in] fixed_gain Unsmoothed fixed gain
* @param[in,out] prev_tr_gain Previous threshold gain (updated)
* @param[in] voice_fac Frame voicing factor
* @param[in] stab_fac Frame stability factor
+ *
+ * @return The smoothed gain
*/
static float noise_enhancer(float fixed_gain, float *prev_tr_gain,
float voice_fac, float stab_fac)
@@ -846,19 +837,17 @@ static float noise_enhancer(float fixed_gain, float *prev_tr_gain,
float sm_fac = 0.5 * (1 - voice_fac) * stab_fac;
float g0;
- /* XXX: here it is supposed to "in(de)crement the fixed gain by 1.5dB"
- * in each case, but the reference source (lines 812 onwards of
- * dec_main.c) multiplies gain by strange constants that need checking
- */
+ // XXX: the following fixed-point constants used to in(de)crement
+ // gain by 1.5dB were taken from the reference code, maybe it could
+ // be simpler
if (fixed_gain < *prev_tr_gain) {
- g0 = FFMIN(*prev_tr_gain, fixed_gain + ( fixed_gain *
- (6226 / (float) (1 << 15)))); // +1.5 dB
+ g0 = FFMIN(*prev_tr_gain, fixed_gain + fixed_gain *
+ (6226 / (float) (1 << 15))); // +1.5 dB
} else
g0 = FFMAX(*prev_tr_gain, fixed_gain *
(27536 / (float) (1 << 15))); // -1.5 dB
- // update next frame threshold
- *prev_tr_gain = g0;
+ *prev_tr_gain = g0; // update next frame threshold
return sm_fac * g0 + (1 - sm_fac) * fixed_gain;
}
@@ -872,19 +861,19 @@ static float noise_enhancer(float fixed_gain, float *prev_tr_gain,
static void pitch_enhancer(float *fixed_vector, float voice_fac)
{
int i;
- float cpe = 0.125 * (1 + voice_fac);
+ float cpe = 0.125 * (1 + voice_fac);
float last = fixed_vector[0]; // holds c(i - 1)
fixed_vector[0] -= cpe * fixed_vector[1];
- for (i = 1; i < AMRWB_SUBFRAME_SIZE - 1; i++) {
+ for (i = 1; i < AMRWB_SFR_SIZE - 1; i++) {
float cur = fixed_vector[i];
fixed_vector[i] -= cpe * (last + fixed_vector[i + 1]);
last = cur;
}
- fixed_vector[AMRWB_SUBFRAME_SIZE - 1] -= cpe * last;
+ fixed_vector[AMRWB_SFR_SIZE - 1] -= cpe * last;
}
/**
@@ -895,34 +884,34 @@ static void pitch_enhancer(float *fixed_vector, float voice_fac)
* @param[out] excitation Buffer for synthesis final excitation
* @param[in] fixed_gain Fixed codebook gain for synthesis
* @param[in] fixed_vector Algebraic codebook vector
- * @param[in,out] samples Pointer to the output speech samples
+ * @param[in,out] samples Pointer to the output samples and memory
*/
static void synthesis(AMRWBContext *ctx, float *lpc, float *excitation,
- float fixed_gain, const float *fixed_vector,
- float *samples)
+ float fixed_gain, const float *fixed_vector,
+ float *samples)
{
ff_weighted_vector_sumf(excitation, ctx->pitch_vector, fixed_vector,
- ctx->pitch_gain[0], fixed_gain, AMRWB_SUBFRAME_SIZE);
+ ctx->pitch_gain[0], fixed_gain, AMRWB_SFR_SIZE);
/* emphasize pitch vector contribution in low bitrate modes */
if (ctx->pitch_gain[0] > 0.5 && ctx->fr_cur_mode <= MODE_8k85) {
int i;
float energy = ff_dot_productf(excitation, excitation,
- AMRWB_SUBFRAME_SIZE);
+ AMRWB_SFR_SIZE);
// XXX: Weird part in both ref code and spec. A unknown parameter
// {beta} seems to be identical to the current pitch gain
float pitch_factor = 0.25 * ctx->pitch_gain[0] * ctx->pitch_gain[0];
- for (i = 0; i < AMRWB_SUBFRAME_SIZE; i++)
+ for (i = 0; i < AMRWB_SFR_SIZE; i++)
excitation[i] += pitch_factor * ctx->pitch_vector[i];
ff_scale_vector_to_given_sum_of_squares(excitation, excitation,
- energy, AMRWB_SUBFRAME_SIZE);
+ energy, AMRWB_SFR_SIZE);
}
ff_celp_lp_synthesis_filterf(samples, lpc + 1, excitation,
- AMRWB_SUBFRAME_SIZE, LP_ORDER);
+ AMRWB_SFR_SIZE, LP_ORDER);
}
/**
@@ -940,10 +929,10 @@ static void de_emphasis(float *out, float *in, float m, float mem[1])
out[0] = in[0] + m * mem[0];
- for (i = 1; i < AMRWB_SUBFRAME_SIZE; i++)
+ for (i = 1; i < AMRWB_SFR_SIZE; i++)
out[i] = in[i] + out[i - 1] * m;
- mem[0] = out[AMRWB_SUBFRAME_SIZE - 1];
+ mem[0] = out[AMRWB_SFR_SIZE - 1];
}
/**
@@ -962,7 +951,7 @@ static void high_pass_filter(float *out, const float hpf_coef[2][3],
int i;
float *x = mem - 1, *y = mem + 2; // previous inputs and outputs
- for (i = 0; i < AMRWB_SUBFRAME_SIZE; i++) {
+ for (i = 0; i < AMRWB_SFR_SIZE; i++) {
float x0 = in[i];
out[i] = hpf_coef[0][0] * x0 + hpf_coef[1][0] * y[0] +
@@ -991,8 +980,8 @@ static void upsample_5_4(float *out, const float *in, int o_size)
int i;
for (i = 0; i < o_size; i++) {
- int int_part = (i << 2) / 5;
- int frac_part = (i << 2) - 5 * int_part;
+ int int_part = (4 * i) / 5;
+ int frac_part = (4 * i) - 5 * int_part;
if (!frac_part) {
out[i] = in[int_part];
@@ -1004,7 +993,7 @@ static void upsample_5_4(float *out, const float *in, int o_size)
/**
* Calculate the high-band gain based on encoded index (23k85 mode) or
- * on the lower band speech signal and the Voice Activity Detection flag
+ * on the low-band speech signal and the Voice Activity Detection flag
*
* @param[in] ctx The context
* @param[in] synth LB speech synthesis at 12.8k
@@ -1020,8 +1009,8 @@ static float find_hb_gain(AMRWBContext *ctx, const float *synth,
if (ctx->fr_cur_mode == MODE_23k85)
return qua_hb_gain[hb_idx] / (float) (1 << 14);
- tilt = ff_dot_productf(synth, synth + 1, AMRWB_SUBFRAME_SIZE - 1) /
- ff_dot_productf(synth, synth, AMRWB_SUBFRAME_SIZE);
+ tilt = ff_dot_productf(synth, synth + 1, AMRWB_SFR_SIZE - 1) /
+ ff_dot_productf(synth, synth, AMRWB_SFR_SIZE);
/* return gain bounded by [0.1, 1.0] */
return av_clipf((1.0 - FFMAX(0.0, tilt)) * (1.25 - 0.25 * wsp), 0.1, 1.0);
@@ -1033,28 +1022,28 @@ static float find_hb_gain(AMRWBContext *ctx, const float *synth,
*
* @param[in] ctx The context
* @param[out] hb_exc Buffer for the excitation
- * @param[in] synth_exc Excitation used for synthesis
+ * @param[in] synth_exc Low-band excitation used for synthesis
* @param[in] hb_gain Wanted excitation gain
*/
static void scaled_hb_excitation(AMRWBContext *ctx, float *hb_exc,
const float *synth_exc, float hb_gain)
{
int i;
- float energy = ff_dot_productf(synth_exc, synth_exc, AMRWB_SUBFRAME_SIZE);
+ float energy = ff_dot_productf(synth_exc, synth_exc, AMRWB_SFR_SIZE);
/* Generate a white-noise excitation */
- for (i = 0; i < AMRWB_SFR_SIZE_OUT; i++)
+ for (i = 0; i < AMRWB_SFR_SIZE_16k; i++)
hb_exc[i] = 32768.0 - (uint16_t) av_lfg_get(&ctx->prng);
ff_scale_vector_to_given_sum_of_squares(hb_exc, hb_exc, energy,
- AMRWB_SFR_SIZE_OUT);
+ AMRWB_SFR_SIZE_16k);
- for (i = 0; i < AMRWB_SFR_SIZE_OUT; i++)
+ for (i = 0; i < AMRWB_SFR_SIZE_16k; i++)
hb_exc[i] *= hb_gain;
}
/**
- * Calculate auto-correlation for the ISF difference vector
+ * Calculate the auto-correlation for the ISF difference vector
*/
static float auto_correlation(float *diff_isf, float mean, int lag)
{
@@ -1070,7 +1059,7 @@ static float auto_correlation(float *diff_isf, float mean, int lag)
/**
* Extrapolate a ISF vector to the 16kHz range (20th order LP)
- * used at mode 6k60 LP filter for the high-freq band
+ * used at mode 6k60 LP filter for the high frequency band
*
* @param[out] out Buffer for extrapolated isf
* @param[in] isf Input isf vector
@@ -1098,6 +1087,7 @@ static void extrapolate_isf(float out[LP_ORDER_16k], float isf[LP_ORDER])
i_max_corr = 0;
for (i = 0; i < 3; i++) {
corr_lag[i] = auto_correlation(diff_isf, diff_mean, i + 2);
+
if (corr_lag[i] > corr_lag[i_max_corr])
i_max_corr = i;
}
@@ -1148,7 +1138,7 @@ static void lpc_weighting(float *out, const float *lpc, float gamma, int size)
for (i = 0; i < size; i++) {
out[i] = lpc[i] * fac;
- fac *= gamma;
+ fac *= gamma;
}
}
@@ -1185,7 +1175,7 @@ static void hb_synthesis(AMRWBContext *ctx, int subframe, float *samples,
lpc_weighting(hb_lpc, ctx->lp_coef[subframe], 0.6, LP_ORDER + 1);
}
- ff_celp_lp_synthesis_filterf(samples, hb_lpc + 1, exc, AMRWB_SFR_SIZE_OUT,
+ ff_celp_lp_synthesis_filterf(samples, hb_lpc + 1, exc, AMRWB_SFR_SIZE_16k,
(mode == MODE_6k60) ? LP_ORDER_16k : LP_ORDER);
}
@@ -1205,20 +1195,20 @@ static void hb_fir_filter(float *out, const float fir_coef[HB_FIR_SIZE + 1],
float mem[HB_FIR_SIZE], float cp_gain, const float *in)
{
int i, j;
- float data[AMRWB_SFR_SIZE_OUT + HB_FIR_SIZE]; // past and current samples
+ float data[AMRWB_SFR_SIZE_16k + HB_FIR_SIZE]; // past and current samples
memcpy(data, mem, HB_FIR_SIZE * sizeof(float));
- for (i = 0; i < AMRWB_SFR_SIZE_OUT; i++)
+ for (i = 0; i < AMRWB_SFR_SIZE_16k; i++)
data[i + HB_FIR_SIZE] = in[i] / cp_gain;
- for (i = 0; i < AMRWB_SFR_SIZE_OUT; i++) {
+ for (i = 0; i < AMRWB_SFR_SIZE_16k; i++) {
out[i] = 0.0;
for (j = 0; j <= HB_FIR_SIZE; j++)
out[i] += data[i + j] * fir_coef[j];
}
- memcpy(mem, data + AMRWB_SFR_SIZE_OUT, HB_FIR_SIZE * sizeof(float));
+ memcpy(mem, data + AMRWB_SFR_SIZE_16k, HB_FIR_SIZE * sizeof(float));
}
/**
@@ -1226,20 +1216,18 @@ static void hb_fir_filter(float *out, const float fir_coef[HB_FIR_SIZE + 1],
*/
static void update_sub_state(AMRWBContext *ctx)
{
- memmove(&ctx->excitation_buf[0], &ctx->excitation_buf[AMRWB_SUBFRAME_SIZE],
+ memmove(&ctx->excitation_buf[0], &ctx->excitation_buf[AMRWB_SFR_SIZE],
(AMRWB_P_DELAY_MAX + LP_ORDER + 1) * sizeof(float));
memmove(&ctx->pitch_gain[1], &ctx->pitch_gain[0], 5 * sizeof(float));
memmove(&ctx->fixed_gain[1], &ctx->fixed_gain[0], 1 * sizeof(float));
- memmove(&ctx->samples_az[0], &ctx->samples_az[AMRWB_SUBFRAME_SIZE],
+ memmove(&ctx->samples_az[0], &ctx->samples_az[AMRWB_SFR_SIZE],
LP_ORDER * sizeof(float));
- memmove(&ctx->samples_up[0], &ctx->samples_up[AMRWB_SUBFRAME_SIZE],
+ memmove(&ctx->samples_up[0], &ctx->samples_up[AMRWB_SFR_SIZE],
UPS_MEM_SIZE * sizeof(float));
- memmove(&ctx->samples_hb[0], &ctx->samples_hb[AMRWB_SFR_SIZE_OUT],
+ memmove(&ctx->samples_hb[0], &ctx->samples_hb[AMRWB_SFR_SIZE_16k],
LP_ORDER_16k * sizeof(float));
-
- memset(ctx->fixed_vector, 0, AMRWB_SUBFRAME_SIZE * sizeof(float));
}
static int amrwb_decode_frame(AVCodecContext *avctx, void *data, int *data_size,
@@ -1250,15 +1238,14 @@ static int amrwb_decode_frame(AVCodecContext *avctx, void *data, int *data_size,
const uint8_t *buf = avpkt->data;
int buf_size = avpkt->size;
float *buf_out = data;
- AMRFixed fixed_sparse = {0}; // fixed vector up to anti-sparseness processing
- float spare_vector[AMRWB_SUBFRAME_SIZE]; // extra stack space to hold result from anti-sparseness processing
+ float spare_vector[AMRWB_SFR_SIZE]; // extra stack space to hold result from anti-sparseness processing
float fixed_gain_factor; // fixed gain correction factor (gamma)
float *synth_fixed_vector; // pointer to the fixed vector that synthesis should use
float synth_fixed_gain; // the fixed gain that synthesis should use
float voice_fac, stab_fac; // parameters used for gain smoothing
- float synth_exc[AMRWB_SUBFRAME_SIZE]; // post-processed excitation for synthesis
- float hb_exc[AMRWB_SFR_SIZE_OUT]; // excitation for the high frequency band
- float hb_samples[AMRWB_SFR_SIZE_OUT]; // filtered high-band samples from synthesis
+ float synth_exc[AMRWB_SFR_SIZE]; // post-processed excitation for synthesis
+ float hb_exc[AMRWB_SFR_SIZE_16k]; // excitation for the high frequency band
+ float hb_samples[AMRWB_SFR_SIZE_16k]; // filtered high-band samples from synthesis
float hb_gain;
int sub, i;
@@ -1298,23 +1285,23 @@ static int amrwb_decode_frame(AVCodecContext *avctx, void *data, int *data_size,
for (sub = 0; sub < 4; sub++) {
const AMRWBSubFrame *cur_subframe = &cf->subframe[sub];
- float *sub_buf = buf_out + sub * AMRWB_SFR_SIZE_OUT;
+ float *sub_buf = buf_out + sub * AMRWB_SFR_SIZE_16k;
- /* Decode adaptive codebook */
+ /* Decode adaptive codebook (pitch vector) */
decode_pitch_vector(ctx, cur_subframe, sub);
- /* Decode innovative codebook (sparse representation) */
- decode_fixed_sparse(&fixed_sparse, cur_subframe->pul_ih,
+ /* Decode innovative codebook (fixed vector) */
+ decode_fixed_vector(ctx->fixed_vector, cur_subframe->pul_ih,
cur_subframe->pul_il, ctx->fr_cur_mode);
+ pitch_sharpening(ctx, ctx->fixed_vector);
+
decode_gains(cur_subframe->vq_gain, ctx->fr_cur_mode,
&fixed_gain_factor, &ctx->pitch_gain[0]);
- pitch_sharpening(ctx, &fixed_sparse, ctx->fixed_vector);
-
ctx->fixed_gain[0] =
ff_amr_set_fixed_gain(fixed_gain_factor,
ff_dot_productf(ctx->fixed_vector, ctx->fixed_vector,
- AMRWB_SUBFRAME_SIZE)/AMRWB_SUBFRAME_SIZE,
+ AMRWB_SFR_SIZE) / AMRWB_SFR_SIZE,
ctx->prediction_error,
ENERGY_MEAN, energy_pred_fac);
@@ -1324,7 +1311,7 @@ static int amrwb_decode_frame(AVCodecContext *avctx, void *data, int *data_size,
ctx->tilt_coef = voice_fac * 0.25 + 0.25;
/* Construct current excitation */
- for (i = 0; i < AMRWB_SUBFRAME_SIZE; i++) {
+ for (i = 0; i < AMRWB_SFR_SIZE; i++) {
ctx->excitation[i] *= ctx->pitch_gain[0];
ctx->excitation[i] += ctx->fixed_gain[0] * ctx->fixed_vector[i];
// XXX: Should remove fractional part of excitation like AMR-NB?
@@ -1351,7 +1338,7 @@ static int amrwb_decode_frame(AVCodecContext *avctx, void *data, int *data_size,
ctx->hpf_31_mem, &ctx->samples_up[UPS_MEM_SIZE]);
upsample_5_4(sub_buf, &ctx->samples_up[UPS_FIR_SIZE],
- AMRWB_SFR_SIZE_OUT);
+ AMRWB_SFR_SIZE_16k);
/* High frequency band (6.4 - 7.0 kHz) generation part */
high_pass_filter(hb_samples, hpf_400_coef, ctx->hpf_400_mem,
@@ -1374,10 +1361,10 @@ static int amrwb_decode_frame(AVCodecContext *avctx, void *data, int *data_size,
1.0, hb_samples);
/* Add the low and high frequency bands */
- for (i = 0; i < AMRWB_SFR_SIZE_OUT; i++) {
- // XXX: the lower band should really be upscaled by 2.0? That
+ for (i = 0; i < AMRWB_SFR_SIZE_16k; i++) {
+ // XXX: the low-band should really be upscaled by 2.0? This
// way the output volume level seems doubled
- sub_buf[i] = (sub_buf[i] * 1.0 + hb_samples[i]) / 32768.0;
+ sub_buf[i] = (sub_buf[i] + hb_samples[i]) / 32768.0;
}
/* Update buffers and history */
@@ -1389,7 +1376,7 @@ static int amrwb_decode_frame(AVCodecContext *avctx, void *data, int *data_size,
memcpy(ctx->isf_past_final, ctx->isf_cur, LP_ORDER * sizeof(float));
/* report how many samples we got */
- *data_size = 4 * AMRWB_SFR_SIZE_OUT * sizeof(float);
+ *data_size = 4 * AMRWB_SFR_SIZE_16k * sizeof(float);
return ((cf_sizes_wb[ctx->fr_cur_mode] + 7) >> 3) + 1;
}
-----------------------------------------------------------------------
Summary of changes:
libavcodec/acelp_vectors.h | 4 +-
libavcodec/amrwbdata.h | 6 +-
libavcodec/amrwbdec.c | 299 +++++++++++++++++++++-----------------------
3 files changed, 148 insertions(+), 161 deletions(-)
hooks/post-receive
--
AMR-WB decoder
More information about the FFmpeg-soc
mailing list