[FFmpeg-soc] [soc]AMR-WB decoder branch, master, updated.
Marcelo Póvoa
marspeoplester at gmail.com
Wed Jul 14 20:27:45 CEST 2010
This is an automated email from the git hooks/post-receive script. It was
generated because a ref change was pushed to the repository containing
the project "AMR-WB decoder".
The branch, master has been updated
via 4b95cefc49b723ea5e2ce3ac016b0817ec41752f (commit)
from 0bfa12bd360d233e9caf7024f0912a4004ad8468 (commit)
Those revisions listed above that are new to this repository have
not appeared on any other notification email; so we list those
revisions in full, below.
- Log -----------------------------------------------------------------
commit 4b95cefc49b723ea5e2ce3ac016b0817ec41752f
Author: Marcelo Povoa <marspeoplester at gmail.com>
Date: Wed Jul 14 15:25:32 2010 -0300
Write anti-sparseness processing (6.1.5)
diff --git a/libavcodec/amrwbdata.h b/libavcodec/amrwbdata.h
index c52fe7a..6163bba 100644
--- a/libavcodec/amrwbdata.h
+++ b/libavcodec/amrwbdata.h
@@ -1751,6 +1751,41 @@ static const int16_t qua_gain_7b[128][2] = {
/* 4-tap moving average prediction coefficients in reverse order */
static const float energy_pred_fac[4] = { 0.2, 0.3, 0.4, 0.5 };
+/** Impulse response filter tables, converted to float from Q15,
+ * used for anti-sparseness processing */
+// XXX: Not sure whether the original tables are indeed in Q15
+static const float ir_filter_str[64] = {
+ 0.615906, 0.295807, 0.099792, -0.104889, 0.087402, -0.159912,
+ 0.048492, -0.041412, 0.018311, 0.118805, -0.045685, -0.021301,
+ 0.036713, -0.160187, 0.036591, 0.163910, -0.045410, -0.021515,
+ -0.088104, 0.060303, 0.027405, 0.022003, -0.118286, 0.128998,
+ -0.156006, 0.195312, -0.031494, -0.144196, 0.124908, -0.132812,
+ 0.097809, 0.065002, -0.060913, -0.056000, 0.080811, -0.054504,
+ -0.012390, 0.017487, 0.075806, -0.110107, 0.095795, -0.041595,
+ -0.078308, 0.116211, -0.019501, -0.062592, -0.016510, 0.072510,
+ 0.119995, -0.191101, 0.043701, -0.109894, 0.149200, 0.011292,
+ 0.017303, -0.035492, -0.087097, 0.058411, 0.001190, -0.073792,
+ 0.105408, 0.090790, -0.122711, 0.104706
+};
+
+static const float ir_filter_mid[64] = {
+ 0.735413, 0.319214, -0.160614, -0.023285, 0.062500, -0.028290,
+ 0.053497, -0.101410, 0.067505, 0.019897, -0.065491, 0.075897,
+ -0.108002, 0.125397, -0.064301, -0.011414, -0.019104, 0.130310,
+ -0.167389, 0.068207, 0.056702, -0.084503, 0.022705, 0.034790,
+ -0.023285, -0.049286, 0.123901, -0.139587, 0.091003, -0.035492,
+ 0.022308, -0.033508, 0.024506, 0.005096, -0.021790, 0.018494,
+ -0.017090, 0.019501, 0.001312, -0.053894, 0.098511, -0.084900,
+ 0.020294, 0.023285, 0.007111, -0.061096, 0.039398, 0.057098,
+ -0.105896, 0.031494, 0.082703, -0.123291, 0.110596, -0.128601,
+ 0.161499, -0.130310, 0.047699, 0.003296, -0.017700, 0.050110,
+ -0.075012, 0.029205, 0.016602, 0.077515
+};
+
+static const float *ir_filters_lookup[2] = {
+ ir_filter_str, ir_filter_mid
+};
+
/* Core frame sizes in each mode */
static const uint16_t cf_sizes_wb[] = {
132, 177, 253, 285, 317, 365, 397, 461, 477,
diff --git a/libavcodec/amrwbdec.c b/libavcodec/amrwbdec.c
index c20ef9c..5469791 100644
--- a/libavcodec/amrwbdec.c
+++ b/libavcodec/amrwbdec.c
@@ -63,6 +63,9 @@ typedef struct {
float fixed_gain[5]; ///< quantified fixed gains for the current and previous four subframes
float tilt_coef; ///< {beta_1} related to the voicing of the previous subframe
+
+ float prev_sparse_fixed_gain; ///< previous fixed gain; used by anti-sparseness to determine "onset"
+ uint8_t prev_ir_filter_nr; ///< previous impulse response filter "impNr": 0 - strong, 1 - medium, 2 - none
} AMRWBContext;
static int amrwb_decode_init(AVCodecContext *avctx)
@@ -701,6 +704,98 @@ static float voice_factor(float *p_vector, float p_gain,
return (p_ener - f_ener) / (p_ener + f_ener);
}
+/**
+ * Reduce fixed vector sparseness by smoothing with one of three IR filters.
+ * Also known as "adaptive phase dispersion".
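+ * Returns the filtered fixed vector address (out if a filter was applied, fixed_vector otherwise)
+ *
+ * @param ctx [in,out] the context; prev_ir_filter_nr is updated
+ * @param fixed_vector [in] unfiltered fixed vector
+ * @param out [out] space for the filtered vector; written only when a filter is applied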
+ */
+static const float *anti_sparseness(AMRWBContext *ctx,
+ const float *fixed_vector, float *out)
+{
+ int ir_filter_nr;
+
+ if (ctx->pitch_gain[4] < 0.6) {
+ ir_filter_nr = 0; // strong filtering
+ } else if (ctx->pitch_gain[4] < 0.9) {
+ ir_filter_nr = 1; // medium filtering
+ } else
+ ir_filter_nr = 2; // no filtering
+
+ // detect 'onset'
+ if (ctx->fixed_gain[4] > 3.0 * ctx->fixed_gain[3]) {
+ if (ir_filter_nr < 2)
+ ir_filter_nr++;
+ } else
+ {
+ int i, count = 0;
+
+ for (i = 0; i < 5; i++)
+ if (ctx->pitch_gain[i] < 0.6)
+ count++;
+ if (count > 2)
+ ir_filter_nr = 0;
+
+ if (ir_filter_nr > ctx->prev_ir_filter_nr + 1)
+ ir_filter_nr--;
+ }
+
+ // update ir filter strength history
+ ctx->prev_ir_filter_nr = ir_filter_nr;
+
+ ir_filter_nr += (ctx->fr_cur_mode == MODE_8k85 ? 1 : 0);
+
+ if (ir_filter_nr < 2) {
+ int i, j;
+ const float *coef = ir_filters_lookup[ir_filter_nr];
+
+ /* Circular convolution code in reference
+ * decoder was modified to avoid using one
+ * extra array. The filtered vector is given by:
+ *
+ * c2(n) = sum(i,0,len-1){ c(i) * coef( (n - i + len) % len ) }
+ */
+
+ /* XXX: Based on the ref decoder, I guess a function like
+ * apply_ir_filter() is not necessary here, since we
+ * already have the fixed codebook in its array form and,
+ * moreover, this form already has the pitch sharpening while
+ * the sparse codebook does not */
+
+ memset(out, 0, sizeof(float) * AMRWB_SUBFRAME_SIZE);
+ for (i = 0; i < AMRWB_SUBFRAME_SIZE; i++)
+ if (fixed_vector[i]) {
+ int li = AMRWB_SUBFRAME_SIZE - i;
+
+ for (j = 0; j < li; j++)
+ out[i + j] += fixed_vector[i] * coef[j];
+
+ for (j = 0; j < i; j++)
+ out[j] += fixed_vector[i] * coef[j + li];
+ }
+ fixed_vector = out;
+ }
+
+ return fixed_vector;
+}
+
+/**
+ * Update context state before the next subframe
+ *
+ * @param ctx [in] the context
+ */
+static void update_sub_state(AMRWBContext *ctx)
+{
+ ctx->tilt_coef = voice_factor(ctx->pitch_vector, ctx->pitch_gain[4],
+ ctx->fixed_vector, ctx->fixed_gain[4]) * 0.25 + 0.25;
+
+ memmove(&ctx->pitch_gain[0], &ctx->pitch_gain[1], 4 * sizeof(float));
+ memmove(&ctx->fixed_gain[0], &ctx->fixed_gain[1], 4 * sizeof(float));
+}
+
static int amrwb_decode_frame(AVCodecContext *avctx, void *data, int *data_size,
AVPacket *avpkt)
{
@@ -709,7 +804,9 @@ static int amrwb_decode_frame(AVCodecContext *avctx, void *data, int *data_size,
const uint8_t *buf = avpkt->data;
int buf_size = avpkt->size;
AMRFixed fixed_sparse = {0}; // fixed vector up to anti-sparseness processing
+ float spare_vector[AMRWB_SUBFRAME_SIZE]; // extra stack space to hold result from anti-sparseness processing
float fixed_gain_factor; // fixed gain correction factor (gamma)
+ const float *synth_fixed_vector; // pointer to the fixed vector that synthesis should use
int sub, i;
ctx->fr_cur_mode = unpack_bitstream(ctx, buf, buf_size);
@@ -771,17 +868,17 @@ static int amrwb_decode_frame(AVCodecContext *avctx, void *data, int *data_size,
// I did not found a reference of this in the ref decoder
}
+ synth_fixed_vector = anti_sparseness(ctx, ctx->fixed_vector,
+ spare_vector);
+
ff_clear_fixed_vector(ctx->fixed_vector, &fixed_sparse,
AMRWB_SUBFRAME_SIZE);
+ update_sub_state(ctx);
}
// update state for next frame
memcpy(ctx->isp_sub4_past, ctx->isp[3], LP_ORDER * sizeof(ctx->isp[3][0]));
- // calculate tilt coefficient for next subframe
- ctx->tilt_coef = voice_factor(ctx->pitch_vector, ctx->pitch_gain[4],
- ctx->fixed_vector, ctx->fixed_gain[4]) * 0.25 + 0.25;
-
return 0;
}
-----------------------------------------------------------------------
Summary of changes:
libavcodec/amrwbdata.h | 35 ++++++++++++++++
libavcodec/amrwbdec.c | 105 ++++++++++++++++++++++++++++++++++++++++++++++--
2 files changed, 136 insertions(+), 4 deletions(-)
hooks/post-receive
--
AMR-WB decoder
More information about the FFmpeg-soc
mailing list