[FFmpeg-devel] [PATCH] WMA Voice decoder
Vitor Sessak
vitor1001
Sat Feb 6 17:19:49 CET 2010
Ronald S. Bultje wrote:
> Hi,
>
> On Tue, Feb 2, 2010 at 11:35 AM, Ronald S. Bultje <rsbultje at gmail.com> wrote:
>
>> (Work on aw_*() is still ongoing...)
>>
>
> I have something without the crazy loops and using av_log2(), maybe
> this is better?
>
> +/**
> + * Parse 10 independently-coded LSPs.
> + */
> +static void dequant_lsp10i(GetBitContext *gb, double *lsps)
> +{
> + static const uint16_t vec_sizes[4] = { 256, 64, 32, 32 };
> + static const double mul_lsf[4] = {
> + 5.2187144800e-3, 1.4626986422e-3,
> + 9.6179549166e-4, 1.1325736225e-3
> + };
> + static const double base_lsf[4] = {
> + M_PI * -2.15522e-1, M_PI * -6.1646e-2,
> + M_PI * -3.3486e-2, M_PI * -5.7408e-2
> + };
>
I think just writing the precomputed constants (already multiplied by pi) would be more readable.
> +#define NO_OFFSET -255
> +/**
> + * Parse the offset of the first pitch-adaptive window pulses, and
> + * the distribution of pulses between the two blocks in this frame.
> + * @param ctx WMA Voice decoding context
> + * @param gb bit I/O context
> + * @param pitch pitch for each block in this frame
> + */
> +static void aw_parse_coords(AVCodecContext *ctx, GetBitContext *gb,
> + const int *pitch)
> +{
> + static const int16_t start_offset[94] = {
> + -11, -9, -7, -5, -3, -1, 1, 3, 5, 7, 9, 11,
> + 13, 15, 18, 17, 19, 20, 21, 22, 23, 24, 25, 26,
> + 27, 28, 29, 30, 31, 32, 33, 35, 37, 39, 41, 43,
> + 45, 47, 49, 51, 53, 55, 57, 59, 61, 63, 65, 67,
> + 69, 71, 73, 75, 77, 79, 81, 83, 85, 87, 89, 91,
> + 93, 95, 97, 99, 101, 103, 105, 107, 109, 111, 113, 115,
> + 117, 119, 121, 123, 125, 127, 129, 131, 133, 135, 137, 139,
> + 141, 143, 145, 147, 149, 151, 153, 155, 157, 159
> + };
> + WMAVoiceContext *s = ctx->priv_data;
> + int bits, n, offset, off_table[11], first_idx[2];
> +
> + s->aw_idx_is_ext = 0;
> + if ((bits = get_bits(gb, 6)) >= 54) {
> + s->aw_idx_is_ext = 1;
> + bits += (bits - 54) * 3 + get_bits(gb, 2);
> + }
> + s->aw_pitch_range = FFMIN(pitch[0], pitch[1]) > 32 ? 24 : 16;
> +
> + offset = start_offset[bits];
> + for (n = 0; n < 11 && offset < MAX_FRAMESIZE; n++) {
> + off_table[n] = offset;
> + offset += pitch[offset >= MAX_FRAMESIZE / 2];
> + }
> + if (n < 11)
> + memset(&off_table[n], -1, (11 - n) * sizeof(int));
>
-1 or NO_OFFSET?
> +
> + s->aw_n_pulses[0] = s->aw_n_pulses[1] = 0;
>
> + s->aw_first_pulse_off[0] = s->aw_first_pulse_off[1] = NO_OFFSET;
>
Is this initialization really needed?
> +/**
> + * Parse FCB/ACB signal for a single block.
> + * @note see #synth_block().
> + */
> +static void synth_block_fcb_acb(AVCodecContext *ctx, GetBitContext *gb,
> + int block_idx, int size,
> + int block_pitch_sh2,
> + const struct frame_type_desc *frame_desc,
> + float *excitation)
> +{
> + static const float gain_coeff[6] = {
> + 0.8169, -0.06545, 0.1726, 0.0185, -0.0359, 0.0458
> + };
> + WMAVoiceContext *s = ctx->priv_data;
> + float pulses[MAX_FRAMESIZE / 2], pred_err, acb_gain, fcb_gain;
> + int n, idx, gain_weight;
> + AMRFixed fcb;
> +
> + assert(size <= MAX_FRAMESIZE / 2);
> + memset(pulses, 0, sizeof(*pulses) * size);
> +
> + fcb.pitch_lag = block_pitch_sh2 >> 2;
> + fcb.pitch_fac = 1.0;
> + fcb.no_repeat_mask = 0;
> + fcb.n = 0;
> +
> + /* For the other frame types, this is where we apply the innovation
> + * (fixed) codebook pulses of the speech signal. */
> + if (frame_desc->fcb_type == FCB_TYPE_AW_PULSES) {
> + aw_pulse_set1(s, gb, block_idx, &fcb);
> + aw_pulse_set2(s, gb, block_idx, &fcb);
> + } else /* FCB_TYPE_EXC_PULSES */ {
> + int offset_nbits = 5 - frame_desc->log_n_blocks;
> +
> + fcb.no_repeat_mask = -1;
> + for (n = 0; n < 5; n++) {
> + float pulse = get_bits1(gb) ? 1.0 : -1.0;
> + int idx1, idx2;
> +
> + idx1 = get_bits(gb, offset_nbits);
> + fcb.x[fcb.n] = n + 5 * idx1;
> + fcb.y[fcb.n++] = pulse;
> + if (n < frame_desc->dbl_pulses) {
> + idx2 = get_bits(gb, offset_nbits);
> + fcb.x[fcb.n] = n + 5 * idx2;
> + fcb.y[fcb.n++] = (idx1 >= idx2) ? pulse : -pulse;
> + }
> + }
> + }
>
The else{} case is very close to ff_decode_10_pulses_35bits().
> +/**
> + * Synthesize output samples for a single frame.
> + * @note we assume enough bits are available, caller should check.
> + *
> + * @param ctx WMA Voice decoder context
> + * @param gb bit I/O context (s->gb or one for cross-packet superframes)
> + * @param samples pointer to output sample buffer, has space for at least 160
> + * samples
> + * @param lsps LSP array
> + * @param prev_lsps array of previous frame's LSPs
> + * @param excitation target buffer for excitation signal
> + * @param synth target buffer for synthesized speech data
> + * @return 0 on success, <0 on error.
> + */
> +static int synth_frame(AVCodecContext *ctx, GetBitContext *gb,
> + float *samples,
> + const double *lsps, const double *prev_lsps,
> + float *excitation, float *synth)
> +{
> + WMAVoiceContext *s = ctx->priv_data;
> + int n, n_blocks_x2, log_n_blocks_x2, cur_pitch_val;
> + int pitch[MAX_BLOCKS], last_block_pitch;
> +
> + /* Parse frame type ("frame header"), see #frame_descs */
> + int bd_idx = s->vbm_tree[get_vlc2(gb, frame_type_vlc.table, 6, 3)],
> + block_nsamples = MAX_FRAMESIZE / frame_descs[bd_idx].n_blocks;
> +
> + if (bd_idx < 0) {
> + av_log(ctx, AV_LOG_ERROR,
> + "Invalid frame type VLC code, skipping\n");
> + return -1;
> + }
> +
> + /*
> + * Pitch (per ACB type):
> + * - type 0: unused
> + * - type 1: provided (globally) for the whole frame. In #synth_block(),
> + * we derive the "pitch-per-sample" for adaptive codebook
> + * reading.
> + * - type 2: provided per block (see just before the call to
> + * #synth_block()), so not read here.
> + */
>
I think it would be more readable if you moved the comments inside the
corresponding "case ACB_XX:" statements.
> + switch (frame_descs[bd_idx].acb_type) {
> + case ACB_TYPE_NONE:
> + memset(pitch, 0, sizeof(pitch[0]) * frame_descs[bd_idx].n_blocks);
>
If it is unused, is it really necessary to waste time zeroing it out?
Besides that, I don't have any other comments.
-Vitor
More information about the ffmpeg-devel
mailing list