[FFmpeg-devel] [PATCH 4/5] Add the G723.1 demuxer and decoder

Sat Mar 19 15:02:32 CET 2011

On 03/17/2011 11:56 PM, banan at ludd.ltu.se wrote:
> From: Mohamed Naufal Basheer<naufal11 at gmail.com>

> --- /dev/null
> +++ b/libavcodec/g723_1.c
> @@ -0,0 +1,1081 @@
> +/*
> + * G.723.1 compatible decoder
> + * Copyright (c) 2006 Benjamin Larsson
> + * Copyright (c) 2010 Mohamed Naufal Basheer
> + *
> + * This file is part of FFmpeg.
> + *
> + * FFmpeg is free software; you can redistribute it and/or
> + * modify it under the terms of the GNU Lesser General Public
> + * License as published by the Free Software Foundation; either
> + * version 2.1 of the License, or (at your option) any later version.
> + *
> + * FFmpeg is distributed in the hope that it will be useful,
> + * but WITHOUT ANY WARRANTY; without even the implied warranty of
> + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
> + * Lesser General Public License for more details.
> + *
> + * You should have received a copy of the GNU Lesser General Public
> + * License along with FFmpeg; if not, write to the Free Software
> + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
> + */

First question: is it bitexact to the reference decoder?

> +/**
> + * @file
> + * G.723.1 compatible decoder
> + */
> +
> +#include "avcodec.h"
> +#define ALT_BITSTREAM_READER_LE
> +#include "get_bits.h"
> +#include "acelp_vectors.h"
> +#include "celp_filters.h"
> +#include "celp_math.h"
> +#include "lsp.h"
> +#include "libavutil/lzo.h"
> +#include "g723_1_data.h"
> +
> +typedef struct g723_1_context {
> +    G723_1_Subframe subframe[4];
> +    FrameType cur_frame_type;
> +    FrameType past_frame_type;
> +    Rate cur_rate;
> +    uint8_t lsp_index[LSP_BANDS];
> +    int pitch_lag[2];
> +    int erased_frames;
> +
> +    int16_t prev_lsp[LPC_ORDER];
> +    int16_t prev_excitation[PITCH_MAX];
> +    int16_t excitation[PITCH_MAX + FRAME_LEN];
> +    int16_t synth_mem[LPC_ORDER];
> +    int16_t fir_mem[LPC_ORDER];
> +    int     iir_mem[LPC_ORDER];
> +
> +    int random_seed;
> +    int interp_index;
> +    int interp_gain;
> +    int sid_gain;
> +    int cur_gain;
> +    int reflection_coef;
> +    int pf_gain;                 ///<  formant postfilter
> +                                 ///<  gain scaling unit memory
> +} G723_1_Context;
> +
> +static av_cold int g723_1_decode_init(AVCodecContext *avctx)
> +{
> +    G723_1_Context *p  = avctx->priv_data;
> +
> +    avctx->sample_fmt  = SAMPLE_FMT_S16;
> +    p->pf_gain         = 1<<  12;
> +    memcpy(p->prev_lsp, dc_lsp, LPC_ORDER * sizeof(int16_t));
> +
> +    return 0;
> +}
> +
> +/**
> + * Unpack the frame into parameters.
> + *
> + * @param p           the context
> + * @param buf         pointer to the input buffer
> + * @param buf_size    size of the input buffer
> + */
> +static int unpack_bitstream(G723_1_Context *p, const uint8_t *buf,
> +                            int buf_size)
> +{
> +    GetBitContext gb;
> +    int ad_cb_len;
> +    int temp, info_bits, i;
> +
> +    init_get_bits(&gb, buf, buf_size * 8);
> +
> +    /* Extract frame type and rate info */
> +    info_bits = get_bits(&gb, 2);
> +
> +    if (info_bits == 3) {
> +        p->cur_frame_type = UntransmittedFrame;
> +        return 0;
> +    }
> +
> +    /* Extract 24 bit lsp indices, 8 bit for each band */
> +    p->lsp_index[2] = get_bits(&gb, 8);
> +    p->lsp_index[1] = get_bits(&gb, 8);
> +    p->lsp_index[0] = get_bits(&gb, 8);
> +
> +    if (info_bits == 2) {
> +        p->cur_frame_type = SIDFrame;
> +        p->subframe[0].amp_index = get_bits(&gb, 6);
> +        return 0;
> +    }
> +
> +    /* Extract the info common to both rates */
> +    p->cur_rate       = info_bits ? Rate5k3 : Rate6k3;
> +    p->cur_frame_type = ActiveFrame;
> +
> +    p->pitch_lag[0] = get_bits(&gb, 7);
> +    if (p->pitch_lag[0]>  123)       /* test if forbidden code */
> +        return -1;
> +    p->pitch_lag[0] += PITCH_MIN;
> +    p->subframe[1].ad_cb_lag = get_bits(&gb, 2);
> +
> +    p->pitch_lag[1] = get_bits(&gb, 7);
> +    if (p->pitch_lag[1]>  123)
> +        return -1;
> +    p->pitch_lag[1] += PITCH_MIN;
> +    p->subframe[3].ad_cb_lag = get_bits(&gb, 2);
> +    p->subframe[0].ad_cb_lag = 1;
> +    p->subframe[2].ad_cb_lag = 1;
> +
> +    for (i = 0; i<  SUBFRAMES; i++) {
> +        /* Extract combined gain */
> +        temp = get_bits(&gb, 12);
> +        ad_cb_len = 170;
> +        p->subframe[i].dirac_train = 0;
> +        if (p->cur_rate == Rate6k3&&  p->pitch_lag[i>>  1]<  SUBFRAME_LEN - 2) {
> +            p->subframe[i].dirac_train = temp>>  11;
> +            temp&= 0x7ff;
> +            ad_cb_len = 85;
> +        }
> +        p->subframe[i].ad_cb_gain = FASTDIV(temp, GAIN_LEVELS);
> +        if (p->subframe[i].ad_cb_gain<  ad_cb_len) {
> +            p->subframe[i].amp_index = temp - p->subframe[i].ad_cb_gain *
> +                                       GAIN_LEVELS;
> +        } else {
> +            return -1;
> +        }
> +    }
> +
> +    p->subframe[0].grid_index = get_bits(&gb, 1);
> +    p->subframe[1].grid_index = get_bits(&gb, 1);
> +    p->subframe[2].grid_index = get_bits(&gb, 1);
> +    p->subframe[3].grid_index = get_bits(&gb, 1);

Use get_bits1() for these single-bit reads.

> +
> +    if (p->cur_rate == Rate6k3) {
> +        skip_bits(&gb, 1);  /* skip reserved bit */

Use skip_bits1() to skip a single bit.

> +        /* Compute pulse_pos index using the 13-bit combined position index */
> +        temp = get_bits(&gb, 13);
> +        p->subframe[0].pulse_pos = temp / 810;
> +
> +        temp -= p->subframe[0].pulse_pos * 810;
> +        p->subframe[1].pulse_pos = FASTDIV(temp, 90);
> +
> +        temp -= p->subframe[1].pulse_pos * 90;
> +        p->subframe[2].pulse_pos = FASTDIV(temp, 9);
> +        p->subframe[3].pulse_pos = temp - p->subframe[2].pulse_pos * 9;
> +
> +        p->subframe[0].pulse_pos = (p->subframe[0].pulse_pos<<  16) +
> +                                   get_bits(&gb, 16);
> +        p->subframe[1].pulse_pos = (p->subframe[1].pulse_pos<<  14) +
> +                                   get_bits(&gb, 14);
> +        p->subframe[2].pulse_pos = (p->subframe[2].pulse_pos<<  16) +
> +                                   get_bits(&gb, 16);
> +        p->subframe[3].pulse_pos = (p->subframe[3].pulse_pos<<  14) +
> +                                   get_bits(&gb, 14);
> +
> +        p->subframe[0].pulse_sign = get_bits(&gb, 6);
> +        p->subframe[1].pulse_sign = get_bits(&gb, 5);
> +        p->subframe[2].pulse_sign = get_bits(&gb, 6);
> +        p->subframe[3].pulse_sign = get_bits(&gb, 5);
> +    } else { /* Rate5k3 */
> +        p->subframe[0].pulse_pos  = get_bits(&gb, 12);
> +        p->subframe[1].pulse_pos  = get_bits(&gb, 12);
> +        p->subframe[2].pulse_pos  = get_bits(&gb, 12);
> +        p->subframe[3].pulse_pos  = get_bits(&gb, 12);
> +
> +        p->subframe[0].pulse_sign = get_bits(&gb, 4);
> +        p->subframe[1].pulse_sign = get_bits(&gb, 4);
> +        p->subframe[2].pulse_sign = get_bits(&gb, 4);
> +        p->subframe[3].pulse_sign = get_bits(&gb, 4);
> +    }
> +
> +    return 0;
> +}
> +
> +/**
> + * Bitexact implementation of sqrt(val/2).
> + */
> +static int16_t square_root(int val)
> +{
> +    int16_t res = 0;
> +    int16_t exp = 0x4000;
> +    int i;
> +
> +    for (i = 0; i<  14; i ++) {
> +        int res_exp = res + exp;
> +        if (val>= res_exp * res_exp<<  1)
> +            res += exp;
> +        exp>>= 1;
> +    }
> +    return res;
> +}

Isn't this the same as ((ff_sqrt(val << 1) >> 1) & (~1))?

> +/**
> + * Calculate the number of left-shifts required for normalizing the input.
> + *
> + * @param num   input number
> + * @param width width of the input, 16 bits(0) / 32 bits(1)
> + */
> +static int normalize_bits(int num, int width)
> +{
> +    int i = 0;
> +    int bits = (width) ? 31 : 15;
> +    int limit = 1<<  (bits - 1);
> +
> +    if (num) {
> +        if (num == -1)
> +            return bits;
> +        if (num<  0)
> +            num = ~num;
> +        for (i = 0; num<  limit; i++)
> +            num<<= 1;
> +    }
> +    return i;
> +}

I'm pretty sure the loop can be replaced by an av_log2()...

> +/**
> + * Scale vector contents based on the largest of their absolutes.
> + */

The doxygen comment should also document the return value:
 * @return ....

> +static int scale_vector(int16_t *vector, int length)
> +{
> +    int bits, scale, max = 0;
> +    int i;
> +
> +    const int16_t shift_table[16] = {
> +        0x0001, 0x0002, 0x0004, 0x0008, 0x0010, 0x0020, 0x0040, 0x0080,
> +        0x0100, 0x0200, 0x0400, 0x0800, 0x1000, 0x2000, 0x4000, 0x7fff
> +    };
> +
> +    for (i = 0; i<  length; i++)
> +        max = FFMAX(max, FFABS(vector[i]));
> +
> +    bits  = normalize_bits(max, 0);
> +    scale = shift_table[bits];
> +
> +    for (i = 0; i<  length; i++)
> +        vector[i] = (int16_t)(av_clipl_int32(vector[i] * scale<<  1)>>  4);

What is the point of clipping a 32-bit value to 32 bits?

I'll do a more in-depth review soon...

-Vitor