[FFmpeg-devel] [PATCH] TwinVQ decoder

Sat Jun 6 10:27:54 CEST 2009

On Mon, May 18, 2009 at 06:32:55PM +0200, Vitor Sessak wrote:
> Michael Niedermayer wrote:
>> On Wed, May 06, 2009 at 05:09:50PM +0200, Vitor Sessak wrote:
>>> Vitor Sessak wrote:
>>>> Hi,
>>>> I'm running out of ideas on how to improve this code, so I'm submitting 
>>>> it for review. Among the things I'm not 100% happy with is the windowing 
>>>> mess and the set_interleave_table_* code duplication. Any suggestion is 
>>>> welcome.
>>>> About using dsputils->vector_fmul, it gets two parameters (one for 
>>>> input/output and other for the window), and I need one that gets 
>>>> three...
>>>> Finally, it would be very practical to start the review by the tables 
>>>> since they are so big that every time I send this patch the message need 
>>>> moderator approval...
>>> Ping?
>>>
>>> New version attached...
>> [...]
>>> /**
>>>  * Parameters and tables that are different for each frame type
>>>  */
>>> typedef struct {
>>>     uint8_t         sub;      ///< Number subblocks in each frame
>>>     const uint16_t *crb_tbl;
>>>     uint8_t         n_crb;
>>>     const float    *fw_cb;
>>>     uint8_t         fw_n_div;
>>>     uint8_t         fw_n_bit;
>>>     const float    *cb0;
>>>     const float    *cb1;
>>>     uint8_t         cb_len_read;
>>> } FrameMode;
>> All the fields should have comments or better names
>> this applies to more than this struct ...
>
> Should be better now. Also taken in consideration Diego's comments...
[...]
> /**
>  * Parameters and tables that are different for every combination of
>  * bitrate/sample rate
>  */
> typedef struct {
>     const FrameMode fmode[3]; ///< Frame type-dependant parameters
> 
>     uint16_t     size;        ///< Frame size in samples
>     uint8_t      n_lsp;       ///< Number of lsp coefficients
>     const float *lspcodebook;
>     uint8_t      lsp_bit0;
>     uint8_t      lsp_bit1;
>     uint8_t      lsp_bit2;
>     uint8_t      lsp_split;
>     const float *pit_cb;
>     uint8_t      basf_bit;
>     uint8_t      pit_n_bit;
>     uint8_t      pit_cb_len;
>     uint8_t      pgain_bit;
>     uint8_t      pitch_cst;

I think some of these need documentation and or better variable names

[...]
> static void mul_vec_re(int cont, const float *buf1, const float *buf2, float *buf3)

re is to sh

i hope you understood me, after all you also assume the reader to guess that
re means reverse

> {
>     while(cont--)
>         *buf3++ = (*buf1++) * (*buf2--);
> }
> 
> static void add_vec(int cont, const float *buf1, const float *buf2, float *buf3)
> {
>     while(cont--)
>         *buf3++ = *buf1++ + *buf2++;
> }
> 

> /**
>  * Evaluate a single LPC amplitude spectrum envelope coefficient from the line
>  * spectrum pairs
>  *
>  * @param cos_lsp a vector of the cosinus of the LSP values
>  * @param cos_val cos(PI*i/N) where i is the index of the LPC amplitude
>  * @return the LPC value
>  *
>  * @todo reuse code from vorbis_dec.c: vorbis_floor0_decode
>  */
> static float eval_lpc_spectrum(const float *cos_lsp, float cos_val, int size)
> {
>     int i;
>     float a = .5;
>     float b = .5;
> 
>     for (i=0; i < size; i += 2) {
>         a *= 2*cos_val - cos_lsp[i  ];
>         b *= 2*cos_val - cos_lsp[i+1];
>     }
> 
>     a *= a*(2+2*cos_val);
>     b *= b*(2-2*cos_val);
> 
>     return .5/(a+b);
> }

if iam not too tired the .5 and 2 are useless

> 
> static void lsptowts(const float *cos_vals, float *a2, TwinContext *tctx)

doxy please

> {
>     int i;
>     const ModeTab *mtab = tctx->mtab;
>     int size_s = mtab->size / mtab->fmode[FT_SHORT].sub;
>     int mag_95 = 2 * mtab->fmode[FT_SHORT].sub;
>     float *cos_TT = ff_cos_tabs[av_log2(mtab->size)-1];
> 
>     for (i=0; i < (size_s/2); i++) {
>         a2[i]          = eval_lpc_spectrum(cos_vals,  cos_TT[mag_95*(2*i+1)],
>                                            mtab->n_lsp);
>         a2[size_s-i-1] = eval_lpc_spectrum(cos_vals, -cos_TT[mag_95*(2*i+1)],
>                                            mtab->n_lsp);
>     }
> }
> 

> static void interpolate(float *out, float v1, float v2, int size)
> {
>     int j;
> 
>     for (j=1; j < size; j++)
>         out[j - 1] = v1 *       (float)j/size +
>                      v2 * (1. - (float)j/size);
> }

this is obviously inefficient, you can di this with 1 arithmetic
operation per value output.

> 
> #define GET_COS(sub, idx, forward, cos_tab, size)  \
>     ((forward) ?                                   \
>      ( cos_tab[           (sub)*(idx)]) :          \
>      (-cos_tab[2*(size) - (sub)*(idx)]))
> 
> /**
>  * Evaluates the LPC amplitude spectrum envelope from the line spectrum pairs.
>  * Probably for speed reasons, the coefficients are evaluated like
>  * siiiibiiiiisiiiibiiiiisiiiibiiiiisiiiibiiiiis ...
>  * where s is an evaluated value, i is a value interpolated from the others
>  * and b might be either calculated or interpolated, dependent on a
>  * unexplained condition.
>  *
>  * @param step the size of a block "siiiibiiii"
>  * @param in the cosinus of the LSP data
>  */
> static inline void eval_lpcenv_or_interp(float *out, const float *in,
>                                          int size, int step, int sub,
>                                          const ModeTab *mtab, int forward)
> {
>     int i;
>     const float *cos_tab = ff_cos_tabs[av_log2(mtab->size)-1];
> 
>     for (i=-step; i < size - step; i += step) {
>         out[i+step] =
>             eval_lpc_spectrum(in,
>                               GET_COS(sub, (i+step)*4+2, forward,
>                                       cos_tab, 2*mtab->size - sub*size*2),
>                               mtab->n_lsp);
>         if (i < step)
>             continue;
> 
>         if ((out[i + step] + out[i - step] > 1.95*out[i]) ||
>             (out[i - step]               <= out[i + step]))
>             interpolate(out + i - step + 1, out[i], out[i-step], step);
>         else {
>             out[i - step/2] =
>                 eval_lpc_spectrum(in,
>                                   GET_COS(sub, 4*i-2*step+2, forward,
>                                           cos_tab,
>                                           2*mtab->size - sub*size*2),
>                                   mtab->n_lsp);
>             interpolate(out + i-step  +1, out[i-step/2], out[i-step  ], step/2);
>             interpolate(out + i-step/2+1, out[i       ], out[i-step/2], step/2);
>         }
>     }
>     interpolate(out+i-step+1, out[i], out[i-step], step);

isnt it cleaner in 3 passes ?

[...]
> static int log_or_zero(int x)
> {
>     return x ? av_log2(x) + 1: 0;
> }

av_log2(2*x)

[...]
> 
> static void extend_pitch(int a1, const float *a2, float a3, float *a4,
>                          TwinContext *tctx)
> {
>     const ModeTab *mtab = tctx->mtab;
>     int v5;
>     int v33;
>     int v35;
>     int fcmax_i, fcmin_i;
>     int i, j;
>     int basf_step = (1 << mtab->basf_bit) - 1;
>     int isampf = tctx->avctx->sample_rate/1000;
>     int ibps = tctx->avctx->bit_rate/(1000 * tctx->avctx->channels);
> 

>     fcmin_i = (40*2*mtab->size + isampf/2)/isampf;
> 
>     fcmax_i = (40*6*2*mtab->size + isampf/2)/isampf;

vertical align

> 
>     v33 = fcmin_i +  (a1*(fcmax_i - fcmin_i) + basf_step/2)/basf_step;
> 
>     if ((isampf == 22) && (ibps == 32))
>         v35 = ((v33 + 800LL)* mtab->pitch_cst * mtab->pit_cb_len * v33 +
>                200LL * mtab->size*v33)/
>             (400LL * mtab->size * v33);

looks like some things can be facored out here

and, btw vXY are unacceptable variable names (this one must be fixed and
all of them)

>     else
>         v35 = (mtab->pitch_cst * mtab->pit_cb_len * v33)/(400 * mtab->size);
> 
>     for (i=0; i < v35/2; i++)
>         a4[i] += a3 * a2[i];
> 
>     v5 = v35/2;
> 
>     for (i=0; i < mtab->pit_cb_len; i++) {
>         int v38 = very_broken_op(v33, i + 1);
>         for (j = - v35/2; j < (v35 + 1)/2 ; j++) {
>             a4[j+v38] += a3 * a2[v5];
>             ++v5;
> 
>             if (v5 >= mtab->pit_cb_len)
>                 return;
>         }
>     }
> }
> 

> static void dec_gain(enum FrameType ftype, float *out, TwinContext *tctx)
> {
>     const ModeTab *mtab = tctx->mtab;
>     int i, j;
>     int sub = mtab->fmode[ftype].sub;
>     float step     = AMP_MAX     / ((1 <<     GAIN_BITS) - 1);
>     float sub_step = SUB_AMP_MAX / ((1 << SUB_GAIN_BITS) - 1);
> 
>     if (ftype == FT_LONG) {
>         for (i=0; i < tctx->avctx->channels; i++)
>             out[i] = (1./(1<<10)) *
>                 mulawinv(step * .5 + step * get_bits(&tctx->gb, GAIN_BITS),
>                          AMP_MAX, MU);
>     } else {
>         for (i=0; i < tctx->avctx->channels; i++) {
>             float val = (1./(1<<20)) *
>                 mulawinv(step *.5 + step * get_bits(&tctx->gb, GAIN_BITS),
>                          AMP_MAX, MU);
> 
>             for (j=0; j < sub; j++) {
>                 out[i*sub + j] =
>                     val*mulawinv(sub_step*.5 +
>                                  sub_step* get_bits(&tctx->gb, SUB_GAIN_BITS),
>                                  SUB_AMP_MAX, MU);
>             }
>         }
>     }
> }

inefficient, please loose mulawinv() and the pow() it calls

> 
> static void check_lsp(int a1, float *a2, float a3)
> {
>     int i;
> 
>     for (i=0; i < a1; i++)
>         if (a2[i] - a3 < a2[i-1]) {

the variable names are unacceptable, a1 and a2 are not even the same type

>             float tmp =(a2[i-1] - a2[i] + a3)*.5;
>             a2[i-1] -= tmp;
>             a2[i] += tmp;
>         }
> }
> 

> static void  check_lsp_sort(int a1, float *a2)
> {
>     int i,j;
> 
>     for (i=0;  i < a1; ++i)
>         for (j=0; j < a1; j++)
>             if (a2[j-1] > a2[j])
>                 FFSWAP(float, a2[j],a2[j-1]);
> }

same, also doing a O(n^2) by default sort is unacceptable
are the values mostly sorted already?

[...]
> static void dec_bark_env(const uint8_t *in, int use_hist, int ch, float *out,
>                          enum FrameType ftype, TwinContext *tctx)
> {
>     const ModeTab *mtab = tctx->mtab;
>     int i,j;
>     float *hist = tctx->bark_hist[ftype][ch];
>     float val = ((const float []) {0.4, 0.35, 0.28})[ftype];
>     int bark_n_coef  = mtab->fmode[ftype].bark_n_coef;
>     int fw_cb_len = mtab->fmode[ftype].bark_env_size / bark_n_coef;
>     int pos = 0;
> 
>     for (i = 0; i < fw_cb_len; i++)
>         for (j = 0; j < bark_n_coef; j++) {
>             int idx = j + i*bark_n_coef;
>             float tmp2 = mtab->fmode[ftype].bark_cb[fw_cb_len*in[j] + i];
>             float st = (use_hist) ?
>                 (1. - val) * tmp2 + val*hist[idx] + 1. : tmp2 + 1.;
> 
>             hist[idx] = tmp2;
>             if (st < -1.) st = 1.;
> 
>             while (pos < (mtab->fmode[ftype].bark_tab)[idx])
>                 out[pos++] = st;

a memset_float() could come in handy, this one isnt the first case i
see that would benefit from it

[...]
> static void set_interleave_table_p(int16_t *a1, int a2, int a4,
>                                    int a5, const uint8_t *a6)
> {
>     int i,j;
> 
>     for (i=0; i < a2; i++)
>         for (j=0; j < a6[i]; j++) {
>             int val = a2*j;
> 
>             if (a4 == 1 || a2 % a4)
>                 val += i;
>             else

if{
}else
better for future patch compactness

[...]
> static av_cold int twin_decode_init(AVCodecContext *avctx)
> {
>     TwinContext *tctx = avctx->priv_data;
>     int isampf = avctx->sample_rate/1000;
>     int ibps = avctx->bit_rate/(1000 * avctx->channels);
>     int i, j, k;
> 
>     tctx->avctx       = avctx;
>     tctx->skip_cnt    = 2;
>     avctx->sample_fmt = SAMPLE_FMT_FLT;
> 
>     if (avctx->channels > 2) {
>         av_log(avctx, AV_LOG_ERROR, "Unsupported number of channels: %i\n",
>                avctx->channels);
>         return -1;
>     }
> 
>     switch ((isampf << 8) +  ibps) {
>     case (8<<8) + 8:
>         tctx->mtab = &mode_08_08;
>         break;
>     case (11<<8) + 8:
>         tctx->mtab = &mode_11_08;
>         break;
>     case (11<<8) + 10:
>         tctx->mtab = &mode_11_10;
>         break;

case ( 8<<8) +  8: tctx->mtab = &mode_08_08; break;
case (11<<8) +  8: tctx->mtab = &mode_11_08; break;
case (11<<8) + 10: tctx->mtab = &mode_11_10; break;
...

[...]
-- 
Michael     GnuPG fingerprint: 9FF2128B147EF6730BADF133611EC787040B0FAB

No snowflake in an avalanche ever feels responsible. -- Voltaire
-------------- next part --------------
A non-text attachment was scrubbed...
Name: not available
Type: application/pgp-signature
Size: 189 bytes
Desc: Digital signature
URL: <http://lists.mplayerhq.hu/pipermail/ffmpeg-devel/attachments/20090606/7beee8c1/attachment.pgp>