[FFmpeg-devel] [PATCH] TwinVQ decoder
Michael Niedermayer
michaelni
Sat Jun 6 10:27:54 CEST 2009
On Mon, May 18, 2009 at 06:32:55PM +0200, Vitor Sessak wrote:
> Michael Niedermayer wrote:
>> On Wed, May 06, 2009 at 05:09:50PM +0200, Vitor Sessak wrote:
>>> Vitor Sessak wrote:
>>>> Hi,
>>>> I'm running out of ideas on how to improve this code, so I'm submitting
>>>> it for review. Among the things I'm not 100% happy with is the windowing
>>>> mess and the set_interleave_table_* code duplication. Any suggestion is
>>>> welcome.
>>>> About using dsputils->vector_fmul, it gets two parameters (one for
>>>> input/output and other for the window), and I need one that gets
>>>> three...
>>>> Finally, it would be very practical to start the review by the tables
>>>> since they are so big that every time I send this patch the message need
>>>> moderator approval...
>>> Ping?
>>>
>>> New version attached...
>> [...]
>>> /**
>>> * Parameters and tables that are different for each frame type
>>> */
>>> typedef struct {
>>> uint8_t sub; ///< Number subblocks in each frame
>>> const uint16_t *crb_tbl;
>>> uint8_t n_crb;
>>> const float *fw_cb;
>>> uint8_t fw_n_div;
>>> uint8_t fw_n_bit;
>>> const float *cb0;
>>> const float *cb1;
>>> uint8_t cb_len_read;
>>> } FrameMode;
>> All the fields should have comments or better names
>> this applies to more than this struct ...
>
> Should be better now. Also taken in consideration Diego's comments...
[...]
> /**
> * Parameters and tables that are different for every combination of
> * bitrate/sample rate
> */
> typedef struct {
> const FrameMode fmode[3]; ///< Frame type-dependant parameters
>
> uint16_t size; ///< Frame size in samples
> uint8_t n_lsp; ///< Number of lsp coefficients
> const float *lspcodebook;
> uint8_t lsp_bit0;
> uint8_t lsp_bit1;
> uint8_t lsp_bit2;
> uint8_t lsp_split;
> const float *pit_cb;
> uint8_t basf_bit;
> uint8_t pit_n_bit;
> uint8_t pit_cb_len;
> uint8_t pgain_bit;
> uint8_t pitch_cst;
I think some of these need documentation and or better variable names
[...]
> static void mul_vec_re(int cont, const float *buf1, const float *buf2, float *buf3)
re is to sh
i hope you understood me, after all you also assume the reader to guess that
re means reverse
> {
> while(cont--)
> *buf3++ = (*buf1++) * (*buf2--);
> }
>
> static void add_vec(int cont, const float *buf1, const float *buf2, float *buf3)
> {
> while(cont--)
> *buf3++ = *buf1++ + *buf2++;
> }
>
> /**
> * Evaluate a single LPC amplitude spectrum envelope coefficient from the line
> * spectrum pairs
> *
> * @param cos_lsp a vector of the cosinus of the LSP values
> * @param cos_val cos(PI*i/N) where i is the index of the LPC amplitude
> * @return the LPC value
> *
> * @todo reuse code from vorbis_dec.c: vorbis_floor0_decode
> */
> static float eval_lpc_spectrum(const float *cos_lsp, float cos_val, int size)
> {
> int i;
> float a = .5;
> float b = .5;
>
> for (i=0; i < size; i += 2) {
> a *= 2*cos_val - cos_lsp[i ];
> b *= 2*cos_val - cos_lsp[i+1];
> }
>
> a *= a*(2+2*cos_val);
> b *= b*(2-2*cos_val);
>
> return .5/(a+b);
> }
if iam not too tired the .5 and 2 are useless
>
> static void lsptowts(const float *cos_vals, float *a2, TwinContext *tctx)
doxy please
> {
> int i;
> const ModeTab *mtab = tctx->mtab;
> int size_s = mtab->size / mtab->fmode[FT_SHORT].sub;
> int mag_95 = 2 * mtab->fmode[FT_SHORT].sub;
> float *cos_TT = ff_cos_tabs[av_log2(mtab->size)-1];
>
> for (i=0; i < (size_s/2); i++) {
> a2[i] = eval_lpc_spectrum(cos_vals, cos_TT[mag_95*(2*i+1)],
> mtab->n_lsp);
> a2[size_s-i-1] = eval_lpc_spectrum(cos_vals, -cos_TT[mag_95*(2*i+1)],
> mtab->n_lsp);
> }
> }
>
> static void interpolate(float *out, float v1, float v2, int size)
> {
> int j;
>
> for (j=1; j < size; j++)
> out[j - 1] = v1 * (float)j/size +
> v2 * (1. - (float)j/size);
> }
this is obviously inefficient, you can di this with 1 arithmetic
operation per value output.
>
> #define GET_COS(sub, idx, forward, cos_tab, size) \
> ((forward) ? \
> ( cos_tab[ (sub)*(idx)]) : \
> (-cos_tab[2*(size) - (sub)*(idx)]))
>
> /**
> * Evaluates the LPC amplitude spectrum envelope from the line spectrum pairs.
> * Probably for speed reasons, the coefficients are evaluated like
> * siiiibiiiiisiiiibiiiiisiiiibiiiiisiiiibiiiiis ...
> * where s is an evaluated value, i is a value interpolated from the others
> * and b might be either calculated or interpolated, dependent on a
> * unexplained condition.
> *
> * @param step the size of a block "siiiibiiii"
> * @param in the cosinus of the LSP data
> */
> static inline void eval_lpcenv_or_interp(float *out, const float *in,
> int size, int step, int sub,
> const ModeTab *mtab, int forward)
> {
> int i;
> const float *cos_tab = ff_cos_tabs[av_log2(mtab->size)-1];
>
> for (i=-step; i < size - step; i += step) {
> out[i+step] =
> eval_lpc_spectrum(in,
> GET_COS(sub, (i+step)*4+2, forward,
> cos_tab, 2*mtab->size - sub*size*2),
> mtab->n_lsp);
> if (i < step)
> continue;
>
> if ((out[i + step] + out[i - step] > 1.95*out[i]) ||
> (out[i - step] <= out[i + step]))
> interpolate(out + i - step + 1, out[i], out[i-step], step);
> else {
> out[i - step/2] =
> eval_lpc_spectrum(in,
> GET_COS(sub, 4*i-2*step+2, forward,
> cos_tab,
> 2*mtab->size - sub*size*2),
> mtab->n_lsp);
> interpolate(out + i-step +1, out[i-step/2], out[i-step ], step/2);
> interpolate(out + i-step/2+1, out[i ], out[i-step/2], step/2);
> }
> }
> interpolate(out+i-step+1, out[i], out[i-step], step);
isnt it cleaner in 3 passes ?
[...]
> static int log_or_zero(int x)
> {
> return x ? av_log2(x) + 1: 0;
> }
av_log2(2*x)
[...]
>
> static void extend_pitch(int a1, const float *a2, float a3, float *a4,
> TwinContext *tctx)
> {
> const ModeTab *mtab = tctx->mtab;
> int v5;
> int v33;
> int v35;
> int fcmax_i, fcmin_i;
> int i, j;
> int basf_step = (1 << mtab->basf_bit) - 1;
> int isampf = tctx->avctx->sample_rate/1000;
> int ibps = tctx->avctx->bit_rate/(1000 * tctx->avctx->channels);
>
> fcmin_i = (40*2*mtab->size + isampf/2)/isampf;
>
> fcmax_i = (40*6*2*mtab->size + isampf/2)/isampf;
vertical align
>
> v33 = fcmin_i + (a1*(fcmax_i - fcmin_i) + basf_step/2)/basf_step;
>
> if ((isampf == 22) && (ibps == 32))
> v35 = ((v33 + 800LL)* mtab->pitch_cst * mtab->pit_cb_len * v33 +
> 200LL * mtab->size*v33)/
> (400LL * mtab->size * v33);
looks like some things can be facored out here
and, btw vXY are unacceptable variable names (this one must be fixed and
all of them)
> else
> v35 = (mtab->pitch_cst * mtab->pit_cb_len * v33)/(400 * mtab->size);
>
> for (i=0; i < v35/2; i++)
> a4[i] += a3 * a2[i];
>
> v5 = v35/2;
>
> for (i=0; i < mtab->pit_cb_len; i++) {
> int v38 = very_broken_op(v33, i + 1);
> for (j = - v35/2; j < (v35 + 1)/2 ; j++) {
> a4[j+v38] += a3 * a2[v5];
> ++v5;
>
> if (v5 >= mtab->pit_cb_len)
> return;
> }
> }
> }
>
> static void dec_gain(enum FrameType ftype, float *out, TwinContext *tctx)
> {
> const ModeTab *mtab = tctx->mtab;
> int i, j;
> int sub = mtab->fmode[ftype].sub;
> float step = AMP_MAX / ((1 << GAIN_BITS) - 1);
> float sub_step = SUB_AMP_MAX / ((1 << SUB_GAIN_BITS) - 1);
>
> if (ftype == FT_LONG) {
> for (i=0; i < tctx->avctx->channels; i++)
> out[i] = (1./(1<<10)) *
> mulawinv(step * .5 + step * get_bits(&tctx->gb, GAIN_BITS),
> AMP_MAX, MU);
> } else {
> for (i=0; i < tctx->avctx->channels; i++) {
> float val = (1./(1<<20)) *
> mulawinv(step *.5 + step * get_bits(&tctx->gb, GAIN_BITS),
> AMP_MAX, MU);
>
> for (j=0; j < sub; j++) {
> out[i*sub + j] =
> val*mulawinv(sub_step*.5 +
> sub_step* get_bits(&tctx->gb, SUB_GAIN_BITS),
> SUB_AMP_MAX, MU);
> }
> }
> }
> }
inefficient, please loose mulawinv() and the pow() it calls
>
> static void check_lsp(int a1, float *a2, float a3)
> {
> int i;
>
> for (i=0; i < a1; i++)
> if (a2[i] - a3 < a2[i-1]) {
the variable names are unacceptable, a1 and a2 are not even the same type
> float tmp =(a2[i-1] - a2[i] + a3)*.5;
> a2[i-1] -= tmp;
> a2[i] += tmp;
> }
> }
>
> static void check_lsp_sort(int a1, float *a2)
> {
> int i,j;
>
> for (i=0; i < a1; ++i)
> for (j=0; j < a1; j++)
> if (a2[j-1] > a2[j])
> FFSWAP(float, a2[j],a2[j-1]);
> }
same, also doing a O(n^2) by default sort is unacceptable
are the values mostly sorted already?
[...]
> static void dec_bark_env(const uint8_t *in, int use_hist, int ch, float *out,
> enum FrameType ftype, TwinContext *tctx)
> {
> const ModeTab *mtab = tctx->mtab;
> int i,j;
> float *hist = tctx->bark_hist[ftype][ch];
> float val = ((const float []) {0.4, 0.35, 0.28})[ftype];
> int bark_n_coef = mtab->fmode[ftype].bark_n_coef;
> int fw_cb_len = mtab->fmode[ftype].bark_env_size / bark_n_coef;
> int pos = 0;
>
> for (i = 0; i < fw_cb_len; i++)
> for (j = 0; j < bark_n_coef; j++) {
> int idx = j + i*bark_n_coef;
> float tmp2 = mtab->fmode[ftype].bark_cb[fw_cb_len*in[j] + i];
> float st = (use_hist) ?
> (1. - val) * tmp2 + val*hist[idx] + 1. : tmp2 + 1.;
>
> hist[idx] = tmp2;
> if (st < -1.) st = 1.;
>
> while (pos < (mtab->fmode[ftype].bark_tab)[idx])
> out[pos++] = st;
a memset_float() could come in handy, this one isnt the first case i
see that would benefit from it
[...]
> static void set_interleave_table_p(int16_t *a1, int a2, int a4,
> int a5, const uint8_t *a6)
> {
> int i,j;
>
> for (i=0; i < a2; i++)
> for (j=0; j < a6[i]; j++) {
> int val = a2*j;
>
> if (a4 == 1 || a2 % a4)
> val += i;
> else
if{
}else
better for future patch compactness
[...]
> static av_cold int twin_decode_init(AVCodecContext *avctx)
> {
> TwinContext *tctx = avctx->priv_data;
> int isampf = avctx->sample_rate/1000;
> int ibps = avctx->bit_rate/(1000 * avctx->channels);
> int i, j, k;
>
> tctx->avctx = avctx;
> tctx->skip_cnt = 2;
> avctx->sample_fmt = SAMPLE_FMT_FLT;
>
> if (avctx->channels > 2) {
> av_log(avctx, AV_LOG_ERROR, "Unsupported number of channels: %i\n",
> avctx->channels);
> return -1;
> }
>
> switch ((isampf << 8) + ibps) {
> case (8<<8) + 8:
> tctx->mtab = &mode_08_08;
> break;
> case (11<<8) + 8:
> tctx->mtab = &mode_11_08;
> break;
> case (11<<8) + 10:
> tctx->mtab = &mode_11_10;
> break;
case ( 8<<8) + 8: tctx->mtab = &mode_08_08; break;
case (11<<8) + 8: tctx->mtab = &mode_11_08; break;
case (11<<8) + 10: tctx->mtab = &mode_11_10; break;
...
[...]
--
Michael GnuPG fingerprint: 9FF2128B147EF6730BADF133611EC787040B0FAB
No snowflake in an avalanche ever feels responsible. -- Voltaire
-------------- next part --------------
A non-text attachment was scrubbed...
Name: not available
Type: application/pgp-signature
Size: 189 bytes
Desc: Digital signature
URL: <http://lists.mplayerhq.hu/pipermail/ffmpeg-devel/attachments/20090606/7beee8c1/attachment.pgp>
More information about the ffmpeg-devel
mailing list