[FFmpeg-devel] [PATCH] MPEG-4 Parametric Stereo decoder
Vitor Sessak
vitor1001
Sun May 23 16:29:50 CEST 2010
On 05/20/2010 07:51 PM, Alex Converse wrote:
> Yes there are some places where it can be further optimized but I'm
> losing motivation on this quickly so perhaps some review will respark
> my interest.
I gave a look at it, a few comments...
> --- a/libavcodec/sbr.h
> +++ b/libavcodec/sbr.h
> @@ -31,6 +31,7 @@
>
> #include <stdint.h>
> #include "fft.h"
> +#include "ps.h"
>
> /**
> * Spectral Band Replication header - spectrum parameters that invoke a reset if they differ from the previous header.
> @@ -133,6 +134,7 @@ typedef struct {
> ///The number of frequency bands in f_master
> unsigned n_master;
> SBRData data[2];
> + PSContext ps;
Missing doxy (what does ps == -1 means?).
> --- a/libavcodec/aac.c
> +++ b/libavcodec/aac.c
Missing doc update:
- * N (planned) Parametric Stereo
+ * Y Parametric Stereo
> av_cold void ff_aac_sbr_ctx_close(SpectralBandReplication *sbr)
> @@ -904,6 +908,7 @@ static void read_sbr_extension(AACContext *ac, SpectralBandReplication *sbr,
> int bs_extension_id, int *num_bits_left)
> {
> //TODO - implement ps_data for parametric stereo parsing
> +//av_log(NULL, AV_LOG_ERROR, "frame %d sbr_extension %d %d\n", ac->avccontext->frame_number, bs_extension_id, *num_bits_left);
> switch (bs_extension_id) {
> case EXTENSION_ID_PS:
> if (!ac->m4ac.ps) {
> @@ -911,8 +916,8 @@ static void read_sbr_extension(AACContext *ac, SpectralBandReplication *sbr,
> skip_bits_long(gb, *num_bits_left); // bs_fill_bits
> *num_bits_left = 0;
> } else {
> -#if 0
> - *num_bits_left -= ff_ps_data(gb, ps);
> +#if 1
> + *num_bits_left -= ff_ps_data(ac->avccontext, gb, &sbr->ps);
Is this the intended behavior when ff_ps_data() returns -1?
> --- /dev/null
> +++ b/libavcodec/ps.c
> @@ -0,0 +1,1161 @@
[...]
> +enum {
> + huff_iid_df1,
> + huff_iid_dt1,
> + huff_iid_df0,
> + huff_iid_dt0,
> + huff_icc_df,
> + huff_icc_dt,
> + huff_ipd_df,
> + huff_ipd_dt,
> + huff_opd_df,
> + huff_opd_dt,
> +};
You seems to assume that these constants are at < 10, so I'd put
enum {
huff_iid_df1 = 0,
huff_iid_dt1,
huff_iid_df0,
huff_iid_dt0,
huff_icc_df,
huff_icc_dt,
huff_ipd_df,
huff_ipd_dt,
huff_opd_df,
huff_opd_dt,
};
> +#define PS_INIT_VLC_STATIC(num, size) \
> + INIT_VLC_STATIC(&vlc_ps[num], 9, ps_tmp[num].table_size / ps_tmp[num].elem_size, \
> + ps_tmp[num].ps_bits, 1, 1, \
> + ps_tmp[num].ps_codes, ps_tmp[num].elem_size, ps_tmp[num].elem_size, \
> + size);
> +
> +#define PS_VLC_ROW(name) \
> + { name ## _codes, name ## _bits, sizeof(name ## _codes), sizeof(name ## _codes[0]) }
> +
This is a nit, but I'd prefer if those where defined closer to where it
is actually used.
> +static int iid_data(AVCodecContext *avctx, GetBitContext *gb, PSContext *ps, int e, int dt)
> +{
> + int b;
> + int table_idx = huff_iid[2*dt+ps->iid_quant];
> + VLC_TYPE (*vlc_table)[2] = vlc_ps[table_idx].table;
> + if (dt) {
> + int e_prev = e ? e - 1 : ps->num_env_old - 1;
> + e_prev = FFMAX(e_prev, 0);
> + for (b = 0; b < ps->nr_iid_par; b++) {
> + ps->iid_par[e][b] = ps->iid_par[e_prev][b] +
> + get_vlc2(gb, vlc_table, 9, 3) -
> + huff_offset[table_idx];
> + if (FFABS(ps->iid_par[e][b]) > 7 + 8 * ps->iid_quant) {
> + av_log(avctx, AV_LOG_ERROR, "illegal iid\n");
> + return -1;
> + }
> + }
> + } else {
> + int prev = 0;
> + for (b = 0; b < ps->nr_iid_par; b++) {
> + prev += get_vlc2(gb, vlc_table, 9, 3) -
> + huff_offset[table_idx];
> + ps->iid_par[e][b] = prev;
> + if (FFABS(ps->iid_par[e][b]) > 7 + 8 * ps->iid_quant) {
> + av_log(avctx, AV_LOG_ERROR, "illegal iid\n");
> + return -1;
> + }
> + }
> + }
> + return 0;
> +}
"goto error;" would help factorizing the av_log and the return.
> +static int ps_extension(GetBitContext *gb, PSContext *ps, int ps_extension_id)
> +{
> + int e;
> + int count = get_bits_count(gb);
> + if (!ps_extension_id) {
> + ps->enable_ipdopd = get_bits1(gb);
> + if (ps->enable_ipdopd) {
> + for (e = 0; e < ps->num_env; e++) {
> + int dt = get_bits1(gb);
> + ipd_data(gb, ps, e, dt);
> + dt = get_bits1(gb);
> + opd_data(gb, ps, e, dt);
> + }
> + }
> + skip_bits1(gb); //reserved_ps
> + }
> + return get_bits_count(gb) - count;
> +}
if (ps_extension_id)
return 0;
> +
> +int ff_ps_data(AVCodecContext *avctx, GetBitContext *gb, PSContext *ps)
> +{
[...]
> + //Fix up envelopes
> + if (!ps->num_env) {
> + ps->num_env = 1;
> + ps->border_position[1] = 31;
> + if (ps->enable_iid && ps->num_env_old > 1) {
> + memcpy(ps->iid_par, ps->iid_par+ps->num_env_old-1, sizeof(ps->iid_par[0]));
> + }
> + if (ps->enable_icc && ps->num_env_old > 1) {
> + memcpy(ps->icc_par, ps->icc_par+ps->num_env_old-1, sizeof(ps->icc_par[0]));
> + }
> + if (ps->enable_ipdopd && ps->num_env_old > 1) {
> + memcpy(ps->ipd_par, ps->ipd_par+ps->num_env_old-1, sizeof(ps->ipd_par[0]));
> + memcpy(ps->opd_par, ps->opd_par+ps->num_env_old-1, sizeof(ps->opd_par[0]));
> + }
> + } else if (ps->border_position[ps->num_env] < numQMFSlots - 1) {
> + //Create a fake envelope
> + if (ps->enable_iid && ps->num_env_old > 1) {
> + memcpy(ps->iid_par+ps->num_env, ps->iid_par+ps->num_env-1, sizeof(ps->iid_par[0]));
> + }
> + if (ps->enable_icc && ps->num_env_old > 1) {
> + memcpy(ps->icc_par+ps->num_env, ps->icc_par+ps->num_env-1, sizeof(ps->icc_par[0]));
> + }
> + if (ps->enable_ipdopd) {
> + memcpy(ps->ipd_par+ps->num_env, ps->ipd_par+ps->num_env-1, sizeof(ps->ipd_par[0]));
> + memcpy(ps->opd_par+ps->num_env, ps->opd_par+ps->num_env-1, sizeof(ps->opd_par[0]));
> + }
> + ps->num_env++;
> + ps->border_position[ps->num_env] = numQMFSlots - 1;
> + }
> +
The memcpy's shouldn't be much hard to factorize.
> +/** Split one subband into 2 subsubbands with a real filter */
> +static av_noinline void hybrid2_re(float (*in)[2], float (*out)[32][2], const float filter[7], int len, int reverse)
Is adding av_noinline an optimization?
> +/** Table 8.46 */
> +#define MAP_GENERIC_10_TO_20(out, in, full) \
> + int b; \
> + if (full) \
> + b = 9; \
> + else { \
> + b = 4; \
> + out[10] = 0; \
> + } \
> + for (; b >= 0; b--) { \
> + out[2*b+1] = out[2*b] = in[b]; \
> + }
> +
> +static void map_idx_10_to_20(int8_t *par_mapped, const int8_t *par, int full)
> +{
> + MAP_GENERIC_10_TO_20(par_mapped, par, full)
> +}
> +
This one does not need to be a macro...
> +#define MAP_GENERIC_34_TO_20(out, in, full) \
> + out[ 0] = (2*in[ 0] + in[ 1]) / 3; \
> + out[ 1] = ( in[ 1] + 2*in[ 2]) / 3; \
> + out[ 2] = (2*in[ 3] + in[ 4]) / 3; \
> + out[ 3] = ( in[ 4] + 2*in[ 5]) / 3; \
> + out[ 4] = ( in[ 6] + in[ 7]) / 2; \
> + out[ 5] = ( in[ 8] + in[ 9]) / 2; \
> + out[ 6] = in[10]; \
> + out[ 7] = in[11]; \
> + out[ 8] = ( in[12] + in[13]) / 2; \
> + out[ 9] = ( in[14] + in[15]) / 2; \
> + out[10] = in[16]; \
> + if (full) { \
> + out[11] = in[17]; \
> + out[12] = in[18]; \
> + out[13] = in[19]; \
> + out[14] = ( in[20] + in[21]) / 2; \
> + out[15] = ( in[22] + in[23]) / 2; \
> + out[16] = ( in[24] + in[25]) / 2; \
> + out[17] = ( in[26] + in[27]) / 2; \
> + out[18] = ( in[28] + in[29] + in[30] + in[31]) / 4; \
> + out[19] = ( in[32] + in[33]) / 2; \
> + }
> +
> +static void map_idx_34_to_20(int8_t *par_mapped, const int8_t *par, int full)
> +{
> + MAP_GENERIC_34_TO_20(par_mapped, par, full)
> +}
> +
> +static void map_val_34_to_20(float par[PS_MAX_NUM_ENV][PS_MAX_NR_IIDICC], int e)
> +{
> + MAP_GENERIC_34_TO_20(par[e], par[e], 1)
> +}
If map_val_34_to_20() is speed critical, using divisions instead of
multiplications might be slow...
[...]
> +static av_noinline void stereo_processing(PSContext *ps, float (*l)[32][2], float (*r)[32][2], int is34)
> +{
> + int e, b, k, n;
> +
> + float (*H11)[PS_MAX_NUM_ENV+1][PS_MAX_NR_IIDICC] = ps->H11;
> + float (*H12)[PS_MAX_NUM_ENV+1][PS_MAX_NR_IIDICC] = ps->H12;
> + float (*H21)[PS_MAX_NUM_ENV+1][PS_MAX_NR_IIDICC] = ps->H21;
> + float (*H22)[PS_MAX_NUM_ENV+1][PS_MAX_NR_IIDICC] = ps->H22;
This is just because "H11" is shorter than "ps->H11"?
> + //Remapping
> + for (b = 0; b < PS_MAX_NR_IIDICC; b++) {
> + H11[0][0][b] = H11[0][ps->num_env_old][b];
> + H12[0][0][b] = H12[0][ps->num_env_old][b];
> + H21[0][0][b] = H21[0][ps->num_env_old][b];
> + H22[0][0][b] = H22[0][ps->num_env_old][b];
> + H11[1][0][b] = H11[1][ps->num_env_old][b];
> + H12[1][0][b] = H12[1][ps->num_env_old][b];
> + H21[1][0][b] = H21[1][ps->num_env_old][b];
> + H22[1][0][b] = H22[1][ps->num_env_old][b];
> + }
memcpy()?
Also this function is scary (not that I see an obvious way it could be
otherwise), but a comment of the relevant section of the spec might be
helpful.
-Vitor
More information about the ffmpeg-devel
mailing list