[FFmpeg-devel] [PATCH] MPEG-4 Parametric Stereo decoder

Vitor Sessak vitor1001
Sun May 23 16:29:50 CEST 2010


On 05/20/2010 07:51 PM, Alex Converse wrote:
> Yes there are some places where it can be further optimized but I'm
> losing motivation on this quickly so perhaps some review will respark
> my interest.

I gave a look at it, a few comments...

> --- a/libavcodec/sbr.h
> +++ b/libavcodec/sbr.h
> @@ -31,6 +31,7 @@
>
>  #include <stdint.h>
>  #include "fft.h"
> +#include "ps.h"
>
>  /**
>   * Spectral Band Replication header - spectrum parameters that invoke a reset if they differ from the previous header.
> @@ -133,6 +134,7 @@ typedef struct {
>      ///The number of frequency bands in f_master
>      unsigned           n_master;
>      SBRData            data[2];
> +    PSContext          ps;

Missing doxy (what does ps == -1 means?).

> --- a/libavcodec/aac.c
> +++ b/libavcodec/aac.c

Missing doc update:

- * N (planned)          Parametric Stereo
+ * Y                    Parametric Stereo

>  av_cold void ff_aac_sbr_ctx_close(SpectralBandReplication *sbr)
> @@ -904,6 +908,7 @@ static void read_sbr_extension(AACContext *ac, SpectralBandReplication *sbr,
>                                 int bs_extension_id, int *num_bits_left)
>  {
>  //TODO - implement ps_data for parametric stereo parsing
> +//av_log(NULL, AV_LOG_ERROR, "frame %d sbr_extension %d %d\n", ac->avccontext->frame_number, bs_extension_id, *num_bits_left);
>      switch (bs_extension_id) {
>      case EXTENSION_ID_PS:
>          if (!ac->m4ac.ps) {
> @@ -911,8 +916,8 @@ static void read_sbr_extension(AACContext *ac, SpectralBandReplication *sbr,
>              skip_bits_long(gb, *num_bits_left); // bs_fill_bits
>              *num_bits_left = 0;
>          } else {
> -#if 0
> -            *num_bits_left -= ff_ps_data(gb, ps);
> +#if 1
> +            *num_bits_left -= ff_ps_data(ac->avccontext, gb, &sbr->ps);

Is this the intended behavior when ff_ps_data() returns -1?

> --- /dev/null
> +++ b/libavcodec/ps.c
> @@ -0,0 +1,1161 @@

[...]

> +enum {
> +    huff_iid_df1,
> +    huff_iid_dt1,
> +    huff_iid_df0,
> +    huff_iid_dt0,
> +    huff_icc_df,
> +    huff_icc_dt,
> +    huff_ipd_df,
> +    huff_ipd_dt,
> +    huff_opd_df,
> +    huff_opd_dt,
> +};

You seems to assume that these constants are at < 10, so I'd put

enum {
     huff_iid_df1 = 0,
     huff_iid_dt1,
     huff_iid_df0,
     huff_iid_dt0,
     huff_icc_df,
     huff_icc_dt,
     huff_ipd_df,
     huff_ipd_dt,
     huff_opd_df,
     huff_opd_dt,
};

> +#define PS_INIT_VLC_STATIC(num, size) \
> +    INIT_VLC_STATIC(&vlc_ps[num], 9, ps_tmp[num].table_size / ps_tmp[num].elem_size,    \
> +                    ps_tmp[num].ps_bits, 1, 1,                                          \
> +                    ps_tmp[num].ps_codes, ps_tmp[num].elem_size, ps_tmp[num].elem_size, \
> +                    size);
> +
> +#define PS_VLC_ROW(name) \
> +    { name ## _codes, name ## _bits, sizeof(name ## _codes), sizeof(name ## _codes[0]) }
> +

This is a nit, but I'd prefer if those where defined closer to where it 
is actually used.

> +static int iid_data(AVCodecContext *avctx, GetBitContext *gb, PSContext *ps, int e, int dt)
> +{
> +    int b;
> +    int table_idx = huff_iid[2*dt+ps->iid_quant];
> +    VLC_TYPE (*vlc_table)[2] = vlc_ps[table_idx].table;
> +    if (dt) {
> +        int e_prev = e ? e - 1 : ps->num_env_old - 1;
> +        e_prev = FFMAX(e_prev, 0);
> +        for (b = 0; b < ps->nr_iid_par; b++) {
> +            ps->iid_par[e][b] = ps->iid_par[e_prev][b] +
> +                                get_vlc2(gb, vlc_table, 9, 3) -
> +                                huff_offset[table_idx];
> +            if (FFABS(ps->iid_par[e][b]) > 7 + 8 * ps->iid_quant) {
> +                av_log(avctx, AV_LOG_ERROR, "illegal iid\n");
> +                return -1;
> +            }
> +        }
> +    } else {
> +        int prev = 0;
> +        for (b = 0; b < ps->nr_iid_par; b++) {
> +            prev += get_vlc2(gb, vlc_table, 9, 3) -
> +                    huff_offset[table_idx];
> +            ps->iid_par[e][b] = prev;
> +            if (FFABS(ps->iid_par[e][b]) > 7 + 8 * ps->iid_quant) {
> +                av_log(avctx, AV_LOG_ERROR, "illegal iid\n");
> +                return -1;
> +            }
> +        }
> +    }
> +    return 0;
> +}

"goto error;" would help factorizing the av_log and the return.

> +static int ps_extension(GetBitContext *gb, PSContext *ps, int ps_extension_id)
> +{
> +    int e;
> +    int count = get_bits_count(gb);
> +    if (!ps_extension_id) {
> +        ps->enable_ipdopd = get_bits1(gb);
> +        if (ps->enable_ipdopd) {
> +            for (e = 0; e < ps->num_env; e++) {
> +                int dt = get_bits1(gb);
> +                ipd_data(gb, ps, e, dt);
> +                dt = get_bits1(gb);
> +                opd_data(gb, ps, e, dt);
> +            }
> +        }
> +        skip_bits1(gb);      //reserved_ps
> +    }
> +    return get_bits_count(gb) - count;
> +}

if (ps_extension_id)
     return 0;

> +
> +int ff_ps_data(AVCodecContext *avctx, GetBitContext *gb, PSContext *ps)
> +{

[...]

> +    //Fix up envelopes
> +    if (!ps->num_env) {
> +        ps->num_env = 1;
> +        ps->border_position[1] = 31;
> +        if (ps->enable_iid && ps->num_env_old > 1) {
> +            memcpy(ps->iid_par, ps->iid_par+ps->num_env_old-1, sizeof(ps->iid_par[0]));
> +        }
> +        if (ps->enable_icc && ps->num_env_old > 1) {
> +            memcpy(ps->icc_par, ps->icc_par+ps->num_env_old-1, sizeof(ps->icc_par[0]));
> +        }
> +        if (ps->enable_ipdopd && ps->num_env_old > 1) {
> +            memcpy(ps->ipd_par, ps->ipd_par+ps->num_env_old-1, sizeof(ps->ipd_par[0]));
> +            memcpy(ps->opd_par, ps->opd_par+ps->num_env_old-1, sizeof(ps->opd_par[0]));
> +        }
> +    } else if (ps->border_position[ps->num_env] < numQMFSlots - 1) {
> +        //Create a fake envelope
> +        if (ps->enable_iid && ps->num_env_old > 1) {
> +            memcpy(ps->iid_par+ps->num_env, ps->iid_par+ps->num_env-1, sizeof(ps->iid_par[0]));
> +        }
> +        if (ps->enable_icc && ps->num_env_old > 1) {
> +            memcpy(ps->icc_par+ps->num_env, ps->icc_par+ps->num_env-1, sizeof(ps->icc_par[0]));
> +        }
> +        if (ps->enable_ipdopd) {
> +            memcpy(ps->ipd_par+ps->num_env, ps->ipd_par+ps->num_env-1, sizeof(ps->ipd_par[0]));
> +            memcpy(ps->opd_par+ps->num_env, ps->opd_par+ps->num_env-1, sizeof(ps->opd_par[0]));
> +        }
> +        ps->num_env++;
> +        ps->border_position[ps->num_env] = numQMFSlots - 1;
> +    }
> +

The memcpy's shouldn't be much hard to factorize.

> +/** Split one subband into 2 subsubbands with a real filter */
> +static av_noinline void hybrid2_re(float (*in)[2], float (*out)[32][2], const float filter[7], int len, int reverse)

Is adding av_noinline an optimization?

> +/** Table 8.46 */
> +#define MAP_GENERIC_10_TO_20(out, in, full) \
> +    int b;                                        \
> +    if (full)                                     \
> +        b = 9;                                    \
> +    else {                                        \
> +        b = 4;                                    \
> +        out[10] = 0;                              \
> +    }                                             \
> +    for (; b >= 0; b--) {                         \
> +        out[2*b+1] = out[2*b] = in[b];            \
> +    }
> +
> +static void map_idx_10_to_20(int8_t *par_mapped, const int8_t *par, int full)
> +{
> +    MAP_GENERIC_10_TO_20(par_mapped, par, full)
> +}
> +

This one does not need to be a macro...

> +#define MAP_GENERIC_34_TO_20(out, in, full) \
> +    out[ 0] = (2*in[ 0] +   in[ 1]) / 3;                               \
> +    out[ 1] = (  in[ 1] + 2*in[ 2]) / 3;                               \
> +    out[ 2] = (2*in[ 3] +   in[ 4]) / 3;                               \
> +    out[ 3] = (  in[ 4] + 2*in[ 5]) / 3;                               \
> +    out[ 4] = (  in[ 6] +   in[ 7]) / 2;                               \
> +    out[ 5] = (  in[ 8] +   in[ 9]) / 2;                               \
> +    out[ 6] =    in[10];                                               \
> +    out[ 7] =    in[11];                                               \
> +    out[ 8] = (  in[12] +   in[13]) / 2;                               \
> +    out[ 9] = (  in[14] +   in[15]) / 2;                               \
> +    out[10] =    in[16];                                               \
> +    if (full) {                                                        \
> +    out[11] =    in[17];                                               \
> +    out[12] =    in[18];                                               \
> +    out[13] =    in[19];                                               \
> +    out[14] = (  in[20] +   in[21]) / 2;                               \
> +    out[15] = (  in[22] +   in[23]) / 2;                               \
> +    out[16] = (  in[24] +   in[25]) / 2;                               \
> +    out[17] = (  in[26] +   in[27]) / 2;                               \
> +    out[18] = (  in[28] +   in[29] +   in[30] +   in[31]) / 4;         \
> +    out[19] = (  in[32] +   in[33]) / 2;                               \
> +    }
> +
> +static void map_idx_34_to_20(int8_t *par_mapped, const int8_t *par, int full)
> +{
> +    MAP_GENERIC_34_TO_20(par_mapped, par, full)
> +}
> +
> +static void map_val_34_to_20(float  par[PS_MAX_NUM_ENV][PS_MAX_NR_IIDICC], int e)
> +{
> +    MAP_GENERIC_34_TO_20(par[e], par[e], 1)
> +}

If map_val_34_to_20() is speed critical, using divisions instead of 
multiplications might be slow...

[...]

> +static av_noinline void stereo_processing(PSContext *ps, float (*l)[32][2], float (*r)[32][2], int is34)
> +{
> +    int e, b, k, n;
> +
> +    float (*H11)[PS_MAX_NUM_ENV+1][PS_MAX_NR_IIDICC] = ps->H11;
> +    float (*H12)[PS_MAX_NUM_ENV+1][PS_MAX_NR_IIDICC] = ps->H12;
> +    float (*H21)[PS_MAX_NUM_ENV+1][PS_MAX_NR_IIDICC] = ps->H21;
> +    float (*H22)[PS_MAX_NUM_ENV+1][PS_MAX_NR_IIDICC] = ps->H22;

This is just because "H11" is shorter than "ps->H11"?

> +    //Remapping
> +    for (b = 0; b < PS_MAX_NR_IIDICC; b++) {
> +        H11[0][0][b] = H11[0][ps->num_env_old][b];
> +        H12[0][0][b] = H12[0][ps->num_env_old][b];
> +        H21[0][0][b] = H21[0][ps->num_env_old][b];
> +        H22[0][0][b] = H22[0][ps->num_env_old][b];
> +        H11[1][0][b] = H11[1][ps->num_env_old][b];
> +        H12[1][0][b] = H12[1][ps->num_env_old][b];
> +        H21[1][0][b] = H21[1][ps->num_env_old][b];
> +        H22[1][0][b] = H22[1][ps->num_env_old][b];
> +    }

memcpy()?

Also this function is scary (not that I see an obvious way it could be 
otherwise), but a comment of the relevant section of the spec might be 
helpful.

-Vitor



More information about the ffmpeg-devel mailing list