[FFmpeg-devel] [PATCH] MPEG-4 Parametric Stereo decoder

Alex Converse alex.converse
Sat May 29 19:30:16 CEST 2010


On Sun, May 23, 2010 at 10:29 AM, Vitor Sessak <vitor1001 at gmail.com> wrote:
> On 05/20/2010 07:51 PM, Alex Converse wrote:
>>
>> Yes there are some places where it can be further optimized but I'm
>> losing motivation on this quickly so perhaps some review will respark
>> my interest.
>
> I gave a look at it, a few comments...
>
>> --- a/libavcodec/sbr.h
>> +++ b/libavcodec/sbr.h
>> @@ -31,6 +31,7 @@
>>
>> ?#include <stdint.h>
>> ?#include "fft.h"
>> +#include "ps.h"
>>
>> ?/**
>> ?* Spectral Band Replication header - spectrum parameters that invoke a
>> reset if they differ from the previous header.
>> @@ -133,6 +134,7 @@ typedef struct {
>> ? ? ///The number of frequency bands in f_master
>> ? ? unsigned ? ? ? ? ? n_master;
>> ? ? SBRData ? ? ? ? ? ?data[2];
>> + ? ?PSContext ? ? ? ? ?ps;
>
> Missing doxy (what does ps == -1 means?).
>

This is never compared to -1, perhaps you are confusing it with
MPEG4AudioConfig.ps which does have doxy?

>> --- a/libavcodec/aac.c
>> +++ b/libavcodec/aac.c
>
> Missing doc update:
>
> - * N (planned) ? ? ? ? ?Parametric Stereo
> + * Y ? ? ? ? ? ? ? ? ? ?Parametric Stereo
>

Fixed

>> ?av_cold void ff_aac_sbr_ctx_close(SpectralBandReplication *sbr)
>> @@ -904,6 +908,7 @@ static void read_sbr_extension(AACContext *ac,
>> SpectralBandReplication *sbr,
>> ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ?int bs_extension_id, int *num_bits_left)
>> ?{
>> ?//TODO - implement ps_data for parametric stereo parsing
>> +//av_log(NULL, AV_LOG_ERROR, "frame %d sbr_extension %d %d\n",
>> ac->avccontext->frame_number, bs_extension_id, *num_bits_left);
>> ? ? switch (bs_extension_id) {
>> ? ? case EXTENSION_ID_PS:
>> ? ? ? ? if (!ac->m4ac.ps) {
>> @@ -911,8 +916,8 @@ static void read_sbr_extension(AACContext *ac,
>> SpectralBandReplication *sbr,
>> ? ? ? ? ? ? skip_bits_long(gb, *num_bits_left); // bs_fill_bits
>> ? ? ? ? ? ? *num_bits_left = 0;
>> ? ? ? ? } else {
>> -#if 0
>> - ? ? ? ? ? ?*num_bits_left -= ff_ps_data(gb, ps);
>> +#if 1
>> + ? ? ? ? ? ?*num_bits_left -= ff_ps_data(ac->avccontext, gb, &sbr->ps);
>
> Is this the intended behavior when ff_ps_data() returns -1?
>

No, switched to sbrdec style get bits management

>> --- /dev/null
>> +++ b/libavcodec/ps.c
>> @@ -0,0 +1,1161 @@
>
> [...]
>
>> +enum {
>> + ? ?huff_iid_df1,
>> + ? ?huff_iid_dt1,
>> + ? ?huff_iid_df0,
>> + ? ?huff_iid_dt0,
>> + ? ?huff_icc_df,
>> + ? ?huff_icc_dt,
>> + ? ?huff_ipd_df,
>> + ? ?huff_ipd_dt,
>> + ? ?huff_opd_df,
>> + ? ?huff_opd_dt,
>> +};
>
> You seems to assume that these constants are at < 10, so I'd put
>
> enum {
> ? ?huff_iid_df1 = 0,
> ? ?huff_iid_dt1,
> ? ?huff_iid_df0,
> ? ?huff_iid_dt0,
> ? ?huff_icc_df,
> ? ?huff_icc_dt,
> ? ?huff_ipd_df,
> ? ?huff_ipd_dt,
> ? ?huff_opd_df,
> ? ?huff_opd_dt,
> };
>

Doesn't the C spec imply that?

>> +#define PS_INIT_VLC_STATIC(num, size) \
>> + ? ?INIT_VLC_STATIC(&vlc_ps[num], 9, ps_tmp[num].table_size /
>> ps_tmp[num].elem_size, ? ?\
>> + ? ? ? ? ? ? ? ? ? ?ps_tmp[num].ps_bits, 1, 1,
>> ? ? ? ? ? ? ? ?\
>> + ? ? ? ? ? ? ? ? ? ?ps_tmp[num].ps_codes, ps_tmp[num].elem_size,
>> ps_tmp[num].elem_size, \
>> + ? ? ? ? ? ? ? ? ? ?size);
>> +
>> +#define PS_VLC_ROW(name) \
>> + ? ?{ name ## _codes, name ## _bits, sizeof(name ## _codes), sizeof(name
>> ## _codes[0]) }
>> +
>
> This is a nit, but I'd prefer if those where defined closer to where it is
> actually used.
>

moved

>> +static int iid_data(AVCodecContext *avctx, GetBitContext *gb, PSContext
>> *ps, int e, int dt)
>> +{
>> + ? ?int b;
>> + ? ?int table_idx = huff_iid[2*dt+ps->iid_quant];
>> + ? ?VLC_TYPE (*vlc_table)[2] = vlc_ps[table_idx].table;
>> + ? ?if (dt) {
>> + ? ? ? ?int e_prev = e ? e - 1 : ps->num_env_old - 1;
>> + ? ? ? ?e_prev = FFMAX(e_prev, 0);
>> + ? ? ? ?for (b = 0; b < ps->nr_iid_par; b++) {
>> + ? ? ? ? ? ?ps->iid_par[e][b] = ps->iid_par[e_prev][b] +
>> + ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ?get_vlc2(gb, vlc_table, 9, 3) -
>> + ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ?huff_offset[table_idx];
>> + ? ? ? ? ? ?if (FFABS(ps->iid_par[e][b]) > 7 + 8 * ps->iid_quant) {
>> + ? ? ? ? ? ? ? ?av_log(avctx, AV_LOG_ERROR, "illegal iid\n");
>> + ? ? ? ? ? ? ? ?return -1;
>> + ? ? ? ? ? ?}
>> + ? ? ? ?}
>> + ? ?} else {
>> + ? ? ? ?int prev = 0;
>> + ? ? ? ?for (b = 0; b < ps->nr_iid_par; b++) {
>> + ? ? ? ? ? ?prev += get_vlc2(gb, vlc_table, 9, 3) -
>> + ? ? ? ? ? ? ? ? ? ?huff_offset[table_idx];
>> + ? ? ? ? ? ?ps->iid_par[e][b] = prev;
>> + ? ? ? ? ? ?if (FFABS(ps->iid_par[e][b]) > 7 + 8 * ps->iid_quant) {
>> + ? ? ? ? ? ? ? ?av_log(avctx, AV_LOG_ERROR, "illegal iid\n");
>> + ? ? ? ? ? ? ? ?return -1;
>> + ? ? ? ? ? ?}
>> + ? ? ? ?}
>> + ? ?}
>> + ? ?return 0;
>> +}
>
> "goto error;" would help factorizing the av_log and the return.

fixed

>
>> +static int ps_extension(GetBitContext *gb, PSContext *ps, int
>> ps_extension_id)
>> +{
>> + ? ?int e;
>> + ? ?int count = get_bits_count(gb);
>> + ? ?if (!ps_extension_id) {
>> + ? ? ? ?ps->enable_ipdopd = get_bits1(gb);
>> + ? ? ? ?if (ps->enable_ipdopd) {
>> + ? ? ? ? ? ?for (e = 0; e < ps->num_env; e++) {
>> + ? ? ? ? ? ? ? ?int dt = get_bits1(gb);
>> + ? ? ? ? ? ? ? ?ipd_data(gb, ps, e, dt);
>> + ? ? ? ? ? ? ? ?dt = get_bits1(gb);
>> + ? ? ? ? ? ? ? ?opd_data(gb, ps, e, dt);
>> + ? ? ? ? ? ?}
>> + ? ? ? ?}
>> + ? ? ? ?skip_bits1(gb); ? ? ?//reserved_ps
>> + ? ?}
>> + ? ?return get_bits_count(gb) - count;
>> +}
>
> if (ps_extension_id)
> ? ?return 0;
>
>> +
>> +int ff_ps_data(AVCodecContext *avctx, GetBitContext *gb, PSContext *ps)
>> +{
>
> [...]
>
>> + ? ?//Fix up envelopes
>> + ? ?if (!ps->num_env) {
>> + ? ? ? ?ps->num_env = 1;
>> + ? ? ? ?ps->border_position[1] = 31;
>> + ? ? ? ?if (ps->enable_iid && ps->num_env_old > 1) {
>> + ? ? ? ? ? ?memcpy(ps->iid_par, ps->iid_par+ps->num_env_old-1,
>> sizeof(ps->iid_par[0]));
>> + ? ? ? ?}
>> + ? ? ? ?if (ps->enable_icc && ps->num_env_old > 1) {
>> + ? ? ? ? ? ?memcpy(ps->icc_par, ps->icc_par+ps->num_env_old-1,
>> sizeof(ps->icc_par[0]));
>> + ? ? ? ?}
>> + ? ? ? ?if (ps->enable_ipdopd && ps->num_env_old > 1) {
>> + ? ? ? ? ? ?memcpy(ps->ipd_par, ps->ipd_par+ps->num_env_old-1,
>> sizeof(ps->ipd_par[0]));
>> + ? ? ? ? ? ?memcpy(ps->opd_par, ps->opd_par+ps->num_env_old-1,
>> sizeof(ps->opd_par[0]));
>> + ? ? ? ?}
>> + ? ?} else if (ps->border_position[ps->num_env] < numQMFSlots - 1) {
>> + ? ? ? ?//Create a fake envelope
>> + ? ? ? ?if (ps->enable_iid && ps->num_env_old > 1) {
>> + ? ? ? ? ? ?memcpy(ps->iid_par+ps->num_env, ps->iid_par+ps->num_env-1,
>> sizeof(ps->iid_par[0]));
>> + ? ? ? ?}
>> + ? ? ? ?if (ps->enable_icc && ps->num_env_old > 1) {
>> + ? ? ? ? ? ?memcpy(ps->icc_par+ps->num_env, ps->icc_par+ps->num_env-1,
>> sizeof(ps->icc_par[0]));
>> + ? ? ? ?}
>> + ? ? ? ?if (ps->enable_ipdopd) {
>> + ? ? ? ? ? ?memcpy(ps->ipd_par+ps->num_env, ps->ipd_par+ps->num_env-1,
>> sizeof(ps->ipd_par[0]));
>> + ? ? ? ? ? ?memcpy(ps->opd_par+ps->num_env, ps->opd_par+ps->num_env-1,
>> sizeof(ps->opd_par[0]));
>> + ? ? ? ?}
>> + ? ? ? ?ps->num_env++;
>> + ? ? ? ?ps->border_position[ps->num_env] = numQMFSlots - 1;
>> + ? ?}
>> +
>
> The memcpy's shouldn't be much hard to factorize.
>
>> +/** Split one subband into 2 subsubbands with a real filter */
>> +static av_noinline void hybrid2_re(float (*in)[2], float (*out)[32][2],
>> const float filter[7], int len, int reverse)
>
> Is adding av_noinline an optimization?
>

That's old debug code I forgot to remove

>> +/** Table 8.46 */
>> +#define MAP_GENERIC_10_TO_20(out, in, full) \
>> + ? ?int b; ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ?\
>> + ? ?if (full) ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? \
>> + ? ? ? ?b = 9; ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ?\
>> + ? ?else { ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ?\
>> + ? ? ? ?b = 4; ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ?\
>> + ? ? ? ?out[10] = 0; ? ? ? ? ? ? ? ? ? ? ? ? ? ? ?\
>> + ? ?} ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? \
>> + ? ?for (; b >= 0; b--) { ? ? ? ? ? ? ? ? ? ? ? ? \
>> + ? ? ? ?out[2*b+1] = out[2*b] = in[b]; ? ? ? ? ? ?\
>> + ? ?}
>> +
>> +static void map_idx_10_to_20(int8_t *par_mapped, const int8_t *par, int
>> full)
>> +{
>> + ? ?MAP_GENERIC_10_TO_20(par_mapped, par, full)
>> +}
>> +
>
> This one does not need to be a macro...
>

Fixed

>> +#define MAP_GENERIC_34_TO_20(out, in, full) \
>> + ? ?out[ 0] = (2*in[ 0] + ? in[ 1]) / 3; ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? \
>> + ? ?out[ 1] = ( ?in[ 1] + 2*in[ 2]) / 3; ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? \
>> + ? ?out[ 2] = (2*in[ 3] + ? in[ 4]) / 3; ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? \
>> + ? ?out[ 3] = ( ?in[ 4] + 2*in[ 5]) / 3; ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? \
>> + ? ?out[ 4] = ( ?in[ 6] + ? in[ 7]) / 2; ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? \
>> + ? ?out[ 5] = ( ?in[ 8] + ? in[ 9]) / 2; ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? \
>> + ? ?out[ 6] = ? ?in[10]; ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? \
>> + ? ?out[ 7] = ? ?in[11]; ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? \
>> + ? ?out[ 8] = ( ?in[12] + ? in[13]) / 2; ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? \
>> + ? ?out[ 9] = ( ?in[14] + ? in[15]) / 2; ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? \
>> + ? ?out[10] = ? ?in[16]; ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? \
>> + ? ?if (full) { ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ?\
>> + ? ?out[11] = ? ?in[17]; ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? \
>> + ? ?out[12] = ? ?in[18]; ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? \
>> + ? ?out[13] = ? ?in[19]; ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? \
>> + ? ?out[14] = ( ?in[20] + ? in[21]) / 2; ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? \
>> + ? ?out[15] = ( ?in[22] + ? in[23]) / 2; ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? \
>> + ? ?out[16] = ( ?in[24] + ? in[25]) / 2; ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? \
>> + ? ?out[17] = ( ?in[26] + ? in[27]) / 2; ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? \
>> + ? ?out[18] = ( ?in[28] + ? in[29] + ? in[30] + ? in[31]) / 4; ? ? ? ? \
>> + ? ?out[19] = ( ?in[32] + ? in[33]) / 2; ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? \
>> + ? ?}
>> +
>> +static void map_idx_34_to_20(int8_t *par_mapped, const int8_t *par, int
>> full)
>> +{
>> + ? ?MAP_GENERIC_34_TO_20(par_mapped, par, full)
>> +}
>> +
>> +static void map_val_34_to_20(float
>> ?par[PS_MAX_NUM_ENV][PS_MAX_NR_IIDICC], int e)
>> +{
>> + ? ?MAP_GENERIC_34_TO_20(par[e], par[e], 1)
>> +}
>
> If map_val_34_to_20() is speed critical, using divisions instead of
> multiplications might be slow...
>

Fixed

> [...]
>
>> +static av_noinline void stereo_processing(PSContext *ps, float
>> (*l)[32][2], float (*r)[32][2], int is34)
>> +{
>> + ? ?int e, b, k, n;
>> +
>> + ? ?float (*H11)[PS_MAX_NUM_ENV+1][PS_MAX_NR_IIDICC] = ps->H11;
>> + ? ?float (*H12)[PS_MAX_NUM_ENV+1][PS_MAX_NR_IIDICC] = ps->H12;
>> + ? ?float (*H21)[PS_MAX_NUM_ENV+1][PS_MAX_NR_IIDICC] = ps->H21;
>> + ? ?float (*H22)[PS_MAX_NUM_ENV+1][PS_MAX_NR_IIDICC] = ps->H22;
>
> This is just because "H11" is shorter than "ps->H11"?
>

Since only the last column of H** carries state between frames I
figured it would be worth making H** local at some point and only
carting the last column in the context

>> + ? ?//Remapping
>> + ? ?for (b = 0; b < PS_MAX_NR_IIDICC; b++) {
>> + ? ? ? ?H11[0][0][b] = H11[0][ps->num_env_old][b];
>> + ? ? ? ?H12[0][0][b] = H12[0][ps->num_env_old][b];
>> + ? ? ? ?H21[0][0][b] = H21[0][ps->num_env_old][b];
>> + ? ? ? ?H22[0][0][b] = H22[0][ps->num_env_old][b];
>> + ? ? ? ?H11[1][0][b] = H11[1][ps->num_env_old][b];
>> + ? ? ? ?H12[1][0][b] = H12[1][ps->num_env_old][b];
>> + ? ? ? ?H21[1][0][b] = H21[1][ps->num_env_old][b];
>> + ? ? ? ?H22[1][0][b] = H22[1][ps->num_env_old][b];
>> + ? ?}
>
> memcpy()?
>

See above comment on H**

> Also this function is scary (not that I see an obvious way it could be
> otherwise), but a comment of the relevant section of the spec might be
> helpful.
>

I actually find it pretty straight forward. Consider the baseline case
first. The computed parameters are interpolated and used to rotate the
mid and decorrelated signals. The ipd/opd case computes an imaginary
component to the rotation and does the same.



More information about the ffmpeg-devel mailing list