[FFmpeg-devel] [PATCH 1/4] libavcodec: Implementation of AC3 fixed point decoder.

Sun Sep 30 17:55:38 CEST 2012

Hi Nedeljko,

On 09/25/2012 04:10 PM, Nedeljko Babic wrote:
> AC3 fixed point decoder is based on AC3 floating point
>   decoder that is already part of FFmpeg.
>
> It does not use FFmpegs FFT. It uses  FFT  developed for
>   optimization of floating point AC3 decoder and because
>   of that currently some of the files that implement this
>   FFT are located in libavcodec/mips folder.

> diff --git a/libavcodec/Makefile b/libavcodec/Makefile
> index 9b86f7c..cf88e48 100644
> --- a/libavcodec/Makefile
> +++ b/libavcodec/Makefile
> @@ -80,7 +80,8 @@ OBJS-$(CONFIG_AAC_ENCODER)             += aacenc.o aaccoder.o    \
>                                             mpeg4audio.o kbdwin.o  \
>                                             audio_frame_queue.o
>   OBJS-$(CONFIG_AASC_DECODER)            += aasc.o msrledec.o
> -OBJS-$(CONFIG_AC3_DECODER)             += ac3dec.o ac3dec_data.o ac3.o kbdwin.o
> +OBJS-$(CONFIG_AC3_DECODER)             += ac3dec_float.o ac3dec_data.o ac3.o kbdwin.o
> +OBJS-$(CONFIG_AC3_FIXED_DECODER)       += fft_ac3_init_tab.o fft_ac3_fixed.o ac3dec_fixed.o ac3dec_data.o ac3.o kbdwin.o
>   OBJS-$(CONFIG_AC3_ENCODER)             += ac3enc_float.o ac3enc.o ac3tab.o \
>                                             ac3.o kbdwin.o
>   OBJS-$(CONFIG_AC3_FIXED_ENCODER)       += ac3enc_fixed.o ac3enc.o ac3tab.o ac3.o
> diff --git a/libavcodec/ac3.h b/libavcodec/ac3.h
> index b9f34b9..0a1ff23 100644
> --- a/libavcodec/ac3.h
> +++ b/libavcodec/ac3.h
> @@ -27,6 +27,10 @@
>   #ifndef AVCODEC_AC3_H
>   #define AVCODEC_AC3_H
>
> +#ifndef CONFIG_AC3_FIXED
> +#   define CONFIG_AC3_FIXED 0
> +#endif

I don't think this is needed.

>   #define AC3_MAX_CODED_FRAME_SIZE 3840 /* in bytes */
>   #define AC3_MAX_CHANNELS 7            /**< maximum number of channels, including coupling channel */
>   #define CPL_CH 0                      /**< coupling channel index */
> @@ -51,6 +55,41 @@
>   #define EXP_D25   2
>   #define EXP_D45   3
>
> +#if CONFIG_AC3_FIXED
> +
> +#define CONFIG_FFT_FLOAT 0
> +
> +/* pre-defined gain values */
> +#define LEVEL_PLUS_3DB          5793
> +#define LEVEL_PLUS_1POINT5DB    4871
> +#define LEVEL_MINUS_1POINT5DB   3444
> +#define LEVEL_MINUS_3DB         2896
> +#define LEVEL_MINUS_4POINT5DB   2435
> +#define LEVEL_MINUS_6DB         2048
> +#define LEVEL_MINUS_9DB         1448
> +#define LEVEL_ZERO              0
> +#define LEVEL_ONE               4096
> +
> +#define MUL_BIAS1 65536
> +#define MUL_BIAS2 2147418112

If these are just the floating-point values converted to fixed point, it is 
better to use a macro so that each value is defined only once. See the 
FIXR() macro in mpegaudiodec.c.

> +#define AC3_RENAME(x)           x ## _fixed

> +#define AC3_CENTER(x)           center_levels[x]
> +#define AC3_SURROUND(x)         surround_levels[x]

Those two look unused.

> +#define AC3_LEVEL(x)            ((x)*23170 + 0x4000) >> 15
> +#define AC3_NORM(x,norm)        ((x)<<12)/norm

> +#define AC3_DYNAMIC_RANGE(x)    (x)

This looks very different from the float version; how can it work?

> +#define AC3_SPX_BLEND(x)        (x)

> +#define TYPE_PREFIX(x)          fixed_ ## x

Why not use a suffix everywhere, for simplicity and consistency?

> +#define AC3_DYNAMIC_RANGE1   0

Hmm, that is strange.

> +#define INTFLOAT int
> +#define SHORTFLOAT int16_t
> +
> +#define ROUND12(x) ((x)+2048)>>12
> +
> +#else
> +
>   /* pre-defined gain values */
>   #define LEVEL_PLUS_3DB          1.4142135623730950
>   #define LEVEL_PLUS_1POINT5DB    1.1892071150027209
> @@ -62,6 +101,26 @@
>   #define LEVEL_ZERO              0.0000000000000000
>   #define LEVEL_ONE               1.0000000000000000
>
> +#define MUL_BIAS1 1.0f
> +#define MUL_BIAS2 32767.0f
> +
> +#define AC3_RENAME(x)           x
> +#define AC3_CENTER(x)           (x)
> +#define AC3_SURROUND(x)         (x)
> +#define AC3_LEVEL(x)            (x)*LEVEL_MINUS_3DB
> +#define AC3_NORM(x,norm)        (x)*(1.0f/norm)
> +#define AC3_DYNAMIC_RANGE(x)    ((dynamic_range_tab[x] - 1.0) * s->drc_scale) + 1.0
> +#define AC3_SPX_BLEND(x)        (x)* (1.0f/32)
> +#define TYPE_PREFIX(x)          float_ ## x
> +
> +#define AC3_DYNAMIC_RANGE1   1.0f
> +#define INTFLOAT float
> +#define SHORTFLOAT float
> +
> +#define ROUND12(x) (x)
> +
> +#endif /* CONFIG_AC3_FIXED */
> +
>   /** Delta bit allocation strategy */
>   typedef enum {
>       DBA_REUSE = 0,
> diff --git a/libavcodec/ac3dec.c b/libavcodec/ac3dec.c
> index c608de8..5d82e23 100644
> --- a/libavcodec/ac3dec.c
> +++ b/libavcodec/ac3dec.c
> @@ -64,7 +64,7 @@ static const uint8_t quantization_tab[16] = {
>   static float dynamic_range_tab[256];
>
>   /** Adjustments in dB gain */
> -static const float gain_levels[9] = {
> +static const INTFLOAT AC3_RENAME(gain_levels)[9] = {
>       LEVEL_PLUS_3DB,
>       LEVEL_PLUS_1POINT5DB,
>       LEVEL_ONE,
> @@ -157,27 +157,32 @@ static av_cold void ac3_tables_init(void)
>   /**
>    * AVCodec initialization
>    */
> -static av_cold int ac3_decode_init(AVCodecContext *avctx)
> +static av_cold int AC3_RENAME(ac3_decode_init)(AVCodecContext *avctx)
>   {
>       AC3DecodeContext *s = avctx->priv_data;
>       s->avctx = avctx;
>
>       ff_ac3_common_init();
>       ac3_tables_init();
> -    ff_mdct_init(&s->imdct_256, 8, 1, 1.0);
> -    ff_mdct_init(&s->imdct_512, 9, 1, 1.0);
> -    ff_kbd_window_init(s->window, 5.0, 256);
> +    AC3_RENAME(ff_mdct_init)(&s->imdct_256, 8, 1, 1.0);

> +    AC3_RENAME(ff_mdct_init)(&s->imdct_512, 9, 1, 1.0);\

Why the backslash?

> +    AC3_RENAME(ff_kbd_window_init)(s->window, 5.0, 256);
>       ff_dsputil_init(&s->dsp, avctx);
>       ff_ac3dsp_init(&s->ac3dsp, avctx->flags & CODEC_FLAG_BITEXACT);
>       ff_fmt_convert_init(&s->fmt_conv, avctx);
>       av_lfg_init(&s->dith_state, 0);
>
> +#if CONFIG_AC3_FIXED
> +    ff_ac3_fft_init_fixed(&s->imdct_256);
> +    ff_ac3_fft_init_fixed(&s->imdct_512);
> +#endif
> +
>       /* set scale value for float to int16 conversion */
>       if (avctx->request_sample_fmt == AV_SAMPLE_FMT_FLT) {
> -        s->mul_bias = 1.0f;
> +        s->mul_bias = MUL_BIAS1;
>           avctx->sample_fmt = AV_SAMPLE_FMT_FLT;
>       } else {
> -        s->mul_bias = 32767.0f;
> +        s->mul_bias = MUL_BIAS2;
>           avctx->sample_fmt = AV_SAMPLE_FMT_S16;
>       }
>
> @@ -300,23 +305,23 @@ static int parse_frame_header(AC3DecodeContext *s)
>    * Set stereo downmixing coefficients based on frame header info.
>    * reference: Section 7.8.2 Downmixing Into Two Channels
>    */
> -static void set_downmix_coeffs(AC3DecodeContext *s)
> +static void AC3_RENAME(set_downmix_coeffs)(AC3DecodeContext *s)
>   {
>       int i;
> -    float cmix = gain_levels[s->  center_mix_level];
> -    float smix = gain_levels[s->surround_mix_level];
> -    float norm0, norm1;
> +    INTFLOAT cmix = AC3_RENAME(gain_levels)[s->  center_mix_level];
> +    INTFLOAT smix = AC3_RENAME(gain_levels)[s->surround_mix_level];
> +    INTFLOAT norm0, norm1;
>
>       for (i = 0; i < s->fbw_channels; i++) {
> -        s->downmix_coeffs[i][0] = gain_levels[ac3_default_coeffs[s->channel_mode][i][0]];
> -        s->downmix_coeffs[i][1] = gain_levels[ac3_default_coeffs[s->channel_mode][i][1]];
> +        s->downmix_coeffs[i][0] = AC3_RENAME(gain_levels)[ac3_default_coeffs[s->channel_mode][i][0]];
> +        s->downmix_coeffs[i][1] = AC3_RENAME(gain_levels)[ac3_default_coeffs[s->channel_mode][i][1]];
>       }
>       if (s->channel_mode > 1 && s->channel_mode & 1) {
>           s->downmix_coeffs[1][0] = s->downmix_coeffs[1][1] = cmix;
>       }
>       if (s->channel_mode == AC3_CHMODE_2F1R || s->channel_mode == AC3_CHMODE_3F1R) {
>           int nf = s->channel_mode - 2;
> -        s->downmix_coeffs[nf][0] = s->downmix_coeffs[nf][1] = smix * LEVEL_MINUS_3DB;
> +        s->downmix_coeffs[nf][0] = s->downmix_coeffs[nf][1] = AC3_LEVEL(smix);
>       }
>       if (s->channel_mode == AC3_CHMODE_2F2R || s->channel_mode == AC3_CHMODE_3F2R) {
>           int nf = s->channel_mode - 4;
> @@ -324,22 +329,20 @@ static void set_downmix_coeffs(AC3DecodeContext *s)
>       }
>
>       /* renormalize */
> -    norm0 = norm1 = 0.0;
> +    norm0 = norm1 = (INTFLOAT)0.0;

norm0 = norm1 = FIXR(0.0);

so this construct can also work for other constants.

>       for (i = 0; i < s->fbw_channels; i++) {
>           norm0 += s->downmix_coeffs[i][0];
>           norm1 += s->downmix_coeffs[i][1];
>       }

> -    norm0 = 1.0f / norm0;
> -    norm1 = 1.0f / norm1;
> +
>       for (i = 0; i < s->fbw_channels; i++) {
> -        s->downmix_coeffs[i][0] *= norm0;
> -        s->downmix_coeffs[i][1] *= norm1;
> +        s->downmix_coeffs[i][0] = AC3_NORM(s->downmix_coeffs[i][0],norm0);
> +        s->downmix_coeffs[i][1] = AC3_NORM(s->downmix_coeffs[i][1],norm1);
>       }

Division is much slower than multiplication. You can do the same trick 
in fixed point:

norm0 = (1 << bits) / norm0;
s->downmix_coeffs[i][0] = MUL(s->downmix_coeffs[i][0],norm0);

where

#define MUL(a,b) ((((int64_t) (a)) * (b)) >> (bits))

>       if (s->output_mode == AC3_CHMODE_MONO) {
>           for (i = 0; i < s->fbw_channels; i++)
> -            s->downmix_coeffs[i][0] = (s->downmix_coeffs[i][0] +
> -                                       s->downmix_coeffs[i][1]) * LEVEL_MINUS_3DB;
> +            s->downmix_coeffs[i][0] = AC3_LEVEL(s->downmix_coeffs[i][0] + s->downmix_coeffs[i][1]);
>       }
>   }

Hmm, is this correct?

> @@ -602,51 +605,25 @@ static inline void do_imdct(AC3DecodeContext *s, int channels)
>       for (ch = 1; ch <= channels; ch++) {
>           if (s->block_switch[ch]) {
>               int i;
> -            float *x = s->tmp_output + 128;
> +            FFTSample *x = s->tmp_output+128;
>               for (i = 0; i < 128; i++)
>                   x[i] = s->transform_coeffs[ch][2 * i];
> -            s->imdct_256.imdct_half(&s->imdct_256, s->tmp_output, x);
> -            s->dsp.vector_fmul_window(s->output[ch - 1], s->delay[ch - 1],
> +            s->imdct_256.AC3_RENAME(imdct_half)(&s->imdct_256, s->tmp_output, x);
> +            s->dsp.AC3_RENAME(vector_fmul_window)(s->output[ch - 1], s->delay[ch - 1],
>                                         s->tmp_output, s->window, 128);
>               for (i = 0; i < 128; i++)
>                   x[i] = s->transform_coeffs[ch][2 * i + 1];
> -            s->imdct_256.imdct_half(&s->imdct_256, s->delay[ch - 1], x);
> +            s->imdct_256.AC3_RENAME(imdct_half)(&s->imdct_256, s->delay[ch - 1], x);
>           } else {
> -            s->imdct_512.imdct_half(&s->imdct_512, s->tmp_output, s->transform_coeffs[ch]);
> -            s->dsp.vector_fmul_window(s->output[ch - 1], s->delay[ch - 1],
> +            s->imdct_512.AC3_RENAME(imdct_half)(&s->imdct_512, s->tmp_output, s->transform_coeffs[ch]);
> +            s->dsp.AC3_RENAME(vector_fmul_window)(s->output[ch - 1], s->delay[ch - 1],
>                                         s->tmp_output, s->window, 128);
> -            memcpy(s->delay[ch - 1], s->tmp_output + 128, 128 * sizeof(float));
> +            memcpy(s->delay[ch - 1], s->tmp_output + 128, 128 * sizeof(FFTSample));
>           }
>       }
>   }
>
> -/**
> - * Upmix delay samples from stereo to original channel layout.
> - */
> -static void ac3_upmix_delay(AC3DecodeContext *s)
> -{
> -    int channel_data_size = sizeof(s->delay[0]);
> -    switch (s->channel_mode) {
> -    case AC3_CHMODE_DUALMONO:
> -    case AC3_CHMODE_STEREO:
> -        /* upmix mono to stereo */
> -        memcpy(s->delay[1], s->delay[0], channel_data_size);
> -        break;
> -    case AC3_CHMODE_2F2R:
> -        memset(s->delay[3], 0, channel_data_size);
> -    case AC3_CHMODE_2F1R:
> -        memset(s->delay[2], 0, channel_data_size);
> -        break;
> -    case AC3_CHMODE_3F2R:
> -        memset(s->delay[4], 0, channel_data_size);
> -    case AC3_CHMODE_3F1R:
> -        memset(s->delay[3], 0, channel_data_size);
> -    case AC3_CHMODE_3F:
> -        memcpy(s->delay[2], s->delay[1], channel_data_size);
> -        memset(s->delay[1], 0, channel_data_size);
> -        break;
> -    }
> -}
> +

??

>   /**
>    * Decode band structure for coupling, spectral extension, or enhanced coupling.
> @@ -748,10 +725,9 @@ static int decode_audio_block(AC3DecodeContext *s, int blk)
>       i = !s->channel_mode;
>       do {
>           if (get_bits1(gbc)) {
> -            s->dynamic_range[i] = ((dynamic_range_tab[get_bits(gbc, 8)] - 1.0) *
> -                                  s->drc_scale) + 1.0;
> +            s->dynamic_range[i] = AC3_DYNAMIC_RANGE(get_bits(gbc, 8));
>           } else if (blk == 0) {
> -            s->dynamic_range[i] = 1.0f;
> +            s->dynamic_range[i] = AC3_DYNAMIC_RANGE1;
>           }
>       } while (i--);
>
> @@ -777,6 +753,10 @@ static int decode_audio_block(AC3DecodeContext *s, int blk)
>               if (start_subband > 7)
>                   start_subband += start_subband - 7;
>               end_subband    = get_bits(gbc, 3) + 5;
> +#if CONFIG_AC3_FIXED
> +            s->spx_dst_end_freq = end_freq_inv_tab[end_subband];
> +            end_subband += 5;
> +#endif
>               if (end_subband   > 7)
>                   end_subband   += end_subband   - 7;
>               dst_start_freq = dst_start_freq * 12 + 25;
> @@ -797,7 +777,9 @@ static int decode_audio_block(AC3DecodeContext *s, int blk)
>
>               s->spx_dst_start_freq = dst_start_freq;
>               s->spx_src_start_freq = src_start_freq;
> +#if !CONFIG_AC3_FIXED
>               s->spx_dst_end_freq   = dst_end_freq;
> +#endif
>
>               decode_band_structure(gbc, blk, s->eac3, 0,
>                                     start_subband, end_subband,
> @@ -817,18 +799,45 @@ static int decode_audio_block(AC3DecodeContext *s, int blk)
>           for (ch = 1; ch <= fbw_channels; ch++) {
>               if (s->channel_uses_spx[ch]) {
>                   if (s->first_spx_coords[ch] || get_bits1(gbc)) {
> -                    float spx_blend;
> +                    INTFLOAT spx_blend;
>                       int bin, master_spx_coord;
>
>                       s->first_spx_coords[ch] = 0;
> -                    spx_blend = get_bits(gbc, 5) * (1.0f/32);
> +                    spx_blend = AC3_SPX_BLEND(get_bits(gbc, 5));
>                       master_spx_coord = get_bits(gbc, 2) * 3;
>
>                       bin = s->spx_src_start_freq;
>                       for (bnd = 0; bnd < s->num_spx_bands; bnd++) {
>                           int bandsize;
>                           int spx_coord_exp, spx_coord_mant;
> -                        float nratio, sblend, nblend, spx_coord;
> +                        INTFLOAT nratio, sblend, nblend;
> +#if CONFIG_AC3_FIXED
> +                        int64_t accu;
> +                        /* calculate blending factors */
> +                        bandsize = s->spx_band_sizes[bnd];
> +                        accu = (long long)((bin << 23) + (bandsize << 22)) * s->spx_dst_end_freq;
> +                        nratio = (int)(accu >> 32);
> +                        nratio -= spx_blend << 18;
> +
> +                        if (nratio < 0)
> +                        {
> +                          nblend = 0;
> +                          sblend = 0x800000;
> +                        }
> +                        else if (nratio > 0x7fffff)
> +                        {
> +                          nblend = 0x800000;
> +                          sblend = 0;
> +                        }
> +                        else
> +                        {
> +                          nblend = ac3_fixed_sqrt(nratio);
> +                          accu = (long long)nblend * 1859775393;
> +                          nblend = (int)((accu + (1<<29)) >> 30);
> +                          sblend = ac3_fixed_sqrt(0x800000 - nratio);
> +                        }
> +#else

Indentation:

if (...) {
} else if (...) {
}

-Vitor