[FFmpeg-devel] [PATCH 1/4] libavcodec: Implementation of AC3 fixed point decoder.
Vitor Sessak
vitor1001 at gmail.com
Mon Oct 8 18:38:20 CEST 2012
On 10/03/2012 03:17 PM, Babic, Nedeljko wrote:
> Hi Vitor,
Hello!
>>> AC3 fixed point decoder is based on AC3 floating point
>>> decoder that is already part of FFmpeg.
>>>
>>> It does not use FFmpegs FFT. It uses FFT developed for
>>> optimization of floating point AC3 decoder and because
>>> of that currently some of the files that implement this
>>> FFT are located in libavcodec/mips folder.
>>
>>> diff --git a/libavcodec/Makefile b/libavcodec/Makefile
>>> index 9b86f7c..cf88e48 100644
>>> --- a/libavcodec/Makefile
>>> +++ b/libavcodec/Makefile
>>> @@ -80,7 +80,8 @@ OBJS-$(CONFIG_AAC_ENCODER) += aacenc.o aaccoder.o \
>>> mpeg4audio.o kbdwin.o \
>>> audio_frame_queue.o
>>> OBJS-$(CONFIG_AASC_DECODER) += aasc.o msrledec.o
>>> -OBJS-$(CONFIG_AC3_DECODER) += ac3dec.o ac3dec_data.o ac3.o kbdwin.o
>>> +OBJS-$(CONFIG_AC3_DECODER) += ac3dec_float.o ac3dec_data.o ac3.o kbdwin.o
>>> +OBJS-$(CONFIG_AC3_FIXED_DECODER) += fft_ac3_init_tab.o fft_ac3_fixed.o ac3dec_fixed.o ac3dec_data.o ac3.o kbdwin.o
>>> OBJS-$(CONFIG_AC3_ENCODER) += ac3enc_float.o ac3enc.o ac3tab.o \
>>> ac3.o kbdwin.o
>>> OBJS-$(CONFIG_AC3_FIXED_ENCODER) += ac3enc_fixed.o ac3enc.o ac3tab.o ac3.o
>>> diff --git a/libavcodec/ac3.h b/libavcodec/ac3.h
>>> index b9f34b9..0a1ff23 100644
>>> --- a/libavcodec/ac3.h
>>> +++ b/libavcodec/ac3.h
>>> @@ -27,6 +27,10 @@
>>> #ifndef AVCODEC_AC3_H
>>> #define AVCODEC_AC3_H
>>>
>>> +#ifndef CONFIG_AC3_FIXED
>>> +# define CONFIG_AC3_FIXED 0
>>> +#endif
>>
>> I don't think this is needed.
>
> But how can I distinguish between fixed and floating point macros without this?
> Should I use just #ifdef CONFIG_AC3_FIXED?
Isn't CONFIG_AC3_FIXED already set to 0/1 by configure?
>>> #define AC3_MAX_CODED_FRAME_SIZE 3840 /* in bytes */
>>> #define AC3_MAX_CHANNELS 7 /**< maximum number of channels, including coupling channel */
>>> #define CPL_CH 0 /**< coupling channel index */
>>> @@ -51,6 +55,41 @@
>>> #define EXP_D25 2
>>> #define EXP_D45 3
>>>
>>> +#if CONFIG_AC3_FIXED
>>> +
>>> +#define CONFIG_FFT_FLOAT 0
>>> +
>>> +/* pre-defined gain values */
>>> +#define LEVEL_PLUS_3DB 5793
>>> +#define LEVEL_PLUS_1POINT5DB 4871
>>> +#define LEVEL_MINUS_1POINT5DB 3444
>>> +#define LEVEL_MINUS_3DB 2896
>>> +#define LEVEL_MINUS_4POINT5DB 2435
>>> +#define LEVEL_MINUS_6DB 2048
>>> +#define LEVEL_MINUS_9DB 1448
>>> +#define LEVEL_ZERO 0
>>> +#define LEVEL_ONE 4096
>>> +
>>> +#define MUL_BIAS1 65536
>>> +#define MUL_BIAS2 2147418112
>>
>> If this is just the floating-point values converted to fixed, it is
>> better to use a macro to define it only once. See FIXR() macro in
>> mpegaudiodec.c.
>>
>
> You are correct and I will create macro for this.
>
>>> +#define AC3_RENAME(x) x ## _fixed
>>
>>> +#define AC3_CENTER(x) center_levels[x]
>>> +#define AC3_SURROUND(x) surround_levels[x]
>>
>> Those two looks unused.
>>
>
> They are leftovers from previous version of code. Sorry.
> I will remove them.
>
>>> +#define AC3_LEVEL(x) ((x)*23170 + 0x4000) >> 15
>>> +#define AC3_NORM(x,norm) ((x)<<12)/norm
>>
>>> +#define AC3_DYNAMIC_RANGE(x) (x)
>>
>> This looks very different from the float version, how can it work?
>>
>
> The way in which this is used in fixed point version of code is different
> from the way it is used in floating point version. You can see in
> function scale_coefs how dynamic range is used.
>
>>> +#define AC3_SPX_BLEND(x) (x)
>>
>>> +#define TYPE_PREFIX(x) fixed_ ## x
>>
>> Why not use sulfix everywhere, for simplicity and consistency?
>
> I didn't want to change the name of the function float_to_int16_interleave
> that was created before this decoder (and its name is more descriptive this way).
Some people are working to make AC3 decoder output planar data, so this
should be a non-issue soon.
>>
>>> +#define AC3_DYNAMIC_RANGE1 0
>>
>> Hmm, that is strange.
>
> Similar as above. The way that dynamic range is used in this decoder is
> different from the way it is used in floating point decoder.
Why? If it is simply better, change also the float version to use the
new way.
>>> for (i = 0; i < s->fbw_channels; i++) {
>>> norm0 += s->downmix_coeffs[i][0];
>>> norm1 += s->downmix_coeffs[i][1];
>>> }
>>
>>> - norm0 = 1.0f / norm0;
>>> - norm1 = 1.0f / norm1;
>>> +
>>> for (i = 0; i < s->fbw_channels; i++) {
>>> - s->downmix_coeffs[i][0] *= norm0;
>>> - s->downmix_coeffs[i][1] *= norm1;
>>> + s->downmix_coeffs[i][0] = AC3_NORM(s->downmix_coeffs[i][0],norm0);
>>> + s->downmix_coeffs[i][1] = AC3_NORM(s->downmix_coeffs[i][1],norm1);
>>> }
>>
>> Division is much slower than multiplication. You can do the same trick
>> in fixed point:
>>
>> norm0 = (1 << bits) / norm0;
>> s->downmix_coeffs[i][0] = MUL(s->downmix_coeffs[i][0],norm0);
>>
>> where
>>
>> #define MUL(a,b) (((int64_t) (a)) * (b)) >> (s))
>>
>
> I am not sure that accuracy of fixed point code would be ok if division is moved out
> from the loop
If you use intermediate 64-bit values, this should impact very little
the precision.
>>> if (s->output_mode == AC3_CHMODE_MONO) {
>>> for (i = 0; i < s->fbw_channels; i++)
>>> - s->downmix_coeffs[i][0] = (s->downmix_coeffs[i][0] +
>>> - s->downmix_coeffs[i][1]) * LEVEL_MINUS_3DB;
>>> + s->downmix_coeffs[i][0] = AC3_LEVEL(s->downmix_coeffs[i][0] + s->downmix_coeffs[i][1]);
>>> }
>>> }
>>
>> Hmm, is this correct?
>
> Not exactly, although it passes fate tests. I will have to add back multiplication with LEVEL_MINUS_3DB.
>
>>
>>> @@ -602,51 +605,25 @@ static inline void do_imdct(AC3DecodeContext *s, int channels)
>>> for (ch = 1; ch <= channels; ch++) {
>>> if (s->block_switch[ch]) {
>>> int i;
>>> - float *x = s->tmp_output + 128;
>>> + FFTSample *x = s->tmp_output+128;
>>> for (i = 0; i < 128; i++)
>>> x[i] = s->transform_coeffs[ch][2 * i];
>>> - s->imdct_256.imdct_half(&s->imdct_256, s->tmp_output, x);
>>> - s->dsp.vector_fmul_window(s->output[ch - 1], s->delay[ch - 1],
>>> + s->imdct_256.AC3_RENAME(imdct_half)(&s->imdct_256, s->tmp_output, x);
>>> + s->dsp.AC3_RENAME(vector_fmul_window)(s->output[ch - 1], s->delay[ch - 1],
>>> s->tmp_output, s->window, 128);
>>> for (i = 0; i < 128; i++)
>>> x[i] = s->transform_coeffs[ch][2 * i + 1];
>>> - s->imdct_256.imdct_half(&s->imdct_256, s->delay[ch - 1], x);
>>> + s->imdct_256.AC3_RENAME(imdct_half)(&s->imdct_256, s->delay[ch - 1], x);
>>> } else {
>>> - s->imdct_512.imdct_half(&s->imdct_512, s->tmp_output, s->transform_coeffs[ch]);
>>> - s->dsp.vector_fmul_window(s->output[ch - 1], s->delay[ch - 1],
>>> + s->imdct_512.AC3_RENAME(imdct_half)(&s->imdct_512, s->tmp_output, s->transform_coeffs[ch]);
>>> + s->dsp.AC3_RENAME(vector_fmul_window)(s->output[ch - 1], s->delay[ch - 1],
>>> s->tmp_output, s->window, 128);
>>> - memcpy(s->delay[ch - 1], s->tmp_output + 128, 128 * sizeof(float));
>>> + memcpy(s->delay[ch - 1], s->tmp_output + 128, 128 * sizeof(FFTSample));
>>> }
>>> }
>>> }
>>>
>>> -/**
>>> - * Upmix delay samples from stereo to original channel layout.
>>> - */
>>> -static void ac3_upmix_delay(AC3DecodeContext *s)
>>> -{
>>> - int channel_data_size = sizeof(s->delay[0]);
>>> - switch (s->channel_mode) {
>>> - case AC3_CHMODE_DUALMONO:
>>> - case AC3_CHMODE_STEREO:
>>> - /* upmix mono to stereo */
>>> - memcpy(s->delay[1], s->delay[0], channel_data_size);
>>> - break;
>>> - case AC3_CHMODE_2F2R:
>>> - memset(s->delay[3], 0, channel_data_size);
>>> - case AC3_CHMODE_2F1R:
>>> - memset(s->delay[2], 0, channel_data_size);
>>> - break;
>>> - case AC3_CHMODE_3F2R:
>>> - memset(s->delay[4], 0, channel_data_size);
>>> - case AC3_CHMODE_3F1R:
>>> - memset(s->delay[3], 0, channel_data_size);
>>> - case AC3_CHMODE_3F:
>>> - memcpy(s->delay[2], s->delay[1], channel_data_size);
>>> - memset(s->delay[1], 0, channel_data_size);
>>> - break;
>>> - }
>>> -}
>>> +
>>
>> ??
>
> The sequence of imdct and downmix is changed in fixed point code for better accuracy,
> so this function is not needed.
So it should be also done for float version, in a separate patch (to be
applied before the main one).
-Vitor
More information about the ffmpeg-devel
mailing list