[FFmpeg-devel] Bink audio decoder

Wed Apr 16 00:44:38 CEST 2008

On Wed, Apr 09, 2008 at 11:47:32PM +1000, pross at xvid.org wrote:
> Hi,
> 
> Enclosed is a working demuxer and audio decoder for the Bink multimedia
> format. The patch has been tested on PPC hardware only, but should run
> elsewhere.
> 

> The audio codec makes use of a windowed real-value FFT (and DCT for more
> recent files). Benjamin L tells me that there are NO equivalent dsp
> routines in FFmpeg, so as a starting point a well known public domain
> routine is included.

We have a FFT
DCT and real value FFT can be efficiently implemented with FFT + trivial 
pre/post transforms.
read http://www.nrbook.com/a/bookcpdf/c12-3.pdf

[...]

> +#include <stdio.h>
> +#include <stdlib.h>

are both really needed?

> +#include "avcodec.h"
> +#define ALT_BITSTREAM_READER_LE
> +#include "bitstream.h"
> +#include "dsputil.h"
> +#include "fft4g.h"  // rdft(), ddct()
> +

> +// from wmadata.h
> +static const uint16_t wma_critical_freqs[25] = {
> +    100,   200,  300, 400,   510,  630,  770,    920,
> +    1080, 1270, 1480, 1720, 2000, 2320, 2700,   3150,
> +    3700, 4400, 5300, 6400, 7700, 9500, 12000, 15500,
> +    24500,
> +};

duplicate

> +
> +#define MAX_CHANNELS 2
> +
> +#define BLOCK_MAX_SIZE ((1<<11)*MAX_CHANNELS)
> +
> +typedef struct {
> +    AVCodecContext *avctx;
> +    GetBitContext gb;
> +    int first;
> +    int use_mdct;

> +    int frame_len;        // transform size (samples)
> +    int overlap_len;        // overlap size (samples)

comments are not doxygen compatible

> +    int channels;
> +    int num_bands;
> +    unsigned int * bands;
> +    float root;
> +    DECLARE_ALIGNED_16(short, window[BLOCK_MAX_SIZE/16]);
> +} BinkAudioContext;
> +

> +/*
> + */
> +static av_cold int binkaudio_decode_init(AVCodecContext *avctx)

useless comment

> +{
> +    BinkAudioContext * s = avctx->priv_data;
> +    int sample_rate = avctx->sample_rate;
> +    int sample_rate_half;
> +    int i;
> +    int frame_len_bits;
> +    s->avctx = avctx;
> +

> +    s->use_mdct = avctx->extradata_size>0 && *(char*)avctx->extradata == 1;

useless cast

> +
> +    /* determine frame length */
> +    if (avctx->sample_rate<22050) {
> +        frame_len_bits = 9;
> +    }else if (avctx->sample_rate<44100) {
> +        frame_len_bits = 10;
> +    }else{
> +        frame_len_bits = 11;
> +    }
> +    s->frame_len = 1<<frame_len_bits;
> +

> +        if (s->use_mdct) {
> +        s->channels = avctx->channels;
> +    }else{

indention ...

[...]
> +static float get_float(GetBitContext *gb)
> +{
> +    int power = get_bits(gb, 5);
> +    float f = get_bits(gb, 23) / (float)(1 << (23-power));

is power > 23 disallowed?

> +    if(get_bits1(gb))
> +        f = -f;
> +    return f;
> +}
> +

> +static const int rle_length_tab[16] = {
> +    2, 3, 4, 5, 6, 8, 9, 10, 11, 12, 13, 14, 15, 16, 32, 64
> +};

fits in uint8_t

> +
> +static int decode_block(BinkAudioContext *s, short *out)
> +{
> +    // coeffs is a double because rdft() and ddct() take doubles
> +    DECLARE_ALIGNED_16(double, coeffs[BLOCK_MAX_SIZE]);
> +
> +    // temporary workspace for ddct() and rdft()
> +    DECLARE_ALIGNED_16(double, ddct_w[BLOCK_MAX_SIZE*5/4]);    ///< max size = block_size*5/4
> +    DECLARE_ALIGNED_16(double, rdft_w[BLOCK_MAX_SIZE/2]); ///< max size = block_size*5/4
> +    DECLARE_ALIGNED_16(int,    fft4g_ip[48]);    ///< max size = 2+sqrt(block_max_size/2)

Please put these things somewhere else than the stack, some compilers
have difficulty aligning variables on the stack.
Also why double and not float?

> +
> +    int ch,i,j,k;
> +    float q, quant[25];
> +    int width,coeff;
> +    int channels = s->channels;
> +
> +    if (s->use_mdct) {
> +        skip_bits(&s->gb, 2);
> +    }
> +
> +    for(ch=0; ch<channels; ch++) {
> +        q = 0.0;
> +        coeffs[0] = get_float(&s->gb);
> +        coeffs[1] = get_float(&s->gb);
> +
> +        for(i=0; i<s->num_bands; i++) {
> +            int value = get_bits(&s->gb, 8);
> +            quant[i] = pow(10.0, FFMIN(value, 95) * 0.0664);
> +        }
> +
> +        // find band (k)
> +        for(k=0; s->bands[k]*2 < 2; k++) {
> +            q = quant[k];
> +        }

s->bands[0] == 1 -> 2 >= 2  -> q=0.0
or did i miss something ?

[...]
> +            if (width==0) {
> +                memset(coeffs+i, 0, (j-i)*sizeof(double));

sizeof(*coeffs) is safer if it ever is changed to float

> +                i=j;
> +                while(s->bands[k]*2 < i)
> +                    q = quant[k++];
> +            }else{
> +                while(i<j) {
> +                    if (s->bands[k]*2 == i)
> +                        q = quant[k++];
> +                    coeff = get_bits(&s->gb, width);
> +                    if (coeff) {
> +                        if (get_bits1(&s->gb))
> +                            coeffs[i] = -q*coeff;
> +                        else
> +                            coeffs[i] =  q*coeff;
> +                    }else{
> +                        coeffs[i] = 0.0;
> +                    }
> +                    i++;
> +                }
> +            }
> +        }
> +
> +        if (s->use_mdct) {
> +            ddct(s->frame_len, 0, coeffs, fft4g_ip, ddct_w);
> +        }else{
> +            rdft(s->frame_len, -1, coeffs, fft4g_ip, rdft_w);
> +        }
> +

> +        for(i=0; i<s->frame_len; i++) {    //FIXME: dsp.float_to_int16?
> +            out[i*channels + ch] = av_clip_int16(
> +                                   lrintf(coeffs[i]*s->root));
> +        }

yes that should be fixme-ed and the root should be multiplied into above q

> +    }
> +
> +    // windowing
> +    if (s->first>0) {
> +        int count = s->overlap_len*channels;
> +        for(i=0; i<count; i++) {
> +            out[i] = (s->window[i]*(count-i) + out[i]*i) / count;
> +        }

divisions are slow ...
Also i suspect that the windowing
would be more efficient before lrintf() in floats.

[...]
> +/**
> + */
> +static int binkaudio_decode_frame(AVCodecContext *avctx, void *data, int *data_size, const uint8_t *buf, int buf_size)
> +{
> +    BinkAudioContext * s = avctx->priv_data;

> +    short *samples = (short*)data;

useless cast

> +    s->first = 0;

this looks a little odd, also if its correct then its duplicated.

[...]
> Index: libavcodec/fft4g.h
> ===================================================================
> --- libavcodec/fft4g.h	(revision 0)
> +++ libavcodec/fft4g.h	(revision 0)
> @@ -0,0 +1,1368 @@
> +/*
> +General Purpose FFT (Fast Fourier/Cosine/Sine Transform) Package
> +
> +Description:
> +    A package to calculate Discrete Fourier/Cosine/Sine Transforms of 
> +    1-dimensional sequences of length 2^N.
> +
> +Reference:
> +    * Masatake MORI, Makoto NATORI, Tatuo TORII: Suchikeisan, 
> +      Iwanamikouzajyouhoukagaku18, Iwanami, 1982 (Japanese)
> +    * Henri J. Nussbaumer: Fast Fourier Transform and Convolution 
> +      Algorithms, Springer Verlag, 1982
> +    * C. S. Burrus, Notes on the FFT (with large FFT paper list)
> +      http://www-dsp.rice.edu/research/fft/fftnote.asc
> +
> +Copyright:
> +    Copyright(C) 1996-2001 Takuya OOURA
> +    email: ooura at mmm.t.u-tokyo.ac.jp
> +    download: http://momonga.t.u-tokyo.ac.jp/~ooura/fft.html
> +    You may use, copy, modify this code for any purpose and 
> +    without fee. You may distribute this ORIGINAL package.

we cannot accept that due to the non free license.

[...]
> Index: libavformat/bink.c
> ===================================================================
> --- libavformat/bink.c	(revision 0)
> +++ libavformat/bink.c	(revision 0)
> @@ -0,0 +1,190 @@
> +/*
> + * Bink file Demuxer
> + * Copyright (c) 2008 Peter Ross (pross at xvid.org)

The demuxer should be in a seperate patch (smaller patches, easier to
handle, easier to review, ...)

[...]
> +static int bink_probe(AVProbeData *p)
> +{
> +    const uint8_t *b = p->buf;
> +
> +    if (b[0] == 'B' && b[1] == 'I' && b[2] == 'K')
> +        return AVPROBE_SCORE_MAX;

please check more than 3 bytes if possible

> +    return 0;
> +}
> +
> +#define BINK_EXTRADATA_SIZE    1
> +static int bink_read_header(AVFormatContext *s, AVFormatParameters *ap)
> +{
> +    BinkDemuxContext *bink = s->priv_data;
> +    ByteIOContext *pb = s->pb;

> +    unsigned int version,width,height;
> +    AVRational fps;
> +    AVStream *st;

some of these are redundant

[...]
> +    st = av_new_stream(s, 0);
> +    if (!st)
> +        return AVERROR(ENOMEM);
> +    st->codec->codec_type = CODEC_TYPE_VIDEO;
> +    st->codec->codec_tag = 0; /* no fourcc */
> +    st->codec->codec_id = CODEC_ID_BINKVIDEO;
> +    st->codec->width = width;
> +    st->codec->height = height;

vertical align:
st->codec->width  = width;
st->codec->height = height;

[...]
> +            if ((get_le16(pb) & 0x1000)) {    //dct flag
> +                st[i]->codec->extradata = av_malloc(BINK_EXTRADATA_SIZE);
> +                st[i]->codec->extradata_size = BINK_EXTRADATA_SIZE;
> +                *(char*)st[i]->codec->extradata = 1;
> +            }

I think you should just place these 2 bytes as they are in extradata. This
seems simpler and more natural

> +        }
> +        url_fskip(pb, 4 * bink->num_tracks);
> +        av_free(st);
> +    }
> +

> +    bink->video_pts = bink->audio_pts = 0;

This should be unneeded as things are zeroed automatically.

[...]
> +static int bink_read_packet(AVFormatContext *s, AVPacket *pkt)
> +{
> +    BinkDemuxContext *bink = s->priv_data;
> +    ByteIOContext *pb = s->pb;
> +
> +    if (bink->current_track<0) {
> +        int packet_pos = bink->packet_pos;
> +
> +        url_fseek(pb, bink->index_pos + (bink->video_pts+1)*4, SEEK_SET);
> +        if (bink->video_pts+1 < bink->total_frames) {
> +            bink->packet_pos = get_le32(pb);   /* store next packet offset */
> +            bink->remain_packet_size = bink->packet_pos - packet_pos;    /* calculate size of this packet */
> +        }else if (bink->video_pts < bink->total_frames) {
> +            bink->remain_packet_size = bink->file_size - packet_pos;
> +        }else {
> +            return AVERROR(EIO);
> +        }
> +        url_fseek(pb, packet_pos, SEEK_SET);
> +        bink->current_track = 0;
> +    }

it looks like bink can be read without randomly seeking around, so please
dont randomly seek around. The index should be read in with av_add_index_entry()

> +
> +    while (bink->current_track < bink->num_tracks) {
> +        unsigned int audio_size = get_le32(pb);
> +        bink->remain_packet_size -= 4 + audio_size;
> +        bink->current_track++;
> +        if (audio_size > 0) {
> +            url_fskip(pb, 4);
> +            audio_size -= 4;

> +            if (av_get_packet(s->pb, pkt, audio_size) != audio_size)
> +                return AVERROR(EIO);

memleak

> +            pkt->stream_index = bink->current_track;

> +            pkt->pts = bink->audio_pts++;

somehow i do not think this will be correct for >1 audio stream

[...]
-- 
Michael     GnuPG fingerprint: 9FF2128B147EF6730BADF133611EC787040B0FAB

Observe your enemies, for they first find out your faults. -- Antisthenes
-------------- next part --------------
A non-text attachment was scrubbed...
Name: not available
Type: application/pgp-signature
Size: 189 bytes
Desc: Digital signature
URL: <http://lists.mplayerhq.hu/pipermail/ffmpeg-devel/attachments/20080416/85713e8c/attachment.pgp>