[FFmpeg-devel] [PATCH] TAK demuxer and decoder

Sun Sep 30 03:59:02 CEST 2012

On Sat, Sep 29, 2012 at 04:27:20PM +0000, Paul B Mahol wrote:
> Signed-off-by: Paul B Mahol <onemda at gmail.com>
[...]
> +static int tak_get_nb_samples(AVCodecContext *avctx, int sample_rate,
> +                              enum TAKFrameSizeType type)
> +{
> +    int nb_samples, max_nb_samples;
> +
> +    if (type <= TAK_FST_250ms) {
> +        nb_samples     = sample_rate * frame_duration_type_quants[type] >>
> +                                       TAK_FRAME_DURATION_QUANT_SHIFT;
> +        max_nb_samples = 16384;
> +    } else if (type < FF_ARRAY_ELEMS(frame_duration_type_quants)) {
> +        nb_samples     = frame_duration_type_quants[type];
> +        max_nb_samples = sample_rate * frame_duration_type_quants[TAK_FST_250ms] >>
> +                                       TAK_FRAME_DURATION_QUANT_SHIFT;

the multiplies should use int64_t to ensure they dont overflow

[...]
> +int avpriv_tak_parse_streaminfo(void *log, GetBitContext *gb, TAKStreamInfo *s)
> +{
> +    uint64_t channel_mask = 0;
> +    int frame_type, i;
> +
> +    s->codec = get_bits(gb, TAK_ENCODER_CODEC_BITS);
> +    if (s->codec != 2 && s->codec != 4) {
> +        av_log(log, AV_LOG_ERROR, "unsupported codec: %d\n", s->codec);
> +        return AVERROR_PATCHWELCOME;
> +    }
> +
> +    skip_bits(gb, TAK_ENCODER_PROFILE_BITS);
> +
> +    frame_type = get_bits(gb, TAK_SIZE_FRAME_DURATION_BITS);

> +    s->samples = get_bits_long(gb, TAK_SIZE_SAMPLES_NUM_BITS);

get_bits_longlong() (see my patch) or something similar is needed

[...]
> +
> +static int get_scale(int sample_rate, int shift)
> +{
> +    return (((sample_rate + 511 >> 9) + 3) & 0xFFFFFFFC) << shift;

FFALIGN(sample_rate + 511 >> 9, 4) << shift;

[...]
> +static int get_code(GetBitContext *gb, int nbits)
> +{
> +    int ret;
> +
> +    if (nbits == 1) {
> +        skip_bits1(gb);
> +        ret = 0;
> +    } else {
> +        ret = get_sbits(gb, nbits);
> +    }
> +
> +    return ret;

you could drop the ret variable and use 2 returns directly

> +}
> +
> +static void decode_lpc(int32_t *ptr, int lpc, int length)
> +{
> +    int i, a1, a2, a3, a4, a5;
> +
> +    if (length < 2)
> +        return;
> +
> +    if (lpc == 1) {
> +        a1 = *ptr++;
> +        for (i = 0; i < (length - 1 >> 1); i++) {
> +            *ptr   += a1;
> +            ptr[1] += *ptr;
> +            a1      = ptr[1];
> +            ptr    += 2;
> +        }
> +        if ((length - 1) & 1)
> +            *ptr += a1;
> +    } else if (lpc == 2) {
> +        a1     = ptr[1];
> +        a2     = a1 + *ptr;
> +        ptr[1] = a2;
> +        if (length > 2) {
> +            ptr += 2;
> +            for (i = 0; i < (length - 2 >> 1); i++) {
> +                a3     = *ptr + a1;
> +                a4     = a3 + a2;
> +                *ptr   = a4;
> +                a1     = ptr[1] + a3;
> +                a2     = a1 + a4;
> +                ptr[1] = a2;
> +                ptr   += 2;
> +            }
> +            if (length & 1)
> +                *ptr  += a1 + a2;
> +        }
> +    } else if (lpc == 3) {
> +        a1     = ptr[1];
> +        a2     = a1 + *ptr;
> +        ptr[1] = a2;
> +        if (length > 2) {
> +            a3   = ptr[2];
> +            a4   = a3 + a1;
> +            a5   = a4 + a2;
> +            ptr += 3;
> +            for (i = 0; i < length - 3; i++) {
> +                a3  += *ptr;
> +                a4  += a3;
> +                a5  += a4;
> +                *ptr = a5;
> +                ptr++;
> +            }
> +        }
> +    }
> +}

this looks like code that applies p[i] = (v+=p[i]) 1-3 times
if thats true, it should be documented
(the code could also be simplified but would then be slower)

[...]
> +static int decode_subframe(TAKDecContext *s, int32_t *ptr, int subframe_size, int prev_subframe_size)
> +{
> +    GetBitContext  *gb = &s->gb;
> +    int tmp, x, y, i, j, ret = 0;
> +
> +    if (get_bits1(gb)) {
> +        s->nb_predictors = predictor_sizes[get_bits(gb, 4)];
> +
> +        if (prev_subframe_size > 0 && get_bits1(gb)) {
> +            if (s->nb_predictors > prev_subframe_size)
> +                return AVERROR_INVALIDDATA;
> +
> +            ptr           -= s->nb_predictors;
> +            subframe_size += s->nb_predictors;
> +
> +            if (s->nb_predictors > subframe_size)
> +                return AVERROR_INVALIDDATA;
> +        } else {
> +            int lpc;
> +
> +            if (s->nb_predictors > subframe_size)
> +                return AVERROR_INVALIDDATA;
> +
> +            lpc = get_bits(gb, 2);
> +            if (lpc > 2)
> +                return AVERROR_INVALIDDATA;
> +
> +            if ((ret = decode_m(s, ptr, s->nb_predictors)) < 0)
> +                return ret;
> +
> +            decode_lpc(ptr, lpc, s->nb_predictors);
> +        }
> +
> +        s->xred = get_b(gb);
> +        s->size = get_bits1(gb) + 5;
> +
> +        if (get_bits1(gb)) {
> +            s->ared = get_bits(gb, 3) + 1;
> +            if (s->ared > 7)
> +                return AVERROR_INVALIDDATA;
> +        } else {
> +            s->ared = 0;
> +        }
> +        s->predictors[0] = get_code(gb, 10);
> +        s->predictors[1] = get_code(gb, 10);
> +        s->predictors[2] = get_code(gb, s->size + 1) << (9 - s->size);
> +        s->predictors[3] = get_code(gb, s->size + 1) << (9 - s->size);
> +        if (s->nb_predictors > 4) {
> +            tmp = s->size + 1 - get_bits1(gb);
> +
> +            for (i = 4; i < s->nb_predictors; i++) {
> +                if (!(i & 3))
> +                    x = tmp - get_bits(gb, 2);
> +                s->predictors[i] = get_code(gb, x) << (9 - s->size);
> +            }
> +        }
> +
> +        s->ncoeff[0] = s->predictors[0] << 6;
> +        for (i = 1; i < s->nb_predictors; i++) {
> +            int32_t *p1 = s->ncoeff;
> +            int32_t *p2 = &s->ncoeff[i - 1];
> +
> +            for (j = 0; j < (i + 1) / 2; j++) {
> +                x     = *p1 + (s->predictors[i] * *p2 + 256 >> 9);
> +                *p2  += s->predictors[i] * *p1 + 256 >> 9;
> +                *p1++ = x;
> +                p2--;
> +            }
> +
> +            s->ncoeff[i] = s->predictors[i] << 6;
> +        }
> +
> +        x = -1 << (32 - (s->ared + 5));
> +        y =  1 << ((s->ared + 5) - 1);
> +        for (i = 0, j = s->nb_predictors - 1; i < s->nb_predictors / 2; i++, j--) {
> +            tmp = y + s->ncoeff[j];
> +            s->ncoeff[j] = -(x & -(y + s->ncoeff[i] >> 31) |
> +                            (y + s->ncoeff[i]) >> (s->ared + 5));
> +            s->ncoeff[i] = -(x & -(tmp >> 31) | (tmp >> s->ared + 5));
> +        }
> +
> +        if ((ret = decode_m(s, &ptr[s->nb_predictors], subframe_size - s->nb_predictors)) < 0)
> +            return ret;
> +
> +        for (i = 0; i < s->nb_predictors; i++)
> +            s->f[i] = *ptr++ >> s->xred;
> +
> +        y    = FF_ARRAY_ELEMS(s->f) - s->nb_predictors;
> +        x    = subframe_size - s->nb_predictors;
> +        while (x > 0) {

> +            if (y >= x)
> +                tmp = x;
> +            else
> +                tmp = y;

FFMIN()

[...]
> +static int decorrelate(TAKDecContext *s, int c1, int c2, int length)
> +{
> +    GetBitContext  *gb = &s->gb;
> +    int32_t *p1 = &s->decode_buffer[c1 * s->nb_samples + 1];
> +    int32_t *p2 = &s->decode_buffer[c2 * s->nb_samples + 1];
> +    int a, b, i, j, x, tmp;
> +
> +    if (s->dmode > 3) {
> +        s->dshift = get_b(gb);
> +        if (s->dmode > 5) {
> +            if (get_bits1(gb))
> +                s->nb_dcoeffs = 16;
> +            else
> +                s->nb_dcoeffs = 8;
> +
> +            s->dval1 = get_bits1(gb);
> +            s->dval2 = get_bits1(gb);
> +
> +            for (i = 0; i < s->nb_dcoeffs; i++) {
> +                if (!(i & 3))
> +                    x = 14 - get_bits(gb, 3);
> +                s->dcoeffs[i] = get_code(gb, x);
> +            }
> +        } else {
> +            s->dfactor = get_code(gb, 10);
> +        }
> +    }
> +
> +    switch (s->dmode) {
> +    case 1:
> +        for (i = 0; i < length; i++, p1++, p2++)
> +            *p2 += *p1;
> +        break;
> +    case 2:
> +        for (i = 0; i < length; i++, p1++, p2++)
> +            *p1 = *p2 - *p1;
> +        break;
> +    case 3:

> +        for (i = 0; i < length; i++, p1++, p2++) {
> +            x   = (*p2 & 1) + 2 * *p1;
> +            *p1 = (  x - *p2) & 0x80000000 | (  x - *p2 >> 1);
> +            *p2 = (*p2 + x  ) & 0x80000000 | (*p2 + x   >> 1);

the & 0x80000000 | seems redundant if these are signed 32bit variables

> +        }
> +        break;
> +    case 4:
> +        FFSWAP(int32_t *, p1, p2);
> +    case 5:
> +        if (s->dshift)
> +            tmp = -1 << (32 - s->dshift);
> +        else
> +            tmp = 0;
> +
> +        for (i = 0; i < length; i++, p1++, p2++) {
> +            x   = *p2 >> s->dshift;;

double ;

> +            *p1 = ((-((s->dfactor * (tmp & -(*p2 >> 31) | x) + 128) >> 31) &
> +                    0xFF000000 |
> +                  ((   s->dfactor * (tmp & -(*p2 >> 31) | x) + 128) >> 8)) <<
> +                   s->dshift) - *p1;

theres a common subexpression that can be factored out
and the sign extension stuff looks redundant
and theres more redundant sign extension stuff in the patch

> +        }
> +        break;
> +    case 6:
> +        FFSWAP(int32_t *, p1, p2);
> +    case 7:
> +        if (length < 256)
> +            return AVERROR_INVALIDDATA;
> +
> +        a = s->nb_dcoeffs / 2;
> +        b = length - (s->nb_dcoeffs - 1);
> +
> +        if (s->dval1) {
> +            for (i = 0; i < a; i++)
> +                p1[i] += p2[i];
> +        }
> +
> +        if (s->dval2) {
> +            x = a + b;
> +            for (i = 0; i < length - x; i++)
> +                p1[x + i] += p2[x + i];
> +        }
> +
> +        for (i = 0; i < s->nb_dcoeffs; i++)
> +            s->f[i] = *p2++ >> s->dshift;
> +
> +        p1 += a;
> +        x = FF_ARRAY_ELEMS(s->f) - s->nb_dcoeffs;
> +        for (; b > 0; b -= tmp) {

> +            if (b <= x)
> +                tmp = b;
> +            else
> +                tmp = x;

FFMIN()

[...]
> +    if (avctx->bits_per_coded_sample <= 16) {
> +        av_fast_malloc(&s->decode_buffer, &s->decode_buffer_size,
> +                       sizeof(*s->decode_buffer) * s->nb_samples * avctx->channels +
> +                       FF_INPUT_BUFFER_PADDING_SIZE);
> +        if (!s->decode_buffer)
> +            return AVERROR(ENOMEM);
> +    } else {
> +        s->decode_buffer = (int32_t *)s->frame.data[0];
> +    }

I suggest not to assign something "random" to a pointer that normally
has been allocated and has to be freed.
Its easy to mix it up and end up freeing the wrong pointer

[...]
> +static int tak_read_header(AVFormatContext *s)
> +{
> +    AVIOContext *pb = s->pb;
> +    GetBitContext gb;
> +    AVStream *st;
> +    uint8_t *buffer = NULL;
> +    int ret;
> +
> +    st = avformat_new_stream(s, 0);
> +    if (!st)
> +        return AVERROR(ENOMEM);
> +    st->codec->codec_type = AVMEDIA_TYPE_AUDIO;
> +    st->codec->codec_id   = AV_CODEC_ID_TAK;
> +    st->need_parsing      = AVSTREAM_PARSE_FULL_RAW;
> +
> +    if (avio_rl32(pb) != MKTAG('t', 'B', 'a', 'K')) {
> +        avio_seek(pb, -4, SEEK_CUR);
> +        return 0;
> +    }
> +

> +    while (!url_feof(pb)) {
> +        int metadata_type, metadata_size;

metadata_type should have enum type

[...]
-- 
Michael     GnuPG fingerprint: 9FF2128B147EF6730BADF133611EC787040B0FAB

it is not once nor twice but times without number that the same ideas make
their appearance in the world. -- Aristotle
-------------- next part --------------
A non-text attachment was scrubbed...
Name: not available
Type: application/pgp-signature
Size: 198 bytes
Desc: Digital signature
URL: <http://ffmpeg.org/pipermail/ffmpeg-devel/attachments/20120930/a3135388/attachment.asc>