[FFmpeg-devel] [PATCH] ALS decoder
Thilo Borgmann
thilo.borgmann
Fri Aug 21 01:40:56 CEST 2009
Hi,
>> + uint32_t header_size; ///< header size of original audio file in bytes, provided for debugging
>> + uint32_t trailer_size; ///< Trailer size of original audio file in bytes, provided for debugging
>> + uint32_t crc; ///< 32-bit CCITT-32 CRC checksum
>> +} ALSSpecificConfig;
>
> Please try not to make *every* line exceed 80 columns. You could
> easily shorten them here by reducing the amount of whitespace between
> the type and name and before the comment.
>
>> + int64_t *prev_raw_samples; ///< contains unshifted raw samples from the previous block
>> + int64_t **raw_samples; ///< decoded raw samples for each channel
>> + int64_t *raw_buffer; ///< contains all decoded raw samples including carryover samples
>> +} ALSDecContext;
>
> Ditto.
ok, reduced whitespaces.
>
>> +/** Computes ceil(log2(x)) using av_log2.
>> + */
>> +static inline int ceil_log2(int x) {
>> + return x > 0 ? av_log2((x - 1) << 1) : 0;
>> +}
>
> Little functions like this are likely to be needed again some time.
> They should be placed in a common location.
Moved to libavutil/common.h. Please review my bad doxy wording.
>
>> +/** Reads an ALSSpecificConfig from a buffer into the output struct.
>> + */
>> +static av_cold int read_specific_config(ALSDecContext *ctx)
>> +{
>> + GetBitContext gb;
>> + uint64_t ht_size;
>> + int i, config_offset;
>> + MPEG4AudioConfig m4ac;
>> + ALSSpecificConfig *sconf = &ctx->sconf;
>> + const uint8_t *buffer = ctx->avctx->extradata;
>> + int buffer_size = ctx->avctx->extradata_size;
>> +
>> + init_get_bits(&gb, buffer, buffer_size * 8);
>> +
>> + config_offset = ff_mpeg4audio_get_config(&m4ac, buffer, buffer_size);
>> +
>> + if (config_offset < 0)
>> + return -1;
>> +
>> + skip_bits_long(&gb, config_offset);
>> + buffer_size -= config_offset >> 3;
>> +
>> + if (buffer_size < 22)
>> + return -1;
>> +
>> + // read the fixed items
>> + sconf->als_id = get_bits_long(&gb, 32);
>> + sconf->samp_freq = m4ac.sample_rate;
>> + skip_bits_long(&gb, 32);
>
> Maybe comment what is being skipped here and elsewhere.
Ok, I thought it is obvious. Done.
>
>> + sconf->samples = get_bits_long(&gb, 32);
>> + sconf->channels = m4ac.channels;
>> + skip_bits(&gb, 16);
>> + sconf->file_type = get_bits(&gb, 3);
>> + sconf->resolution = get_bits(&gb, 3);
>> + sconf->floating = get_bits1(&gb);
>> + sconf->msb_first = get_bits1(&gb);
>> + sconf->frame_length = get_bits(&gb, 16) + 1;
>> + sconf->random_access = get_bits(&gb, 8);
>> + sconf->ra_flag = get_bits(&gb, 2);
>> + sconf->adapt_order = get_bits1(&gb);
>> + sconf->coef_table = get_bits(&gb, 2);
>> + sconf->long_term_prediction = get_bits1(&gb);
>> + sconf->max_order = get_bits(&gb, 10);
>> + sconf->block_switching = get_bits(&gb, 2);
>> + sconf->bgmc_mode = get_bits1(&gb);
>> + sconf->sb_part = get_bits1(&gb);
>> + sconf->joint_stereo = get_bits1(&gb);
>> + sconf->mc_coding = get_bits1(&gb);
>> + sconf->chan_config = get_bits1(&gb);
>> + sconf->chan_sort = get_bits1(&gb);
>> + sconf->crc_enabled = get_bits1(&gb);
>> + sconf->rlslms = get_bits1(&gb);
>> + skip_bits(&gb, 5); // skip 5 reserved bits
>
> Please lose some of that whitespace. Compliments on the alignment
> though. Diego will love you.
I did this above, but here, not even the 80-characters reason exist.
Why give up readability?
>> +
>> + for (i = 0; i < sconf->channels; i++) {
>> + sconf->chan_pos[i] = get_bits(&gb, chan_pos_bits);
>> + }
>
> We usually omit {} for single-line for/if/while statements...
I usually do that, too... done.
>
>> + align_get_bits(&gb);
>> + buffer_size -= bytes_needed;
>> + // TODO: use this to actually do channel sorting
>> + } else {
>> + sconf->chan_sort = 0;
>> + }
>
> ... except for one-line else clauses after a multiline if.
Yes.
>> +/** Reads and decodes a Rice codeword.
>> + */
>> +static int64_t decode_rice(GetBitContext *gb, unsigned int k)
>> +{
>> + int64_t value = 0;
>> + int64_t q = 0;
>> + int max = gb->size_in_bits - get_bits_count(gb) - k;
>> +
>> + if (!k) {
>> + q = get_unary(gb, 0, max);
>> + return (q & 1) ? -((q + 1) >> 1) : ((q + 1) >> 1);
>> + } else if (k == 1) {
>> + q = get_unary(gb, 0, max);
>> + return get_bits1(gb) ? q : -(q + 1);
>> + } else {
>> + unsigned int r, sub_sign;
>> +
>> + q = get_unary(gb, 0, max);
>> + sub_sign = get_bits1(gb);
>> + r = get_bits_long(gb, k - 1);
>> +
>> + value = (q << (k - 1)) + r;
>> +
>> + return sub_sign ? value : -(value + 1);
>> + }
>> +}
>
> This function looks like it was designed specifically to make gcc fail:
>
> 1. 64-bit variables used unnecessarily.
> 2. GCC hates complex conditionals.
> 3. Compilers in general are bad at bit-hacks.
>
> It seems to me that int is enough for 'q' in the first two cases.
> get_unary() returns int, so we start with at most that many bits. To
> avoid overflow in q+1, do this instead:
>
> int r = q & 1;
> return r ? -((q >> 1) + r) : ((q >> 1) + r);
>
> Furthermore, gcc does stupid things with that code even with plain int
> variables. Rewriting in a few more steps helps massively:
>
> int r = q & 1;
> q = (q >> 1) + r;
> return r ? -q : q;
>
> This reduces the number of instructions on ARM from 6 to 4 (from 4 to
> 3 with armcc). On MIPS it goes from 9 with a branch to 5 branch-free.
>
> Finally, -(v + 1) is equivalent to ~v on two's complement machines,
> which we have previously agreed to assume we have. Hence we should
> write the return values in the second and third cases like this:
>
> return sign ? value : ~value;
>
> This prevents 32-bit overflow in the k==1 case and gives better code
> with several compilers in the final case, where we must resort to
> 64-bit maths.
Tricky. Works like a charm. Done.
>
>> +/** Converts PARCOR coefficient k to direct filter coefficient.
>> + */
>> +static void parcor_to_lpc(unsigned int k, int64_t *par, int64_t *cof)
>> +{
>> + int i;
>> + int64_t tmp1, tmp2;
>> +
>> + for (i = 0; i < (k+1) >> 1; i++) {
>> + tmp1 = cof[ i ] + ((par[k] * cof[k - i - 1] + (1 << 19)) >> 20);
>> + tmp2 = cof[k - i - 1] + ((par[k] * cof[ i ] + (1 << 19)) >> 20);
>> + cof[k - i - 1] = tmp2;
>> + cof[ i ] = tmp1;
>> + }
>> +
>> + cof[k] = par[k];
>> +}
>
> This is again a perfect trap for gcc. 64-bit maths in tight loops is
> something it is *really* bad at. Is there no way to avoid 64-bit
> elements here? Is perhaps the range usually smaller, so we could
> choose a 32-bit version most of the time and fall back on 64-bit only
> when required? Regardless of compiler, 64-bit multiplication is
> expensive on any 32-bit machine.
I think the usual case uses 64 bit here, but I'm not sure. Maybe Justin
can give a comment on this?
>> +/** Reads the block data.
>> + */
>> +static int read_block_data(ALSDecContext *ctx, unsigned int ra_block,
>> + int64_t *raw_samples, unsigned int block_length,
>> + unsigned int *js_blocks, int64_t *raw_other)
>> +{
>> ...
> The block nesting is very deep here. Perhaps splitting this large
> function into several smaller ones would help readability. Even gcc
> should manage to inline them.
I agree, done.
>
>> + if (sconf->long_term_prediction) {
>> + // TODO: LTP mode
>> + }
>> +
>> + start = 0;
>> ...
>> + } else {
>> + int store_prev_samples = (*js_blocks && raw_other) || shift_lsbs;
>> +
>> + // store previous smaples in case that they have to be altered
>> + if (store_prev_samples)
>> + memcpy(ctx->prev_raw_samples, raw_samples - sconf->max_order,
>> + sizeof(int64_t) * sconf->max_order);
>> +
>> + // reconstruct difference signal for prediction (joint-stereo)
>> + if (*js_blocks && raw_other) {
>> + int i;
>> + if (raw_other > raw_samples) { // D = R - L
>> + for (i = -1; i >= -sconf->max_order; i--)
>> + raw_samples[i] = raw_other[i] - raw_samples[i];
>> + } else { // D = R - L
>
> Are those two comments meant to be the same?
Yes but semantically different. This is meant to be that way.
It shows, which channel is represented by which variable.
>> ...
>> +
>> + if (shift_lsbs) {
>> + for (k = 0; k < block_length; k++)
>> + raw_samples[k] <<= shift_lsbs;
>> + }
>> +
>> + return 0;
>> +}
>
> Many of the loops above look like they could be easily simdified. I
> don't know how much time is spent in each, so I haven't thought about
> it in detail. Breaking the function apart would also help in finding
> where most time is spent.
I don't think so, at least in terms of merging several of them.
Otherwise or if you thought about another optimization, please give an
example.
>
>> +
>> +/** Reads the frame data.
>> + */
>> +static int read_frame_data(ALSDecContext *ctx, unsigned int ra_frame)
>> +{
>> ...
>
> Again, very deep nesting makes code hard to read. Consider making
> some blocks separate functions.
Done.
>> ...
>
> Maybe you already tried to fix it, but there's a scary number of
> memcpy/memmove calls above.
All of them are really necessary.
>> +
>> + // transform decoded frame into output format
>> + #define INTERLEAVE_OUTPUT(bps) \
>
> That's an awful lot of whitespace. Please cut it back so the lines
> fit in 80 columns, which will be easily possible when you address my
> next comment.
>
>> + { \
>> + int##bps##_t *dest = (int##bps##_t*) data; \
>> + shift = bps - ctx->avctx->bits_per_raw_sample; \
>> + for (sample = 0; sample < ctx->cur_frame_length; sample++) { \
>> + for (c = 0; c < sconf->channels; c++) { \
>> + *(dest++) = (int##bps##_t) (ctx->raw_samples[c][sample] << shift); \
>
> Useless parens: *dest++ is good. Useless cast => more useless parens.
Yes, done.
>> + // allocate previous raw sample buffer
>> + if (!(ctx->prev_raw_samples = av_malloc(sizeof(int64_t) * sconf->max_order))) {
>> + av_log(avctx, AV_LOG_ERROR, "Allocating buffer memory failed.\n");
>> + decode_end(avctx);
>> + return AVERROR(ENOMEM);
>> + }
>> +
>> + // allocate raw and carried sample buffer
>> + if (!(ctx->raw_buffer = av_mallocz(sizeof(int64_t) *
>> + avctx->channels * channel_size))) {
>> + av_log(avctx, AV_LOG_ERROR, "Allocating buffer memory failed.\n");
>> + decode_end(avctx);
>> + return AVERROR(ENOMEM);
>> + }
>> +
>> + // allocate raw sample array buffer
>> + if (!(ctx->raw_samples = av_malloc(sizeof(int64_t*) * avctx->channels))) {
>> + av_log(avctx, AV_LOG_ERROR, "Allocating buffer array failed.\n");
>> + decode_end(avctx);
>> + return AVERROR(ENOMEM);
>> + }
>
> This looks very repetitive...
Thus.... it is good/bad/crazy?
The log message depends on the buffer allocated, the size of the buffer
is never the same...
>
>> + // allocate raw and carried samples buffers
>> + ctx->raw_samples[0] = ctx->raw_buffer + sconf->max_order;
>> + for (c = 1; c < avctx->channels; c++) {
>> ...
>> +
>> Index: libavcodec/als_data.h
>> ===================================================================
>> --- libavcodec/als_data.h (revision 0)
>> +++ libavcodec/als_data.h (revision 0)
>> @@ -0,0 +1,147 @@
>> +/*
>> + * ALS header file for common data
>> + * Copyright (c) 2009 Thilo Borgmann <thilo.borgmann _at_ googlemail.com>
>> ...
>> +#ifndef AVCODEC_ALS_DATA_H
>> +#define AVCODEC_ALS_DATA_H
>> +
>> +
>> +#include <stdint.h>
>> +
>> +/** Rice parameters and corresponding index offsets for decoding the
>> + * indices of scaled PARCOR values. The table choosen is set globally
>> + * by the encoder and stored in ALSSpecificConfig.
>> + */
>> +int8_t parcor_rice_table[3][20][2] = {
>> + {
>> + {-52, 4},
>
> WTF happened to the indentation here?
>
>> +
>> +/** Scaled PARCOR values used for the first two PARCOR coefficients.
>> + * To be indexed by the Rice coded indices.
>> + * Generated by: parcor_scaled_values[i] = 32 + ((i * (i+1)) << 7) - (1 << 20)
>> + */
>> +int32_t parcor_scaled_values[] = {-1048544, -1048288, -1047776, -1047008,
>> + -1045984, -1044704, -1043168, -1041376,
>
> And here.
Would you like block- or function-like indents?
No kidding - I ask that before it goes back and forth and I don't care
which one...
(Or are there table-like indention rules I've missed?)
Revision 4 attached.
Thanks for your review!
-Thilo
-------------- next part --------------
An embedded and charset-unspecified text was scrubbed...
Name: als_decoder.rev4.patch
URL: <http://lists.mplayerhq.hu/pipermail/ffmpeg-devel/attachments/20090821/3d2679cb/attachment.asc>
More information about the ffmpeg-devel
mailing list