[FFmpeg-devel] [PATCH][GSoC] Implement floating point decoding in ALS
Paul B Mahol
onemda at gmail.com
Thu Apr 21 19:44:11 CEST 2016
On 4/21/16, Umair Khan <omerjerk at gmail.com> wrote:
> Hi,
>
> This patch is the second qualification task of my project.
> The patch isn't final yet.
> I've had it reviewed by Thilo Borgmann and am now sending it here.
>
> Currently, it decodes the floating point data perfectly, and prints
> the output to the console.
> I matched the output float values against the ones in the original .wav
> file and they are the same.
>
> Right now, I'm having a problem writing the data to the output
> file. The generated output file is almost half the size it
> should be. I've checked the code and it should work, but it doesn't.
>
> I'm also attaching the encoded file containing the floating-point sample
> data that I use for testing.
>
> Umair
>
> From cdaf469a9832812755924485e7f83a465652612a Mon Sep 17 00:00:00 2001
> From: Umair Khan <omerjerk at gmail.com>
> Date: Sat, 16 Apr 2016 17:56:13 +0530
> Subject: [PATCH] Implement floating point decoding in ALS
>
> Signed-off-by: Umair Khan <omerjerk at gmail.com>
> ---
> libavcodec/alsdec.c | 484 +++++++++++++++++++++++++++++++++++++++++++++++++++-
> 1 file changed, 479 insertions(+), 5 deletions(-)
>
> diff --git a/libavcodec/alsdec.c b/libavcodec/alsdec.c
> index 1402b00..97f650c 100644
> --- a/libavcodec/alsdec.c
> +++ b/libavcodec/alsdec.c
> @@ -37,6 +37,7 @@
> #include "internal.h"
> #include "libavutil/samplefmt.h"
> #include "libavutil/crc.h"
> +#include "libavutil/intfloat.h"
>
> #include <stdint.h>
>
> @@ -188,6 +189,14 @@ typedef struct ALSChannelData {
> } ALSChannelData;
>
>
> +typedef struct MaskedLZDict {
> + int string_code;
> + int parent_code;
> + int char_code;
> + int match_len;
> +} MaskedLZDict;
> +
> +
> typedef struct ALSDecContext {
> AVCodecContext *avctx;
> ALSSpecificConfig sconf;
> @@ -225,6 +234,15 @@ typedef struct ALSDecContext {
> int32_t **raw_samples; ///< decoded raw samples for each channel
> int32_t *raw_buffer; ///< contains all decoded raw samples including carryover samples
> uint8_t *crc_buffer; ///< buffer of byte order corrected samples used for CRC check
> + //float data
This is not a Doxygen comment.
> + MaskedLZDict* dict;
> + float *acf;
> + int *last_acf_mantissa;
> + int *shift_value;
> + int *last_shift_value;
> + int **raw_mantissa; ///< decoded mantissa bits of the difference signal
> + unsigned char *larray;
> + int **nbits;
> } ALSDecContext;
>
>
> @@ -247,6 +265,45 @@ typedef struct ALSBlockData {
> } ALSBlockData;
>
>
> +/**
> + * Masked LZ compression/decompression
> + */
> +
> +#define WORD_SIZE 8
> +#define WORD_MASK 0xff
> +#define CODE_UNSET -1
> +#define CODE_BIT_INIT 9
> +#define CODE_BIT_MAX 15
> +#define DIC_INDEX_INIT 512 // 2^9
> +#define DIC_INDEX_MAX 32768L // 2^15
> +#define FLUSH_CODE 256
> +#define FREEZE_CODE 257
> +#define FIRST_CODE 258
> +#define MAX_CODE 32767L
> +#define TABLE_SIZE 35023L // TABLE_SIZE must be a prime number
> +#define MASK_CODE 0
> +#define MAX_SEARCH 4 //(DIC_INDEX_MAX)
> +
> +#define IEEE754_EXP_BIASED 127 // IEEE754 defines exp to be biased by -127
> +
> +typedef union {
> + float f;
> + struct {
> + unsigned int mantissa : 23;
> + unsigned int exponent : 8;
> + unsigned int sign : 1;
> + } parts;
> +} CFloat;
> +
> +int dic_code_bit;
> +int current_dic_index_max;
> +unsigned int bump_code;
> +unsigned int flush_code;
> +int next_code;
> +int freeze_flag;
These file-scope variables can't be here; this decoder state belongs in ALSDecContext.
> +//Masked LZ ends
> +
> +
> static av_cold void dprint_specific_config(ALSDecContext *ctx)
> {
> #ifdef DEBUG
> @@ -441,7 +498,6 @@ static int check_specific_config(ALSDecContext *ctx)
> } \
> }
>
> - MISSING_ERR(sconf->floating, "Floating point decoding", AVERROR_PATCHWELCOME);
> MISSING_ERR(sconf->rlslms, "Adaptive RLS-LMS prediction", AVERROR_PATCHWELCOME);
>
> return error;
> @@ -867,9 +923,6 @@ static int read_var_block_data(ALSDecContext *ctx, ALSBlockData *bd)
> *current_res++ = decode_rice(gb, s[sb]);
> }
>
> - if (!sconf->mc_coding || ctx->js_switch)
> - align_get_bits(gb);
> -
> return 0;
> }
>
> @@ -1006,6 +1059,9 @@ static int read_block(ALSDecContext *ctx, ALSBlockData *bd)
> */
> static int decode_block(ALSDecContext *ctx, ALSBlockData *bd)
> {
> + ALSSpecificConfig *sconf = &ctx->sconf;
> + GetBitContext *gb = &ctx->gb;
> +
> unsigned int smp;
> int ret = 0;
>
> @@ -1024,6 +1080,9 @@ static int decode_block(ALSDecContext *ctx, ALSBlockData *bd)
> for (smp = 0; smp < bd->block_length; smp++)
> bd->raw_samples[smp] <<= *bd->shift_lsbs;
>
> + if (!sconf->mc_coding || ctx->js_switch)
> + align_get_bits(gb);
> +
> return 0;
> }
>
> @@ -1350,6 +1409,376 @@ static int revert_channel_correlation(ALSDecContext *ctx, ALSBlockData *bd,
> }
>
>
> +//initialize dictionary
> +static void init_dict(void) {
> + flush_code = FLUSH_CODE;
> + current_dic_index_max = DIC_INDEX_INIT;
> + dic_code_bit = CODE_BIT_INIT;
> + bump_code = (DIC_INDEX_INIT - 1);
> + next_code = FIRST_CODE;
> + freeze_flag = 0;
> +}
> +
> +
> +static int decode_string(unsigned char *buff, int string_code, int *first_char_code, unsigned long bufsize, MaskedLZDict* dict) {
> + unsigned long count, offset;
> + int current_code, parent_code, tmp_code;
> +
> + count = 0;
> + current_code = string_code;
> + *first_char_code = CODE_UNSET;
> + while ( count < bufsize ) {
> + switch ( current_code ) {
> + case CODE_UNSET:
> +// printf("Dic Index ERR!!! [stringCode == CODE_UNSET]\n");
> + return count;
> + break;
> + default:
> + if ( current_code < FIRST_CODE ) {
> + *first_char_code = current_code;
> + buff[0] = current_code;
> + count++;
> + return count;
> + } else {
> + offset = ( dict[current_code].match_len ) - 1;
> + tmp_code = dict[current_code].char_code;
> + buff[offset] = tmp_code;
> + count++;
> + }
> + current_code = dict[current_code].parent_code;
> + if ( ( current_code < 0 ) || ( current_code > ( DIC_INDEX_MAX - 1 ) ) ) {
> +// printf("Dic Index ERR!!!\n");
> + return count;
> + }
> + if ( current_code > FIRST_CODE ) {
> + parent_code = dict[current_code].parent_code;
> + offset = (dict[current_code].match_len) - 1;
> + if ( parent_code < 0 || parent_code > DIC_INDEX_MAX-1 ) {
> +// fprintf(stderr,"Dic Index ERR!!!\n");
> + return count;
> + }
> + if (( offset > (DIC_INDEX_MAX - 1))) {
> +// printf("Dic offset ERR!!!\n");
> + return count;
> + }
> + }
> + break;
> + }
> + }
> + return count;
> +}
> +
> +
> +static void flush_dict(MaskedLZDict* dict) {
> + int i;
> + for ( i = 0; i < TABLE_SIZE; i++ ) {
> + dict[i].string_code = CODE_UNSET;
> + dict[i].parent_code = CODE_UNSET;
> + dict[i].match_len = 0;
> + }
> + //// read first part
> + // initial DicCodes
> + // $0 - 255 xxxx
> + // $256 FLUSH_CODE
> + // $257 FREEZE_CODE
> + // $258 - $(max-2) code
> + // $(max-1) BUMP_CODE
> + // $(max-1) BumpCode 1st BumpCode = 511
> + // add first entry to dictionary as [$258]
> + current_dic_index_max = DIC_INDEX_INIT;
> + dic_code_bit = CODE_BIT_INIT; // DicCodeBitInit;
> + bump_code = current_dic_index_max - 1;
> + next_code = FIRST_CODE;
> + freeze_flag = 0;
> +}
> +
> +
> +static void set_new_entry_dict(int string_code, int parent_code, int char_code, MaskedLZDict* dict) {
> + dict[string_code].parent_code = parent_code;
> + dict[string_code].string_code = string_code;
> + dict[string_code].char_code = char_code;
> + if (parent_code < FIRST_CODE) {
> + dict[string_code].match_len = 2;
> + } else {
> +// if ( pDict[parentCode].stringCode == CODE_UNSET )
> +// fprintf(stderr, "Errr stringCode = CODE_UNSET\n");
> + dict[string_code].match_len = (dict[parent_code].match_len) + 1;
> + }
> +}
> +
> +
> +static int masked_lz_decompression(ALSDecContext *ctx, int size, unsigned char *buff) {
> + GetBitContext* gb = &ctx->gb;
> + MaskedLZDict *dict = ctx->dict;
> +
> + unsigned long output_chars;
> + int string_code, last_string_code, char_code;
> +
> + string_code = 0;
> + char_code = -1;
> + last_string_code = -1;
> +
> + output_chars = 0;
> +
> + while (output_chars < size) {
> + string_code = get_bits(gb, dic_code_bit);
> + switch (string_code) {
> + case FLUSH_CODE:
> + case MAX_CODE:
> + flush_dict(dict);
> + char_code = -1;
> + last_string_code = -1;
> + break;
> + case FREEZE_CODE:
> + freeze_flag = 1;
> + break;
> + default:
> + if (string_code > current_dic_index_max) {
> + av_log(ctx->avctx, AV_LOG_ERROR, "string code %d more than the max value.", string_code);
> + return output_chars;
> + }
> + if (string_code == (int) bump_code) {
> + ++dic_code_bit;
> + current_dic_index_max *= 2;
> + bump_code = current_dic_index_max - 1;
> + } else {
> + if (string_code >= next_code) {
> + output_chars += decode_string(&buff[output_chars], last_string_code, &char_code, size - output_chars, dict);
> + output_chars += decode_string(&buff[output_chars], char_code, &char_code, size - output_chars, dict);
> + set_new_entry_dict(next_code, last_string_code, char_code, dict);
> + ++next_code;
> + } else {
> + output_chars += decode_string(&buff[output_chars], string_code, &char_code, size - output_chars, dict);
> + if ((output_chars <= size) && (freeze_flag == 0)) {
> + if (last_string_code != -1) {
> + set_new_entry_dict(next_code, last_string_code, char_code, dict);
> + ++next_code;
> + } else {
> + break;
> + }
> + }
> + }
> + last_string_code = string_code;
> + }
> + break;
> + }
> + }
> + return output_chars;
> +}
> +
> +
> +static float multiply(float a, float b) {
I believe this can be implemented without using floats.
> + uint64_t mantissa_temp;
> + uint64_t mask_64;
> + int bit_count;
> + int cutoff_bit_count;
> + unsigned char last_2_bits;
> + unsigned int mantissa;
> + int sign;
> + CFloat f1, f2;
> + uint32_t return_val = 0;
> +
> + f1.f = a;
> + f2.f = b;
> +
> + sign = f1.parts.sign ^ f2.parts.sign;
> +
> + //Multiply mantissa bits in a 64-bit register
> + mantissa_temp = (uint64_t) f1.parts.mantissa * (uint64_t) f2.parts.mantissa;
> +
> + // Count the valid bit count
> + for( bit_count=48, mask_64=(uint64_t)0x1 << 47; !( mantissa_temp & mask_64 ) && mask_64; bit_count--, mask_64>>=1 );
> +
> + // Round off
> + cutoff_bit_count = bit_count - 24;
> + if (cutoff_bit_count > 0) {
> + last_2_bits = (unsigned char)( ( (unsigned int)mantissa_temp >> ( cutoff_bit_count - 1 ) ) & 0x3 );
> + if ( ( last_2_bits == 0x3 ) || ( ( last_2_bits == 0x1 ) && ( (unsigned int)mantissa_temp & ( ( 0x1UL << ( cutoff_bit_count - 1 ) ) - 1 ) ) ) ) {
> + // Need to round up
> + mantissa_temp += (uint64_t)0x1 << cutoff_bit_count;
> + }
> + }
> +
> + mantissa = (unsigned int)( mantissa_temp >> cutoff_bit_count );
> + // Need one more shift?
> + if (mantissa & 0x01000000ul) {
> + bit_count++;
> + mantissa >>= 1;
> + }
> +
> + if (!sign) {
> + return_val = 0x80000000U;
> + }
> + return_val |= (f1.parts.exponent + f2.parts.exponent + bit_count - 47) << 23;
> + return_val |= mantissa;
> + return av_int2float(return_val);
> +}
> +
> +
> +static int read_diff_float_data(ALSDecContext *ctx, unsigned int ra_frame) {
> + AVCodecContext *avctx = ctx->avctx;
> + GetBitContext *gb = &ctx->gb;
> + uint32_t tmp_32, num_bytes_diff_float;
> + int use_acf;
> + float *acf = ctx->acf;
> + int *shift_value = ctx->shift_value;
> + int *last_shift_value = ctx->last_shift_value;
> + int *last_acf_mantissa = ctx->last_acf_mantissa;
> + int **raw_mantissa = ctx->raw_mantissa;
> + int **nbits = ctx->nbits;
> + unsigned char *larray = ctx->larray;
> + unsigned int partA_flag, highest_byte, shift_amp;
> + int frame_length = ctx->cur_frame_length;
> + int nchars;
> + int i, c;
> + long k, nbits_aligned;
> + unsigned long acc, j;
> + uint32_t e;
> + unsigned int mantissa;
> + uint32_t sign;
> + float scale = (float) (0x1u << 23);
> + CFloat temp_pcm;
> +
> + num_bytes_diff_float = get_bits_long(gb, 32); //num_bytes_diff_float
> +
> + av_log(avctx, AV_LOG_ERROR, "read_diff_float_data() length = %"PRIu32"\n", num_bytes_diff_float);
> + use_acf = get_bits1(gb);
> + if (ra_frame) {
> + for (int c = 0; c < avctx->channels; ++c) {
> + last_acf_mantissa[c] = 0;
> + last_shift_value[c] = 0;
> + }
> + flush_dict(ctx->dict);
> + }
> + for (c = 0; c < avctx->channels; ++c) {
> + if (use_acf) {
> + if (get_bits1(gb) /*acf_flag*/) {
> + tmp_32 = get_bits(gb, 23);
> + last_acf_mantissa[c] = tmp_32;
> + } else {
> + tmp_32 = last_acf_mantissa[c];
> + }
> + acf[c] = av_int2float(tmp_32);
> + } else {
> + acf[c] = 1.0f;
> + }
> + highest_byte = get_bits(gb, 2);
> + shift_amp = get_bits1(gb);
> + partA_flag = get_bits1(gb);
> + if (shift_amp) {
> + shift_value[c] = get_bits(gb, 8);
> + last_shift_value[c] = shift_value[c];
> + } else {
> + shift_value[c] = last_shift_value[c];
> + }
> +
> + if (partA_flag) {
> + if (!get_bits1(gb)/*compressed_flag*/) { //uncompressed
> + for (i = 0; i < frame_length; ++i) {
> + if (ctx->raw_samples[c][i] == 0) {
> + tmp_32 = get_bits_long(gb, 32);
> + ctx->raw_samples[c][i] = tmp_32;
> + }
> + }
> + } else { //compressed
> + nchars = 0;
> + for (i = 0; i < frame_length; ++i) {
> + if (ctx->raw_samples[c][i] == 0) {
> + nchars += 4;
> + }
> + }
> + masked_lz_decompression(ctx, nchars, larray);
> + for (i = 0; i < frame_length; ++i) {
> + tmp_32 = (larray[i] << 24) | larray[i+1] << 16 | larray[i+2] << 8 | larray[i+3];
> + ctx->raw_samples[c][i] = tmp_32;
> + }
> + }
> + }
> +
> + //decode part B
> + if (highest_byte) {
> + for (i = 0; i < frame_length; ++i) {
> + if (ctx->raw_samples[c][i] != 0) {
> + //The following logic is taken from Tabel 14.45 and 14.46 from the ISO spec
> + if (acf[c] == 1.0f) {
> + nbits[c][i] = 23;
> + } else {
> + nbits[c][i] = 23 - av_log2(abs(ctx->raw_samples[c][i]));
> + }
> + nbits[c][i] = FFMIN(nbits[c][i], highest_byte*8);
> + }
> + }
> + if (!get_bits1(gb)/*compressed_flag*/) { //uncompressed
> + for (i = 0; i < frame_length; ++i) {
> + if (ctx->raw_samples[c][i] != 0) {
> + raw_mantissa[c][i] = get_bits(gb, nbits[c][i]);
> + }
> + }
> + av_log(avctx, AV_LOG_ERROR, "partB uncompressed\n");
> + } else { //compressed
> + nchars = 0;
> + for (i = 0; i < frame_length; ++i) {
> + if (ctx->raw_samples[c][i]) {
> + nchars += (int) nbits[c][i] / 8;
> + if (nbits[c][i] % 8 > 0) {
> + ++nchars;
> + }
> + }
> + }
> + masked_lz_decompression(ctx, nchars, larray);
> + j = 0;
> + for (i = 0; i < frame_length; ++i) {
> + if (ctx->raw_samples[c][i]) {
> + if ((nbits[c][i] % 8) > 0) {
> + nbits_aligned = 8 * ((unsigned int)(nbits[c][i] / 8) + 1);
> + } else {
> + nbits_aligned = nbits[c][i];
> + }
> + acc = 0;
> + for (k = 0; nbits_aligned/8; ++k) {
> + acc = ( acc << 8 ) + larray[j++];
> + }
> + acc >>= ( nbits_aligned - nbits[c][i] );
> + raw_mantissa[c][i] = acc;
> + }
> + }
> + }
> + } else {
> + // av_log(avctx, AV_LOG_ERROR, "no part B\n");
> + }
> +
> + for (i = 0; i < frame_length; ++i) {
> + if (ctx->raw_samples[c][i] != 0) {
> + if (acf[c] == 1.0f) {
> + temp_pcm.f = (float) (ctx->raw_samples[c][i] / scale);
> + // av_log(avctx, AV_LOG_ERROR, "float = %f\n", temp_pcm.f);
> + } else {
> + temp_pcm.f = multiply(acf[c], (float) (ctx->raw_samples[c][i] / scale));
> + }
> + e = temp_pcm.parts.exponent;
> +
> + mantissa = (temp_pcm.parts.mantissa | 0x800000) + raw_mantissa[c][i];
> + // av_log(avctx, AV_LOG_ERROR, "mantissa = %d\n", mantissa);
> + // av_log(avctx, AV_LOG_ERROR, "e = %d", e);
> + while( mantissa >= 0x1000000 ) {
> + e++;
> + mantissa >>= 1;
> + }
> + // av_log(avctx, AV_LOG_ERROR, "after e = %d", e);
> + if ( mantissa ) e += (shift_value[c] - 127);
> + mantissa = (mantissa & 0x007fffffUL) | 0x00800000UL;
> + // av_log(avctx, AV_LOG_ERROR, "exponent = %d shift = %d\n", e, shift_value[c]);
> + tmp_32 = (sign << 31) | (e << 23) | (mantissa & 0x007fffffUL);
> + ctx->raw_samples[c][i] = tmp_32;
> + av_log(avctx, AV_LOG_ERROR, "final output = %f\n", av_int2float(ctx->raw_samples[c][i]));
> + }
> + }
> + align_get_bits(gb);
> + }
> + return 0;
> +}
> +
> +
> /** Read the frame data.
> */
> static int read_frame_data(ALSDecContext *ctx, unsigned int ra_frame)
> @@ -1491,7 +1920,9 @@ static int read_frame_data(ALSDecContext *ctx, unsigned int ra_frame)
> sizeof(*ctx->raw_samples[c]) * sconf->max_order);
> }
>
> - // TODO: read_diff_float_data
> + if (sconf->floating) {
> + read_diff_float_data(ctx, ra_frame);
> + }
>
> if (get_bits_left(gb) < 0) {
> av_log(ctx->avctx, AV_LOG_ERROR, "Overread %d\n", -get_bits_left(gb));
> @@ -1661,6 +2092,14 @@ static av_cold int decode_end(AVCodecContext *avctx)
> av_freep(&ctx->chan_data_buffer);
> av_freep(&ctx->reverted_channels);
> av_freep(&ctx->crc_buffer);
> + av_freep(&ctx->dict);
> + av_freep(&ctx->acf);
> + av_freep(&ctx->last_acf_mantissa);
> + av_freep(&ctx->shift_value);
> + av_freep(&ctx->last_shift_value);
> + av_freep(&ctx->raw_mantissa);
> + av_freep(&ctx->larray);
> + av_freep(&ctx->nbits);
Where are the per-channel nbits[c] buffers freed? Only the outer array is freed here.
>
> return 0;
> }
> @@ -1711,6 +2150,12 @@ static av_cold int decode_init(AVCodecContext *avctx)
> }
> }
>
> + if (sconf->floating) {
> + av_log(avctx, AV_LOG_ERROR, "floating is enabled.\n");
> + } else {
> + av_log(avctx, AV_LOG_ERROR, "floating is NOT enabled.\n");
> + }
> +
> // set maximum Rice parameter for progressive decoding based on resolution
> // This is not specified in 14496-3 but actually done by the reference
> // codec RM22 revision 2.
> @@ -1797,6 +2242,35 @@ static av_cold int decode_init(AVCodecContext *avctx)
> ctx->raw_buffer = av_mallocz_array(avctx->channels * channel_size, sizeof(*ctx->raw_buffer));
> ctx->raw_samples = av_malloc_array(avctx->channels, sizeof(*ctx->raw_samples));
>
> + if (sconf->floating) {
> + ctx->dict = av_malloc_array(TABLE_SIZE, sizeof(*ctx->dict));
> + ctx->acf = av_malloc_array(avctx->channels, sizeof(*ctx->acf));
> + ctx->shift_value = av_malloc_array(avctx->channels, sizeof(*ctx->shift_value));
> + ctx->last_shift_value = av_malloc_array(avctx->channels, sizeof(*ctx->last_shift_value));
> + ctx->last_acf_mantissa = av_malloc_array(avctx->channels, sizeof(*ctx->last_acf_mantissa));
> +
> + ctx->raw_mantissa = av_malloc_array(avctx->channels, sizeof(*ctx->raw_mantissa));
> + for (int c = 0; c < avctx->channels; ++c) {
> + ctx->raw_mantissa[c] = av_malloc_array(ctx->cur_frame_length, sizeof(**ctx->raw_mantissa));
> + }
> +
> + ctx->larray = av_malloc_array(ctx->cur_frame_length * 4, sizeof(*ctx->larray));
> +
> + ctx->nbits = av_malloc_array(avctx->channels, sizeof(*ctx->nbits));
> + for (int c = 0; c < avctx->channels; ++c) {
> + ctx->nbits[c] = av_malloc_array(ctx->cur_frame_length, sizeof(**ctx->nbits));
You allocate this, but it is never freed.
> + }
> +
> + init_dict();
> +
> + if (!ctx->dict || !ctx->acf || !ctx->shift_value || !ctx->last_shift_value
> + || !ctx->last_acf_mantissa || !ctx->raw_mantissa) {
> + av_log(avctx, AV_LOG_ERROR, "Allocating buffer memory failed.\n");
> + ret = AVERROR(ENOMEM);
> + goto fail;
> + }
> + }
> +
> // allocate previous raw sample buffer
> if (!ctx->prev_raw_samples || !ctx->raw_buffer|| !ctx->raw_samples) {
> av_log(avctx, AV_LOG_ERROR, "Allocating buffer memory failed.\n");
> --
> 2.5.0
>
More information about the ffmpeg-devel
mailing list