[FFmpeg-devel] [PATCH 1/4] avcodec: add siren audio decoder
Lynne
dev at lynne.ee
Thu May 16 13:51:46 EEST 2019
May 16, 2019, 10:43 AM by onemda at gmail.com:
> Signed-off-by: Paul B Mahol <onemda at gmail.com>
> ---
> libavcodec/Makefile | 1 +
> libavcodec/allcodecs.c | 1 +
> libavcodec/avcodec.h | 1 +
> libavcodec/codec_desc.c | 7 +
> libavcodec/siren.c | 724 ++++++++++++++++++++++++++++++++++++++++
> 5 files changed, 734 insertions(+)
> create mode 100644 libavcodec/siren.c
>
> diff --git a/libavcodec/Makefile b/libavcodec/Makefile
> index edccd73037..b2bc61650c 100644
> --- a/libavcodec/Makefile
> +++ b/libavcodec/Makefile
> @@ -578,6 +578,7 @@ OBJS-$(CONFIG_SIPR_DECODER) += sipr.o acelp_pitch_delay.o \
> celp_math.o acelp_vectors.o \
> acelp_filters.o celp_filters.o \
> sipr16k.o
> +OBJS-$(CONFIG_SIREN_DECODER) += siren.o
> OBJS-$(CONFIG_SMACKAUD_DECODER) += smacker.o
> OBJS-$(CONFIG_SMACKER_DECODER) += smacker.o
> OBJS-$(CONFIG_SMC_DECODER) += smc.o
> diff --git a/libavcodec/allcodecs.c b/libavcodec/allcodecs.c
> index 6178d31b5c..8c273a490e 100644
> --- a/libavcodec/allcodecs.c
> +++ b/libavcodec/allcodecs.c
> @@ -468,6 +468,7 @@ extern AVCodec ff_sbc_encoder;
> extern AVCodec ff_sbc_decoder;
> extern AVCodec ff_shorten_decoder;
> extern AVCodec ff_sipr_decoder;
> +extern AVCodec ff_siren_decoder;
> extern AVCodec ff_smackaud_decoder;
> extern AVCodec ff_sonic_encoder;
> extern AVCodec ff_sonic_decoder;
> diff --git a/libavcodec/avcodec.h b/libavcodec/avcodec.h
> index b749946633..6a382207c3 100644
> --- a/libavcodec/avcodec.h
> +++ b/libavcodec/avcodec.h
> @@ -651,6 +651,7 @@ enum AVCodecID {
> AV_CODEC_ID_SBC,
> AV_CODEC_ID_ATRAC9,
> AV_CODEC_ID_HCOM,
> + AV_CODEC_ID_SIREN,
>
> /* subtitle codecs */
> AV_CODEC_ID_FIRST_SUBTITLE = 0x17000, ///< A dummy ID pointing at the start of subtitle codecs.
> diff --git a/libavcodec/codec_desc.c b/libavcodec/codec_desc.c
> index 621b16e160..a3139458f5 100644
> --- a/libavcodec/codec_desc.c
> +++ b/libavcodec/codec_desc.c
> @@ -2978,6 +2978,13 @@ static const AVCodecDescriptor codec_descriptors[] = {
> .long_name = NULL_IF_CONFIG_SMALL("HCOM Audio"),
> .props = AV_CODEC_PROP_LOSSY,
> },
> + {
> + .id = AV_CODEC_ID_SIREN,
> + .type = AVMEDIA_TYPE_AUDIO,
> + .name = "siren",
> + .long_name = NULL_IF_CONFIG_SMALL("Siren"),
> + .props = AV_CODEC_PROP_LOSSY,
> + },
>
> /* subtitle codecs */
> {
> diff --git a/libavcodec/siren.c b/libavcodec/siren.c
> new file mode 100644
> index 0000000000..f9e9897c6b
> --- /dev/null
> +++ b/libavcodec/siren.c
> @@ -0,0 +1,724 @@
> +/*
> + * Siren audio decoder
> + * Copyright (c) 2012 Youness Alaoui <kakaroto at kakaroto.homelinux.net>
> + * Copyright (c) 2018 Paul B Mahol
> + * Copyright (c) 2019 Lynne <dev at lynne.ee>
> + *
> + * This file is part of FFmpeg.
> + *
> + * FFmpeg is free software; you can redistribute it and/or
> + * modify it under the terms of the GNU Lesser General Public
> + * License as published by the Free Software Foundation; either
> + * version 2.1 of the License, or (at your option) any later version.
> + *
> + * FFmpeg is distributed in the hope that it will be useful,
> + * but WITHOUT ANY WARRANTY; without even the implied warranty of
> + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
> + * Lesser General Public License for more details.
> + *
> + * You should have received a copy of the GNU Lesser General Public
> + * License along with FFmpeg; if not, write to the Free Software
> + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
> + */
> +
> +#include "libavutil/tx.h"
> +#include "libavutil/float_dsp.h"
> +
> +#include "avcodec.h"
> +#include "get_bits.h"
> +#include "internal.h"
> +#include "mathops.h"
> +
> +#define STEPSIZE 0.3010299957
>
Just hardcode this in the powf call.
> +
> +static const uint16_t checksum_table[4] = { 0x7F80, 0x7878, 0x6666, 0x5555 };
> +static const uint8_t index_table[8] = {4, 4, 3, 3, 2, 2, 1, 0};
> +static const uint8_t vector_dimension[8] = { 2, 2, 2, 4, 4, 5, 5, 1 };
> +static const uint8_t number_of_vectors[8] = { 10, 10, 10, 5, 5, 4, 4, 20 };
> +static const uint8_t expected_bits_table[8] = { 52, 47, 43, 37, 29, 22, 16, 0 };
> +static const int8_t differential_decoder_tree[27][24][2] = {
> + {{1, 2}, {3, 4}, {5, 6}, {7, 8}, {9, 10}, {11, -12}, {-11, -10}, {-8, -9}, {-7, -6}, {-13, 12}, {-5, -4}, {0, 13}, {-3, -14}, {-2, 14}, {-1, 15}, {-15, 16}, {-16, 17}, {-17, 18}, {19, 20}, {21, 22}, {-18, -19}, {-20, -21}, {-22, -23}, {-32, -32}},
>
Tidy this table up? It's 240 chars long.
> +
> +static const float noise_category7 = 0.70711f;
>
Hardcode this as well.
> +
> +typedef struct SirenContext {
> + GetBitContext gb;
> +
> + int packet_size;
> + int number_of_coefs;
> + int rate_control_bits;
> + int rate_control_possibilities;
> + int checksum_bits;
> + int esf_adjustment;
> + int number_of_regions;
> + int scale_factor;
> + int sample_rate_bits;
> + int bits_per_frame;
> + int region_size;
> +
> + int dw1, dw2, dw3, dw4;
> +
> + int absolute_region_power_index[28];
> + float decoder_standard_deviation[28];
> + int power_categories[28];
> + int category_balance[28];
> + float standard_deviation[64];
> + float deviation_inverse[64];
> + int input_frame[20];
> + float backup_frame[320];
> + float coefs[320];
> +
> + AVFloatDSPContext *fdsp;
> + av_tx_fn tx_fn;
> + AVTXContext *tx_ctx;
> +
> + DECLARE_ALIGNED(32, float, context)[320];
> + DECLARE_ALIGNED(32, float, temp)[320];
> + DECLARE_ALIGNED(32, float, tx_in)[320];
> + DECLARE_ALIGNED(32, float, output_frame)[320];
> + DECLARE_ALIGNED(32, float, window)[320];
>
You only need context, temp and window.
context doesn't need 320 floats; it only uses 160. You should also rename it to
prev_win.
> +} SirenContext;
> +
> +static av_cold int siren_init(AVCodecContext *avctx)
> +{
> + const float scale = 1.0f;
>
>
This should be: const float scale = 1.0 / 32768;
> + SirenContext *s = avctx->priv_data;
> + int i;
> +
> + avctx->channels = 1;
> + avctx->channel_layout = AV_CH_LAYOUT_MONO;
> + avctx->sample_fmt = AV_SAMPLE_FMT_S16;
>
Use AV_SAMPLE_FMT_FLT here.
> +
> + s->packet_size = 40;
> + s->number_of_coefs = 320;
> + s->rate_control_bits = 4;
> + s->rate_control_possibilities = 16;
> + s->checksum_bits = 0;
> + s->esf_adjustment = 7;
> + s->number_of_regions = 14;
> + s->scale_factor = 1;
> + s->bits_per_frame = avctx->sample_rate / 50;
> + s->region_size = 20;
> + s->dw1 = s->dw2 = s->dw3 = s->dw4 = 1;
> +
> + for (i = 0; i < 64; i++) {
> + float region_power = powf(10, (i - 24) * STEPSIZE);
> +
> + s->standard_deviation[i] = sqrtf(region_power);
> + s->deviation_inverse[i] = 1.f / s->standard_deviation[i];
>
Nit: 1.0 instead of 1.f
> + }
> +
> + for (i = 0; i < 320; i++) {
> + float angle = ((i + 0.5f) * M_PI_2) / 320.f;
> + s->window[i] = sinf(angle);
> + }
> +
> + s->fdsp = avpriv_float_dsp_alloc(avctx->flags & AV_CODEC_FLAG_BITEXACT);
> + if (!s->fdsp)
> + return AVERROR(ENOMEM);
> +
> + return av_tx_init(&s->tx_ctx, &s->tx_fn, AV_TX_FLOAT_MDCT, 1, 320, &scale, 0);
> +}
> +
> +static int decode_envelope(SirenContext *s, GetBitContext *gb,
> + int number_of_regions, float *decoder_standard_deviation,
> + int *absolute_region_power_index, int esf_adjustment)
> +{
> + int i, index;
> +
> + absolute_region_power_index[0] = get_bits(gb, 5) - esf_adjustment;
> + decoder_standard_deviation[0] =
> + s->standard_deviation[absolute_region_power_index[0] + 24];
> +
> + for (i = 1; i < number_of_regions; i++) {
> + index = 0;
> + do {
> + index = differential_decoder_tree[i - 1][index][get_bits1(gb)];
> + } while (index > 0);
> +
> + absolute_region_power_index[i] =
> + absolute_region_power_index[i - 1] - index - 12;
> + decoder_standard_deviation[i] =
> + s->standard_deviation[absolute_region_power_index[i] + 24];
> + }
> +
> + return get_bits_count(gb);
> +}
> +
> +static int categorize_regions(int number_of_regions, int number_of_available_bits,
> + int *absolute_region_power_index, int *power_categories,
> + int *category_balance)
> +{
> + int region, delta, i, temp;
> + int expected_number_of_code_bits;
> + int min, max;
> + int offset,
> + num_rate_control_possibilities,
> + raw_value, raw_max_idx = 0, raw_min_idx = 0;
> + int max_rate_categories[28];
> + int min_rate_categories[28];
> + int temp_category_balances[64];
> + int *min_rate_ptr = NULL;
> + int *max_rate_ptr = NULL;
> +
> + if (number_of_regions == 14) {
> + num_rate_control_possibilities = 16;
> + } else {
> + num_rate_control_possibilities = 32;
> + }
> +
> + offset = -32;
> + for (delta = 32; number_of_regions > 0 && delta > 0; delta /= 2) {
> + expected_number_of_code_bits = 0;
> + for (region = 0; region < number_of_regions; region++) {
> + i = (delta + offset -
> + absolute_region_power_index[region]) >> 1;
> + i = av_clip_uintp2(i, 3);
> + power_categories[region] = i;
> + expected_number_of_code_bits += expected_bits_table[i];
> +
> + }
> + if (expected_number_of_code_bits >= number_of_available_bits - 32)
> + offset += delta;
> + }
> +
> + expected_number_of_code_bits = 0;
> + for (region = 0; region < number_of_regions; region++) {
> + i = (offset - absolute_region_power_index[region]) >> 1;
> + i = av_clip_uintp2(i, 3);
> + max_rate_categories[region] = min_rate_categories[region] =
> + power_categories[region] = i;
> + expected_number_of_code_bits += expected_bits_table[i];
> + }
> +
> + min = max = expected_number_of_code_bits;
> + min_rate_ptr = max_rate_ptr =
> + temp_category_balances + num_rate_control_possibilities;
> + for (i = 0; i < num_rate_control_possibilities - 1; i++) {
> + if (min + max > number_of_available_bits * 2) {
> + raw_value = -99;
> + for (region = number_of_regions - 1; region >= 0; region--) {
> + if (min_rate_categories[region] < 7) {
> + temp =
> + offset - absolute_region_power_index[region] -
> + 2 * min_rate_categories[region];
> + if (temp > raw_value) {
> + raw_value = temp;
> + raw_min_idx = region;
> + }
> + }
> + }
> + *min_rate_ptr++ = raw_min_idx;
> + min +=
> + expected_bits_table[min_rate_categories[raw_min_idx] + 1] -
> + expected_bits_table[min_rate_categories[raw_min_idx]];
> + min_rate_categories[raw_min_idx]++;
> + } else {
> + raw_value = 99;
> + for (region = 0; region < number_of_regions; region++) {
> + if (max_rate_categories[region] > 0) {
> + temp =
> + offset - absolute_region_power_index[region] -
> + 2 * max_rate_categories[region];
> + if (temp < raw_value) {
> + raw_value = temp;
> + raw_max_idx = region;
> + }
> + }
> + }
> +
> + *--max_rate_ptr = raw_max_idx;
> + max += expected_bits_table[max_rate_categories[raw_max_idx] - 1] -
> + expected_bits_table[max_rate_categories[raw_max_idx]];
> + max_rate_categories[raw_max_idx]--;
> + }
> + }
> +
> + for (region = 0; region < number_of_regions; region++)
> + power_categories[region] = max_rate_categories[region];
> +
> + for (i = 0; i < num_rate_control_possibilities - 1; i++)
> + category_balance[i] = *max_rate_ptr++;
> +
> + return 0;
> +}
> +
> +static int get_dw(SirenContext *s)
> +{
> + int ret = s->dw1 + s->dw4;
> +
> + if ((ret & 0x8000) != 0)
> + ret++;
> +
> + s->dw1 = s->dw2;
> + s->dw2 = s->dw3;
> + s->dw3 = s->dw4;
> + s->dw4 = ret;
>
You should add a flush function that resets those to 1 and also zeroes prev_win, so that seeking works.
> +
> + return ret;
> +}
> +
> +static int decode_vector(SirenContext *s, int number_of_regions,
> + int number_of_available_bits, float *decoder_standard_deviation,
> + int *power_categories, float *coefs, int scale_factor)
> +{
> + GetBitContext *gb = &s->gb;
> + float *coefs_ptr;
> + float decoded_value;
> + float noise;
> + const uint16_t *decoder_tree;
> + int region;
> + int category;
> + int i, j;
> + int index;
> + int error;
> + int dw1;
> + int dw2;
> +
> + error = 0;
> + for (region = 0; region < number_of_regions; region++) {
> + category = power_categories[region];
> + coefs_ptr = coefs + (region * s->region_size);
> +
> + if (category < 7) {
> + decoder_tree = decoder_tables[category];
> +
> + for (i = 0; i < number_of_vectors[category]; i++) {
> + index = 0;
> + do {
> + if (get_bits_left(gb) <= 0) {
> + error = 1;
> + break;
> + }
> +
> + index = decoder_tree[index + get_bits1(gb)];
> + } while ((index & 1) == 0);
> +
> + index >>= 1;
> +
> + if (error == 0 && get_bits_left(gb) >= 0) {
> + for (j = 0; j < vector_dimension[category]; j++) {
> + decoded_value = mlt_quant[category][index & ((1 << index_table[category]) - 1)];
> + index >>= index_table[category];
> +
> + if (decoded_value != 0) {
> + if (!get_bits1(gb))
> + decoded_value *= -decoder_standard_deviation[region];
> + else
> + decoded_value *= decoder_standard_deviation[region];
> + }
> +
> + *coefs_ptr++ = decoded_value * scale_factor;
> + }
> + } else {
> + error = 1;
> + break;
> + }
> + }
> +
> + if (error == 1) {
> + for (j = region + 1; j < number_of_regions; j++)
> + power_categories[j] = 7;
> + category = 7;
> + }
> + }
> +
> + coefs_ptr = coefs + (region * s->region_size);
> +
> + if (category == 5) {
> + i = 0;
> + for (j = 0; j < s->region_size; j++) {
> + if (*coefs_ptr != 0) {
> + i++;
> + if (fabs(*coefs_ptr) >
> + 2.0 * decoder_standard_deviation[region]) {
> + i += 3;
> + }
> + }
> + coefs_ptr++;
> + }
> +
> + noise =
> + decoder_standard_deviation[region] * noise_category5[i];
> + } else if (category == 6) {
> + i = 0;
> + for (j = 0; j < s->region_size; j++) {
> + if (*coefs_ptr++ != 0)
> + i++;
> + }
> +
> + noise =
> + decoder_standard_deviation[region] * noise_category6[i];
> + } else if (category == 7) {
> + noise = decoder_standard_deviation[region] * noise_category7;
> + } else {
> + noise = 0;
> + }
> +
> + coefs_ptr = coefs + (region * s->region_size);
> +
> + if (category == 5 || category == 6 || category == 7) {
> + dw1 = get_dw(s);
> + dw2 = get_dw(s);
> +
> + for (j = 0; j < 10; j++) {
> + if (category == 7 || *coefs_ptr == 0) {
> + if ((dw1 & 1))
> + *coefs_ptr = noise;
> + else
> + *coefs_ptr = -noise;
> + }
> + coefs_ptr++;
> + dw1 >>= 1;
> +
> + if (category == 7 || *coefs_ptr == 0) {
> + if ((dw2 & 1))
> + *coefs_ptr = noise;
> + else
> + *coefs_ptr = -noise;
> + }
> + coefs_ptr++;
> + dw2 >>= 1;
> + }
> + }
> + }
> +
> + return error == 1 ? -1 : get_bits_left(gb);
> +}
> +
> +static int decode_samples(SirenContext *s, float *coefs, float *old_win, int dct_length, float *samples)
> +{
> + s->tx_fn(s->tx_ctx, s->temp, coefs, sizeof(float));
> +
> + s->fdsp->vector_fmul_window(samples, old_win, s->temp,
> + s->window, dct_length >> 1);
> +
> + memcpy(old_win, s->temp + (dct_length >> 1), sizeof(float)*dct_length >> 1);
> +
> + return 1;
> +}
> +
> +static int siren_decode(AVCodecContext *avctx, void *data,
> + int *got_frame, AVPacket *pkt)
> +{
> + SirenContext *s = avctx->priv_data;
> + GetBitContext *gb = &s->gb;
> + AVFrame *frame = data;
> + int ret, number_of_valid_coefs = 20 * s->number_of_regions;
> + int frame_error = 0, i, rate_control = 0;
> + int checksum, calculated_checksum;
> +
> + if (s->checksum_bits > 0)
> + memcpy(s->input_frame, pkt->data, FFMIN(s->packet_size, sizeof(s->input_frame)));
> +
> + if ((ret = init_get_bits8(gb, pkt->data, pkt->size)) < 0)
> + return ret;
> +
> + decode_envelope(s, gb, s->number_of_regions,
> + s->decoder_standard_deviation,
> + s->absolute_region_power_index, s->esf_adjustment);
> +
> + rate_control = get_bits(gb, s->rate_control_bits);
> +
> + categorize_regions(s->number_of_regions, get_bits_left(gb),
> + s->absolute_region_power_index, s->power_categories,
> + s->category_balance);
> +
> + for (i = 0; i < rate_control; i++) {
> + s->power_categories[s->category_balance[i]]++;
> + }
> +
> + decode_vector(s, s->number_of_regions, get_bits_left(gb),
> + s->decoder_standard_deviation, s->power_categories,
> + s->coefs, s->scale_factor);
> +
> + if (get_bits_left(gb) > 0) {
> + for (i = 0; i < get_bits_left(gb); i++) {
> + if (!get_bits1(gb))
> + frame_error = 1;
> + }
> + } else if (get_bits_left(gb) < 0 &&
> + rate_control + 1 < s->rate_control_possibilities) {
> + frame_error |= 2;
> + }
> +
> + for (i = 0; i < s->number_of_regions; i++) {
> + if (s->absolute_region_power_index[i] > 33 ||
> + s->absolute_region_power_index[i] < -31)
> + frame_error |= 4;
> + }
> +
> + if (s->checksum_bits > 0) {
>
This is dead code: s->checksum_bits is always 0, as set by the init function.
You should probably remove it, or maybe make it warn if enabled with a flag.
The checksum algorithm is either 16-bit ANSI or CCITT.
If you remove the checksum code you can also remove s->packet_size,
s->checksum_bits and s->input_frame, and clean up siren_decode().
> + int bytes_per_frame = s->bits_per_frame >> 4;
> + int idx = 0, sum = 0;
> +
> + checksum = s->input_frame[bytes_per_frame - 1] & ((1 << s->checksum_bits) - 1);
> + s->input_frame[bytes_per_frame - 1] &= ~checksum;
> + do {
> + sum ^= (s->input_frame[idx] & 0xFFFF) << (idx % 15);
> + } while (++idx < bytes_per_frame);
> +
> + sum = (sum >> 15) ^ (sum & 0x7FFF);
> + calculated_checksum = 0;
> + for (i = 0; i < 4; i++) {
> + int j, temp1 = checksum_table[i] & sum;
> + for (j = 8; j > 0; j >>= 1) {
> + int temp2 = temp1 >> j;
> + temp1 ^= temp2;
> + }
> + calculated_checksum <<= 1;
> + calculated_checksum |= temp1 & 1;
> + }
> +
> + if (checksum != calculated_checksum)
> + frame_error |= 8;
> + }
> +
> + if (frame_error != 0) {
> + for (i = 0; i < number_of_valid_coefs; i++) {
> + s->coefs[i] = s->backup_frame[i];
> + s->backup_frame[i] = 0;
> + }
> + } else {
> + for (i = 0; i < number_of_valid_coefs; i++)
> + s->backup_frame[i] = s->coefs[i];
> + }
> +
> + for (i = number_of_valid_coefs; i < s->number_of_coefs; i++)
> + s->coefs[i] = 0;
> +
> + *got_frame = decode_samples(s, s->coefs, s->context, s->number_of_coefs, s->output_frame);
> + if (*got_frame) {
> + int16_t *dst;
> +
> + frame->nb_samples = 320;
> + if ((ret = ff_get_buffer(avctx, frame, 0)) < 0)
> + return ret;
> + dst = (int16_t *)frame->data[0];
> +
> + for (i = 0; i < frame->nb_samples; i++) {
> + dst[i] = av_clip_int16(s->output_frame[i]);
> + }
> + }
>
Since you've changed the sample format you can just decode directly to the frame like:
frame->nb_samples = 320;
if ((ret = ff_get_buffer(avctx, frame, 0)) < 0)
return ret;
decode_samples(s, s->coefs, s->prev_win, s->number_of_coefs,
(float *)frame->data[0]);
*got_frame = 1;
And make decode_samples return void.
More information about the ffmpeg-devel mailing list