[FFmpeg-devel] [PATCH] avformat/flacdec: Implement decoding of 32-bit PCM
Martijn van Beurden
mvanb1 at gmail.com
Mon Jul 11 22:02:54 EEST 2022
Op ma 20 jun. 2022 om 22:14 schreef Martijn van Beurden <mvanb1 at gmail.com>:
> Recently libFLAC gained the ability (not yet in any released
> version) to create FLAC files containing 32-bit integer PCM samples.
> To keep complexity reasonable, residuals are limited to 32-bit
> integers, and the encoder must guarantee this. If the encoder cannot
> find any predictor whose residuals fit this limit, it must fall back
> to a verbatim subframe. Tests have shown that this rarely happens
> (<0.1% of subframes on a music corpus of various styles). See also
> the discussion here:
> https://github.com/ietf-wg-cellar/flac-specification/pull/148
>
> This patch adds decoding of these files to ffmpeg.
> ---
> libavcodec/flac.c | 4 +-
> libavcodec/flacdec.c | 248 ++++++++++++++++++++++++++++++++++++++----
> libavcodec/get_bits.h | 12 ++
> libavcodec/mathops.h | 9 ++
> 4 files changed, 250 insertions(+), 23 deletions(-)
>
> diff --git a/libavcodec/flac.c b/libavcodec/flac.c
> index dd68830622..f326d8fa5c 100644
> --- a/libavcodec/flac.c
> +++ b/libavcodec/flac.c
> @@ -27,7 +27,7 @@
> #include "flac.h"
> #include "flacdata.h"
>
> -static const int8_t sample_size_table[] = { 0, 8, 12, 0, 16, 20, 24, 0 };
> +static const int8_t sample_size_table[] = { 0, 8, 12, 0, 16, 20, 24, 32 };
>
> static const AVChannelLayout flac_channel_layouts[8] = {
> AV_CHANNEL_LAYOUT_MONO,
> @@ -81,7 +81,7 @@ int ff_flac_decode_frame_header(AVCodecContext *avctx,
> GetBitContext *gb,
>
> /* bits per sample */
> bps_code = get_bits(gb, 3);
> - if (bps_code == 3 || bps_code == 7) {
> + if (bps_code == 3) {
> av_log(avctx, AV_LOG_ERROR + log_level_offset,
> "invalid sample size code (%d)\n",
> bps_code);
> diff --git a/libavcodec/flacdec.c b/libavcodec/flacdec.c
> index 87f20c7425..49952ce120 100644
> --- a/libavcodec/flacdec.c
> +++ b/libavcodec/flacdec.c
> @@ -63,6 +63,9 @@ typedef struct FLACContext {
> int32_t *decoded[FLAC_MAX_CHANNELS]; ///< decoded samples
> uint8_t *decoded_buffer;
> unsigned int decoded_buffer_size;
> + int64_t *decoded_33bps; ///< decoded samples for a
> 33 bps subframe
> + uint8_t *decoded_buffer_33bps;
> + unsigned int decoded_buffer_size_33bps;
> int buggy_lpc; ///< use workaround for old
> lavc encoded files
>
> FLACDSPContext dsp;
> @@ -154,6 +157,24 @@ static int allocate_buffers(FLACContext *s)
> s->flac_stream_info.channels,
> s->flac_stream_info.max_blocksize,
> AV_SAMPLE_FMT_S32P, 0);
> + if (ret >= 0 && s->flac_stream_info.bps == 32 &&
> s->flac_stream_info.channels == 2) {
> + buf_size = av_samples_get_buffer_size(NULL, 1,
> +
> s->flac_stream_info.max_blocksize,
> + AV_SAMPLE_FMT_S64P, 0);
> + if (buf_size < 0)
> + return buf_size;
> +
> + av_fast_malloc(&s->decoded_buffer_33bps,
> &s->decoded_buffer_size_33bps, buf_size);
> + if (!s->decoded_buffer)
> + return AVERROR(ENOMEM);
> +
> + ret = av_samples_fill_arrays((uint8_t **)&s->decoded_33bps, NULL,
> + s->decoded_buffer_33bps,
> + 1,
> + s->flac_stream_info.max_blocksize,
> + AV_SAMPLE_FMT_S64P, 0);
> +
> + }
> return ret < 0 ? ret : 0;
> }
>
> @@ -331,6 +352,94 @@ static int decode_subframe_fixed(FLACContext *s,
> int32_t *decoded,
> return 0;
> }
>
> +static int decode_subframe_fixed_wide(FLACContext *s, int32_t *decoded,
> + int pred_order, int bps)
> +{
> + const int blocksize = s->blocksize;
> + int i;
> + int ret;
> +
> + /* warm up samples */
> + for (i = 0; i < pred_order; i++) {
> + decoded[i] = get_sbits_long(&s->gb, bps);
> + }
> +
> + if ((ret = decode_residuals(s, decoded, pred_order)) < 0)
> + return ret;
> +
> + switch (pred_order) {
> + case 0:
> + break;
> + case 1:
> + for (i = pred_order; i < blocksize; i++)
> + decoded[i] += decoded[i-1];
> + break;
> + case 2:
> + for (i = pred_order; i < blocksize; i++)
> + decoded[i] = (int64_t)decoded[i] + 2*(int64_t)decoded[i-1] -
> (int64_t)decoded[i-2];
> + break;
> + case 3:
> + for (i = pred_order; i < blocksize; i++)
> + decoded[i] = (int64_t)decoded[i] + 3*(int64_t)decoded[i-1] -
> 3*(int64_t)decoded[i-2] + (int64_t)decoded[i-3];
> + break;
> + case 4:
> + for (i = pred_order; i < blocksize; i++)
> + decoded[i] = (int64_t)decoded[i] + 4*(int64_t)decoded[i-1] -
> 6*(int64_t)decoded[i-2] + 4*(int64_t)decoded[i-3] - (int64_t)decoded[i-4];
> + break;
> + default:
> + av_log(s->avctx, AV_LOG_ERROR, "illegal pred order %d\n",
> pred_order);
> + return AVERROR_INVALIDDATA;
> + }
> +
> + return 0;
> +}
> +
> +
> +static int decode_subframe_fixed_33bps(FLACContext *s, int64_t *decoded,
> + int32_t *residual, int pred_order)
> +{
> + const int blocksize = s->blocksize;
> + int i;
> + int ret;
> +
> + /* warm up samples */
> + for (i = 0; i < pred_order; i++) {
> + decoded[i] = get_sbits64(&s->gb, 33);
> + av_log(s->avctx, AV_LOG_DEBUG, "warm-up %d = %" PRId64 "\n", i,
> decoded[i]);
> + }
> +
> + if ((ret = decode_residuals(s, residual, pred_order)) < 0)
> + return ret;
> +
> + switch (pred_order) {
> + case 0:
> + for (i = pred_order; i < blocksize; i++)
> + decoded[i] = residual[i];
> + break;
> + case 1:
> + for (i = pred_order; i < blocksize; i++)
> + decoded[i] = residual[i] + decoded[i-1];
> + break;
> + case 2:
> + for (i = pred_order; i < blocksize; i++)
> + decoded[i] = residual[i] + 2*decoded[i-1] - decoded[i-2];
> + break;
> + case 3:
> + for (i = pred_order; i < blocksize; i++)
> + decoded[i] = residual[i] + 3*decoded[i-1] - 3*decoded[i-2] +
> decoded[i-3];
> + break;
> + case 4:
> + for (i = pred_order; i < blocksize; i++)
> + decoded[i] = residual[i] + 4*decoded[i-1] - 6*decoded[i-2] +
> 4*decoded[i-3] - decoded[i-4];
> + break;
> + default:
> + av_log(s->avctx, AV_LOG_ERROR, "illegal pred order %d\n",
> pred_order);
> + return AVERROR_INVALIDDATA;
> + }
> +
> + return 0;
> +}
> +
> static void lpc_analyze_remodulate(SUINT32 *decoded, const int coeffs[32],
> int order, int qlevel, int len, int
> bps)
> {
> @@ -402,12 +511,53 @@ static int decode_subframe_lpc(FLACContext *s,
> int32_t *decoded, int pred_order,
> return 0;
> }
>
> +static int decode_subframe_lpc_33bps(FLACContext *s, int64_t *decoded,
> + int32_t *residual, int pred_order)
> +{
> + int i, j, ret;
> + int coeff_prec, qlevel;
> + int coeffs[32];
> +
> + /* warm up samples */
> + for (i = 0; i < pred_order; i++) {
> + decoded[i] = get_sbits64(&s->gb, 33);
> + }
> +
> + coeff_prec = get_bits(&s->gb, 4) + 1;
> + if (coeff_prec == 16) {
> + av_log(s->avctx, AV_LOG_ERROR, "invalid coeff precision\n");
> + return AVERROR_INVALIDDATA;
> + }
> + qlevel = get_sbits(&s->gb, 5);
> + if (qlevel < 0) {
> + av_log(s->avctx, AV_LOG_ERROR, "qlevel %d not supported, maybe
> buggy stream\n",
> + qlevel);
> + return AVERROR_INVALIDDATA;
> + }
> +
> + for (i = 0; i < pred_order; i++) {
> + coeffs[pred_order - i - 1] = get_sbits(&s->gb, coeff_prec);
> + }
> +
> + if ((ret = decode_residuals(s, residual, pred_order)) < 0)
> + return ret;
> +
> + for (i = pred_order; i < s->blocksize; i++, decoded++) {
> + int64_t sum = 0;
> + for (j = 0; j < pred_order; j++)
> + sum += (int64_t)coeffs[j] * decoded[j];
> + decoded[j] = residual[i] + (sum >> qlevel);
> + }
> +
> + return 0;
> +}
> +
> static inline int decode_subframe(FLACContext *s, int channel)
> {
> int32_t *decoded = s->decoded[channel];
> int type, wasted = 0;
> int bps = s->flac_stream_info.bps;
> - int i, tmp, ret;
> + int i, ret;
>
> if (channel == 0) {
> if (s->ch_mode == FLAC_CHMODE_RIGHT_SIDE)
> @@ -436,34 +586,63 @@ static inline int decode_subframe(FLACContext *s,
> int channel)
> wasted = 1 + get_unary(&s->gb, 1, get_bits_left(&s->gb));
> bps -= wasted;
> }
> - if (bps > 32) {
> - avpriv_report_missing_feature(s->avctx, "Decorrelated bit depth >
> 32");
> - return AVERROR_PATCHWELCOME;
> - }
>
> //FIXME use av_log2 for types
> if (type == 0) {
> - tmp = get_sbits_long(&s->gb, bps);
> - for (i = 0; i < s->blocksize; i++)
> - decoded[i] = tmp;
> + if (bps < 33) {
> + int32_t tmp = get_sbits_long(&s->gb, bps);
> + for (i = 0; i < s->blocksize; i++)
> + decoded[i] = tmp;
> + } else {
> + int64_t tmp = get_sbits64(&s->gb, 33);
> + for (i = 0; i < s->blocksize; i++)
> + s->decoded_33bps[i] = tmp;
> + }
> } else if (type == 1) {
> - for (i = 0; i < s->blocksize; i++)
> - decoded[i] = get_sbits_long(&s->gb, bps);
> + if (bps < 33) {
> + for (i = 0; i < s->blocksize; i++)
> + decoded[i] = get_sbits_long(&s->gb, bps);
> + } else {
> + for (i = 0; i < s->blocksize; i++)
> + s->decoded_33bps[i] = get_sbits64(&s->gb, 33);
> + }
> } else if ((type >= 8) && (type <= 12)) {
> - if ((ret = decode_subframe_fixed(s, decoded, type & ~0x8, bps)) <
> 0)
> - return ret;
> + int order = type & ~0x8;
> + if (bps < 33) {
> + if (bps + order <= 32) {
> + if ((ret = decode_subframe_fixed(s, decoded, order, bps))
> < 0)
> + return ret;
> + } else {
> + if ((ret = decode_subframe_fixed_wide(s, decoded, order,
> bps)) < 0)
> + return ret;
> + }
> + } else {
> + if ((ret = decode_subframe_fixed_33bps(s, s->decoded_33bps,
> decoded, order)) < 0)
> + return ret;
> + }
> } else if (type >= 32) {
> - if ((ret = decode_subframe_lpc(s, decoded, (type & ~0x20)+1,
> bps)) < 0)
> - return ret;
> + if (bps < 33) {
> + if ((ret = decode_subframe_lpc(s, decoded, (type & ~0x20)+1,
> bps)) < 0)
> + return ret;
> + } else {
> + if ((ret = decode_subframe_lpc_33bps(s, s->decoded_33bps,
> decoded, (type & ~0x20)+1)) < 0)
> + return ret;
> + }
> } else {
> av_log(s->avctx, AV_LOG_ERROR, "invalid coding type\n");
> return AVERROR_INVALIDDATA;
> }
>
> - if (wasted && wasted < 32) {
> - int i;
> - for (i = 0; i < s->blocksize; i++)
> - decoded[i] = (unsigned)decoded[i] << wasted;
> + if (wasted) {
> + if (wasted+bps == 33) {
> + int i;
> + for (i = 0; i < s->blocksize; i++)
> + s->decoded_33bps[i] = (uint64_t)decoded[i] << wasted;
> + } else if (wasted < 32) {
> + int i;
> + for (i = 0; i < s->blocksize; i++)
> + decoded[i] = (unsigned)decoded[i] << wasted;
> + }
> }
>
> return 0;
> @@ -554,6 +733,26 @@ static int decode_frame(FLACContext *s)
> return 0;
> }
>
> +static void decorrelate_33bps(int ch_mode, int32_t **decoded, int64_t
> *decoded_33bps, int len)
> +{
> + int i;
> + if (ch_mode == FLAC_CHMODE_LEFT_SIDE ) {
> + for (i = 0; i < len; i++)
> + decoded[1][i] = decoded[0][i] - decoded_33bps[i];
> + } else if (ch_mode == FLAC_CHMODE_RIGHT_SIDE ) {
> + for (i = 0; i < len; i++)
> + decoded[0][i] = decoded[1][i] + decoded_33bps[i];
> + } else if (ch_mode == FLAC_CHMODE_MID_SIDE ) {
> + for (i = 0; i < len; i++) {
> + uint64_t a = decoded[0][i];
> + int64_t b = decoded_33bps[i];
> + a -= b >> 1;
> + decoded[0][i] = (a + b);
> + decoded[1][i] = a;
> + }
> + }
> +}
> +
> static int flac_decode_frame(AVCodecContext *avctx, AVFrame *frame,
> int *got_frame_ptr, AVPacket *avpkt)
> {
> @@ -618,9 +817,15 @@ static int flac_decode_frame(AVCodecContext *avctx,
> AVFrame *frame,
> if ((ret = ff_thread_get_buffer(avctx, frame, 0)) < 0)
> return ret;
>
> - s->dsp.decorrelate[s->ch_mode](frame->data, s->decoded,
> - s->flac_stream_info.channels,
> - s->blocksize, s->sample_shift);
> + if (s->flac_stream_info.bps == 32 && s->ch_mode > 0) {
> + decorrelate_33bps(s->ch_mode, s->decoded, s->decoded_33bps,
> s->blocksize);
> + s->dsp.decorrelate[0](frame->data, s->decoded,
> s->flac_stream_info.channels,
> + s->blocksize, s->sample_shift);
> + } else {
> + s->dsp.decorrelate[s->ch_mode](frame->data, s->decoded,
> + s->flac_stream_info.channels,
> + s->blocksize, s->sample_shift);
> + }
>
> if (bytes_read > buf_size) {
> av_log(s->avctx, AV_LOG_ERROR, "overread: %d\n", bytes_read -
> buf_size);
> @@ -641,6 +846,7 @@ static av_cold int flac_decode_close(AVCodecContext
> *avctx)
> FLACContext *s = avctx->priv_data;
>
> av_freep(&s->decoded_buffer);
> + av_freep(&s->decoded_buffer_33bps);
>
> return 0;
> }
> diff --git a/libavcodec/get_bits.h b/libavcodec/get_bits.h
> index 16f8af5107..c12b62d4be 100644
> --- a/libavcodec/get_bits.h
> +++ b/libavcodec/get_bits.h
> @@ -597,6 +597,18 @@ static inline int get_sbits_long(GetBitContext *s,
> int n)
> return sign_extend(get_bits_long(s, n), n);
> }
>
> +/**
> + * Read 0-64 bits as a signed integer.
> + */
> +static inline int64_t get_sbits64(GetBitContext *s, int n)
> +{
> + // sign_extend(x, 0) is undefined
> + if (!n)
> + return 0;
> +
> + return sign_extend64(get_bits64(s, n), n);
> +}
> +
> /**
> * Show 0-32 bits.
> */
> diff --git a/libavcodec/mathops.h b/libavcodec/mathops.h
> index f81d21f9c4..8a82d9d086 100644
> --- a/libavcodec/mathops.h
> +++ b/libavcodec/mathops.h
> @@ -137,6 +137,15 @@ static inline av_const int sign_extend(int val,
> unsigned bits)
> }
> #endif
>
> +#ifndef sign_extend64
> +static inline av_const int64_t sign_extend64(int64_t val, unsigned bits)
> +{
> + unsigned shift = 8 * sizeof(int64_t) - bits;
> + union { uint64_t u; int64_t s; } v = { (uint64_t) val << shift };
> + return v.s >> shift;
> +}
> +#endif
> +
> #ifndef zero_extend
> static inline av_const unsigned zero_extend(unsigned val, unsigned bits)
> {
> --
> 2.30.2
>
>
With this I'd like to once more bring this patch to the attention of the
mailing list.
Additionally, here's a file to test with:
http://www.audiograaf.nl/misc_stuff/32-bit-test-FLAC.flac
More information about the ffmpeg-devel
mailing list