[FFmpeg-devel] [PATCH v4 5/8] avcodec/pcm_rechunk_bsf: add bitstream filter to rechunk pcm audio

Tue May 5 09:24:36 EEST 2020

Marton Balint:
> Signed-off-by: Marton Balint <cus at passwd.hu>
> ---
>  Changelog                      |   1 +
>  doc/bitstream_filters.texi     |  30 ++++++
>  libavcodec/Makefile            |   1 +
>  libavcodec/bitstream_filters.c |   1 +
>  libavcodec/pcm_rechunk_bsf.c   | 220 +++++++++++++++++++++++++++++++++++++++++
>  libavcodec/version.h           |   2 +-
>  6 files changed, 254 insertions(+), 1 deletion(-)
>  create mode 100644 libavcodec/pcm_rechunk_bsf.c
> 
> diff --git a/Changelog b/Changelog
> index 83b8a4a46e..883e0bff99 100644
> --- a/Changelog
> +++ b/Changelog
> @@ -63,6 +63,7 @@ version <next>:
>  - maskedthreshold filter
>  - Support for muxing pcm and pgs in m2ts
>  - Cunning Developments ADPCM decoder
> +- pcm_rechunk bitstream filter
>  
>  
>  version 4.2:
> diff --git a/doc/bitstream_filters.texi b/doc/bitstream_filters.texi
> index 8fe5b3ad75..287d320cc0 100644
> --- a/doc/bitstream_filters.texi
> +++ b/doc/bitstream_filters.texi
> @@ -548,6 +548,36 @@ ffmpeg -i INPUT -c copy -bsf noise[=1] output.mkv
>  @section null
>  This bitstream filter passes the packets through unchanged.
>  
> +@section pcm_rechunk
> +
> +Repacketize PCM audio to a fixed number of samples per packet or a fixed packet
> +rate per second. This is similar to the @ref{asetnsamples,,asetnsamples audio
> +filter,ffmpeg-filters} but works on audio packets instead of audio frames.
> +
> +@table @option
> +@item nb_out_samples, n
> +Set the number of samples per each output audio packet. The number is intended
> +as the number of samples @emph{per each channel}. Default value is 1024.
> +
> +@item pad, p
> +If set to 1, the filter will pad the last audio packet with silence, so that it
> +will contain the same number of samples (or roughly the same number of samples,
> +see @option{frame_rate}) as the previous ones. Default value is 1.
> +
> +@item frame_rate, r
> +This option makes the filter output a fixed numer of packets per second instead

Typo: "numer" should be "number".

> +of a fixed number of samples per packet. If the audio sample rate is not
> +divisible by the frame rate then the number of samples will not be constant but
> +will vary slightly so that each packet will start as close to the frame
> +boundary as possible. Using this option has precedence over @option{nb_out_samples}.
> +@end table
> +
> +You can generate the well known 1602-1601-1602-1601-1602 pattern of 48kHz audio
> +for NTSC frame rate using the @option{frame_rate} option.
> +@example
> +ffmpeg -f lavfi -i sine=r=48000:d=1 -c pcm_s16le -bsf pcm_rechunk=r=30000/1001 -f framecrc -
> +@end example
> +
>  @section prores_metadata
>  
>  Modify color property metadata embedded in prores stream.
> diff --git a/libavcodec/Makefile b/libavcodec/Makefile
> index 28076c2c83..f5dcbb44ee 100644
> --- a/libavcodec/Makefile
> +++ b/libavcodec/Makefile
> @@ -1116,6 +1116,7 @@ OBJS-$(CONFIG_MP3_HEADER_DECOMPRESS_BSF)  += mp3_header_decompress_bsf.o \
>  OBJS-$(CONFIG_MPEG2_METADATA_BSF)         += mpeg2_metadata_bsf.o
>  OBJS-$(CONFIG_NOISE_BSF)                  += noise_bsf.o
>  OBJS-$(CONFIG_NULL_BSF)                   += null_bsf.o
> +OBJS-$(CONFIG_PCM_RECHUNK_BSF)            += pcm_rechunk_bsf.o
>  OBJS-$(CONFIG_PRORES_METADATA_BSF)        += prores_metadata_bsf.o
>  OBJS-$(CONFIG_REMOVE_EXTRADATA_BSF)       += remove_extradata_bsf.o
>  OBJS-$(CONFIG_TEXT2MOVSUB_BSF)            += movsub_bsf.o
> diff --git a/libavcodec/bitstream_filters.c b/libavcodec/bitstream_filters.c
> index 6b5ffe4d70..9e701191f8 100644
> --- a/libavcodec/bitstream_filters.c
> +++ b/libavcodec/bitstream_filters.c
> @@ -49,6 +49,7 @@ extern const AVBitStreamFilter ff_mpeg4_unpack_bframes_bsf;
>  extern const AVBitStreamFilter ff_mov2textsub_bsf;
>  extern const AVBitStreamFilter ff_noise_bsf;
>  extern const AVBitStreamFilter ff_null_bsf;
> +extern const AVBitStreamFilter ff_pcm_rechunk_bsf;
>  extern const AVBitStreamFilter ff_prores_metadata_bsf;
>  extern const AVBitStreamFilter ff_remove_extradata_bsf;
>  extern const AVBitStreamFilter ff_text2movsub_bsf;
> diff --git a/libavcodec/pcm_rechunk_bsf.c b/libavcodec/pcm_rechunk_bsf.c
> new file mode 100644
> index 0000000000..b528ed0c71
> --- /dev/null
> +++ b/libavcodec/pcm_rechunk_bsf.c
> @@ -0,0 +1,220 @@
> +/*
> + * Copyright (c) 2020 Marton Balint
> + *
> + * This file is part of FFmpeg.
> + *
> + * FFmpeg is free software; you can redistribute it and/or
> + * modify it under the terms of the GNU Lesser General Public
> + * License as published by the Free Software Foundation; either
> + * version 2.1 of the License, or (at your option) any later version.
> + *
> + * FFmpeg is distributed in the hope that it will be useful,
> + * but WITHOUT ANY WARRANTY; without even the implied warranty of
> + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
> + * Lesser General Public License for more details.
> + *
> + * You should have received a copy of the GNU Lesser General Public
> + * License along with FFmpeg; if not, write to the Free Software
> + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
> + */
> +
> +#include "avcodec.h"
> +#include "bsf.h"
> +#include "libavutil/avassert.h"
> +#include "libavutil/opt.h"
> +
> +typedef struct PCMContext {
> +    const AVClass *class;
> +
> +    int nb_out_samples;
> +    int pad;
> +    AVRational frame_rate;
> +
> +    AVPacket *in_pkt;
> +    AVPacket *out_pkt;
> +    int sample_size;
> +    int64_t n;
> +} PCMContext;
> +
> +static int init(AVBSFContext *ctx)
> +{
> +    PCMContext *s = ctx->priv_data;
> +    AVRational sr = av_make_q(ctx->par_in->sample_rate, 1);
> +    int64_t min_samples;
> +
> +    if (ctx->par_in->channels <= 0 || ctx->par_in->sample_rate <= 0)
> +        return AVERROR(EINVAL);
> +
> +    ctx->time_base_out = av_inv_q(sr);
> +    s->sample_size = ctx->par_in->channels * av_get_bits_per_sample(ctx->par_in->codec_id) / 8;
> +
> +    if (s->frame_rate.num) {
> +        min_samples = av_rescale_q_rnd(1, sr, s->frame_rate, AV_ROUND_DOWN);
> +    } else {
> +        min_samples = s->nb_out_samples;
> +    }
> +    if (min_samples <= 0 || min_samples > INT_MAX / s->sample_size - 1)
> +        return AVERROR(EINVAL);
> +
> +    s->in_pkt  = av_packet_alloc();
> +    s->out_pkt = av_packet_alloc();
> +    if (!s->in_pkt || !s->out_pkt)
> +        return AVERROR(ENOMEM);
> +
> +    return 0;
> +}
> +
> +static void uninit(AVBSFContext *ctx)
> +{
> +    PCMContext *s = ctx->priv_data;
> +    av_packet_free(&s->in_pkt);
> +    av_packet_free(&s->out_pkt);
> +}
> +
> +static void flush(AVBSFContext *ctx)
> +{
> +    PCMContext *s = ctx->priv_data;
> +    av_packet_unref(s->in_pkt);
> +    av_packet_unref(s->out_pkt);
> +    s->n = 0;
> +}
> +
> +static int send_packet(PCMContext *s, int nb_samples, AVPacket *pkt)
> +{
> +    pkt->duration = nb_samples;
> +    s->n++;
> +    return 0;
> +}
> +
> +static void drain_packet(AVPacket *pkt, int drain_data, int drain_samples)
> +{
> +    pkt->size -= drain_data;
> +    pkt->data += drain_data;
> +    if (pkt->dts != AV_NOPTS_VALUE)
> +        pkt->dts += drain_samples;
> +    if (pkt->pts != AV_NOPTS_VALUE)
> +        pkt->pts += drain_samples;
> +}
> +
> +static int get_next_nb_samples(AVBSFContext *ctx)
> +{
> +    PCMContext *s = ctx->priv_data;
> +    if (s->frame_rate.num) {
> +        AVRational sr = av_make_q(ctx->par_in->sample_rate, 1);
> +        return av_rescale_q(s->n + 1, sr, s->frame_rate) - av_rescale_q(s->n, sr, s->frame_rate);
> +    } else {
> +        return s->nb_out_samples;
> +    }
> +}
> +
> +static int rechunk_filter(AVBSFContext *ctx, AVPacket *pkt)
> +{
> +    PCMContext *s = ctx->priv_data;
> +    int nb_samples = get_next_nb_samples(ctx);
> +    int data_size = nb_samples * s->sample_size;
> +    int ret;
> +
> +    do {
> +        if (s->in_pkt->size) {
> +            if (s->out_pkt->size || s->in_pkt->size < data_size) {
> +                int drain = FFMIN(s->in_pkt->size, data_size - s->out_pkt->size);
> +                if (!s->out_pkt->size) {
> +                    ret = av_new_packet(s->out_pkt, data_size);
> +                    if (ret < 0)
> +                        return ret;
> +                    ret = av_packet_copy_props(s->out_pkt, s->in_pkt);
> +                    if (ret < 0) {
> +                        av_packet_unref(s->out_pkt);
> +                        return ret;
> +                    }
> +                    s->out_pkt->size = 0;
> +                }
> +                memcpy(s->out_pkt->data + s->out_pkt->size, s->in_pkt->data, drain);
> +                s->out_pkt->size += drain;
> +                drain_packet(s->in_pkt, drain, drain / s->sample_size);
> +                if (!s->in_pkt->size)
> +                    av_packet_unref(s->in_pkt);
> +                if (s->out_pkt->size == data_size) {
> +                    av_packet_move_ref(pkt, s->out_pkt);
> +                    return send_packet(s, nb_samples, pkt);
> +                }
> +            } else if (s->in_pkt->size > data_size) {
> +                ret = av_packet_ref(pkt, s->in_pkt);
> +                if (ret < 0)
> +                    return ret;
> +                pkt->size = data_size;

I just wonder: did you try av_shrink_packet() and find out that it simply
zeroes the data after the end of the packet without any regard to whether
the packet is writable or not, or did you simply do it the way you do it
here?

> +                drain_packet(s->in_pkt, data_size, nb_samples);
> +                return send_packet(s, nb_samples, pkt);
> +            } else {
> +                av_assert0(s->in_pkt->size == data_size);
> +                av_packet_move_ref(pkt, s->in_pkt);
> +                return send_packet(s, nb_samples, pkt);
> +            }
> +        }
> +
> +        ret = ff_bsf_get_packet_ref(ctx, s->in_pkt);
> +        if (ret == AVERROR_EOF && s->out_pkt->size) {
> +            if (s->pad) {
> +                memset(s->out_pkt->data + s->out_pkt->size, 0, data_size - s->out_pkt->size);
> +                s->out_pkt->size = data_size;
> +            } else {
> +                nb_samples = s->out_pkt->size / s->sample_size;
> +            }
> +            av_packet_move_ref(pkt, s->out_pkt);
> +            return send_packet(s, nb_samples, pkt);
> +        }
> +        if (ret >= 0)
> +            av_packet_rescale_ts(s->in_pkt, ctx->time_base_in, ctx->time_base_out);
> +    } while (ret >= 0);
> +
> +    return ret;
> +}
> +
> +#define OFFSET(x) offsetof(PCMContext, x)
> +#define FLAGS (AV_OPT_FLAG_AUDIO_PARAM | AV_OPT_FLAG_BSF_PARAM)
> +static const AVOption options[] = {
> +    { "nb_out_samples", "set the number of per-packet output samples", OFFSET(nb_out_samples),   AV_OPT_TYPE_INT, {.i64=1024}, 1, INT_MAX, FLAGS },
> +    { "n",              "set the number of per-packet output samples", OFFSET(nb_out_samples),   AV_OPT_TYPE_INT, {.i64=1024}, 1, INT_MAX, FLAGS },
> +    { "pad",            "pad last packet with zeros",                  OFFSET(pad),             AV_OPT_TYPE_BOOL, {.i64=1} ,   0,       1, FLAGS },
> +    { "p",              "pad last packet with zeros",                  OFFSET(pad),             AV_OPT_TYPE_BOOL, {.i64=1} ,   0,       1, FLAGS },
> +    { "frame_rate",     "set number of packets per second",            OFFSET(frame_rate),  AV_OPT_TYPE_RATIONAL, {.dbl=0},    0, INT_MAX, FLAGS },
> +    { "r",              "set number of packets per second",            OFFSET(frame_rate),  AV_OPT_TYPE_RATIONAL, {.dbl=0},    0, INT_MAX, FLAGS },
> +    { NULL },
> +};
> +
> +static const AVClass pcm_rechunk_class = {
> +    .class_name = "pcm_rechunk_bsf",
> +    .item_name  = av_default_item_name,
> +    .option     = options,
> +    .version    = LIBAVUTIL_VERSION_INT,
> +};
> +
> +static const enum AVCodecID codec_ids[] = {
> +    AV_CODEC_ID_PCM_S16LE,
> +    AV_CODEC_ID_PCM_S16BE,
> +    AV_CODEC_ID_PCM_S8,
> +    AV_CODEC_ID_PCM_S32LE,
> +    AV_CODEC_ID_PCM_S32BE,
> +    AV_CODEC_ID_PCM_S24LE,
> +    AV_CODEC_ID_PCM_S24BE,
> +    AV_CODEC_ID_PCM_F32BE,
> +    AV_CODEC_ID_PCM_F32LE,
> +    AV_CODEC_ID_PCM_F64BE,
> +    AV_CODEC_ID_PCM_F64LE,
> +    AV_CODEC_ID_PCM_S64LE,
> +    AV_CODEC_ID_PCM_S64BE,
> +    AV_CODEC_ID_PCM_F16LE,
> +    AV_CODEC_ID_PCM_F24LE,
> +    AV_CODEC_ID_NONE,
> +};
> +
> +const AVBitStreamFilter ff_pcm_rechunk_bsf = {
> +    .name           = "pcm_rechunk",
> +    .priv_data_size = sizeof(PCMContext),
> +    .priv_class     = &pcm_rechunk_class,
> +    .filter         = rechunk_filter,
> +    .init           = init,
> +    .flush          = flush,
> +    .close          = uninit,
> +    .codec_ids      = codec_ids,
> +};
> diff --git a/libavcodec/version.h b/libavcodec/version.h
> index 3de16c884c..a8cc09d0f6 100644
> --- a/libavcodec/version.h
> +++ b/libavcodec/version.h
> @@ -28,7 +28,7 @@
>  #include "libavutil/version.h"
>  
>  #define LIBAVCODEC_VERSION_MAJOR  58
> -#define LIBAVCODEC_VERSION_MINOR  82
> +#define LIBAVCODEC_VERSION_MINOR  83
>  #define LIBAVCODEC_VERSION_MICRO 100
>  
>  #define LIBAVCODEC_VERSION_INT  AV_VERSION_INT(LIBAVCODEC_VERSION_MAJOR, \
> 
LGTM apart from that.

- Andreas