[FFmpeg-devel] [PATCH 7/8] avcodec/pcm_rechunk_bsf: add bitstream filter to rechunk pcm audio

Tue Apr 7 20:03:16 EEST 2020

Marton Balint:
> Signed-off-by: Marton Balint <cus at passwd.hu>
> ---
>  Changelog                      |   1 +
>  doc/bitstream_filters.texi     |  30 ++++++
>  libavcodec/Makefile            |   1 +
>  libavcodec/bitstream_filters.c |   1 +
>  libavcodec/pcm_rechunk_bsf.c   | 206 +++++++++++++++++++++++++++++++++++++++++
>  libavcodec/version.h           |   4 +-
>  6 files changed, 241 insertions(+), 2 deletions(-)
>  create mode 100644 libavcodec/pcm_rechunk_bsf.c
> 
> diff --git a/Changelog b/Changelog
> index 05b9a84562..dddaf02199 100644
> --- a/Changelog
> +++ b/Changelog
> @@ -55,6 +55,7 @@ version <next>:
>  - CRI HCA decoder
>  - CRI HCA demuxer
>  - overlay_cuda filter
> +- pcm_rechunk bitstream filter
>  
>  
>  version 4.2:
> diff --git a/doc/bitstream_filters.texi b/doc/bitstream_filters.texi
> index 8fe5b3ad75..70c276feed 100644
> --- a/doc/bitstream_filters.texi
> +++ b/doc/bitstream_filters.texi
> @@ -548,6 +548,36 @@ ffmpeg -i INPUT -c copy -bsf noise[=1] output.mkv
>  @section null
>  This bitstream filter passes the packets through unchanged.
>  
> +@section pcm_rechunk
> +
> +Repacketize PCM audio to a fixed number of samples per packet or a fixed packet
> +rate per second. This is similar to the @ref{asetnsamples,,asetnsamples audio
> +filter,ffmpeg-filters} but works on audio packets instead of audio frames.
> +
> +@table @option
> +@item nb_out_samples, n
> +Set the number of samples per each output audio packet. The number is intended
> +as the number of samples @emph{per each channel}. Default value is 1024.
> +
> +@item pad, p
> +If set to 1, the filter will pad the last audio packet with silence, so that it
> +will contain the same number of samples (or roughly the same number of samples,
> +see @option{frame_rate}) as the previous ones. Default value is 1.
> +
> +@item frame_rate, r
> +This option makes the filter output a fixed number of packets per second instead
> +of a fixed number of samples per packet. If the audio sample rate is not
> +divisible by the frame rate then the number of samples will not be constant but
> +will vary slightly so that each packet will start as close to the frame
> +boundary as possible. Using this option has precedence over @option{nb_out_samples}.
> +@end table
> +
> +You can generate the well known 1602-1601-1602-1601-1602 pattern of 48kHz audio
> +for NTSC frame rate using the @option{frame_rate} option.
> +@example
> +ffmpeg -f lavfi -i sine=r=48000:d=1 -c pcm_s16le -bsf pcm_rechunk=r=30000/1001 -f framecrc -
> +@end example
> +
>  @section prores_metadata
>  
>  Modify color property metadata embedded in prores stream.
> diff --git a/libavcodec/Makefile b/libavcodec/Makefile
> index c1c9a44f2b..a49391f97f 100644
> --- a/libavcodec/Makefile
> +++ b/libavcodec/Makefile
> @@ -1110,6 +1110,7 @@ OBJS-$(CONFIG_MP3_HEADER_DECOMPRESS_BSF)  += mp3_header_decompress_bsf.o \
>  OBJS-$(CONFIG_MPEG2_METADATA_BSF)         += mpeg2_metadata_bsf.o
>  OBJS-$(CONFIG_NOISE_BSF)                  += noise_bsf.o
>  OBJS-$(CONFIG_NULL_BSF)                   += null_bsf.o
> +OBJS-$(CONFIG_PCM_RECHUNK_BSF)            += pcm_rechunk_bsf.o
>  OBJS-$(CONFIG_PRORES_METADATA_BSF)        += prores_metadata_bsf.o
>  OBJS-$(CONFIG_REMOVE_EXTRADATA_BSF)       += remove_extradata_bsf.o
>  OBJS-$(CONFIG_TEXT2MOVSUB_BSF)            += movsub_bsf.o
> diff --git a/libavcodec/bitstream_filters.c b/libavcodec/bitstream_filters.c
> index 6b5ffe4d70..9e701191f8 100644
> --- a/libavcodec/bitstream_filters.c
> +++ b/libavcodec/bitstream_filters.c
> @@ -49,6 +49,7 @@ extern const AVBitStreamFilter ff_mpeg4_unpack_bframes_bsf;
>  extern const AVBitStreamFilter ff_mov2textsub_bsf;
>  extern const AVBitStreamFilter ff_noise_bsf;
>  extern const AVBitStreamFilter ff_null_bsf;
> +extern const AVBitStreamFilter ff_pcm_rechunk_bsf;
>  extern const AVBitStreamFilter ff_prores_metadata_bsf;
>  extern const AVBitStreamFilter ff_remove_extradata_bsf;
>  extern const AVBitStreamFilter ff_text2movsub_bsf;
> diff --git a/libavcodec/pcm_rechunk_bsf.c b/libavcodec/pcm_rechunk_bsf.c
> new file mode 100644
> index 0000000000..e02a205eb6
> --- /dev/null
> +++ b/libavcodec/pcm_rechunk_bsf.c
> @@ -0,0 +1,206 @@
> +/*
> + * Copyright (c) 2020 Marton Balint
> + *
> + * This file is part of FFmpeg.
> + *
> + * FFmpeg is free software; you can redistribute it and/or
> + * modify it under the terms of the GNU Lesser General Public
> + * License as published by the Free Software Foundation; either
> + * version 2.1 of the License, or (at your option) any later version.
> + *
> + * FFmpeg is distributed in the hope that it will be useful,
> + * but WITHOUT ANY WARRANTY; without even the implied warranty of
> + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
> + * Lesser General Public License for more details.
> + *
> + * You should have received a copy of the GNU Lesser General Public
> + * License along with FFmpeg; if not, write to the Free Software
> + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
> + */
> +
> +#include "avcodec.h"
> +#include "bsf.h"
> +#include "libavutil/avassert.h"
> +#include "libavutil/mem.h"
> +#include "libavutil/opt.h"
> +
> +typedef struct PCMContext {
> +    const AVClass *class;
> +
> +    int nb_out_samples;
> +    int pad;
> +    AVRational frame_rate;
> +
> +    AVPacket *in_pkt;
> +    AVPacket *out_pkt;
> +    int sample_size;
> +    int max_packet_size;
> +    int64_t n;
> +    int64_t dts;
> +} PCMContext;
> +
> +static int init(AVBSFContext *ctx)
> +{
> +    PCMContext *s = ctx->priv_data;
> +    AVRational sr = av_make_q(ctx->par_in->sample_rate, 1);
> +    int64_t max_samples;
> +
> +    ctx->time_base_out = av_inv_q(sr);

Is it actually guaranteed that par_in->sample_rate is not 0?

> +    s->in_pkt = av_packet_alloc();
> +    s->out_pkt = av_packet_alloc();
> +    if (!s->in_pkt || !s->out_pkt)
> +        return AVERROR(ENOMEM);

These allocations will have been wasted if one errors out below, so they
should be moved to the end of this function.

> +
> +    s->sample_size = ctx->par_in->channels * av_get_bits_per_sample(ctx->par_in->codec_id) / 8;
> +
> +    if (s->frame_rate.num) {
> +        if (av_rescale_q_rnd(1, sr, s->frame_rate, AV_ROUND_DOWN) <= 0)
> +            return AVERROR(EINVAL);
> +        max_samples = av_rescale_q_rnd(1, sr, s->frame_rate, AV_ROUND_UP);
> +    } else {
> +        max_samples = s->nb_out_samples;
> +    }
> +    if (max_samples > INT_MAX / s->sample_size)
> +        return AVERROR(EINVAL);
> +
> +    s->max_packet_size = max_samples * s->sample_size;
> +    return 0;
> +}
> +
> +static void uninit(AVBSFContext *ctx)
> +{
> +    PCMContext *s = ctx->priv_data;
> +    av_packet_free(&s->in_pkt);
> +    av_packet_free(&s->out_pkt);
> +}
> +
> +static void flush(AVBSFContext *ctx)
> +{
> +    PCMContext *s = ctx->priv_data;
> +    av_packet_unref(s->in_pkt);
> +    av_packet_unref(s->out_pkt);
> +    s->n = 0;
> +    s->dts = 0;
> +}
> +
> +static int send_packet(PCMContext *s, int nb_samples, AVPacket *pkt)
> +{
> +    pkt->dts = pkt->pts = s->dts;
> +    pkt->duration = nb_samples;
> +    s->dts += nb_samples;
> +    s->n++;
> +    return 0;
> +}
> +
> +static int rechunk_filter(AVBSFContext *ctx, AVPacket *pkt)
> +{
> +    PCMContext *s = ctx->priv_data;
> +    AVRational sr = av_make_q(ctx->par_in->sample_rate, 1);
> +    int nb_samples = s->frame_rate.num ? (av_rescale_q(s->n + 1, sr, s->frame_rate) - s->dts) : s->nb_out_samples;
> +    int data_size = nb_samples * s->sample_size;
> +    int ret;
> +
> +    if (!s->out_pkt->data) {
> +        ret = av_new_packet(s->out_pkt, s->max_packet_size);
> +        if (ret < 0)
> +            return ret;
> +        s->out_pkt->size = 0;
> +    }
> +
> +    do {
> +        if (s->in_pkt->size) {
> +            if (s->out_pkt->size || s->in_pkt->size < data_size) {
> +                int drain = FFMIN(s->in_pkt->size, data_size - s->out_pkt->size);
> +                if (!s->out_pkt->size) {
> +                    ret = av_packet_copy_props(s->out_pkt, s->in_pkt);
> +                    if (ret < 0)
> +                        return ret;
> +                }
> +                memcpy(s->out_pkt->data + s->out_pkt->size, s->in_pkt->data, drain);
> +                s->out_pkt->size += drain;
> +                s->in_pkt->size -= drain;
> +                s->in_pkt->data += drain;

This could be aligned on =.

> +                if (s->out_pkt->size == data_size) {
> +                    av_packet_move_ref(pkt, s->out_pkt);

If the current pkt is a packet with a smaller number of samples than the
maximum, then the data immediately after the packet data will not be the
(zeroed) padding, but uninitialized data before the zeroed padding. This
is not good (it won't lead to segfaults, but it might lead to Valgrind
warnings). See below for a suggestion on how to fix this.

> +                    return send_packet(s, nb_samples, pkt);
> +                }
> +                av_packet_unref(s->in_pkt);

If out_pkt initially already contained data and a new in_pkt provides
exactly as much data as needed to output another packet, then you will
set in_pkt->size to zero above, but you will not unref it. Given that
the code treats "size == 0" as a sign that the packet is blank, this will
lead to memleaks.

> +            } else if (s->in_pkt->size > data_size) {
> +                ret = av_packet_ref(pkt, s->in_pkt);
> +                if (ret < 0)
> +                    return ret;
> +                pkt->size = data_size;
> +                s->in_pkt->size -= data_size;
> +                s->in_pkt->data += data_size;
> +                return send_packet(s, nb_samples, pkt);
> +            } else {
> +                av_assert0(s->in_pkt->size == data_size);
> +                av_packet_move_ref(pkt, s->in_pkt);
> +                return send_packet(s, nb_samples, pkt);
> +            }
> +        }
> +
> +        ret = ff_bsf_get_packet_ref(ctx, s->in_pkt);

Doing this here in a loop is either pointless or an API violation (but
the internal API is not really documented anyway): The caller is
supposed to provide a packet via av_bsf_send_packet() and then call
av_bsf_receive_packet() until the bsf is completely drained. Then he
needs to send a new packet. The bsf meanwhile uses
ff_bsf_get_packet[_ref] to get the packet when
AVBitStreamFilter.filter() is executed. The bsf API implies that it is
impossible for two ff_bsf_get_packet_ref() calls to succeed in the same
AVBitStreamFilter.filter() call, so that your loop is actually a fake loop.

I therefore suggest to adapt rechunk_filter() as follows:
a) There is no reason at all why both in_pkt and out_pkt should be
non-blank at the same time (except during the brief period when one
copies from in to out, of course). The code needs to ensure this.
b) At the start of rechunk_filter(), you check for whether there is
enough data in in_pkt to output a complete packet. If there is, you
output it (and update in_pkt); if there is some, but not enough data in
in_pkt, you allocate exactly as much space in out_pkt as needed for the
next output packet, copy the remaining data (as well as the properties)
from in_pkt and unref in_pkt. (There is unfortunately no
av_packet_move_props(), maybe there should be one?)
c) If you did not exit rechunk_filter() in b), you read another packet
into in_pkt.
If out_pkt is empty and in_pkt contains enough data, you output this data.
If out_pkt is empty and in_pkt does not contain enough data, you return
AVERROR(EAGAIN).
If out_pkt is not empty, you copy as much data as needed/available into
out_pkt (which already has the right size); if there is enough data, you
output out_pkt (and potentially unref in_pkt if it has been exhausted).
If there is not enough data, you unref in_pkt and return AVERROR(EAGAIN).

(Notice that max_packet_size can be removed from PCMContext.)

In case you are wondering whether a) could be used to omit one of the
packets: It is possible to omit out_pkt (but I am not sure how
advantageous it would be):

b') If there is enough data in in_pkt for a complete packet, you output
it (via av_packet_move_ref() or av_packet_ref()); if not and if there is
data left in in_pkt and if the actual buffer of in_pkt is not already of
the right size, you copy the props of in_pkt to pkt and allocate a
buffer of the right size (with padding, of course) in pkt and copy the
remaining data of in_pkt to pkt and reset in_pkt. (Alternatively, you
could move in_pkt to pkt and directly allocate an AVBufferRef with the
right size, copy the data, unref the old buffer and replace it with the
new one.)
If there is not enough data in in_pkt and if in_pkt already contains a
buffer of the right size, then you simply move in_pkt to pkt.

c') Then you get a new packet and put it into in_pkt. In case pkt
already contains data, you copy as much data as needed/available into
pkt. If it is enough, you output pkt. If it is not enough, in_pkt can be
unreferenced and you move pkt into in_pkt and return AVERROR(EAGAIN).

It should go without saying that you need to somehow record whether
in_pkt already contains a buffer of the right size (this is trivial in
the approach above: if out_pkt contains data it also contains a buffer
of the right size). I prefer the first approach.

> +        if (ret == AVERROR_EOF && s->out_pkt->size) {
> +            if (s->pad) {
> +                memset(s->out_pkt->data + s->out_pkt->size, 0, data_size - s->out_pkt->size);
> +                s->out_pkt->size = data_size;
> +            } else {
> +                nb_samples = s->out_pkt->size / s->sample_size;
> +            }
> +            av_packet_move_ref(pkt, s->out_pkt);
> +            return send_packet(s, nb_samples, pkt);
> +        }
> +    } while (ret >= 0);
> +
> +    return ret;
> +}
> +
> +#define OFFSET(x) offsetof(PCMContext, x)
> +#define FLAGS (AV_OPT_FLAG_AUDIO_PARAM | AV_OPT_FLAG_BSF_PARAM)
> +static const AVOption options[] = {
> +    { "nb_out_samples", "set the number of per-packet output samples", OFFSET(nb_out_samples),   AV_OPT_TYPE_INT, {.i64=1024}, 1, INT_MAX, FLAGS },
> +    { "n",              "set the number of per-packet output samples", OFFSET(nb_out_samples),   AV_OPT_TYPE_INT, {.i64=1024}, 1, INT_MAX, FLAGS },
> +    { "pad",            "pad last packet with zeros",                  OFFSET(pad),             AV_OPT_TYPE_BOOL, {.i64=1} ,   0,       1, FLAGS },
> +    { "p",              "pad last packet with zeros",                  OFFSET(pad),             AV_OPT_TYPE_BOOL, {.i64=1} ,   0,       1, FLAGS },
> +    { "frame_rate",     "set number of packets per second",            OFFSET(frame_rate),  AV_OPT_TYPE_RATIONAL, {.dbl=0},    0, INT_MAX, FLAGS },
> +    { "r",              "set number of packets per second",            OFFSET(frame_rate),  AV_OPT_TYPE_RATIONAL, {.dbl=0},    0, INT_MAX, FLAGS },
> +    { NULL },
> +};
> +
> +static const AVClass metadata_class = {

You seem to have copied this name from the *_metadata_bsf bitstream
filters. It is not really appropriate here.

- Andreas

> +    .class_name = "pcm_rechunk_bsf",
> +    .item_name  = av_default_item_name,
> +    .option     = options,
> +    .version    = LIBAVUTIL_VERSION_INT,
> +};
> +
> +static const enum AVCodecID codec_ids[] = {
> +    AV_CODEC_ID_PCM_S16LE,
> +    AV_CODEC_ID_PCM_S16BE,
> +    AV_CODEC_ID_PCM_S8,
> +    AV_CODEC_ID_PCM_S32LE,
> +    AV_CODEC_ID_PCM_S32BE,
> +    AV_CODEC_ID_PCM_S24LE,
> +    AV_CODEC_ID_PCM_S24BE,
> +    AV_CODEC_ID_PCM_F32BE,
> +    AV_CODEC_ID_PCM_F32LE,
> +    AV_CODEC_ID_PCM_F64BE,
> +    AV_CODEC_ID_PCM_F64LE,
> +    AV_CODEC_ID_PCM_S64LE,
> +    AV_CODEC_ID_PCM_S64BE,
> +    AV_CODEC_ID_PCM_F16LE,
> +    AV_CODEC_ID_PCM_F24LE,
> +    AV_CODEC_ID_NONE,
> +};
> +
> +const AVBitStreamFilter ff_pcm_rechunk_bsf = {
> +    .name           = "pcm_rechunk",
> +    .priv_data_size = sizeof(PCMContext),
> +    .priv_class     = &metadata_class,
> +    .filter         = rechunk_filter,
> +    .init           = init,
> +    .flush          = flush,
> +    .close          = uninit,
> +    .codec_ids      = codec_ids,
> +};
> diff --git a/libavcodec/version.h b/libavcodec/version.h
> index f4d1d4de21..e62d1a7925 100644
> --- a/libavcodec/version.h
> +++ b/libavcodec/version.h
> @@ -28,8 +28,8 @@
>  #include "libavutil/version.h"
>  
>  #define LIBAVCODEC_VERSION_MAJOR  58
> -#define LIBAVCODEC_VERSION_MINOR  77
> -#define LIBAVCODEC_VERSION_MICRO 101
> +#define LIBAVCODEC_VERSION_MINOR  78
> +#define LIBAVCODEC_VERSION_MICRO 100
>  
>  #define LIBAVCODEC_VERSION_INT  AV_VERSION_INT(LIBAVCODEC_VERSION_MAJOR, \
>                                                 LIBAVCODEC_VERSION_MINOR, \
>