[FFmpeg-devel] [PATCH v3 5/6] avcodec/pcm_rechunk_bsf: add bitstream filter to rechunk pcm audio

Sun Apr 26 01:27:43 EEST 2020

Marton Balint:
> Signed-off-by: Marton Balint <cus at passwd.hu>
> ---
>  Changelog                      |   1 +
>  doc/bitstream_filters.texi     |  30 ++++++
>  libavcodec/Makefile            |   1 +
>  libavcodec/bitstream_filters.c |   1 +
>  libavcodec/pcm_rechunk_bsf.c   | 206 +++++++++++++++++++++++++++++++++++++++++
>  libavcodec/version.h           |   2 +-
>  6 files changed, 240 insertions(+), 1 deletion(-)
>  create mode 100644 libavcodec/pcm_rechunk_bsf.c
> 
> diff --git a/Changelog b/Changelog
> index d9fcd8bb0a..6b0c911279 100644
> --- a/Changelog
> +++ b/Changelog
> @@ -59,6 +59,7 @@ version <next>:
>  - mv30 decoder
>  - Expanded styling support for 3GPP Timed Text Subtitles (movtext)
>  - WebP parser
> +- pcm_rechunk bitstream filter
>  
>  
>  version 4.2:
> diff --git a/doc/bitstream_filters.texi b/doc/bitstream_filters.texi
> index 8fe5b3ad75..70c276feed 100644
> --- a/doc/bitstream_filters.texi
> +++ b/doc/bitstream_filters.texi
> @@ -548,6 +548,36 @@ ffmpeg -i INPUT -c copy -bsf noise[=1] output.mkv
>  @section null
>  This bitstream filter passes the packets through unchanged.
>  
> + at section pcm_rechunk
> +
> +Repacketize PCM audio to a fixed number of samples per packet or a fixed packet
> +rate per second. This is similar to the @ref{asetnsamples,,asetnsamples audio
> +filter,ffmpeg-filters} but works on audio packets instead of audio frames.
> +
> + at table @option
> + at item nb_out_samples, n
> +Set the number of samples per each output audio packet. The number is intended
> +as the number of samples @emph{per each channel}. Default value is 1024.
> +
> + at item pad, p
> +If set to 1, the filter will pad the last audio packet with silence, so that it
> +will contain the same number of samples (or roughly the same number of samples,
> +see @option{frame_rate}) as the previous ones. Default value is 1.
> +
> + at item frame_rate, r
> +This option makes the filter output a fixed numer of packets per second instead
> +of a fixed number of samples per packet. If the audio sample rate is not
> +divisible by the frame rate then the number of samples will not be constant but
> +will vary slightly so that each packet will start as close as to the frame

"as close to the frame boundary as possible" or "as close as possible to
the frame boundary"

> +boundary as possible. Using this option has precedence over @option{nb_out_samples}.
> + at end table
> +
> +You can generate the well known 1602-1601-1602-1601-1602 pattern of 48kHz audio
> +for NTSC frame rate using the @option{frame_rate} option.
> + at example
> +ffmpeg -f lavfi -i sine=r=48000:d=1 -c pcm_s16le -bsf pcm_rechunk=r=30000/1001 -f framecrc -
> + at end example
> +
>  @section prores_metadata
>  
>  Modify color property metadata embedded in prores stream.
> diff --git a/libavcodec/Makefile b/libavcodec/Makefile
> index 88944d9a3a..35968bdaf7 100644
> --- a/libavcodec/Makefile
> +++ b/libavcodec/Makefile
> @@ -1115,6 +1115,7 @@ OBJS-$(CONFIG_MP3_HEADER_DECOMPRESS_BSF)  += mp3_header_decompress_bsf.o \
>  OBJS-$(CONFIG_MPEG2_METADATA_BSF)         += mpeg2_metadata_bsf.o
>  OBJS-$(CONFIG_NOISE_BSF)                  += noise_bsf.o
>  OBJS-$(CONFIG_NULL_BSF)                   += null_bsf.o
> +OBJS-$(CONFIG_PCM_RECHUNK_BSF)            += pcm_rechunk_bsf.o
>  OBJS-$(CONFIG_PRORES_METADATA_BSF)        += prores_metadata_bsf.o
>  OBJS-$(CONFIG_REMOVE_EXTRADATA_BSF)       += remove_extradata_bsf.o
>  OBJS-$(CONFIG_TEXT2MOVSUB_BSF)            += movsub_bsf.o
> diff --git a/libavcodec/bitstream_filters.c b/libavcodec/bitstream_filters.c
> index 6b5ffe4d70..9e701191f8 100644
> --- a/libavcodec/bitstream_filters.c
> +++ b/libavcodec/bitstream_filters.c
> @@ -49,6 +49,7 @@ extern const AVBitStreamFilter ff_mpeg4_unpack_bframes_bsf;
>  extern const AVBitStreamFilter ff_mov2textsub_bsf;
>  extern const AVBitStreamFilter ff_noise_bsf;
>  extern const AVBitStreamFilter ff_null_bsf;
> +extern const AVBitStreamFilter ff_pcm_rechunk_bsf;
>  extern const AVBitStreamFilter ff_prores_metadata_bsf;
>  extern const AVBitStreamFilter ff_remove_extradata_bsf;
>  extern const AVBitStreamFilter ff_text2movsub_bsf;
> diff --git a/libavcodec/pcm_rechunk_bsf.c b/libavcodec/pcm_rechunk_bsf.c
> new file mode 100644
> index 0000000000..2a038fd79b
> --- /dev/null
> +++ b/libavcodec/pcm_rechunk_bsf.c
> @@ -0,0 +1,206 @@
> +/*
> + * Copyright (c) 2020 Marton Balint
> + *
> + * This file is part of FFmpeg.
> + *
> + * FFmpeg is free software; you can redistribute it and/or
> + * modify it under the terms of the GNU Lesser General Public
> + * License as published by the Free Software Foundation; either
> + * version 2.1 of the License, or (at your option) any later version.
> + *
> + * FFmpeg is distributed in the hope that it will be useful,
> + * but WITHOUT ANY WARRANTY; without even the implied warranty of
> + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
> + * Lesser General Public License for more details.
> + *
> + * You should have received a copy of the GNU Lesser General Public
> + * License along with FFmpeg; if not, write to the Free Software
> + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
> + */
> +
> +#include "avcodec.h"
> +#include "bsf.h"
> +#include "libavutil/avassert.h"
> +#include "libavutil/mem.h"

I don't see where this header would be used -- your allocations are all
performed implicitly by av_new_packet().

> +#include "libavutil/opt.h"
> +
> +typedef struct PCMContext {
> +    const AVClass *class;
> +
> +    int nb_out_samples;
> +    int pad;
> +    AVRational frame_rate;
> +
> +    AVPacket *in_pkt;
> +    AVPacket *out_pkt;
> +    int sample_size;
> +    int64_t n;
> +    int64_t dts;
> +} PCMContext;
> +
> +static int init(AVBSFContext *ctx)
> +{
> +    PCMContext *s = ctx->priv_data;
> +    AVRational sr = av_make_q(ctx->par_in->sample_rate, 1);
> +    int64_t min_samples;
> +
> +    if (ctx->par_in->channels <= 0 || ctx->par_in->sample_rate <= 0)
> +        return AVERROR(EINVAL);
> +
> +    ctx->time_base_out = av_inv_q(sr);
> +    s->sample_size = ctx->par_in->channels * av_get_bits_per_sample(ctx->par_in->codec_id) / 8;
> +
> +    if (s->frame_rate.num) {
> +        min_samples = av_rescale_q_rnd(1, sr, s->frame_rate, AV_ROUND_DOWN);
> +    } else {
> +        min_samples = s->nb_out_samples;
> +    }
> +    if (min_samples <= 0 || min_samples > INT_MAX / s->sample_size - 1)
> +        return AVERROR(EINVAL);
> +
> +    s->in_pkt = av_packet_alloc();
> +    s->out_pkt = av_packet_alloc();

Could be aligned on "=".

> +    if (!s->in_pkt || !s->out_pkt)
> +        return AVERROR(ENOMEM);
> +
> +    return 0;
> +}
> +
> +static void uninit(AVBSFContext *ctx)
> +{
> +    PCMContext *s = ctx->priv_data;
> +    av_packet_free(&s->in_pkt);
> +    av_packet_free(&s->out_pkt);
> +}
> +
> +static void flush(AVBSFContext *ctx)
> +{
> +    PCMContext *s = ctx->priv_data;
> +    av_packet_unref(s->in_pkt);
> +    av_packet_unref(s->out_pkt);
> +    s->n = 0;
> +    s->dts = 0;
> +}
> +
> +static int send_packet(PCMContext *s, int nb_samples, AVPacket *pkt)
> +{
> +    pkt->dts = pkt->pts = s->dts;
> +    pkt->duration = nb_samples;
> +    s->dts += nb_samples;

This implicitly presumes that the timebase is equal to the sample rate.
Is this actually guaranteed? (Notice that you can set the output
timebase as you want during init().)

And this filter does more than just repacketizing the samples: It also
discards the timing of its input and makes up completely new timestamps
and durations. This needs to be documented.

> +    s->n++;
> +    return 0;
> +}
> +
> +static int rechunk_filter(AVBSFContext *ctx, AVPacket *pkt)
> +{
> +    PCMContext *s = ctx->priv_data;
> +    AVRational sr = av_make_q(ctx->par_in->sample_rate, 1);
> +    int nb_samples = s->frame_rate.num ? (av_rescale_q(s->n + 1, sr, s->frame_rate) - s->dts) : s->nb_out_samples;
> +    int data_size = nb_samples * s->sample_size;
> +    int ret;
> +
> +    do {
> +        if (s->in_pkt->size) {
> +            if (s->out_pkt->size || s->in_pkt->size < data_size) {
> +                int drain = FFMIN(s->in_pkt->size, data_size - s->out_pkt->size);
> +                if (!s->out_pkt->size) {
> +                    ret = av_new_packet(s->out_pkt, data_size);
> +                    if (ret < 0)
> +                        return ret;
> +                    ret = av_packet_copy_props(s->out_pkt, s->in_pkt);
> +                    if (ret < 0) {
> +                        av_packet_unref(s->out_pkt);
> +                        return ret;
> +                    }
> +                    s->out_pkt->size = 0;
> +                }
> +                memcpy(s->out_pkt->data + s->out_pkt->size, s->in_pkt->data, drain);
> +                s->out_pkt->size += drain;
> +                s->in_pkt->size  -= drain;
> +                s->in_pkt->data  += drain;
> +                if (s->out_pkt->size == data_size) {
> +                    av_packet_move_ref(pkt, s->out_pkt);
> +                    if (!s->in_pkt->size)
> +                        av_packet_unref(s->in_pkt);

I would move this check in front of the check for whether out_pkt is
full, so that there are not two places where in_pkt is unreferenced.

> +                    return send_packet(s, nb_samples, pkt);
> +                }
> +                av_packet_unref(s->in_pkt);
> +            } else if (s->in_pkt->size > data_size) {
> +                ret = av_packet_ref(pkt, s->in_pkt);
> +                if (ret < 0)
> +                    return ret;
> +                pkt->size = data_size;
> +                s->in_pkt->size -= data_size;
> +                s->in_pkt->data += data_size;
> +                return send_packet(s, nb_samples, pkt);
> +            } else {
> +                av_assert0(s->in_pkt->size == data_size);
> +                av_packet_move_ref(pkt, s->in_pkt);
> +                return send_packet(s, nb_samples, pkt);
> +            }
> +        }
> +
> +        ret = ff_bsf_get_packet_ref(ctx, s->in_pkt);
> +        if (ret == AVERROR_EOF && s->out_pkt->size) {
> +            if (s->pad) {
> +                memset(s->out_pkt->data + s->out_pkt->size, 0, data_size - s->out_pkt->size);
> +                s->out_pkt->size = data_size;
> +            } else {
> +                nb_samples = s->out_pkt->size / s->sample_size;
> +            }
> +            av_packet_move_ref(pkt, s->out_pkt);
> +            return send_packet(s, nb_samples, pkt);
> +        }
> +    } while (ret >= 0);
> +
> +    return ret;
> +}
> +
> +#define OFFSET(x) offsetof(PCMContext, x)
> +#define FLAGS (AV_OPT_FLAG_AUDIO_PARAM | AV_OPT_FLAG_BSF_PARAM)
> +static const AVOption options[] = {
> +    { "nb_out_samples", "set the number of per-packet output samples", OFFSET(nb_out_samples),   AV_OPT_TYPE_INT, {.i64=1024}, 1, INT_MAX, FLAGS },
> +    { "n",              "set the number of per-packet output samples", OFFSET(nb_out_samples),   AV_OPT_TYPE_INT, {.i64=1024}, 1, INT_MAX, FLAGS },
> +    { "pad",            "pad last packet with zeros",                  OFFSET(pad),             AV_OPT_TYPE_BOOL, {.i64=1} ,   0,       1, FLAGS },
> +    { "p",              "pad last packet with zeros",                  OFFSET(pad),             AV_OPT_TYPE_BOOL, {.i64=1} ,   0,       1, FLAGS },
> +    { "frame_rate",     "set number of packets per second",            OFFSET(frame_rate),  AV_OPT_TYPE_RATIONAL, {.dbl=0},    0, INT_MAX, FLAGS },
> +    { "r",              "set number of packets per second",            OFFSET(frame_rate),  AV_OPT_TYPE_RATIONAL, {.dbl=0},    0, INT_MAX, FLAGS },
> +    { NULL },
> +};
> +
> +static const AVClass pcm_rechunk_class = {
> +    .class_name = "pcm_rechunk_bsf",
> +    .item_name  = av_default_item_name,
> +    .option     = options,
> +    .version    = LIBAVUTIL_VERSION_INT,
> +};
> +
> +static const enum AVCodecID codec_ids[] = {
> +    AV_CODEC_ID_PCM_S16LE,
> +    AV_CODEC_ID_PCM_S16BE,
> +    AV_CODEC_ID_PCM_S8,
> +    AV_CODEC_ID_PCM_S32LE,
> +    AV_CODEC_ID_PCM_S32BE,
> +    AV_CODEC_ID_PCM_S24LE,
> +    AV_CODEC_ID_PCM_S24BE,
> +    AV_CODEC_ID_PCM_F32BE,
> +    AV_CODEC_ID_PCM_F32LE,
> +    AV_CODEC_ID_PCM_F64BE,
> +    AV_CODEC_ID_PCM_F64LE,
> +    AV_CODEC_ID_PCM_S64LE,
> +    AV_CODEC_ID_PCM_S64BE,
> +    AV_CODEC_ID_PCM_F16LE,
> +    AV_CODEC_ID_PCM_F24LE,
> +    AV_CODEC_ID_NONE,
> +};
> +
> +const AVBitStreamFilter ff_pcm_rechunk_bsf = {
> +    .name           = "pcm_rechunk",
> +    .priv_data_size = sizeof(PCMContext),
> +    .priv_class     = &pcm_rechunk_class,
> +    .filter         = rechunk_filter,
> +    .init           = init,
> +    .flush          = flush,
> +    .close          = uninit,
> +    .codec_ids      = codec_ids,
> +};
> diff --git a/libavcodec/version.h b/libavcodec/version.h
> index 8cff2e855b..ad85fb15e5 100644
> --- a/libavcodec/version.h
> +++ b/libavcodec/version.h
> @@ -28,7 +28,7 @@
>  #include "libavutil/version.h"
>  
>  #define LIBAVCODEC_VERSION_MAJOR  58
> -#define LIBAVCODEC_VERSION_MINOR  80
> +#define LIBAVCODEC_VERSION_MINOR  81
>  #define LIBAVCODEC_VERSION_MICRO 100
>  
>  #define LIBAVCODEC_VERSION_INT  AV_VERSION_INT(LIBAVCODEC_VERSION_MAJOR, \
> 
LGTM apart from the above comments.

- Andreas