[FFmpeg-devel] [PATCH 1/3] lavc/pgs_frame_merge_bsf: add bsf to merge PGS segments

Fri Apr 17 18:52:00 EEST 2020

On Fri, 2020-04-17 at 13:21 +0300, Petri Hintukainen wrote:
> to, 2020-04-16 kello 16:57 -0600, John Stebbins kirjoitti:
> > Required to remux m2ts to mkv
> > ---
> >  libavcodec/Makefile              |   1 +
> >  libavcodec/bitstream_filters.c   |   1 +
> >  libavcodec/pgs_frame_merge_bsf.c | 164
> > +++++++++++++++++++++++++++++++
> >  3 files changed, 166 insertions(+)
> >  create mode 100644 libavcodec/pgs_frame_merge_bsf.c
> > 
> > diff --git a/libavcodec/Makefile b/libavcodec/Makefile
> > index c1c9a44f2b..c7d8fbebaa 100644
> > --- a/libavcodec/Makefile
> > +++ b/libavcodec/Makefile
> > @@ -1110,6 +1110,7 @@ OBJS-$(CONFIG_MP3_HEADER_DECOMPRESS_BSF)  +=
> > mp3_header_decompress_bsf.o \
> >  OBJS-$(CONFIG_MPEG2_METADATA_BSF)         += mpeg2_metadata_bsf.o
> >  OBJS-$(CONFIG_NOISE_BSF)                  += noise_bsf.o
> >  OBJS-$(CONFIG_NULL_BSF)                   += null_bsf.o
> > +OBJS-$(CONFIG_PGS_FRAME_MERGE_BSF)        += pgs_frame_merge_bsf.o
> >  OBJS-$(CONFIG_PRORES_METADATA_BSF)        += prores_metadata_bsf.o
> >  OBJS-$(CONFIG_REMOVE_EXTRADATA_BSF)       +=
> > remove_extradata_bsf.o
> >  OBJS-$(CONFIG_TEXT2MOVSUB_BSF)            += movsub_bsf.o
> > diff --git a/libavcodec/bitstream_filters.c
> > b/libavcodec/bitstream_filters.c
> > index 6b5ffe4d70..92619225f0 100644
> > --- a/libavcodec/bitstream_filters.c
> > +++ b/libavcodec/bitstream_filters.c
> > @@ -49,6 +49,7 @@ extern const AVBitStreamFilter
> > ff_mpeg4_unpack_bframes_bsf;
> >  extern const AVBitStreamFilter ff_mov2textsub_bsf;
> >  extern const AVBitStreamFilter ff_noise_bsf;
> >  extern const AVBitStreamFilter ff_null_bsf;
> > +extern const AVBitStreamFilter ff_pgs_frame_merge_bsf;
> >  extern const AVBitStreamFilter ff_prores_metadata_bsf;
> >  extern const AVBitStreamFilter ff_remove_extradata_bsf;
> >  extern const AVBitStreamFilter ff_text2movsub_bsf;
> > diff --git a/libavcodec/pgs_frame_merge_bsf.c
> > b/libavcodec/pgs_frame_merge_bsf.c
> > new file mode 100644
> > index 0000000000..d1bb3a60c2
> > --- /dev/null
> > +++ b/libavcodec/pgs_frame_merge_bsf.c
> > @@ -0,0 +1,164 @@
> > +/*
> > + * Copyright (c) 2020 John Stebbins <jstebbins.hb at gmail.com>
> > + *
> > + * This file is part of FFmpeg.
> > + *
> > + * FFmpeg is free software; you can redistribute it and/or
> > + * modify it under the terms of the GNU Lesser General Public
> > + * License as published by the Free Software Foundation; either
> > + * version 2.1 of the License, or (at your option) any later
> > version.
> > + *
> > + * FFmpeg is distributed in the hope that it will be useful,
> > + * but WITHOUT ANY WARRANTY; without even the implied warranty of
> > + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
> > GNU
> > + * Lesser General Public License for more details.
> > + *
> > + * You should have received a copy of the GNU Lesser General
> > Public
> > + * License along with FFmpeg; if not, write to the Free Software
> > + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
> > 02110-1301 USA
> > + */
> > +
> > +/**
> > + * @file
> > + * This bitstream filter merges PGS subtitle packets containing
> > incomplete
> > + * set of segments into a single packet
> > + *
> > + * Packets already containing a complete set of segments will be
> > passed through
> > + * unchanged.
> > + */
> > +
> > +#include "avcodec.h"
> > +#include "bsf.h"
> > +#include "libavutil/intreadwrite.h"
> > +
> > +enum PGSSegmentType {
> > +    PALETTE_SEGMENT      = 0x14,
> > +    OBJECT_SEGMENT       = 0x15,
> > +    PRESENTATION_SEGMENT = 0x16,
> > +    WINDOW_SEGMENT       = 0x17,
> > +    DISPLAY_SEGMENT      = 0x80,
> > +};
> 
> This is duplicated in pgssubdec.c and pgs_frame_split_bsf.c. It's not
> much of code for a separate header, but maybe you'll find more while
> writing the encoder ?
> 

I've already written the encoder.  This enum is the only thing shared.
So hardly seems worth adding a header. 

Testing the encoder is what lead me to writing the bsfs. It "works" in
the sense that it's generating correct bitstreams.  But I realized the
bsfs were needed to handle muxing properly.  And there is still the
case to handle when the input AVSubtitle has end_display_time set.  In
this case I need to generate 2 segment sequences.  One for the subtitle
and one to indicate the end of the subtitle.  The current subtitle
encoder API doesn't handle multipe output packets for on input subtitle
gracefully.  So that's next on my list to fix.

> > +typedef struct PGSMergeContext {
> > +    AVPacket *buffer_pkt, *in;
> > +    int presentation_found;
> > +} PGSMergeContext;
> > +
> > +static void frame_merge_flush(AVBSFContext *bsf)
> > +{
> > +    PGSMergeContext *ctx = bsf->priv_data;
> > +
> > +    av_packet_unref(ctx->in);
> > +    av_packet_unref(ctx->buffer_pkt);
> > +}
> > +
> > +static int frame_merge_filter(AVBSFContext *bsf, AVPacket *out)
> > +{
> > +    PGSMergeContext *ctx = bsf->priv_data;
> > +    AVPacket *in = ctx->in, *pkt = ctx->buffer_pkt;
> > +    int ret, i, size, pos, display = 0;
> > +
> > +    if (!in->data) {
> > +        ret = ff_bsf_get_packet_ref(bsf, in);
> > +        if (ret < 0)
> > +            return ret;
> > +    }
> > +    if (!in->size) {
> > +        av_packet_unref(in);
> > +        return AVERROR(EAGAIN);
> > +    }
> > +
> > +    // Validate packet data and find display_end segment
> > +    size = in->size;
> > +    i = 0;
> > +    while (i + 3 <= in->size) {
> > +        uint8_t segment_type;
> > +        int segment_len;
> > +
> > +        segment_type = in->data[i];
> > +        segment_len  = AV_RB16(in->data + i + 1) + 3;
> > +        if (i + segment_len > in->size) // Invalid data
> > +            break;
> > +        if (segment_type == PRESENTATION_SEGMENT && !ctx-
> > > presentation_found) {
> > +            int object_count;
> > +            ctx->presentation_found = 1;
> > +            ret = av_packet_copy_props(pkt, in);
> > +            if (ret < 0)
> > +                goto fail;
> > +            object_count  = in->data[i + 13];
> 
> Possible OOB read with invalid input ?

Yeah, when I woke up this morning, I realized I had done this.
Will fix

> 
> > +            pkt->flags = !!object_count * AV_PKT_FLAG_KEY;
> 
> Does keyframe here mean something that can be decoded and displayed
> without previous display sets ?
> 
> If yes, maybe you could check composition descriptor state instead ?
> I
> haven't looked into PGS internals in long time, but I think objects
> and
> palettes can be shared between display sets. Ex. animations could be
> implemented with only location / cropping / palette changes.

Hmm, good point.  *All* information to decode a subtitle is only
guaranteed to be present in the current sequence when the
composition_state == 1.  For other values, it may be referencing
objects from previous sequences.  This is probably a better way to
signal key frames.

Thanks!

> 
> > +        }
> > +        i += segment_len;
> > +        if (segment_type == DISPLAY_SEGMENT) {
> > +            size = display = i;
> > +            break;
> > +        }
> > +    }
> > +    if (display && pkt->size == 0 && size == in->size) { //
> > passthrough
> > +        av_packet_move_ref(out, in);
> > +        return 0;
> > +    }
> > +    if ((!display && i != in->size) || size > in->size) {
> > +        av_log(bsf, AV_LOG_WARNING, "Failed to parse PGS
> > segments.\n");
> > +        // force output what we have
> > +        display = size = in->size;
> > +    }
> > +
> > +    pos = pkt->size;
> > +    ret = av_grow_packet(pkt, size);
> > +    if (ret < 0)
> > +        goto fail;
> > +    memcpy(pkt->data + pos, in->data, size);
> > +
> > +    if (size == in->size)
> > +        av_packet_unref(in);
> > +    else {
> > +        in->data += size;
> > +        in->size -= size;
> > +    }
> > +
> > +    if (display) {
> > +        ctx->presentation_found = 0;
> > +        av_packet_move_ref(out, pkt);
> > +        return 0;
> > +    }
> > +    return AVERROR(EAGAIN);
> > +
> > +fail:
> > +    frame_merge_flush(bsf);
> > +    return ret;
> > +}
> > +
> > +static int frame_merge_init(AVBSFContext *bsf)
> > +{
> > +    PGSMergeContext *ctx = bsf->priv_data;
> > +
> > +    ctx->in  = av_packet_alloc();
> > +    ctx->buffer_pkt = av_packet_alloc();
> > +    if (!ctx->in || !ctx->buffer_pkt)
> > +        return AVERROR(ENOMEM);
> > +
> > +    return 0;
> > +}
> > +
> > +static void frame_merge_close(AVBSFContext *bsf)
> > +{
> > +    PGSMergeContext *ctx = bsf->priv_data;
> > +
> > +    av_packet_free(&ctx->in);
> > +    av_packet_free(&ctx->buffer_pkt);
> > +}
> > +
> > +static const enum AVCodecID frame_merge_codec_ids[] = {
> > +    AV_CODEC_ID_HDMV_PGS_SUBTITLE, AV_CODEC_ID_NONE,
> > +};
> > +
> > +const AVBitStreamFilter ff_pgs_frame_merge_bsf = {
> > +    .name           = "pgs_frame_merge",
> > +    .priv_data_size = sizeof(PGSMergeContext),
> > +    .init           = frame_merge_init,
> > +    .flush          = frame_merge_flush,
> > +    .close          = frame_merge_close,
> > +    .filter         = frame_merge_filter,
> > +    .codec_ids      = frame_merge_codec_ids,
> > +};
> 
>