[FFmpeg-devel] [PATCH 3/5] lavf/matroskadec: support standard (non-WebM) WebVTT formatting

Andreas Rheinhardt andreas.rheinhardt at gmail.com
Sat Jun 20 12:14:39 EEST 2020


rcombs:
> Fixes ticket #5641
> ---
>  libavformat/matroska.c    |  11 ++--
>  libavformat/matroskadec.c | 111 ++++++++++++++++++++++++--------------
>  2 files changed, 77 insertions(+), 45 deletions(-)
> 
> diff --git a/libavformat/matroska.c b/libavformat/matroska.c
> index 7c56aba403..962fa496f4 100644
> --- a/libavformat/matroska.c
> +++ b/libavformat/matroska.c
> @@ -60,16 +60,12 @@ const CodecTags ff_mkv_codec_tags[]={
>      {"A_VORBIS"         , AV_CODEC_ID_VORBIS},
>      {"A_WAVPACK4"       , AV_CODEC_ID_WAVPACK},
>  
> -    {"D_WEBVTT/SUBTITLES"   , AV_CODEC_ID_WEBVTT},
> -    {"D_WEBVTT/CAPTIONS"    , AV_CODEC_ID_WEBVTT},
> -    {"D_WEBVTT/DESCRIPTIONS", AV_CODEC_ID_WEBVTT},
> -    {"D_WEBVTT/METADATA"    , AV_CODEC_ID_WEBVTT},
> -
>      {"S_TEXT/UTF8"      , AV_CODEC_ID_SUBRIP},
>      {"S_TEXT/UTF8"      , AV_CODEC_ID_TEXT},
>      {"S_TEXT/ASCII"     , AV_CODEC_ID_TEXT},
>      {"S_TEXT/ASS"       , AV_CODEC_ID_ASS},
>      {"S_TEXT/SSA"       , AV_CODEC_ID_ASS},
> +    {"S_TEXT/WEBVTT"    , AV_CODEC_ID_WEBVTT},
>      {"S_ASS"            , AV_CODEC_ID_ASS},
>      {"S_SSA"            , AV_CODEC_ID_ASS},
>      {"S_VOBSUB"         , AV_CODEC_ID_DVD_SUBTITLE},
> @@ -77,6 +73,11 @@ const CodecTags ff_mkv_codec_tags[]={
>      {"S_HDMV/PGS"       , AV_CODEC_ID_HDMV_PGS_SUBTITLE},
>      {"S_HDMV/TEXTST"    , AV_CODEC_ID_HDMV_TEXT_SUBTITLE},
>  
> +    {"D_WEBVTT/SUBTITLES"   , AV_CODEC_ID_WEBVTT},
> +    {"D_WEBVTT/CAPTIONS"    , AV_CODEC_ID_WEBVTT},
> +    {"D_WEBVTT/DESCRIPTIONS", AV_CODEC_ID_WEBVTT},
> +    {"D_WEBVTT/METADATA"    , AV_CODEC_ID_WEBVTT},
> +
>      {"V_AV1"            , AV_CODEC_ID_AV1},
>      {"V_DIRAC"          , AV_CODEC_ID_DIRAC},
>      {"V_FFV1"           , AV_CODEC_ID_FFV1},
> diff --git a/libavformat/matroskadec.c b/libavformat/matroskadec.c
> index cff7f0cb54..1e4947e209 100644
> --- a/libavformat/matroskadec.c
> +++ b/libavformat/matroskadec.c
> @@ -3305,62 +3305,81 @@ static int matroska_parse_webvtt(MatroskaDemuxContext *matroska,
>                                   uint8_t *data, int data_len,
>                                   uint64_t timecode,
>                                   uint64_t duration,
> -                                 int64_t pos)
> +                                 int64_t pos,
> +                                 uint8_t *additional, uint64_t additional_id, int additional_size)
>  {
>      AVPacket pktl, *pkt = &pktl;
> -    uint8_t *id, *settings, *text, *buf;
> -    int id_len, settings_len, text_len;
> +    uint8_t *id, *settings, *comment, *text, *buf;
> +    int id_len = 0, settings_len = 0, comment_len = 0, text_len;
>      uint8_t *p, *q;
>      int err;
> +    int webm_style = !strncmp(track->codec_id, "D_WEBVTT/", 9);
>  
>      if (data_len <= 0)
>          return AVERROR_INVALIDDATA;
>  
> -    p = data;
> -    q = data + data_len;
> -
> -    id = p;
> -    id_len = -1;
> -    while (p < q) {
> -        if (*p == '\r' || *p == '\n') {
> -            id_len = p - id;
> -            if (*p == '\r')
> -                p++;
> -            break;
> +    p = webm_style ? data : additional;
> +    q = webm_style ? (data + data_len) : (additional + additional_size);
> +

Unfortunately all pointer arithmetic involving NULL is undefined
behaviour in C, even NULL + 0. So the above is ub when not using
webm-style and when there is no BlockAddition.

> +    if (p) {
> +        id = p;
> +        id_len = -1;

This is unnecessary. Initializing to zero (as is already done above)
works just fine.

> +        while (p < q) {
> +            if (*p == '\r' || *p == '\n') {
> +                id_len = p - id;
> +                if (*p == '\r')
> +                    p++;
> +                break;
> +            }
> +            p++;
>          }
> +
> +        if (p >= q || *p != '\n')
> +            return AVERROR_INVALIDDATA;
>          p++;
> -    }
>  
> -    if (p >= q || *p != '\n')
> -        return AVERROR_INVALIDDATA;
> -    p++;
> -
> -    settings = p;
> -    settings_len = -1;
> -    while (p < q) {
> -        if (*p == '\r' || *p == '\n') {
> -            settings_len = p - settings;
> -            if (*p == '\r')
> -                p++;
> -            break;
> +        settings = p;
> +        settings_len = -1;
> +        while (p < q) {
> +            if (*p == '\r' || *p == '\n') {
> +                settings_len = p - settings;
> +                if (*p == '\r')
> +                    p++;
> +                break;
> +            }
> +            p++;
>          }
> +
> +        if (p >= q || *p != '\n')
> +            return AVERROR_INVALIDDATA;

You're repeating yourself.

>          p++;
> +
> +        if (!webm_style && p < q) {
> +            if (q[-1] != '\r' && q[-1] != '\n')

The current Matroska codec mapping indeed requires the terminating
WebVTT line terminator to be retained, yet mkvmerge doesn't do so, so we
should not be so picky with this.

> +                return AVERROR_INVALIDDATA;
> +
> +            comment = p;
> +            comment_len = q - p;
> +        }
>      }
>  
> -    if (p >= q || *p != '\n')
> -        return AVERROR_INVALIDDATA;
> -    p++;
> -
> -    text = p;
> -    text_len = q - p;
> -    while (text_len > 0) {
> -        const int len = text_len - 1;
> -        const uint8_t c = p[len];
> -        if (c != '\r' && c != '\n')
> -            break;
> -        text_len = len;
> +    if (webm_style) {
> +        text = p;
> +        text_len = q - p;
> +
> +        while (text_len > 0) {
> +            const int len = text_len - 1;
> +            const uint8_t c = p[len];
> +            if (c != '\r' && c != '\n')
> +                break;
> +            text_len = len;
> +        }
> +    } else {
> +        text = data;
> +        text_len = data_len;
>      }
>  
> +
>      if (text_len <= 0)
>          return AVERROR_INVALIDDATA;
>  
> @@ -3393,6 +3412,17 @@ static int matroska_parse_webvtt(MatroskaDemuxContext *matroska,
>          memcpy(buf, settings, settings_len);
>      }
>  
> +    if (comment_len > 0) {
> +        buf = av_packet_new_side_data(pkt,
> +                                      AV_PKT_DATA_WEBVTT_COMMENT,
> +                                      comment_len);
> +        if (!buf) {
> +            av_packet_unref(pkt);
> +            return AVERROR(ENOMEM);
> +        }
> +        memcpy(buf, comment, comment_len);
> +    }
> +
>      // Do we need this for subtitles?
>      // pkt->flags = AV_PKT_FLAG_KEY;
>  
> @@ -3656,7 +3686,8 @@ static int matroska_parse_block(MatroskaDemuxContext *matroska, AVBufferRef *buf
>              res = matroska_parse_webvtt(matroska, track, st,
>                                          out_data, out_size,
>                                          timecode, lace_duration,
> -                                        pos);
> +                                        pos,
> +                                        additional, additional_id, additional_size);
>              if (!buf)
>                  av_free(out_data);
>              if (res)
> 



More information about the ffmpeg-devel mailing list