[FFmpeg-devel] matroskadec: support S_TEXT/WEBVTT
Leo Izen
leo.izen at gmail.com
Wed Dec 25 18:45:38 EET 2024
On 12/25/24 3:46 AM, Wang Bin wrote:
> On 12/19/24 3:59 AM, Wang Bin wrote:
>>> based on Hendrik Leppkes's fork
>>>
>>>
>>> _______________________________________________
>>> ffmpeg-devel mailing list
>>> ffmpeg-devel at ffmpeg.org
>>> https://ffmpeg.org/mailman/listinfo/ffmpeg-devel
>>>
>>> To unsubscribe, visit link above, or email
>>> ffmpeg-devel-request at ffmpeg.org with subject "unsubscribe".
>>
>> This patch has a big web of gotos. This can get very confusing. Is there
>> any way to change it so there's if blocks or other static functions to
>> call? Gotos should be used very sparingly and mostly to goto the end of
>> a block where cleanup happens.
>>
>> - Leo Izen (Traneptora)
> From 5bd4a6f56231c3160be6c2d322f5d1081d7b3622 Mon Sep 17 00:00:00 2001
> From: wang-bin <wbsecg1 at gmail.com>
> Date: Tue, 17 Dec 2024 23:21:57 +0800
> Subject: [PATCH] matroskadec: support S_TEXT/WEBVTT
>
> fix ticket #5641
> ---
> libavformat/matroska.c | 1 +
> libavformat/matroskadec.c | 102 ++++++++++++++++++++++----------------
> 2 files changed, 60 insertions(+), 43 deletions(-)
>
> diff --git a/libavformat/matroska.c b/libavformat/matroska.c
> index d0ecfbeb6a..23c4ad6949 100644
> --- a/libavformat/matroska.c
> +++ b/libavformat/matroska.c
> @@ -63,6 +63,7 @@ const CodecTags ff_mkv_codec_tags[]={
> {"D_WEBVTT/CAPTIONS" , AV_CODEC_ID_WEBVTT},
> {"D_WEBVTT/DESCRIPTIONS", AV_CODEC_ID_WEBVTT},
> {"D_WEBVTT/METADATA" , AV_CODEC_ID_WEBVTT},
> + {"S_TEXT/WEBVTT" , AV_CODEC_ID_WEBVTT},
>
> {"S_TEXT/UTF8" , AV_CODEC_ID_SUBRIP},
> {"S_TEXT/UTF8" , AV_CODEC_ID_TEXT},
> diff --git a/libavformat/matroskadec.c b/libavformat/matroskadec.c
> index 0e150f9138..11af2cface 100644
> --- a/libavformat/matroskadec.c
> +++ b/libavformat/matroskadec.c
> @@ -3761,68 +3761,83 @@ static int matroska_parse_prores(MatroskaTrack
> *track,
> return 0;
> }
>
> +static int matroska_webvtt_read_line(uint8_t** begin, uint8_t* end)
static ptrdiff_t matroska_webvtt_read_line(const uint8_t **begin, const
uint8_t *end)
> +{
> + uint8_t *p = *begin;
> + int len = -1;
ptrdiff_t len = -1;
> +
> + while (p < end) {
> + if (*p == '\r' || *p == '\n') {
> + len = p - *begin;
> + if (*p == '\r')
> + p++;
This will cause AVERROR_INVALIDDATA to return, if a \r is found without
a \n after it. Is this intentional?
> + break;
> + }
> + p++;
> + }
> +
> + if (p >= end || *p != '\n')
> + return AVERROR_INVALIDDATA;
> + p++;
> + *begin = p;
These two lines can be replaced with: *begin = p + 1;
> + return len;
> +}
> +
> static int matroska_parse_webvtt(MatroskaDemuxContext *matroska,
> MatroskaTrack *track,
> AVStream *st,
> uint8_t *data, int data_len,
> + MatroskaBlockMore *blockmore, int
> nb_blockmore,
> uint64_t timecode,
> uint64_t duration,
> int64_t pos)
> {
> AVPacket *pkt = matroska->pkt;
> uint8_t *id, *settings, *text, *buf;
> - int id_len, settings_len, text_len;
> + int id_len = -1, settings_len = -1, text_len = -1;
ptrdiff_t id_len, settings_le, text_len;
This are never read uninitialized.
> uint8_t *p, *q;
> int err;
> + const int webm_style = !strncmp(track->codec_id, "D_WEBVTT/", 9);
>
> if (data_len <= 0)
> return AVERROR_INVALIDDATA;
>
> - p = data;
> - q = data + data_len;
> -
> - id = p;
> - id_len = -1;
> - while (p < q) {
> - if (*p == '\r' || *p == '\n') {
> - id_len = p - id;
> - if (*p == '\r')
> - p++;
> - break;
> + if (webm_style) {
> + p = data;
> + q = data + data_len;
> + id = p;
> + id_len = matroska_webvtt_read_line(&p, q);
> + if (id_len < 0)
> + return AVERROR_INVALIDDATA;
> + settings = p;
> + settings_len = matroska_webvtt_read_line(&p, q);
> + if (settings_len < 0)
> + return AVERROR_INVALIDDATA;
> + text = p;
> + text_len = data + data_len - p;
> + while (text_len > 0) {
> + const int len = text_len - 1;
> + const uint8_t c = p[len];
> + if (c != '\r' && c != '\n')
> + break;
> + text_len = len;
> }
You can replace this loop with:
while (text_len-- > 0 && (p[text_len] == '\r' || p[text_len] == '\n')) {
// empty
}
text_len++;
The lazy && ensures that text_len-- is evaluated before p[text_len] is
dereferenced.
> - p++;
> - }
> -
> - if (p >= q || *p != '\n')
> - return AVERROR_INVALIDDATA;
> - p++;
> -
> - settings = p;
> - settings_len = -1;
> - while (p < q) {
> - if (*p == '\r' || *p == '\n') {
> - settings_len = p - settings;
> - if (*p == '\r')
> - p++;
> - break;
> + } else {
> + if (nb_blockmore > 0) {
} else if (nb_blockmore > 0) {
> + p = blockmore->additional.data;
> + q = p + blockmore->additional.size;
> + settings = p;
> + settings_len = matroska_webvtt_read_line(&p, q);
> + if (settings_len < 0)
> + return AVERROR_INVALIDDATA;
> + id = p;
> + id_len = matroska_webvtt_read_line(&p, q);
> + if (id_len < 0)
> + return AVERROR_INVALIDDATA;
> }
> - p++;
> - }
> -
> - if (p >= q || *p != '\n')
> - return AVERROR_INVALIDDATA;
> - p++;
> -
> - text = p;
> - text_len = q - p;
> - while (text_len > 0) {
> - const int len = text_len - 1;
> - const uint8_t c = p[len];
> - if (c != '\r' && c != '\n')
> - break;
> - text_len = len;
> + text = data;
> + text_len = data_len;
> }
> -
> if (text_len <= 0)
> return AVERROR_INVALIDDATA;
>
> @@ -4215,6 +4230,7 @@ static int
> matroska_parse_block(MatroskaDemuxContext *matroska, AVBufferRef *buf
> } else if (st->codecpar->codec_id == AV_CODEC_ID_WEBVTT) {
> res = matroska_parse_webvtt(matroska, track, st,
> out_data, out_size,
> + blockmore, nb_blockmore,
> timecode, lace_duration,
> pos);
> if (!buf)
Overall it's a lot cleaner and easier to read though.
- Leo Izen (Traneptora)
More information about the ffmpeg-devel
mailing list