[FFmpeg-devel] [PATCH 11/37] avformat/matroskadec: Improve read error/EOF checks I
Andreas Rheinhardt
andreas.rheinhardt at gmail.com
Sun Jun 23 20:34:00 EEST 2019
James Almer:
> On 5/16/2019 7:29 PM, Andreas Rheinhardt wrote:
>> ebml_read_num had a number of flaws:
>>
>> 1. The check for read errors/EOF was totally wrong. E.g. an EBML number
>> beginning with the invalid 0x00 would be considered a read error,
>> although it is just invalid data.
>> 2. The check for read errors/EOF was done just once, after reading the
>> first byte of the EBML number. But errors/EOF can happen inbetween, of
>> course, and this wasn't checked.
>> 3. There was no way to distinguish when EOF should be an error (because
>> the data has to be there) for which an error message should be emitted
>> and when it is not necessarily an error (namely during parsing of EBML
>> IDs). Such a possibility has been added and used.
>>
>> All this was fixed; furthermore, the error messages for invalid EBML
>> numbers were improved and useless initializations were removed.
>>
>> Signed-off-by: Andreas Rheinhardt <andreas.rheinhardt at gmail.com>
>> ---
>> libavformat/matroskadec.c | 76 +++++++++++++++++++++++----------------
>> 1 file changed, 45 insertions(+), 31 deletions(-)
>>
>> diff --git a/libavformat/matroskadec.c b/libavformat/matroskadec.c
>> index 0f7decb212..431e742a2d 100644
>> --- a/libavformat/matroskadec.c
>> +++ b/libavformat/matroskadec.c
>> @@ -820,33 +820,30 @@ static int ebml_level_end(MatroskaDemuxContext *matroska)
>> * Returns: number of bytes read, < 0 on error
>> */
>> static int ebml_read_num(MatroskaDemuxContext *matroska, AVIOContext *pb,
>> - int max_size, uint64_t *number)
>> + int max_size, uint64_t *number, int eof_forbidden)
>> {
>> - int read = 1, n = 1;
>> - uint64_t total = 0;
>> + int read, n = 1;
>> + uint64_t total;
>> + int64_t pos;
>>
>> - /* The first byte tells us the length in bytes - avio_r8() can normally
>> - * return 0, but since that's not a valid first ebmlID byte, we can
>> - * use it safely here to catch EOS. */
>> - if (!(total = avio_r8(pb))) {
>> - /* we might encounter EOS here */
>> - if (!avio_feof(pb)) {
>> - int64_t pos = avio_tell(pb);
>> - av_log(matroska->ctx, AV_LOG_ERROR,
>> - "Read error at pos. %"PRIu64" (0x%"PRIx64")\n",
>> - pos, pos);
>> - return pb->error ? pb->error : AVERROR(EIO);
>> - }
>> - return AVERROR_EOF;
>> - }
>> + /* The first byte tells us the length in bytes - except when it is zero. */
>> + total = avio_r8(pb);
>> + if (pb->eof_reached)
>> + goto err;
>>
>> /* get the length of the EBML number */
>> - read = 8 - ff_log2_tab[total];
>> - if (read > max_size) {
>> - int64_t pos = avio_tell(pb) - 1;
>> - av_log(matroska->ctx, AV_LOG_ERROR,
>> - "Invalid EBML number size tag 0x%02x at pos %"PRIu64" (0x%"PRIx64")\n",
>> - (uint8_t) total, pos, pos);
>> + if (!total || (read = 8 - ff_log2_tab[total]) > max_size) {
>> + pos = avio_tell(pb) - 1;
>> + if (!total) {
>> + av_log(matroska->ctx, AV_LOG_ERROR,
>> + "0x00 at pos %"PRId64" (0x%"PRIx64") invalid as first byte "
>> + "of an EBML number\n", pos, pos);
>> + } else {
>> + av_log(matroska->ctx, AV_LOG_ERROR,
>> + "Length %d indicated by an EBML number's first byte 0x%02x "
>> + "at pos %"PRId64" (0x%"PRIx64") exceeds max length %d.\n",
>> + read, (uint8_t) total, pos, pos, max_size);
>> + }
>> return AVERROR_INVALIDDATA;
>> }
>>
>> @@ -855,9 +852,27 @@ static int ebml_read_num(MatroskaDemuxContext *matroska, AVIOContext *pb,
>> while (n++ < read)
>> total = (total << 8) | avio_r8(pb);
>>
>> + if (pb->eof_reached) {
>> + eof_forbidden = 1;
>> + goto err;
>> + }
>> +
>> *number = total;
>>
>> return read;
>> +
>> +err:
>> + pos = avio_tell(pb);
>> + if (pb->error) {
>> + av_log(matroska->ctx, AV_LOG_ERROR,
>> + "Read error at pos. %"PRIu64" (0x%"PRIx64")\n",
>> + pos, pos);
>> + return pb->error;
>> + }
>> + if (eof_forbidden)
>> + av_log(matroska->ctx, AV_LOG_ERROR, "File ended prematurely "
>> + "at pos. %"PRIu64" (0x%"PRIx64")\n", pos, pos);
>> + return AVERROR_EOF;
>
> Same here, return EIO when the file ends unexpectedly and you want to
> make sure libavformat knows demuxing failed.
>
Actually, the philosophy of this demuxer is different: It iterates (in
matroska_read_packet) until it has a packet to output (and returns 0)
or until an error is returned while matroska->done is set. This
effectively means that the demuxer tries to resync on its own and it
implies that the error values here are not really returned to
libavformat. The only information libavformat gets via return values
is whether there is a packet returned or whether the file is finished
(in which case the return value is AVERROR_EOF).
The currently only exception to this is when seeking in matoska_resync
fails, then the error from avio_seek is returned; with my patches
(when seek failure isn't regarded as fatal any more) instead pb->error
is returnd upon failure of matroska_resync if it indicates an error.
But yeah, I'll change this. At the time I thought that AVERROR_EOF was
the generic error for hitting EOF (including when EOF is hit prematurely).
>> }
>>
>> /**
>> @@ -868,7 +883,7 @@ static int ebml_read_num(MatroskaDemuxContext *matroska, AVIOContext *pb,
>> static int ebml_read_length(MatroskaDemuxContext *matroska, AVIOContext *pb,
>> uint64_t *number)
>> {
>> - int res = ebml_read_num(matroska, pb, 8, number);
>> + int res = ebml_read_num(matroska, pb, 8, number, 1);
>> if (res > 0 && *number + 1 == 1ULL << (7 * res))
>> *number = EBML_UNKNOWN_LENGTH;
>> return res;
>> @@ -1010,7 +1025,7 @@ static int matroska_ebmlnum_uint(MatroskaDemuxContext *matroska,
>> {
>> AVIOContext pb;
>> ffio_init_context(&pb, data, size, 0, NULL, NULL, NULL, NULL);
>> - return ebml_read_num(matroska, &pb, FFMIN(size, 8), num);
>> + return ebml_read_num(matroska, &pb, FFMIN(size, 8), num, 1);
>> }
>>
>> /*
>> @@ -1057,7 +1072,7 @@ static int ebml_parse(MatroskaDemuxContext *matroska, EbmlSyntax *syntax,
>> {
>> if (!matroska->current_id) {
>> uint64_t id;
>> - int res = ebml_read_num(matroska, matroska->ctx->pb, 4, &id);
>> + int res = ebml_read_num(matroska, matroska->ctx->pb, 4, &id, 0);
>> if (res < 0) {
>> // in live mode, finish parsing if EOF is reached.
>> return (matroska->is_live && matroska->ctx->pb->eof_reached &&
>> @@ -3353,10 +3368,9 @@ static int matroska_parse_block(MatroskaDemuxContext *matroska, AVBufferRef *buf
>> uint64_t num;
>> int trust_default_duration = 1;
>>
>> - if ((n = matroska_ebmlnum_uint(matroska, data, size, &num)) < 0) {
>> - av_log(matroska->ctx, AV_LOG_ERROR, "EBML block data error\n");
>> + if ((n = matroska_ebmlnum_uint(matroska, data, size, &num)) < 0)
>> return n;
>> - }
>> +
>> data += n;
>> size -= n;
>>
>> @@ -3717,7 +3731,7 @@ static int webm_clusters_start_with_keyframe(AVFormatContext *s)
>> AVPacket *pkt;
>> avio_seek(s->pb, cluster_pos, SEEK_SET);
>> // read cluster id and length
>> - read = ebml_read_num(matroska, matroska->ctx->pb, 4, &cluster_id);
>> + read = ebml_read_num(matroska, matroska->ctx->pb, 4, &cluster_id, 1);
>> if (read < 0 || cluster_id != 0xF43B675) // done with all clusters
>> break;
>> read = ebml_read_length(matroska, matroska->ctx->pb, &cluster_length);
>> @@ -3935,7 +3949,7 @@ static int webm_dash_manifest_cues(AVFormatContext *s, int64_t init_range)
>> // cues_end is inclusive and the above sum is reduced by 1.
>> uint64_t cues_length, cues_id;
>> int bytes_read;
>> - bytes_read = ebml_read_num (matroska, matroska->ctx->pb, 4, &cues_id);
>> + bytes_read = ebml_read_num (matroska, matroska->ctx->pb, 4, &cues_id, 1);
>> if (bytes_read < 0 || cues_id != (MATROSKA_ID_CUES & 0xfffffff))
>> return bytes_read < 0 ? bytes_read : AVERROR_INVALIDDATA;
>> bytes_read = ebml_read_length(matroska, matroska->ctx->pb, &cues_length);
>>
>
> _______________________________________________
> ffmpeg-devel mailing list
> ffmpeg-devel at ffmpeg.org
> https://ffmpeg.org/mailman/listinfo/ffmpeg-devel
>
> To unsubscribe, visit link above, or email
> ffmpeg-devel-request at ffmpeg.org with subject "unsubscribe".
>
More information about the ffmpeg-devel
mailing list