[FFmpeg-devel] [PATCH 11/37] avformat/matroskadec: Improve read error/EOF checks I

James Almer jamrial at gmail.com
Sun Jun 23 19:36:07 EEST 2019


On 5/16/2019 7:29 PM, Andreas Rheinhardt wrote:
> ebml_read_num had a number of flaws:
> 
> 1. The check for read errors/EOF was totally wrong. E.g. an EBML number
> beginning with the invalid 0x00 would be considered a read error,
> although it is just invalid data.
> 2. The check for read errors/EOF was done just once, after reading the
> first byte of the EBML number. But errors/EOF can happen inbetween, of
> course, and this wasn't checked.
> 3. There was no way to distinguish when EOF should be an error (because
> the data has to be there) for which an error message should be emitted
> and when it is not necessarily an error (namely during parsing of EBML
> IDs). Such a possibility has been added and used.
> 
> All this was fixed; furthermore, the error messages for invalid EBML
> numbers were improved and useless initializations were removed.
> 
> Signed-off-by: Andreas Rheinhardt <andreas.rheinhardt at gmail.com>
> ---
>  libavformat/matroskadec.c | 76 +++++++++++++++++++++++----------------
>  1 file changed, 45 insertions(+), 31 deletions(-)
> 
> diff --git a/libavformat/matroskadec.c b/libavformat/matroskadec.c
> index 0f7decb212..431e742a2d 100644
> --- a/libavformat/matroskadec.c
> +++ b/libavformat/matroskadec.c
> @@ -820,33 +820,30 @@ static int ebml_level_end(MatroskaDemuxContext *matroska)
>   * Returns: number of bytes read, < 0 on error
>   */
>  static int ebml_read_num(MatroskaDemuxContext *matroska, AVIOContext *pb,
> -                         int max_size, uint64_t *number)
> +                         int max_size, uint64_t *number, int eof_forbidden)
>  {
> -    int read = 1, n = 1;
> -    uint64_t total = 0;
> +    int read, n = 1;
> +    uint64_t total;
> +    int64_t pos;
>  
> -    /* The first byte tells us the length in bytes - avio_r8() can normally
> -     * return 0, but since that's not a valid first ebmlID byte, we can
> -     * use it safely here to catch EOS. */
> -    if (!(total = avio_r8(pb))) {
> -        /* we might encounter EOS here */
> -        if (!avio_feof(pb)) {
> -            int64_t pos = avio_tell(pb);
> -            av_log(matroska->ctx, AV_LOG_ERROR,
> -                   "Read error at pos. %"PRIu64" (0x%"PRIx64")\n",
> -                   pos, pos);
> -            return pb->error ? pb->error : AVERROR(EIO);
> -        }
> -        return AVERROR_EOF;
> -    }
> +    /* The first byte tells us the length in bytes - except when it is zero. */
> +    total = avio_r8(pb);
> +    if (pb->eof_reached)
> +        goto err;
>  
>      /* get the length of the EBML number */
> -    read = 8 - ff_log2_tab[total];
> -    if (read > max_size) {
> -        int64_t pos = avio_tell(pb) - 1;
> -        av_log(matroska->ctx, AV_LOG_ERROR,
> -               "Invalid EBML number size tag 0x%02x at pos %"PRIu64" (0x%"PRIx64")\n",
> -               (uint8_t) total, pos, pos);
> +    if (!total || (read = 8 - ff_log2_tab[total]) > max_size) {
> +        pos = avio_tell(pb) - 1;
> +        if (!total) {
> +            av_log(matroska->ctx, AV_LOG_ERROR,
> +                   "0x00 at pos %"PRId64" (0x%"PRIx64") invalid as first byte "
> +                   "of an EBML number\n", pos, pos);
> +        } else {
> +            av_log(matroska->ctx, AV_LOG_ERROR,
> +                   "Length %d indicated by an EBML number's first byte 0x%02x "
> +                   "at pos %"PRId64" (0x%"PRIx64") exceeds max length %d.\n",
> +                   read, (uint8_t) total, pos, pos, max_size);
> +        }
>          return AVERROR_INVALIDDATA;
>      }
>  
> @@ -855,9 +852,27 @@ static int ebml_read_num(MatroskaDemuxContext *matroska, AVIOContext *pb,
>      while (n++ < read)
>          total = (total << 8) | avio_r8(pb);
>  
> +    if (pb->eof_reached) {
> +        eof_forbidden = 1;
> +        goto err;
> +    }
> +
>      *number = total;
>  
>      return read;
> +
> +err:
> +    pos = avio_tell(pb);
> +    if (pb->error) {
> +        av_log(matroska->ctx, AV_LOG_ERROR,
> +               "Read error at pos. %"PRIu64" (0x%"PRIx64")\n",
> +               pos, pos);
> +        return pb->error;
> +    }
> +    if (eof_forbidden)
> +        av_log(matroska->ctx, AV_LOG_ERROR, "File ended prematurely "
> +               "at pos. %"PRIu64" (0x%"PRIx64")\n", pos, pos);
> +    return AVERROR_EOF;

Same here, return EIO when the file ends unexpectedly and you want to
make sure libavformat knows demuxing failed.

>  }
>  
>  /**
> @@ -868,7 +883,7 @@ static int ebml_read_num(MatroskaDemuxContext *matroska, AVIOContext *pb,
>  static int ebml_read_length(MatroskaDemuxContext *matroska, AVIOContext *pb,
>                              uint64_t *number)
>  {
> -    int res = ebml_read_num(matroska, pb, 8, number);
> +    int res = ebml_read_num(matroska, pb, 8, number, 1);
>      if (res > 0 && *number + 1 == 1ULL << (7 * res))
>          *number = EBML_UNKNOWN_LENGTH;
>      return res;
> @@ -1010,7 +1025,7 @@ static int matroska_ebmlnum_uint(MatroskaDemuxContext *matroska,
>  {
>      AVIOContext pb;
>      ffio_init_context(&pb, data, size, 0, NULL, NULL, NULL, NULL);
> -    return ebml_read_num(matroska, &pb, FFMIN(size, 8), num);
> +    return ebml_read_num(matroska, &pb, FFMIN(size, 8), num, 1);
>  }
>  
>  /*
> @@ -1057,7 +1072,7 @@ static int ebml_parse(MatroskaDemuxContext *matroska, EbmlSyntax *syntax,
>  {
>      if (!matroska->current_id) {
>          uint64_t id;
> -        int res = ebml_read_num(matroska, matroska->ctx->pb, 4, &id);
> +        int res = ebml_read_num(matroska, matroska->ctx->pb, 4, &id, 0);
>          if (res < 0) {
>              // in live mode, finish parsing if EOF is reached.
>              return (matroska->is_live && matroska->ctx->pb->eof_reached &&
> @@ -3353,10 +3368,9 @@ static int matroska_parse_block(MatroskaDemuxContext *matroska, AVBufferRef *buf
>      uint64_t num;
>      int trust_default_duration = 1;
>  
> -    if ((n = matroska_ebmlnum_uint(matroska, data, size, &num)) < 0) {
> -        av_log(matroska->ctx, AV_LOG_ERROR, "EBML block data error\n");
> +    if ((n = matroska_ebmlnum_uint(matroska, data, size, &num)) < 0)
>          return n;
> -    }
> +
>      data += n;
>      size -= n;
>  
> @@ -3717,7 +3731,7 @@ static int webm_clusters_start_with_keyframe(AVFormatContext *s)
>          AVPacket *pkt;
>          avio_seek(s->pb, cluster_pos, SEEK_SET);
>          // read cluster id and length
> -        read = ebml_read_num(matroska, matroska->ctx->pb, 4, &cluster_id);
> +        read = ebml_read_num(matroska, matroska->ctx->pb, 4, &cluster_id, 1);
>          if (read < 0 || cluster_id != 0xF43B675) // done with all clusters
>              break;
>          read = ebml_read_length(matroska, matroska->ctx->pb, &cluster_length);
> @@ -3935,7 +3949,7 @@ static int webm_dash_manifest_cues(AVFormatContext *s, int64_t init_range)
>          // cues_end is inclusive and the above sum is reduced by 1.
>          uint64_t cues_length, cues_id;
>          int bytes_read;
> -        bytes_read = ebml_read_num   (matroska, matroska->ctx->pb,  4, &cues_id);
> +        bytes_read = ebml_read_num   (matroska, matroska->ctx->pb, 4, &cues_id, 1);
>          if (bytes_read < 0 || cues_id != (MATROSKA_ID_CUES & 0xfffffff))
>              return bytes_read < 0 ? bytes_read : AVERROR_INVALIDDATA;
>          bytes_read = ebml_read_length(matroska, matroska->ctx->pb, &cues_length);
> 



More information about the ffmpeg-devel mailing list