[FFmpeg-devel] [PATCH 15/21] avformat/matroskadec: Redo level handling

Andreas Rheinhardt andreas.rheinhardt at googlemail.com
Wed Mar 27 13:18:46 EET 2019


This commit changes how levels are handled: If the level used for
ebml_parse ends directly after an element that has been consumed, then
ebml_parse ends the level itself (and any finite-sized levels that end
there as well) and informs the caller via the return value; if the
current level is unknown-sized, then the level is ended as soon as
an element that is not valid on the current level is encountered.

This is designed for situations where one wants to parse master elements
incrementally, i.e. not in one go via ebml_parse_nest.

Signed-off-by: Andreas Rheinhardt <andreas.rheinhardt at googlemail.com>
---
 libavformat/matroskadec.c | 104 +++++++++++++++++++++++++++++++-------
 1 file changed, 85 insertions(+), 19 deletions(-)

diff --git a/libavformat/matroskadec.c b/libavformat/matroskadec.c
index 42f1c21042..32cf57685f 100644
--- a/libavformat/matroskadec.c
+++ b/libavformat/matroskadec.c
@@ -69,6 +69,8 @@
 #include "qtpalette.h"
 
 #define EBML_UNKNOWN_LENGTH  UINT64_MAX /* EBML unknown length, in uint64_t */
+#define LEVEL_ENDED                   2 /* return value of ebml_parse when the
+                                         * syntax level used for parsing ended. */
 
 typedef enum {
     EBML_NONE,
@@ -1041,16 +1043,32 @@ static int ebml_parse_id(MatroskaDemuxContext *matroska, EbmlSyntax *syntax,
                          uint32_t id, void *data)
 {
     int i;
+
     for (i = 0; syntax[i].id; i++)
         if (id == syntax[i].id)
             break;
-    if (!syntax[i].id && id == MATROSKA_ID_CLUSTER &&
-        matroska->num_levels > 0                   &&
-        matroska->levels[matroska->num_levels - 1].length == EBML_UNKNOWN_LENGTH)
-        return 0;  // we reached the end of an unknown size cluster
+
     if (!syntax[i].id && id != EBML_ID_VOID && id != EBML_ID_CRC32) {
-        av_log(matroska->ctx, AV_LOG_DEBUG, "Unknown entry 0x%"PRIX32"\n", id);
+        if (!matroska->num_levels || matroska->levels[matroska->num_levels - 1].length
+                                                                != EBML_UNKNOWN_LENGTH) {
+            av_log(matroska->ctx, AV_LOG_DEBUG, "Unknown entry 0x%"PRIX32"\n", id);
+        } else if (matroska->num_levels > 1) {
+            // Unknown-sized master elements (except root elements) end
+            // when an id not known to be allowed in them is encountered.
+            matroska->num_levels--;
+            return LEVEL_ENDED;
+        } else if (matroska->num_levels == 1) {
+            AVIOContext *pb = matroska->ctx->pb;
+            int64_t     pos = avio_tell(pb);
+            // An unkown level 1 element inside an unknown-sized segment
+            // is considered an error.
+            av_log(matroska->ctx, AV_LOG_ERROR, "Found unknown element id 0x%"PRIX32
+                   " at pos. %"PRIu64" (0x%"PRIx64") in an unknown-sized segment. "
+                   "Will be treated as error.\n", id, pos, pos);
+            return AVERROR_INVALIDDATA;
+        }
     }
+
     return ebml_parse_elem(matroska, &syntax[i], data);
 }
 
@@ -1061,9 +1079,24 @@ static int ebml_parse(MatroskaDemuxContext *matroska, EbmlSyntax *syntax,
         uint64_t id;
         int res = ebml_read_num(matroska, matroska->ctx->pb, 4, &id, 0);
         if (res < 0) {
-            // in live mode, finish parsing if EOF is reached.
-            return (matroska->is_live && matroska->ctx->pb->eof_reached &&
-                    res == AVERROR_EOF) ? 1 : res;
+            if (matroska->ctx->pb->eof_reached && res == AVERROR_EOF) {
+                if (matroska->is_live)
+                    // in live mode, finish parsing if EOF is reached.
+                    return 1;
+                if (matroska->num_levels) {
+                    MatroskaLevel *level = &matroska->levels[matroska->num_levels - 1];
+
+                    if (level->length == EBML_UNKNOWN_LENGTH) {
+                        // Unknown-sized elements automatically end at EOF.
+                        matroska->num_levels = 0;
+                        return LEVEL_ENDED;
+                    } else if (avio_tell(matroska->ctx->pb) < level->start + level->length) {
+                        av_log(matroska->ctx, AV_LOG_ERROR, "File ended before "
+                               "its declared end\n");
+                    }
+                }
+            }
+            return res;
         }
         matroska->current_id = id | 1 << 7 * res;
     }
@@ -1098,10 +1131,15 @@ static int ebml_parse_nest(MatroskaDemuxContext *matroska, EbmlSyntax *syntax,
             break;
         }
 
-    while (!res && !ebml_level_end(matroska))
+    if (!matroska->levels[matroska->num_levels - 1].length) {
+        matroska->num_levels--;
+        return 0;
+    }
+
+    while (!res)
         res = ebml_parse(matroska, syntax, data);
 
-    return res;
+    return res == LEVEL_ENDED ? 0 : res;
 }
 
 static int is_ebml_id_valid(uint32_t id)
@@ -1169,7 +1207,7 @@ static int ebml_parse_elem(MatroskaDemuxContext *matroska,
     AVIOContext *pb = matroska->ctx->pb;
     uint32_t id = syntax->id;
     uint64_t length;
-    int res;
+    int res, level_check;
     void *newelem;
     MatroskaLevel1Element *level1_elem;
 
@@ -1195,7 +1233,8 @@ static int ebml_parse_elem(MatroskaDemuxContext *matroska,
                    length, max_lengths[syntax->type], syntax->type);
             return AVERROR_INVALIDDATA;
         }
-        if (matroska->num_levels > 0) {
+
+        if (matroska->num_levels) {
             MatroskaLevel *level = &matroska->levels[matroska->num_levels - 1];
             int64_t pos = avio_tell(pb);
 
@@ -1204,18 +1243,24 @@ static int ebml_parse_elem(MatroskaDemuxContext *matroska,
                 uint64_t elem_end = pos + length,
                         level_end = level->start + level->length;
 
-                if (level_end < elem_end) {
+                if (elem_end < level_end) {
+                    level_check = 0;
+                } else if (elem_end == level_end) {
+                    level_check = LEVEL_ENDED;
+                } else {
                     av_log(matroska->ctx, AV_LOG_ERROR,
                            "Element at 0x%"PRIx64" ending at 0x%"PRIx64" exceeds "
                            "containing master element ending at 0x%"PRIx64"\n",
                            pos, elem_end, level_end);
                     return AVERROR_INVALIDDATA;
                 }
+            } else if (length != EBML_UNKNOWN_LENGTH) {
+                level_check = 0;
             } else if (level->length != EBML_UNKNOWN_LENGTH) {
                 av_log(matroska->ctx, AV_LOG_ERROR, "Unknown-sized element "
                        "at 0x%"PRIx64" inside parent with finite size\n", pos);
                 return AVERROR_INVALIDDATA;
-            } else if (length == EBML_UNKNOWN_LENGTH && id != MATROSKA_ID_CLUSTER) {
+            } else if (id != MATROSKA_ID_CLUSTER) {
                 // According to the specifications only clusters and segments
                 // are allowed to be unknown-sized.
                 av_log(matroska->ctx, AV_LOG_ERROR,
@@ -1223,7 +1268,8 @@ static int ebml_parse_elem(MatroskaDemuxContext *matroska,
                        "0x%"PRIx64". Dropping the invalid element.\n", pos);
                 return AVERROR_INVALIDDATA;
             }
-        }
+        } else
+            level_check = 0;
     }
 
     switch (syntax->type) {
@@ -1257,19 +1303,36 @@ static int ebml_parse_elem(MatroskaDemuxContext *matroska,
                 av_log(matroska->ctx, AV_LOG_ERROR, "Duplicate element\n");
             level1_elem->parsed = 1;
         }
-        return ebml_parse_nest(matroska, syntax->def.n, data);
+        res = ebml_parse_nest(matroska, syntax->def.n, data);
+        break;
     case EBML_STOP:
         return 1;
     default:
         if (ffio_limit(pb, length) != length)
             return AVERROR(EIO);
-        return avio_skip(pb, length) < 0 ? AVERROR(EIO) : 0;
+        res = avio_skip(pb, length) < 0 ? AVERROR(EIO) : 0;
     }
     if (res == AVERROR_INVALIDDATA)
         av_log(matroska->ctx, AV_LOG_ERROR, "Invalid element\n");
     else if (res == AVERROR(EIO))
         av_log(matroska->ctx, AV_LOG_ERROR, "Read error\n");
-    return res;
+
+    if (res < 0 || res == 1)
+        return res;
+
+    if (level_check == LEVEL_ENDED && matroska->num_levels) {
+        MatroskaLevel *level = &matroska->levels[matroska->num_levels - 1];
+        int64_t pos = avio_tell(pb);
+
+        // Given that pos >= level->start no check for
+        // level->length != EBML_UNKNOWN_LENGTH is necessary.
+        while (matroska->num_levels && pos == level->start + level->length) {
+            matroska->num_levels--;
+            level--;
+        }
+    }
+
+    return level_check;
 }
 
 static void ebml_free(EbmlSyntax *syntax, void *data)
@@ -3491,7 +3554,7 @@ static int matroska_parse_cluster(MatroskaDemuxContext *matroska)
                              cluster);
     }
 
-    if (!res && block->bin.size > 0) {
+    if ((!res || res == LEVEL_ENDED) && block->bin.size > 0) {
             int is_keyframe = block->non_simple ? block->reference == INT64_MIN : -1;
             uint8_t* additional = block->additional.size > 0 ?
                                     block->additional.data : NULL;
@@ -3506,6 +3569,9 @@ static int matroska_parse_cluster(MatroskaDemuxContext *matroska)
                                        block->discard_padding);
     }
 
+    if (res == LEVEL_ENDED)
+        cluster->pos = 0;
+
     ebml_free(matroska_blockgroup, block);
     memset(block, 0, sizeof(*block));
 
-- 
2.19.2



More information about the ffmpeg-devel mailing list