[FFmpeg-cvslog] matroska: redo seekhead handling

wm4 git at videolan.org
Thu Feb 12 14:17:16 CET 2015


ffmpeg | branch: master | wm4 <nfxjfg at googlemail.com> | Mon Feb  9 20:39:00 2015 +0100| [cac2295b21519d5a5320fe0787b5fd2b65eb1f8c] | committer: Michael Niedermayer

matroska: redo seekhead handling

In particular, this reads chained seekheads. This makes seeking faster
in files which have the index indirectly linked through 2 seekheads.

As a side-effect, this warns when reading level-1 (toplevel) elements
multiple times (other than seekheads, clusters, and void/crc). Such
elements are not valid and likely break everything.

Signed-off-by: Michael Niedermayer <michaelni at gmx.at>

> http://git.videolan.org/gitweb.cgi/ffmpeg.git/?a=commit;h=cac2295b21519d5a5320fe0787b5fd2b65eb1f8c
---

 libavformat/matroskadec.c |  125 +++++++++++++++++++++++++++++++--------------
 1 file changed, 88 insertions(+), 37 deletions(-)

diff --git a/libavformat/matroskadec.c b/libavformat/matroskadec.c
index f87d7d8..84c6e44 100644
--- a/libavformat/matroskadec.c
+++ b/libavformat/matroskadec.c
@@ -71,6 +71,7 @@ typedef enum {
     EBML_UTF8,
     EBML_BIN,
     EBML_NEST,
+    EBML_LEVEL1,
     EBML_PASS,
     EBML_STOP,
     EBML_SINT,
@@ -253,6 +254,12 @@ typedef struct {
 } MatroskaCluster;
 
 typedef struct {
+    uint64_t id;
+    uint64_t pos;
+    int parsed;
+} MatroskaLevel1Element;
+
+typedef struct {
     AVFormatContext *ctx;
 
     /* EBML stuff */
@@ -290,6 +297,10 @@ typedef struct {
     /* File has a CUES element, but we defer parsing until it is needed. */
     int cues_parsing_deferred;
 
+    /* Level1 elements and whether they were read yet */
+    MatroskaLevel1Element level1_elems[64];
+    int num_level1_elems;
+
     int current_cluster_num_blocks;
     int64_t current_cluster_pos;
     MatroskaCluster current_cluster;
@@ -551,13 +562,13 @@ static EbmlSyntax matroska_seekhead[] = {
 };
 
 static EbmlSyntax matroska_segment[] = {
-    { MATROSKA_ID_INFO,        EBML_NEST, 0, 0, { .n = matroska_info } },
-    { MATROSKA_ID_TRACKS,      EBML_NEST, 0, 0, { .n = matroska_tracks } },
-    { MATROSKA_ID_ATTACHMENTS, EBML_NEST, 0, 0, { .n = matroska_attachments } },
-    { MATROSKA_ID_CHAPTERS,    EBML_NEST, 0, 0, { .n = matroska_chapters } },
-    { MATROSKA_ID_CUES,        EBML_NEST, 0, 0, { .n = matroska_index } },
-    { MATROSKA_ID_TAGS,        EBML_NEST, 0, 0, { .n = matroska_tags } },
-    { MATROSKA_ID_SEEKHEAD,    EBML_NEST, 0, 0, { .n = matroska_seekhead } },
+    { MATROSKA_ID_INFO,        EBML_LEVEL1, 0, 0, { .n = matroska_info } },
+    { MATROSKA_ID_TRACKS,      EBML_LEVEL1, 0, 0, { .n = matroska_tracks } },
+    { MATROSKA_ID_ATTACHMENTS, EBML_LEVEL1, 0, 0, { .n = matroska_attachments } },
+    { MATROSKA_ID_CHAPTERS,    EBML_LEVEL1, 0, 0, { .n = matroska_chapters } },
+    { MATROSKA_ID_CUES,        EBML_LEVEL1, 0, 0, { .n = matroska_index } },
+    { MATROSKA_ID_TAGS,        EBML_LEVEL1, 0, 0, { .n = matroska_tags } },
+    { MATROSKA_ID_SEEKHEAD,    EBML_LEVEL1, 0, 0, { .n = matroska_seekhead } },
     { MATROSKA_ID_CLUSTER,     EBML_STOP },
     { 0 }
 };
@@ -976,6 +987,42 @@ static int ebml_parse_nest(MatroskaDemuxContext *matroska, EbmlSyntax *syntax,
     return res;
 }
 
+/*
+ * Allocate and return the entry for the level1 element with the given ID. If
+ * an entry already exists, return the existing entry.
+ */
+static MatroskaLevel1Element *matroska_find_level1_elem(MatroskaDemuxContext *matroska,
+                                                        uint32_t id)
+{
+    int i;
+    MatroskaLevel1Element *elem;
+
+    // Some files link to all clusters; useless.
+    if (id == MATROSKA_ID_CLUSTER)
+        return NULL;
+
+    // There can be multiple seekheads.
+    if (id != MATROSKA_ID_SEEKHEAD) {
+        for (i = 0; i < matroska->num_level1_elems; i++) {
+            if (matroska->level1_elems[i].id == id)
+                return &matroska->level1_elems[i];
+        }
+    }
+
+    // Only a completely broken file would have more elements.
+    // It also provides a low-effort way to escape from circular seekheads
+    // (every iteration will add a level1 entry).
+    if (matroska->num_level1_elems >= FF_ARRAY_ELEMS(matroska->level1_elems)) {
+        av_log(matroska->ctx, AV_LOG_ERROR, "Too many level1 elements or circular seekheads.\n");
+        return NULL;
+    }
+
+    elem = &matroska->level1_elems[matroska->num_level1_elems++];
+    *elem = (MatroskaLevel1Element){.id = id};
+
+    return elem;
+}
+
 static int ebml_parse_elem(MatroskaDemuxContext *matroska,
                            EbmlSyntax *syntax, void *data)
 {
@@ -994,6 +1041,7 @@ static int ebml_parse_elem(MatroskaDemuxContext *matroska,
     uint64_t length;
     int res;
     void *newelem;
+    MatroskaLevel1Element *level1_elem;
 
     data = (char *) data + syntax->data_offset;
     if (syntax->list_elem_size) {
@@ -1036,11 +1084,20 @@ static int ebml_parse_elem(MatroskaDemuxContext *matroska,
     case EBML_BIN:
         res = ebml_read_binary(pb, length, data);
         break;
+    case EBML_LEVEL1:
     case EBML_NEST:
         if ((res = ebml_read_master(matroska, length)) < 0)
             return res;
         if (id == MATROSKA_ID_SEGMENT)
             matroska->segment_start = avio_tell(matroska->ctx->pb);
+        if (id == MATROSKA_ID_CUES)
+            matroska->cues_parsing_deferred = 0;
+        if (syntax->type == EBML_LEVEL1 &&
+            (level1_elem = matroska_find_level1_elem(matroska, syntax->id))) {
+            if (level1_elem->parsed)
+                av_log(matroska->ctx, AV_LOG_ERROR, "Duplicate element\n");
+            level1_elem->parsed = 1;
+        }
         return ebml_parse_nest(matroska, syntax->def.n, data);
     case EBML_PASS:
         return ebml_parse_id(matroska, syntax->def.n, id, data);
@@ -1071,6 +1128,7 @@ static void ebml_free(EbmlSyntax *syntax, void *data)
         case EBML_BIN:
             av_freep(&((EbmlBin *) data_off)->data);
             break;
+        case EBML_LEVEL1:
         case EBML_NEST:
             if (syntax[i].list_elem_size) {
                 EbmlList *list = data_off;
@@ -1356,24 +1414,17 @@ static void matroska_convert_tags(AVFormatContext *s)
 }
 
 static int matroska_parse_seekhead_entry(MatroskaDemuxContext *matroska,
-                                         int idx)
+                                         uint64_t pos)
 {
-    EbmlList *seekhead_list = &matroska->seekhead;
     uint32_t level_up       = matroska->level_up;
     uint32_t saved_id       = matroska->current_id;
-    MatroskaSeekhead *seekhead = seekhead_list->elem;
     int64_t before_pos = avio_tell(matroska->ctx->pb);
     MatroskaLevel level;
     int64_t offset;
     int ret = 0;
 
-    if (idx >= seekhead_list->nb_elem            ||
-        seekhead[idx].id == MATROSKA_ID_SEEKHEAD ||
-        seekhead[idx].id == MATROSKA_ID_CLUSTER)
-        return 0;
-
     /* seek */
-    offset = seekhead[idx].pos + matroska->segment_start;
+    offset = pos + matroska->segment_start;
     if (avio_seek(matroska->ctx->pb, offset, SEEK_SET) == offset) {
         /* We don't want to lose our seekhead level, so we add
          * a dummy. This is a crude hack. */
@@ -1410,37 +1461,35 @@ static int matroska_parse_seekhead_entry(MatroskaDemuxContext *matroska,
 static void matroska_execute_seekhead(MatroskaDemuxContext *matroska)
 {
     EbmlList *seekhead_list = &matroska->seekhead;
-    int64_t before_pos = avio_tell(matroska->ctx->pb);
     int i;
-    int nb_elem;
 
     // we should not do any seeking in the streaming case
     if (!matroska->ctx->pb->seekable ||
         (matroska->ctx->flags & AVFMT_FLAG_IGNIDX))
         return;
 
-    // do not read entries that are added while parsing seekhead entries
-    nb_elem = seekhead_list->nb_elem;
+    for (i = 0; i < seekhead_list->nb_elem; i++) {
+        MatroskaSeekhead *seekheads = seekhead_list->elem;
+        uint32_t id  = seekheads[i].id;
+        uint64_t pos = seekheads[i].pos;
 
-    for (i = 0; i < nb_elem; i++) {
-        MatroskaSeekhead *seekhead = seekhead_list->elem;
-        if (seekhead[i].pos <= before_pos)
+        MatroskaLevel1Element *elem = matroska_find_level1_elem(matroska, id);
+        if (!elem || elem->parsed)
             continue;
 
+        elem->pos = pos;
+
         // defer cues parsing until we actually need cue data.
-        if (seekhead[i].id == MATROSKA_ID_CUES) {
-            matroska->cues_parsing_deferred = 1;
+        if (id == MATROSKA_ID_CUES)
             continue;
-        }
 
-        if (matroska_parse_seekhead_entry(matroska, i) < 0) {
+        if (matroska_parse_seekhead_entry(matroska, pos) < 0) {
             // mark index as broken
             matroska->cues_parsing_deferred = -1;
             break;
         }
-    }
-    if (nb_elem != seekhead_list->nb_elem) {
-        avpriv_request_sample(matroska->ctx, "recursive SeekHead elements");
+
+        elem->parsed = 1;
     }
 }
 
@@ -1474,17 +1523,18 @@ static void matroska_add_index_entries(MatroskaDemuxContext *matroska)
 }
 
 static void matroska_parse_cues(MatroskaDemuxContext *matroska) {
-    EbmlList *seekhead_list = &matroska->seekhead;
-    MatroskaSeekhead *seekhead = seekhead_list->elem;
     int i;
 
-    for (i = 0; i < seekhead_list->nb_elem; i++)
-        if (seekhead[i].id == MATROSKA_ID_CUES)
+    for (i = 0; i < matroska->num_level1_elems; i++) {
+        MatroskaLevel1Element *elem = &matroska->level1_elems[i];
+        if (elem->id == MATROSKA_ID_CUES && !elem->parsed) {
+            if (matroska_parse_seekhead_entry(matroska, elem->pos) < 0)
+                matroska->cues_parsing_deferred = -1;
+            elem->parsed = 1;
             break;
-    av_assert1(i <= seekhead_list->nb_elem);
+        }
+    }
 
-    if (matroska_parse_seekhead_entry(matroska, i) < 0)
-       matroska->cues_parsing_deferred = -1;
     matroska_add_index_entries(matroska);
 }
 
@@ -2014,6 +2064,7 @@ static int matroska_read_header(AVFormatContext *s)
     int i, j, res;
 
     matroska->ctx = s;
+    matroska->cues_parsing_deferred = 1;
 
     /* First read the EBML header. */
     if (ebml_parse(matroska, ebml_syntax, &ebml) ||



More information about the ffmpeg-cvslog mailing list