[FFmpeg-devel] [PATCH] matroska: Add incremental parsing of clusters.

dalecurtis at chromium.org dalecurtis at chromium.org
Tue Apr 17 02:54:29 CEST 2012


From: Dale Curtis <dalecurtis at chromium.org>

Reduces the amount of upfront data required for cluster parsing
thus decreasing latency on seek and startup.

The change in the seek-lavf_mkv FATE test is due to incremental
parsing no longer reading as much data as the old parser and
thus not having that additional data to generate index entries
based on keyframes.  Index entries are added correctly as the
file is parsed.

All FATE tests pass and Chrome has been using this patch for ~6
months without issue.

Signed-off-by: Aaron Colwell <acolwell at chromium.org>
Signed-off-by: Dale Curtis <dalecurtis at chromium.org>
---
 libavformat/matroskadec.c |  105 ++++++++++++++++++++++++++++++++++++---------
 tests/ref/seek/lavf_mkv   |    2 +-
 2 files changed, 86 insertions(+), 21 deletions(-)

diff --git a/libavformat/matroskadec.c b/libavformat/matroskadec.c
index a484c50..41c88b4 100644
--- a/libavformat/matroskadec.c
+++ b/libavformat/matroskadec.c
@@ -224,6 +224,11 @@ typedef struct {
 } MatroskaLevel;
 
 typedef struct {
+    uint64_t timecode;
+    EbmlList blocks;
+} MatroskaCluster;
+
+typedef struct {
     AVFormatContext *ctx;
 
     /* EBML stuff */
@@ -259,6 +264,10 @@ typedef struct {
 
     /* File has a CUES element, but we defer parsing until it is needed. */
     int cues_parsing_deferred;
+
+    int current_cluster_num_blocks;
+    int64_t current_cluster_pos;
+    MatroskaCluster current_cluster;
 } MatroskaDemuxContext;
 
 typedef struct {
@@ -268,11 +277,6 @@ typedef struct {
     EbmlBin  bin;
 } MatroskaBlock;
 
-typedef struct {
-    uint64_t timecode;
-    EbmlList blocks;
-} MatroskaCluster;
-
 static EbmlSyntax ebml_header[] = {
     { EBML_ID_EBMLREADVERSION,        EBML_UINT, 0, offsetof(Ebml,version), {.u=EBML_VERSION} },
     { EBML_ID_EBMLMAXSIZELENGTH,      EBML_UINT, 0, offsetof(Ebml,max_size), {.u=8} },
@@ -543,6 +547,38 @@ static EbmlSyntax matroska_clusters[] = {
     { 0 }
 };
 
+static EbmlSyntax matroska_cluster_incremental_parsing[] = {
+    { MATROSKA_ID_CLUSTERTIMECODE,EBML_UINT,0, offsetof(MatroskaCluster,timecode) },
+    { MATROSKA_ID_BLOCKGROUP,     EBML_NEST, sizeof(MatroskaBlock), offsetof(MatroskaCluster,blocks), {.n=matroska_blockgroup} },
+    { MATROSKA_ID_SIMPLEBLOCK,    EBML_PASS, sizeof(MatroskaBlock), offsetof(MatroskaCluster,blocks), {.n=matroska_blockgroup} },
+    { MATROSKA_ID_CLUSTERPOSITION,EBML_NONE },
+    { MATROSKA_ID_CLUSTERPREVSIZE,EBML_NONE },
+    { MATROSKA_ID_INFO,           EBML_NONE },
+    { MATROSKA_ID_CUES,           EBML_NONE },
+    { MATROSKA_ID_TAGS,           EBML_NONE },
+    { MATROSKA_ID_SEEKHEAD,       EBML_NONE },
+    { MATROSKA_ID_CLUSTER,        EBML_STOP },
+    { 0 }
+};
+
+static EbmlSyntax matroska_cluster_incremental[] = {
+    { MATROSKA_ID_CLUSTERTIMECODE,EBML_UINT,0, offsetof(MatroskaCluster,timecode) },
+    { MATROSKA_ID_BLOCKGROUP,     EBML_STOP },
+    { MATROSKA_ID_SIMPLEBLOCK,    EBML_STOP },
+    { MATROSKA_ID_CLUSTERPOSITION,EBML_NONE },
+    { MATROSKA_ID_CLUSTERPREVSIZE,EBML_NONE },
+    { 0 }
+};
+
+static EbmlSyntax matroska_clusters_incremental[] = {
+    { MATROSKA_ID_CLUSTER,        EBML_NEST, 0, 0, {.n=matroska_cluster_incremental} },
+    { MATROSKA_ID_INFO,           EBML_NONE },
+    { MATROSKA_ID_CUES,           EBML_NONE },
+    { MATROSKA_ID_TAGS,           EBML_NONE },
+    { MATROSKA_ID_SEEKHEAD,       EBML_NONE },
+    { 0 }
+};
+
 static const char *const matroska_doctypes[] = { "matroska", "webm" };
 
 static int matroska_resync(MatroskaDemuxContext *matroska, int64_t last_pos)
@@ -1759,6 +1795,7 @@ static int matroska_deliver_packet(MatroskaDemuxContext *matroska,
                 matroska->packets = newpackets;
         } else {
             av_freep(&matroska->packets);
+            matroska->prev_pkt = NULL;
         }
         matroska->num_packets--;
         return 0;
@@ -2059,29 +2096,56 @@ end:
     return res;
 }
 
-static int matroska_parse_cluster(MatroskaDemuxContext *matroska)
+static int matroska_parse_cluster_incremental(MatroskaDemuxContext *matroska)
 {
-    MatroskaCluster cluster = { 0 };
     EbmlList *blocks_list;
     MatroskaBlock *blocks;
     int i, res;
-    int64_t pos = avio_tell(matroska->ctx->pb);
-    matroska->prev_pkt = NULL;
-    if (matroska->current_id)
-        pos -= 4;  /* sizeof the ID which was already read */
-    res = ebml_parse(matroska, matroska_clusters, &cluster);
-    blocks_list = &cluster.blocks;
-    blocks = blocks_list->elem;
-    for (i=0; i<blocks_list->nb_elem; i++)
+    res = ebml_parse(matroska,
+                     matroska_cluster_incremental_parsing,
+                     &matroska->current_cluster);
+    if (res == 1) {
+        /* New Cluster */
+        if (matroska->current_cluster_pos)
+            ebml_level_end(matroska);
+        ebml_free(matroska_cluster, &matroska->current_cluster);
+        memset(&matroska->current_cluster, 0, sizeof(MatroskaCluster));
+        matroska->current_cluster_num_blocks = 0;
+        matroska->current_cluster_pos = avio_tell(matroska->ctx->pb);
+        matroska->prev_pkt = NULL;
+        /* sizeof the ID which was already read */
+        if (matroska->current_id)
+            matroska->current_cluster_pos -= 4;
+        res = ebml_parse(matroska,
+                         matroska_clusters_incremental,
+                         &matroska->current_cluster);
+        /* Try parsing the block agiain. */
+        if (res == 1)
+            res = ebml_parse(matroska,
+                             matroska_cluster_incremental_parsing,
+                             &matroska->current_cluster);
+    }
+
+    if (!res &&
+        matroska->current_cluster_num_blocks <
+            matroska->current_cluster.blocks.nb_elem) {
+        blocks_list = &matroska->current_cluster.blocks;
+        blocks = blocks_list->elem;
+
+        matroska->current_cluster_num_blocks = blocks_list->nb_elem;
+        i = blocks_list->nb_elem - 1;
         if (blocks[i].bin.size > 0 && blocks[i].bin.data) {
             int is_keyframe = blocks[i].non_simple ? !blocks[i].reference : -1;
             res=matroska_parse_block(matroska,
                                      blocks[i].bin.data, blocks[i].bin.size,
-                                     blocks[i].bin.pos,  cluster.timecode,
+                                     blocks[i].bin.pos,
+                                     matroska->current_cluster.timecode,
                                      blocks[i].duration, is_keyframe,
-                                     pos);
+                                     matroska->current_cluster_pos);
         }
-    ebml_free(matroska_cluster, &cluster);
+    }
+
+    if (res < 0)  matroska->done = 1;
     return res;
 }
 
@@ -2093,7 +2157,7 @@ static int matroska_read_packet(AVFormatContext *s, AVPacket *pkt)
         int64_t pos = avio_tell(matroska->ctx->pb);
         if (matroska->done)
             return AVERROR_EOF;
-        if (matroska_parse_cluster(matroska) < 0)
+        if (matroska_parse_cluster_incremental(matroska) < 0)
             matroska_resync(matroska, pos);
     }
 
@@ -2123,7 +2187,7 @@ static int matroska_read_seek(AVFormatContext *s, int stream_index,
         matroska->current_id = 0;
         while ((index = av_index_search_timestamp(st, timestamp, flags)) < 0) {
             matroska_clear_queue(matroska);
-            if (matroska_parse_cluster(matroska) < 0)
+            if (matroska_parse_cluster_incremental(matroska) < 0)
                 break;
         }
     }
@@ -2180,6 +2244,7 @@ static int matroska_read_close(AVFormatContext *s)
     for (n=0; n < matroska->tracks.nb_elem; n++)
         if (tracks[n].type == MATROSKA_TRACK_TYPE_AUDIO)
             av_free(tracks[n].audio.buf);
+    ebml_free(matroska_cluster, &matroska->current_cluster);
     ebml_free(matroska_segment, matroska);
 
     return 0;
diff --git a/tests/ref/seek/lavf_mkv b/tests/ref/seek/lavf_mkv
index 4e7966b..278d2dc 100644
--- a/tests/ref/seek/lavf_mkv
+++ b/tests/ref/seek/lavf_mkv
@@ -30,7 +30,7 @@ ret: 0         st: 0 flags:1  ts: 2.413000
 ret: 0         st: 0 flags:1 dts: 0.960000 pts: 0.960000 pos: 292193 size: 27834
 ret:-1         st: 1 flags:0  ts: 1.307000
 ret: 0         st: 1 flags:1  ts: 0.201000
-ret: 0         st: 1 flags:1 dts: 0.198000 pts: 0.198000 pos:    555 size:   208
+ret: 0         st: 1 flags:1 dts: 0.015000 pts: 0.015000 pos:    555 size:   208
 ret: 0         st:-1 flags:0  ts:-0.904994
 ret: 0         st: 1 flags:1 dts: 0.000000 pts: 0.000000 pos:    555 size:   208
 ret: 0         st:-1 flags:1  ts: 1.989173
-- 
1.7.7.3



More information about the ffmpeg-devel mailing list