[FFmpeg-devel] [PATCH] mov.c: read fragment start dts from fragmented mp4

Mika Raento mikie at iki.fi
Sat Oct 11 06:25:52 CEST 2014


If present, an MFRA box and its TFRAs are read for fragment start times.

Without this change, timestamps for discontinuous fragmented mp4 are
wrong, and cause audio/video desync and are not usable for generating
HLS.
---
 libavformat/isom.h |  15 ++++++
 libavformat/mov.c  | 146 +++++++++++++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 161 insertions(+)

diff --git a/libavformat/isom.h b/libavformat/isom.h
index 979e967..80800f7 100644
--- a/libavformat/isom.h
+++ b/libavformat/isom.h
@@ -78,6 +78,7 @@ typedef struct MOVFragment {
     unsigned duration;
     unsigned size;
     unsigned flags;
+    int64_t time;
 } MOVFragment;
 
 typedef struct MOVTrackExt {
@@ -93,6 +94,17 @@ typedef struct MOVSbgp {
     unsigned int index;
 } MOVSbgp;
 
+typedef struct MOVFragmentIndexItem {
+    int64_t time;
+} MOVFragmentIndexItem;
+
+typedef struct MOVFragmentIndex {
+    unsigned track_id;
+    unsigned item_count;
+    unsigned current_item;
+    MOVFragmentIndexItem *items;
+} MOVFragmentIndex;
+
 typedef struct MOVStreamContext {
     AVIOContext *pb;
     int pb_is_copied;
@@ -171,6 +183,9 @@ typedef struct MOVContext {
     int *bitrates;          ///< bitrates read before streams creation
     int bitrates_count;
     int moov_retry;
+    int has_looked_for_mfra;
+    MOVFragmentIndex** fragment_index_data;
+    unsigned fragment_index_count;
 } MOVContext;
 
 int ff_mp4_read_descr_len(AVIOContext *pb);
diff --git a/libavformat/mov.c b/libavformat/mov.c
index 4ff46dd..c9da196 100644
--- a/libavformat/mov.c
+++ b/libavformat/mov.c
@@ -68,6 +68,7 @@ typedef struct MOVParseTableEntry {
 } MOVParseTableEntry;
 
 static int mov_read_default(MOVContext *c, AVIOContext *pb, MOVAtom atom);
+static int mov_read_mfra(MOVContext *c, AVIOContext *f);
 
 static int mov_metadata_track_or_disc_number(MOVContext *c, AVIOContext *pb,
                                              unsigned len, const char *key)
@@ -776,6 +777,19 @@ static int mov_read_moov(MOVContext *c, AVIOContext *pb, MOVAtom atom)
 
 static int mov_read_moof(MOVContext *c, AVIOContext *pb, MOVAtom atom)
 {
+    if (!c->has_looked_for_mfra) {
+        c->has_looked_for_mfra = 1;
+        if (pb->seekable) {
+            av_log(c->fc, AV_LOG_VERBOSE, "stream has moof boxes, will look "
+                    "for a mfra\n");
+            int ret;
+            if ((ret = mov_read_mfra(c, pb)) < 0)
+                av_log(c->fc, AV_LOG_VERBOSE, "found a moof box but failed to "
+                        "read the mfra (may be a live ismv)\n");
+        } else
+            av_log(c->fc, AV_LOG_VERBOSE, "found a moof box but stream is not "
+                    "seekable, can not look for mfra\n");
+    }
     c->fragment.moof_offset = c->fragment.implicit_offset = avio_tell(pb) - 8;
     av_dlog(c->fc, "moof offset %"PRIx64"\n", c->fragment.moof_offset);
     return mov_read_default(c, pb, atom);
@@ -2807,6 +2821,7 @@ static int mov_read_tfhd(MOVContext *c, AVIOContext *pb, MOVAtom atom)
 {
     MOVFragment *frag = &c->fragment;
     MOVTrackExt *trex = NULL;
+    MOVFragmentIndex* index = NULL;
     int flags, track_id, i;
 
     avio_r8(pb); /* version */
@@ -2825,6 +2840,15 @@ static int mov_read_tfhd(MOVContext *c, AVIOContext *pb, MOVAtom atom)
         av_log(c->fc, AV_LOG_ERROR, "could not find corresponding trex\n");
         return AVERROR_INVALIDDATA;
     }
+    for (i = 0; i < c->fragment_index_count; i++) {
+        MOVFragmentIndex* candidate = c->fragment_index_data[i];
+        if (candidate->track_id == frag->track_id) {
+            av_log(c->fc, AV_LOG_DEBUG,
+                   "found fragment index for track %u\n", frag->track_id);
+            index = candidate;
+            break;
+        }
+    }
 
     frag->base_data_offset = flags & MOV_TFHD_BASE_DATA_OFFSET ?
                              avio_rb64(pb) : flags & MOV_TFHD_DEFAULT_BASE_IS_MOOF ?
@@ -2837,6 +2861,18 @@ static int mov_read_tfhd(MOVContext *c, AVIOContext *pb, MOVAtom atom)
                      avio_rb32(pb) : trex->size;
     frag->flags    = flags & MOV_TFHD_DEFAULT_FLAGS ?
                      avio_rb32(pb) : trex->flags;
+    frag->time     = AV_NOPTS_VALUE;
+    if (index) {
+        if (index->current_item == index->item_count) {
+            av_log(c->fc, AV_LOG_WARNING, "track %u has a fragment index "
+                   "but it doesn't have entries for all tfhds at tfhd "
+                   "%u\n", frag->track_id, index->current_item);
+        } else if (index->current_item < index->item_count) {
+            frag->time =
+                index->items[index->current_item].time;
+            index->current_item++;
+        }
+    }
     av_dlog(c->fc, "frag flags 0x%x\n", frag->flags);
     return 0;
 }
@@ -2945,6 +2981,18 @@ static int mov_read_trun(MOVContext *c, AVIOContext *pb, MOVAtom atom)
         sc->ctts_data[sc->ctts_count].duration = (flags & MOV_TRUN_SAMPLE_CTS) ?
                                                   avio_rb32(pb) : 0;
         mov_update_dts_shift(sc, sc->ctts_data[sc->ctts_count].duration);
+        if (frag->time != AV_NOPTS_VALUE) {
+            int64_t pts = frag->time;
+            av_log(c->fc, AV_LOG_DEBUG, "found frag time %"PRId64"\n", pts);
+            dts = pts - sc->dts_shift;
+            if (flags & MOV_TRUN_SAMPLE_CTS) {
+                dts -= sc->ctts_data[sc->ctts_count].duration;
+            } else {
+                dts -= sc->time_offset;
+            }
+            av_log(c->fc, AV_LOG_DEBUG, "calculated into dts %"PRId64"\n", dts);
+            frag->time = AV_NOPTS_VALUE;
+        }
         sc->ctts_count++;
         if (st->codec->codec_type == AVMEDIA_TYPE_AUDIO)
             keyframe = 1;
@@ -3583,6 +3631,13 @@ static int mov_read_close(AVFormatContext *s)
     av_freep(&mov->trex_data);
     av_freep(&mov->bitrates);
 
+    for (i = 0; i < mov->fragment_index_count; i++) {
+        MOVFragmentIndex* index = mov->fragment_index_data[i];
+        av_freep(&index->items);
+        av_freep(&mov->fragment_index_data[i]);
+    }
+    av_freep(&mov->fragment_index_data);
+
     return 0;
 }
 
@@ -3620,6 +3675,97 @@ static void export_orphan_timecode(AVFormatContext *s)
     }
 }
 
+static int read_tfra(MOVContext *mov, AVIOContext *f)
+{
+    MOVFragmentIndex* index = NULL;
+    int version, fieldlength, i, j, err;
+    int64_t pos = avio_tell(f);
+    uint32_t size = avio_rb32(f);
+    if (avio_rb32(f) != MKBETAG('t', 'f', 'r', 'a')) {
+        return -1;
+    }
+    av_log(mov->fc, AV_LOG_VERBOSE, "found tfra\n");
+    index = av_mallocz(sizeof(MOVFragmentIndex));
+    if (!index)
+        return AVERROR(ENOMEM);
+    mov->fragment_index_count++;
+    if ((err = av_reallocp(&mov->fragment_index_data,
+                           mov->fragment_index_count *
+                           sizeof(MOVFragmentIndex*))) < 0) {
+        av_freep(&index);
+        return err;
+    }
+    mov->fragment_index_data[mov->fragment_index_count - 1] =
+        index;
+
+    version = avio_r8(f);
+    avio_rb24(f);
+    index->track_id = avio_rb32(f);
+    fieldlength = avio_rb32(f);
+    index->item_count = avio_rb32(f);
+    index->items = av_mallocz(
+            index->item_count * sizeof(MOVFragmentIndexItem));
+    if (!index->items)
+        return AVERROR(ENOMEM);
+    for (i = 0; i < index->item_count; i++) {
+        int64_t time;
+        if (version == 1) {
+            time   = avio_rb64(f);
+            avio_rb64(f);  /* offset */
+        } else {
+            time   = avio_rb32(f);
+            avio_rb32(f);  /* offset */
+        }
+        index->items[i].time = time;
+        for (j = 0; j < ((fieldlength >> 4) & 3) + 1; j++)
+            avio_r8(f);
+        for (j = 0; j < ((fieldlength >> 2) & 3) + 1; j++)
+            avio_r8(f);
+        for (j = 0; j < ((fieldlength >> 0) & 3) + 1; j++)
+            avio_r8(f);
+    }
+
+    avio_seek(f, pos + size, SEEK_SET);
+    return 0;
+}
+
+static int mov_read_mfra(MOVContext *c, AVIOContext *f)
+{
+    int64_t stream_size = avio_size(f);
+    int64_t original_pos = avio_tell(f);
+    int32_t mfra_size;
+    int ret = -1;
+    if ((ret = avio_seek(f, stream_size - 4, SEEK_SET)) < 0) goto fail;
+    mfra_size = avio_rb32(f);
+    if (mfra_size < 0 || mfra_size > stream_size) {
+        av_log(c->fc, AV_LOG_DEBUG, "doesn't look like mfra (unreasonable size)\n");
+        ret = -1;
+        goto fail;
+    }
+    if ((ret = avio_seek(f, -mfra_size, SEEK_CUR)) < 0) goto fail;
+    if (avio_rb32(f) != mfra_size) {
+        av_log(c->fc, AV_LOG_DEBUG, "doesn't look like mfra (size mismatch)\n");
+        ret = -1;
+        goto fail;
+    }
+    if (avio_rb32(f) != MKBETAG('m', 'f', 'r', 'a')) {
+        av_log(c->fc, AV_LOG_DEBUG, "doesn't look like mfra (tag mismatch)\n");
+        goto fail;
+    }
+    av_log(c->fc, AV_LOG_VERBOSE, "stream has mfra\n");
+    while (!read_tfra(c, f)) {
+        /* Empty */
+    }
+fail:
+    ret = avio_seek(f, original_pos, SEEK_SET);
+    if (ret < 0)
+        av_log(c->fc, AV_LOG_ERROR,
+               "failed to seek back after looking for mfra\n");
+    else
+        ret = 0;
+    return ret;
+}
+
 static int mov_read_header(AVFormatContext *s)
 {
     MOVContext *mov = s->priv_data;
-- 
1.9.3 (Apple Git-50)



More information about the ffmpeg-devel mailing list