[FFmpeg-devel] [PATCH 3/3] avformat/RFC/WIP: add basic timeline support

Mon Dec 1 23:10:16 CET 2014

From: Clément Bœsch <clement at stupeflix.com>

This commit currently adds support to ffplay only, and only when
-ignore_editlist -1 (FIXME) is passed as an input option. Adding support
to ffmpeg shouldn't be that hard, at least for the transcoding part.
Stream copy might be a bit more tricky.

Comments and tests very welcome here. This patchset will be regularly
reworked and rebased @ https://github.com/ubitux/FFmpeg/compare/edts

The concept is simply this: some demuxers export an opaque timeline, and
users can use avformat_frame_honor_timeline() on the decoded frames to
check if they should be displayed or not. The function will return 0 if
the frame should not be part of the presentation. Otherwise, it will
return 1, with adjusted timestamps if necessary. Later on, it could
truncate large audio frames to get accurate cropping.

Since the timeline structures are completely opaque and users only have
one function to interact with, we can easily change the internals later
on. For now, the implementation mostly just follows the MOV/MP4 edit
list structure. We will probably need something smarter for MKV and
friends.

TODO: check if behaviour is actually correct...
TODO: add doxy and various docs
TODO: avformat version bump + document api changes
TODO: add support when transcoding with ffmpeg
TODO: figure out the least intrusive way possible to avoid clashes with
      current demuxer behaviour
TODO: probably many other things
---
 ffplay.c               |  12 ++++--
 libavformat/avformat.h |   9 ++++
 libavformat/internal.h |   6 +++
 libavformat/mov.c      |  13 +++++-
 libavformat/utils.c    | 113 +++++++++++++++++++++++++++++++++++++++++++++++++
 5 files changed, 147 insertions(+), 6 deletions(-)

diff --git a/ffplay.c b/ffplay.c
index 1914a66..8cf48c6 100644
--- a/ffplay.c
+++ b/ffplay.c
@@ -545,7 +545,8 @@ static void decoder_init(Decoder *d, AVCodecContext *avctx, PacketQueue *queue,
     d->start_pts = AV_NOPTS_VALUE;
 }
 
-static int decoder_decode_frame(Decoder *d, AVFrame *frame, AVSubtitle *sub) {
+static int decoder_decode_frame(AVFormatContext *fmtctx, Decoder *d, AVFrame *frame, AVSubtitle *sub)
+{
     int got_frame = 0;
 
     d->flushed = 0;
@@ -610,6 +611,9 @@ static int decoder_decode_frame(Decoder *d, AVFrame *frame, AVSubtitle *sub) {
                 break;
         }
 
+        if (got_frame && frame)
+            got_frame = avformat_frame_honor_timeline(fmtctx, frame, d->pkt_temp.stream_index);
+
         if (ret < 0) {
             d->packet_pending = 0;
         } else {
@@ -1879,7 +1883,7 @@ static int get_video_frame(VideoState *is, AVFrame *frame)
 {
     int got_picture;
 
-    if ((got_picture = decoder_decode_frame(&is->viddec, frame, NULL)) < 0)
+    if ((got_picture = decoder_decode_frame(is->ic, &is->viddec, frame, NULL)) < 0)
         return -1;
 
     if (got_picture) {
@@ -2136,7 +2140,7 @@ static int audio_thread(void *arg)
         return AVERROR(ENOMEM);
 
     do {
-        if ((got_frame = decoder_decode_frame(&is->auddec, frame, NULL)) < 0)
+        if ((got_frame = decoder_decode_frame(is->ic, &is->auddec, frame, NULL)) < 0)
             goto the_end;
 
         if (got_frame) {
@@ -2315,7 +2319,7 @@ static int subtitle_thread(void *arg)
         if (!(sp = frame_queue_peek_writable(&is->subpq)))
             return 0;
 
-        if ((got_subtitle = decoder_decode_frame(&is->subdec, NULL, &sp->sub)) < 0)
+        if ((got_subtitle = decoder_decode_frame(is->ic, &is->subdec, NULL, &sp->sub)) < 0)
             break;
 
         pts = 0;
diff --git a/libavformat/avformat.h b/libavformat/avformat.h
index 2e54ed1..4c3a9d4 100644
--- a/libavformat/avformat.h
+++ b/libavformat/avformat.h
@@ -1755,6 +1755,11 @@ typedef struct AVFormatContext {
      * - demuxing: Set by user.
      */
     uint8_t *dump_separator;
+
+    /**
+     * XXX
+     */
+    void *timeline;
 } AVFormatContext;
 
 int av_format_get_probe_score(const AVFormatContext *s);
@@ -2679,6 +2684,10 @@ int avformat_match_stream_specifier(AVFormatContext *s, AVStream *st,
 
 int avformat_queue_attached_pictures(AVFormatContext *s);
 
+/**
+ * Return 0 to drop frame f of stream sid, 1 to keep it (f->pts may be adjusted).
+ */
+int avformat_frame_honor_timeline(AVFormatContext *fmtctx, AVFrame *f, int sid);
 
 /**
  * @}
diff --git a/libavformat/internal.h b/libavformat/internal.h
index ce03dac..9eb336a 100644
--- a/libavformat/internal.h
+++ b/libavformat/internal.h
@@ -436,4 +436,10 @@ enum AVWriteUncodedFrameFlags {
  */
 int ff_copy_whitelists(AVFormatContext *dst, AVFormatContext *src);
 
+/**
+ * Append a segment to the timeline of stream sid; return 0 or a negative AVERROR code.
+ */
+int ff_timeline_add_stream_segment(AVFormatContext *fmtctx, int sid,
+                                   int64_t duration, int64_t time, float rate);
+
 #endif /* AVFORMAT_INTERNAL_H */
diff --git a/libavformat/mov.c b/libavformat/mov.c
index bb3e251..c2fdadc 100644
--- a/libavformat/mov.c
+++ b/libavformat/mov.c
@@ -2250,6 +2250,7 @@ static void mov_build_index(MOVContext *mov, AVStream *st)
     uint64_t stream_size = 0;
 
     if (sc->elst_count) {
+        if (mov->ignore_editlist == 0) {
         int i, edit_start_index = 0, unsupported = 0;
         int64_t empty_duration = 0; // empty duration of the first edit list entry
         int64_t start_time = 0; // start time of the media
@@ -2284,6 +2285,14 @@ static void mov_build_index(MOVContext *mov, AVStream *st)
                 st->codec->has_b_frames = 1;
             }
         }
+        } else if (mov->ignore_editlist == -1) { // FIXME: add a const option value meaning exporting
+            for (i = 0; i < sc->elst_count; i++) {
+                const MOVElst *e = &sc->elst_data[i];
+                const int64_t time     = av_rescale(e->time,     sc->time_scale, mov->time_scale);
+                const int64_t duration = av_rescale(e->duration, sc->time_scale, mov->time_scale);
+                ff_timeline_add_stream_segment(mov->fc, st->index, duration, time, e->rate); // XXX: error checking
+            }
+        }
     }
 
     /* only use old uncompressed audio chunk demuxing when stts specifies it */
@@ -3196,7 +3205,7 @@ static int mov_read_elst(MOVContext *c, AVIOContext *pb, MOVAtom atom)
     MOVStreamContext *sc;
     int i, edit_count, version;
 
-    if (c->fc->nb_streams < 1 || c->ignore_editlist)
+    if (c->fc->nb_streams < 1)
         return 0;
     sc = c->fc->streams[c->fc->nb_streams-1]->priv_data;
 
@@ -4223,7 +4232,7 @@ static const AVOption options[] = {
         offsetof(MOVContext, use_absolute_path), FF_OPT_TYPE_INT, {.i64 = 0},
         0, 1, AV_OPT_FLAG_VIDEO_PARAM|AV_OPT_FLAG_DECODING_PARAM},
     {"ignore_editlist", "", offsetof(MOVContext, ignore_editlist), FF_OPT_TYPE_INT, {.i64 = 0},
-        0, 1, AV_OPT_FLAG_VIDEO_PARAM|AV_OPT_FLAG_DECODING_PARAM},
+        -1, 1, AV_OPT_FLAG_VIDEO_PARAM|AV_OPT_FLAG_DECODING_PARAM},
     {"use_mfra_for",
         "use mfra for fragment timestamps",
         offsetof(MOVContext, use_mfra_for), FF_OPT_TYPE_INT, {.i64 = FF_MOV_FLAG_MFRA_AUTO},
diff --git a/libavformat/utils.c b/libavformat/utils.c
index 798c612..cd56fde 100644
--- a/libavformat/utils.c
+++ b/libavformat/utils.c
@@ -3511,6 +3511,112 @@ int av_find_best_stream(AVFormatContext *ic, enum AVMediaType type,
 
 /*******************************************************/
 
+struct timeline_segment { // one entry of a stream's timeline
+    int64_t duration;   // length of the segment, in the same units as time
+    int64_t time;       // start of the segment, compared against the frame pts
+    float rate;         // factor applied to the gap preceding the segment
+};
+
+struct timeline_stream { // timeline segments registered for one stream
+    struct timeline_segment *segments;  // dynamic array of nb_segment entries
+    int nb_segment;                     // number of valid entries in segments
+    int64_t empty_initial_duration;     // initial empty entry; int64_t like the duration stored in it
+};
+
+struct timeline_context { // object AVFormatContext.timeline points to
+    struct timeline_stream *st; // array of nb_st entries, indexed by stream id
+    int nb_st;                  // number of entries in st
+};
+
+/* Append a segment to the timeline of stream sid, allocating the timeline and
+ * the stream entry on demand. Return 0 on success, AVERROR(EINVAL) for a
+ * negative sid, AVERROR(ENOMEM) if an allocation fails. */
+int ff_timeline_add_stream_segment(AVFormatContext *fmtctx, int sid,
+                                   int64_t duration, int64_t time, float rate)
+{
+    struct timeline_context *t = fmtctx->timeline;
+    struct timeline_stream  *st;
+    struct timeline_segment *segment;
+
+    if (sid < 0)
+        return AVERROR(EINVAL);
+
+    /* Create the timeline if not already done */
+    if (!t && !(t = fmtctx->timeline = av_mallocz(sizeof(*t))))
+        return AVERROR(ENOMEM);
+
+    /* Create a new stream if not already present; new slots are zeroed */
+    if (sid >= t->nb_st) {
+        st = av_realloc_array(t->st, sid + 1, sizeof(*t->st));
+        if (!st)
+            return AVERROR(ENOMEM);
+        t->st = st;
+        memset(&t->st[t->nb_st], 0, (sid - t->nb_st + 1) * sizeof(*t->st));
+        t->nb_st = sid + 1;
+    }
+    st = &t->st[sid];
+
+    /* An empty first entry (time == -1) only carries the stream start offset.
+     * This must run after the lookup above: st is not valid before it. */
+    if (st->nb_segment == 0 && time == -1) {
+        st->empty_initial_duration = duration;
+        return 0;
+    }
+
+    /* Finally append the new segment */
+    segment = av_dynarray2_add((void **)&st->segments, &st->nb_segment,
+                               sizeof(*st->segments), NULL);
+    if (!segment)
+        return AVERROR(ENOMEM);
+    segment->duration = duration;
+    segment->time     = time;
+    segment->rate     = rate;
+    return 0;
+}
+
+/* Decide whether the decoded frame f of stream sid is part of the presentation.
+ * Return 0 if f->pts falls outside every exported segment. Otherwise return 1,
+ * with f->pts mapped onto the presentation timeline; f is left untouched if
+ * no timeline applies to the stream or f->pts is AV_NOPTS_VALUE. */
+int avformat_frame_honor_timeline(AVFormatContext *fmtctx, AVFrame *f, int sid)
+{
+    int i;
+    struct timeline_context *t = fmtctx->timeline;
+    struct timeline_stream *st;
+    int64_t total_drop_time = 0;
+
+    if (!t || sid < 0 || sid >= t->nb_st || sid >= fmtctx->nb_streams || f->pts == AV_NOPTS_VALUE)
+        return 1;
+
+    /* A zero-filled slot means nothing was exported for this stream */
+    st = &t->st[sid];
+    if (!st->nb_segment && !st->empty_initial_duration)
+        return 1;
+
+    for (i = 0; i < st->nb_segment; i++) {
+        const struct timeline_segment *segment = &st->segments[i];
+        int64_t gap = segment->time;
+
+        if (i > 0)
+            gap -= st->segments[i - 1].time + st->segments[i - 1].duration;
+        /* Scale in double: int64 += float would round the sum to float precision */
+        total_drop_time += (int64_t)(gap * (double)segment->rate);
+
+        /* Segments cover [time, time + duration) in media time: test the raw
+         * pts first, then move it onto the presentation timeline */
+        if (f->pts >= segment->time &&
+            f->pts <  segment->time + segment->duration) {
+            f->pts += st->empty_initial_duration - total_drop_time;
+            // TODO: truncate frame in case of too large audio frames?
+            return 1;
+        }
+    }
+
+    return 0;
+}
+
+/*******************************************************/
+
 int av_read_play(AVFormatContext *s)
 {
     if (s->iformat->read_play)
@@ -3589,6 +3695,13 @@ void avformat_free_context(AVFormatContext *s)
     av_dict_free(&s->metadata);
     av_freep(&s->streams);
     av_freep(&s->internal);
+    if (s->timeline) {
+        struct timeline_context *t = s->timeline;
+        for (i = 0; i < t->nb_st; i++)
+            av_freep(&t->st[i].segments);
+        av_freep(&t->st);
+        av_freep(&s->timeline);
+    }
     flush_packet_queue(s);
     av_free(s);
 }
-- 
2.1.3