[FFmpeg-devel] [PATCH] avformat/movenc: Fix tfdt out of sync

Sun Jul 18 13:22:32 EEST 2021

Fix an edge case when auto flushing an empty fragment, and the previous fragment
has inaccurate duration, the track->frag_start would be out of sync from input
dts, and all subsequent tfdt box will have out of sync base_media_decode_time.
This error can accumulate to quite large over long-running streaming.

This can be easily reproduced by remux a variable frame rate source with dash
muxer and set streaming to 1.

When flushing a fragment, we may guess the duration if we have not got the next
package. Then when we get the first packet of the next fragment, we intend to
set the trk->cluster[0].dts to match the previously guessed duration.  However,
at this time trk->track_duration may already be updated from
mov_write_single_packet() when auto flushing the empty fragment (e.g. if
FF_MOV_FLAG_FRAG_EVERY_FRAME is set), but no moof was written to reflect this
change. So the trk->cluster[0].dts do NOT actually match the previously written
duration. Then, when flushing the next non-empty fragment, we will get a wrong
"duration", and track->frag_start will be out of sync. And it cannot get back on
track again.

This error is expected to accumulate over time. trk->cluster[0].dts will only
get too large. it will not get too small because we have check_pkt(), so
track->frag_start will fall more and more behind from input dts.

To fix this, we can directly remove the track->frag_start. It can always be
replaced by "track->cluster[0].dts - track->start_dts".

Signed-off-by: Hu Weiwen <sehuww at mail.scut.edu.cn>
---
 libavformat/movenc.c | 24 ++++--------------------
 libavformat/movenc.h |  1 -
 2 files changed, 4 insertions(+), 21 deletions(-)

diff --git a/libavformat/movenc.c b/libavformat/movenc.c
index c85efe87489..48736acab65 100644
--- a/libavformat/movenc.c
+++ b/libavformat/movenc.c
@@ -4482,8 +4482,7 @@ static int mov_write_tfxd_tag(AVIOContext *pb, MOVTrack *track)
     avio_write(pb, uuid, sizeof(uuid));
     avio_w8(pb, 1);
     avio_wb24(pb, 0);
-    avio_wb64(pb, track->start_dts + track->frag_start +
-                  track->cluster[0].cts);
+    avio_wb64(pb, track->cluster[0].dts + track->cluster[0].cts);
     avio_wb64(pb, track->end_pts -
                   (track->cluster[0].dts + track->cluster[0].cts));
 
@@ -4562,8 +4561,7 @@ static int mov_add_tfra_entries(AVIOContext *pb, MOVMuxContext *mov, int tracks,
         info->size     = size;
         // Try to recreate the original pts for the first packet
         // from the fields we have stored
-        info->time     = track->start_dts + track->frag_start +
-                         track->cluster[0].cts;
+        info->time     = track->cluster[0].dts + track->cluster[0].cts;
         info->duration = track->end_pts -
                          (track->cluster[0].dts + track->cluster[0].cts);
         // If the pts is less than zero, we will have trimmed
@@ -4601,7 +4599,7 @@ static int mov_write_tfdt_tag(AVIOContext *pb, MOVTrack *track)
     ffio_wfourcc(pb, "tfdt");
     avio_w8(pb, 1); /* version */
     avio_wb24(pb, 0);
-    avio_wb64(pb, track->frag_start);
+    avio_wb64(pb, track->cluster[0].dts - track->start_dts);
     return update_size(pb, pos);
 }
 
@@ -4678,8 +4676,7 @@ static int mov_write_sidx_tag(AVIOContext *pb,
 
     if (track->entry) {
         entries = 1;
-        presentation_time = track->start_dts + track->frag_start +
-                            track->cluster[0].cts;
+        presentation_time = track->cluster[0].dts + track->cluster[0].cts;
         duration = track->end_pts -
                    (track->cluster[0].dts + track->cluster[0].cts);
         starts_with_SAP = track->cluster[0].flags & MOV_SYNC_SAMPLE;
@@ -5358,10 +5355,6 @@ static int mov_flush_fragment(AVFormatContext *s, int force)
         mov->moov_written = 1;
         mov->mdat_size = 0;
         for (i = 0; i < mov->nb_streams; i++) {
-            if (mov->tracks[i].entry)
-                mov->tracks[i].frag_start += mov->tracks[i].start_dts +
-                                             mov->tracks[i].track_duration -
-                                             mov->tracks[i].cluster[0].dts;
             mov->tracks[i].entry = 0;
             mov->tracks[i].end_reliable = 0;
         }
@@ -5415,11 +5408,7 @@ static int mov_flush_fragment(AVFormatContext *s, int force)
         MOVTrack *track = &mov->tracks[i];
         int buf_size, write_moof = 1, moof_tracks = -1;
         uint8_t *buf;
-        int64_t duration = 0;
 
-        if (track->entry)
-            duration = track->start_dts + track->track_duration -
-                       track->cluster[0].dts;
         if (mov->flags & FF_MOV_FLAG_SEPARATE_MOOF) {
             if (!track->mdat_buf)
                 continue;
@@ -5439,8 +5428,6 @@ static int mov_flush_fragment(AVFormatContext *s, int force)
             ffio_wfourcc(s->pb, "mdat");
         }
 
-        if (track->entry)
-            track->frag_start += duration;
         track->entry = 0;
         track->entries_flushed = 0;
         track->end_reliable = 0;
@@ -5759,7 +5746,6 @@ int ff_mov_write_packet(AVFormatContext *s, AVPacket *pkt)
             /* New fragment, but discontinuous from previous fragments.
              * Pretend the duration sum of the earlier fragments is
              * pkt->dts - trk->start_dts. */
-            trk->frag_start = pkt->dts - trk->start_dts;
             trk->end_pts = AV_NOPTS_VALUE;
             trk->frag_discont = 0;
         }
@@ -5781,12 +5767,10 @@ int ff_mov_write_packet(AVFormatContext *s, AVPacket *pkt)
                 /* Pretend the whole stream started at pts=0, with earlier fragments
                  * already written. If the stream started at pts=0, the duration sum
                  * of earlier fragments would have been pkt->pts. */
-                trk->frag_start = pkt->pts;
                 trk->start_dts  = pkt->dts - pkt->pts;
             } else {
                 /* Pretend the whole stream started at dts=0, with earlier fragments
                  * already written, with a duration summing up to pkt->dts. */
-                trk->frag_start = pkt->dts;
                 trk->start_dts  = 0;
             }
             trk->frag_discont = 0;
diff --git a/libavformat/movenc.h b/libavformat/movenc.h
index af1ea0bce64..daeaad1cc68 100644
--- a/libavformat/movenc.h
+++ b/libavformat/movenc.h
@@ -138,7 +138,6 @@ typedef struct MOVTrack {
 
     AVIOContext *mdat_buf;
     int64_t     data_offset;
-    int64_t     frag_start;
     int         frag_discont;
     int         entries_flushed;
 
-- 
2.25.1