[FFmpeg-devel] [PATCH] avformat/mov: merge stts and ctts arrays into one

Wed Nov 27 16:10:09 EET 2024

Should reduce memory usage as well as remove code duplication.

Signed-off-by: James Almer <jamrial at gmail.com>
---
 libavformat/isom.h |  15 +-
 libavformat/mov.c  | 544 +++++++++++++++++----------------------------
 2 files changed, 221 insertions(+), 338 deletions(-)

diff --git a/libavformat/isom.h b/libavformat/isom.h
index f18b15bbff..08c41a24a9 100644
--- a/libavformat/isom.h
+++ b/libavformat/isom.h
@@ -54,6 +54,12 @@ struct AVAESCTR;
  * Here we just use what is needed to read the chunks
  */
 
+typedef struct MOVTimeToSample {
+    unsigned int count;
+    unsigned int duration;
+    int offset;
+} MOVTimeToSample;
+
 typedef struct MOVStts {
     unsigned int count;
     unsigned int duration;
@@ -173,6 +179,9 @@ typedef struct MOVStreamContext {
     int next_chunk;
     unsigned int chunk_count;
     int64_t *chunk_offsets;
+    unsigned int tts_count;
+    unsigned int tts_allocated_size;
+    MOVTimeToSample *tts_data;
     unsigned int stts_count;
     unsigned int stts_allocated_size;
     MOVStts *stts_data;
@@ -189,10 +198,8 @@ typedef struct MOVStreamContext {
     unsigned *stps_data;  ///< partial sync sample for mpeg-2 open gop
     MOVElst *elst_data;
     unsigned int elst_count;
-    int stts_index;
-    int stts_sample;
-    int ctts_index;
-    int ctts_sample;
+    int tts_index;
+    int tts_sample;
     unsigned int sample_size; ///< may contain value calculated from stsd or value from stsz atom
     unsigned int stsz_sample_size; ///< always contains sample size from stsz atom
     unsigned int sample_count;
diff --git a/libavformat/mov.c b/libavformat/mov.c
index 24feadb95b..2e34a6349d 100644
--- a/libavformat/mov.c
+++ b/libavformat/mov.c
@@ -86,8 +86,6 @@ typedef struct MOVParseTableEntry {
 static int mov_read_default(MOVContext *c, AVIOContext *pb, MOVAtom atom);
 static int mov_read_mfra(MOVContext *c, AVIOContext *f);
 static void mov_free_stream_context(AVFormatContext *s, AVStream *st);
-static int64_t add_ctts_entry(MOVCtts** ctts_data, unsigned int* ctts_count, unsigned int* allocated_size,
-                              int count, int duration);
 
 static int mov_metadata_track_or_disc_number(MOVContext *c, AVIOContext *pb,
                                              unsigned len, const char *key)
@@ -3711,6 +3709,12 @@ static int mov_read_ctts(MOVContext *c, AVIOContext *pb, MOVAtom atom)
         return AVERROR(ENOMEM);
 
     for (i = 0; i < entries && !pb->eof_reached; i++) {
+        MOVCtts *ctts_data;
+        const size_t min_size_needed = (ctts_count + 1) * sizeof(MOVCtts);
+        const size_t requested_size =
+            min_size_needed > sc->ctts_allocated_size ?
+            FFMAX(min_size_needed, 2 * sc->ctts_allocated_size) :
+            min_size_needed;
         int count    = avio_rb32(pb);
         int duration = avio_rb32(pb);
 
@@ -3721,8 +3725,19 @@ static int mov_read_ctts(MOVContext *c, AVIOContext *pb, MOVAtom atom)
             continue;
         }
 
-        add_ctts_entry(&sc->ctts_data, &ctts_count, &sc->ctts_allocated_size,
-                       count, duration);
+        if (ctts_count >= UINT_MAX / sizeof(MOVCtts) - 1)
+            return AVERROR(ENOMEM);
+
+        ctts_data = av_fast_realloc(sc->ctts_data, &sc->ctts_allocated_size, requested_size);
+
+        if (!ctts_data)
+            return AVERROR(ENOMEM);
+
+        sc->ctts_data = ctts_data;
+
+        ctts_data[ctts_count].count = count;
+        ctts_data[ctts_count].offset = duration;
+        ctts_count++;
 
         av_log(c->fc, AV_LOG_TRACE, "count=%d, duration=%d\n",
                 count, duration);
@@ -3916,24 +3931,19 @@ static int get_edit_list_entry(MOVContext *mov,
 static int find_prev_closest_index(AVStream *st,
                                    AVIndexEntry *e_old,
                                    int nb_old,
-                                   MOVStts* stts_data,
-                                   int64_t stts_count,
-                                   MOVCtts* ctts_data,
-                                   int64_t ctts_count,
+                                   MOVTimeToSample *tts_data,
+                                   int64_t tts_count,
                                    int64_t timestamp_pts,
                                    int flag,
                                    int64_t* index,
-                                   int64_t* stts_index,
-                                   int64_t* stts_sample,
-                                   int64_t* ctts_index,
-                                   int64_t* ctts_sample)
+                                   int64_t* tts_index,
+                                   int64_t* tts_sample)
 {
     MOVStreamContext *msc = st->priv_data;
     FFStream *const sti = ffstream(st);
     AVIndexEntry *e_keep = sti->index_entries;
     int nb_keep = sti->nb_index_entries;
     int64_t i = 0;
-    int64_t index_ctts_count;
 
     av_assert0(index);
 
@@ -3961,64 +3971,39 @@ static int find_prev_closest_index(AVStream *st,
 
     // If we have CTTS then refine the search, by searching backwards over PTS
     // computed by adding corresponding CTTS durations to index timestamps.
-    if (ctts_data && *index >= 0) {
-        av_assert0(ctts_index);
-        av_assert0(ctts_sample);
+    if (tts_data && *index >= 0) {
+        av_assert0(tts_index);
+        av_assert0(tts_sample);
         // Find out the ctts_index for the found frame.
-        *ctts_index = 0;
-        *ctts_sample = 0;
-
-        if (stts_data) {
-            av_assert0(stts_index);
-            av_assert0(stts_sample);
-
-            *stts_index = 0;
-            *stts_sample = 0;
-        }
-
-        for (index_ctts_count = 0; index_ctts_count < *index; index_ctts_count++) {
-            if (*ctts_index < ctts_count) {
-                (*ctts_sample)++;
-                if (ctts_data[*ctts_index].count == *ctts_sample) {
-                    (*ctts_index)++;
-                    *ctts_sample = 0;
-                }
-            }
-            if (stts_data && *stts_index < stts_count) {
-                (*stts_sample)++;
-                if (stts_data[*stts_index].count == *stts_sample) {
-                    (*stts_index)++;
-                    *stts_sample = 0;
+        *tts_index = 0;
+        *tts_sample = 0;
+
+        for (int64_t index_tts_count = 0; index_tts_count < *index; index_tts_count++) {
+            if (*tts_index < tts_count) {
+                (*tts_sample)++;
+                if (tts_data[*tts_index].count == *tts_sample) {
+                    (*tts_index)++;
+                    *tts_sample = 0;
                 }
             }
         }
 
-        while (*index >= 0 && (*ctts_index) >= 0 && (*ctts_index) < ctts_count) {
+        while (*index >= 0 && (*tts_index) >= 0 && (*tts_index) < tts_count) {
             // Find a "key frame" with PTS <= timestamp_pts (So that we can decode B-frames correctly).
             // No need to add dts_shift to the timestamp here becase timestamp_pts has already been
             // compensated by dts_shift above.
-            if ((e_old[*index].timestamp + ctts_data[*ctts_index].offset) <= timestamp_pts &&
+            if ((e_old[*index].timestamp + tts_data[*tts_index].offset) <= timestamp_pts &&
                 (e_old[*index].flags & AVINDEX_KEYFRAME)) {
                 break;
             }
 
             (*index)--;
-            if (*ctts_sample == 0) {
-                (*ctts_index)--;
-                if (*ctts_index >= 0)
-                  *ctts_sample = ctts_data[*ctts_index].count - 1;
+            if (*tts_sample == 0) {
+                (*tts_index)--;
+                if (*tts_index >= 0)
+                  *tts_sample = tts_data[*tts_index].count - 1;
             } else {
-                (*ctts_sample)--;
-            }
-            if (stts_data) {
-                if (*stts_sample == 0) {
-                    (*stts_index)--;
-                    if (*stts_index >= 0) {
-                        *stts_sample = stts_data[*stts_index].count - 1;
-                    }
-                } else {
-                    (*stts_sample)--;
-                }
+                (*tts_sample)--;
             }
         }
     }
@@ -4093,62 +4078,32 @@ static void fix_index_entry_timestamps(AVStream* st, int end_index, int64_t end_
     }
 }
 
-static int64_t add_stts_entry(MOVStts** stts_data, unsigned int* stts_count, unsigned int* allocated_size,
-                              int count, int duration)
+static int add_tts_entry(MOVTimeToSample **tts_data, unsigned int *tts_count, unsigned int *allocated_size,
+                         int count, int offset, unsigned int duration)
 {
-    MOVStts *stts_buf_new;
-    const size_t min_size_needed = (*stts_count + 1) * sizeof(MOVStts);
+    MOVTimeToSample *tts_buf_new;
+    const size_t min_size_needed = (*tts_count + 1) * sizeof(MOVTimeToSample);
     const size_t requested_size =
         min_size_needed > *allocated_size ?
         FFMAX(min_size_needed, 2 * (*allocated_size)) :
         min_size_needed;
 
-    if ((unsigned)(*stts_count) >= UINT_MAX / sizeof(MOVStts) - 1)
+    if ((unsigned)(*tts_count) >= UINT_MAX / sizeof(MOVTimeToSample) - 1)
         return -1;
 
-    stts_buf_new = av_fast_realloc(*stts_data, allocated_size, requested_size);
+    tts_buf_new = av_fast_realloc(*tts_data, allocated_size, requested_size);
 
-    if (!stts_buf_new)
+    if (!tts_buf_new)
         return -1;
 
-    *stts_data = stts_buf_new;
-
-    stts_buf_new[*stts_count].count = count;
-    stts_buf_new[*stts_count].duration = duration;
+    *tts_data = tts_buf_new;
 
-    *stts_count = (*stts_count) + 1;
-    return *stts_count;
-}
+    tts_buf_new[*tts_count].count = count;
+    tts_buf_new[*tts_count].offset = offset;
+    tts_buf_new[*tts_count].duration = duration;
 
-/**
- * Append a new ctts entry to ctts_data.
- * Returns the new ctts_count if successful, else returns -1.
- */
-static int64_t add_ctts_entry(MOVCtts** ctts_data, unsigned int* ctts_count, unsigned int* allocated_size,
-                              int count, int offset)
-{
-    MOVCtts *ctts_buf_new;
-    const size_t min_size_needed = (*ctts_count + 1) * sizeof(MOVCtts);
-    const size_t requested_size =
-        min_size_needed > *allocated_size ?
-        FFMAX(min_size_needed, 2 * (*allocated_size)) :
-        min_size_needed;
-
-    if ((unsigned)(*ctts_count) >= UINT_MAX / sizeof(MOVCtts) - 1)
-        return -1;
-
-    ctts_buf_new = av_fast_realloc(*ctts_data, allocated_size, requested_size);
-
-    if (!ctts_buf_new)
-        return -1;
-
-    *ctts_data = ctts_buf_new;
-
-    ctts_buf_new[*ctts_count].count = count;
-    ctts_buf_new[*ctts_count].offset = offset;
-
-    *ctts_count = (*ctts_count) + 1;
-    return *ctts_count;
+    *tts_count = (*tts_count) + 1;
+    return 0;
 }
 
 #define MAX_REORDER_DELAY 16
@@ -4165,17 +4120,17 @@ static void mov_estimate_video_delay(MOVContext *c, AVStream* st)
     for (j = 0; j < MAX_REORDER_DELAY + 1; j++)
         pts_buf[j] = INT64_MIN;
 
-    if (st->codecpar->video_delay <= 0 && msc->ctts_data &&
+    if (st->codecpar->video_delay <= 0 && msc->ctts_count &&
         st->codecpar->codec_id == AV_CODEC_ID_H264) {
         st->codecpar->video_delay = 0;
-        for (int ind = 0; ind < sti->nb_index_entries && ctts_ind < msc->ctts_count; ++ind) {
+        for (int ind = 0; ind < sti->nb_index_entries && ctts_ind < msc->tts_count; ++ind) {
             // Point j to the last elem of the buffer and insert the current pts there.
             j = buf_start;
             buf_start = (buf_start + 1);
             if (buf_start == MAX_REORDER_DELAY + 1)
                 buf_start = 0;
 
-            pts_buf[j] = sti->index_entries[ind].timestamp + msc->ctts_data[ctts_ind].offset;
+            pts_buf[j] = sti->index_entries[ind].timestamp + msc->tts_data[ctts_ind].offset;
 
             // The timestamps that are already in the sorted buffer, and are greater than the
             // current pts, are exactly the timestamps that need to be buffered to output PTS
@@ -4198,7 +4153,7 @@ static void mov_estimate_video_delay(MOVContext *c, AVStream* st)
             st->codecpar->video_delay = FFMAX(st->codecpar->video_delay, num_swaps);
 
             ctts_sample++;
-            if (ctts_sample == msc->ctts_data[ctts_ind].count) {
+            if (ctts_sample == msc->tts_data[ctts_ind].count) {
                 ctts_ind++;
                 ctts_sample = 0;
             }
@@ -4268,21 +4223,16 @@ static void mov_fix_index(MOVContext *mov, AVStream *st)
     int nb_old = sti->nb_index_entries;
     const AVIndexEntry *e_old_end = e_old + nb_old;
     const AVIndexEntry *current = NULL;
-    MOVStts *stts_data_old = msc->stts_data;
-    int64_t stts_index_old = 0;
-    int64_t stts_sample_old = 0;
-    int64_t stts_count_old = msc->stts_count;
-    MOVCtts *ctts_data_old = msc->ctts_data;
-    int64_t ctts_index_old = 0;
-    int64_t ctts_sample_old = 0;
-    int64_t ctts_count_old = msc->ctts_count;
+    MOVTimeToSample *tts_data_old = msc->tts_data;
+    int64_t tts_index_old = 0;
+    int64_t tts_sample_old = 0;
+    int64_t tts_count_old = msc->tts_count;
     int64_t edit_list_media_time = 0;
     int64_t edit_list_duration = 0;
     int64_t frame_duration = 0;
     int64_t edit_list_dts_counter = 0;
     int64_t edit_list_dts_entry_end = 0;
-    int64_t edit_list_start_stts_sample = 0;
-    int64_t edit_list_start_ctts_sample = 0;
+    int64_t edit_list_start_tts_sample = 0;
     int64_t curr_cts;
     int64_t curr_ctts = 0;
     int64_t empty_edits_sum_duration = 0;
@@ -4319,16 +4269,11 @@ static void mov_fix_index(MOVContext *mov, AVStream *st)
     sti->nb_index_entries = 0;
 
     // Clean time to sample fields of MOVStreamContext
-    msc->stts_data = NULL;
-    msc->stts_count = 0;
-    msc->stts_index = 0;
-    msc->stts_sample = 0;
-    msc->stts_allocated_size = 0;
-    msc->ctts_data = NULL;
-    msc->ctts_count = 0;
-    msc->ctts_index = 0;
-    msc->ctts_sample = 0;
-    msc->ctts_allocated_size = 0;
+    msc->tts_data = NULL;
+    msc->tts_count = 0;
+    msc->tts_index = 0;
+    msc->tts_sample = 0;
+    msc->tts_allocated_size = 0;
 
     // Reinitialize min_corrected_pts so that it can be computed again.
     msc->min_corrected_pts = -1;
@@ -4381,26 +4326,23 @@ static void mov_fix_index(MOVContext *mov, AVStream *st)
             search_timestamp = FFMAX(search_timestamp - msc->time_scale, e_old[0].timestamp);
         }
 
-        if (find_prev_closest_index(st, e_old, nb_old, stts_data_old, stts_count_old, ctts_data_old, ctts_count_old, search_timestamp, 0,
-                                    &index, &stts_index_old, &stts_sample_old, &ctts_index_old, &ctts_sample_old) < 0) {
+        if (find_prev_closest_index(st, e_old, nb_old, tts_data_old, tts_count_old, search_timestamp, 0,
+                                    &index, &tts_index_old, &tts_sample_old) < 0) {
             av_log(mov->fc, AV_LOG_WARNING,
                    "st: %d edit list: %"PRId64" Missing key frame while searching for timestamp: %"PRId64"\n",
                    st->index, edit_list_index, search_timestamp);
-            if (find_prev_closest_index(st, e_old, nb_old, stts_data_old, stts_count_old, ctts_data_old, ctts_count_old, search_timestamp, AVSEEK_FLAG_ANY,
-                                        &index, &stts_index_old, &stts_sample_old, &ctts_index_old, &ctts_sample_old) < 0) {
+            if (find_prev_closest_index(st, e_old, nb_old, tts_data_old, tts_count_old, search_timestamp, AVSEEK_FLAG_ANY,
+                                        &index, &tts_index_old, &tts_sample_old) < 0) {
                 av_log(mov->fc, AV_LOG_WARNING,
                        "st: %d edit list %"PRId64" Cannot find an index entry before timestamp: %"PRId64".\n",
                        st->index, edit_list_index, search_timestamp);
                 index = 0;
-                stts_index_old = 0;
-                stts_sample_old = 0;
-                ctts_index_old = 0;
-                ctts_sample_old = 0;
+                tts_index_old = 0;
+                tts_sample_old = 0;
             }
         }
         current = e_old + index;
-        edit_list_start_ctts_sample = ctts_sample_old;
-        edit_list_start_stts_sample = stts_sample_old;
+        edit_list_start_tts_sample = tts_sample_old;
 
         // Iterate over index and arrange it according to edit list
         edit_list_start_encountered = 0;
@@ -4416,44 +4358,26 @@ static void mov_fix_index(MOVContext *mov, AVStream *st)
             curr_cts = current->timestamp + msc->dts_shift;
             curr_ctts = 0;
 
-            if (stts_data_old && stts_index_old < stts_count_old) {
-                stts_sample_old++;
-                if (stts_sample_old == stts_data_old[stts_index_old].count) {
-                    if (add_stts_entry(&msc->stts_data, &msc->stts_count,
-                                       &msc->stts_allocated_size,
-                                       stts_data_old[stts_index_old].count - edit_list_start_stts_sample,
-                                       stts_data_old[stts_index_old].duration) == -1) {
-                        av_log(mov->fc, AV_LOG_ERROR, "Cannot add STTS entry %"PRId64" - {%"PRId64", %d}\n",
-                               stts_index_old,
-                               stts_data_old[stts_index_old].count - edit_list_start_stts_sample,
-                               stts_data_old[stts_index_old].duration);
-                        break;
-                    }
-                    stts_index_old++;
-                    stts_sample_old = 0;
-                    edit_list_start_stts_sample = 0;
-                }
-            }
-            if (ctts_data_old && ctts_index_old < ctts_count_old) {
-                curr_ctts = ctts_data_old[ctts_index_old].offset;
+            if (tts_data_old && tts_index_old < tts_count_old) {
+                curr_ctts = tts_data_old[tts_index_old].offset;
                 av_log(mov->fc, AV_LOG_TRACE, "stts: %"PRId64" ctts: %"PRId64", ctts_index: %"PRId64", ctts_count: %"PRId64"\n",
-                       curr_cts, curr_ctts, ctts_index_old, ctts_count_old);
+                       curr_cts, curr_ctts, tts_index_old, tts_count_old);
                 curr_cts += curr_ctts;
-                ctts_sample_old++;
-                if (ctts_sample_old == ctts_data_old[ctts_index_old].count) {
-                    if (add_ctts_entry(&msc->ctts_data, &msc->ctts_count,
-                                       &msc->ctts_allocated_size,
-                                       ctts_data_old[ctts_index_old].count - edit_list_start_ctts_sample,
-                                       ctts_data_old[ctts_index_old].offset) == -1) {
+                tts_sample_old++;
+                if (tts_sample_old == tts_data_old[tts_index_old].count) {
+                    if (add_tts_entry(&msc->tts_data, &msc->tts_count,
+                                       &msc->tts_allocated_size,
+                                       tts_data_old[tts_index_old].count - edit_list_start_tts_sample,
+                                       tts_data_old[tts_index_old].offset, tts_data_old[tts_index_old].duration) == -1) {
                         av_log(mov->fc, AV_LOG_ERROR, "Cannot add CTTS entry %"PRId64" - {%"PRId64", %d}\n",
-                               ctts_index_old,
-                               ctts_data_old[ctts_index_old].count - edit_list_start_ctts_sample,
-                               ctts_data_old[ctts_index_old].offset);
+                               tts_index_old,
+                               tts_data_old[tts_index_old].count - edit_list_start_tts_sample,
+                               tts_data_old[tts_index_old].offset);
                         break;
                     }
-                    ctts_index_old++;
-                    ctts_sample_old = 0;
-                    edit_list_start_ctts_sample = 0;
+                    tts_index_old++;
+                    tts_sample_old = 0;
+                    edit_list_start_tts_sample = 0;
                 }
             }
 
@@ -4539,7 +4463,7 @@ static void mov_fix_index(MOVContext *mov, AVStream *st)
             // Break when found first key frame after edit entry completion
             if ((curr_cts + frame_duration >= (edit_list_duration + edit_list_media_time)) &&
                 ((flags & AVINDEX_KEYFRAME) || ((st->codecpar->codec_type == AVMEDIA_TYPE_AUDIO)))) {
-                if (ctts_data_old) {
+                if (tts_data_old) {
                     // If we have CTTS and this is the first keyframe after edit elist,
                     // wait for one more, because there might be trailing B-frames after this I-frame
                     // that do belong to the edit.
@@ -4547,24 +4471,14 @@ static void mov_fix_index(MOVContext *mov, AVStream *st)
                         found_keyframe_after_edit = 1;
                         continue;
                     }
-                    if (ctts_sample_old != 0) {
-                        if (stts_data_old &&
-                            add_stts_entry(&msc->stts_data, &msc->stts_count,
-                                           &msc->stts_allocated_size,
-                                           stts_sample_old - edit_list_start_stts_sample,
-                                           stts_data_old[stts_index_old].duration) == -1) {
-                            av_log(mov->fc, AV_LOG_ERROR, "Cannot add STTS entry %"PRId64" - {%"PRId64", %d}\n",
-                                   stts_index_old, stts_sample_old - edit_list_start_stts_sample,
-                                   stts_data_old[stts_index_old].duration);
-                            break;
-                        }
-                        if (add_ctts_entry(&msc->ctts_data, &msc->ctts_count,
-                                           &msc->ctts_allocated_size,
-                                           ctts_sample_old - edit_list_start_ctts_sample,
-                                           ctts_data_old[ctts_index_old].offset) == -1) {
+                    if (tts_sample_old != 0) {
+                        if (add_tts_entry(&msc->tts_data, &msc->tts_count,
+                                           &msc->tts_allocated_size,
+                                           tts_sample_old - edit_list_start_tts_sample,
+                                           tts_data_old[tts_index_old].offset, tts_data_old[tts_index_old].duration) == -1) {
                             av_log(mov->fc, AV_LOG_ERROR, "Cannot add CTTS entry %"PRId64" - {%"PRId64", %d}\n",
-                                   ctts_index_old, ctts_sample_old - edit_list_start_ctts_sample,
-                                   ctts_data_old[ctts_index_old].offset);
+                                   tts_index_old, tts_sample_old - edit_list_start_tts_sample,
+                                   tts_data_old[tts_index_old].offset);
                             break;
                         }
                     }
@@ -4595,8 +4509,7 @@ static void mov_fix_index(MOVContext *mov, AVStream *st)
 
     // Free the old index and the old CTTS structures
     av_free(e_old);
-    av_free(stts_data_old);
-    av_free(ctts_data_old);
+    av_free(tts_data_old);
     av_freep(&frame_duration_buffer);
 
     // Null terminate the index ranges array
@@ -4678,6 +4591,55 @@ static int build_open_gop_key_points(AVStream *st)
     return 0;
 }
 
+static int mov_merge_tts_arrays(MOVContext *mov, AVStream *st)
+{
+    MOVStreamContext *sc = st->priv_data;
+    unsigned int stts_count;
+    int ret = 0, idx = 0;
+
+    // Expand time to sample entries such that we have a 1-1 mapping with samples
+    if (!sc->sample_count || sc->sample_count >= UINT_MAX / sizeof(*sc->tts_data))
+        return -1;
+    sc->tts_data = av_fast_realloc(NULL, &sc->tts_allocated_size,
+                                   sc->sample_count * sizeof(*sc->tts_data));
+    if (!sc->tts_data)
+        return -1;
+
+    memset(sc->tts_data, 0, sc->tts_allocated_size);
+
+    for (int i = 0; i < sc->ctts_count &&
+                sc->tts_count < sc->sample_count; i++)
+        for (int j = 0; j < sc->ctts_data[i].count &&
+                    sc->tts_count < sc->sample_count; j++) {
+            ret = add_tts_entry(&sc->tts_data, &sc->tts_count,
+                                &sc->tts_allocated_size, 1,
+                                sc->ctts_data[i].offset, 0);
+            if (ret < 0)
+                return ret;
+        }
+    av_freep(&sc->ctts_data);
+    sc->ctts_allocated_size = 0;
+
+    stts_count = FFMIN(sc->tts_count, sc->stts_count);
+    for (int i = 0; i < sc->stts_count &&
+                stts_count < sc->sample_count; i++)
+        for (int j = 0; j < sc->stts_data[i].count &&
+                    stts_count < sc->sample_count; j++) {
+            if (sc->ctts_count)
+                sc->tts_data[idx++].duration = sc->stts_data[i].duration;
+            else
+                ret = add_tts_entry(&sc->tts_data, &sc->tts_count,
+                                    &sc->tts_allocated_size, 1,
+                                    0, sc->stts_data[i].duration);
+            if (ret < 0)
+                return ret;
+        }
+    av_freep(&sc->stts_data);
+    sc->stts_allocated_size = 0;
+
+    return 0;
+}
+
 static void mov_build_index(MOVContext *mov, AVStream *st)
 {
     MOVStreamContext *sc = st->priv_data;
@@ -4690,10 +4652,6 @@ static void mov_build_index(MOVContext *mov, AVStream *st)
     unsigned int stps_index = 0;
     unsigned int i, j;
     uint64_t stream_size = 0;
-    MOVStts *stts_data_old = sc->stts_data;
-    MOVCtts *ctts_data_old = sc->ctts_data;
-    unsigned int stts_count_old = sc->stts_count;
-    unsigned int ctts_count_old = sc->ctts_count;
 
     int ret = build_open_gop_key_points(st);
     if (ret < 0)
@@ -4773,53 +4731,10 @@ static void mov_build_index(MOVContext *mov, AVStream *st)
         }
         sti->index_entries_allocated_size = (sti->nb_index_entries + sc->sample_count) * sizeof(*sti->index_entries);
 
-        if (ctts_data_old) {
-            // Expand ctts entries such that we have a 1-1 mapping with samples
-            if (sc->sample_count >= UINT_MAX / sizeof(*sc->ctts_data))
-                return;
-            sc->ctts_count = 0;
-            sc->ctts_allocated_size = 0;
-            sc->ctts_data = av_fast_realloc(NULL, &sc->ctts_allocated_size,
-                                    sc->sample_count * sizeof(*sc->ctts_data));
-            if (!sc->ctts_data) {
-                av_free(ctts_data_old);
-                return;
-            }
-
-            memset((uint8_t*)(sc->ctts_data), 0, sc->ctts_allocated_size);
-
-            for (i = 0; i < ctts_count_old &&
-                        sc->ctts_count < sc->sample_count; i++)
-                for (j = 0; j < ctts_data_old[i].count &&
-                            sc->ctts_count < sc->sample_count; j++)
-                    add_ctts_entry(&sc->ctts_data, &sc->ctts_count,
-                                   &sc->ctts_allocated_size, 1,
-                                   ctts_data_old[i].offset);
-            av_free(ctts_data_old);
-        }
-        if (stts_data_old) {
-            // Expand stts entries such that we have a 1-1 mapping with samples
-            if (sc->sample_count >= UINT_MAX / sizeof(*sc->stts_data))
-                return;
-            sc->stts_count = 0;
-            sc->stts_allocated_size = 0;
-            sc->stts_data = av_fast_realloc(NULL, &sc->stts_allocated_size,
-                                    sc->sample_count * sizeof(*sc->stts_data));
-            if (!sc->stts_data) {
-                av_free(stts_data_old);
+        if (sc->ctts_data || sc->stts_data) {
+            ret = mov_merge_tts_arrays(mov, st);
+            if (ret < 0)
                 return;
-            }
-
-            memset((uint8_t*)(sc->stts_data), 0, sc->stts_allocated_size);
-
-            for (i = 0; i < stts_count_old &&
-                        sc->stts_count < sc->sample_count; i++)
-                for (j = 0; j < stts_data_old[i].count &&
-                            sc->stts_count < sc->sample_count; j++)
-                    add_stts_entry(&sc->stts_data, &sc->stts_count,
-                                   &sc->stts_allocated_size, 1,
-                                   stts_data_old[i].duration);
-            av_free(stts_data_old);
         }
 
         for (i = 0; i < sc->chunk_count; i++) {
@@ -4901,12 +4816,12 @@ static void mov_build_index(MOVContext *mov, AVStream *st)
                 current_offset += sample_size;
                 stream_size += sample_size;
 
-                current_dts += sc->stts_data[stts_index].duration;
+                current_dts += sc->tts_data[stts_index].duration;
 
                 distance++;
                 stts_sample++;
                 current_sample++;
-                if (stts_index + 1 < sc->stts_count && stts_sample == sc->stts_data[stts_index].count) {
+                if (stts_index + 1 < sc->tts_count && stts_sample == sc->tts_data[stts_index].count) {
                     stts_sample = 0;
                     stts_index++;
                 }
@@ -4917,6 +4832,12 @@ static void mov_build_index(MOVContext *mov, AVStream *st)
     } else {
         unsigned chunk_samples, total = 0;
 
+        if (sc->ctts_data || sc->stts_data) {
+            ret = mov_merge_tts_arrays(mov, st);
+            if (ret < 0)
+                return;
+        }
+
         if (!sc->chunk_count)
             return;
 
@@ -5024,8 +4945,8 @@ static void mov_build_index(MOVContext *mov, AVStream *st)
     // Update start time of the stream.
     if (st->start_time == AV_NOPTS_VALUE && st->codecpar->codec_type == AVMEDIA_TYPE_VIDEO && sti->nb_index_entries > 0) {
         st->start_time = sti->index_entries[0].timestamp + sc->dts_shift;
-        if (sc->ctts_data) {
-            st->start_time += sc->ctts_data[0].offset;
+        if (sc->tts_data) {
+            st->start_time += sc->tts_data[0].offset;
         }
     }
 
@@ -5299,14 +5220,14 @@ static int mov_read_trak(MOVContext *c, AVIOContext *pb, MOVAtom atom)
         }
 
 #if FF_API_R_FRAME_RATE
-        for (int i = 1; sc->stts_count && i < sc->stts_count - 1; i++) {
-            if (sc->stts_data[i].duration == sc->stts_data[0].duration)
+        for (int i = 1; sc->tts_count && i < sc->tts_count - 1; i++) {
+            if (sc->tts_data[i].duration == sc->tts_data[0].duration)
                 continue;
             stts_constant = 0;
         }
         if (stts_constant)
             av_reduce(&st->r_frame_rate.num, &st->r_frame_rate.den,
-                      sc->time_scale, sc->stts_data[0].duration, INT_MAX);
+                      sc->time_scale, sc->tts_data[0].duration, INT_MAX);
 #endif
     }
 
@@ -5337,8 +5258,8 @@ static int mov_read_trak(MOVContext *c, AVIOContext *pb, MOVAtom atom)
     if (st->codecpar->codec_id == AV_CODEC_ID_MP3
         && sc->time_scale == st->codecpar->sample_rate) {
         int stts_constant = 1;
-        for (int i = 1; i < sc->stts_count; i++) {
-            if (sc->stts_data[i].duration == sc->stts_data[0].duration)
+        for (int i = 1; i < sc->tts_count; i++) {
+            if (sc->tts_data[i].duration == sc->tts_data[0].duration)
                 continue;
             stts_constant = 0;
         }
@@ -5821,8 +5742,7 @@ static int mov_read_trun(MOVContext *c, AVIOContext *pb, MOVAtom atom)
     AVStream *st = NULL;
     FFStream *sti = NULL;
     MOVStreamContext *sc;
-    MOVStts *stts_data;
-    MOVCtts *ctts_data;
+    MOVTimeToSample *tts_data;
     uint64_t offset;
     int64_t dts, pts = AV_NOPTS_VALUE;
     int data_offset = 0;
@@ -5878,7 +5798,7 @@ static int mov_read_trun(MOVContext *c, AVIOContext *pb, MOVAtom atom)
     entries = avio_rb32(pb);
     av_log(c->fc, AV_LOG_TRACE, "flags 0x%x entries %u\n", flags, entries);
 
-    if ((uint64_t)entries+sc->ctts_count >= UINT_MAX/sizeof(*sc->ctts_data))
+    if ((uint64_t)entries+sc->tts_count >= UINT_MAX/sizeof(*sc->tts_data))
         return AVERROR_INVALIDDATA;
     if (flags & MOV_TRUN_DATA_OFFSET)        data_offset        = avio_rb32(pb);
     if (flags & MOV_TRUN_FIRST_SAMPLE_FLAGS) first_sample_flags = avio_rb32(pb);
@@ -5951,52 +5871,36 @@ static int mov_read_trun(MOVContext *c, AVIOContext *pb, MOVAtom atom)
         return AVERROR(ENOMEM);
     sti->index_entries= new_entries;
 
-    requested_size = (sti->nb_index_entries + entries) * sizeof(*sc->ctts_data);
-    old_allocated_size = sc->ctts_allocated_size;
-    ctts_data = av_fast_realloc(sc->ctts_data, &sc->ctts_allocated_size,
+    requested_size = (sti->nb_index_entries + entries) * sizeof(*sc->tts_data);
+    old_allocated_size = sc->tts_allocated_size;
+    tts_data = av_fast_realloc(sc->tts_data, &sc->tts_allocated_size,
                                 requested_size);
-    if (!ctts_data)
+    if (!tts_data)
         return AVERROR(ENOMEM);
-    sc->ctts_data = ctts_data;
+    sc->tts_data = tts_data;
 
-    // In case there were samples without ctts entries, ensure they get
+    // In case there were samples without time to sample entries, ensure they get
     // zero valued entries. This ensures clips which mix boxes with and
-    // without ctts entries don't pickup uninitialized data.
-    memset((uint8_t*)(sc->ctts_data) + old_allocated_size, 0,
-           sc->ctts_allocated_size - old_allocated_size);
-
-    requested_size = (sti->nb_index_entries + entries) * sizeof(*sc->stts_data);
-    old_allocated_size = sc->stts_allocated_size;
-    stts_data = av_fast_realloc(sc->stts_data, &sc->stts_allocated_size,
-                                requested_size);
-    if (!stts_data)
-        return AVERROR(ENOMEM);
-    sc->stts_data = stts_data;
-
-    // See the comment for ctts above.
-    memset((uint8_t*)(sc->stts_data) + old_allocated_size, 0,
-           sc->stts_allocated_size - old_allocated_size);
+    // without time to sample entries don't pickup uninitialized data.
+    memset((uint8_t*)(sc->tts_data) + old_allocated_size, 0,
+           sc->tts_allocated_size - old_allocated_size);
 
     if (index_entry_pos < sti->nb_index_entries) {
-        // Make hole in index_entries and ctts_data for new samples
+        // Make hole in index_entries and tts_data for new samples
         memmove(sti->index_entries + index_entry_pos + entries,
                 sti->index_entries + index_entry_pos,
                 sizeof(*sti->index_entries) *
                 (sti->nb_index_entries - index_entry_pos));
-        memmove(sc->ctts_data + index_entry_pos + entries,
-                sc->ctts_data + index_entry_pos,
-                sizeof(*sc->ctts_data) * (sc->ctts_count - index_entry_pos));
-        memmove(sc->stts_data + index_entry_pos + entries,
-                sc->stts_data + index_entry_pos,
-                sizeof(*sc->stts_data) * (sc->stts_count - index_entry_pos));
+        memmove(sc->tts_data + index_entry_pos + entries,
+                sc->tts_data + index_entry_pos,
+                sizeof(*sc->tts_data) * (sc->tts_count - index_entry_pos));
         if (index_entry_pos < sc->current_sample) {
             sc->current_sample += entries;
         }
     }
 
     sti->nb_index_entries += entries;
-    sc->ctts_count = sti->nb_index_entries;
-    sc->stts_count = sti->nb_index_entries;
+    sc->tts_count = sti->nb_index_entries;
 
     // Record the index_entry position in frag_index of this fragment
     if (frag_stream_info) {
@@ -6058,10 +5962,9 @@ static int mov_read_trun(MOVContext *c, AVIOContext *pb, MOVAtom atom)
         sti->index_entries[index_entry_pos].min_distance = distance;
         sti->index_entries[index_entry_pos].flags = index_entry_flags;
 
-        sc->ctts_data[index_entry_pos].count = 1;
-        sc->ctts_data[index_entry_pos].offset = ctts_duration;
-        sc->stts_data[index_entry_pos].count = 1;
-        sc->stts_data[index_entry_pos].duration = sample_duration;
+        sc->tts_data[index_entry_pos].count = 1;
+        sc->tts_data[index_entry_pos].offset = ctts_duration;
+        sc->tts_data[index_entry_pos].duration = sample_duration;
         index_entry_pos++;
 
         av_log(c->fc, AV_LOG_TRACE, "AVIndex stream %d, sample %d, offset %"PRIx64", dts %"PRId64", "
@@ -6087,24 +5990,19 @@ static int mov_read_trun(MOVContext *c, AVIOContext *pb, MOVAtom atom)
         frag_stream_info->next_trun_dts = dts + sc->time_offset;
     if (i < entries) {
         // EOF found before reading all entries.  Fix the hole this would
-        // leave in index_entries and ctts_data
+        // leave in index_entries and tts_data
         int gap = entries - i;
         memmove(sti->index_entries + index_entry_pos,
                 sti->index_entries + index_entry_pos + gap,
                 sizeof(*sti->index_entries) *
                 (sti->nb_index_entries - (index_entry_pos + gap)));
-        memmove(sc->ctts_data + index_entry_pos,
-                sc->ctts_data + index_entry_pos + gap,
-                sizeof(*sc->ctts_data) *
-                (sc->ctts_count - (index_entry_pos + gap)));
-        memmove(sc->stts_data + index_entry_pos,
-                sc->stts_data + index_entry_pos + gap,
-                sizeof(*sc->stts_data) *
-                (sc->stts_count - (index_entry_pos + gap)));
+        memmove(sc->tts_data + index_entry_pos,
+                sc->tts_data + index_entry_pos + gap,
+                sizeof(*sc->tts_data) *
+                (sc->tts_count - (index_entry_pos + gap)));
 
         sti->nb_index_entries -= gap;
-        sc->ctts_count -= gap;
-        sc->stts_count -= gap;
+        sc->tts_count -= gap;
         if (index_entry_pos < sc->current_sample) {
             sc->current_sample -= gap;
         }
@@ -9865,7 +9763,7 @@ static void mov_free_stream_context(AVFormatContext *s, AVStream *st)
         return;
     }
 
-    av_freep(&sc->ctts_data);
+    av_freep(&sc->tts_data);
     for (int i = 0; i < sc->drefs_count; i++) {
         av_freep(&sc->drefs[i].path);
         av_freep(&sc->drefs[i].dir);
@@ -9882,6 +9780,7 @@ static void mov_free_stream_context(AVFormatContext *s, AVStream *st)
     av_freep(&sc->stsc_data);
     av_freep(&sc->sample_sizes);
     av_freep(&sc->keyframes);
+    av_freep(&sc->ctts_data);
     av_freep(&sc->stts_data);
     av_freep(&sc->sdtp_data);
     av_freep(&sc->stps_data);
@@ -10876,26 +10775,16 @@ static int mov_finalize_packet(AVFormatContext *s, AVStream *st, AVIndexEntry *s
     if (sample->flags & AVINDEX_DISCARD_FRAME) {
         pkt->flags |= AV_PKT_FLAG_DISCARD;
     }
-    if (sc->stts_data && sc->stts_index < sc->stts_count) {
-        pkt->duration = sc->stts_data[sc->stts_index].duration;
-
-        /* update stts context */
-        sc->stts_sample++;
-        if (sc->stts_index < sc->stts_count &&
-            sc->stts_data[sc->stts_index].count == sc->stts_sample) {
-            sc->stts_index++;
-            sc->stts_sample = 0;
-        }
-    }
-    if (sc->ctts_data && sc->ctts_index < sc->ctts_count) {
-        pkt->pts = av_sat_add64(pkt->dts, av_sat_add64(sc->dts_shift, sc->ctts_data[sc->ctts_index].offset));
+    if (sc->tts_data && sc->tts_index < sc->tts_count) {
+        pkt->duration = sc->tts_data[sc->tts_index].duration;
+        pkt->pts = av_sat_add64(pkt->dts, av_sat_add64(sc->dts_shift, sc->tts_data[sc->tts_index].offset));
 
-        /* update ctts context */
-        sc->ctts_sample++;
-        if (sc->ctts_index < sc->ctts_count &&
-            sc->ctts_data[sc->ctts_index].count == sc->ctts_sample) {
-            sc->ctts_index++;
-            sc->ctts_sample = 0;
+        /* update tts context */
+        sc->tts_sample++;
+        if (sc->tts_index < sc->tts_count &&
+            sc->tts_data[sc->tts_index].count == sc->tts_sample) {
+            sc->tts_index++;
+            sc->tts_sample = 0;
         }
     } else {
         if (pkt->duration == 0) {
@@ -10966,7 +10855,7 @@ static int mov_read_packet(AVFormatContext *s, AVPacket *pkt)
 
             // Clear current sample
             mov_current_sample_set(msc, 0);
-            msc->ctts_index = 0;
+            msc->tts_index = 0;
 
             // Discard current index entries
             avsti = ffstream(avst);
@@ -11199,27 +11088,14 @@ static int mov_seek_stream(AVFormatContext *s, AVStream *st, int64_t timestamp,
 
     mov_current_sample_set(sc, sample);
     av_log(s, AV_LOG_TRACE, "stream %d, found sample %d\n", st->index, sc->current_sample);
-    /* adjust stts index */
-    if (sc->stts_data) {
-        time_sample = 0;
-        for (i = 0; i < sc->stts_count; i++) {
-            int next = time_sample + sc->stts_data[i].count;
-            if (next > sc->current_sample) {
-                sc->stts_index = i;
-                sc->stts_sample = sc->current_sample - time_sample;
-                break;
-            }
-            time_sample = next;
-        }
-    }
-    /* adjust ctts index */
-    if (sc->ctts_data) {
+    /* adjust time to sample index */
+    if (sc->tts_data) {
         time_sample = 0;
-        for (i = 0; i < sc->ctts_count; i++) {
-            int next = time_sample + sc->ctts_data[i].count;
+        for (i = 0; i < sc->tts_count; i++) {
+            int next = time_sample + sc->tts_data[i].count;
             if (next > sc->current_sample) {
-                sc->ctts_index = i;
-                sc->ctts_sample = sc->current_sample - time_sample;
+                sc->tts_index = i;
+                sc->tts_sample = sc->current_sample - time_sample;
                 break;
             }
             time_sample = next;
-- 
2.47.0