[FFmpeg-devel] [PATCH 2/2] avformat/movenc: parse h264 packets to build Sync Sample and Recovery Point tables

James Almer jamrial at gmail.com
Tue Jul 27 16:08:20 EEST 2021


Since we can't blindly trust the keyframe flag in packets and assume that a
flagged packet's contents are a valid Sync Sample, do some basic bitstream
parsing to build the Sync Sample table, as well as a Random Access Recovery
Point table.

Suggested-by: ffmpeg at fb.com
Signed-off-by: James Almer <jamrial at gmail.com>
---
 libavformat/movenc.c         | 125 +++++++++++++++++++++++++++++++++--
 libavformat/movenc.h         |   1 +
 tests/ref/lavf-fate/h264.mp4 |   6 +-
 3 files changed, 123 insertions(+), 9 deletions(-)

diff --git a/libavformat/movenc.c b/libavformat/movenc.c
index 57062f45c5..159e0261b7 100644
--- a/libavformat/movenc.c
+++ b/libavformat/movenc.c
@@ -34,13 +34,17 @@
 #include "avc.h"
 #include "libavcodec/ac3_parser_internal.h"
 #include "libavcodec/dnxhddata.h"
+#include "libavcodec/h264.h"
+#include "libavcodec/h2645_parse.h"
 #include "libavcodec/flac.h"
 #include "libavcodec/get_bits.h"
+#include "libavcodec/golomb.h"
 
 #include "libavcodec/internal.h"
 #include "libavcodec/put_bits.h"
 #include "libavcodec/vc1_common.h"
 #include "libavcodec/raw.h"
+#include "libavcodec/sei.h"
 #include "internal.h"
 #include "libavutil/avstring.h"
 #include "libavutil/channel_layout.h"
@@ -2537,7 +2541,9 @@ static int mov_preroll_write_stbl_atoms(AVIOContext *pb, MOVTrack *track)
     if (!sgpd_entries)
         return AVERROR(ENOMEM);
 
-    av_assert0(track->par->codec_id == AV_CODEC_ID_OPUS || track->par->codec_id == AV_CODEC_ID_AAC);
+    av_assert0(track->par->codec_id == AV_CODEC_ID_OPUS ||
+               track->par->codec_id == AV_CODEC_ID_AAC  ||
+               track->par->codec_id == AV_CODEC_ID_H264);
 
     if (track->par->codec_id == AV_CODEC_ID_OPUS) {
         for (i = 0; i < track->entry; i++) {
@@ -2545,7 +2551,7 @@ static int mov_preroll_write_stbl_atoms(AVIOContext *pb, MOVTrack *track)
             int distance = 0;
             for (j = i - 1; j >= 0; j--) {
                 roll_samples_remaining -= get_cluster_duration(track, j);
-                distance++;
+                distance--;
                 if (roll_samples_remaining <= 0)
                     break;
             }
@@ -2555,7 +2561,7 @@ static int mov_preroll_write_stbl_atoms(AVIOContext *pb, MOVTrack *track)
             if (roll_samples_remaining > 0)
                 distance = 0;
             /* Verify distance is a maximum of 32 (2.5ms) packets. */
-            if (distance > 32)
+            if (distance < 32)
                 return AVERROR_INVALIDDATA;
             if (i && distance == sgpd_entries[entries].roll_distance) {
                 sgpd_entries[entries].count++;
@@ -2566,10 +2572,22 @@ static int mov_preroll_write_stbl_atoms(AVIOContext *pb, MOVTrack *track)
                 sgpd_entries[entries].group_description_index = distance ? ++group : 0;
             }
         }
+    } else if (track->par->codec_id == AV_CODEC_ID_H264) {
+        for (i = 0; i < track->entry; i++) {
+            int distance = track->cluster[i].roll_distance;
+            if (i && distance == sgpd_entries[entries].roll_distance) {
+                sgpd_entries[entries].count++;
+            } else {
+                entries++;
+                sgpd_entries[entries].count = 1;
+                sgpd_entries[entries].roll_distance = distance;
+                sgpd_entries[entries].group_description_index = distance ? ++group : 0;
+            }
+        }
     } else {
         entries++;
         sgpd_entries[entries].count = track->sample_count;
-        sgpd_entries[entries].roll_distance = 1;
+        sgpd_entries[entries].roll_distance = -1;
         sgpd_entries[entries].group_description_index = ++group;
     }
     entries++;
@@ -2588,7 +2606,7 @@ static int mov_preroll_write_stbl_atoms(AVIOContext *pb, MOVTrack *track)
     avio_wb32(pb, group); /* entry_count */
     for (i = 0; i < entries; i++) {
         if (sgpd_entries[i].group_description_index) {
-            avio_wb16(pb, -sgpd_entries[i].roll_distance); /* roll_distance */
+            avio_wb16(pb, sgpd_entries[i].roll_distance); /* roll_distance */
         }
     }
 
@@ -2639,7 +2657,9 @@ static int mov_write_stbl_tag(AVFormatContext *s, AVIOContext *pb, MOVMuxContext
     if (track->cenc.aes_ctr) {
         ff_mov_cenc_write_stbl_atoms(&track->cenc, pb);
     }
-    if (track->par->codec_id == AV_CODEC_ID_OPUS || track->par->codec_id == AV_CODEC_ID_AAC) {
+    if (track->par->codec_id == AV_CODEC_ID_OPUS ||
+        track->par->codec_id == AV_CODEC_ID_AAC  ||
+        track->par->codec_id == AV_CODEC_ID_H264) {
         mov_preroll_write_stbl_atoms(pb, track);
     }
     return update_size(pb, pos);
@@ -5150,6 +5170,96 @@ static int mov_parse_mpeg2_frame(AVPacket *pkt, uint32_t *flags)
     return 0;
 }
 
+/**
+ * Scan the SEI messages contained in one H.264 SEI NAL unit and pick up the
+ * recovery_frame_cnt of any recovery point message.
+ *
+ * Parsing stops silently at the first truncated or inconsistent message, so
+ * a broken SEI never prevents the caller from inspecting the NAL units that
+ * follow it.
+ *
+ * @param buf           first byte following the NAL unit header
+ * @param size          number of bytes that may be read starting at buf
+ * @param roll_distance overwritten with the value read from each recovery
+ *                      point message found; left untouched if there is none
+ */
+static void mov_parse_h264_sei(const uint8_t *buf, int size,
+                               unsigned *roll_distance)
+{
+    GetBitContext gb;
+
+    if (init_get_bits8(&gb, buf, size) < 0)
+        return;
+
+    do {
+        GetBitContext gb_payload;
+        int type = 0, payload_size = 0;
+
+        /* Both payload type and payload size are coded as a run of 0xFF
+         * bytes terminated by a byte < 0xFF; the value is the sum of all
+         * of those bytes. */
+        do {
+            if (get_bits_left(&gb) < 8)
+                return;
+            type += show_bits(&gb, 8);
+        } while (get_bits(&gb, 8) == 255);
+        do {
+            if (get_bits_left(&gb) < 8)
+                return;
+            payload_size += show_bits(&gb, 8);
+        } while (get_bits(&gb, 8) == 255);
+
+        /* The announced payload must fit in what is left of the NAL unit. */
+        if (payload_size > get_bits_left(&gb) / 8)
+            return;
+
+        /* Read the payload through its own reader so that a short or odd
+         * payload cannot desynchronize the message loop. */
+        if (init_get_bits8(&gb_payload,
+                           gb.buffer + get_bits_count(&gb) / 8,
+                           payload_size) < 0)
+            return;
+
+        if (type == SEI_TYPE_RECOVERY_POINT)
+            *roll_distance = get_ue_golomb_long(&gb_payload);
+
+        skip_bits_long(&gb, 8 * payload_size);
+        /* A 0x80 byte here is taken to be the RBSP trailing bits, i.e. the
+         * end of the SEI NAL unit. */
+    } while (get_bits_left(&gb) > 0 && show_bits(&gb, 8) != 0x80);
+}
+
+/**
+ * Inspect the NAL units of an H.264 packet to fill in the Sync Sample flag
+ * and the roll distance of the cluster entry currently being written.
+ *
+ * The packet is walked either as length-prefixed NAL units (when the track
+ * extradata in trk->vos_data starts with an avcC configurationVersion of 1)
+ * or as an Annex B start code delimited stream otherwise. The walk stops at
+ * the first IDR slice; SEI NAL units seen before that are searched for a
+ * recovery point message.
+ *
+ * @param pkt packet to inspect; not modified
+ * @param trk track the packet belongs to; trk->cluster[trk->entry] gets its
+ *            roll_distance set and, for IDR pictures, MOV_SYNC_SAMPLE added
+ *            to its flags, in which case trk->has_keyframes is incremented
+ * @return always 0
+ */
+static int mov_parse_h264_frame(AVPacket *pkt, MOVTrack *trk)
+{
+    const uint8_t *buf = pkt->data;
+    const uint8_t *buf_end = pkt->data + pkt->size;
+    uint32_t state = -1;
+    unsigned roll_distance = 0;
+    int nal_length_size = 0, nalsize = 0;
+    int idr = 0;
+
+    if (!pkt->size)
+        return 0;
+    if (trk->vos_data && trk->vos_data[0] == 1) {
+        if (trk->vos_len < 5)
+            return 0;
+        /* lengthSizeMinusOne lives in the two low bits of the fifth byte. */
+        nal_length_size = (trk->vos_data[4] & 0x03) + 1;
+    }
+
+    while (buf_end - buf >= 4) {
+        int nal_type, sei_size;
+
+        if (nal_length_size) {
+            int pos = 0;
+            nalsize = get_nalsize(nal_length_size, buf, buf_end - buf, &pos, NULL);
+            if (nalsize < 0)
+                break;
+            /* Step over the length prefix and the one byte NAL header. */
+            state = buf[nal_length_size];
+            buf += nal_length_size + 1;
+            /* The payload ends where this NAL unit ends, not where the
+             * packet ends; never let the SEI parser read into the next
+             * NAL unit. */
+            sei_size = nalsize - 1;
+        } else {
+            buf = avpriv_find_start_code(buf, buf_end, &state);
+            if (buf >= buf_end)
+                break;
+            /* The NAL unit length is unknown here; the SEI parser relies
+             * on the trailing bits to stop. */
+            sei_size = buf_end - buf;
+        }
+
+        nal_type = state & 0x1f;
+        if (nal_type == H264_NAL_IDR_SLICE) {
+            idr = 1;
+            break;
+        }
+        if (nal_type == H264_NAL_SEI)
+            mov_parse_h264_sei(buf, sei_size, &roll_distance);
+
+        if (nal_length_size)
+            buf += nalsize - 1;
+    }
+
+    /* The cluster field is an int16_t: drop values it cannot represent. */
+    if (roll_distance != (int16_t)roll_distance)
+        roll_distance = 0;
+    trk->cluster[trk->entry].roll_distance = roll_distance;
+
+    if (idr) {
+        trk->cluster[trk->entry].flags |= MOV_SYNC_SAMPLE;
+        trk->has_keyframes++;
+    }
+
+    return 0;
+}
+
 static void mov_parse_vc1_frame(AVPacket *pkt, MOVTrack *trk)
 {
     const uint8_t *start, *next, *end = pkt->data + pkt->size;
@@ -5740,6 +5850,7 @@ int ff_mov_write_packet(AVFormatContext *s, AVPacket *pkt)
     trk->cluster[trk->entry].entries          = samples_in_chunk;
     trk->cluster[trk->entry].dts              = pkt->dts;
     trk->cluster[trk->entry].pts              = pkt->pts;
+    trk->cluster[trk->entry].roll_distance    = 0;
     if (!trk->entry && trk->start_dts != AV_NOPTS_VALUE) {
         if (!trk->frag_discont) {
             /* First packet of a new fragment. We already wrote the duration
@@ -5821,6 +5932,8 @@ int ff_mov_write_packet(AVFormatContext *s, AVPacket *pkt)
 
     if (par->codec_id == AV_CODEC_ID_VC1) {
         mov_parse_vc1_frame(pkt, trk);
+    } else if (par->codec_id == AV_CODEC_ID_H264) {
+        mov_parse_h264_frame(pkt, trk);
     } else if (par->codec_id == AV_CODEC_ID_TRUEHD) {
         mov_parse_truehd_frame(pkt, trk);
     } else if (pkt->flags & AV_PKT_FLAG_KEY) {
diff --git a/libavformat/movenc.h b/libavformat/movenc.h
index af1ea0bce6..73bf73ce8f 100644
--- a/libavformat/movenc.h
+++ b/libavformat/movenc.h
@@ -56,6 +56,7 @@ typedef struct MOVIentry {
 #define MOV_PARTIAL_SYNC_SAMPLE 0x0002
 #define MOV_DISPOSABLE_SAMPLE   0x0004
     uint32_t     flags;
+    int16_t      roll_distance;
     AVProducerReferenceTime prft;
 } MOVIentry;
 
diff --git a/tests/ref/lavf-fate/h264.mp4 b/tests/ref/lavf-fate/h264.mp4
index a9c3823c2c..c08ee4c7ae 100644
--- a/tests/ref/lavf-fate/h264.mp4
+++ b/tests/ref/lavf-fate/h264.mp4
@@ -1,3 +1,3 @@
-fe299ea5205b71a48281f917b1256a5d *tests/data/lavf-fate/lavf.h264.mp4
-547928 tests/data/lavf-fate/lavf.h264.mp4
-tests/data/lavf-fate/lavf.h264.mp4 CRC=0x9da2c999
+2812f617314d23474fcb23898b8a56ab *tests/data/lavf-fate/lavf.h264.mp4
+548084 tests/data/lavf-fate/lavf.h264.mp4
+tests/data/lavf-fate/lavf.h264.mp4 CRC=0x396f0829
-- 
2.32.0



More information about the ffmpeg-devel mailing list