[FFmpeg-devel] [PATCH] avcodec/videotoolboxenc: ignore encoded h264 SEI nalu by default

sharpbai sharpbai at gmail.com
Mon Feb 15 11:33:04 EET 2021


Before macOS 11, the encoded h264 sample contains fixed SEI before
IDR frame, which the content is below

06 05 10 b9 ed b9 30 5d 21 4b 71 83 71 2c 10 a3
14 bb 29 80

The length of the encoded SEI nalu is 20 byte. According to
the ITU-T H.264 Recommendation section 7.3.2.3.1, the last byte
0x80 means type. Then there is no length byte, which make the
SEI nonstandard.

On macOS 11, the encoded h264 sample still contains fixed SEI before
IDR frame, which length is 5 bytes longer than before.

06 05 10 b9 ed b9 30 5d 21 4b 71 83 71 2c 10 a3
14 bb 29 80 00 00 03 00 01
            ^^ ^^ ^^ ^^ ^^

This time type the content of the SEI has type 0 payload but
invalid length payload data, which violates recommendation
appendix D.

Whether it is a bug or not, these SEI nalus should be erased to keep
the output bytestream not violating the standard. But we may
not come across the situration we do need these encoded SEI
nalu, so we just reserve a switch and set not to output by default.

Signed-off-by: sharpbai <sharpbai at gmail.com>
---
 libavcodec/videotoolboxenc.c | 51 +++++++++++++++++++++---------------
 1 file changed, 30 insertions(+), 21 deletions(-)

diff --git a/libavcodec/videotoolboxenc.c b/libavcodec/videotoolboxenc.c
index cc08cf6a50..c240090f10 100644
--- a/libavcodec/videotoolboxenc.c
+++ b/libavcodec/videotoolboxenc.c
@@ -221,6 +221,7 @@ typedef struct VTEncContext {
 
     int64_t allow_sw;
     int64_t require_sw;
+    int64_t ignore_internal_sei;
 
     bool flushing;
     bool has_b_frames;
@@ -1690,7 +1691,8 @@ static int copy_replace_length_codes(
     CMSampleBufferRef sample_buffer,
     ExtraSEI      *sei,
     uint8_t       *dst_data,
-    size_t        dst_size)
+    size_t        dst_size,
+    int           ignore_internal_sei)
 {
     size_t src_size = CMSampleBufferGetTotalSampleSize(sample_buffer);
     size_t remaining_src_size = src_size;
@@ -1707,8 +1709,8 @@ static int copy_replace_length_codes(
     }
 
     while (remaining_src_size > 0) {
-        size_t curr_src_len;
-        size_t curr_dst_len;
+        size_t curr_src_len = 0;
+        size_t curr_dst_len = 0;
         size_t box_len = 0;
         size_t i;
 
@@ -1740,7 +1742,8 @@ static int copy_replace_length_codes(
             box_len |= size_buf[i];
         }
 
-        if (sei && !wrote_sei && is_post_sei_nal_type(nal_type)) {
+        if (sei && !wrote_sei &&
+                (ignore_internal_sei || is_post_sei_nal_type(nal_type))) {
             //No SEI NAL unit - insert.
             int wrote_bytes;
 
@@ -1775,27 +1778,30 @@ static int copy_replace_length_codes(
         }
 
         curr_src_len = box_len + length_code_size;
-        curr_dst_len = box_len + sizeof(start_code);
 
-        if (remaining_src_size < curr_src_len) {
-            return AVERROR_BUFFER_TOO_SMALL;
-        }
+        if (!(ignore_internal_sei && nal_type == H264_NAL_SEI)) {
+            curr_dst_len = box_len + sizeof(start_code);
 
-        if (remaining_dst_size < curr_dst_len) {
-            return AVERROR_BUFFER_TOO_SMALL;
-        }
+            if (remaining_src_size < curr_src_len) {
+                return AVERROR_BUFFER_TOO_SMALL;
+            }
+
+            if (remaining_dst_size < curr_dst_len) {
+                return AVERROR_BUFFER_TOO_SMALL;
+            }
 
-        dst_box = dst_data + sizeof(start_code);
+            dst_box = dst_data + sizeof(start_code);
 
-        memcpy(dst_data, start_code, sizeof(start_code));
-        status = CMBlockBufferCopyDataBytes(block,
-                                            src_offset + length_code_size,
-                                            box_len,
-                                            dst_box);
+            memcpy(dst_data, start_code, sizeof(start_code));
+            status = CMBlockBufferCopyDataBytes(block,
+                                                src_offset + length_code_size,
+                                                box_len,
+                                                dst_box);
 
-        if (status) {
-            av_log(avctx, AV_LOG_ERROR, "Cannot copy data: %d\n", status);
-            return AVERROR_EXTERNAL;
+            if (status) {
+                av_log(avctx, AV_LOG_ERROR, "Cannot copy data: %d\n", status);
+                return AVERROR_EXTERNAL;
+            }
         }
 
         if (sei && !wrote_sei && nal_type == H264_NAL_SEI) {
@@ -1931,7 +1937,8 @@ static int vtenc_cm_to_avpacket(
         sample_buffer,
         sei,
         pkt->data + header_size,
-        pkt->size - header_size
+        pkt->size - header_size,
+        vtctx->ignore_internal_sei
     );
 
     if (status) {
@@ -2543,6 +2550,8 @@ static const enum AVPixelFormat hevc_pix_fmts[] = {
         { .i64 = 0 }, 0, 1, VE }, \
     { "require_sw", "Require software encoding", OFFSET(require_sw), AV_OPT_TYPE_BOOL, \
         { .i64 = 0 }, 0, 1, VE }, \
+    { "ignore_internal_sei", "Ignore SEI nalu generated by videotoolbox", OFFSET(ignore_internal_sei), AV_OPT_TYPE_BOOL, \
+        { .i64 = 1 }, 0, 1, VE }, \
     { "realtime", "Hint that encoding should happen in real-time if not faster (e.g. capturing from camera).", \
         OFFSET(realtime), AV_OPT_TYPE_BOOL, { .i64 = 0 }, 0, 1, VE }, \
     { "frames_before", "Other frames will come before the frames in this session. This helps smooth concatenation issues.", \
-- 
2.30.0



More information about the ffmpeg-devel mailing list