[FFmpeg-devel] [PATCH] Closed caption support for cuviddec with ff_parse_a53_cc preserving a53 data

Dhanish Vijayan dhanishvijayan at gmail.com
Tue Apr 13 17:35:10 EEST 2021


---
 libavcodec/cuviddec.c | 183 ++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 183 insertions(+)

diff --git a/libavcodec/cuviddec.c b/libavcodec/cuviddec.c
index ec57afdefe..9967cb4c94 100644
--- a/libavcodec/cuviddec.c
+++ b/libavcodec/cuviddec.c
@@ -31,6 +31,8 @@
 #include "libavutil/opt.h"
 #include "libavutil/pixdesc.h"
 
+#include "get_bits.h"
+#include "atsc_a53.h"
 #include "avcodec.h"
 #include "decode.h"
 #include "hwconfig.h"
@@ -46,6 +48,9 @@
 #define CUVID_HAS_AV1_SUPPORT
 #endif
 
+#define MAX_FRAME_COUNT 25 /* NOTE(review): the rationale for 25 is not visible here - confirm */
+#define A53_QUEUE_SIZE (MAX_FRAME_COUNT + 8) /* slots in the caption queues indexed by CurrPicIdx / picture_index */
+
 typedef struct CuvidContext
 {
     AVClass *avclass;
@@ -89,6 +94,11 @@ typedef struct CuvidContext
     cudaVideoCodec codec_type;
     cudaVideoChromaFormat chroma_format;
 
+    AVBufferRef* a53_caption;
+    int a53_caption_size;
+    uint8_t* a53_caption_queue[A53_QUEUE_SIZE];
+    int a53_caption_size_queue[A53_QUEUE_SIZE];
+
     CUVIDDECODECAPS caps8, caps10, caps12;
 
     CUVIDPARSERPARAMS cuparseinfo;
@@ -103,6 +113,8 @@ typedef struct CuvidParsedFrame
     CUVIDPARSERDISPINFO dispinfo;
     int second_field;
     int is_deinterlacing;
+    uint8_t* a53_caption;
+    int a53_caption_size;
 } CuvidParsedFrame;
 
 #define CHECK_CU(x) FF_CUDA_CHECK_DL(avctx, ctx->cudl, x)
@@ -338,6 +350,24 @@ static int CUDAAPI cuvid_handle_picture_decode(void *opaque, CUVIDPICPARAMS* pic
 
     ctx->key_frame[picparams->CurrPicIdx] = picparams->intra_pic_flag;
 
+    if (ctx->a53_caption)
+    {
+
+        if (picparams->CurrPicIdx >= A53_QUEUE_SIZE)
+        {
+            av_log(avctx, AV_LOG_WARNING, "CurrPicIdx too big: %d\n", picparams->CurrPicIdx);
+            av_freep(&ctx->a53_caption);
+        }
+        else
+        {
+            int pos = picparams->CurrPicIdx;
+            av_freep(&ctx->a53_caption_queue[pos]);
+            ctx->a53_caption_queue[pos] = ctx->a53_caption;
+            ctx->a53_caption_size_queue[pos] = ctx->a53_caption_size;
+            ctx->a53_caption = NULL;
+        }
+    }
+
     ctx->internal_error = CHECK_CU(ctx->cvdl->cuvidDecodePicture(ctx->cudecoder, picparams));
     if (ctx->internal_error < 0)
         return 0;
@@ -350,6 +380,20 @@ static int CUDAAPI cuvid_handle_picture_display(void *opaque, CUVIDPARSERDISPINF
     AVCodecContext *avctx = opaque;
     CuvidContext *ctx = avctx->priv_data;
     CuvidParsedFrame parsed_frame = { { 0 } };
+    uint8_t* a53_caption = NULL;
+    int a53_caption_size = 0;
+
+    if (dispinfo->picture_index >= A53_QUEUE_SIZE)
+    {
+        av_log(avctx, AV_LOG_WARNING, "picture_index too big: %d\n", dispinfo->picture_index);
+    }
+    else
+    {
+        int pos = dispinfo->picture_index;
+        a53_caption = ctx->a53_caption_queue[pos];
+        a53_caption_size = ctx->a53_caption_size_queue[pos];
+        ctx->a53_caption_queue[pos] = NULL;
+    }
 
     parsed_frame.dispinfo = *dispinfo;
     ctx->internal_error = 0;
@@ -358,11 +402,17 @@ static int CUDAAPI cuvid_handle_picture_display(void *opaque, CUVIDPARSERDISPINF
     parsed_frame.dispinfo.progressive_frame = ctx->progressive_sequence;
 
     if (ctx->deint_mode_current == cudaVideoDeinterlaceMode_Weave) {
+        parsed_frame.a53_caption = a53_caption;
+        parsed_frame.a53_caption_size = a53_caption_size;
         av_fifo_generic_write(ctx->frame_queue, &parsed_frame, sizeof(CuvidParsedFrame), NULL);
     } else {
         parsed_frame.is_deinterlacing = 1;
+        parsed_frame.a53_caption = a53_caption;
+        parsed_frame.a53_caption_size = a53_caption_size;
         av_fifo_generic_write(ctx->frame_queue, &parsed_frame, sizeof(CuvidParsedFrame), NULL);
         if (!ctx->drop_second_field) {
+            parsed_frame.a53_caption = NULL;
+            parsed_frame.a53_caption_size = 0;
             parsed_frame.second_field = 1;
             av_fifo_generic_write(ctx->frame_queue, &parsed_frame, sizeof(CuvidParsedFrame), NULL);
         }
@@ -382,6 +432,121 @@ static int cuvid_is_buffer_full(AVCodecContext *avctx)
     return (av_fifo_size(ctx->frame_queue) / sizeof(CuvidParsedFrame)) + delay >= ctx->nb_surfaces;
 }
 
+/* Scan an MPEG-2 packet for user_data (start code 0x1b2) and store the closed captions
+ * of a GA94 payload as raw bytes (3 per caption) in ctx->a53_caption/a53_caption_size;
+ * a later payload replaces an earlier one. avctx is used for logging only. */
+static void cuvid_mpeg_parse_a53(AVCodecContext *avctx, CuvidContext *ctx, const uint8_t* p, int buf_size)
+{
+    const uint8_t* buf_end = p + buf_size;
+    for (;;) {
+        AVBufferRef *cc_buf = NULL;
+        GetBitContext gb_payload;
+        uint32_t user_identifier, start_code = -1;
+        int cc_count;
+        p = avpriv_find_start_code(p, buf_end, &start_code);
+        if (start_code > 0x1ff)
+            break;
+        buf_size = buf_end - p;
+        /* need all 4 identifier bytes; init_get_bits8() takes a size in bytes */
+        if (start_code != 0x1b2 || buf_size < 4 || init_get_bits8(&gb_payload, p, buf_size) < 0)
+            continue;
+        user_identifier = get_bits_long(&gb_payload, 32);
+        switch (user_identifier) {
+        case MKBETAG('D', 'T', 'G', '1'):       // afd_data
+            av_log(avctx, AV_LOG_VERBOSE,
+                   "Not implemented ITU-T T35 SEI message (atsc user_identifier = 0x%04x)\n",
+                   user_identifier);
+            break;
+        case MKBETAG('G', 'A', '9', '4'):       // closed captions
+            /* the identifier is consumed: 4 bytes fewer remain for the parser */
+            cc_count = ff_parse_a53_cc(&cc_buf, p + 4, buf_size - 4);
+            if (cc_count > 0 && cc_buf) {
+                av_freep(&ctx->a53_caption);
+                ctx->a53_caption_size = cc_count * 3;
+                ctx->a53_caption = av_malloc(ctx->a53_caption_size);
+                if (ctx->a53_caption)   /* check the allocation, then copy raw bytes */
+                    memcpy(ctx->a53_caption, cc_buf->data, ctx->a53_caption_size);
+                else
+                    ctx->a53_caption_size = 0;
+            }
+            av_buffer_unref(&cc_buf);   /* release the parser's buffer (was leaked) */
+            break;
+        default:
+            av_log(avctx, AV_LOG_VERBOSE,
+                   "Unsupported User Data Registered ITU-T T35 SEI message (atsc user_identifier = 0x%04x)\n",
+                   user_identifier);
+        }
+    }
+}
+
+/**
+ * Scan an H.264 packet for SEI NAL units (start code 0x106) whose first message has
+ * payload type 4 (user data registered by ITU-T T.35) and store the closed captions
+ * of a GA94 payload in ctx->a53_caption (raw bytes, 3 per caption) and
+ * ctx->a53_caption_size. A later payload in the same packet replaces an earlier one.
+ * NOTE(review): the data is read as-is, without removing emulation prevention
+ * bytes, and only the first SEI message of a NAL unit is looked at - confirm.
+ * @param avctx    codec context, used for logging
+ * @param ctx      decoder context whose a53_caption fields are updated
+ * @param p        start of the packet data
+ * @param buf_size size of the packet data in bytes
+ */
+static void cuvid_h264_parse_a53(AVCodecContext *avctx, CuvidContext *ctx, const uint8_t* p, int buf_size)
+{
+    const uint8_t* buf_end = p + buf_size;
+    while (p < buf_end) {
+        AVBufferRef *cc_buf = NULL;
+        GetBitContext gb_payload;
+        uint32_t user_identifier, start_code = -1;
+        int size = 0, skip, cc_count;
+        p = avpriv_find_start_code(p, buf_end, &start_code);
+        if (start_code > 0x1ff)
+            break;
+        if (start_code != 0x106)
+            continue;
+        buf_size = buf_end - p;
+        if (buf_size < 1 || p[0] != 4)          /* payload type 4 only */
+            continue;
+        p += 1; buf_size -= 1;
+        /* payload size: bytes are summed, a 0xFF byte means another one follows */
+        while (buf_size > 0) {
+            size += p[0];
+            buf_size -= 1;
+            if (*(p++) != 0xFF)
+                break;
+        }
+        if (buf_size <= 0 || buf_size < size || size < 7)
+            continue;
+        /* skip the 3 header bytes (4 if the first is 0xFF), keeping buf_size in step with p */
+        skip      = p[0] == 0xFF ? 4 : 3;
+        p        += skip;
+        buf_size -= skip;
+        /* need all 4 identifier bytes; init_get_bits8() takes a size in bytes */
+        if (buf_size < 4 || init_get_bits8(&gb_payload, p, buf_size) < 0)
+            continue;
+        user_identifier = get_bits_long(&gb_payload, 32);
+        switch (user_identifier) {
+        case MKBETAG('G', 'A', '9', '4'):       // closed captions
+            cc_count = ff_parse_a53_cc(&cc_buf, p + 4, buf_size - 4);
+            if (cc_count > 0 && cc_buf) {
+                av_freep(&ctx->a53_caption);
+                ctx->a53_caption_size = cc_count * 3;
+                ctx->a53_caption = av_malloc(ctx->a53_caption_size);
+                if (ctx->a53_caption)   /* check the allocation, then copy raw bytes */
+                    memcpy(ctx->a53_caption, cc_buf->data, ctx->a53_caption_size);
+                else
+                    ctx->a53_caption_size = 0;
+            }
+            av_buffer_unref(&cc_buf);   /* release the parser's buffer (was leaked) */
+            break;
+        default:
+            av_log(avctx, AV_LOG_VERBOSE,
+                   "Unsupported User Data Registered ITU-T T35 SEI message (atsc user_identifier = 0x%04x)\n",
+                   user_identifier);
+        }
+    }
+}
+
 static int cuvid_decode_packet(AVCodecContext *avctx, const AVPacket *avpkt)
 {
     CuvidContext *ctx = avctx->priv_data;
@@ -424,6 +589,15 @@ static int cuvid_decode_packet(AVCodecContext *avctx, const AVPacket *avpkt)
 
     ret = CHECK_CU(ctx->cvdl->cuvidParseVideoData(ctx->cuparser, &cupkt));
 
+    // assume there is one frame delay (the parser outputs previous picture once it sees new frame data)
+    av_freep(&ctx->a53_caption);
+    if (avpkt && avpkt->size) {
+        if (ctx->cuparseinfo.CodecType == cudaVideoCodec_MPEG2)
+            cuvid_mpeg_parse_a53(avctx, ctx, avpkt->data, avpkt->size);
+        else if (ctx->cuparseinfo.CodecType == cudaVideoCodec_H264) // no "{}" here: it made the H.264 scan run for every codec
+            cuvid_h264_parse_a53(avctx, ctx, avpkt->data, avpkt->size);
+    }
+
     if (ret < 0)
         goto error;
 
@@ -627,6 +801,15 @@ FF_ENABLE_DEPRECATION_WARNINGS
 
         if (frame->interlaced_frame)
             frame->top_field_first = parsed_frame.dispinfo.top_field_first;
+
+        if (parsed_frame.a53_caption)
+        {
+            AVFrameSideData *sd = av_frame_new_side_data(frame, AV_FRAME_DATA_A53_CC, parsed_frame.a53_caption_size);
+            if (sd)
+                memcpy(sd->data, parsed_frame.a53_caption, parsed_frame.a53_caption_size);
+            av_freep(&parsed_frame.a53_caption);
+            avctx->properties |= FF_CODEC_PROPERTY_CLOSED_CAPTIONS;
+        }
     } else if (ctx->decoder_flushing) {
         ret = AVERROR_EOF;
     } else {
-- 
2.25.1



More information about the ffmpeg-devel mailing list