[FFmpeg-devel] [PATCH v2 1/2] avformat/flvenc: support enhanced flv PacketTypeMetadata

Sun Dec 17 20:22:25 EET 2023

On 12/17/2023 1:24 PM, zhupengfei via ffmpeg-devel wrote:
> From: Zhu Pengfei <411294962 at qq.com>
> 
> Signed-off-by: Zhu Pengfei <411294962 at qq.com>
> ---
>   libavformat/flvenc.c | 155 +++++++++++++++++++++++++++++++++++++++++++
>   1 file changed, 155 insertions(+)
> 
> diff --git a/libavformat/flvenc.c b/libavformat/flvenc.c
> index f6d10f331c..7268394b93 100644
> --- a/libavformat/flvenc.c
> +++ b/libavformat/flvenc.c
> @@ -24,6 +24,7 @@
>   #include "libavutil/intfloat.h"
>   #include "libavutil/avassert.h"
>   #include "libavutil/mathematics.h"
> +#include "libavutil/mastering_display_metadata.h"
>   #include "libavcodec/codec_desc.h"
>   #include "libavcodec/mpeg4audio.h"
>   #include "avio.h"
> @@ -124,6 +125,7 @@ typedef struct FLVContext {
>   
>       int flags;
>       int64_t last_ts[FLV_STREAM_TYPE_NB];
> +    int write_metadata_pkt;
>   } FLVContext;
>   
>   static int get_audio_flags(AVFormatContext *s, AVCodecParameters *par)
> @@ -478,6 +480,158 @@ static void write_metadata(AVFormatContext *s, unsigned int ts)
>       avio_wb32(pb, flv->metadata_totalsize + 11);
>   }
>   
> +static void flv_write_metadata_packet(AVFormatContext *s, AVCodecParameters *par, unsigned int ts)
> +{
> +    AVIOContext *pb = s->pb;
> +    FLVContext *flv = s->priv_data;
> +    AVContentLightMetadata *lightMetadata = NULL;
> +    AVMasteringDisplayMetadata *displayMetadata = NULL;
> +    const int flags_size = 5;
> +    int64_t metadata_size_pos = 0;
> +    int64_t total_size = 0;
> +    const AVPacketSideData *side_data = NULL;
> +    /* Write one video tag carrying a "colorInfo" AMF object (colorConfig plus
> +     * optional hdrCll / hdrMdcv entries) for HEVC, AV1 and VP9 streams.
> +     * Nothing is written for other codecs, when flv->write_metadata_pkt is
> +     * already set, or when par has neither content light level nor mastering
> +     * display side data.
> +     *   s:   muxer context; output goes to s->pb, state is read from s->priv_data
> +     *   par: supplies codec_id, the colour fields and the coded side data
> +     *   ts:  value handed to put_timestamp() for the tag header */
> +    if (par->codec_id == AV_CODEC_ID_HEVC || par->codec_id == AV_CODEC_ID_AV1 ||
> +        par->codec_id == AV_CODEC_ID_VP9) {
> +        if (flv->write_metadata_pkt) return; // flag is set at the end of this function, so the tag is emitted at most once
> +
> +        side_data = av_packet_side_data_get(par->coded_side_data, par->nb_coded_side_data,
> +                                            AV_PKT_DATA_CONTENT_LIGHT_LEVEL);
> +        if (side_data)
> +            lightMetadata = (AVContentLightMetadata *)side_data->data;
> +
> +        side_data = av_packet_side_data_get(par->coded_side_data, par->nb_coded_side_data,
> +                                            AV_PKT_DATA_MASTERING_DISPLAY_METADATA);
> +        if (side_data)
> +            displayMetadata = (AVMasteringDisplayMetadata *)side_data->data;
> +
> +        if (!lightMetadata && !displayMetadata) return; // bails out before any byte is written, so the colour fields below are skipped too

Again, why are you not writing anything when there's no static HDR metadata?
transferCharacteristics, matrixCoefficients and colorPrimaries don't 
depend on those, and can be written on their own. Similarly, you can 
write lightMetadata when there's no displayMetadata, and vice versa.

> +
> +        /*
> +        * Reference Enhancing FLV
> +        * https://github.com/veovera/enhanced-rtmp/blob/main/enhanced-rtmp.pdf
> +        *
> +        * Tag header: type byte, 24-bit size (placeholder, patched at the
> +        * end of this function), timestamp, then flv->reserved as 24 bits.
> +        * */
> +        avio_w8(pb, FLV_TAG_TYPE_VIDEO); //write video tag type
> +        metadata_size_pos = avio_tell(pb); // position of the 24-bit size field, revisited below
> +        avio_wb24(pb, 0 + flags_size); // provisional size; overwritten once the payload length is known
> +        put_timestamp(pb, ts); //ts = pkt->dts, gen
> +        avio_wb24(pb, flv->reserved);
> +
> +        if (par->codec_id == AV_CODEC_ID_HEVC) {
> +            avio_w8(pb, FLV_IS_EX_HEADER | PacketTypeMetadata| FLV_FRAME_VIDEO_INFO_CMD); // ExVideoTagHeader mode with PacketTypeMetadata
> +            avio_write(pb, "hvc1", 4);
> +        } else if (par->codec_id == AV_CODEC_ID_AV1 || par->codec_id == AV_CODEC_ID_VP9) {
> +            avio_w8(pb, FLV_IS_EX_HEADER | PacketTypeMetadata| FLV_FRAME_VIDEO_INFO_CMD); // same header byte as the HEVC branch; only the four-character code differs
> +            avio_write(pb, par->codec_id == AV_CODEC_ID_AV1 ? "av01" : "vp09", 4);
> +        }
> +
> +        avio_w8(pb, AMF_DATA_TYPE_STRING);
> +        put_amf_string(pb, "colorInfo");
> +
> +        avio_w8(pb, AMF_DATA_TYPE_OBJECT); // opens the outer object; its end marker is written after the hdrMdcv section
> +
> +        put_amf_string(pb, "colorConfig");  // colorConfig
> +
> +        /* mixed array (hash) with size and string/type/data tuples */
> +        avio_w8(pb, AMF_DATA_TYPE_MIXEDARRAY);
> +
> +        avio_wb32(pb, 0); // array count field, written as 0 even when entries follow -- NOTE(review): confirm readers accept this
> +        /* each colour field is emitted only when it is specified and below its *_NB bound */
> +        if (par->color_trc != AVCOL_TRC_UNSPECIFIED &&
> +            par->color_trc < AVCOL_TRC_NB) {
> +            put_amf_string(pb, "transferCharacteristics");  // color_trc
> +            put_amf_double(pb, par->color_trc);
> +        }
> +
> +        if (par->color_space != AVCOL_SPC_UNSPECIFIED &&
> +            par->color_space < AVCOL_SPC_NB) {
> +            put_amf_string(pb, "matrixCoefficients"); // colorspace
> +            put_amf_double(pb, par->color_space);
> +        }
> +
> +        if (par->color_primaries != AVCOL_PRI_UNSPECIFIED &&
> +            par->color_primaries < AVCOL_PRI_NB) {
> +            put_amf_string(pb, "colorPrimaries"); // color_primaries
> +            put_amf_double(pb, par->color_primaries);
> +        }
> +
> +        put_amf_string(pb, ""); // empty key, followed by the end marker on the next line
> +        avio_w8(pb, AMF_END_OF_OBJECT); // array end of object
> +
> +        if (lightMetadata) {
> +            put_amf_string(pb, "hdrCll");
> +
> +            /* mixed array (hash) with size and string/type/data tuples */
> +            avio_w8(pb, AMF_DATA_TYPE_MIXEDARRAY);
> +
> +            avio_wb32(pb, 0); // array count field, written as 0 although two entries follow
> +
> +            put_amf_string(pb, "maxFall");
> +            put_amf_double(pb, lightMetadata->MaxFALL);
> +
> +            put_amf_string(pb, "maxCLL");
> +            put_amf_double(pb, lightMetadata->MaxCLL);
> +
> +            // array end of object
> +            put_amf_string(pb, "");
> +            avio_w8(pb, AMF_END_OF_OBJECT);
> +        }
> +        /* hdrMdcv is written only if the side data flags primaries and/or luminance as present */
> +        if (displayMetadata && (displayMetadata->has_primaries || displayMetadata->has_luminance)) {
> +            put_amf_string(pb, "hdrMdcv");
> +
> +            /* mixed array (hash) with size and string/type/data tuples */
> +            avio_w8(pb, AMF_DATA_TYPE_MIXEDARRAY);
> +            avio_wb32(pb, 0); // array count field, written as 0 regardless of how many entries follow
> +
> +            if (displayMetadata->has_primaries) {
> +                put_amf_string(pb, "redX"); // display_primaries[0..2] are written as red, green, blue; [..][0] as X, [..][1] as Y
> +                put_amf_double(pb, av_q2d(displayMetadata->display_primaries[0][0]));
> +
> +                put_amf_string(pb, "redY");
> +                put_amf_double(pb, av_q2d(displayMetadata->display_primaries[0][1]));
> +
> +                put_amf_string(pb, "greenX");
> +                put_amf_double(pb, av_q2d(displayMetadata->display_primaries[1][0]));
> +
> +                put_amf_string(pb, "greenY");
> +                put_amf_double(pb, av_q2d(displayMetadata->display_primaries[1][1]));
> +
> +                put_amf_string(pb, "blueX");
> +                put_amf_double(pb, av_q2d(displayMetadata->display_primaries[2][0]));
> +
> +                put_amf_string(pb, "blueY");
> +                put_amf_double(pb, av_q2d(displayMetadata->display_primaries[2][1]));
> +
> +                put_amf_string(pb, "whitePointX");
> +                put_amf_double(pb, av_q2d(displayMetadata->white_point[0]));
> +
> +                put_amf_string(pb, "whitePointY");
> +                put_amf_double(pb, av_q2d(displayMetadata->white_point[1]));
> +            }
> +            if (displayMetadata->has_luminance) {
> +                put_amf_string(pb, "maxLuminance");
> +                put_amf_double(pb, av_q2d(displayMetadata->max_luminance));
> +
> +                put_amf_string(pb, "minLuminance");
> +                put_amf_double(pb, av_q2d(displayMetadata->min_luminance));
> +            }
> +            put_amf_string(pb, ""); // empty key + end marker, same terminator as the sections above
> +            avio_w8(pb, AMF_END_OF_OBJECT);
> +        }
> +        avio_w8(pb, AMF_DATA_TYPE_OBJECT_END); // presumably closes the outer object; NOTE(review): unlike the nested sections, no empty-string key is written before this marker -- confirm
> +        /* Back-patch the size field: payload length is everything written
> +         * since metadata_size_pos minus 10 (the 3-byte size field plus what
> +         * put_timestamp() and the reserved field emitted -- presumably 4 + 3
> +         * bytes, confirm against put_timestamp()). Then move back to the end
> +         * of the tag and append the previous-tag-size word, which adds the
> +         * tag type byte (hence + 11). */
> +        total_size = avio_tell(pb) - metadata_size_pos - 10;
> +        avio_seek(pb, metadata_size_pos, SEEK_SET);
> +        avio_wb24(pb, total_size);
> +        avio_tell(pb); // return value unused -- NOTE(review): looks like leftover debugging
> +        avio_skip(pb, total_size + 10 - 3); // skip forward over the rest of the tag (3 bytes of size were just rewritten)
> +        avio_tell(pb); // return value unused -- NOTE(review): looks like leftover debugging
> +        avio_wb32(pb, total_size + 11); // previous tag size
> +        flv->write_metadata_pkt = 1; // remember that the tag has been written
> +    }
> +}
> +
>   static int unsupported_codec(AVFormatContext *s,
>                                const char* type, int codec_id)
>   {
> @@ -878,6 +1032,7 @@ static int flv_write_packet(AVFormatContext *s, AVPacket *pkt)
>               memcpy(par->extradata, side, side_size);
>               flv_write_codec_header(s, par, pkt->dts);
>           }
> +        flv_write_metadata_packet(s, par, pkt->dts);
>       }
>   
>       if (flv->delay == AV_NOPTS_VALUE)