[FFmpeg-devel] [PATCH v16 01/16] global: Prepare AVFrame for subtitle handling

Fri Nov 26 12:34:45 EET 2021

Soft Works:
> Root commit for adding subtitle filtering capabilities.
> In detail:
> 
> - Add type (AVMediaType) field to AVFrame
>   Replaces previous way of distinction which was based on checking
>   width and height to determine whether a frame is audio or video
> - Add subtitle fields to AVFrame
> - Add new struct AVSubtitleArea, similar to AVSubtitleRect, but different
>   allocation logic. Cannot and must not be used interchangeably, hence
>   the new struct
> - Move enum AVSubtitleType, AVSubtitle and AVSubtitleRect to avutil
> - Add public-named members to enum AVSubtitleType (AV_SUBTITLE_FMT_)
> - Add avcodec_decode_subtitle3 which takes subtitle frames,
>   serving as compatibility shim to legacy subtitle decoding
> - Add additional methods for conversion between old and new API
> 
> Signed-off-by: softworkz <softworkz at hotmail.com>
> ---
>  libavcodec/avcodec.c             |  19 ---
>  libavcodec/avcodec.h             |  75 ++--------
>  libavcodec/decode.c              |  53 +++++--
>  libavcodec/libzvbi-teletextdec.c |   1 +
>  libavcodec/pgssubdec.c           |   1 +
>  libavcodec/utils.c               |  11 ++
>  libavfilter/vf_subtitles.c       |  50 +++++--
>  libavformat/utils.c              |   1 +
>  libavutil/Makefile               |   2 +
>  libavutil/frame.c                | 194 +++++++++++++++++++++---
>  libavutil/frame.h                |  93 +++++++++++-
>  libavutil/subfmt.c               | 243 +++++++++++++++++++++++++++++++
>  libavutil/subfmt.h               | 185 +++++++++++++++++++++++

As has already been said by others, this should be split: One patch for
the actual new libavutil additions, one patch for the lavc decoding API,
one patch for the encoding API, one patch for every user that is made to
use the new APIs.

>  13 files changed, 803 insertions(+), 125 deletions(-)
>  create mode 100644 libavutil/subfmt.c
>  create mode 100644 libavutil/subfmt.h
> 
> diff --git a/libavcodec/avcodec.c b/libavcodec/avcodec.c
> index c00a9b2af8..13e3711b9c 100644
> --- a/libavcodec/avcodec.c
> +++ b/libavcodec/avcodec.c
> @@ -422,25 +422,6 @@ void avcodec_flush_buffers(AVCodecContext *avctx)
>          av_bsf_flush(avci->bsf);
>  }
>  
> -void avsubtitle_free(AVSubtitle *sub)
> -{
> -    int i;
> -
> -    for (i = 0; i < sub->num_rects; i++) {
> -        av_freep(&sub->rects[i]->data[0]);
> -        av_freep(&sub->rects[i]->data[1]);
> -        av_freep(&sub->rects[i]->data[2]);
> -        av_freep(&sub->rects[i]->data[3]);
> -        av_freep(&sub->rects[i]->text);
> -        av_freep(&sub->rects[i]->ass);
> -        av_freep(&sub->rects[i]);
> -    }
> -
> -    av_freep(&sub->rects);
> -
> -    memset(sub, 0, sizeof(*sub));
> -}
> -
>  av_cold int avcodec_close(AVCodecContext *avctx)
>  {
>      int i;
> diff --git a/libavcodec/avcodec.h b/libavcodec/avcodec.h
> index 7ee8bc2b7c..0c5819b116 100644
> --- a/libavcodec/avcodec.h
> +++ b/libavcodec/avcodec.h
> @@ -35,6 +35,7 @@
>  #include "libavutil/frame.h"
>  #include "libavutil/log.h"
>  #include "libavutil/pixfmt.h"
> +#include "libavutil/subfmt.h"
>  #include "libavutil/rational.h"
>  
>  #include "codec.h"
> @@ -1674,7 +1675,7 @@ typedef struct AVCodecContext {
>  
>      /**
>       * Header containing style information for text subtitles.
> -     * For SUBTITLE_ASS subtitle type, it should contain the whole ASS
> +     * For AV_SUBTITLE_FMT_ASS subtitle type, it should contain the whole ASS
>       * [Script Info] and [V4+ Styles] section, plus the [Events] line and
>       * the Format line following. It shouldn't include any Dialogue line.
>       * - encoding: Set/allocated/freed by user (before avcodec_open2())
> @@ -2238,63 +2239,8 @@ typedef struct AVHWAccel {
>   * @}
>   */
>  
> -enum AVSubtitleType {
> -    SUBTITLE_NONE,
> -
> -    SUBTITLE_BITMAP,                ///< A bitmap, pict will be set
> -
> -    /**
> -     * Plain text, the text field must be set by the decoder and is
> -     * authoritative. ass and pict fields may contain approximations.
> -     */
> -    SUBTITLE_TEXT,
> -
> -    /**
> -     * Formatted text, the ass field must be set by the decoder and is
> -     * authoritative. pict and text fields may contain approximations.
> -     */
> -    SUBTITLE_ASS,
> -};
> -
>  #define AV_SUBTITLE_FLAG_FORCED 0x00000001
>  
> -typedef struct AVSubtitleRect {
> -    int x;         ///< top left corner  of pict, undefined when pict is not set
> -    int y;         ///< top left corner  of pict, undefined when pict is not set
> -    int w;         ///< width            of pict, undefined when pict is not set
> -    int h;         ///< height           of pict, undefined when pict is not set
> -    int nb_colors; ///< number of colors in pict, undefined when pict is not set
> -
> -    /**
> -     * data+linesize for the bitmap of this subtitle.
> -     * Can be set for text/ass as well once they are rendered.
> -     */
> -    uint8_t *data[4];
> -    int linesize[4];
> -
> -    enum AVSubtitleType type;
> -
> -    char *text;                     ///< 0 terminated plain UTF-8 text
> -
> -    /**
> -     * 0 terminated ASS/SSA compatible event line.
> -     * The presentation of this is unaffected by the other values in this
> -     * struct.
> -     */
> -    char *ass;
> -
> -    int flags;
> -} AVSubtitleRect;
> -
> -typedef struct AVSubtitle {
> -    uint16_t format; /* 0 = graphics */
> -    uint32_t start_display_time; /* relative to packet pts, in ms */
> -    uint32_t end_display_time; /* relative to packet pts, in ms */
> -    unsigned num_rects;
> -    AVSubtitleRect **rects;
> -    int64_t pts;    ///< Same as packet pts, in AV_TIME_BASE
> -} AVSubtitle;
> -
>  /**
>   * Return the LIBAVCODEC_VERSION_INT constant.
>   */
> @@ -2430,13 +2376,6 @@ int avcodec_open2(AVCodecContext *avctx, const AVCodec *codec, AVDictionary **op
>   */
>  int avcodec_close(AVCodecContext *avctx);
>  
> -/**
> - * Free all allocated data in the given subtitle struct.
> - *
> - * @param sub AVSubtitle to free.
> - */
> -void avsubtitle_free(AVSubtitle *sub);
> -
>  /**
>   * @}
>   */
> @@ -2527,6 +2466,8 @@ enum AVChromaLocation avcodec_chroma_pos_to_enum(int xpos, int ypos);
>   *                 must be freed with avsubtitle_free if *got_sub_ptr is set.
>   * @param[in,out] got_sub_ptr Zero if no subtitle could be decompressed, otherwise, it is nonzero.
>   * @param[in] avpkt The input AVPacket containing the input buffer.
> + *
> + * @deprecated Use the new decode API (avcodec_send_packet, avcodec_receive_frame) instead.

This is not how you deprecate something: You have to add a @deprecated
doxygen comment as well as the attribute_deprecated to make some noise.

Actually, you should deprecate the whole AVSubtitle API.

>   */
>  int avcodec_decode_subtitle2(AVCodecContext *avctx, AVSubtitle *sub,
>                              int *got_sub_ptr,
> @@ -3125,6 +3066,14 @@ void avcodec_flush_buffers(AVCodecContext *avctx);
>   */
>  int av_get_audio_frame_duration(AVCodecContext *avctx, int frame_bytes);
>  
> +/**
> + * Return subtitle format from a codec descriptor
> + *
> + * @param codec_descriptor codec descriptor
> + * @return                 the subtitle type (e.g. bitmap, text)
> + */
> +enum AVSubtitleType av_get_subtitle_format_from_codecdesc(const AVCodecDescriptor *codec_descriptor);

Do we need this function? It seems too trivial and as Anton has pointed
out is flawed. And anyway, this would belong into codec_desc.h.

> +
>  /* memory */
>  
>  /**
> diff --git a/libavcodec/decode.c b/libavcodec/decode.c
> index c44724d150..788b043266 100644
> --- a/libavcodec/decode.c
> +++ b/libavcodec/decode.c
> @@ -576,6 +576,34 @@ static int decode_receive_frame_internal(AVCodecContext *avctx, AVFrame *frame)
>      return ret;
>  }
>  
> +static int decode_subtitle2_priv(AVCodecContext *avctx, AVSubtitle *sub,
> +                                 int *got_sub_ptr, const AVPacket *avpkt);
> +
> +static int decode_subtitle_shim(AVCodecContext *avctx, AVFrame *frame, const AVPacket *avpkt)
> +{
> +    int ret, got_sub_ptr = 0;
> +    AVSubtitle subtitle = { 0 };
> +
> +    av_frame_unref(frame);
> +
> +    ret = decode_subtitle2_priv(avctx, &subtitle, &got_sub_ptr, avpkt);
> +
> +    if (ret >= 0 && got_sub_ptr) {
> +        frame->type = AVMEDIA_TYPE_SUBTITLE;
> +        frame->format = subtitle.format;
> +        ret = av_frame_get_buffer2(frame, 0);
> +        if (ret < 0)
> +            return ret;
> +        av_frame_put_subtitle(frame, &subtitle);
> +
> +        frame->pkt_dts = avpkt->dts;
> +    }
> +
> +    avsubtitle_free(&subtitle);
> +
> +    return ret;
> +}
> +
>  int attribute_align_arg avcodec_send_packet(AVCodecContext *avctx, const AVPacket *avpkt)
>  {
>      AVCodecInternal *avci = avctx->internal;
> @@ -590,6 +618,9 @@ int attribute_align_arg avcodec_send_packet(AVCodecContext *avctx, const AVPacke
>      if (avpkt && !avpkt->size && avpkt->data)
>          return AVERROR(EINVAL);
>  
> +    if (avctx->codec_type == AVMEDIA_TYPE_SUBTITLE)
> +        return decode_subtitle_shim(avctx, avci->buffer_frame, avpkt);

1. This is missing a check whether buffer_frame is empty or not; in the
latter case, you need to return AVERROR(EAGAIN); instead
decode_subtitle_shim() destroys what is already there.
(Said check is currently offloaded to the BSF API in the audio/video
codepath. This is actually a violation of the BSF API.)
2. Flushing is not really implemented well:
a) It is legal to call avcodec_send_packet() with avpkt == NULL to
indicate EOF. If I am not mistaken, decode_subtitle2_priv() will just
crash in this case.
b) avcodec_receive_frame() only returns what is in buffer_frame; it
never calls decode_subtitle2_priv() or the underlying decode function at
all. This basically presumes that a subtitle decoder can only return one
subtitle after flushing. I don't know whether this is true for our
decoders with the delay cap.
c) avcodec_receive_frame() seems to never return AVERROR_EOF after
flushing, but always AVERROR(EAGAIN) (if the delay cap is set, the first
call to avcodec_receive_frame() after flushing can also result in a
frame being returned). Yet this makes no sense and is IMO an API
violation on lavc's part.

(While we have subtitle decoders with the delay cap, I don't know
whether this is actually tested by fate and whether all code actually
flushes subtitle decoders at all.)

> +
>      av_packet_unref(avci->buffer_pkt);
>      if (avpkt && (avpkt->data || avpkt->side_data_elems)) {
>          ret = av_packet_ref(avci->buffer_pkt, avpkt);
> @@ -651,7 +682,9 @@ int attribute_align_arg avcodec_receive_frame(AVCodecContext *avctx, AVFrame *fr
>  
>      if (avci->buffer_frame->buf[0]) {
>          av_frame_move_ref(frame, avci->buffer_frame);
> -    } else {
> +    } else if (avctx->codec_type == AVMEDIA_TYPE_SUBTITLE)
> +        return AVERROR(EAGAIN);
> +    else {
>          ret = decode_receive_frame_internal(avctx, frame);
>          if (ret < 0)
>              return ret;
> @@ -723,7 +756,7 @@ static void get_subtitle_defaults(AVSubtitle *sub)
>  
>  #define UTF8_MAX_BYTES 4 /* 5 and 6 bytes sequences should not be used */
>  static int recode_subtitle(AVCodecContext *avctx, AVPacket **outpkt,
> -                           AVPacket *inpkt, AVPacket *buf_pkt)
> +                           const AVPacket *inpkt, AVPacket *buf_pkt)

*outpkt = inpkt; is now no longer const-correct. And if you make outpkt
const AVPacket**, then avctx->codec->decode(avctx, sub, got_sub_ptr,
pkt) is no longer const-correct (although no subtitle decoder modifies
the packet it is given!). In other words: Been there, done that.

>  {
>  #if CONFIG_ICONV
>      iconv_t cd = (iconv_t)-1;
> @@ -802,9 +835,8 @@ static int utf8_check(const uint8_t *str)
>      return 1;
>  }
>  
> -int avcodec_decode_subtitle2(AVCodecContext *avctx, AVSubtitle *sub,
> -                             int *got_sub_ptr,
> -                             AVPacket *avpkt)
> +static int decode_subtitle2_priv(AVCodecContext *avctx, AVSubtitle *sub,
> +                             int *got_sub_ptr, const AVPacket *avpkt)
>  {
>      int ret = 0;
>  
> @@ -844,10 +876,7 @@ int avcodec_decode_subtitle2(AVCodecContext *avctx, AVSubtitle *sub,
>                                                   avctx->pkt_timebase, ms);
>          }
>  
> -        if (avctx->codec_descriptor->props & AV_CODEC_PROP_BITMAP_SUB)
> -            sub->format = 0;
> -        else if (avctx->codec_descriptor->props & AV_CODEC_PROP_TEXT_SUB)
> -            sub->format = 1;
> +        sub->format = av_get_subtitle_format_from_codecdesc(avctx->codec_descriptor);
>  
>          for (unsigned i = 0; i < sub->num_rects; i++) {
>              if (avctx->sub_charenc_mode != FF_SUB_CHARENC_MODE_IGNORE &&
> @@ -871,6 +900,12 @@ int avcodec_decode_subtitle2(AVCodecContext *avctx, AVSubtitle *sub,
>      return ret;
>  }
>  
> +int avcodec_decode_subtitle2(AVCodecContext *avctx, AVSubtitle *sub,
> +                             int *got_sub_ptr, AVPacket *avpkt)
> +{
> +    return decode_subtitle2_priv(avctx, sub, got_sub_ptr, avpkt);
> +}
> +
>  enum AVPixelFormat avcodec_default_get_format(struct AVCodecContext *avctx,
>                                                const enum AVPixelFormat *fmt)
>  {
> diff --git a/libavcodec/libzvbi-teletextdec.c b/libavcodec/libzvbi-teletextdec.c
> index 1073d6a0bd..535c82d7a5 100644
> --- a/libavcodec/libzvbi-teletextdec.c
> +++ b/libavcodec/libzvbi-teletextdec.c
> @@ -27,6 +27,7 @@
>  #include "libavutil/intreadwrite.h"
>  #include "libavutil/log.h"
>  #include "libavutil/common.h"
> +#include "libavutil/subfmt.h"

If you need to add a new header here for this (which I don't think),
then this means that you broke API.

>  
>  #include <libzvbi.h>
>  
> diff --git a/libavcodec/pgssubdec.c b/libavcodec/pgssubdec.c
> index 388639a110..5709058b1c 100644
> --- a/libavcodec/pgssubdec.c
> +++ b/libavcodec/pgssubdec.c
> @@ -32,6 +32,7 @@
>  #include "libavutil/colorspace.h"
>  #include "libavutil/imgutils.h"
>  #include "libavutil/opt.h"
> +#include "libavutil/subfmt.h"
>  
>  #define RGBA(r,g,b,a) (((unsigned)(a) << 24) | ((r) << 16) | ((g) << 8) | (b))
>  #define MAX_EPOCH_PALETTES 8   // Max 8 allowed per PGS epoch
> diff --git a/libavcodec/utils.c b/libavcodec/utils.c
> index a91a54b0dc..efe14e5a43 100644
> --- a/libavcodec/utils.c
> +++ b/libavcodec/utils.c
> @@ -824,6 +824,17 @@ int av_get_audio_frame_duration(AVCodecContext *avctx, int frame_bytes)
>      return FFMAX(0, duration);
>  }
>  
> +enum AVSubtitleType av_get_subtitle_format_from_codecdesc(const AVCodecDescriptor *codec_descriptor)
> +{
> +    if(codec_descriptor->props & AV_CODEC_PROP_BITMAP_SUB)
> +        return AV_SUBTITLE_FMT_BITMAP;
> +
> +    if(codec_descriptor->props & AV_CODEC_PROP_TEXT_SUB)
> +        return AV_SUBTITLE_FMT_ASS;
> +
> +    return AV_SUBTITLE_FMT_UNKNOWN;
> +}
> +
>  int av_get_audio_frame_duration2(AVCodecParameters *par, int frame_bytes)
>  {
>      int duration = get_audio_frame_duration(par->codec_id, par->sample_rate,
> diff --git a/libavfilter/vf_subtitles.c b/libavfilter/vf_subtitles.c
> index 377160c72b..6b54cf669b 100644
> --- a/libavfilter/vf_subtitles.c
> +++ b/libavfilter/vf_subtitles.c
> @@ -35,14 +35,12 @@
>  # include "libavformat/avformat.h"
>  #endif
>  #include "libavutil/avstring.h"
> -#include "libavutil/imgutils.h"
>  #include "libavutil/opt.h"
>  #include "libavutil/parseutils.h"
>  #include "drawutils.h"
>  #include "avfilter.h"
>  #include "internal.h"
>  #include "formats.h"
> -#include "video.h"
>  
>  typedef struct AssContext {
>      const AVClass *class;
> @@ -292,6 +290,29 @@ static int attachment_is_font(AVStream * st)
>      return 0;
>  }
>  
> +static int decode(AVCodecContext *avctx, AVFrame *frame, int *got_frame, AVPacket *pkt)
> +{
> +    int ret;
> +
> +    *got_frame = 0;

You don't really need this: You could just return 0 if no frame was
returned, 1 if a frame was returned and < 0 on (actual, not EAGAIN) errors.

> +
> +    if (pkt) {
> +        ret = avcodec_send_packet(avctx, pkt);
> +        // In particular, we don't expect AVERROR(EAGAIN), because we read all
> +        // decoded frames with avcodec_receive_frame() until done.

Where do you do this? For this one would need to call
avcodec_receive_frame() in a loop until it returns EAGAIN, but you don't
do this.

> +        if (ret < 0 && ret != AVERROR_EOF)
> +            return ret;
> +    }
> +
> +    ret = avcodec_receive_frame(avctx, frame);
> +    if (ret < 0 && ret != AVERROR(EAGAIN))
> +        return ret;
> +    if (ret >= 0)
> +        *got_frame = 1;
> +
> +    return 0;
> +}
> +
>  AVFILTER_DEFINE_CLASS(subtitles);
>  
>  static av_cold int init_subtitles(AVFilterContext *ctx)
> @@ -306,6 +327,7 @@ static av_cold int init_subtitles(AVFilterContext *ctx)
>      AVStream *st;
>      AVPacket pkt;
>      AssContext *ass = ctx->priv;
> +    enum AVSubtitleType subtitle_format;
>  
>      /* Init libass */
>      ret = init(ctx);
> @@ -386,13 +408,17 @@ static av_cold int init_subtitles(AVFilterContext *ctx)
>          ret = AVERROR_DECODER_NOT_FOUND;
>          goto end;
>      }
> +
>      dec_desc = avcodec_descriptor_get(st->codecpar->codec_id);
> -    if (dec_desc && !(dec_desc->props & AV_CODEC_PROP_TEXT_SUB)) {
> +    subtitle_format = av_get_subtitle_format_from_codecdesc(dec_desc);
> +
> +    if (subtitle_format != AV_SUBTITLE_FMT_ASS) {
>          av_log(ctx, AV_LOG_ERROR,
> -               "Only text based subtitles are currently supported\n");
> -        ret = AVERROR_PATCHWELCOME;
> +               "Only text based subtitles are supported by this filter\n");
> +        ret = AVERROR_INVALIDDATA;
>          goto end;
>      }
> +
>      if (ass->charenc)
>          av_dict_set(&codec_opts, "sub_charenc", ass->charenc, 0);
>  
> @@ -448,18 +474,18 @@ static av_cold int init_subtitles(AVFilterContext *ctx)
>                                    dec_ctx->subtitle_header_size);
>      while (av_read_frame(fmt, &pkt) >= 0) {
>          int i, got_subtitle;
> -        AVSubtitle sub = {0};
> +        AVFrame *sub = av_frame_alloc();

Unchecked allocation. And anyway, this should not be allocated once per
loop iteration.

>  
>          if (pkt.stream_index == sid) {
> -            ret = avcodec_decode_subtitle2(dec_ctx, &sub, &got_subtitle, &pkt);
> +            ret = decode(dec_ctx, sub, &got_subtitle, &pkt);
>              if (ret < 0) {
>                  av_log(ctx, AV_LOG_WARNING, "Error decoding: %s (ignored)\n",
>                         av_err2str(ret));
>              } else if (got_subtitle) {
> -                const int64_t start_time = av_rescale_q(sub.pts, AV_TIME_BASE_Q, av_make_q(1, 1000));
> -                const int64_t duration   = sub.end_display_time;
> -                for (i = 0; i < sub.num_rects; i++) {
> -                    char *ass_line = sub.rects[i]->ass;
> +                const int64_t start_time = av_rescale_q(sub->subtitle_pts, AV_TIME_BASE_Q, av_make_q(1, 1000));
> +                const int64_t duration   = sub->subtitle_end_time;
> +                for (i = 0; i < sub->num_subtitle_areas; i++) {
> +                    char *ass_line = sub->subtitle_areas[i]->ass;
>                      if (!ass_line)
>                          break;
>                      ass_process_chunk(ass->track, ass_line, strlen(ass_line),
> @@ -468,7 +494,7 @@ static av_cold int init_subtitles(AVFilterContext *ctx)
>              }
>          }
>          av_packet_unref(&pkt);
> -        avsubtitle_free(&sub);
> +        av_frame_free(&sub);
>      }
>  
>  end:
> diff --git a/libavformat/utils.c b/libavformat/utils.c
> index 7840e8717c..d9b54c7725 100644
> --- a/libavformat/utils.c
> +++ b/libavformat/utils.c
> @@ -32,6 +32,7 @@
>  #include "libavutil/opt.h"
>  #include "libavutil/parseutils.h"
>  #include "libavutil/pixfmt.h"
> +#include "libavutil/subfmt.h"
>  #include "libavutil/thread.h"
>  #include "libavutil/time.h"
>  
> diff --git a/libavutil/Makefile b/libavutil/Makefile
> index 529046dbc8..7e79936876 100644
> --- a/libavutil/Makefile
> +++ b/libavutil/Makefile
> @@ -74,6 +74,7 @@ HEADERS = adler32.h                                                     \
>            sha512.h                                                      \
>            spherical.h                                                   \
>            stereo3d.h                                                    \
> +          subfmt.h                                                      \
>            threadmessage.h                                               \
>            time.h                                                        \
>            timecode.h                                                    \
> @@ -159,6 +160,7 @@ OBJS = adler32.o                                                        \
>         slicethread.o                                                    \
>         spherical.o                                                      \
>         stereo3d.o                                                       \
> +       subfmt.o                                                         \
>         threadmessage.o                                                  \
>         time.o                                                           \
>         timecode.o                                                       \
> diff --git a/libavutil/frame.c b/libavutil/frame.c
> index d4d3ad6988..dfe9c269a6 100644
> --- a/libavutil/frame.c
> +++ b/libavutil/frame.c
> @@ -26,6 +26,7 @@
>  #include "imgutils.h"
>  #include "mem.h"
>  #include "samplefmt.h"
> +#include "subfmt.h"
>  #include "hwcontext.h"
>  
>  #define CHECK_CHANNELS_CONSISTENCY(frame) \
> @@ -50,6 +51,9 @@ const char *av_get_colorspace_name(enum AVColorSpace val)
>      return name[val];
>  }
>  #endif
> +
> +static int frame_copy_subtitles(AVFrame *dst, const AVFrame *src, int copy_data);
> +
>  static void get_frame_defaults(AVFrame *frame)
>  {
>      if (frame->extended_data != frame->data)
> @@ -72,7 +76,12 @@ static void get_frame_defaults(AVFrame *frame)
>      frame->colorspace          = AVCOL_SPC_UNSPECIFIED;
>      frame->color_range         = AVCOL_RANGE_UNSPECIFIED;
>      frame->chroma_location     = AVCHROMA_LOC_UNSPECIFIED;
> -    frame->flags               = 0;
> +    frame->subtitle_start_time = 0;
> +    frame->subtitle_end_time   = 0;
> +    frame->num_subtitle_areas  = 0;
> +    frame->subtitle_areas      = NULL;
> +    frame->subtitle_pts        = 0;
> +    frame->subtitle_header     = NULL;
>  }
>  
>  static void free_side_data(AVFrameSideData **ptr_sd)
> @@ -243,23 +252,55 @@ static int get_audio_buffer(AVFrame *frame, int align)
>  
>  }
>  
> +static int get_subtitle_buffer(AVFrame *frame)
> +{
> +    // Buffers in AVFrame->buf[] are not used in case of subtitle frames.
> +    // To accomodate with existing code, checking ->buf[0] to determine
> +    // whether a frame is ref-counted or has data, we're adding a 1-byte
> +    // buffer here, which marks the subtitle frame to contain data.

This is a terrible hack that might be necessary for now. But I don't see
anything

> +    frame->buf[0] = av_buffer_alloc(1);
> +    if (!frame->buf[0]) {
> +        av_frame_unref(frame);
> +        return AVERROR(ENOMEM);
> +    }
> +
> +    frame->extended_data = frame->data;
> +
> +    return 0;
> +}
> +
>  int av_frame_get_buffer(AVFrame *frame, int align)
> +{
> +    if (frame->width > 0 && frame->height > 0)
> +        frame->type = AVMEDIA_TYPE_VIDEO;
> +    else if (frame->nb_samples > 0 && (frame->channel_layout || frame->channels > 0))
> +        frame->type = AVMEDIA_TYPE_AUDIO;
> +
> +    return av_frame_get_buffer2(frame, align);
> +}
> +
> +int av_frame_get_buffer2(AVFrame *frame, int align)
>  {
>      if (frame->format < 0)
>          return AVERROR(EINVAL);
>  
> -    if (frame->width > 0 && frame->height > 0)
> +    switch(frame->type) {
> +    case AVMEDIA_TYPE_VIDEO:
>          return get_video_buffer(frame, align);
> -    else if (frame->nb_samples > 0 && (frame->channel_layout || frame->channels > 0))
> +    case AVMEDIA_TYPE_AUDIO:
>          return get_audio_buffer(frame, align);
> -
> -    return AVERROR(EINVAL);
> +    case AVMEDIA_TYPE_SUBTITLE:
> +        return get_subtitle_buffer(frame);
> +    default:
> +        return AVERROR(EINVAL);
> +    }
>  }
>  
>  static int frame_copy_props(AVFrame *dst, const AVFrame *src, int force_copy)
>  {
>      int ret, i;
>  
> +    dst->type                   = src->type;
>      dst->key_frame              = src->key_frame;
>      dst->pict_type              = src->pict_type;
>      dst->sample_aspect_ratio    = src->sample_aspect_ratio;
> @@ -290,6 +331,12 @@ static int frame_copy_props(AVFrame *dst, const AVFrame *src, int force_copy)
>      dst->colorspace             = src->colorspace;
>      dst->color_range            = src->color_range;
>      dst->chroma_location        = src->chroma_location;
> +    dst->subtitle_start_time    = src->subtitle_start_time;
> +    dst->subtitle_end_time      = src->subtitle_end_time;
> +    dst->subtitle_pts           = src->subtitle_pts;
> +
> +    if ((ret = av_buffer_replace(&dst->subtitle_header, src->subtitle_header)) < 0)
> +        return ret;
>  
>      av_dict_copy(&dst->metadata, src->metadata, 0);
>  
> @@ -331,6 +378,7 @@ int av_frame_ref(AVFrame *dst, const AVFrame *src)
>      av_assert1(dst->width == 0 && dst->height == 0);
>      av_assert1(dst->channels == 0);
>  
> +    dst->type           = src->type;
>      dst->format         = src->format;
>      dst->width          = src->width;
>      dst->height         = src->height;
> @@ -344,7 +392,7 @@ int av_frame_ref(AVFrame *dst, const AVFrame *src)
>  
>      /* duplicate the frame data if it's not refcounted */
>      if (!src->buf[0]) {
> -        ret = av_frame_get_buffer(dst, 0);
> +        ret = av_frame_get_buffer2(dst, 0);
>          if (ret < 0)
>              goto fail;
>  
> @@ -366,6 +414,10 @@ int av_frame_ref(AVFrame *dst, const AVFrame *src)
>          }
>      }
>  
> +    /* copy subtitle areas */
> +    if (src->type == AVMEDIA_TYPE_SUBTITLE)
> +        frame_copy_subtitles(dst, src, 0);
> +
>      if (src->extended_buf) {
>          dst->extended_buf = av_calloc(src->nb_extended_buf,
>                                        sizeof(*dst->extended_buf));
> @@ -436,7 +488,7 @@ AVFrame *av_frame_clone(const AVFrame *src)
>  
>  void av_frame_unref(AVFrame *frame)
>  {
> -    int i;
> +    unsigned i, n;
>  
>      if (!frame)
>          return;
> @@ -455,6 +507,21 @@ void av_frame_unref(AVFrame *frame)
>      av_buffer_unref(&frame->opaque_ref);
>      av_buffer_unref(&frame->private_ref);
>  
> +    av_buffer_unref(&frame->subtitle_header);
> +
> +    for (i = 0; i < frame->num_subtitle_areas; i++) {
> +        AVSubtitleArea *area = frame->subtitle_areas[i];
> +
> +        for (n = 0; n < FF_ARRAY_ELEMS(area->buf); n++)
> +            av_buffer_unref(&area->buf[n]);
> +
> +        av_freep(&area->text);
> +        av_freep(&area->ass);
> +        av_free(area);
> +    }
> +
> +    av_freep(&frame->subtitle_areas);
> +
>      get_frame_defaults(frame);
>  }
>  
> @@ -472,7 +539,8 @@ void av_frame_move_ref(AVFrame *dst, AVFrame *src)
>  
>  int av_frame_is_writable(AVFrame *frame)
>  {
> -    int i, ret = 1;
> +    AVSubtitleArea *area;
> +    unsigned i, n, ret = 1;

Use smaller scope for the variables that you add here (i.e. don't add
them here).

>  
>      /* assume non-refcounted frames are not writable */
>      if (!frame->buf[0])
> @@ -484,6 +552,15 @@ int av_frame_is_writable(AVFrame *frame)
>      for (i = 0; i < frame->nb_extended_buf; i++)
>          ret &= !!av_buffer_is_writable(frame->extended_buf[i]);
>  
> +    for (i = 0; i < frame->num_subtitle_areas; i++) {
> +        area = frame->subtitle_areas[i];
> +        if (area) {

Who sets an incorrect num_subtitle_areas?

> +            for (n = 0; n < FF_ARRAY_ELEMS(area->buf); n++)
> +                if (area->buf[n])
> +                    ret &= !!av_buffer_is_writable(area->buf[n]);
> +            }
> +    }
> +
>      return ret;
>  }
>  
> @@ -499,6 +576,7 @@ int av_frame_make_writable(AVFrame *frame)
>          return 0;
>  
>      memset(&tmp, 0, sizeof(tmp));
> +    tmp.type           = frame->type;
>      tmp.format         = frame->format;
>      tmp.width          = frame->width;
>      tmp.height         = frame->height;
> @@ -509,7 +587,7 @@ int av_frame_make_writable(AVFrame *frame)
>      if (frame->hw_frames_ctx)
>          ret = av_hwframe_get_buffer(frame->hw_frames_ctx, &tmp, 0);
>      else
> -        ret = av_frame_get_buffer(&tmp, 0);
> +        ret = av_frame_get_buffer2(&tmp, 0);
>      if (ret < 0)
>          return ret;
>  
> @@ -544,14 +622,22 @@ AVBufferRef *av_frame_get_plane_buffer(AVFrame *frame, int plane)
>      uint8_t *data;
>      int planes, i;
>  
> -    if (frame->nb_samples) {
> -        int channels = frame->channels;
> -        if (!channels)
> -            return NULL;
> -        CHECK_CHANNELS_CONSISTENCY(frame);
> -        planes = av_sample_fmt_is_planar(frame->format) ? channels : 1;
> -    } else
> +    switch(frame->type) {
> +    case AVMEDIA_TYPE_VIDEO:
>          planes = 4;
> +        break;
> +    case AVMEDIA_TYPE_AUDIO:
> +        {
> +            int channels = frame->channels;
> +            if (!channels)
> +                return NULL;
> +            CHECK_CHANNELS_CONSISTENCY(frame);
> +            planes = av_sample_fmt_is_planar(frame->format) ? channels : 1;
> +            break;
> +        }
> +    default:
> +        return NULL;
> +    }
>  
>      if (plane < 0 || plane >= planes || !frame->extended_data[plane])
>          return NULL;
> @@ -675,17 +761,39 @@ static int frame_copy_audio(AVFrame *dst, const AVFrame *src)
>      return 0;
>  }
>  
> +static int frame_copy_subtitles(AVFrame *dst, const AVFrame *src, int copy_data)
> +{
> +    dst->format = src->format;
> +
> +    dst->num_subtitle_areas = src->num_subtitle_areas;
> +
> +    if (src->num_subtitle_areas) {
> +        dst->subtitle_areas = av_malloc_array(src->num_subtitle_areas, sizeof(AVSubtitleArea *));
> +
> +        for (unsigned i = 0; i < src->num_subtitle_areas; i++) {
> +            dst->subtitle_areas[i] = av_mallocz(sizeof(AVSubtitleArea));
> +            av_subtitle_area2area(dst->subtitle_areas[i], src->subtitle_areas[i], copy_data);
> +        }
> +    }
> +
> +    return 0;
> +}
> +
>  int av_frame_copy(AVFrame *dst, const AVFrame *src)
>  {
>      if (dst->format != src->format || dst->format < 0)
>          return AVERROR(EINVAL);
>  
> -    if (dst->width > 0 && dst->height > 0)
> +    switch(dst->type) {
> +    case AVMEDIA_TYPE_VIDEO:
>          return frame_copy_video(dst, src);
> -    else if (dst->nb_samples > 0 && dst->channels > 0)
> +    case AVMEDIA_TYPE_AUDIO:
>          return frame_copy_audio(dst, src);
> -
> -    return AVERROR(EINVAL);
> +    case AVMEDIA_TYPE_SUBTITLE:
> +        return frame_copy_subtitles(dst, src, 1);
> +    default:
> +        return AVERROR(EINVAL);
> +    }
>  }
>  
>  void av_frame_remove_side_data(AVFrame *frame, enum AVFrameSideDataType type)
> @@ -832,3 +940,49 @@ int av_frame_apply_cropping(AVFrame *frame, int flags)
>  
>      return 0;
>  }
> +
> +/**
> + * Copies subtitle data from AVSubtitle (deprecated) to AVFrame
> + *
> + * @note This is a compatibility method for conversion to the legacy API
> + */
> +void av_frame_put_subtitle(AVFrame *frame, const AVSubtitle *sub)
> +{
> +    frame->format              = sub->format;
> +    frame->subtitle_start_time = sub->start_display_time;
> +    frame->subtitle_end_time   = sub->end_display_time;
> +    frame->subtitle_pts        = sub->pts;
> +    frame->num_subtitle_areas  = sub->num_rects;
> +
> +    if (sub->num_rects) {
> +        frame->subtitle_areas = av_malloc_array(sub->num_rects, sizeof(AVSubtitleArea *));
> +
> +        for (unsigned i = 0; i < sub->num_rects; i++) {
> +            frame->subtitle_areas[i] = av_mallocz(sizeof(AVSubtitleArea));
> +            av_subtitle_rect2area(frame->subtitle_areas[i], sub->rects[i]);
> +        }
> +    }
> +}
> +
> +/**
> + * Copies subtitle data from AVFrame to AVSubtitle (deprecated)
> + *
> + * @note This is a compatibility method for conversion to the legacy API
> + */
> +void av_frame_get_subtitle(AVSubtitle *sub, AVFrame *frame)
> +{
> +    sub->start_display_time = frame->subtitle_start_time;
> +    sub->end_display_time   = frame->subtitle_end_time;
> +    sub->pts                = frame->subtitle_pts;
> +    sub->num_rects          = frame->num_subtitle_areas;
> +
> +    if (frame->num_subtitle_areas) {
> +        sub->rects = av_malloc_array(frame->num_subtitle_areas, sizeof(AVSubtitle *));

Unchecked. Furthermore, wrong sizeof operand. Notice that you have
already set num_rects before the allocation, so on error, the AVSubtitle
would be inconsistent if you simply returned AVERROR(ENOMEM).

> +
> +        for (unsigned i = 0; i < frame->num_subtitle_areas; i++) {
> +            sub->rects[i] = av_mallocz(sizeof(AVSubtitleRect));

Unchecked allocations.

> +            av_subtitle_area2rect(sub->rects[i], frame->subtitle_areas[i]);

Missing check.

> +        }
> +    }
> +}
> +
> diff --git a/libavutil/frame.h b/libavutil/frame.h
> index 753234792e..742f4ba07e 100644
> --- a/libavutil/frame.h
> +++ b/libavutil/frame.h
> @@ -34,6 +34,7 @@
>  #include "rational.h"
>  #include "samplefmt.h"
>  #include "pixfmt.h"
> +#include "subfmt.h"
>  #include "version.h"
>  
>  
> @@ -278,7 +279,7 @@ typedef struct AVRegionOfInterest {
>  } AVRegionOfInterest;
>  
>  /**
> - * This structure describes decoded (raw) audio or video data.
> + * This structure describes decoded (raw) audio, video or subtitle data.
>   *
>   * AVFrame must be allocated using av_frame_alloc(). Note that this only
>   * allocates the AVFrame itself, the buffers for the data must be managed
> @@ -309,6 +310,13 @@ typedef struct AVRegionOfInterest {
>   */
>  typedef struct AVFrame {
>  #define AV_NUM_DATA_POINTERS 8
> +    /**
> +     * Media type of the frame (audio, video, subtitles..)
> +     *
> +     * See AVMEDIA_TYPE_xxx
> +     */
> +    enum AVMediaType type;
> +
>      /**
>       * pointer to the picture/channel planes.
>       * This might be different from the first allocated byte. For video,
> @@ -390,7 +398,7 @@ typedef struct AVFrame {
>      /**
>       * format of the frame, -1 if unknown or unset
>       * Values correspond to enum AVPixelFormat for video frames,
> -     * enum AVSampleFormat for audio)
> +     * enum AVSampleFormat for audio, AVSubtitleType for subtitles)
>       */
>      int format;
>  
> @@ -481,6 +489,39 @@ typedef struct AVFrame {
>       */
>      uint64_t channel_layout;
>  
> +    /**
> +     * Display start time, relative to packet pts, in ms.
> +     */
> +    uint32_t subtitle_start_time;
> +
> +    /**
> +     * Display end time, relative to packet pts, in ms.
> +     */
> +    uint32_t subtitle_end_time;

Hardcoding a millisecond timestamp precision into the API doesn't seem
good. As does using 32bit.

> +
> +    /**
> +     * Number of items in the @ref subtitle_areas array.
> +     */
> +    unsigned num_subtitle_areas;
> +
> +    /**
> +     * Array of subtitle areas, may be empty.
> +     */
> +    AVSubtitleArea **subtitle_areas;
> +
> +    /**
> +     * Usually the same as packet pts, in AV_TIME_BASE.
> +     *
> +     * @deprecated This is kept for compatibility reasons and corresponds to
> +     * AVSubtitle->pts. Might be removed in the future.
> +     */
> +    int64_t subtitle_pts;

What exactly is the point of having two different timestamps?

> +
> +    /**
> +     * Header containing style information for text subtitles.
> +     */
> +    AVBufferRef *subtitle_header;
> +
>      /**
>       * AVBuffer references backing the data for this frame. If all elements of
>       * this array are NULL, then this frame is not reference counted. This array
> @@ -740,6 +781,8 @@ void av_frame_move_ref(AVFrame *dst, AVFrame *src);
>  /**
>   * Allocate new buffer(s) for audio or video data.
>   *
> + * Note: For subtitle data, use av_frame_get_buffer2
> + *
>   * The following fields must be set on frame before calling this function:
>   * - format (pixel format for video, sample format for audio)
>   * - width and height for video
> @@ -759,9 +802,39 @@ void av_frame_move_ref(AVFrame *dst, AVFrame *src);
>   *              recommended to pass 0 here unless you know what you are doing.
>   *
>   * @return 0 on success, a negative AVERROR on error.
> + *
> + * @deprecated Use @ref av_frame_get_buffer2 instead and set @ref AVFrame.type
> + * before calling.
>   */
> +attribute_deprecated
>  int av_frame_get_buffer(AVFrame *frame, int align);
>  
> +/**
> + * Allocate new buffer(s) for audio, video or subtitle data.
> + *
> + * The following fields must be set on frame before calling this function:
> + * - format (pixel format for video, sample format for audio)
> + * - width and height for video
> + * - nb_samples and channel_layout for audio
> + * - type (AVMediaType)
> + *
> + * This function will fill AVFrame.data and AVFrame.buf arrays and, if
> + * necessary, allocate and fill AVFrame.extended_data and AVFrame.extended_buf.
> + * For planar formats, one buffer will be allocated for each plane.
> + *
> + * @warning: if frame already has been allocated, calling this function will
> + *           leak memory. In addition, undefined behavior can occur in certain
> + *           cases.
> + *
> + * @param frame frame in which to store the new buffers.
> + * @param align Required buffer size alignment. If equal to 0, alignment will be
> + *              chosen automatically for the current CPU. It is highly
> + *              recommended to pass 0 here unless you know what you are doing.
> + *
> + * @return 0 on success, a negative AVERROR on error.
> + */
> +int av_frame_get_buffer2(AVFrame *frame, int align);
> +
>  /**
>   * Check if the frame data is writable.
>   *
> @@ -863,6 +936,22 @@ AVFrameSideData *av_frame_get_side_data(const AVFrame *frame,
>   */
>  void av_frame_remove_side_data(AVFrame *frame, enum AVFrameSideDataType type);
>  
> +/**
> + * Copies subtitle data from AVSubtitle to AVFrame.
> + *
> + * @deprecated This is a compatibility method for interoperability with
> + * the legacy subtitle API.
> + */
> +void av_frame_put_subtitle(AVFrame *frame, const AVSubtitle *sub);
> +
> +/**
> + * Copies subtitle data from AVFrame to AVSubtitle.
> + *
> + * @deprecated This is a compatibility method for interoperability with
> + * the legacy subtitle API.
> + */
> +void av_frame_get_subtitle(AVSubtitle *sub, AVFrame *frame);

1. Missing const.
2. Wrong return type.
3. I don't see a need for this compatibility at all. As explained below,
the only user that needs something like this is libavcodec and then it
should live in libavcodec.
The last two points apply to both av_frame_get_subtitle and
av_frame_put_subtitle. Notice that the only use of any of these
functions outside of libavcodec is remove later again (patch #8 adds an
av_frame_get_subtitle() in ffmpeg.c, #16 removes it again) which shows
that such a compatibility stuff is not needed outside of libavcodec.

> +
>  
>  /**
>   * Flags for frame cropping.
> diff --git a/libavutil/subfmt.c b/libavutil/subfmt.c
> new file mode 100644
> index 0000000000..f68907a644
> --- /dev/null
> +++ b/libavutil/subfmt.c
> @@ -0,0 +1,243 @@
> +/*
> + * Copyright (c) 2021 softworkz
> + *
> + * This file is part of FFmpeg.
> + *
> + * FFmpeg is free software; you can redistribute it and/or
> + * modify it under the terms of the GNU Lesser General Public
> + * License as published by the Free Software Foundation; either
> + * version 2.1 of the License, or (at your option) any later version.
> + *
> + * FFmpeg is distributed in the hope that it will be useful,
> + * but WITHOUT ANY WARRANTY; without even the implied warranty of
> + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
> + * Lesser General Public License for more details.
> + *
> + * You should have received a copy of the GNU Lesser General Public
> + * License along with FFmpeg; if not, write to the Free Software
> + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
> + */
> +
> +#include <string.h>
> +#include "common.h"
> +#include "subfmt.h"
> +
> +typedef struct SubtitleFmtInfo {
> +    enum AVSubtitleType fmt;
> +    const char* name;
> +} SubtitleFmtInfo;
> +
> +static SubtitleFmtInfo sub_fmt_info[AV_SUBTITLE_FMT_NB] = {
> +    {.fmt = AV_SUBTITLE_FMT_UNKNOWN, .name = "Unknown subtitle format", },
> +    {.fmt = AV_SUBTITLE_FMT_BITMAP,  .name = "Graphical subtitles",     },
> +    {.fmt = AV_SUBTITLE_FMT_TEXT,    .name = "Text subtitles (plain)",  },
> +    {.fmt = AV_SUBTITLE_FMT_ASS,     .name = "Text subtitles (ass)",    },
> +};

There is no need to add an enum AVSubtitleType to the struct. After all,
the valid range of enum AVSubtitleType will always be
0..(AV_SUBTITLE_FMT_NB - 1) as long as we don't shoot ourselves in the foot.
Notice that av_get_subtitle_fmt_name() requires sub_fmt_info[i].fmt == i
for i in 0..(AV_SUBTITLE_FMT_NB - 1), i.e. it both relies on there not
being gaps (that's ok) and on the particular values of AV_SUBTITLE_FMT_*
being set in the way they are. The latter is not ok, use
[AV_SUBTITLE_FMT_UNKNOWN] = "Unknown subtitle format" etc. instead.
Furthermore, these lengths of the names are so close to each other that
it is advantageous to use an array of char[24] instead of an array of
pointers. This also saves relocations.

> +
> +const char *av_get_subtitle_fmt_name(enum AVSubtitleType sub_fmt)
> +{
> +    if (sub_fmt < 0 || sub_fmt >= AV_SUBTITLE_FMT_NB)
> +        return NULL;
> +    return sub_fmt_info[sub_fmt].name;
> +}
> +
> +enum AVSubtitleType av_get_subtitle_fmt(const char *name)
> +{
> +    for (int i = 0; i < AV_SUBTITLE_FMT_NB; i++)
> +        if (!strcmp(sub_fmt_info[i].name, name))
> +            return i;
> +    return AV_SUBTITLE_FMT_NONE;
> +}
> +
> +int av_subtitle_area2rect(AVSubtitleRect *dst, const AVSubtitleArea *src)
> +{
> +    dst->x         =  src->x;        
> +    dst->y         =  src->y;        
> +    dst->w         =  src->w;        
> +    dst->h         =  src->h;        
> +    dst->nb_colors =  src->nb_colors;
> +    dst->type      =  src->type;                     
> +    dst->flags     =  src->flags;
> +
> +    switch (dst->type) {
> +    case AV_SUBTITLE_FMT_BITMAP:
> +
> +        if (src->h > 0 && src->w > 0 && src->buf[0]) {
> +            uint32_t *pal;
> +            AVBufferRef *buf = src->buf[0];
> +            dst->data[0] = av_mallocz(buf->size);
> +            memcpy(dst->data[0], buf->data, buf->size);

av_memdup(); also unchecked allocation and unnecessary zeroing.

> +            dst->linesize[0] = src->linesize[0];
> +
> +            dst->data[1] = av_mallocz(256 * 4);

Unchecked allocation.

> +            pal = (uint32_t *)dst->data[1];
> +
> +            for (unsigned i = 0; i < 256; i++) {
> +                pal[i] = src->pal[i];
> +            }

This loop could be avoided by using av_memdup().

> +        }
> +
> +        break;
> +    case AV_SUBTITLE_FMT_TEXT:
> +
> +        if (src->text)
> +            dst->text = av_strdup(src->text);
> +        else
> +            dst->text = av_strdup("");
> +
> +        if (!dst->text)
> +            return AVERROR(ENOMEM);
> +
> +        break;
> +    case AV_SUBTITLE_FMT_ASS:
> +
> +        if (src->ass)
> +            dst->ass = av_strdup(src->ass);
> +        else
> +            dst->ass = av_strdup("");
> +
> +        if (!dst->ass)
> +            return AVERROR(ENOMEM);
> +
> +        break;
> +    default:
> +
> +        av_log(NULL, AV_LOG_ERROR, "Subtitle rect has invalid format: %d", dst->type);
> +        return AVERROR(ENOMEM);
> +    }
> +
> +    return 0;
> +}
> +
> +int av_subtitle_rect2area(AVSubtitleArea *dst, const AVSubtitleRect *src)
> +{
> +    dst->x         =  src->x;        
> +    dst->y         =  src->y;        
> +    dst->w         =  src->w;        
> +    dst->h         =  src->h;        
> +    dst->nb_colors =  src->nb_colors;
> +    dst->type      =  src->type;                     

There seems to be an awful lot of trailing whitespace in the above lines
(and probably many more lines).

> +    dst->flags     =  src->flags;
> +
> +    switch (dst->type) {
> +    case AV_SUBTITLE_FMT_BITMAP:
> +
> +        if (src->h > 0 && src->w > 0 && src->data[0]) {
> +            AVBufferRef *buf = av_buffer_allocz(src->h * src->linesize[0]);

Unchecked allocation; also zeroing is unnecessary here.

> +            memcpy(buf->data, src->data[0], buf->size);
> +
> +            dst->buf[0] = buf;
> +            dst->linesize[0] = src->linesize[0];
> +        }
> +
> +        if (src->data[1]) {
> +            uint32_t *pal = (uint32_t *)src->data[1];
> +
> +            for (unsigned i = 0; i < 256; i++) {
> +                dst->pal[i] = pal[i];
> +            }
> +        }
> +
> +        break;
> +    case AV_SUBTITLE_FMT_TEXT:
> +
> +        if (src->text) {
> +            dst->text = av_strdup(src->text);
> +            if (!dst->text)
> +                return AVERROR(ENOMEM);
> +        }
> +
> +        break;
> +    case AV_SUBTITLE_FMT_ASS:
> +
> +        if (src->ass) {
> +            dst->ass = av_strdup(src->ass);
> +            if (!dst->ass)
> +                return AVERROR(ENOMEM);
> +        }
> +
> +        break;
> +    default:
> +
> +        av_log(NULL, AV_LOG_ERROR, "Subtitle area has invalid format: %d", dst->type);
> +        return AVERROR(ENOMEM);

ENOMEM?

> +    }
> +
> +    return 0;
> +}
> +
> +int av_subtitle_area2area(AVSubtitleArea *dst, const AVSubtitleArea *src, int copy_data)
> +{
> +    dst->x         =  src->x;        
> +    dst->y         =  src->y;        
> +    dst->w         =  src->w;        
> +    dst->h         =  src->h;        
> +    dst->nb_colors =  src->nb_colors;
> +    dst->type      =  src->type;                     
> +    dst->flags     =  src->flags;
> +
> +    switch (dst->type) {
> +    case AV_SUBTITLE_FMT_BITMAP:
> +
> +        if (src->h > 0 && src->w > 0 && src->buf[0]) {
> +            dst->buf[0] = av_buffer_ref(src->buf[0]);
> +            if (!dst->buf[0])
> +                return AVERROR(ENOMEM);
> +
> +            if (copy_data) {
> +                const int ret = av_buffer_make_writable(&dst->buf[0]);
> +                if (ret < 0)
> +                    return ret;
> +            }
> +
> +            dst->linesize[0] = src->linesize[0];
> +        }

This would actually need to be a loop (for (int i = 0; i <
AV_NUM_BUFFER_POINTERS; i++)). Furthermore, is it intended that there is
no cleanup code in case only a subset of the allocations succeed?

> +
> +        for (unsigned i = 0; i < 256; i++)
> +            dst->pal[i] = src->pal[i];

memcpy?

> +
> +        break;
> +    case AV_SUBTITLE_FMT_TEXT:
> +
> +        if (src->text) {
> +            dst->text = av_strdup(src->text);
> +            if (!dst->text)
> +                return AVERROR(ENOMEM);
> +        }
> +
> +        break;
> +    case AV_SUBTITLE_FMT_ASS:
> +
> +        if (src->ass) {
> +            dst->ass = av_strdup(src->ass);
> +            if (!dst->ass)
> +                return AVERROR(ENOMEM);
> +        }
> +
> +        break;
> +    default:
> +
> +        av_log(NULL, AV_LOG_ERROR, "Subtitle area has invalid format: %d", dst->type);
> +        return AVERROR(ENOMEM);
> +    }
> +
> +    return 0;
> +}
> +
> +void avsubtitle_free(AVSubtitle *sub)
> +{
> +    for (unsigned i = 0; i < sub->num_rects; i++) {
> +        av_freep(&sub->rects[i]->data[0]);
> +        av_freep(&sub->rects[i]->data[1]);
> +        av_freep(&sub->rects[i]->data[2]);
> +        av_freep(&sub->rects[i]->data[3]);
> +        av_freep(&sub->rects[i]->text);
> +        av_freep(&sub->rects[i]->ass);
> +        av_freep(&sub->rects[i]);
> +    }
> +
> +    av_freep(&sub->rects);
> +
> +    memset(sub, 0, sizeof(*sub));
> +}
> +
> diff --git a/libavutil/subfmt.h b/libavutil/subfmt.h
> new file mode 100644
> index 0000000000..372e3db413
> --- /dev/null
> +++ b/libavutil/subfmt.h
> @@ -0,0 +1,185 @@
> +/*
> + * Copyright (c) 2021 softworkz
> + *
> + * This file is part of FFmpeg.
> + *
> + * FFmpeg is free software; you can redistribute it and/or
> + * modify it under the terms of the GNU Lesser General Public
> + * License as published by the Free Software Foundation; either
> + * version 2.1 of the License, or (at your option) any later version.
> + *
> + * FFmpeg is distributed in the hope that it will be useful,
> + * but WITHOUT ANY WARRANTY; without even the implied warranty of
> + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
> + * Lesser General Public License for more details.
> + *
> + * You should have received a copy of the GNU Lesser General Public
> + * License along with FFmpeg; if not, write to the Free Software
> + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
> + */
> +
> +#ifndef AVUTIL_SUBFMT_H
> +#define AVUTIL_SUBFMT_H
> +
> +#include <inttypes.h>
> +

stdint is enough.

> +#include "buffer.h"
> +
> +enum AVSubtitleType {
> +
> +    /**
> +     * Subtitle format unknown.
> +     */
> +    AV_SUBTITLE_FMT_NONE = -1,
> +
> +    /**
> +     * Subtitle format unknown.
> +     */
> +    AV_SUBTITLE_FMT_UNKNOWN = 0,
> +
> +    /**
> +     * Bitmap area in AVSubtitleRect.data, pixfmt AV_PIX_FMT_PAL8.
> +     */
> +    AV_SUBTITLE_FMT_BITMAP = 1,
> +    SUBTITLE_BITMAP = 1,        ///< Deprecated, use AV_SUBTITLE_FMT_BITMAP instead.
> +
> +    /**
> +     * Plain text in AVSubtitleRect.text.
> +     */
> +    AV_SUBTITLE_FMT_TEXT = 2,
> +    SUBTITLE_TEXT = 2,          ///< Deprecated, use AV_SUBTITLE_FMT_TEXT instead.
> +
> +    /**
> +     * Text Formatted as per ASS specification, contained AVSubtitleRect.ass.
> +     */
> +    AV_SUBTITLE_FMT_ASS = 3,
> +    SUBTITLE_ASS = 3,           ///< Deprecated, use AV_SUBTITLE_FMT_ASS instead.
> +
> +    AV_SUBTITLE_FMT_NB,
> +};
> +
> +typedef struct AVSubtitleArea {
> +#define AV_NUM_BUFFER_POINTERS 1
> +
> +    enum AVSubtitleType type;
> +    int flags;
> +
> +    int x;         ///< top left corner  of area.
> +    int y;         ///< top left corner  of area.
> +    int w;         ///< width            of area.
> +    int h;         ///< height           of area.
> +    int nb_colors; ///< number of colors in bitmap palette (@ref pal).
> +
> +    /**
> +     * Buffers and line sizes for the bitmap of this subtitle.
> +     * 
> +     * @{
> +     */
> +    AVBufferRef *buf[AV_NUM_BUFFER_POINTERS];
> +    int linesize[AV_NUM_BUFFER_POINTERS];
> +    /**
> +     * @}
> +     */
> +
> +    uint32_t pal[256]; ///< RGBA palette for the bitmap.
> +
> +    char *text;        ///< 0-terminated plain UTF-8 text
> +    char *ass;         ///< 0-terminated ASS/SSA compatible event line.
> +
> +} AVSubtitleArea;

I presume that sizeof(AVSubtitleArea) is not supposed to be part of the
public API/ABI (after all, you used an array of pointers to
AVSubtitleArea in AVFrame). This needs to be documented.

> +
> +typedef struct AVSubtitleRect {
> +    unsigned nb_refs;
> +    int x;         ///< top left corner  of pict, undefined when pict is not set
> +    int y;         ///< top left corner  of pict, undefined when pict is not set
> +    int w;         ///< width            of pict, undefined when pict is not set
> +    int h;         ///< height           of pict, undefined when pict is not set
> +    int nb_colors; ///< number of colors in pict, undefined when pict is not set
> +
> +    /**
> +     * data+linesize for the bitmap of this subtitle.
> +     */
> +    uint8_t *data[4];
> +    int linesize[4];
> +
> +    enum AVSubtitleType type;
> +
> +    char *text;                     ///< 0 terminated plain UTF-8 text
> +
> +    /**
> +     * 0-terminated ASS/SSA compatible event line.
> +     */
> +    char *ass;
> +
> +    int flags;
> +} AVSubtitleRect;
> +
> +typedef struct AVSubtitle {
> +    uint16_t format; /* 0 = graphics */
> +    uint32_t start_display_time; /* relative to packet pts, in ms */
> +    uint32_t end_display_time; /* relative to packet pts, in ms */
> +    unsigned num_rects;
> +    AVSubtitleRect **rects;
> +    int64_t pts;    ///< Same as packet pts, in AV_TIME_BASE
> +} AVSubtitle;
> +
> +/**
> + * Return the name of sub_fmt, or NULL if sub_fmt is not
> + * recognized.
> + */
> +const char *av_get_subtitle_fmt_name(enum AVSubtitleType sub_fmt);
> +
> +/**
> + * Return a subtitle format corresponding to name, or AV_SUBTITLE_FMT_NONE
> + * on error.
> + *
> + * @param name Subtitle format name.
> + */
> +enum AVSubtitleType av_get_subtitle_fmt(const char *name);

Do we need this? Given that there are many possible ways to name the
subtitles, it doesn't seem useful to me. The only way to be sure to use
the "authoritative" name for a subtitle format is by
av_get_subtitle_fmt_name(). But then one doesn't need
av_get_subtitle_fmt() at all.

> +
> +/**
> + * Copy a subtitle area.
> + *
> + * @param dst        The target area.
> + * @param src        The source area.
> + * @param copy_data  Determines whether to copy references or actual
> + *                   data from @ref AVSubtitleArea.buf.
> + *
> + * @return 0 on success.

Better make it "< 0 on error" to reserve positive return values in case
we ever needed them.

> + */
> +int av_subtitle_area2area(AVSubtitleArea *dst, const AVSubtitleArea *src, int copy_data);

This documentation does not mention that dst should be "blank". But this
actually poses a problem: How does the user know that an AVSubtitleArea
is blank given that ? For AVFrames the answer is this: An AVFrame is
blank if it comes directly from av_frame_alloc() or av_frame_unref().
But there is no equivalent for these functions here.
Furthermore, is this API supposed to be a standalone API or is
AVSubtitleArea something that is always supposed to be part of an
AVFrame? The usage in this patchset suggests the latter:
av_subtitle_area2area() is only used in frame.c.

> +
> +/**
> + * Copy data from @ref AVSubtitleArea to @ref AVSubtitleRect.
> + *
> + * @param dst        The target rect (@ref AVSubtitleRect).
> + * @param src        The source area (@ref AVSubtitleArea).
> + *
> + * @return 0 on success.
> + *
> + * @deprecated This is a compatibility method for interoperability with
> + * the legacy subtitle API.
> + */
> +int av_subtitle_area2rect(AVSubtitleRect *dst, const AVSubtitleArea *src);
> +
> +/**
> + * Copy data from @ref AVSubtitleRect to @ref AVSubtitleArea.
> + *
> + * @param dst        The source area (@ref AVSubtitleArea).
> + * @param src        The target rect (@ref AVSubtitleRect).
> + *
> + * @return 0 on success.
> + *
> + * @deprecated This is a compatibility method for interoperability with
> + * the legacy subtitle API.
> + */
> +int av_subtitle_rect2area(AVSubtitleArea *dst, const AVSubtitleRect *src);

Do we need compatibility with the legacy API at all? I only see two
usecases for this:

1. One has a huge project using AVSubtitles that uses AVSubtitle so
pervasively that one can't transition to the new API in one step. But I
doubt that that's a thing.
2. One is forced to provide compatibility with the legacy API while also
providing an API based upon the new API. This is of course the situation
with libavcodec, which for this reason needs such compatibility
functions (in libavcodec!). But I don't see a second user with the same
needs, so I don't see why these functions (which would actually need to
be declared as deprecated from the very beginning if public) should be
public.

> +
> +/**
> + * Free all allocated data in the given subtitle struct.
> + *
> + * @param sub AVSubtitle to free.
> + */
> +void avsubtitle_free(AVSubtitle *sub);
> +
> +#endif /* AVUTIL_SUBFMT_H */
>