[FFmpeg-devel] [PATCH v16 15/16] avcodec/subtitles: Migrate subtitle encoders to frame-based API and provide a compatibility shim for the legacy api

Fri Nov 26 17:09:25 EET 2021

Soft Works:
> Also introduce deferred loading of ass headers for all cases where it can't be taken from the context of a decoder.

This should be a commit of its own.

> 
> Signed-off-by: softworkz <softworkz at hotmail.com>
> ---
>  libavcodec/assenc.c        |  81 ++++++++++++++++++++---------
>  libavcodec/avcodec.h       |   7 +++
>  libavcodec/dvbsubenc.c     |  85 +++++++++++++++---------------
>  libavcodec/dvdsubenc.c     |  89 +++++++++++++++++---------------
>  libavcodec/encode.c        |  98 ++++++++++++++++++++++++++++++++++-
>  libavcodec/movtextenc.c    | 103 +++++++++++++++++++++++++------------
>  libavcodec/srtenc.c        |  96 ++++++++++++++++++++++------------
>  libavcodec/tests/avcodec.c |   2 -
>  libavcodec/ttmlenc.c       |  82 +++++++++++++++++++++++------
>  libavcodec/webvttenc.c     |  73 +++++++++++++++++++-------
>  libavcodec/xsubenc.c       |  65 ++++++++++++-----------
>  11 files changed, 541 insertions(+), 240 deletions(-)
> 
> diff --git a/libavcodec/avcodec.h b/libavcodec/avcodec.h
> index 0c5819b116..304b35ba86 100644
> --- a/libavcodec/avcodec.h
> +++ b/libavcodec/avcodec.h
> @@ -2949,10 +2949,17 @@ void av_parser_close(AVCodecParserContext *s);
>   * @{
>   */
>  
> + /**
> +  * @deprecated Use @ref avcodec_encode_subtitle2() instead.
> +  */
> +attribute_deprecated
>  int avcodec_encode_subtitle(AVCodecContext *avctx, uint8_t *buf, int buf_size,
>                              const AVSubtitle *sub);
>  
>  
> +int avcodec_encode_subtitle2(AVCodecContext* avctx, struct AVPacket* avpkt,
> +    AVFrame* frame, int* got_packet);
> +

Missing documentation. The signature and the implementation shows that
you have recreated the old encode API avcodec_encode_(audio2|video2)
with the difference that you (like the current subtitle API) require
preallocated (not necessarily refcounted) buffers in avpkt. Why not use
the ordinary encode API?
(Why is frame not const?)
Apart from that: You never zero the packet padding. I don't even know
whether you expect the buffer to be padded.

>  /**
>   * @}
>   */
> diff --git a/libavcodec/dvdsubenc.c b/libavcodec/dvdsubenc.c
> index ff4fbed39d..74fefe5664 100644
> --- a/libavcodec/dvdsubenc.c
> +++ b/libavcodec/dvdsubenc.c
> @@ -114,15 +114,14 @@ static int color_distance(uint32_t a, uint32_t b)
>   * Count colors used in a rectangle, quantizing alpha and grouping by
>   * nearest global palette entry.
>   */
> -static void count_colors(AVCodecContext *avctx, unsigned hits[33],
> -                         const AVSubtitleRect *r)
> +static void count_colors(const AVCodecContext *avctx, unsigned hits[33],
> +                         const AVSubtitleArea *r)
>  {
>      DVDSubtitleContext *dvdc = avctx->priv_data;
>      unsigned count[256] = { 0 };
> -    uint32_t *palette = (uint32_t *)r->data[1];
>      uint32_t color;
>      int x, y, i, j, match, d, best_d, av_uninit(best_j);
> -    uint8_t *p = r->data[0];
> +    uint8_t *p = r->buf[0]->data;
>  
>      for (y = 0; y < r->h; y++) {
>          for (x = 0; x < r->w; x++)
> @@ -132,7 +131,7 @@ static void count_colors(AVCodecContext *avctx, unsigned hits[33],
>      for (i = 0; i < 256; i++) {
>          if (!count[i]) /* avoid useless search */
>              continue;
> -        color = palette[i];
> +        color = r->pal[i];
>          /* 0: transparent, 1-16: semi-transparent, 17-33 opaque */
>          match = color < 0x33000000 ? 0 : color < 0xCC000000 ? 1 : 17;
>          if (match) {
> @@ -232,13 +231,13 @@ static void build_color_map(AVCodecContext *avctx, int cmap[],
>      }
>  }
>  
> -static void copy_rectangle(AVSubtitleRect *dst, AVSubtitleRect *src, int cmap[])
> +static void copy_rectangle(AVSubtitleArea*dst, AVSubtitleArea *src, int cmap[])
>  {
>      int x, y;
>      uint8_t *p, *q;
>  
> -    p = src->data[0];
> -    q = dst->data[0] + (src->x - dst->x) +
> +    p = src->buf[0]->data;
> +    q = dst->buf[0]->data + (src->x - dst->x) +
>                              (src->y - dst->y) * dst->linesize[0];
>      for (y = 0; y < src->h; y++) {
>          for (x = 0; x < src->w; x++)
> @@ -248,51 +247,56 @@ static void copy_rectangle(AVSubtitleRect *dst, AVSubtitleRect *src, int cmap[])
>      }
>  }
>  
> -static int encode_dvd_subtitles(AVCodecContext *avctx,
> -                                uint8_t *outbuf, int outbuf_size,
> -                                const AVSubtitle *h)
> +static int encode_dvd_subtitles(AVCodecContext* avctx, AVPacket* avpkt,
> +                                const AVFrame* frame, int* got_packet)
>  {
>      DVDSubtitleContext *dvdc = avctx->priv_data;
>      uint8_t *q, *qq;
>      int offset1, offset2;
> -    int i, rects = h->num_rects, ret;
> +    int ret = 0;
> +    unsigned i, rects = frame->num_subtitle_areas;
>      unsigned global_palette_hits[33] = { 0 };
>      int cmap[256];
>      int out_palette[4];
>      int out_alpha[4];
> -    AVSubtitleRect vrect;
> -    uint8_t *vrect_data = NULL;
> +    AVSubtitleArea vrect;

Ok, seems like sizeof(AVSubtitleArea) is part of the API/ABI.

> +    uint8_t* vrect_data = NULL, *outbuf = avpkt->data;
>      int x2, y2;
>      int forced = 0;
> +    int outbuf_size = avpkt->size;
>  
> -    if (rects == 0 || !h->rects)
> +    if (rects == 0)
> +        return 0;
> +
> +    if (!frame->subtitle_areas)
>          return AVERROR(EINVAL);
> +
>      for (i = 0; i < rects; i++)
> -        if (h->rects[i]->type != SUBTITLE_BITMAP) {
> +        if (frame->subtitle_areas[i]->type != SUBTITLE_BITMAP) {
>              av_log(avctx, AV_LOG_ERROR, "Bitmap subtitle required\n");
>              return AVERROR(EINVAL);
>          }
>      /* Mark this subtitle forced if any of the rectangles is forced. */
>      for (i = 0; i < rects; i++)
> -        if ((h->rects[i]->flags & AV_SUBTITLE_FLAG_FORCED) != 0) {
> +        if ((frame->subtitle_areas[i]->flags & AV_SUBTITLE_FLAG_FORCED) != 0) {
>              forced = 1;
>              break;
>          }
>  
> -    vrect = *h->rects[0];
> +    vrect = *frame->subtitle_areas[0];
>  
>      if (rects > 1) {
>          /* DVD subtitles can have only one rectangle: build a virtual
>             rectangle containing all actual rectangles.
>             The data of the rectangles will be copied later, when the palette
>             is decided, because the rectangles may have different palettes. */
> -        int xmin = h->rects[0]->x, xmax = xmin + h->rects[0]->w;
> -        int ymin = h->rects[0]->y, ymax = ymin + h->rects[0]->h;
> +        int xmin = frame->subtitle_areas[0]->x, xmax = xmin + frame->subtitle_areas[0]->w;
> +        int ymin = frame->subtitle_areas[0]->y, ymax = ymin + frame->subtitle_areas[0]->h;
>          for (i = 1; i < rects; i++) {
> -            xmin = FFMIN(xmin, h->rects[i]->x);
> -            ymin = FFMIN(ymin, h->rects[i]->y);
> -            xmax = FFMAX(xmax, h->rects[i]->x + h->rects[i]->w);
> -            ymax = FFMAX(ymax, h->rects[i]->y + h->rects[i]->h);
> +            xmin = FFMIN(xmin, frame->subtitle_areas[i]->x);
> +            ymin = FFMIN(ymin, frame->subtitle_areas[i]->y);
> +            xmax = FFMAX(xmax, frame->subtitle_areas[i]->x + frame->subtitle_areas[i]->w);
> +            ymax = FFMAX(ymax, frame->subtitle_areas[i]->y + frame->subtitle_areas[i]->h);
>          }
>          vrect.x = xmin;
>          vrect.y = ymin;
> @@ -304,27 +308,29 @@ static int encode_dvd_subtitles(AVCodecContext *avctx,
>          /* Count pixels outside the virtual rectangle as transparent */
>          global_palette_hits[0] = vrect.w * vrect.h;
>          for (i = 0; i < rects; i++)
> -            global_palette_hits[0] -= h->rects[i]->w * h->rects[i]->h;
> +            global_palette_hits[0] -= frame->subtitle_areas[i]->w * frame->subtitle_areas[i]->h;
>      }
>  
>      for (i = 0; i < rects; i++)
> -        count_colors(avctx, global_palette_hits, h->rects[i]);
> +        count_colors(avctx, global_palette_hits, frame->subtitle_areas[i]);
>      select_palette(avctx, out_palette, out_alpha, global_palette_hits);
>  
>      if (rects > 1) {
> -        if (!(vrect_data = av_calloc(vrect.w, vrect.h)))
> +
> +        vrect.buf[0] = av_buffer_allocz((size_t)vrect.w * vrect.h);

You are using the AVBuffer API here although this buffer will never be
shared with anything. This can and should be avoided (and anyway: you
did not remove vrect_data. The av_free(vrect_data) below is an
av_free(NULL). And in case one goes to fail

> +        if (!vrect.buf[0])
>              return AVERROR(ENOMEM);
> -        vrect.data    [0] = vrect_data;
> +
>          vrect.linesize[0] = vrect.w;
>          for (i = 0; i < rects; i++) {
> -            build_color_map(avctx, cmap, (uint32_t *)h->rects[i]->data[1],
> +            build_color_map(avctx, cmap, frame->subtitle_areas[i]->pal,
>                              out_palette, out_alpha);
> -            copy_rectangle(&vrect, h->rects[i], cmap);
> +            copy_rectangle(&vrect, frame->subtitle_areas[i], cmap);
>          }
>          for (i = 0; i < 4; i++)
>              cmap[i] = i;
>      } else {
> -        build_color_map(avctx, cmap, (uint32_t *)h->rects[0]->data[1],
> +        build_color_map(avctx, cmap, frame->subtitle_areas[0]->pal,
>                          out_palette, out_alpha);
>      }
>  
> @@ -344,10 +350,10 @@ static int encode_dvd_subtitles(AVCodecContext *avctx,
>          ret = AVERROR_BUFFER_TOO_SMALL;
>          goto fail;
>      }
> -    dvd_encode_rle(&q, vrect.data[0], vrect.w * 2,
> +    dvd_encode_rle(&q, vrect.buf[0]->data, vrect.w * 2,
>                     vrect.w, (vrect.h + 1) >> 1, cmap);
>      offset2 = q - outbuf;
> -    dvd_encode_rle(&q, vrect.data[0] + vrect.w, vrect.w * 2,
> +    dvd_encode_rle(&q, vrect.buf[0]->data + vrect.w, vrect.w * 2,
>                     vrect.w, vrect.h >> 1, cmap);
>  
>      if (dvdc->even_rows_fix && (vrect.h & 1)) {
> @@ -362,7 +368,7 @@ static int encode_dvd_subtitles(AVCodecContext *avctx,
>      bytestream_put_be16(&qq, q - outbuf);
>  
>      // send start display command
> -    bytestream_put_be16(&q, (h->start_display_time*90) >> 10);
> +    bytestream_put_be16(&q, (frame->subtitle_start_time * 90) >> 10);
>      bytestream_put_be16(&q, (q - outbuf) /*- 2 */ + 8 + 12 + 2);
>      *q++ = 0x03; // palette - 4 nibbles
>      *q++ = (out_palette[3] << 4) | out_palette[2];
> @@ -394,7 +400,7 @@ static int encode_dvd_subtitles(AVCodecContext *avctx,
>      *q++ = 0xff; // terminating command
>  
>      // send stop display command last
> -    bytestream_put_be16(&q, (h->end_display_time*90) >> 10);
> +    bytestream_put_be16(&q, (frame->subtitle_end_time*90) >> 10);
>      bytestream_put_be16(&q, (q - outbuf) - 2 /*+ 4*/);
>      *q++ = 0x02; // set end
>      *q++ = 0xff; // terminating command
> @@ -403,7 +409,9 @@ static int encode_dvd_subtitles(AVCodecContext *avctx,
>      bytestream_put_be16(&qq, q - outbuf);
>  
>      av_log(NULL, AV_LOG_DEBUG, "subtitle_packet size=%"PTRDIFF_SPECIFIER"\n", q - outbuf);
> -    ret = q - outbuf;
> +    avpkt->size = q - outbuf;
> +    ret = 0;
> +    *got_packet = 1;
>  
>  fail:
>      av_free(vrect_data);

vrect.buf[0] leaks here in case rects > 1.

> @@ -467,14 +475,13 @@ static int dvdsub_init(AVCodecContext *avctx)
>      return 0;
>  }
>  
> -static int dvdsub_encode(AVCodecContext *avctx,
> -                         unsigned char *buf, int buf_size,
> -                         const AVSubtitle *sub)
> +static int dvdsub_encode(struct AVCodecContext* avctx, struct AVPacket* avpkt,
> +                         const struct AVFrame* frame, int* got_packet)
>  {
>      //DVDSubtitleContext *s = avctx->priv_data;
>      int ret;
>  
> -    ret = encode_dvd_subtitles(avctx, buf, buf_size, sub);
> +    ret = encode_dvd_subtitles(avctx, avpkt, frame, got_packet);
>      return ret;
>  }
>  
> @@ -499,7 +506,7 @@ const AVCodec ff_dvdsub_encoder = {
>      .type           = AVMEDIA_TYPE_SUBTITLE,
>      .id             = AV_CODEC_ID_DVD_SUBTITLE,
>      .init           = dvdsub_init,
> -    .encode_sub     = dvdsub_encode,
> +    .encode2        = dvdsub_encode,
>      .priv_class     = &dvdsubenc_class,
>      .priv_data_size = sizeof(DVDSubtitleContext),
>      .caps_internal  = FF_CODEC_CAP_INIT_THREADSAFE,
> diff --git a/libavcodec/encode.c b/libavcodec/encode.c
> index dd25cf999b..dfbec01a3d 100644
> --- a/libavcodec/encode.c
> +++ b/libavcodec/encode.c
> @@ -140,16 +140,110 @@ fail:
>      return ret;
>  }
>  
> +/**
> + * \brief 
> + * \param avctx 
> + * \param buf q
> + * \param buf_size 
> + * \param sub 
> + * \return 
> + */
>  int avcodec_encode_subtitle(AVCodecContext *avctx, uint8_t *buf, int buf_size,
>                              const AVSubtitle *sub)
>  {
> -    int ret;
> +    int ret = 0, got_packet = 0;
> +    AVFrame *frame = NULL;
> +    AVPacket* avpkt = NULL;
> +
>      if (sub->start_display_time) {
>          av_log(avctx, AV_LOG_ERROR, "start_display_time must be 0.\n");
>          return -1;
>      }
>  
> -    ret = avctx->codec->encode_sub(avctx, buf, buf_size, sub);
> +    // If the encoder implements the old API (encode_sub), call it directly:

You ported all subtitle encoders at once; this has the downside of
making the diff bigger and reviewing harder, but the advantage of
simplifying the compatibility code, because one knows that one does not
need to care about the case of an encoder implementing the old API. Yet
weirdly you didn't take advantage of this. You also did not remove
AVCodec.encode_sub.

> +    if (avctx->codec->encode_sub) {
> +        ret = avctx->codec->encode_sub(avctx, buf, buf_size, sub);
> +
> +        avctx->frame_number++;
> +        return ret;
> +    }
> +
> +    // Create a temporary frame for calling the regular api:
> +    frame = av_frame_alloc();

avcodec_open2() allocates two frames and three packets which are unused
when encoding subtitles. So you can reuse them here.

> +    if (!frame) {
> +        ret = AVERROR(ENOMEM);
> +        goto exit;
> +    }
> +
> +    frame->format = sub->format;
> +    frame->type = AVMEDIA_TYPE_SUBTITLE;
> +    ret = av_frame_get_buffer2(frame, 0);
> +    if (ret < 0)
> +        goto exit;
> +
> +    // Create a temporary packet
> +    avpkt = av_packet_alloc();
> +    if (!avpkt) {
> +        ret = AVERROR(ENOMEM);
> +        goto exit;
> +    }
> +
> +    avpkt->data = buf;
> +    avpkt->size = buf_size;
> +
> +    // Copy legacy subtitle data to temp frame
> +    av_frame_put_subtitle(frame, sub);
> +
> +    ret = avcodec_encode_subtitle2(avctx, avpkt, frame, &got_packet);
> +
> +    if (got_packet)
> +        ret = avpkt->size;
> +
> +    avpkt->data = NULL;
> +
> +exit:
> +
> +    av_packet_free(&avpkt);
> +    av_frame_free(&frame);
> +    return ret;
> +}
> +
> +int avcodec_encode_subtitle2(AVCodecContext *avctx, AVPacket* avpkt,
> +                             AVFrame* frame, int* got_packet)
> +{
> +    int ret;
> +
> +    *got_packet = 0;
> +    if (frame->subtitle_start_time) {
> +        av_log(avctx, AV_LOG_ERROR, "start_display_time must be 0.\n");
> +        return AVERROR_INVALIDDATA;
> +    }
> +
> +    if (avctx->codec->encode2) {
> +
> +        // Encoder implements the new/regular API        
> +        ret = avctx->codec->encode2(avctx, avpkt, frame, got_packet);
> +    }
> +    else {
> +
> +        // Encoder implements the legacy API for subtitle encoding (encode_sub)
> +        // copy subtitle data to a temporary AVSubtitle (legacy) buffer
> +        AVSubtitle out_sub = { 0 };
> +        av_frame_get_subtitle(&out_sub, frame);
> +
> +        ret = avctx->codec->encode_sub(avctx, avpkt->data, avpkt->size, &out_sub);
> +
> +        if (ret >= 0) {
> +            avpkt->size = ret;
> +            ret = 0;
> +            *got_packet = 1;
> +        }
> +        else
> +            avpkt->size = 0;
> +
> +        avsubtitle_free(&out_sub);
> +    }
> +
>      avctx->frame_number++;
>      return ret;
>  }