[FFmpeg-devel] [PATCH v5 01/12] avutil/frame: Subtitle Filtering - Add AVMediaType property to AVFrame

Mon Sep 13 01:14:44 EEST 2021

Soft Works:
> This is the root commit for adding subtitle filtering capabilities.
> Adding the media type property to AVFrame replaces the previous
> way of distinction which was based on checking width and height
> to determine whether a frame is audio or video.
> 
> Signed-off-by: softworkz <softworkz at hotmail.com>
> ---
>  libavutil/frame.c   | 85 ++++++++++++++++++++++++++++++++++++---------
>  libavutil/frame.h   | 39 +++++++++++++++++++--
>  libavutil/version.h |  2 +-
>  3 files changed, 106 insertions(+), 20 deletions(-)
> 
> diff --git a/libavutil/frame.c b/libavutil/frame.c
> index b0ceaf7145..ef2867d318 100644
> --- a/libavutil/frame.c
> +++ b/libavutil/frame.c
> @@ -103,6 +103,7 @@ AVFrame *av_frame_alloc(void)
>      if (!frame)
>          return NULL;
>  
> +    frame->type = AVMEDIA_TYPE_UNKNOWN;
>      frame->extended_data = NULL;
>      get_frame_defaults(frame);
>  
> @@ -244,22 +245,37 @@ static int get_audio_buffer(AVFrame *frame, int align)
>  }
>  
>  int av_frame_get_buffer(AVFrame *frame, int align)
> +{
> +    if (frame->width > 0 && frame->height > 0)
> +        frame->type = AVMEDIA_TYPE_VIDEO;
> +    else if (frame->nb_samples > 0 && (frame->channel_layout || frame->channels > 0))
> +        frame->type = AVMEDIA_TYPE_AUDIO;
> +
> +    return av_frame_get_buffer2(frame, align);
> +}
> +
> +int av_frame_get_buffer2(AVFrame *frame, int align)
>  {
>      if (frame->format < 0)
>          return AVERROR(EINVAL);
>  
> -    if (frame->width > 0 && frame->height > 0)
> +    switch(frame->type) {
> +    case AVMEDIA_TYPE_VIDEO:
>          return get_video_buffer(frame, align);
> -    else if (frame->nb_samples > 0 && (frame->channel_layout || frame->channels > 0))
> +    case AVMEDIA_TYPE_AUDIO:
>          return get_audio_buffer(frame, align);
> -
> -    return AVERROR(EINVAL);
> +    case AVMEDIA_TYPE_SUBTITLE:
> +        return 0;

Making this a no-op (and keeping it a no-op in future commits) implies
that av_frame_make_writable() does not work for subtitles at all: It
just temporarily increments the refcount of all the AVSubtitleRects and
then decrements it again, so they are still shared. But it needs to do a
deep copy (in case the frame isn't writable).

> +    default:
> +        return AVERROR(EINVAL);
> +    }
>  }
>  
>  static int frame_copy_props(AVFrame *dst, const AVFrame *src, int force_copy)
>  {
>      int ret, i;
>  
> +    dst->type                   = src->type;
>      dst->key_frame              = src->key_frame;
>      dst->pict_type              = src->pict_type;
>      dst->sample_aspect_ratio    = src->sample_aspect_ratio;
> @@ -331,6 +347,7 @@ int av_frame_ref(AVFrame *dst, const AVFrame *src)
>      av_assert1(dst->width == 0 && dst->height == 0);
>      av_assert1(dst->channels == 0);
>  
> +    dst->type           = src->type;
>      dst->format         = src->format;
>      dst->width          = src->width;
>      dst->height         = src->height;
> @@ -344,7 +361,7 @@ int av_frame_ref(AVFrame *dst, const AVFrame *src)
>  
>      /* duplicate the frame data if it's not refcounted */
>      if (!src->buf[0]) {
> -        ret = av_frame_get_buffer(dst, 0);
> +        ret = av_frame_get_buffer2(dst, 0);
>          if (ret < 0)
>              goto fail;
>  
> @@ -499,6 +516,7 @@ int av_frame_make_writable(AVFrame *frame)
>          return 0;
>  
>      memset(&tmp, 0, sizeof(tmp));
> +    tmp.type           = frame->type;
>      tmp.format         = frame->format;
>      tmp.width          = frame->width;
>      tmp.height         = frame->height;
> @@ -509,7 +527,7 @@ int av_frame_make_writable(AVFrame *frame)
>      if (frame->hw_frames_ctx)
>          ret = av_hwframe_get_buffer(frame->hw_frames_ctx, &tmp, 0);
>      else
> -        ret = av_frame_get_buffer(&tmp, 0);
> +        ret = av_frame_get_buffer2(&tmp, 0);
>      if (ret < 0)
>          return ret;
>  
> @@ -544,14 +562,22 @@ AVBufferRef *av_frame_get_plane_buffer(AVFrame *frame, int plane)
>      uint8_t *data;
>      int planes, i;
>  
> -    if (frame->nb_samples) {
> -        int channels = frame->channels;
> -        if (!channels)
> -            return NULL;
> -        CHECK_CHANNELS_CONSISTENCY(frame);
> -        planes = av_sample_fmt_is_planar(frame->format) ? channels : 1;
> -    } else
> +    switch(frame->type) {
> +    case AVMEDIA_TYPE_VIDEO:
>          planes = 4;
> +        break;
> +    case AVMEDIA_TYPE_AUDIO:
> +        {
> +            int channels = frame->channels;
> +            if (!channels)
> +                return NULL;
> +            CHECK_CHANNELS_CONSISTENCY(frame);
> +            planes = av_sample_fmt_is_planar(frame->format) ? channels : 1;
> +            break;
> +        }
> +    default:
> +        return NULL;
> +    }
>  
>      if (plane < 0 || plane >= planes || !frame->extended_data[plane])
>          return NULL;
> @@ -675,17 +701,42 @@ static int frame_copy_audio(AVFrame *dst, const AVFrame *src)
>      return 0;
>  }
>  
> +static int frame_copy_subtitles(AVFrame *dst, const AVFrame *src)
> +{
> +    unsigned i;
> +
> +    dst->format = src->format;
> +
> +    dst->num_subtitle_rects = src->num_subtitle_rects;
> +
> +    if (src->num_subtitle_rects) {
> +        dst->subtitle_rects = av_malloc_array(src->num_subtitle_rects, sizeof(AVSubtitleRect *));
> +
> +        for (i = 0; i < src->num_subtitle_rects; i++) {
> +            AVSubtitleRect *rect = src->subtitle_rects[i];
> +            rect->nb_refs++;
> +            dst->subtitle_rects[i] = rect;
> +        }
> +    }
> +
> +    return 0;
> +}
> +
>  int av_frame_copy(AVFrame *dst, const AVFrame *src)
>  {
>      if (dst->format != src->format || dst->format < 0)
>          return AVERROR(EINVAL);
>  
> -    if (dst->width > 0 && dst->height > 0)
> +    switch(dst->type) {
> +    case AVMEDIA_TYPE_VIDEO:
>          return frame_copy_video(dst, src);
> -    else if (dst->nb_samples > 0 && dst->channels > 0)
> +    case AVMEDIA_TYPE_AUDIO:
>          return frame_copy_audio(dst, src);
> -
> -    return AVERROR(EINVAL);
> +    case AVMEDIA_TYPE_SUBTITLE:
> +        return frame_copy_subtitles(dst, src);
> +    default:
> +        return AVERROR(EINVAL);
> +    }
>  }
>  
>  void av_frame_remove_side_data(AVFrame *frame, enum AVFrameSideDataType type)
> diff --git a/libavutil/frame.h b/libavutil/frame.h
> index ff2540a20f..005eed9d18 100644
> --- a/libavutil/frame.h
> +++ b/libavutil/frame.h
> @@ -271,7 +271,7 @@ typedef struct AVRegionOfInterest {
>  } AVRegionOfInterest;
>  
>  /**
> - * This structure describes decoded (raw) audio or video data.
> + * This structure describes decoded (raw) audio, video or subtitle data.
>   *
>   * AVFrame must be allocated using av_frame_alloc(). Note that this only
>   * allocates the AVFrame itself, the buffers for the data must be managed
> @@ -302,6 +302,13 @@ typedef struct AVRegionOfInterest {
>   */
>  typedef struct AVFrame {
>  #define AV_NUM_DATA_POINTERS 8
> +    /**
> +     * Media type of the frame (audio, video, subtitles..)
> +     *
> +     * See AVMEDIA_TYPE_xxx
> +     */
> +    enum AVMediaType type;
> +
>      /**
>       * pointer to the picture/channel planes.
>       * This might be different from the first allocated byte
> @@ -371,7 +378,7 @@ typedef struct AVFrame {
>      /**
>       * format of the frame, -1 if unknown or unset
>       * Values correspond to enum AVPixelFormat for video frames,
> -     * enum AVSampleFormat for audio)
> +     * enum AVSampleFormat for audio, AVSubtitleType for subtitles)
>       */
>      int format;
>  
> @@ -721,6 +728,8 @@ void av_frame_move_ref(AVFrame *dst, AVFrame *src);
>  /**
>   * Allocate new buffer(s) for audio or video data.
>   *
> + * Note: For subtitle data, use av_frame_get_buffer2
> + *
>   * The following fields must be set on frame before calling this function:
>   * - format (pixel format for video, sample format for audio)
>   * - width and height for video
> @@ -743,6 +752,32 @@ void av_frame_move_ref(AVFrame *dst, AVFrame *src);
>   */
>  int av_frame_get_buffer(AVFrame *frame, int align);
>  
> +/**
> + * Allocate new buffer(s) for audio, video or subtitle data.
> + *
> + * The following fields must be set on frame before calling this function:
> + * - format (pixel format for video, sample format for audio)
> + * - width and height for video
> + * - nb_samples and channel_layout for audio
> + * - type (AVMediaType)
> + *
> + * This function will fill AVFrame.data and AVFrame.buf arrays and, if
> + * necessary, allocate and fill AVFrame.extended_data and AVFrame.extended_buf.
> + * For planar formats, one buffer will be allocated for each plane.
> + *
> + * @warning: if frame already has been allocated, calling this function will
> + *           leak memory. In addition, undefined behavior can occur in certain
> + *           cases.
> + *
> + * @param frame frame in which to store the new buffers.
> + * @param align Required buffer size alignment. If equal to 0, alignment will be
> + *              chosen automatically for the current CPU. It is highly
> + *              recommended to pass 0 here unless you know what you are doing.
> + *
> + * @return 0 on success, a negative AVERROR on error.
> + */
> +int av_frame_get_buffer2(AVFrame *frame, int align);
> +
>  /**
>   * Check if the frame data is writable.
>   *
> diff --git a/libavutil/version.h b/libavutil/version.h
> index f220e192c8..2b5ac540a4 100644
> --- a/libavutil/version.h
> +++ b/libavutil/version.h
> @@ -79,7 +79,7 @@
>   */
>  
>  #define LIBAVUTIL_VERSION_MAJOR  57
> -#define LIBAVUTIL_VERSION_MINOR   5
> +#define LIBAVUTIL_VERSION_MINOR   6
>  #define LIBAVUTIL_VERSION_MICRO 100
>  
>  #define LIBAVUTIL_VERSION_INT   AV_VERSION_INT(LIBAVUTIL_VERSION_MAJOR, \
>