[FFmpeg-devel] [PATCH v3 01/18] avutil/frame: Subtitle Filtering - Add AVMediaType property to AVFrame

Sat Sep 11 13:51:08 EEST 2021

On Sat, Sep 11, 2021 at 8:03 AM Soft Works <softworkz at hotmail.com> wrote:

> This is the root commit for adding subtitle filtering capabilities.
> Adding the media type property to AVFrame replaces the previous
> way of distinction which was based on checking width and height
> to determine whether a frame is audio or video.
>
> Signed-off-by: softworkz <softworkz at hotmail.com>
> ---
>  libavutil/frame.c   | 78 +++++++++++++++++++++++++++++++++++----------
>  libavutil/frame.h   | 39 +++++++++++++++++++++--
>  libavutil/version.h |  2 +-
>  3 files changed, 99 insertions(+), 20 deletions(-)
>
> diff --git a/libavutil/frame.c b/libavutil/frame.c
> index b0ceaf7145..09149721c0 100644
> --- a/libavutil/frame.c
> +++ b/libavutil/frame.c
> @@ -244,22 +244,39 @@ static int get_audio_buffer(AVFrame *frame, int
> align)
>  }
>
>  int av_frame_get_buffer(AVFrame *frame, int align)
> +{
> +    if (frame->width > 0 && frame->height > 0)
> +        return av_frame_get_buffer2(frame, AVMEDIA_TYPE_VIDEO, align);
> +    else if (frame->nb_samples > 0 && (frame->channel_layout ||
> frame->channels > 0))
> +        return av_frame_get_buffer2(frame, AVMEDIA_TYPE_AUDIO, align);
> +
> +    return AVERROR(EINVAL);
> +}
> +
> +int av_frame_get_buffer2(AVFrame *frame, enum AVMediaType type, int align)
>  {
>      if (frame->format < 0)
>          return AVERROR(EINVAL);
>
> -    if (frame->width > 0 && frame->height > 0)
> +    frame->type = type;
> +
> +    switch(frame->type) {
> +    case AVMEDIA_TYPE_VIDEO:
>          return get_video_buffer(frame, align);
> -    else if (frame->nb_samples > 0 && (frame->channel_layout ||
> frame->channels > 0))
> +    case AVMEDIA_TYPE_AUDIO:
>          return get_audio_buffer(frame, align);
> -
> -    return AVERROR(EINVAL);
> +    case AVMEDIA_TYPE_SUBTITLE:
> +        return 0;
> +    default:
> +        return AVERROR(EINVAL);
> +    }
>  }
>
>  static int frame_copy_props(AVFrame *dst, const AVFrame *src, int
> force_copy)
>  {
>      int ret, i;
>
> +    dst->type                   = src->type;
>      dst->key_frame              = src->key_frame;
>      dst->pict_type              = src->pict_type;
>      dst->sample_aspect_ratio    = src->sample_aspect_ratio;
> @@ -331,6 +348,7 @@ int av_frame_ref(AVFrame *dst, const AVFrame *src)
>      av_assert1(dst->width == 0 && dst->height == 0);
>      av_assert1(dst->channels == 0);
>
> +    dst->type           = src->type;
>      dst->format         = src->format;
>      dst->width          = src->width;
>      dst->height         = src->height;
> @@ -344,7 +362,7 @@ int av_frame_ref(AVFrame *dst, const AVFrame *src)
>
>      /* duplicate the frame data if it's not refcounted */
>      if (!src->buf[0]) {
> -        ret = av_frame_get_buffer(dst, 0);
> +        ret = av_frame_get_buffer2(dst, dst->type, 0);
>          if (ret < 0)
>              goto fail;
>
> @@ -499,6 +517,7 @@ int av_frame_make_writable(AVFrame *frame)
>          return 0;
>
>      memset(&tmp, 0, sizeof(tmp));
> +    tmp.type           = frame->type;
>      tmp.format         = frame->format;
>      tmp.width          = frame->width;
>      tmp.height         = frame->height;
> @@ -509,7 +528,7 @@ int av_frame_make_writable(AVFrame *frame)
>      if (frame->hw_frames_ctx)
>          ret = av_hwframe_get_buffer(frame->hw_frames_ctx, &tmp, 0);
>      else
> -        ret = av_frame_get_buffer(&tmp, 0);
> +        ret = av_frame_get_buffer2(&tmp, tmp.type, 0);
>      if (ret < 0)
>          return ret;
>
> @@ -544,14 +563,22 @@ AVBufferRef *av_frame_get_plane_buffer(AVFrame
> *frame, int plane)
>      uint8_t *data;
>      int planes, i;
>
> -    if (frame->nb_samples) {
> -        int channels = frame->channels;
> -        if (!channels)
> -            return NULL;
> -        CHECK_CHANNELS_CONSISTENCY(frame);
> -        planes = av_sample_fmt_is_planar(frame->format) ? channels : 1;
> -    } else
> +    switch(frame->type) {
> +    case AVMEDIA_TYPE_VIDEO:
>          planes = 4;
> +        break;
> +    case AVMEDIA_TYPE_AUDIO:
> +        {
> +            int channels = frame->channels;
> +            if (!channels)
> +                return NULL;
> +            CHECK_CHANNELS_CONSISTENCY(frame);
> +            planes = av_sample_fmt_is_planar(frame->format) ? channels :
> 1;
> +            break;
> +        }
> +    default:
> +        return NULL;
> +    }
>
>      if (plane < 0 || plane >= planes || !frame->extended_data[plane])
>          return NULL;
> @@ -675,17 +702,34 @@ static int frame_copy_audio(AVFrame *dst, const
> AVFrame *src)
>      return 0;
>  }
>
> +static int frame_copy_subtitles(AVFrame *dst, const AVFrame *src)
> +{
> +    dst->type = AVMEDIA_TYPE_SUBTITLE;
> +    dst->format = src->format;
> +
> +    if (src->buf[0]) {
> +        dst->buf[0] = av_buffer_ref(src->buf[0]);
> +        dst->data[0] = src->data[0];
> +    }
> +
> +    return 0;
> +}
> +
>  int av_frame_copy(AVFrame *dst, const AVFrame *src)
>  {
>      if (dst->format != src->format || dst->format < 0)
>          return AVERROR(EINVAL);
>
> -    if (dst->width > 0 && dst->height > 0)
> +    switch(dst->type) {
> +    case AVMEDIA_TYPE_VIDEO:
>          return frame_copy_video(dst, src);
> -    else if (dst->nb_samples > 0 && dst->channels > 0)
> +    case AVMEDIA_TYPE_AUDIO:
>          return frame_copy_audio(dst, src);
> -
> -    return AVERROR(EINVAL);
> +    case AVMEDIA_TYPE_SUBTITLE:
> +        return frame_copy_subtitles(dst, src);
> +    default:
> +        return AVERROR(EINVAL);
> +    }
>  }
>
>  void av_frame_remove_side_data(AVFrame *frame, enum AVFrameSideDataType
> type)
> diff --git a/libavutil/frame.h b/libavutil/frame.h
> index ff2540a20f..c104815df9 100644
> --- a/libavutil/frame.h
> +++ b/libavutil/frame.h
> @@ -271,7 +271,7 @@ typedef struct AVRegionOfInterest {
>  } AVRegionOfInterest;
>
>  /**
> - * This structure describes decoded (raw) audio or video data.
> + * This structure describes decoded (raw) audio, video or subtitle data.
>   *
>   * AVFrame must be allocated using av_frame_alloc(). Note that this only
>   * allocates the AVFrame itself, the buffers for the data must be managed
> @@ -302,6 +302,13 @@ typedef struct AVRegionOfInterest {
>   */
>  typedef struct AVFrame {
>  #define AV_NUM_DATA_POINTERS 8
> +    /**
> +     * Media type of the frame (audio, video, subtitles..)
> +     *
> +     * See AVMEDIA_TYPE_xxx
> +     */
> +    enum AVMediaType type;
> +
>      /**
>       * pointer to the picture/channel planes.
>       * This might be different from the first allocated byte
> @@ -371,7 +378,7 @@ typedef struct AVFrame {
>      /**
>       * format of the frame, -1 if unknown or unset
>       * Values correspond to enum AVPixelFormat for video frames,
> -     * enum AVSampleFormat for audio)
> +     * enum AVSampleFormat for audio, AVSubtitleType for subtitles)
>       */
>      int format;
>
> @@ -721,6 +728,8 @@ void av_frame_move_ref(AVFrame *dst, AVFrame *src);
>  /**
>   * Allocate new buffer(s) for audio or video data.
>   *
> + * Note: For subtitle data, use av_frame_get_buffer2
>

I dislike this approach. It should be consistent with audio & video.

> + *
>   * The following fields must be set on frame before calling this function:
>   * - format (pixel format for video, sample format for audio)
>   * - width and height for video
> @@ -743,6 +752,32 @@ void av_frame_move_ref(AVFrame *dst, AVFrame *src);
>   */
>  int av_frame_get_buffer(AVFrame *frame, int align);
>
> +/**
> + * Allocate new buffer(s) for audio, video or subtitle data.
> + *
> + * The following fields must be set on frame before calling this function:
> + * - format (pixel format for video, sample format for audio)
> + * - width and height for video
> + * - nb_samples and channel_layout for audio
> + *
> + * This function will fill AVFrame.data and AVFrame.buf arrays and, if
> + * necessary, allocate and fill AVFrame.extended_data and
> AVFrame.extended_buf.
> + * For planar formats, one buffer will be allocated for each plane.
> + *
> + * @warning: if frame already has been allocated, calling this function
> will
> + *           leak memory. In addition, undefined behavior can occur in
> certain
> + *           cases.
> + *
> + * @param frame frame in which to store the new buffers.
> + * @param type  media type to set for the frame.
> + * @param align Required buffer size alignment. If equal to 0, alignment
> will be
> + *              chosen automatically for the current CPU. It is highly
> + *              recommended to pass 0 here unless you know what you are
> doing.
> + *
> + * @return 0 on success, a negative AVERROR on error.
> + */
> +int av_frame_get_buffer2(AVFrame *frame, enum AVMediaType type, int
> align);
> +
>  /**
>   * Check if the frame data is writable.
>   *
> diff --git a/libavutil/version.h b/libavutil/version.h
> index f220e192c8..2b5ac540a4 100644
> --- a/libavutil/version.h
> +++ b/libavutil/version.h
> @@ -79,7 +79,7 @@
>   */
>
>  #define LIBAVUTIL_VERSION_MAJOR  57
> -#define LIBAVUTIL_VERSION_MINOR   5
> +#define LIBAVUTIL_VERSION_MINOR   6
>  #define LIBAVUTIL_VERSION_MICRO 100
>
>  #define LIBAVUTIL_VERSION_INT   AV_VERSION_INT(LIBAVUTIL_VERSION_MAJOR, \
> --
> 2.30.2.windows.1
>
> _______________________________________________
> ffmpeg-devel mailing list
> ffmpeg-devel at ffmpeg.org
> https://ffmpeg.org/mailman/listinfo/ffmpeg-devel
>
> To unsubscribe, visit link above, or email
> ffmpeg-devel-request at ffmpeg.org with subject "unsubscribe".
>