[FFmpeg-devel] [PATCH v8 11/15] avcodec/vaapi_encode: extract a get_recon_format function to base layer

Wed May 15 00:06:21 EEST 2024

On 18/04/2024 09:59, tong1.wu-at-intel.com at ffmpeg.org wrote:
> From: Tong Wu <tong1.wu at intel.com>
> 
> Surface size and block size parameters are also moved to base layer.
> 
> Signed-off-by: Tong Wu <tong1.wu at intel.com>
> ---
>  libavcodec/hw_base_encode.c     | 58 +++++++++++++++++++++++
>  libavcodec/hw_base_encode.h     | 12 +++++
>  libavcodec/vaapi_encode.c       | 81 ++++++++-------------------------
>  libavcodec/vaapi_encode.h       | 10 ----
>  libavcodec/vaapi_encode_av1.c   | 10 ++--
>  libavcodec/vaapi_encode_h264.c  | 11 +++--
>  libavcodec/vaapi_encode_h265.c  | 25 +++++-----
>  libavcodec/vaapi_encode_mjpeg.c |  5 +-
>  libavcodec/vaapi_encode_vp9.c   |  6 +--
>  9 files changed, 118 insertions(+), 100 deletions(-)
> 
> diff --git a/libavcodec/hw_base_encode.c b/libavcodec/hw_base_encode.c
> index a4223d90f0..af85bb99aa 100644
> --- a/libavcodec/hw_base_encode.c
> +++ b/libavcodec/hw_base_encode.c
> @@ -693,6 +693,64 @@ int ff_hw_base_init_gop_structure(AVCodecContext *avctx, uint32_t ref_l0, uint32
>      return 0;
>  }
>  
> +int ff_hw_base_get_recon_format(AVCodecContext *avctx, const void *hwconfig, enum AVPixelFormat *fmt)
> +{
> +    HWBaseEncodeContext *ctx = avctx->priv_data;
> +    AVHWFramesConstraints *constraints = NULL;
> +    enum AVPixelFormat recon_format;
> +    int err, i;
> +
> +    constraints = av_hwdevice_get_hwframe_constraints(ctx->device_ref,
> +                                                      hwconfig);

Does this mechanism actually make sense for D3D12?

VAAPI is currently the only implementation of this function with non-null hwconfig, and this is really relying on it to get useful information (otherwise the formats are just everything the device can allocate as a surface and the sizes are 0/INT_MAX).

If D3D12 has something which would fit into the hwconfig method then this could work very nicely as well.  If it doesn't, then presumably it has some other calls to check things like the maximum frame size supported by the encoder, and we should be using those rather than making this code generic?

(Also consider Vulkan if possible; if two thirds of the cases want it then maybe we should do this even if it doesn't fit in one of them.)

> +    if (!constraints) {
> +        err = AVERROR(ENOMEM);
> +        goto fail;
> +    }
> +
> +    // Probably we can use the input surface format as the surface format
> +    // of the reconstructed frames.  If not, we just pick the first (only?)
> +    // format in the valid list and hope that it all works.
> +    recon_format = AV_PIX_FMT_NONE;
> +    if (constraints->valid_sw_formats) {
> +        for (i = 0; constraints->valid_sw_formats[i] != AV_PIX_FMT_NONE; i++) {
> +            if (ctx->input_frames->sw_format ==
> +                constraints->valid_sw_formats[i]) {
> +                recon_format = ctx->input_frames->sw_format;
> +                break;
> +            }
> +        }
> +        if (recon_format == AV_PIX_FMT_NONE) {
> +            // No match.  Just use the first in the supported list and
> +            // hope for the best.
> +            recon_format = constraints->valid_sw_formats[0];
> +        }
> +    } else {
> +        // No idea what to use; copy input format.
> +        recon_format = ctx->input_frames->sw_format;
> +    }
> +    av_log(avctx, AV_LOG_DEBUG, "Using %s as format of "
> +           "reconstructed frames.\n", av_get_pix_fmt_name(recon_format));
> +
> +    if (ctx->surface_width  < constraints->min_width  ||
> +        ctx->surface_height < constraints->min_height ||
> +        ctx->surface_width  > constraints->max_width ||
> +        ctx->surface_height > constraints->max_height) {
> +        av_log(avctx, AV_LOG_ERROR, "Hardware does not support encoding at "
> +               "size %dx%d (constraints: width %d-%d height %d-%d).\n",
> +               ctx->surface_width, ctx->surface_height,
> +               constraints->min_width,  constraints->max_width,
> +               constraints->min_height, constraints->max_height);
> +        err = AVERROR(EINVAL);
> +        goto fail;
> +    }
> +
> +    *fmt = recon_format;
> +    err = 0;
> +fail:
> +    av_hwframe_constraints_free(&constraints);
> +    return err;
> +}
> +
>  int ff_hw_base_encode_init(AVCodecContext *avctx)
>  {
>      HWBaseEncodeContext *ctx = avctx->priv_data;
> diff --git a/libavcodec/hw_base_encode.h b/libavcodec/hw_base_encode.h
> index d717f955d8..7686cf9501 100644
> --- a/libavcodec/hw_base_encode.h
> +++ b/libavcodec/hw_base_encode.h
> @@ -126,6 +126,16 @@ typedef struct HWBaseEncodeContext {
>      // Desired B frame reference depth.
>      int             desired_b_depth;
>  
> +    // The required size of surfaces.  This is probably the input
> +    // size (AVCodecContext.width|height) aligned up to whatever
> +    // block size is required by the codec.
> +    int             surface_width;
> +    int             surface_height;
> +
> +    // The block size for slice calculations.
> +    int             slice_block_width;
> +    int             slice_block_height;
> +
>      // The hardware device context.
>      AVBufferRef    *device_ref;
>      AVHWDeviceContext *device;
> @@ -210,6 +220,8 @@ int ff_hw_base_encode_receive_packet(AVCodecContext *avctx, AVPacket *pkt);
>  int ff_hw_base_init_gop_structure(AVCodecContext *avctx, uint32_t ref_l0, uint32_t ref_l1,
>                                    int flags, int prediction_pre_only);
>  
> +int ff_hw_base_get_recon_format(AVCodecContext *avctx, const void *hwconfig, enum AVPixelFormat *fmt);
> +
>  int ff_hw_base_encode_init(AVCodecContext *avctx);
>  
>  int ff_hw_base_encode_close(AVCodecContext *avctx);
> ...

Thanks,

- Mark