[FFmpeg-devel] [PATCH] avcodec/qsvenc: make QSV encoder encode VAAPI and D3D11 frames directly

Soft Works softworkz at hotmail.com
Wed Jun 8 08:08:11 EEST 2022



> -----Original Message-----
> From: ffmpeg-devel <ffmpeg-devel-bounces at ffmpeg.org> On Behalf Of Tong Wu
> Sent: Tuesday, June 7, 2022 11:22 AM
> To: ffmpeg-devel at ffmpeg.org
> Cc: Tong Wu <tong1.wu at intel.com>; Wenbin Chen <wenbin.chen at intel.com>
> Subject: [FFmpeg-devel] [PATCH] avcodec/qsvenc: make QSV encoder encode VAAPI
> and D3D11 frames directly
> 
> QSV encoder is able to encode frames with VAAPI or D3D11 pixel format
> directly. This patch adds support for qsv encoder to accept VAAPI and
> D3D11 pixel formats as input.
> 
> Signed-off-by: Wenbin Chen <wenbin.chen at intel.com>
> Signed-off-by: Tong Wu <tong1.wu at intel.com>
> ---
>  libavcodec/qsvenc.c       | 59 ++++++++++++++++++++++++++++++++++-----
>  libavcodec/qsvenc_h264.c  |  2 ++
>  libavcodec/qsvenc_hevc.c  |  2 ++
>  libavcodec/qsvenc_jpeg.c  |  2 ++
>  libavcodec/qsvenc_mpeg2.c |  2 ++
>  libavcodec/qsvenc_vp9.c   |  2 ++
>  6 files changed, 62 insertions(+), 7 deletions(-)
> 
> diff --git a/libavcodec/qsvenc.c b/libavcodec/qsvenc.c
> index 2b3b06767d..132d9ba93b 100644
> --- a/libavcodec/qsvenc.c
> +++ b/libavcodec/qsvenc.c
> @@ -524,7 +524,9 @@ static int check_enc_param(AVCodecContext *avctx, QSVEncContext *q)
> 
>  static int init_video_param_jpeg(AVCodecContext *avctx, QSVEncContext *q)
>  {
> -    enum AVPixelFormat sw_format = avctx->pix_fmt == AV_PIX_FMT_QSV ?
> +    enum AVPixelFormat sw_format = avctx->pix_fmt == AV_PIX_FMT_QSV ||
> +                                   avctx->pix_fmt == AV_PIX_FMT_VAAPI ||
> +                                   avctx->pix_fmt == AV_PIX_FMT_D3D11 ?
>                                     avctx->sw_pix_fmt : avctx->pix_fmt;
>      const AVPixFmtDescriptor *desc;
>      int ret;
> @@ -591,7 +593,9 @@ static int init_video_param_jpeg(AVCodecContext *avctx, QSVEncContext *q)
> 
>  static int init_video_param(AVCodecContext *avctx, QSVEncContext *q)
>  {
> -    enum AVPixelFormat sw_format = avctx->pix_fmt == AV_PIX_FMT_QSV ?
> +    enum AVPixelFormat sw_format = avctx->pix_fmt == AV_PIX_FMT_QSV ||
> +                                   avctx->pix_fmt == AV_PIX_FMT_VAAPI ||
> +                                   avctx->pix_fmt == AV_PIX_FMT_D3D11 ?
>                                     avctx->sw_pix_fmt : avctx->pix_fmt;
>      const AVPixFmtDescriptor *desc;
>      float quant;
> @@ -1247,7 +1251,31 @@ int ff_qsv_enc_init(AVCodecContext *avctx, QSVEncContext *q)
> 
>      if (avctx->hw_frames_ctx) {
>          AVHWFramesContext    *frames_ctx = (AVHWFramesContext*)avctx->hw_frames_ctx->data;
> -        AVQSVFramesContext *frames_hwctx = frames_ctx->hwctx;
> +        AVQSVFramesContext *frames_hwctx = NULL;
> +
> +        if (frames_ctx->format == AV_PIX_FMT_VAAPI || frames_ctx->format == AV_PIX_FMT_D3D11) {
> +            AVBufferRef *derive_device_ref = NULL;
> +            AVBufferRef *derive_frames_ref = NULL;
> +            ret = av_hwdevice_ctx_create_derived(&derive_device_ref,
> +                                                 AV_HWDEVICE_TYPE_QSV, frames_ctx->device_ref, 0);
> +            if (ret < 0) {
> +                av_log(avctx, AV_LOG_ERROR, "Failed to derive QSV device context: %d.\n", ret);
> +                return ret;
> +            }
> +            ret = av_hwframe_ctx_create_derived(&derive_frames_ref,
> +                                                AV_PIX_FMT_QSV, derive_device_ref, avctx->hw_frames_ctx, 0);
> +            if (ret < 0) {
> +                av_log(avctx, AV_LOG_ERROR, "Failed to derive QSV frames context: %d.\n", ret);
> +                av_buffer_unref(&derive_device_ref);
> +                return ret;
> +            }
> +            av_buffer_unref(&avctx->hw_device_ctx);
> +            avctx->hw_device_ctx = derive_device_ref;
> +            av_buffer_unref(&avctx->hw_frames_ctx);
> +            avctx->hw_frames_ctx = derive_frames_ref;
> +            frames_ctx = (AVHWFramesContext*)avctx->hw_frames_ctx->data;
> +        }
> +        frames_hwctx = frames_ctx->hwctx;
> 
>          if (!iopattern) {
>              if (frames_hwctx->frame_type & MFX_MEMTYPE_OPAQUE_FRAME)
> @@ -1437,10 +1465,25 @@ static int submit_frame(QSVEncContext *q, const AVFrame *frame,
>      if (ret < 0)
>          return ret;
> 
> -    if (frame->format == AV_PIX_FMT_QSV) {
> -        ret = av_frame_ref(qf->frame, frame);
> -        if (ret < 0)
> -            return ret;
> +    if (frame->format == AV_PIX_FMT_QSV || frame->format == AV_PIX_FMT_VAAPI || frame->format == AV_PIX_FMT_D3D11) {
> +        if (frame->format == AV_PIX_FMT_QSV) {
> +            ret = av_frame_ref(qf->frame, frame);
> +            if (ret < 0)
> +                return ret;
> +        } else {
> +            qf->frame->format = AV_PIX_FMT_QSV;
> +            qf->frame->hw_frames_ctx = av_buffer_ref(q->avctx->hw_frames_ctx);
> +            if (!qf->frame->hw_frames_ctx)
> +                return AVERROR(ENOMEM);
> +            ret = av_hwframe_map(qf->frame, frame, 0);
> +            if (ret < 0) {
> +                av_log(q->avctx, AV_LOG_ERROR, "Failed to map to QSV frames\n");
> +                return ret;
> +            }
> +            ret = av_frame_copy_props(qf->frame, frame);
> +            if (ret < 0)
> +                return ret;
> +        }
> 
>          qf->surface = *(mfxFrameSurface1*)qf->frame->data[3];
> 
> @@ -1735,6 +1778,8 @@ int ff_qsv_enc_close(AVCodecContext *avctx, QSVEncContext *q)
> 
>  const AVCodecHWConfigInternal *const ff_qsv_enc_hw_configs[] = {
>      HW_CONFIG_ENCODER_FRAMES(QSV,  QSV),
> +    HW_CONFIG_ENCODER_FRAMES(VAAPI,VAAPI),
> +    HW_CONFIG_ENCODER_FRAMES(D3D11,D3D11VA),
>      HW_CONFIG_ENCODER_DEVICE(NV12, QSV),
>      HW_CONFIG_ENCODER_DEVICE(P010, QSV),
>      NULL,
> diff --git a/libavcodec/qsvenc_h264.c b/libavcodec/qsvenc_h264.c
> index cf77ea575b..93ba8d8ded 100644
> --- a/libavcodec/qsvenc_h264.c
> +++ b/libavcodec/qsvenc_h264.c
> @@ -196,6 +196,8 @@ const FFCodec ff_h264_qsv_encoder = {
>      .p.pix_fmts     = (const enum AVPixelFormat[]){ AV_PIX_FMT_NV12,
>                                                      AV_PIX_FMT_P010,
>                                                      AV_PIX_FMT_QSV,
> +                                                    AV_PIX_FMT_VAAPI,
> +                                                    AV_PIX_FMT_D3D11,
>                                                      AV_PIX_FMT_NONE },
>      .p.priv_class   = &class,
>      .defaults       = qsv_enc_defaults,
> diff --git a/libavcodec/qsvenc_hevc.c b/libavcodec/qsvenc_hevc.c
> index a6bf39c148..63b6ad9150 100644
> --- a/libavcodec/qsvenc_hevc.c
> +++ b/libavcodec/qsvenc_hevc.c
> @@ -309,6 +309,8 @@ const FFCodec ff_hevc_qsv_encoder = {
>                                                      AV_PIX_FMT_YUYV422,
>                                                      AV_PIX_FMT_Y210,
>                                                      AV_PIX_FMT_QSV,
> +                                                    AV_PIX_FMT_VAAPI,
> +                                                    AV_PIX_FMT_D3D11,
>                                                      AV_PIX_FMT_BGRA,
>                                                      AV_PIX_FMT_X2RGB10,
>                                                      AV_PIX_FMT_NONE },
> diff --git a/libavcodec/qsvenc_jpeg.c b/libavcodec/qsvenc_jpeg.c
> index 825eb8dc06..5b7611bb85 100644
> --- a/libavcodec/qsvenc_jpeg.c
> +++ b/libavcodec/qsvenc_jpeg.c
> @@ -91,6 +91,8 @@ const FFCodec ff_mjpeg_qsv_encoder = {
>      .p.capabilities = AV_CODEC_CAP_DELAY | AV_CODEC_CAP_HYBRID,
>      .p.pix_fmts     = (const enum AVPixelFormat[]){ AV_PIX_FMT_NV12,
>                                                      AV_PIX_FMT_QSV,
> +                                                    AV_PIX_FMT_VAAPI,
> +                                                    AV_PIX_FMT_D3D11,
>                                                      AV_PIX_FMT_NONE },
>      .p.priv_class   = &class,
>      .defaults       = qsv_enc_defaults,
> diff --git a/libavcodec/qsvenc_mpeg2.c b/libavcodec/qsvenc_mpeg2.c
> index 5cb12a2582..cba4001ee1 100644
> --- a/libavcodec/qsvenc_mpeg2.c
> +++ b/libavcodec/qsvenc_mpeg2.c
> @@ -105,6 +105,8 @@ const FFCodec ff_mpeg2_qsv_encoder = {
>      .p.capabilities = AV_CODEC_CAP_DELAY | AV_CODEC_CAP_HYBRID,
>      .p.pix_fmts     = (const enum AVPixelFormat[]){ AV_PIX_FMT_NV12,
>                                                      AV_PIX_FMT_QSV,
> +                                                    AV_PIX_FMT_VAAPI,
> +                                                    AV_PIX_FMT_D3D11,
>                                                      AV_PIX_FMT_NONE },
>      .p.priv_class   = &class,
>      .defaults       = qsv_enc_defaults,
> diff --git a/libavcodec/qsvenc_vp9.c b/libavcodec/qsvenc_vp9.c
> index 4b2a6ce77f..2825b98a4a 100644
> --- a/libavcodec/qsvenc_vp9.c
> +++ b/libavcodec/qsvenc_vp9.c
> @@ -115,6 +115,8 @@ const FFCodec ff_vp9_qsv_encoder = {
>      .p.pix_fmts     = (const enum AVPixelFormat[]){ AV_PIX_FMT_NV12,
>                                                      AV_PIX_FMT_P010,
>                                                      AV_PIX_FMT_QSV,
> +                                                    AV_PIX_FMT_VAAPI,
> +                                                    AV_PIX_FMT_D3D11,
>                                                      AV_PIX_FMT_NONE },
>      .p.priv_class   = &class,
>      .defaults       = qsv_enc_defaults,
> --
> 2.35.1.windows.2

Hi,

thanks for submitting this patch. I'm afraid, though, that this

- fundamentally contradicts the logic of ffmpeg's handling of hw acceleration,
  hw device and hw frames contexts
- adds code to an encoder for doing things an encoder is not supposed to do
  (the qsv encoders and decoders have their own hw context => QSV)
- is not safe/guaranteed to always work, because QSV has different
  requirements than other APIs like VAAPI - for example, QSV requires a
  fixed-size frame pool, and encoders often need a larger frame pool than
  a VAAPI source provides (see the sketch below)


My personal opinion on this kind of automatic handling is this:

when you cannot build a command line in a way that you know exactly,
at each stage of the transcoding pipeline, in which hw (or sw) context
it will be executed, then you are probably lost anyway - in most cases :-)

If you really want to achieve that kind of behavior, it would be a
better idea to create a mechanism for "auto-insertion" of hwmap
filters in such cases.
I don't think that such behavior should be active by default though, as
it would most likely create more non-understood failures than moments of
convenience from not having to type    ,hwmap=derive_device=qsv
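
For reference, this is the kind of explicit mapping I mean - an
illustrative command line only, file names are placeholders:

    ffmpeg -hwaccel vaapi -hwaccel_output_format vaapi -i input.mp4 \
           -vf hwmap=derive_device=qsv,format=qsv \
           -c:v h264_qsv output.mp4

Here the hwmap filter derives a QSV device from the VAAPI device and maps
the frames - which is what this patch tries to do implicitly inside the
encoder.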

Best,
softworkz