[FFmpeg-devel] [PATCH] avcodec/qsvenc: make QSV encoder encode VAAPI and D3D11 frames directly

Wed Jun 8 11:41:30 EEST 2022

On Wed, 2022-06-08 at 05:08 +0000, Soft Works wrote:
> > -----Original Message-----
> > From: ffmpeg-devel <ffmpeg-devel-bounces at ffmpeg.org> On Behalf Of Tong Wu
> > Sent: Tuesday, June 7, 2022 11:22 AM
> > To: ffmpeg-devel at ffmpeg.org
> > Cc: Tong Wu <tong1.wu at intel.com>; Wenbin Chen <wenbin.chen at intel.com>
> > Subject: [FFmpeg-devel] [PATCH] avcodec/qsvenc: make QSV encoder encode
> > VAAPI
> > and D3D11 frames directly
> > 
> > QSV encoder is able to encode frames with VAAPI or D3D11 pixel format
> > directly. This patch adds support for qsv encoder to accept VAAPI and
> > D3D11 pixel formats as input.
> > 
> > Signed-off-by: Wenbin Chen <wenbin.chen at intel.com>
> > Signed-off-by: Tong Wu <tong1.wu at intel.com>
> > ---
> >  libavcodec/qsvenc.c       | 59 ++++++++++++++++++++++++++++++++++-----
> >  libavcodec/qsvenc_h264.c  |  2 ++
> >  libavcodec/qsvenc_hevc.c  |  2 ++
> >  libavcodec/qsvenc_jpeg.c  |  2 ++
> >  libavcodec/qsvenc_mpeg2.c |  2 ++
> >  libavcodec/qsvenc_vp9.c   |  2 ++
> >  6 files changed, 62 insertions(+), 7 deletions(-)
> > 
> > diff --git a/libavcodec/qsvenc.c b/libavcodec/qsvenc.c
> > index 2b3b06767d..132d9ba93b 100644
> > --- a/libavcodec/qsvenc.c
> > +++ b/libavcodec/qsvenc.c
> > @@ -524,7 +524,9 @@ static int check_enc_param(AVCodecContext *avctx,
> > QSVEncContext *q)
> > 
> >  static int init_video_param_jpeg(AVCodecContext *avctx, QSVEncContext *q)
> >  {
> > -    enum AVPixelFormat sw_format = avctx->pix_fmt == AV_PIX_FMT_QSV ?
> > +    enum AVPixelFormat sw_format = avctx->pix_fmt == AV_PIX_FMT_QSV ||
> > +                                   avctx->pix_fmt == AV_PIX_FMT_VAAPI ||
> > +                                   avctx->pix_fmt == AV_PIX_FMT_D3D11 ?
> >                                     avctx->sw_pix_fmt : avctx->pix_fmt;
> >      const AVPixFmtDescriptor *desc;
> >      int ret;
> > @@ -591,7 +593,9 @@ static int init_video_param_jpeg(AVCodecContext *avctx,
> > QSVEncContext *q)
> > 
> >  static int init_video_param(AVCodecContext *avctx, QSVEncContext *q)
> >  {
> > -    enum AVPixelFormat sw_format = avctx->pix_fmt == AV_PIX_FMT_QSV ?
> > +    enum AVPixelFormat sw_format = avctx->pix_fmt == AV_PIX_FMT_QSV ||
> > +                                   avctx->pix_fmt == AV_PIX_FMT_VAAPI ||
> > +                                   avctx->pix_fmt == AV_PIX_FMT_D3D11 ?
> >                                     avctx->sw_pix_fmt : avctx->pix_fmt;
> >      const AVPixFmtDescriptor *desc;
> >      float quant;
> > @@ -1247,7 +1251,31 @@ int ff_qsv_enc_init(AVCodecContext *avctx,
> > QSVEncContext *q)
> > 
> >      if (avctx->hw_frames_ctx) {
> >          AVHWFramesContext    *frames_ctx = (AVHWFramesContext*)avctx->hw_frames_ctx->data;
> > 
> > -        AVQSVFramesContext *frames_hwctx = frames_ctx->hwctx;
> > +        AVQSVFramesContext *frames_hwctx = NULL;
> > +
> > +        if (frames_ctx->format == AV_PIX_FMT_VAAPI || frames_ctx->format ==
> > AV_PIX_FMT_D3D11) {
> > +            AVBufferRef *derive_device_ref = NULL;
> > +            AVBufferRef *derive_frames_ref = NULL;
> > +            ret = av_hwdevice_ctx_create_derived(&derive_device_ref,
> > +                                                 AV_HWDEVICE_TYPE_QSV,
> > frames_ctx->device_ref, 0);
> > +            if (ret < 0) {
> > +                av_log(avctx, AV_LOG_ERROR, "Failed to derive QSV device
> > context: %d.\n", ret);
> > +                return ret;
> > +            }
> > +            ret = av_hwframe_ctx_create_derived(&derive_frames_ref,
> > +                                                AV_PIX_FMT_QSV,
> > derive_device_ref, avctx->hw_frames_ctx, 0);
> > +            if (ret < 0) {
> > +                av_log(avctx, AV_LOG_ERROR, "Failed to derive QSV frames
> > context: %d.\n", ret);
> > +                av_buffer_unref(&derive_device_ref);
> > +                return ret;
> > +            }
> > +            av_buffer_unref(&avctx->hw_device_ctx);
> > +            avctx->hw_device_ctx = derive_device_ref;
> > +            av_buffer_unref(&avctx->hw_frames_ctx);
> > +            avctx->hw_frames_ctx = derive_frames_ref;
> > +            frames_ctx = (AVHWFramesContext*)avctx->hw_frames_ctx->data;
> > +        }
> > +        frames_hwctx = frames_ctx->hwctx;
> > 
> >          if (!iopattern) {
> >              if (frames_hwctx->frame_type & MFX_MEMTYPE_OPAQUE_FRAME)
> > @@ -1437,10 +1465,25 @@ static int submit_frame(QSVEncContext *q, const
> > AVFrame *frame,
> >      if (ret < 0)
> >          return ret;
> > 
> > -    if (frame->format == AV_PIX_FMT_QSV) {
> > -        ret = av_frame_ref(qf->frame, frame);
> > -        if (ret < 0)
> > -            return ret;
> > +    if (frame->format == AV_PIX_FMT_QSV || frame->format == AV_PIX_FMT_VAAPI || frame->format == AV_PIX_FMT_D3D11) {
> > 
> > +        if (frame->format == AV_PIX_FMT_QSV) {
> > +            ret = av_frame_ref(qf->frame, frame);
> > +            if (ret < 0)
> > +                return ret;
> > +        } else {
> > +            qf->frame->format = AV_PIX_FMT_QSV;
> > +            qf->frame->hw_frames_ctx = av_buffer_ref(q->avctx->hw_frames_ctx);
> > 
> > +            if (!qf->frame->hw_frames_ctx)
> > +                return AVERROR(ENOMEM);
> > +            ret = av_hwframe_map(qf->frame, frame, 0);
> > +            if (ret < 0) {
> > +                av_log(q->avctx, AV_LOG_ERROR, "Failed to map to QSV
> > frames\n");
> > +                return ret;
> > +            }
> > +            ret = av_frame_copy_props(qf->frame, frame);
> > +            if (ret < 0)
> > +                return ret;
> > +        }
> > 
> >          qf->surface = *(mfxFrameSurface1*)qf->frame->data[3];
> > 
> > @@ -1735,6 +1778,8 @@ int ff_qsv_enc_close(AVCodecContext *avctx,
> > QSVEncContext *q)
> > 
> >  const AVCodecHWConfigInternal *const ff_qsv_enc_hw_configs[] = {
> >      HW_CONFIG_ENCODER_FRAMES(QSV,  QSV),
> > +    HW_CONFIG_ENCODER_FRAMES(VAAPI,VAAPI),
> > +    HW_CONFIG_ENCODER_FRAMES(D3D11,D3D11VA),
> >      HW_CONFIG_ENCODER_DEVICE(NV12, QSV),
> >      HW_CONFIG_ENCODER_DEVICE(P010, QSV),
> >      NULL,
> > diff --git a/libavcodec/qsvenc_h264.c b/libavcodec/qsvenc_h264.c
> > index cf77ea575b..93ba8d8ded 100644
> > --- a/libavcodec/qsvenc_h264.c
> > +++ b/libavcodec/qsvenc_h264.c
> > @@ -196,6 +196,8 @@ const FFCodec ff_h264_qsv_encoder = {
> >      .p.pix_fmts     = (const enum AVPixelFormat[]){ AV_PIX_FMT_NV12,
> >                                                      AV_PIX_FMT_P010,
> >                                                      AV_PIX_FMT_QSV,
> > +                                                    AV_PIX_FMT_VAAPI,
> > +                                                    AV_PIX_FMT_D3D11,
> >                                                      AV_PIX_FMT_NONE },
> >      .p.priv_class   = &class,
> >      .defaults       = qsv_enc_defaults,
> > diff --git a/libavcodec/qsvenc_hevc.c b/libavcodec/qsvenc_hevc.c
> > index a6bf39c148..63b6ad9150 100644
> > --- a/libavcodec/qsvenc_hevc.c
> > +++ b/libavcodec/qsvenc_hevc.c
> > @@ -309,6 +309,8 @@ const FFCodec ff_hevc_qsv_encoder = {
> >                                                      AV_PIX_FMT_YUYV422,
> >                                                      AV_PIX_FMT_Y210,
> >                                                      AV_PIX_FMT_QSV,
> > +                                                    AV_PIX_FMT_VAAPI,
> > +                                                    AV_PIX_FMT_D3D11,
> >                                                      AV_PIX_FMT_BGRA,
> >                                                      AV_PIX_FMT_X2RGB10,
> >                                                      AV_PIX_FMT_NONE },
> > diff --git a/libavcodec/qsvenc_jpeg.c b/libavcodec/qsvenc_jpeg.c
> > index 825eb8dc06..5b7611bb85 100644
> > --- a/libavcodec/qsvenc_jpeg.c
> > +++ b/libavcodec/qsvenc_jpeg.c
> > @@ -91,6 +91,8 @@ const FFCodec ff_mjpeg_qsv_encoder = {
> >      .p.capabilities = AV_CODEC_CAP_DELAY | AV_CODEC_CAP_HYBRID,
> >      .p.pix_fmts     = (const enum AVPixelFormat[]){ AV_PIX_FMT_NV12,
> >                                                      AV_PIX_FMT_QSV,
> > +                                                    AV_PIX_FMT_VAAPI,
> > +                                                    AV_PIX_FMT_D3D11,
> >                                                      AV_PIX_FMT_NONE },
> >      .p.priv_class   = &class,
> >      .defaults       = qsv_enc_defaults,
> > diff --git a/libavcodec/qsvenc_mpeg2.c b/libavcodec/qsvenc_mpeg2.c
> > index 5cb12a2582..cba4001ee1 100644
> > --- a/libavcodec/qsvenc_mpeg2.c
> > +++ b/libavcodec/qsvenc_mpeg2.c
> > @@ -105,6 +105,8 @@ const FFCodec ff_mpeg2_qsv_encoder = {
> >      .p.capabilities = AV_CODEC_CAP_DELAY | AV_CODEC_CAP_HYBRID,
> >      .p.pix_fmts     = (const enum AVPixelFormat[]){ AV_PIX_FMT_NV12,
> >                                                      AV_PIX_FMT_QSV,
> > +                                                    AV_PIX_FMT_VAAPI,
> > +                                                    AV_PIX_FMT_D3D11,
> >                                                      AV_PIX_FMT_NONE },
> >      .p.priv_class   = &class,
> >      .defaults       = qsv_enc_defaults,
> > diff --git a/libavcodec/qsvenc_vp9.c b/libavcodec/qsvenc_vp9.c
> > index 4b2a6ce77f..2825b98a4a 100644
> > --- a/libavcodec/qsvenc_vp9.c
> > +++ b/libavcodec/qsvenc_vp9.c
> > @@ -115,6 +115,8 @@ const FFCodec ff_vp9_qsv_encoder = {
> >      .p.pix_fmts     = (const enum AVPixelFormat[]){ AV_PIX_FMT_NV12,
> >                                                      AV_PIX_FMT_P010,
> >                                                      AV_PIX_FMT_QSV,
> > +                                                    AV_PIX_FMT_VAAPI,
> > +                                                    AV_PIX_FMT_D3D11,
> >                                                      AV_PIX_FMT_NONE },
> >      .p.priv_class   = &class,
> >      .defaults       = qsv_enc_defaults,
> > --
> > 2.35.1.windows.2
> 
> Hi,
> 
> thanks for submitting this patch. I'm afraid, though, that this
> 
> - fundamentally contradicts the logic of ffmpeg's handling of hw acceleration,
>   hw device and hw frames contexts
> - adds code to an encoder, doing things an encoder is not supposed to do
> - qsv encoders and decoders have their own context => QSV

nvdec and nvenc have CUDA, but nvenc can also support D3D11VA, so it makes
sense to me to support D3D11VA/VAAPI in qsvenc too, as D3D11VA/VAAPI are used
internally in MediaSDK.

> - is not safe/guaranteed to work always
>   there are different requirements for QSV than for other cases
>   like VAAPI - for example: QSV requires a fixed-size frame pool
>   and encoders often need a larger frame pool than VAAPI
> 

Encoders in MediaSDK don't need a fixed pool, so we could probably relax this
limitation in QSV.

Thanks
Haihao

> 
> My personal opinion on such kind of automatic handling is this:
> 
> when you are not able to build a command line in a way that you exactly 
> know at each stage of the transcoding pipeline in which hw (or sw) context
> it will be executed, then you might be lost anyway - in most cases :-)
> 
> When you really want to achieve that kind of behavior, then it would be 
> a better idea to create a mechanism for "auto-insertion" of hwmap
> filters for such cases.
> I don't think that such behavior should be active by default though, as
> it would most likely create more non-understood failures than convenience 
> moments for not having to type        ,hwmap=derive_device=qsv
>