[FFmpeg-devel] [PATCH] avcodec/amfenc: DX12 Reference-only feature support
Tong Wu
wutong1208 at outlook.com
Mon Feb 10 17:18:18 EET 2025
Araz Iusubov:
>Subject: [FFmpeg-devel] [PATCH] avcodec/amfenc: DX12 Reference-only feature
>support
>
>The Reference-Only feature in DirectX 12 is a memory optimization technique
>designed for video decoding scenarios.
>This feature requires that reference resources must be allocated with the
>D3D12_RESOURCE_FLAG_VIDEO_DECODE_REFERENCE_ONLY resource flag.
>Reference textures must also be separated from output textures.
>This feature is not supported in the current version of ffmpeg.
>Since AMD GPUs use this feature in their DirectX 12 decoder, ffmpeg does not
>currently support DirectX 12 decoding on AMD GPUs.
>To properly support the Reference-Only feature, two parallel resource pools must
>be configured and managed:
>General Resource Pool:
>Contains resources used for output decoded frames.
>Defined in AVHWFramesContext and manages the final decoded textures.
>Reference-Only Resource Pool:
>Intended for storing reference frame resources.
>Resources created with the
>D3D12_RESOURCE_FLAG_VIDEO_DECODE_REFERENCE_ONLY flag are allocated
>from a separate, dedicated AVBufferPool.
>
>---
> libavcodec/d3d12va_decode.c | 58 ++++++++++++++++++++++++++++---
> libavutil/hwcontext_d3d12va.c | 65 ++++++++++++++++++++++++++++++++---
> 2 files changed, 115 insertions(+), 8 deletions(-)
>
>diff --git a/libavcodec/d3d12va_decode.c b/libavcodec/d3d12va_decode.c
>index 3b8978635e..8916f94d10 100644
>--- a/libavcodec/d3d12va_decode.c
>+++ b/libavcodec/d3d12va_decode.c
>@@ -51,11 +51,19 @@ unsigned ff_d3d12va_get_surface_index(const
>AVCodecContext *avctx,
> D3D12VADecodeContext *ctx, const AVFrame *frame,
> int curr) {
>+ AVHWFramesContext *frames_ctx = D3D12VA_FRAMES_CONTEXT(avctx);
>+ AVD3D12VAFramesContext *frames_hwctx = frames_ctx->hwctx;
>+
> AVD3D12VAFrame *f;
> ID3D12Resource *res;
> unsigned i;
>
>- f = (AVD3D12VAFrame *)frame->data[0];
>+ if (frames_hwctx->flags &
>D3D12_RESOURCE_FLAG_VIDEO_DECODE_REFERENCE_ONLY) {
>+ f = (AVD3D12VAFrame*)frame->data[1];
>+ } else {
>+ f = (AVD3D12VAFrame*)frame->data[0];
>+ }
>+
> if (!f)
> goto fail;
>
>@@ -250,6 +258,11 @@ static int d3d12va_create_decoder(AVCodecContext
>*avctx)
> return AVERROR_PATCHWELCOME;
> }
Do we also need to handle the case where DecodeTier == D3D12_VIDEO_DECODE_TIER_2?
>
>+ if (feature.ConfigurationFlags &
>D3D12_VIDEO_DECODE_CONFIGURATION_FLAG_REFERENCE_ONLY_ALLOCATI
>ONS_REQUIRED) {
>+ frames_hwctx->flags |=
>(D3D12_RESOURCE_FLAG_VIDEO_DECODE_REFERENCE_ONLY |
>D3D12_RESOURCE_FLAG_DENY_SHADER_RESOURCE);
>+ av_log(avctx, AV_LOG_INFO, "Reference-Only Allocations are required for
>this configuration.\n");
>+ }
>+
> desc = (D3D12_VIDEO_DECODER_DESC) {
> .NodeMask = 0,
> .Configuration = ctx->cfg,
>@@ -440,8 +453,19 @@ int ff_d3d12va_common_end_frame(AVCodecContext
>*avctx, AVFrame *frame,
> D3D12VADecodeContext *ctx = D3D12VA_DECODE_CONTEXT(avctx);
> ID3D12Resource *buffer = NULL;
> ID3D12CommandAllocator *command_allocator = NULL;
>- AVD3D12VAFrame *f = (AVD3D12VAFrame *)frame->data[0];
>- ID3D12Resource *resource = (ID3D12Resource *)f->texture;
>+ AVHWFramesContext *frames_ctx =
>D3D12VA_FRAMES_CONTEXT(avctx);
>+ AVD3D12VAFramesContext *frames_hwctx = frames_ctx->hwctx;
>+ AVD3D12VAFrame *f = NULL;
>+ AVD3D12VAFrame *output_data = NULL;
>+
>+ if (frames_hwctx->flags &
>D3D12_RESOURCE_FLAG_VIDEO_DECODE_REFERENCE_ONLY) {
>+ f = (AVD3D12VAFrame*)frame->data[1];
>+ output_data = (AVD3D12VAFrame*)frame->data[0];
>+ } else {
>+ f = (AVD3D12VAFrame*)frame->data[0];
>+ }
>+
>+ ID3D12Resource* resource = (ID3D12Resource*)f->texture;
>
> ID3D12VideoDecodeCommandList *cmd_list = ctx->command_list;
> D3D12_RESOURCE_BARRIER barriers[32] = { 0 };
>@@ -469,6 +493,14 @@ int ff_d3d12va_common_end_frame(AVCodecContext *avctx, AVFrame *frame,
> .pOutputTexture2D = resource,
> };
>
>+ if (frames_hwctx->flags &
>D3D12_RESOURCE_FLAG_VIDEO_DECODE_REFERENCE_ONLY) {
>+ output_args.pOutputTexture2D = output_data->texture;
>+
>+ output_args.ConversionArguments.Enable = 1;
>+ output_args.ConversionArguments.pReferenceTexture2D = resource;
>+ output_args.ConversionArguments.ReferenceSubresource = 0;
>+ }
>+
> UINT num_barrier = 1;
> barriers[0] = (D3D12_RESOURCE_BARRIER) {
> .Type = D3D12_RESOURCE_BARRIER_TYPE_TRANSITION,
>@@ -481,6 +513,20 @@ int ff_d3d12va_common_end_frame(AVCodecContext
>*avctx, AVFrame *frame,
> },
> };
>
>+ if (frames_hwctx->flags &
>D3D12_RESOURCE_FLAG_VIDEO_DECODE_REFERENCE_ONLY) {
>+ barriers[1] = (D3D12_RESOURCE_BARRIER) {
>+ .Type = D3D12_RESOURCE_BARRIER_TYPE_TRANSITION,
>+ .Flags = D3D12_RESOURCE_BARRIER_FLAG_NONE,
>+ .Transition = {
>+ .pResource = output_data->texture,
>+ .Subresource = D3D12_RESOURCE_BARRIER_ALL_SUBRESOURCES,
>+ .StateBefore = D3D12_RESOURCE_STATE_COMMON,
>+ .StateAfter = D3D12_RESOURCE_STATE_VIDEO_DECODE_WRITE,
>+ },
>+ };
>+ num_barrier++;
>+ }
>+
> memset(ctx->ref_subresources, 0, sizeof(UINT) * ctx->max_num_ref);
> input_args.ReferenceFrames.NumTexture2Ds = ctx->max_num_ref;
> input_args.ReferenceFrames.ppTexture2Ds = ctx->ref_resources;
>@@ -505,7 +551,7 @@ int ff_d3d12va_common_end_frame(AVCodecContext *avctx, AVFrame *frame,
>
> DX_CHECK(ID3D12VideoDecodeCommandList_Reset(cmd_list,
>command_allocator));
>
>- num_barrier += d3d12va_update_reference_frames_state(avctx, &barriers[1],
>resource, D3D12_RESOURCE_STATE_COMMON,
>D3D12_RESOURCE_STATE_VIDEO_DECODE_READ);
>+ num_barrier += d3d12va_update_reference_frames_state(avctx,
>+ &barriers[num_barrier], resource, D3D12_RESOURCE_STATE_COMMON,
>+ D3D12_RESOURCE_STATE_VIDEO_DECODE_READ);
>
> ID3D12VideoDecodeCommandList_ResourceBarrier(cmd_list, num_barrier,
>barriers);
>
>@@ -522,6 +568,10 @@ int ff_d3d12va_common_end_frame(AVCodecContext
>*avctx, AVFrame *frame,
>
> DX_CHECK(ID3D12CommandQueue_Signal(ctx->command_queue, f-
>>sync_ctx.fence, ++f->sync_ctx.fence_value));
>
>+ if (frames_hwctx->flags &
>D3D12_RESOURCE_FLAG_VIDEO_DECODE_REFERENCE_ONLY) {
>+ DX_CHECK(ID3D12CommandQueue_Signal(ctx->command_queue,
>output_data->sync_ctx.fence, ++output_data->sync_ctx.fence_value));
>+ }
>+
> DX_CHECK(ID3D12CommandQueue_Signal(ctx->command_queue, ctx-
>>sync_ctx.fence, ++ctx->sync_ctx.fence_value));
>
> ret = d3d12va_discard_helper_objects(avctx, command_allocator, buffer, ctx->sync_ctx.fence_value);
>diff --git a/libavutil/hwcontext_d3d12va.c b/libavutil/hwcontext_d3d12va.c
>index 6507cf69c1..328827b040 100644
>--- a/libavutil/hwcontext_d3d12va.c
>+++ b/libavutil/hwcontext_d3d12va.c
>@@ -49,6 +49,24 @@ typedef struct D3D12VAFramesContext {
> ID3D12GraphicsCommandList *command_list;
> AVD3D12VASyncContext sync_ctx;
> UINT luma_component_size;
>+
>+ /**
>+ * The Reference-Only feature in DirectX 12 is a memory optimization
>+ * technique designed for video decoding/encoding scenarios.
>+ * This feature requires that reference resources must be allocated
>+ * with the `D3D12_RESOURCE_FLAG_VIDEO_DECODE_REFERENCE_ONLY`
>resource flag.
>+ * Reference textures must also be separated from output textures.
>+ * To correctly support the Reference-Only feature, two parallel resource
>+ * pools must be configured and managed:
>+ * 1. General Resource Pool:
>+ * - Contains resources used for outputting decoded frames.
>+ * - Defined in `AVHWFramesContext` and manages the final decoded
>textures.
>+ * 2. Reference-Only Resource Pool:
>+ * - Dedicated to storing reference frame resources.
>+ * - Resources created with the
>`D3D12_RESOURCE_FLAG_VIDEO_DECODE_REFERENCE_ONLY`
>+ * flag are allocated to this pool.
>+ */
>+ AVBufferPool *pool_reference_only;
> } D3D12VAFramesContext;
>
> typedef struct D3D12VADevicePriv {
>@@ -174,7 +192,8 @@ fail:
>
> static void d3d12va_frames_uninit(AVHWFramesContext *ctx) {
>- D3D12VAFramesContext *s = ctx->hwctx;
>+ D3D12VAFramesContext *s = ctx->hwctx;
>+ AVD3D12VAFramesContext *frames_hwctx = &s->p;
>
> D3D12_OBJECT_RELEASE(s->sync_ctx.fence);
> if (s->sync_ctx.event)
>@@ -185,6 +204,11 @@ static void d3d12va_frames_uninit(AVHWFramesContext
>*ctx)
> D3D12_OBJECT_RELEASE(s->command_allocator);
> D3D12_OBJECT_RELEASE(s->command_list);
> D3D12_OBJECT_RELEASE(s->command_queue);
>+
>+ if (frames_hwctx->flags &
>D3D12_RESOURCE_FLAG_VIDEO_DECODE_REFERENCE_ONLY) {
>+ if (s->pool_reference_only)
>+ av_buffer_pool_uninit(&s->pool_reference_only);
>+ }
> }
>
> static int d3d12va_frames_get_constraints(AVHWDeviceContext *ctx, const void *hwconfig, AVHWFramesConstraints *constraints)
>@@ -281,6 +305,7 @@ fail:
> static int d3d12va_frames_init(AVHWFramesContext *ctx) {
> AVD3D12VAFramesContext *hwctx = ctx->hwctx;
>+ D3D12VAFramesContext *s = ctx->hwctx;
> int i;
>
> for (i = 0; i < FF_ARRAY_ELEMS(supported_formats); i++) {
>@@ -304,16 +329,43 @@ static int d3d12va_frames_init(AVHWFramesContext *ctx)
> if (!ffhwframesctx(ctx)->pool_internal)
> return AVERROR(ENOMEM);
>
>+ s->pool_reference_only = NULL;
>+
> return 0;
> }
>
> static int d3d12va_get_buffer(AVHWFramesContext *ctx, AVFrame *frame) {
> int ret;
>+ D3D12VAFramesContext *s = ctx->hwctx;
>+ AVD3D12VAFramesContext *frames_hwctx = &s->p;
>
>- frame->buf[0] = av_buffer_pool_get(ctx->pool);
>- if (!frame->buf[0])
>- return AVERROR(ENOMEM);
>+ if (frames_hwctx->flags &
>D3D12_RESOURCE_FLAG_VIDEO_DECODE_REFERENCE_ONLY) {
>+ /*
>+ * for the output texture, temporarily unset
>D3D12_RESOURCE_FLAG_VIDEO_DECODE_REFERENCE_ONLY
>+ * and D3D12_RESOURCE_FLAG_DENY_SHADER_RESOURCE
>+ */
>+ D3D12_RESOURCE_FLAGS temp_flags = frames_hwctx->flags;
>+ frames_hwctx->flags &=
>+ ~(D3D12_RESOURCE_FLAG_VIDEO_DECODE_REFERENCE_ONLY |
>+ D3D12_RESOURCE_FLAG_DENY_SHADER_RESOURCE);
>+
>+ frame->buf[0] = av_buffer_pool_get(ctx->pool);
>+ if (!frame->buf[0])
>+ return AVERROR(ENOMEM);
>+
>+ if (s->pool_reference_only == NULL) {
>+ s->pool_reference_only = av_buffer_pool_init2(sizeof(AVD3D12VAFrame),
>+ ctx, d3d12va_pool_alloc, NULL);
>+ }
>+
>+ frames_hwctx->flags = temp_flags;
>+ frame->buf[1] = av_buffer_pool_get(s->pool_reference_only);
>+ if (!frame->buf[1])
>+ return AVERROR(ENOMEM);
>+ } else {
>+ frame->buf[0] = av_buffer_pool_get(ctx->pool);
>+ if (!frame->buf[0])
>+ return AVERROR(ENOMEM);
>+ }
>
> ret = av_image_fill_arrays(frame->data, frame->linesize, NULL,
> ctx->sw_format, ctx->width, ctx->height,
>@@ -322,6 +374,11 @@ static int d3d12va_get_buffer(AVHWFramesContext *ctx, AVFrame *frame)
> return ret;
>
> frame->data[0] = frame->buf[0]->data;
>+
>+ if (frames_hwctx->flags &
>D3D12_RESOURCE_FLAG_VIDEO_DECODE_REFERENCE_ONLY) {
>+ frame->data[1] = frame->buf[1]->data;
>+ }
>+
> frame->format = AV_PIX_FMT_D3D12;
> frame->width = ctx->width;
> frame->height = ctx->height;
>--
>2.45.2.windows.1
>
>_______________________________________________
>ffmpeg-devel mailing list
>ffmpeg-devel at ffmpeg.org
>https://ffmpeg.org/mailman/listinfo/ffmpeg-devel
>
>To unsubscribe, visit link above, or email ffmpeg-devel-request at ffmpeg.org with
>subject "unsubscribe".
More information about the ffmpeg-devel
mailing list