[FFmpeg-devel] [PATCH] avcodec/amfenc: DX12 Reference-only feature support

Tong Wu wutong1208 at outlook.com
Mon Feb 10 17:18:18 EET 2025


Araz Iusubov:
>Subject: [FFmpeg-devel] [PATCH] avcodec/amfenc: DX12 Reference-only feature
>support
>
>The Reference-Only feature in DirectX 12 is a memory optimization technique
>designed for video decoding scenarios.
>This feature requires that reference resources must be allocated with the
>D3D12_RESOURCE_FLAG_VIDEO_DECODE_REFERENCE_ONLY resource flag.
>Reference textures must also be separated from output textures.
>This feature is not supported in the current version of ffmpeg.
>Since AMD GPUs use this feature in their Direct3D 12 decoder, ffmpeg currently
>cannot do Direct3D 12 decoding on AMD GPUs.
>To properly support the Reference-Only feature, two parallel resource pools must
>be configured and managed:
>General Resource Pool:
>Contains resources used for output decoded frames.
>Defined in AVHWFramesContext and manages the final decoded textures.
>Reference-Only Resource Pool:
>Intended for storing reference frame resources.
>Resources created with the
>D3D12_RESOURCE_FLAG_VIDEO_DECODE_REFERENCE_ONLY flag are allocated
>from a separate, dedicated AVBufferPool.
>
>---
> libavcodec/d3d12va_decode.c   | 58 ++++++++++++++++++++++++++++---
> libavutil/hwcontext_d3d12va.c | 65 ++++++++++++++++++++++++++++++++---
> 2 files changed, 115 insertions(+), 8 deletions(-)
>
>diff --git a/libavcodec/d3d12va_decode.c b/libavcodec/d3d12va_decode.c index
>3b8978635e..8916f94d10 100644
>--- a/libavcodec/d3d12va_decode.c
>+++ b/libavcodec/d3d12va_decode.c
>@@ -51,11 +51,19 @@ unsigned ff_d3d12va_get_surface_index(const
>AVCodecContext *avctx,
>                                       D3D12VADecodeContext *ctx, const AVFrame *frame,
>                                       int curr)  {
>+    AVHWFramesContext      *frames_ctx   = D3D12VA_FRAMES_CONTEXT(avctx);
>+    AVD3D12VAFramesContext *frames_hwctx = frames_ctx->hwctx;
>+
>     AVD3D12VAFrame *f;
>     ID3D12Resource *res;
>     unsigned i;
>
>-    f = (AVD3D12VAFrame *)frame->data[0];
>+    if (frames_hwctx->flags &
>D3D12_RESOURCE_FLAG_VIDEO_DECODE_REFERENCE_ONLY) {
>+        f = (AVD3D12VAFrame*)frame->data[1];
>+    } else {
>+        f = (AVD3D12VAFrame*)frame->data[0];
>+    }
>+
>     if (!f)
>         goto fail;
>
>@@ -250,6 +258,11 @@ static int d3d12va_create_decoder(AVCodecContext
>*avctx)
>         return AVERROR_PATCHWELCOME;
>     }

Does this also need to handle the case where DecodeTier == D3D12_VIDEO_DECODE_TIER_2?

>
>+    if (feature.ConfigurationFlags &
>D3D12_VIDEO_DECODE_CONFIGURATION_FLAG_REFERENCE_ONLY_ALLOCATI
>ONS_REQUIRED) {
>+        frames_hwctx->flags |=
>(D3D12_RESOURCE_FLAG_VIDEO_DECODE_REFERENCE_ONLY |
>D3D12_RESOURCE_FLAG_DENY_SHADER_RESOURCE);
>+        av_log(avctx, AV_LOG_INFO, "Reference-Only Allocations are required for
>this configuration.\n");
>+    }
>+
>     desc = (D3D12_VIDEO_DECODER_DESC) {
>         .NodeMask = 0,
>         .Configuration = ctx->cfg,
>@@ -440,8 +453,19 @@ int ff_d3d12va_common_end_frame(AVCodecContext
>*avctx, AVFrame *frame,
>     D3D12VADecodeContext   *ctx               = D3D12VA_DECODE_CONTEXT(avctx);
>     ID3D12Resource         *buffer            = NULL;
>     ID3D12CommandAllocator *command_allocator = NULL;
>-    AVD3D12VAFrame         *f                 = (AVD3D12VAFrame *)frame->data[0];
>-    ID3D12Resource         *resource          = (ID3D12Resource *)f->texture;
>+    AVHWFramesContext      *frames_ctx        =
>D3D12VA_FRAMES_CONTEXT(avctx);
>+    AVD3D12VAFramesContext *frames_hwctx      = frames_ctx->hwctx;
>+    AVD3D12VAFrame         *f                 = NULL;
>+    AVD3D12VAFrame         *output_data       = NULL;
>+
>+    if (frames_hwctx->flags &
>D3D12_RESOURCE_FLAG_VIDEO_DECODE_REFERENCE_ONLY) {
>+        f           = (AVD3D12VAFrame*)frame->data[1];
>+        output_data = (AVD3D12VAFrame*)frame->data[0];
>+    } else {
>+        f           = (AVD3D12VAFrame*)frame->data[0];
>+    }
>+
>+    ID3D12Resource* resource = (ID3D12Resource*)f->texture;
>
>     ID3D12VideoDecodeCommandList *cmd_list = ctx->command_list;
>     D3D12_RESOURCE_BARRIER barriers[32] = { 0 }; @@ -469,6 +493,14 @@ int
>ff_d3d12va_common_end_frame(AVCodecContext *avctx, AVFrame *frame,
>         .pOutputTexture2D    = resource,
>     };
>
>+    if (frames_hwctx->flags &
>D3D12_RESOURCE_FLAG_VIDEO_DECODE_REFERENCE_ONLY) {
>+        output_args.pOutputTexture2D = output_data->texture;
>+
>+        output_args.ConversionArguments.Enable               = 1;
>+        output_args.ConversionArguments.pReferenceTexture2D  = resource;
>+        output_args.ConversionArguments.ReferenceSubresource = 0;
>+    }
>+
>     UINT num_barrier = 1;
>     barriers[0] = (D3D12_RESOURCE_BARRIER) {
>         .Type  = D3D12_RESOURCE_BARRIER_TYPE_TRANSITION,
>@@ -481,6 +513,20 @@ int ff_d3d12va_common_end_frame(AVCodecContext
>*avctx, AVFrame *frame,
>         },
>     };
>
>+    if (frames_hwctx->flags &
>D3D12_RESOURCE_FLAG_VIDEO_DECODE_REFERENCE_ONLY) {
>+        barriers[1] = (D3D12_RESOURCE_BARRIER) {
>+            .Type  = D3D12_RESOURCE_BARRIER_TYPE_TRANSITION,
>+            .Flags = D3D12_RESOURCE_BARRIER_FLAG_NONE,
>+            .Transition = {
>+                .pResource   = output_data->texture,
>+                .Subresource = D3D12_RESOURCE_BARRIER_ALL_SUBRESOURCES,
>+                .StateBefore = D3D12_RESOURCE_STATE_COMMON,
>+                .StateAfter  = D3D12_RESOURCE_STATE_VIDEO_DECODE_WRITE,
>+            },
>+        };
>+        num_barrier++;
>+    }
>+
>     memset(ctx->ref_subresources, 0, sizeof(UINT) * ctx->max_num_ref);
>     input_args.ReferenceFrames.NumTexture2Ds = ctx->max_num_ref;
>     input_args.ReferenceFrames.ppTexture2Ds  = ctx->ref_resources; @@ -505,7
>+551,7 @@ int ff_d3d12va_common_end_frame(AVCodecContext *avctx,
>AVFrame *frame,
>
>     DX_CHECK(ID3D12VideoDecodeCommandList_Reset(cmd_list,
>command_allocator));
>
>-    num_barrier += d3d12va_update_reference_frames_state(avctx, &barriers[1],
>resource, D3D12_RESOURCE_STATE_COMMON,
>D3D12_RESOURCE_STATE_VIDEO_DECODE_READ);
>+    num_barrier += d3d12va_update_reference_frames_state(avctx,
>+ &barriers[num_barrier], resource, D3D12_RESOURCE_STATE_COMMON,
>+ D3D12_RESOURCE_STATE_VIDEO_DECODE_READ);
>
>     ID3D12VideoDecodeCommandList_ResourceBarrier(cmd_list, num_barrier,
>barriers);
>
>@@ -522,6 +568,10 @@ int ff_d3d12va_common_end_frame(AVCodecContext
>*avctx, AVFrame *frame,
>
>     DX_CHECK(ID3D12CommandQueue_Signal(ctx->command_queue, f-
>>sync_ctx.fence, ++f->sync_ctx.fence_value));
>
>+    if (frames_hwctx->flags &
>D3D12_RESOURCE_FLAG_VIDEO_DECODE_REFERENCE_ONLY) {
>+        DX_CHECK(ID3D12CommandQueue_Signal(ctx->command_queue,
>output_data->sync_ctx.fence, ++output_data->sync_ctx.fence_value));
>+    }
>+
>     DX_CHECK(ID3D12CommandQueue_Signal(ctx->command_queue, ctx-
>>sync_ctx.fence, ++ctx->sync_ctx.fence_value));
>
>     ret = d3d12va_discard_helper_objects(avctx, command_allocator, buffer, ctx-
>>sync_ctx.fence_value); diff --git a/libavutil/hwcontext_d3d12va.c
>b/libavutil/hwcontext_d3d12va.c index 6507cf69c1..328827b040 100644
>--- a/libavutil/hwcontext_d3d12va.c
>+++ b/libavutil/hwcontext_d3d12va.c
>@@ -49,6 +49,24 @@ typedef struct D3D12VAFramesContext {
>     ID3D12GraphicsCommandList *command_list;
>     AVD3D12VASyncContext       sync_ctx;
>     UINT                       luma_component_size;
>+
>+    /**
>+     * The Reference-Only feature in DirectX 12 is a memory optimization
>+     * technique designed for video decoding/encoding scenarios.
>+     * This feature requires that reference resources must be allocated
>+     * with the `D3D12_RESOURCE_FLAG_VIDEO_DECODE_REFERENCE_ONLY`
>resource flag.
>+     * Reference textures must also be separated from output textures.
>+     * To correctly support the Reference-Only feature, two parallel resource
>+     * pools must be configured and managed:
>+     * 1. General Resource Pool:
>+     *   - Contains resources used for outputting decoded frames.
>+     *   - Defined in `AVHWFramesContext` and manages the final decoded
>textures.
>+     * 2. Reference-Only Resource Pool:
>+     *   - Dedicated to storing reference frame resources.
>+     *   - Resources created with the
>`D3D12_RESOURCE_FLAG_VIDEO_DECODE_REFERENCE_ONLY`
>+     *     flag are allocated to this pool.
>+     */
>+    AVBufferPool              *pool_reference_only;
> } D3D12VAFramesContext;
>
> typedef struct D3D12VADevicePriv {
>@@ -174,7 +192,8 @@ fail:
>
> static void d3d12va_frames_uninit(AVHWFramesContext *ctx)  {
>-    D3D12VAFramesContext *s = ctx->hwctx;
>+    D3D12VAFramesContext   *s            = ctx->hwctx;
>+    AVD3D12VAFramesContext *frames_hwctx = &s->p;
>
>     D3D12_OBJECT_RELEASE(s->sync_ctx.fence);
>     if (s->sync_ctx.event)
>@@ -185,6 +204,11 @@ static void d3d12va_frames_uninit(AVHWFramesContext
>*ctx)
>     D3D12_OBJECT_RELEASE(s->command_allocator);
>     D3D12_OBJECT_RELEASE(s->command_list);
>     D3D12_OBJECT_RELEASE(s->command_queue);
>+
>+    if (frames_hwctx->flags &
>D3D12_RESOURCE_FLAG_VIDEO_DECODE_REFERENCE_ONLY) {
>+        if (s->pool_reference_only)
>+            av_buffer_pool_uninit(&s->pool_reference_only);
>+    }
> }
>
> static int d3d12va_frames_get_constraints(AVHWDeviceContext *ctx, const void
>*hwconfig, AVHWFramesConstraints *constraints) @@ -281,6 +305,7 @@ fail:
> static int d3d12va_frames_init(AVHWFramesContext *ctx)  {
>     AVD3D12VAFramesContext *hwctx = ctx->hwctx;
>+    D3D12VAFramesContext   *s     = ctx->hwctx;
>     int i;
>
>     for (i = 0; i < FF_ARRAY_ELEMS(supported_formats); i++) { @@ -304,16 +329,43
>@@ static int d3d12va_frames_init(AVHWFramesContext *ctx)
>     if (!ffhwframesctx(ctx)->pool_internal)
>         return AVERROR(ENOMEM);
>
>+    s->pool_reference_only = NULL;
>+
>     return 0;
> }
>
> static int d3d12va_get_buffer(AVHWFramesContext *ctx, AVFrame *frame)  {
>     int ret;
>+    D3D12VAFramesContext   *s            = ctx->hwctx;
>+    AVD3D12VAFramesContext *frames_hwctx = &s->p;
>
>-    frame->buf[0] = av_buffer_pool_get(ctx->pool);
>-    if (!frame->buf[0])
>-        return AVERROR(ENOMEM);
>+    if (frames_hwctx->flags &
>D3D12_RESOURCE_FLAG_VIDEO_DECODE_REFERENCE_ONLY) {
>+        /*
>+         * for the output texture, temporarily unset
>D3D12_RESOURCE_FLAG_VIDEO_DECODE_REFERENCE_ONLY
>+         * and D3D12_RESOURCE_FLAG_DENY_SHADER_RESOURCE
>+        */
>+        D3D12_RESOURCE_FLAGS temp_flags = frames_hwctx->flags;
>+        frames_hwctx->flags &=
>+ ~(D3D12_RESOURCE_FLAG_VIDEO_DECODE_REFERENCE_ONLY |
>+ D3D12_RESOURCE_FLAG_DENY_SHADER_RESOURCE);
>+
>+        frame->buf[0] = av_buffer_pool_get(ctx->pool);
>+        if (!frame->buf[0])
>+            return AVERROR(ENOMEM);
>+
>+        if (s->pool_reference_only == NULL) {
>+            s->pool_reference_only = av_buffer_pool_init2(sizeof(AVD3D12VAFrame),
>+                ctx, d3d12va_pool_alloc, NULL);
>+        }
>+
>+        frames_hwctx->flags = temp_flags;
>+        frame->buf[1] = av_buffer_pool_get(s->pool_reference_only);
>+        if (!frame->buf[1])
>+            return AVERROR(ENOMEM);
>+    } else {
>+        frame->buf[0] = av_buffer_pool_get(ctx->pool);
>+        if (!frame->buf[0])
>+            return AVERROR(ENOMEM);
>+    }
>
>     ret = av_image_fill_arrays(frame->data, frame->linesize, NULL,
>                                ctx->sw_format, ctx->width, ctx->height, @@ -322,6 +374,11
>@@ static int d3d12va_get_buffer(AVHWFramesContext *ctx, AVFrame *frame)
>         return ret;
>
>     frame->data[0] = frame->buf[0]->data;
>+
>+    if (frames_hwctx->flags &
>D3D12_RESOURCE_FLAG_VIDEO_DECODE_REFERENCE_ONLY) {
>+        frame->data[1] = frame->buf[1]->data;
>+    }
>+
>     frame->format  = AV_PIX_FMT_D3D12;
>     frame->width   = ctx->width;
>     frame->height  = ctx->height;
>--
>2.45.2.windows.1
>
>_______________________________________________
>ffmpeg-devel mailing list
>ffmpeg-devel at ffmpeg.org
>https://ffmpeg.org/mailman/listinfo/ffmpeg-devel
>
>To unsubscribe, visit link above, or email ffmpeg-devel-request at ffmpeg.org with
>subject "unsubscribe".


More information about the ffmpeg-devel mailing list