[FFmpeg-devel] [PATCH 05/16] avutil: add common code for nvtegra
Rémi Denis-Courmont
remi at remlab.net
Fri May 31 11:32:59 EEST 2024
Le 30 mai 2024 22:43:07 GMT+03:00, averne <averne381 at gmail.com> a écrit :
>This includes a new pixel format for nvtegra hardware frames, and several objects for interaction with hardware blocks.
>In particular, this contains code for channels (handles to hardware engines), maps (memory-mapped buffers shared with engines), and command buffers (abstraction for building command lists sent to the engines).
>
>Signed-off-by: averne <averne381 at gmail.com>
>---
> configure | 2 +
> libavutil/Makefile | 4 +
> libavutil/nvtegra.c | 1035 ++++++++++++++++++++++++++++++++++++
> libavutil/nvtegra.h | 258 +++++++++
> libavutil/nvtegra_host1x.h | 94 ++++
> libavutil/pixdesc.c | 4 +
> libavutil/pixfmt.h | 8 +
> 7 files changed, 1405 insertions(+)
> create mode 100644 libavutil/nvtegra.c
> create mode 100644 libavutil/nvtegra.h
> create mode 100644 libavutil/nvtegra_host1x.h
>
>diff --git a/configure b/configure
>index 09fb2aed1b..51f169bfbd 100755
>--- a/configure
>+++ b/configure
>@@ -361,6 +361,7 @@ External library support:
> --disable-vdpau disable Nvidia Video Decode and Presentation API for Unix code [autodetect]
> --disable-videotoolbox disable VideoToolbox code [autodetect]
> --disable-vulkan disable Vulkan code [autodetect]
>+ --enable-nvtegra enable nvtegra code [no]
>
> Toolchain options:
> --arch=ARCH select architecture [$arch]
>@@ -3151,6 +3152,7 @@ videotoolbox_hwaccel_deps="videotoolbox pthreads"
> videotoolbox_hwaccel_extralibs="-framework QuartzCore"
> vulkan_deps="threads"
> vulkan_deps_any="libdl LoadLibrary"
>+nvtegra_deps="gpl"
>
> av1_d3d11va_hwaccel_deps="d3d11va DXVA_PicParams_AV1"
> av1_d3d11va_hwaccel_select="av1_decoder"
>diff --git a/libavutil/Makefile b/libavutil/Makefile
>index 9c112bc58a..733a23a8a3 100644
>--- a/libavutil/Makefile
>+++ b/libavutil/Makefile
>@@ -52,6 +52,7 @@ HEADERS = adler32.h \
> hwcontext_videotoolbox.h \
> hwcontext_vdpau.h \
> hwcontext_vulkan.h \
>+ nvtegra.h \
> nvhost_ioctl.h \
> nvmap_ioctl.h \
> iamf.h \
>@@ -209,6 +210,7 @@ OBJS-$(CONFIG_VDPAU) += hwcontext_vdpau.o
> OBJS-$(CONFIG_VULKAN) += hwcontext_vulkan.o vulkan.o
>
> OBJS-$(!CONFIG_VULKAN) += hwcontext_stub.o
>+OBJS-$(CONFIG_NVTEGRA) += nvtegra.o
>
> OBJS += $(COMPAT_OBJS:%=../compat/%)
>
>@@ -230,6 +232,8 @@ SKIPHEADERS-$(CONFIG_VDPAU) += hwcontext_vdpau.h
> SKIPHEADERS-$(CONFIG_VULKAN) += hwcontext_vulkan.h vulkan.h \
> vulkan_functions.h \
> vulkan_loader.h
>+SKIPHEADERS-$(CONFIG_NVTEGRA) += nvtegra.h \
>+ nvtegra_host1x.h
>
> TESTPROGS = adler32 \
> aes \
>diff --git a/libavutil/nvtegra.c b/libavutil/nvtegra.c
>new file mode 100644
>index 0000000000..ad0bbbdfaa
>--- /dev/null
>+++ b/libavutil/nvtegra.c
>@@ -0,0 +1,1035 @@
>+/*
>+ * Copyright (c) 2024 averne <averne381 at gmail.com>
>+ *
>+ * This file is part of FFmpeg.
>+ *
>+ * FFmpeg is free software; you can redistribute it and/or modify
>+ * it under the terms of the GNU General Public License as published by
>+ * the Free Software Foundation; either version 2 of the License, or
>+ * (at your option) any later version.
>+ *
>+ * FFmpeg is distributed in the hope that it will be useful,
>+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
>+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
>+ * GNU General Public License for more details.
>+ *
>+ * You should have received a copy of the GNU General Public License along
>+ * with FFmpeg; if not, write to the Free Software Foundation, Inc.,
>+ * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
>+ */
>+
>+#ifndef __SWITCH__
>+# include <sys/ioctl.h>
>+# include <sys/mman.h>
>+# include <fcntl.h>
>+# include <unistd.h>
>+#else
>+# include <stdlib.h>
>+# include <switch.h>
>+#endif
>+
>+#include <string.h>
>+
>+#include "buffer.h"
>+#include "log.h"
>+#include "error.h"
>+#include "mem.h"
>+#include "thread.h"
>+
>+#include "nvhost_ioctl.h"
>+#include "nvmap_ioctl.h"
>+#include "nvtegra_host1x.h"
>+
>+#include "nvtegra.h"
>+
>+/*
>+ * Tag used by the kernel to identify allocations.
>+ * Official software has been seen using 0x900, 0xf00, 0x1100, 0x1400, 0x4000.
>+ */
>+#define MEM_TAG (0xfeed)
>+
>+struct DriverState {
>+ int nvmap_fd, nvhost_fd;
>+};
>+
>+static AVMutex g_driver_init_mtx = AV_MUTEX_INITIALIZER;
>+static struct DriverState *g_driver_state = NULL;
>+static AVBufferRef *g_driver_state_ref = NULL;
>+
>+static void free_driver_fds(void *opaque, uint8_t *data) {
>+ if (!g_driver_state)
>+ return;
>+
>+#ifndef __SWITCH__
>+ if (g_driver_state->nvmap_fd > 0)
>+ close(g_driver_state->nvmap_fd);
>+
>+ if (g_driver_state->nvhost_fd > 0)
>+ close(g_driver_state->nvhost_fd);
>+#else
>+ nvFenceExit();
>+ nvMapExit();
>+ nvExit();
>+ mmuExit();
>+#endif
>+
>+ g_driver_init_mtx = (AVMutex)AV_MUTEX_INITIALIZER;
>+ g_driver_state_ref = NULL;
>+ av_freep(&g_driver_state);
>+}
>+
>+static int init_driver_fds(void) {
>+ AVBufferRef *ref;
>+ struct DriverState *state;
>+ int err;
>+
>+ state = av_mallocz(sizeof(*state));
>+ if (!state)
>+ return AVERROR(ENOMEM);
>+
>+ ref = av_buffer_create((uint8_t *)state, sizeof(*state), free_driver_fds, NULL, 0);
>+ if (!state)
>+ return AVERROR(ENOMEM);
>+
>+ g_driver_state = state;
>+ g_driver_state_ref = ref;
>+
>+#ifndef __SWITCH__
>+ err = open("/dev/nvmap", O_RDWR | O_SYNC);
There are helpers to open files, and you're missing the close-on-exec flag here. Also, it's not clear why you need O_SYNC.
But did you consider just reimplementing libnvdec instead of putting the device driver directly in FFmpeg?
>+ if (err < 0)
>+ return AVERROR(errno);
>+ state->nvmap_fd = err;
>+
>+ err = open("/dev/nvhost-ctrl", O_RDWR | O_SYNC);
>+ if (err < 0)
>+ return AVERROR(errno);
>+ state->nvhost_fd = err;
>+#else
>+ err = nvInitialize();
>+ if (R_FAILED(err))
>+ return AVERROR(err);
>+
>+ err = nvMapInit();
>+ if (R_FAILED(err))
>+ return AVERROR(err);
>+ state->nvmap_fd = nvMapGetFd();
>+
>+ err = nvFenceInit();
>+ if (R_FAILED(err))
>+ return AVERROR(err);
>+ /* libnx doesn't export the nvhost-ctrl file descriptor */
>+
>+ err = mmuInitialize();
>+ if (R_FAILED(err))
>+ return AVERROR(err);
>+#endif
>+
>+ return 0;
>+}
>+
>+static inline int get_nvmap_fd(void) {
>+ if (!g_driver_state)
>+ return AVERROR_UNKNOWN;
>+
>+ if (!g_driver_state->nvmap_fd)
>+ return AVERROR_UNKNOWN;
>+
>+ return g_driver_state->nvmap_fd;
>+}
>+
>+static inline int get_nvhost_fd(void) {
>+ if (!g_driver_state)
>+ return AVERROR_UNKNOWN;
>+
>+ if (!g_driver_state->nvhost_fd)
>+ return AVERROR_UNKNOWN;
>+
>+ return g_driver_state->nvhost_fd;
>+}
>+
>+AVBufferRef *av_nvtegra_driver_init(void) {
>+ AVBufferRef *out = NULL;
>+ int err;
>+
>+ /*
>+ * We have to do this overly complex dance of putting driver fds in a refcounted struct,
>+ * otherwise initializing multiple hwcontexts would leak fds
>+ */
>+
>+ err = ff_mutex_lock(&g_driver_init_mtx);
>+ if (err != 0)
>+ goto exit;
>+
>+ if (g_driver_state_ref) {
>+ out = av_buffer_ref(g_driver_state_ref);
>+ goto exit;
>+ }
>+
>+ err = init_driver_fds();
>+ if (err < 0) {
>+ /* In case memory allocations failed, call the destructor ourselves */
>+ av_buffer_unref(&g_driver_state_ref);
>+ free_driver_fds(NULL, NULL);
>+ goto exit;
>+ }
>+
>+ out = g_driver_state_ref;
>+
>+exit:
>+ ff_mutex_unlock(&g_driver_init_mtx);
>+ return out;
>+}
>+
>+int av_nvtegra_channel_open(AVNVTegraChannel *channel, const char *dev) {
>+ int err;
>+#ifndef __SWITCH__
>+ struct nvhost_get_param_arg args;
>+
>+ err = open(dev, O_RDWR);
>+ if (err < 0)
>+ return AVERROR(errno);
>+
>+ channel->fd = err;
>+
>+ args = (struct nvhost_get_param_arg){0};
>+
>+ err = ioctl(channel->fd, NVHOST_IOCTL_CHANNEL_GET_SYNCPOINT, &args);
>+ if (err < 0)
>+ goto fail;
>+
>+ channel->syncpt = args.value;
>+
>+ return 0;
>+
>+fail:
>+ close(channel->fd);
>+ return AVERROR(errno);
>+#else
>+ err = nvChannelCreate(&channel->channel, dev);
>+ if (R_FAILED(err))
>+ return AVERROR(err);
>+
>+ err = nvioctlChannel_GetSyncpt(channel->channel.fd, 0, &channel->syncpt);
>+ if (R_FAILED(err))
>+ goto fail;
>+
>+ return 0;
>+
>+fail:
>+ nvChannelClose(&channel->channel);
>+ return AVERROR(err);
>+#endif
>+}
>+
>+int av_nvtegra_channel_close(AVNVTegraChannel *channel) {
>+#ifndef __SWITCH__
>+ if (!channel->fd)
>+ return 0;
>+
>+ return close(channel->fd);
>+#else
>+ nvChannelClose(&channel->channel);
>+ return 0;
>+#endif
>+}
>+
>+int av_nvtegra_channel_get_clock_rate(AVNVTegraChannel *channel, uint32_t moduleid, uint32_t *clock_rate) {
>+ int err;
>+#ifndef __SWITCH__
>+ struct nvhost_clk_rate_args args;
>+
>+ args = (struct nvhost_clk_rate_args){
>+ .moduleid = moduleid,
>+ };
>+
>+ err = ioctl(channel->fd, NVHOST_IOCTL_CHANNEL_GET_CLK_RATE, &args);
>+ if (err < 0)
>+ return AVERROR(errno);
>+
>+ if (clock_rate)
>+ *clock_rate = args.rate;
>+
>+ return 0;
>+#else
>+ uint32_t tmp;
>+
>+ err = AVERROR(nvioctlChannel_GetModuleClockRate(channel->channel.fd, moduleid, &tmp));
>+ if (err < 0)
>+ return err;
>+
>+ if (clock_rate)
>+ *clock_rate = tmp * 1000;
>+
>+ return 0;
>+#endif
>+}
>+
>+int av_nvtegra_channel_set_clock_rate(AVNVTegraChannel *channel, uint32_t moduleid, uint32_t clock_rate) {
>+#ifndef __SWITCH__
>+ struct nvhost_clk_rate_args args;
>+
>+ args = (struct nvhost_clk_rate_args){
>+ .rate = clock_rate,
>+ .moduleid = moduleid,
>+ };
>+
>+ return (ioctl(channel->fd, NVHOST_IOCTL_CHANNEL_SET_CLK_RATE, &args) < 0) ? AVERROR(errno) : 0;
>+#else
>+ return AVERROR(nvioctlChannel_SetModuleClockRate(channel->channel.fd, moduleid, clock_rate / 1000));
>+#endif
>+}
>+
>+int av_nvtegra_channel_submit(AVNVTegraChannel *channel, AVNVTegraCmdbuf *cmdbuf, uint32_t *fence) {
>+ int err;
>+#ifndef __SWITCH__
>+ struct nvhost_submit_args args;
>+
>+ args = (struct nvhost_submit_args){
>+ .submit_version = NVHOST_SUBMIT_VERSION_V2,
>+ .num_syncpt_incrs = cmdbuf->num_syncpt_incrs,
>+ .num_cmdbufs = cmdbuf->num_cmdbufs,
>+ .num_relocs = cmdbuf->num_relocs,
>+ .num_waitchks = cmdbuf->num_waitchks,
>+ .timeout = 0,
>+ .flags = 0,
>+ .fence = 0,
>+ .syncpt_incrs = (uintptr_t)cmdbuf->syncpt_incrs,
>+ .cmdbuf_exts = (uintptr_t)cmdbuf->cmdbuf_exts,
>+ .checksum_methods = 0,
>+ .checksum_falcon_methods = 0,
>+ .pad = { 0 },
>+ .reloc_types = (uintptr_t)cmdbuf->reloc_types,
>+ .cmdbufs = (uintptr_t)cmdbuf->cmdbufs,
>+ .relocs = (uintptr_t)cmdbuf->relocs,
>+ .reloc_shifts = (uintptr_t)cmdbuf->reloc_shifts,
>+ .waitchks = (uintptr_t)cmdbuf->waitchks,
>+ .waitbases = 0,
>+ .class_ids = (uintptr_t)cmdbuf->class_ids,
>+ .fences = (uintptr_t)cmdbuf->fences,
>+ };
>+
>+ err = ioctl(channel->fd, NVHOST_IOCTL_CHANNEL_SUBMIT, &args);
>+ if (err < 0)
>+ return AVERROR(errno);
>+
>+ if (fence)
>+ *fence = args.fence;
>+
>+ return 0;
>+#else
>+ nvioctl_fence tmp;
>+
>+ err = nvioctlChannel_Submit(channel->channel.fd, (nvioctl_cmdbuf *)cmdbuf->cmdbufs, cmdbuf->num_cmdbufs,
>+ NULL, NULL, 0, (nvioctl_syncpt_incr *)cmdbuf->syncpt_incrs, cmdbuf->num_syncpt_incrs,
>+ &tmp, 1);
>+ if (R_FAILED(err))
>+ return AVERROR(err);
>+
>+ if (fence)
>+ *fence = tmp.value;
>+
>+ return 0;
>+#endif
>+}
>+
>+int av_nvtegra_channel_set_submit_timeout(AVNVTegraChannel *channel, uint32_t timeout_ms) {
>+#ifndef __SWITCH__
>+ struct nvhost_set_timeout_args args;
>+
>+ args = (struct nvhost_set_timeout_args){
>+ .timeout = timeout_ms,
>+ };
>+
>+ return (ioctl(channel->fd, NVHOST_IOCTL_CHANNEL_SET_TIMEOUT, &args) < 0) ? AVERROR(errno) : 0;
>+#else
>+ return AVERROR(nvioctlChannel_SetSubmitTimeout(channel->channel.fd, timeout_ms));
>+#endif
>+}
>+
>+int av_nvtegra_syncpt_wait(AVNVTegraChannel *channel, uint32_t threshold, int32_t timeout) {
>+#ifndef __SWITCH__
>+ struct nvhost_ctrl_syncpt_waitex_args args = {
>+ .id = channel->syncpt,
>+ .thresh = threshold,
>+ .timeout = timeout,
>+ };
>+
>+ return (ioctl(get_nvhost_fd(), NVHOST_IOCTL_CTRL_SYNCPT_WAITEX, &args) < 0) ? AVERROR(errno) : 0;
>+#else
>+ NvFence fence;
>+
>+ fence = (NvFence){
>+ .id = channel->syncpt,
>+ .value = threshold,
>+ };
>+
>+ return AVERROR(nvFenceWait(&fence, timeout));
>+#endif
>+}
>+
>+#ifdef __SWITCH__
>+static inline bool convert_cache_flags(uint32_t flags) {
>+ /* Return whether the map should be CPU-cacheable */
>+ switch (flags & NVMAP_HANDLE_CACHE_FLAG) {
>+ case NVMAP_HANDLE_INNER_CACHEABLE:
>+ case NVMAP_HANDLE_CACHEABLE:
>+ return true;
>+ default:
>+ return false;
>+ }
>+}
>+#endif
>+
>+int av_nvtegra_map_allocate(AVNVTegraMap *map, AVNVTegraChannel *channel, uint32_t size,
>+ uint32_t align, int heap_mask, int flags)
>+{
>+#ifndef __SWITCH__
>+ struct nvmap_create_handle create_args;
>+ struct nvmap_alloc_handle alloc_args;
>+ int err;
>+
>+ create_args = (struct nvmap_create_handle){
>+ .size = size,
>+ };
>+
>+ err = ioctl(get_nvmap_fd(), NVMAP_IOC_CREATE, &create_args);
>+ if (err < 0)
>+ return AVERROR(errno);
>+
>+ map->size = size;
>+ map->handle = create_args.handle;
>+
>+ alloc_args = (struct nvmap_alloc_handle){
>+ .handle = create_args.handle,
>+ .heap_mask = heap_mask,
>+ .flags = flags | (MEM_TAG << 16),
>+ .align = align,
>+ };
>+
>+ err = ioctl(get_nvmap_fd(), NVMAP_IOC_ALLOC, &alloc_args);
>+ if (err < 0)
>+ goto fail;
>+
>+ return 0;
>+
>+fail:
>+ av_nvtegra_map_free(map);
>+ return AVERROR(errno);
>+#else
>+ void *mem;
>+
>+ map->owner = channel->channel.fd;
>+
>+ size = FFALIGN(size, 0x1000);
>+
>+ mem = aligned_alloc(FFALIGN(align, 0x1000), size);
>+ if (!mem)
>+ return AVERROR(ENOMEM);
>+
>+ return AVERROR(nvMapCreate(&map->map, mem, size, 0x10000, NvKind_Pitch,
>+ convert_cache_flags(flags)));
>+#endif
>+}
>+
>+int av_nvtegra_map_free(AVNVTegraMap *map) {
>+#ifndef __SWITCH__
>+ int err;
>+
>+ if (!map->handle)
>+ return 0;
>+
>+ err = ioctl(get_nvmap_fd(), NVMAP_IOC_FREE, map->handle);
>+ if (err < 0)
>+ return AVERROR(errno);
>+
>+ map->handle = 0;
>+
>+ return 0;
>+#else
>+ void *addr = map->map.cpu_addr;
>+
>+ if (!map->map.cpu_addr)
>+ return 0;
>+
>+ nvMapClose(&map->map);
>+ free(addr);
>+ return 0;
>+#endif
>+}
>+
>+int av_nvtegra_map_from_va(AVNVTegraMap *map, AVNVTegraChannel *owner, void *mem,
>+ uint32_t size, uint32_t align, uint32_t flags)
>+{
>+#ifndef __SWITCH__
>+ struct nvmap_create_handle_from_va args;
>+ int err;
>+
>+ args = (struct nvmap_create_handle_from_va){
>+ .va = (uintptr_t)mem,
>+ .size = size,
>+ .flags = flags | (MEM_TAG << 16),
>+ };
>+
>+ err = ioctl(get_nvmap_fd(), NVMAP_IOC_FROM_VA, &args);
>+ if (err < 0)
>+ return AVERROR(errno);
>+
>+ map->cpu_addr = mem;
>+ map->size = size;
>+ map->handle = args.handle;
>+
>+ return 0;
>+#else
>+
>+ map->owner = owner->channel.fd;
>+
>+ return AVERROR(nvMapCreate(&map->map, mem, FFALIGN(size, 0x1000), 0x10000, NvKind_Pitch,
>+ convert_cache_flags(flags)));;
>+#endif
>+}
>+
>+int av_nvtegra_map_close(AVNVTegraMap *map) {
>+#ifndef __SWITCH__
>+ return av_nvtegra_map_free(map);
>+#else
>+ nvMapClose(&map->map);
>+ return 0;
>+#endif
>+}
>+
>+int av_nvtegra_map_map(AVNVTegraMap *map) {
>+#ifndef __SWITCH__
>+ void *addr;
>+
>+ addr = mmap(NULL, map->size, PROT_READ | PROT_WRITE, MAP_SHARED, map->handle, 0);
>+ if (addr == MAP_FAILED)
>+ return AVERROR(errno);
>+
>+ map->cpu_addr = addr;
>+
>+ return 0;
>+#else
>+ nvioctl_command_buffer_map params;
>+ int err;
>+
>+ params = (nvioctl_command_buffer_map){
>+ .handle = map->map.handle,
>+ };
>+
>+ err = nvioctlChannel_MapCommandBuffer(map->owner, &params, 1, false);
>+ if (R_FAILED(err))
>+ return AVERROR(err);
>+
>+ map->iova = params.iova;
>+
>+ return 0;
>+#endif
>+}
>+
>+int av_nvtegra_map_unmap(AVNVTegraMap *map) {
>+ int err;
>+#ifndef __SWITCH__
>+ if (!map->cpu_addr)
>+ return 0;
>+
>+ err = munmap(map->cpu_addr, map->size);
>+ if (err < 0)
>+ return AVERROR(errno);
>+
>+ map->cpu_addr = NULL;
>+
>+ return 0;
>+#else
>+ nvioctl_command_buffer_map params;
>+
>+ if (!map->iova)
>+ return 0;
>+
>+ params = (nvioctl_command_buffer_map){
>+ .handle = map->map.handle,
>+ .iova = map->iova,
>+ };
>+
>+ err = nvioctlChannel_UnmapCommandBuffer(map->owner, &params, 1, false);
>+ if (R_FAILED(err))
>+ return AVERROR(err);
>+
>+ map->iova = 0;
>+
>+ return 0;
>+#endif
>+}
>+
>+int av_nvtegra_map_cache_op(AVNVTegraMap *map, int op, void *addr, size_t len) {
>+#ifndef __SWITCH__
>+ struct nvmap_cache_op args;
>+
>+ args = (struct nvmap_cache_op){
>+ .addr = (uintptr_t)addr,
>+ .len = len,
>+ .handle = av_nvtegra_map_get_handle(map),
>+ .op = op,
>+ };
>+
>+ return AVERROR(ioctl(get_nvmap_fd(), NVMAP_IOC_CACHE, &args));
>+#else
>+ if (!map->map.is_cpu_cacheable)
>+ return 0;
>+
>+ switch (op) {
>+ case NVMAP_CACHE_OP_WB:
>+ armDCacheClean(addr, len);
>+ break;
>+ default:
>+ case NVMAP_CACHE_OP_INV:
>+ case NVMAP_CACHE_OP_WB_INV:
>+ /* libnx internally performs a clean-invalidate, since invalidate is a privileged instruction */
>+ armDCacheFlush(addr, len);
>+ break;
>+ }
>+
>+ return 0;
>+#endif
>+}
>+
>+int av_nvtegra_map_realloc(AVNVTegraMap *map, uint32_t size, uint32_t align,
>+ int heap_mask, int flags)
>+{
>+ AVNVTegraChannel channel;
>+ AVNVTegraMap tmp = {0};
>+ int err;
>+
>+ if (av_nvtegra_map_get_size(map) >= size)
>+ return 0;
>+
>+ /* Dummy channel object to hold the owner fd */
>+ channel = (AVNVTegraChannel){
>+#ifdef __SWITCH__
>+ .channel.fd = map->owner,
>+#endif
>+ };
>+
>+ err = av_nvtegra_map_create(&tmp, &channel, size, align, heap_mask, flags);
>+ if (err < 0)
>+ goto fail;
>+
>+ memcpy(av_nvtegra_map_get_addr(&tmp), av_nvtegra_map_get_addr(map), av_nvtegra_map_get_size(map));
>+
>+ err = av_nvtegra_map_destroy(map);
>+ if (err < 0)
>+ goto fail;
>+
>+ *map = tmp;
>+
>+ return 0;
>+
>+fail:
>+ av_nvtegra_map_destroy(&tmp);
>+ return err;
>+}
>+
>+int av_nvtegra_cmdbuf_init(AVNVTegraCmdbuf *cmdbuf) {
>+ cmdbuf->num_cmdbufs = 0;
>+#ifndef __SWITCH__
>+ cmdbuf->num_relocs = 0;
>+ cmdbuf->num_waitchks = 0;
>+#endif
>+ cmdbuf->num_syncpt_incrs = 0;
>+
>+#define NUM_INITIAL_CMDBUFS 3
>+#define NUM_INITIAL_RELOCS 15
>+#define NUM_INITIAL_SYNCPT_INCRS 3
>+
>+ cmdbuf->cmdbufs = av_malloc_array(NUM_INITIAL_CMDBUFS, sizeof(*cmdbuf->cmdbufs));
>+#ifndef __SWITCH__
>+ cmdbuf->cmdbuf_exts = av_malloc_array(NUM_INITIAL_CMDBUFS, sizeof(*cmdbuf->cmdbuf_exts));
>+ cmdbuf->class_ids = av_malloc_array(NUM_INITIAL_CMDBUFS, sizeof(*cmdbuf->class_ids));
>+#endif
>+
>+#ifndef __SWITCH__
>+ if (!cmdbuf->cmdbufs || !cmdbuf->cmdbuf_exts || !cmdbuf->class_ids)
>+#else
>+ if (!cmdbuf->cmdbufs)
>+#endif
>+ return AVERROR(ENOMEM);
>+
>+#ifndef __SWITCH__
>+ cmdbuf->relocs = av_malloc_array(NUM_INITIAL_RELOCS, sizeof(*cmdbuf->relocs));
>+ cmdbuf->reloc_types = av_malloc_array(NUM_INITIAL_RELOCS, sizeof(*cmdbuf->reloc_types));
>+ cmdbuf->reloc_shifts = av_malloc_array(NUM_INITIAL_RELOCS, sizeof(*cmdbuf->reloc_shifts));
>+ if (!cmdbuf->relocs || !cmdbuf->reloc_types || !cmdbuf->reloc_shifts)
>+ return AVERROR(ENOMEM);
>+#endif
>+
>+ cmdbuf->syncpt_incrs = av_malloc_array(NUM_INITIAL_SYNCPT_INCRS, sizeof(*cmdbuf->syncpt_incrs));
>+#ifndef __SWITCH__
>+ cmdbuf->fences = av_malloc_array(NUM_INITIAL_SYNCPT_INCRS, sizeof(*cmdbuf->fences));
>+#endif
>+
>+#ifndef __SWITCH__
>+ if (!cmdbuf->syncpt_incrs || !cmdbuf->fences)
>+#else
>+ if (!cmdbuf->syncpt_incrs)
>+#endif
>+ return AVERROR(ENOMEM);
>+
>+ return 0;
>+}
>+
>+int av_nvtegra_cmdbuf_deinit(AVNVTegraCmdbuf *cmdbuf) {
>+ av_freep(&cmdbuf->cmdbufs);
>+ av_freep(&cmdbuf->syncpt_incrs);
>+
>+#ifndef __SWITCH__
>+ av_freep(&cmdbuf->cmdbuf_exts), av_freep(&cmdbuf->class_ids);
>+ av_freep(&cmdbuf->relocs), av_freep(&cmdbuf->reloc_types), av_freep(&cmdbuf->reloc_shifts);
>+ av_freep(&cmdbuf->fences);
>+#endif
>+
>+ return 0;
>+}
>+
>+int av_nvtegra_cmdbuf_add_memory(AVNVTegraCmdbuf *cmdbuf, AVNVTegraMap *map, uint32_t offset, uint32_t size) {
>+ uint8_t *mem;
>+
>+ mem = av_nvtegra_map_get_addr(map);
>+
>+ cmdbuf->map = map;
>+ cmdbuf->mem_offset = offset;
>+ cmdbuf->mem_size = size;
>+
>+ cmdbuf->cur_word = (uint32_t *)(mem + cmdbuf->mem_offset);
>+
>+ return 0;
>+}
>+
>+int av_nvtegra_cmdbuf_clear(AVNVTegraCmdbuf *cmdbuf) {
>+ uint8_t *mem;
>+
>+ mem = av_nvtegra_map_get_addr(cmdbuf->map);
>+
>+ cmdbuf->num_cmdbufs = 0, cmdbuf->num_syncpt_incrs = 0;
>+#ifndef __SWITCH__
>+ cmdbuf->num_relocs = 0, cmdbuf->num_waitchks = 0;
>+#endif
>+
>+ cmdbuf->cur_word = (uint32_t *)(mem + cmdbuf->mem_offset);
>+ return 0;
>+}
>+
>+int av_nvtegra_cmdbuf_begin(AVNVTegraCmdbuf *cmdbuf, uint32_t class_id) {
>+ uint8_t *mem;
>+ void *tmp1;
>+#ifndef __SWITCH__
>+ void *tmp2, *tmp3;
>+#endif
>+
>+ mem = av_nvtegra_map_get_addr(cmdbuf->map);
>+
>+ tmp1 = av_realloc_array(cmdbuf->cmdbufs, cmdbuf->num_cmdbufs + 1, sizeof(*cmdbuf->cmdbufs));
>+#ifndef __SWITCH__
>+ tmp2 = av_realloc_array(cmdbuf->cmdbuf_exts, cmdbuf->num_cmdbufs + 1, sizeof(*cmdbuf->cmdbuf_exts));
>+ tmp3 = av_realloc_array(cmdbuf->class_ids, cmdbuf->num_cmdbufs + 1, sizeof(*cmdbuf->class_ids));
>+#endif
>+
>+#ifndef __SWITCH__
>+ if (!tmp1 || !tmp2 || !tmp3)
>+#else
>+ if (!tmp1)
>+#endif
>+ return AVERROR(ENOMEM);
>+
>+ cmdbuf->cmdbufs = tmp1;
>+
>+#ifndef __SWITCH__
>+ cmdbuf->cmdbuf_exts = tmp2, cmdbuf->class_ids = tmp3;
>+#endif
>+
>+ cmdbuf->cmdbufs[cmdbuf->num_cmdbufs] = (struct nvhost_cmdbuf){
>+ .mem = av_nvtegra_map_get_handle(cmdbuf->map),
>+ .offset = (uint8_t *)cmdbuf->cur_word - mem,
>+ };
>+
>+#ifndef __SWITCH__
>+ cmdbuf->cmdbuf_exts[cmdbuf->num_cmdbufs] = (struct nvhost_cmdbuf_ext){
>+ .pre_fence = -1,
>+ };
>+
>+ cmdbuf->class_ids[cmdbuf->num_cmdbufs] = class_id;
>+#endif
>+
>+#ifdef __SWITCH__
>+ if (cmdbuf->num_cmdbufs == 0)
>+ av_nvtegra_cmdbuf_push_word(cmdbuf, host1x_opcode_setclass(class_id, 0, 0));
>+#endif
>+
>+ return 0;
>+}
>+
>+int av_nvtegra_cmdbuf_end(AVNVTegraCmdbuf *cmdbuf) {
>+ cmdbuf->num_cmdbufs++;
>+ return 0;
>+}
>+
>+int av_nvtegra_cmdbuf_push_word(AVNVTegraCmdbuf *cmdbuf, uint32_t word) {
>+ uintptr_t mem_start = (uintptr_t)av_nvtegra_map_get_addr(cmdbuf->map) + cmdbuf->mem_offset;
>+
>+ if ((uintptr_t)cmdbuf->cur_word - mem_start >= cmdbuf->mem_size)
>+ return AVERROR(ENOMEM);
>+
>+ *cmdbuf->cur_word++ = word;
>+ cmdbuf->cmdbufs[cmdbuf->num_cmdbufs].words += 1;
>+ return 0;
>+}
>+
>+int av_nvtegra_cmdbuf_push_value(AVNVTegraCmdbuf *cmdbuf, uint32_t offset, uint32_t word) {
>+ int err;
>+
>+ err = av_nvtegra_cmdbuf_push_word(cmdbuf, host1x_opcode_incr(NV_THI_METHOD0>>2, 2));
>+ if (err < 0)
>+ return err;
>+
>+ err = av_nvtegra_cmdbuf_push_word(cmdbuf, offset);
>+ if (err < 0)
>+ return err;
>+
>+ err = av_nvtegra_cmdbuf_push_word(cmdbuf, word);
>+ if (err < 0)
>+ return err;
>+
>+ return 0;
>+}
>+
>+int av_nvtegra_cmdbuf_push_reloc(AVNVTegraCmdbuf *cmdbuf, uint32_t offset, AVNVTegraMap *target, uint32_t target_offset,
>+ int reloc_type, int shift)
>+{
>+ int err;
>+#ifndef __SWITCH__
>+ uint8_t *mem;
>+ void *tmp1, *tmp2, *tmp3;
>+
>+ mem = av_nvtegra_map_get_addr(cmdbuf->map);
>+
>+ tmp1 = av_realloc_array(cmdbuf->relocs, cmdbuf->num_relocs + 1, sizeof(*cmdbuf->relocs));
>+ tmp2 = av_realloc_array(cmdbuf->reloc_types, cmdbuf->num_relocs + 1, sizeof(*cmdbuf->reloc_types));
>+ tmp3 = av_realloc_array(cmdbuf->reloc_shifts, cmdbuf->num_relocs + 1, sizeof(*cmdbuf->reloc_shifts));
>+ if (!tmp1 || !tmp2 || !tmp3)
>+ return AVERROR(ENOMEM);
>+
>+ cmdbuf->relocs = tmp1, cmdbuf->reloc_types = tmp2, cmdbuf->reloc_shifts = tmp3;
>+
>+ err = av_nvtegra_cmdbuf_push_value(cmdbuf, offset, 0xdeadbeef);
>+ if (err < 0)
>+ return err;
>+
>+ cmdbuf->relocs[cmdbuf->num_relocs] = (struct nvhost_reloc){
>+ .cmdbuf_mem = av_nvtegra_map_get_handle(cmdbuf->map),
>+ .cmdbuf_offset = (uint8_t *)cmdbuf->cur_word - mem - sizeof(uint32_t),
>+ .target = av_nvtegra_map_get_handle(target),
>+ .target_offset = target_offset,
>+ };
>+
>+ cmdbuf->reloc_types[cmdbuf->num_relocs] = (struct nvhost_reloc_type){
>+ .reloc_type = reloc_type,
>+ };
>+
>+ cmdbuf->reloc_shifts[cmdbuf->num_relocs] = (struct nvhost_reloc_shift){
>+ .shift = shift,
>+ };
>+
>+ cmdbuf->num_relocs++;
>+
>+ return 0;
>+#else
>+ err = av_nvtegra_cmdbuf_push_value(cmdbuf, offset, (target->iova + target_offset) >> shift);
>+ if (err < 0)
>+ return err;
>+
>+ return 0;
>+#endif
>+}
>+
>+int av_nvtegra_cmdbuf_push_syncpt_incr(AVNVTegraCmdbuf *cmdbuf, uint32_t syncpt) {
>+ int err;
>+
>+ err = av_nvtegra_cmdbuf_push_word(cmdbuf, host1x_opcode_nonincr(NV_THI_INCR_SYNCPT>>2, 1));
>+ if (err < 0)
>+ return err;
>+
>+ err = av_nvtegra_cmdbuf_push_word(cmdbuf,
>+ AV_NVTEGRA_VALUE(NV_THI_INCR_SYNCPT, INDX, syncpt) |
>+ AV_NVTEGRA_ENUM (NV_THI_INCR_SYNCPT, COND, OP_DONE));
>+ if (err < 0)
>+ return err;
>+
>+ return 0;
>+}
>+
>+int av_nvtegra_cmdbuf_push_wait(AVNVTegraCmdbuf *cmdbuf, uint32_t syncpt, uint32_t fence) {
>+ int err;
>+
>+ err = av_nvtegra_cmdbuf_push_word(cmdbuf, host1x_opcode_setclass(HOST1X_CLASS_HOST1X, 0, 0));
>+ if (err < 0)
>+ return err;
>+
>+ err = av_nvtegra_cmdbuf_push_word(cmdbuf, host1x_opcode_mask(NV_CLASS_HOST_LOAD_SYNCPT_PAYLOAD>>2,
>+ (1<<(NV_CLASS_HOST_LOAD_SYNCPT_PAYLOAD - NV_CLASS_HOST_LOAD_SYNCPT_PAYLOAD)) |
>+ (1<<(NV_CLASS_HOST_WAIT_SYNCPT - NV_CLASS_HOST_LOAD_SYNCPT_PAYLOAD))));
>+ if (err < 0)
>+ return err;
>+
>+ err = av_nvtegra_cmdbuf_push_word(cmdbuf, fence);
>+ if (err < 0)
>+ return err;
>+
>+ err = av_nvtegra_cmdbuf_push_word(cmdbuf, syncpt);
>+ if (err < 0)
>+ return err;
>+
>+ return 0;
>+}
>+
>+int av_nvtegra_cmdbuf_add_syncpt_incr(AVNVTegraCmdbuf *cmdbuf, uint32_t syncpt, uint32_t fence)
>+{
>+ void *tmp1;
>+#ifndef __SWITCH__
>+ void *tmp2;
>+#endif
>+
>+ tmp1 = av_realloc_array(cmdbuf->syncpt_incrs, cmdbuf->num_syncpt_incrs + 1, sizeof(*cmdbuf->syncpt_incrs));
>+#ifndef __SWITCH__
>+ tmp2 = av_realloc_array(cmdbuf->fences, cmdbuf->num_syncpt_incrs + 1, sizeof(*cmdbuf->fences));
>+#endif
>+
>+#ifndef __SWITCH__
>+ if (!tmp1 || !tmp2)
>+#else
>+ if (!tmp1)
>+#endif
>+ return AVERROR(ENOMEM);
>+
>+ cmdbuf->syncpt_incrs = tmp1;
>+#ifndef __SWITCH__
>+ cmdbuf->fences = tmp2;
>+#endif
>+
>+ cmdbuf->syncpt_incrs[cmdbuf->num_syncpt_incrs] = (struct nvhost_syncpt_incr){
>+ .syncpt_id = syncpt,
>+ .syncpt_incrs = 1,
>+ };
>+
>+#ifndef __SWITCH__
>+ cmdbuf->fences[cmdbuf->num_syncpt_incrs] = fence;
>+#endif
>+
>+ cmdbuf->num_syncpt_incrs++;
>+
>+ return av_nvtegra_cmdbuf_push_syncpt_incr(cmdbuf, syncpt);
>+}
>+
>+int av_nvtegra_cmdbuf_add_waitchk(AVNVTegraCmdbuf *cmdbuf, uint32_t syncpt, uint32_t fence) {
>+#ifndef __SWITCH__
>+ uint8_t *mem;
>+ void *tmp;
>+
>+ mem = av_nvtegra_map_get_addr(cmdbuf->map);
>+
>+ tmp = av_realloc_array(cmdbuf->waitchks, cmdbuf->num_waitchks + 1, sizeof(*cmdbuf->waitchks));
>+ if (!tmp)
>+ return AVERROR(ENOMEM);
>+
>+ cmdbuf->waitchks = tmp;
>+
>+ cmdbuf->waitchks[cmdbuf->num_waitchks] = (struct nvhost_waitchk){
>+ .mem = av_nvtegra_map_get_handle(cmdbuf->map),
>+ .offset = (uint8_t *)cmdbuf->cur_word - mem - sizeof(uint32_t),
>+ .syncpt_id = syncpt,
>+ .thresh = fence,
>+ };
>+
>+ cmdbuf->num_waitchks++;
>+#endif
>+
>+ return av_nvtegra_cmdbuf_push_wait(cmdbuf, syncpt, fence);
>+}
>+
>+static void nvtegra_job_free(void *opaque, uint8_t *data) {
>+ AVNVTegraJob *job = (AVNVTegraJob *)data;
>+
>+ if (!job)
>+ return;
>+
>+ av_nvtegra_cmdbuf_deinit(&job->cmdbuf);
>+ av_nvtegra_map_destroy(&job->input_map);
>+
>+ av_freep(&job);
>+}
>+
>+static AVBufferRef *nvtegra_job_alloc(void *opaque, size_t size) {
>+ AVNVTegraJobPool *pool = opaque;
>+
>+ AVBufferRef *buffer;
>+ AVNVTegraJob *job;
>+ int err;
>+
>+ job = av_mallocz(sizeof(*job));
>+ if (!job)
>+ return NULL;
>+
>+ err = av_nvtegra_map_create(&job->input_map, pool->channel, pool->input_map_size, 0x100,
>+ NVMAP_HEAP_IOVMM, NVMAP_HANDLE_WRITE_COMBINE);
>+ if (err < 0)
>+ goto fail;
>+
>+ err = av_nvtegra_cmdbuf_init(&job->cmdbuf);
>+ if (err < 0)
>+ goto fail;
>+
>+ err = av_nvtegra_cmdbuf_add_memory(&job->cmdbuf, &job->input_map, pool->cmdbuf_off, pool->max_cmdbuf_size);
>+ if (err < 0)
>+ goto fail;
>+
>+ buffer = av_buffer_create((uint8_t *)job, sizeof(*job), nvtegra_job_free, pool, 0);
>+ if (!buffer)
>+ goto fail;
>+
>+ return buffer;
>+
>+fail:
>+ av_nvtegra_cmdbuf_deinit(&job->cmdbuf);
>+ av_nvtegra_map_destroy(&job->input_map);
>+ av_freep(job);
>+ return NULL;
>+}
>+
>+int av_nvtegra_job_pool_init(AVNVTegraJobPool *pool, AVNVTegraChannel *channel,
>+ size_t input_map_size, off_t cmdbuf_off, size_t max_cmdbuf_size)
>+{
>+ pool->channel = channel;
>+ pool->input_map_size = input_map_size;
>+ pool->cmdbuf_off = cmdbuf_off;
>+ pool->max_cmdbuf_size = max_cmdbuf_size;
>+ pool->pool = av_buffer_pool_init2(sizeof(AVNVTegraJob), pool,
>+ nvtegra_job_alloc, NULL);
>+ if (!pool->pool)
>+ return AVERROR(ENOMEM);
>+
>+ return 0;
>+}
>+
>+int av_nvtegra_job_pool_uninit(AVNVTegraJobPool *pool) {
>+ av_buffer_pool_uninit(&pool->pool);
>+ return 0;
>+}
>+
>+AVBufferRef *av_nvtegra_job_pool_get(AVNVTegraJobPool *pool) {
>+ return av_buffer_pool_get(pool->pool);
>+}
>+
>+int av_nvtegra_job_submit(AVNVTegraJobPool *pool, AVNVTegraJob *job) {
>+ return av_nvtegra_channel_submit(pool->channel, &job->cmdbuf, &job->fence);
>+}
>+
>+int av_nvtegra_job_wait(AVNVTegraJobPool *pool, AVNVTegraJob *job, int timeout) {
>+ return av_nvtegra_syncpt_wait(pool->channel, job->fence, timeout);
>+}
>diff --git a/libavutil/nvtegra.h b/libavutil/nvtegra.h
>new file mode 100644
>index 0000000000..3b63335d6c
>--- /dev/null
>+++ b/libavutil/nvtegra.h
>@@ -0,0 +1,258 @@
>+/*
>+ * Copyright (c) 2024 averne <averne381 at gmail.com>
>+ *
>+ * This file is part of FFmpeg.
>+ *
>+ * FFmpeg is free software; you can redistribute it and/or modify
>+ * it under the terms of the GNU General Public License as published by
>+ * the Free Software Foundation; either version 2 of the License, or
>+ * (at your option) any later version.
>+ *
>+ * FFmpeg is distributed in the hope that it will be useful,
>+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
>+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
>+ * GNU General Public License for more details.
>+ *
>+ * You should have received a copy of the GNU General Public License along
>+ * with FFmpeg; if not, write to the Free Software Foundation, Inc.,
>+ * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
>+ */
>+
>+#ifndef AVUTIL_NVTEGRA_H
>+#define AVUTIL_NVTEGRA_H
>+
>+#include <stdint.h>
>+#include <stdbool.h>
>+
>+#include "buffer.h"
>+
>+#include "nvhost_ioctl.h"
>+#include "nvmap_ioctl.h"
>+
>+typedef struct AVNVTegraChannel {
>+#ifndef __SWITCH__
>+ int fd;
>+ int module_id;
>+#else
>+ NvChannel channel;
>+#endif
>+
>+ uint32_t syncpt;
>+
>+#ifdef __SWITCH__
>+ MmuRequest mmu_request;
>+#endif
>+ uint32_t clock;
>+} AVNVTegraChannel;
>+
>+typedef struct AVNVTegraMap {
>+#ifndef __SWITCH__
>+ uint32_t handle;
>+ uint32_t size;
>+ void *cpu_addr;
>+#else
>+ NvMap map;
>+ uint32_t iova;
>+ uint32_t owner;
>+#endif
>+ bool is_linear;
>+} AVNVTegraMap;
>+
>+typedef struct AVNVTegraCmdbuf {
>+ AVNVTegraMap *map;
>+
>+ uint32_t mem_offset, mem_size;
>+
>+ uint32_t *cur_word;
>+
>+ struct nvhost_cmdbuf *cmdbufs;
>+#ifndef __SWITCH__
>+ struct nvhost_cmdbuf_ext *cmdbuf_exts;
>+ uint32_t *class_ids;
>+#endif
>+ uint32_t num_cmdbufs;
>+
>+#ifndef __SWITCH__
>+ struct nvhost_reloc *relocs;
>+ struct nvhost_reloc_type *reloc_types;
>+ struct nvhost_reloc_shift *reloc_shifts;
>+ uint32_t num_relocs;
>+#endif
>+
>+ struct nvhost_syncpt_incr *syncpt_incrs;
>+#ifndef __SWITCH__
>+ uint32_t *fences;
>+#endif
>+ uint32_t num_syncpt_incrs;
>+
>+#ifndef __SWITCH__
>+ struct nvhost_waitchk *waitchks;
>+ uint32_t num_waitchks;
>+#endif
>+} AVNVTegraCmdbuf;
>+
>+typedef struct AVNVTegraJobPool {
>+ /*
>+ * Pool object for job allocation
>+ */
>+ AVBufferPool *pool;
>+
>+ /*
>+ * Hardware channel the jobs will be submitted to
>+ */
>+ AVNVTegraChannel *channel;
>+
>+ /*
>+ * Total size of the input memory-mapped buffer
>+ */
>+ size_t input_map_size;
>+
>+ /*
>+ * Offset of the command data within the input map
>+ */
>+ off_t cmdbuf_off;
>+
>+ /*
>+ * Maximum memory usable by the command buffer
>+ */
>+ size_t max_cmdbuf_size;
>+} AVNVTegraJobPool;
>+
>+typedef struct AVNVTegraJob { /* One unit of work; passed to av_nvtegra_job_submit() and av_nvtegra_job_wait() */
>+ /*
>+ * Memory-mapped buffer for command buffers, metadata structures, ...
>+ */
>+ AVNVTegraMap input_map;
>+
>+ /*
>+ * Object for command recording
>+ */
>+ AVNVTegraCmdbuf cmdbuf;
>+
>+ /*
>+ * Fence indicating completion of the job
>+ * (presumably written on submission and compared on wait -- TODO confirm)
>+ */
>+ uint32_t fence;
>+} AVNVTegraJob;
>+
>+AVBufferRef *av_nvtegra_driver_init(void); /* Driver-level setup; what the returned buffer holds is not visible in this header */
>+
>+int av_nvtegra_channel_open(AVNVTegraChannel *channel, const char *dev); /* Channel (hardware engine handle) management follows */
>+int av_nvtegra_channel_close(AVNVTegraChannel *channel);
>+int av_nvtegra_channel_get_clock_rate(AVNVTegraChannel *channel, uint32_t moduleid, uint32_t *clock_rate); /* result is written through clock_rate */
>+int av_nvtegra_channel_set_clock_rate(AVNVTegraChannel *channel, uint32_t moduleid, uint32_t clock_rate);
>+int av_nvtegra_channel_submit(AVNVTegraChannel *channel, AVNVTegraCmdbuf *cmdbuf, uint32_t *fence); /* presumably writes the completion fence through fence -- TODO confirm */
>+int av_nvtegra_channel_set_submit_timeout(AVNVTegraChannel *channel, uint32_t timeout_ms);
>+
>+int av_nvtegra_syncpt_wait(AVNVTegraChannel *channel, uint32_t threshold, int32_t timeout); /* NOTE(review): timeout unit and meaning of negative values not visible here */
>+
>+int av_nvtegra_map_allocate(AVNVTegraMap *map, AVNVTegraChannel *owner, uint32_t size,
>+ uint32_t align, int heap_mask, int flags); /* first step of av_nvtegra_map_create(), which treats a negative return as failure */
>+int av_nvtegra_map_free(AVNVTegraMap *map); /* last step of av_nvtegra_map_destroy() */
>+int av_nvtegra_map_from_va(AVNVTegraMap *map, AVNVTegraChannel *owner, void *mem,
>+ uint32_t size, uint32_t align, uint32_t flags); /* NOTE(review): flags is uint32_t here but int in av_nvtegra_map_allocate() */
>+int av_nvtegra_map_close(AVNVTegraMap *map);
>+int av_nvtegra_map_map(AVNVTegraMap *map); /* second step of av_nvtegra_map_create() */
>+int av_nvtegra_map_unmap(AVNVTegraMap *map); /* first step of av_nvtegra_map_destroy(), which treats a negative return as failure */
>+int av_nvtegra_map_cache_op(AVNVTegraMap *map, int op, void *addr, size_t len);
>+int av_nvtegra_map_realloc(AVNVTegraMap *map, uint32_t size, uint32_t align, int heap_mask, int flags);
>+
>+/*
>+ * Convenience wrapper: allocate a map with av_nvtegra_map_allocate() and,
>+ * if that did not fail, map it with av_nvtegra_map_map().
>+ *
>+ * Returns the negative value from the allocation step when it fails,
>+ * otherwise whatever av_nvtegra_map_map() returns.
>+ */
>+static inline int av_nvtegra_map_create(AVNVTegraMap *map, AVNVTegraChannel *owner, uint32_t size, uint32_t align,
>+                                        int heap_mask, int flags)
>+{
>+    const int ret = av_nvtegra_map_allocate(map, owner, size, align, heap_mask, flags);
>+
>+    return ret < 0 ? ret : av_nvtegra_map_map(map);
>+}
>+
>+/*
>+ * Convenience wrapper: unmap a map with av_nvtegra_map_unmap() and, if that
>+ * did not fail, release it with av_nvtegra_map_free().
>+ *
>+ * Returns the negative value from the unmap step when it fails, otherwise
>+ * whatever av_nvtegra_map_free() returns.
>+ *
>+ * NOTE(review): when unmapping fails the map is not freed; confirm callers
>+ * do not rely on this function always releasing the allocation.
>+ */
>+static inline int av_nvtegra_map_destroy(AVNVTegraMap *map) {
>+    const int ret = av_nvtegra_map_unmap(map);
>+
>+    return ret < 0 ? ret : av_nvtegra_map_free(map);
>+}
>+
>+int av_nvtegra_cmdbuf_init(AVNVTegraCmdbuf *cmdbuf); /* Command buffer (command list builder) API follows */
>+int av_nvtegra_cmdbuf_deinit(AVNVTegraCmdbuf *cmdbuf);
>+int av_nvtegra_cmdbuf_add_memory(AVNVTegraCmdbuf *cmdbuf, AVNVTegraMap *map, uint32_t offset, uint32_t size);
>+int av_nvtegra_cmdbuf_clear(AVNVTegraCmdbuf *cmdbuf);
>+int av_nvtegra_cmdbuf_begin(AVNVTegraCmdbuf *cmdbuf, uint32_t class_id);
>+int av_nvtegra_cmdbuf_end(AVNVTegraCmdbuf *cmdbuf);
>+int av_nvtegra_cmdbuf_push_word(AVNVTegraCmdbuf *cmdbuf, uint32_t word);
>+int av_nvtegra_cmdbuf_push_value(AVNVTegraCmdbuf *cmdbuf, uint32_t offset, uint32_t word); /* AV_NVTEGRA_PUSH_VALUE() passes offset divided by sizeof(uint32_t) */
>+int av_nvtegra_cmdbuf_push_reloc(AVNVTegraCmdbuf *cmdbuf, uint32_t offset, AVNVTegraMap *target, uint32_t target_offset,
>+ int reloc_type, int shift); /* AV_NVTEGRA_PUSH_RELOC() passes offset divided by sizeof(uint32_t) and shift = 8 */
>+int av_nvtegra_cmdbuf_push_syncpt_incr(AVNVTegraCmdbuf *cmdbuf, uint32_t syncpt);
>+int av_nvtegra_cmdbuf_push_wait(AVNVTegraCmdbuf *cmdbuf, uint32_t syncpt, uint32_t fence);
>+int av_nvtegra_cmdbuf_add_syncpt_incr(AVNVTegraCmdbuf *cmdbuf, uint32_t syncpt, uint32_t fence);
>+int av_nvtegra_cmdbuf_add_waitchk(AVNVTegraCmdbuf *cmdbuf, uint32_t syncpt, uint32_t fence);
>+
>+/*
>+ * Job allocation and submission routines
>+ * (the pool parameters mirror the fields of AVNVTegraJobPool)
>+ */
>+int av_nvtegra_job_pool_init(AVNVTegraJobPool *pool, AVNVTegraChannel *channel,
>+ size_t input_map_size, off_t cmdbuf_off, size_t max_cmdbuf_size);
>+int av_nvtegra_job_pool_uninit(AVNVTegraJobPool *pool);
>+AVBufferRef *av_nvtegra_job_pool_get(AVNVTegraJobPool *pool); /* presumably the returned buffer wraps an AVNVTegraJob -- TODO confirm */
>+
>+int av_nvtegra_job_submit(AVNVTegraJobPool *pool, AVNVTegraJob *job);
>+int av_nvtegra_job_wait(AVNVTegraJobPool *pool, AVNVTegraJob *job, int timeout); /* NOTE(review): timeout unit not visible here */
>+
>+/* Return the handle of a map, reading it from whichever representation the platform uses. */
>+static inline uint32_t av_nvtegra_map_get_handle(AVNVTegraMap *map) {
>+#ifdef __SWITCH__
>+    const uint32_t handle = map->map.handle;
>+#else
>+    const uint32_t handle = map->handle;
>+#endif
>+    return handle;
>+}
>+
>+/* Return the CPU address of a map, reading it from whichever representation the platform uses. */
>+static inline void *av_nvtegra_map_get_addr(AVNVTegraMap *map) {
>+#ifdef __SWITCH__
>+    void *addr = map->map.cpu_addr;
>+#else
>+    void *addr = map->cpu_addr;
>+#endif
>+    return addr;
>+}
>+
>+/* Return the size of a map, reading it from whichever representation the platform uses. */
>+static inline uint32_t av_nvtegra_map_get_size(AVNVTegraMap *map) {
>+#ifdef __SWITCH__
>+    const uint32_t size = map->map.size;
>+#else
>+    const uint32_t size = map->size;
>+#endif
>+    return size;
>+}
>+
>+/* Addresses are shifted by 8 bits in the command buffer, requiring an alignment to 256 */
>+#define AV_NVTEGRA_MAP_ALIGN (1 << 8)
>+
>+/*
>+ * Place a value into a register bitfield.
>+ *
>+ * offset ## _ ## field must expand to a "hi:lo" bit range (e.g. 15:8).
>+ * The (1?hi:lo) / (0?hi:lo) conditionals select hi and lo respectively,
>+ * from which the field width mask and the shift are derived.
>+ *
>+ * value is parenthesized so that expressions with operators of lower
>+ * precedence than '&' (such as a | b) are masked as a whole.
>+ */
>+#define AV_NVTEGRA_VALUE(offset, field, value)                                                           \
>+    (((value) &                                                                                          \
>+      ((uint32_t)((UINT64_C(1) << ((1?offset ## _ ## field) - (0?offset ## _ ## field) + 1)) - 1)))      \
>+     << (0?offset ## _ ## field))
>+
>+/*
>+ * Place a named enumerant into a register bitfield.
>+ *
>+ * Same as AV_NVTEGRA_VALUE, except that the value is the macro
>+ * offset ## _ ## field ## _ ## value (e.g. REG_FIELD_NAME).
>+ */
>+#define AV_NVTEGRA_ENUM(offset, field, value)                                                            \
>+    ((offset ## _ ## field ## _ ## value &                                                               \
>+      ((uint32_t)((UINT64_C(1) << ((1?offset ## _ ## field) - (0?offset ## _ ## field) + 1)) - 1)))      \
>+     << (0?offset ## _ ## field))
>+
>+#define AV_NVTEGRA_PUSH_VALUE(cmdbuf, offset, value) ({ \
>+ int _err = av_nvtegra_cmdbuf_push_value(cmdbuf, (offset) / sizeof(uint32_t), value); \
>+ if (_err < 0) \
>+ return _err; \
>+})
>+
>+#define AV_NVTEGRA_PUSH_RELOC(cmdbuf, offset, target, target_offset, type) ({ \
>+ int _err = av_nvtegra_cmdbuf_push_reloc(cmdbuf, (offset) / sizeof(uint32_t), \
>+ target, target_offset, type, 8); \
>+ if (_err < 0) \
>+ return _err; \
>+})
>+
>+#endif /* AVUTIL_NVTEGRA_H */
>diff --git a/libavutil/nvtegra_host1x.h b/libavutil/nvtegra_host1x.h
>new file mode 100644
>index 0000000000..25e37eae61
>--- /dev/null
>+++ b/libavutil/nvtegra_host1x.h
>@@ -0,0 +1,94 @@
>+/*
>+ * Copyright (c) 2024 averne <averne381 at gmail.com>
>+ *
>+ * This file is part of FFmpeg.
>+ *
>+ * FFmpeg is free software; you can redistribute it and/or modify
>+ * it under the terms of the GNU General Public License as published by
>+ * the Free Software Foundation; either version 2 of the License, or
>+ * (at your option) any later version.
>+ *
>+ * FFmpeg is distributed in the hope that it will be useful,
>+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
>+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
>+ * GNU General Public License for more details.
>+ *
>+ * You should have received a copy of the GNU General Public License along
>+ * with FFmpeg; if not, write to the Free Software Foundation, Inc.,
>+ * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
>+ */
>+
>+#ifndef AVUTIL_NVTEGRA_HOST1X_H
>+#define AVUTIL_NVTEGRA_HOST1X_H
>+
>+#include <stdint.h>
>+
>+#include "macros.h"
>+
>+/* From L4T include/linux/host1x.h
>+ * Host1x class identifiers. The class_id parameters of host1x_opcode_setclass()
>+ * and av_nvtegra_cmdbuf_begin() presumably take these values -- TODO confirm. */
>+enum host1x_class {
>+ HOST1X_CLASS_HOST1X = 0x01,
>+ HOST1X_CLASS_NVENC = 0x21,
>+ HOST1X_CLASS_VI = 0x30,
>+ HOST1X_CLASS_ISPA = 0x32,
>+ HOST1X_CLASS_ISPB = 0x34,
>+ HOST1X_CLASS_GR2D = 0x51,
>+ HOST1X_CLASS_GR2D_SB = 0x52,
>+ HOST1X_CLASS_VIC = 0x5d,
>+ HOST1X_CLASS_GR3D = 0x60,
>+ HOST1X_CLASS_NVJPG = 0xc0,
>+ HOST1X_CLASS_NVDEC = 0xf0,
>+};
>+
>+/*
>+ * Build a host1x "setclass" command word: opcode 0 at bit 28, offset at
>+ * bit 16, class_id at bit 6, mask in the low bits. Arguments are not
>+ * range-checked; oversized values spill into neighbouring fields.
>+ */
>+static inline uint32_t host1x_opcode_setclass(unsigned class_id, unsigned offset, unsigned mask) {
>+    uint32_t word = mask;
>+
>+    word |= class_id << 6;
>+    word |= offset << 16;
>+    return word;
>+}
>+
>+/*
>+ * Build a host1x "incr" command word: opcode 1 at bit 28, offset at bit 16,
>+ * count in the low bits. Arguments are not range-checked.
>+ */
>+static inline uint32_t host1x_opcode_incr(unsigned offset, unsigned count) {
>+    uint32_t word = UINT32_C(1) << 28;
>+
>+    word |= offset << 16;
>+    word |= count;
>+    return word;
>+}
>+
>+/*
>+ * Build a host1x "nonincr" command word: opcode 2 at bit 28, offset at
>+ * bit 16, count in the low bits. Arguments are not range-checked.
>+ */
>+static inline uint32_t host1x_opcode_nonincr(unsigned offset, unsigned count) {
>+    uint32_t word = UINT32_C(2) << 28;
>+
>+    word |= offset << 16;
>+    word |= count;
>+    return word;
>+}
>+
>+/*
>+ * Build a host1x "mask" command word: opcode 3 at bit 28, offset at bit 16,
>+ * mask in the low bits. Arguments are not range-checked.
>+ */
>+static inline uint32_t host1x_opcode_mask(unsigned offset, unsigned mask) {
>+    uint32_t word = UINT32_C(3) << 28;
>+
>+    word |= offset << 16;
>+    word |= mask;
>+    return word;
>+}
>+
>+/*
>+ * Build a host1x "imm" command word: opcode 4 at bit 28, offset at bit 16,
>+ * value in the low bits. Arguments are not range-checked.
>+ */
>+static inline uint32_t host1x_opcode_imm(unsigned offset, unsigned value) {
>+    uint32_t word = UINT32_C(4) << 28;
>+
>+    word |= offset << 16;
>+    word |= value;
>+    return word;
>+}
>+
>+#define NV_CLASS_HOST_LOAD_SYNCPT_PAYLOAD (0x00000138) /* presumably a register offset of the host class -- TODO confirm */
>+#define NV_CLASS_HOST_WAIT_SYNCPT (0x00000140)
>+
>+#define NV_THI_INCR_SYNCPT (0x00000000) /* parenthesized values without a field suffix are presumably register byte offsets -- TODO confirm */
>+#define NV_THI_INCR_SYNCPT_INDX 7:0 /* "hi:lo" bit range, in the form AV_NVTEGRA_VALUE()/AV_NVTEGRA_ENUM() expect for offset_field */
>+#define NV_THI_INCR_SYNCPT_COND 15:8
>+#define NV_THI_INCR_SYNCPT_COND_IMMEDIATE (0x00000000) /* enumerant of the COND field, in the offset_field_value form AV_NVTEGRA_ENUM() pastes together */
>+#define NV_THI_INCR_SYNCPT_COND_OP_DONE (0x00000001)
>+#define NV_THI_INCR_SYNCPT_ERR (0x00000008)
>+#define NV_THI_INCR_SYNCPT_ERR_COND_STS_IMM 0:0
>+#define NV_THI_INCR_SYNCPT_ERR_COND_STS_OPDONE 1:1
>+#define NV_THI_CTXSW_INCR_SYNCPT (0x0000000c)
>+#define NV_THI_CTXSW_INCR_SYNCPT_INDX 7:0
>+#define NV_THI_CTXSW (0x00000020)
>+#define NV_THI_CTXSW_CURR_CLASS 9:0
>+#define NV_THI_CTXSW_AUTO_ACK 11:11
>+#define NV_THI_CTXSW_CURR_CHANNEL 15:12
>+#define NV_THI_CTXSW_NEXT_CLASS 25:16
>+#define NV_THI_CTXSW_NEXT_CHANNEL 31:28
>+#define NV_THI_CONT_SYNCPT_EOF (0x00000028)
>+#define NV_THI_CONT_SYNCPT_EOF_INDEX 7:0
>+#define NV_THI_CONT_SYNCPT_EOF_COND 8:8
>+#define NV_THI_METHOD0 (0x00000040)
>+#define NV_THI_METHOD0_OFFSET 11:0
>+#define NV_THI_METHOD1 (0x00000044)
>+#define NV_THI_METHOD1_DATA 31:0 /* full 32-bit field */
>+#define NV_THI_INT_STATUS (0x00000078)
>+#define NV_THI_INT_STATUS_FALCON_INT 0:0
>+#define NV_THI_INT_MASK (0x0000007c)
>+#define NV_THI_INT_MASK_FALCON_INT 0:0
>+
>+#endif /* AVUTIL_NVTEGRA_HOST1X_H */
>diff --git a/libavutil/pixdesc.c b/libavutil/pixdesc.c
>index 1c0bcf2232..bb14b1b306 100644
>--- a/libavutil/pixdesc.c
>+++ b/libavutil/pixdesc.c
>@@ -2791,6 +2791,10 @@ static const AVPixFmtDescriptor av_pix_fmt_descriptors[AV_PIX_FMT_NB] = {
> },
> .flags = AV_PIX_FMT_FLAG_PLANAR,
> },
>+ [AV_PIX_FMT_NVTEGRA] = {
>+ .name = "nvtegra",
>+ .flags = AV_PIX_FMT_FLAG_HWACCEL,
>+ },
> };
>
> static const char * const color_range_names[] = {
>diff --git a/libavutil/pixfmt.h b/libavutil/pixfmt.h
>index a7f50e1690..a3213c792a 100644
>--- a/libavutil/pixfmt.h
>+++ b/libavutil/pixfmt.h
>@@ -439,6 +439,14 @@ enum AVPixelFormat {
> */
> AV_PIX_FMT_D3D12,
>
>+ /**
>+ * Hardware surfaces for Tegra devices.
>+ *
>+ * data[0..2] points to memory-mapped buffer containing frame data
>+ * buf[0] contains an AVBufferRef to an AVNVTegraMap
>+ */
>+ AV_PIX_FMT_NVTEGRA,
>+
> AV_PIX_FMT_NB ///< number of pixel formats, DO NOT USE THIS if you want to link with shared libav* because the number of formats might differ between versions
> };
>
More information about the ffmpeg-devel
mailing list