[FFmpeg-devel] [PATCH 2/5] avcodec: add common V4L2 request API code

Mark Thompson sw at jkqxz.net
Thu Dec 10 01:16:49 EET 2020


On 09/12/2020 20:25, Jonas Karlman wrote:
> Signed-off-by: Jonas Karlman <jonas at kwiboo.se>
> ---
>   configure                 |  12 +
>   libavcodec/Makefile       |   1 +
>   libavcodec/hwconfig.h     |   2 +
>   libavcodec/v4l2_request.c | 987 ++++++++++++++++++++++++++++++++++++++
>   libavcodec/v4l2_request.h |  77 +++
>   5 files changed, 1079 insertions(+)
>   create mode 100644 libavcodec/v4l2_request.c
>   create mode 100644 libavcodec/v4l2_request.h
> 
> diff --git a/configure b/configure
> index 10cf61007b..fac85bfab4 100755
> --- a/configure
> +++ b/configure
> @@ -278,6 +278,7 @@ External library support:
>                              if openssl, gnutls or mbedtls is not used [no]
>     --enable-libtwolame      enable MP2 encoding via libtwolame [no]
>     --enable-libuavs3d       enable AVS3 decoding via libuavs3d [no]
> +  --enable-libudev         enable libudev [no]

Can you explain what the udev dep is actually helping with here?  You appear to be searching every V4L2 device looking for a compatible one anyway.

>     --enable-libv4l2         enable libv4l2/v4l-utils [no]
>     --enable-libvidstab      enable video stabilization using vid.stab [no]
>     --enable-libvmaf         enable vmaf filter via libvmaf [no]
> @@ -345,6 +346,7 @@ External library support:
>     --enable-omx-rpi         enable OpenMAX IL code for Raspberry Pi [no]
>     --enable-rkmpp           enable Rockchip Media Process Platform code [no]
>     --disable-v4l2-m2m       disable V4L2 mem2mem code [autodetect]
> +  --enable-v4l2-request    enable V4L2 request API code [no]
>     --disable-vaapi          disable Video Acceleration API (mainly Unix/Intel) code [autodetect]
>     --disable-vdpau          disable Nvidia Video Decode and Presentation API for Unix code [autodetect]
>     --disable-videotoolbox   disable VideoToolbox code [autodetect]
> @@ -1812,6 +1814,7 @@ EXTERNAL_LIBRARY_LIST="
>       libtheora
>       libtwolame
>       libuavs3d
> +    libudev
>       libv4l2
>       libvmaf
>       libvorbis
> @@ -1866,6 +1869,7 @@ HWACCEL_LIBRARY_LIST="
>       mmal
>       omx
>       opencl
> +    v4l2_request
>       vulkan
>   "
>   
> @@ -2913,6 +2917,7 @@ d3d11va_deps="dxva_h ID3D11VideoDecoder ID3D11VideoContext"
>   dxva2_deps="dxva2api_h DXVA2_ConfigPictureDecode ole32 user32"
>   ffnvcodec_deps_any="libdl LoadLibrary"
>   nvdec_deps="ffnvcodec"
> +v4l2_request_deps="linux_videodev2_h linux_media_h v4l2_timeval_to_ns libdrm libudev"
>   vaapi_x11_deps="xlib"
>   videotoolbox_hwaccel_deps="videotoolbox pthreads"
>   videotoolbox_hwaccel_extralibs="-framework QuartzCore"
> @@ -6423,6 +6428,7 @@ enabled libtwolame        && require libtwolame twolame.h twolame_init -ltwolame
>                                { check_lib libtwolame twolame.h twolame_encode_buffer_float32_interleaved -ltwolame ||
>                                  die "ERROR: libtwolame must be installed and version must be >= 0.3.10"; }
>   enabled libuavs3d         && require_pkg_config libuavs3d "uavs3d >= 1.1.41" uavs3d.h uavs3d_decode
> +enabled libudev           && require_pkg_config libudev libudev libudev.h udev_new
>   enabled libv4l2           && require_pkg_config libv4l2 libv4l2 libv4l2.h v4l2_ioctl
>   enabled libvidstab        && require_pkg_config libvidstab "vidstab >= 0.98" vid.stab/libvidstab.h vsMotionDetectInit
>   enabled libvmaf           && require_pkg_config libvmaf "libvmaf >= 1.5.2" libvmaf.h compute_vmaf
> @@ -6521,6 +6527,10 @@ enabled rkmpp             && { require_pkg_config rkmpp rockchip_mpp  rockchip/r
>                                  { enabled libdrm ||
>                                    die "ERROR: rkmpp requires --enable-libdrm"; }
>                                }
> +enabled v4l2_request      && { enabled libdrm ||
> +                               die "ERROR: v4l2-request requires --enable-libdrm"; } &&
> +                             { enabled libudev ||
> +                               die "ERROR: v4l2-request requires --enable-libudev"; }

It might also want kernel headers defining the necessary structures.

>   enabled vapoursynth       && require_pkg_config vapoursynth "vapoursynth-script >= 42" VSScript.h vsscript_init
>   
>   
> @@ -6602,6 +6612,8 @@ if enabled v4l2_m2m; then
>       check_cc vp9_v4l2_m2m linux/videodev2.h "int i = V4L2_PIX_FMT_VP9;"
>   fi
>   
> +check_func_headers "linux/media.h linux/videodev2.h" v4l2_timeval_to_ns
> +
>   check_headers sys/videoio.h
>   test_code cc sys/videoio.h "struct v4l2_frmsizeenum vfse; vfse.discrete.width = 0;" && enable_sanitized struct_v4l2_frmivalenum_discrete
>   
> diff --git a/libavcodec/Makefile b/libavcodec/Makefile
> index 9b370ffc44..2fafc4e028 100644
> --- a/libavcodec/Makefile
> +++ b/libavcodec/Makefile
> @@ -155,6 +155,7 @@ OBJS-$(CONFIG_VP3DSP)                  += vp3dsp.o
>   OBJS-$(CONFIG_VP56DSP)                 += vp56dsp.o
>   OBJS-$(CONFIG_VP8DSP)                  += vp8dsp.o
>   OBJS-$(CONFIG_V4L2_M2M)                += v4l2_m2m.o v4l2_context.o v4l2_buffers.o v4l2_fmt.o
> +OBJS-$(CONFIG_V4L2_REQUEST)            += v4l2_request.o
>   OBJS-$(CONFIG_WMA_FREQS)               += wma_freqs.o
>   OBJS-$(CONFIG_WMV2DSP)                 += wmv2dsp.o
>   
> diff --git a/libavcodec/hwconfig.h b/libavcodec/hwconfig.h
> index f421dc909f..ee78d8ab8e 100644
> --- a/libavcodec/hwconfig.h
> +++ b/libavcodec/hwconfig.h
> @@ -80,6 +80,8 @@ typedef struct AVCodecHWConfigInternal {
>       HW_CONFIG_HWACCEL(0, 0, 1, D3D11VA_VLD,  NONE,         ff_ ## codec ## _d3d11va_hwaccel)
>   #define HWACCEL_XVMC(codec) \
>       HW_CONFIG_HWACCEL(0, 0, 1, XVMC,         NONE,         ff_ ## codec ## _xvmc_hwaccel)
> +#define HWACCEL_V4L2REQUEST(codec) \
> +    HW_CONFIG_HWACCEL(1, 0, 0, DRM_PRIME,    DRM,          ff_ ## codec ## _v4l2request_hwaccel)

You're asking for a DRM hwcontext device here, but you don't actually use it anywhere.

Would METHOD_INTERNAL make more sense?  (Make your own DRM hwcontext device and attach the frames to it.)

>   
>   #define HW_CONFIG_ENCODER(device, frames, ad_hoc, format, device_type_) \
>       &(const AVCodecHWConfigInternal) { \
> diff --git a/libavcodec/v4l2_request.c b/libavcodec/v4l2_request.c
> new file mode 100644
> index 0000000000..a85f522c18
> --- /dev/null
> +++ b/libavcodec/v4l2_request.c
> @@ -0,0 +1,987 @@
> +/*
> + * This file is part of FFmpeg.
> + *
> + * FFmpeg is free software; you can redistribute it and/or
> + * modify it under the terms of the GNU Lesser General Public
> + * License as published by the Free Software Foundation; either
> + * version 2.1 of the License, or (at your option) any later version.
> + *
> + * FFmpeg is distributed in the hope that it will be useful,
> + * but WITHOUT ANY WARRANTY; without even the implied warranty of
> + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
> + * Lesser General Public License for more details.
> + *
> + * You should have received a copy of the GNU Lesser General Public
> + * License along with FFmpeg; if not, write to the Free Software
> + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
> + */
> +
> +#include <drm_fourcc.h>
> +#include <linux/media.h>
> +#include <sys/mman.h>
> +#include <sys/types.h>
> +#include <sys/stat.h>
> +#include <fcntl.h>
> +
> +#include <sys/sysmacros.h>
> +#include <libudev.h>
> +
> +#include "decode.h"
> +#include "internal.h"
> +#include "v4l2_request.h"
> +
> +#define OUTPUT_BUFFER_PADDING_SIZE (AV_INPUT_BUFFER_PADDING_SIZE * 4)

What is this doing?  Is the value still right if AV_INPUT_BUFFER_PADDING_SIZE changes?

> +
> +uint64_t ff_v4l2_request_get_capture_timestamp(AVFrame *frame)
> +{
> +    V4L2RequestDescriptor *req = (V4L2RequestDescriptor*)frame->data[0];
> +    return req ? v4l2_timeval_to_ns(&req->capture.buffer.timestamp) : 0;
> +}
> +
> +int ff_v4l2_request_reset_frame(AVCodecContext *avctx, AVFrame *frame)
> +{
> +    V4L2RequestDescriptor *req = (V4L2RequestDescriptor*)frame->data[0];
> +    memset(&req->drm, 0, sizeof(AVDRMFrameDescriptor));
> +    req->output.used = 0;
> +    return 0;
> +}
> +
> +int ff_v4l2_request_append_output_buffer(AVCodecContext *avctx, AVFrame *frame, const uint8_t *data, uint32_t size)

Please break lines to keep them to a vaguely sensible length.

int ff_v4l2_request_append_output_buffer(AVCodecContext *avctx, AVFrame *frame,
                                          const uint8_t *data, uint32_t size)

80 columns is a guideline and can be ignored where necessary, but in most of the cases below like function arguments or log messages it doesn't seem useful.

> +{
> +    V4L2RequestDescriptor *req = (V4L2RequestDescriptor*)frame->data[0];
> +    if (req->output.used + size + OUTPUT_BUFFER_PADDING_SIZE <= req->output.size) {
> +        memcpy(req->output.addr + req->output.used, data, size);
> +        req->output.used += size;
> +    } else {
> +        av_log(avctx, AV_LOG_ERROR, "%s: output.used=%u output.size=%u size=%u\n", __func__, req->output.used, req->output.size, size);

Logging at INFO or above can be seen by an ordinary user, so it should at least try to be helpful.

What is this case?  Something wasn't big enough but we are ignoring it and continuing anyway, so maybe it wasn't so bad?

> +    }
> +    return 0;
> +}
> +
> +static int v4l2_request_controls(V4L2RequestContext *ctx, int request_fd, unsigned long type, struct v4l2_ext_control *control, int count)
> +{
> +    struct v4l2_ext_controls controls = {
> +        .controls = control,
> +        .count = count,
> +        .request_fd = request_fd,
> +        .which = (request_fd >= 0) ? V4L2_CTRL_WHICH_REQUEST_VAL : 0,
> +    };
> +
> +    if (!control || !count)
> +        return 0;
> +
> +    return ioctl(ctx->video_fd, type, &controls);
> +}
> +
> +static int v4l2_request_set_controls(V4L2RequestContext *ctx, int request_fd, struct v4l2_ext_control *control, int count)
> +{
> +    return v4l2_request_controls(ctx, request_fd, VIDIOC_S_EXT_CTRLS, control, count);
> +}
> +
> +int ff_v4l2_request_set_controls(AVCodecContext *avctx, struct v4l2_ext_control *control, int count)
> +{
> +    V4L2RequestContext *ctx = avctx->internal->hwaccel_priv_data;
> +    int ret;
> +
> +    ret = v4l2_request_controls(ctx, -1, VIDIOC_S_EXT_CTRLS, control, count);
> +    if (ret < 0) {
> +        av_log(avctx, AV_LOG_ERROR, "%s: set controls failed, %s (%d)\n", __func__, strerror(errno), errno);
> +        return AVERROR(EINVAL);
> +    }
> +
> +    return ret;
> +}
> +
> +int ff_v4l2_request_get_controls(AVCodecContext *avctx, struct v4l2_ext_control *control, int count)
> +{
> +    V4L2RequestContext *ctx = avctx->internal->hwaccel_priv_data;
> +    int ret;
> +
> +    ret = v4l2_request_controls(ctx, -1, VIDIOC_G_EXT_CTRLS, control, count);
> +    if (ret < 0) {
> +        av_log(avctx, AV_LOG_ERROR, "%s: get controls failed, %s (%d)\n", __func__, strerror(errno), errno);
> +        return AVERROR(EINVAL);
> +    }
> +
> +    return ret;
> +}
> +
> +int ff_v4l2_request_query_control(AVCodecContext *avctx, struct v4l2_query_ext_ctrl *control)
> +{
> +    V4L2RequestContext *ctx = avctx->internal->hwaccel_priv_data;
> +    int ret;
> +
> +    ret = ioctl(ctx->video_fd, VIDIOC_QUERY_EXT_CTRL, control);
> +    if (ret < 0) {
> +        av_log(avctx, AV_LOG_ERROR, "%s: query control failed, %s (%d)\n", __func__, strerror(errno), errno);
> +        return AVERROR(EINVAL);
> +    }
> +
> +    return 0;
> +}
> +
> +int ff_v4l2_request_query_control_default_value(AVCodecContext *avctx, uint32_t id)
> +{
> +    V4L2RequestContext *ctx = avctx->internal->hwaccel_priv_data;
> +    struct v4l2_queryctrl control = { .id = id };
> +    int ret;
> +
> +    ret = ioctl(ctx->video_fd, VIDIOC_QUERYCTRL, &control);
> +    if (ret < 0) {
> +        av_log(avctx, AV_LOG_ERROR, "%s: query control failed, %s (%d)\n", __func__, strerror(errno), errno);
> +        return AVERROR(EINVAL);
> +    }
> +
> +    return control.default_value;
> +}
> +
> +static int v4l2_request_queue_buffer(V4L2RequestContext *ctx, int request_fd, V4L2RequestBuffer *buf, uint32_t flags)
> +{
> +    struct v4l2_plane planes[1] = {};
> +    struct v4l2_buffer buffer = {
> +        .type = buf->buffer.type,
> +        .memory = buf->buffer.memory,
> +        .index = buf->index,
> +        .timestamp.tv_usec = ctx->timestamp,
> +        .bytesused = buf->used,
> +        .request_fd = request_fd,
> +        .flags = ((request_fd >= 0) ? V4L2_BUF_FLAG_REQUEST_FD : 0) | flags,
> +    };
> +
> +    buf->buffer.flags = buffer.flags;
> +    buf->buffer.timestamp = buffer.timestamp;
> +
> +    if (V4L2_TYPE_IS_MULTIPLANAR(buf->buffer.type)) {
> +        planes[0].bytesused = buf->used;
> +        buffer.bytesused = 0;
> +        buffer.length = 1;
> +        buffer.m.planes = planes;
> +    }
> +
> +    return ioctl(ctx->video_fd, VIDIOC_QBUF, &buffer);
> +}
> +
> +static int v4l2_request_dequeue_buffer(V4L2RequestContext *ctx, V4L2RequestBuffer *buf)
> +{
> +    int ret;
> +    struct v4l2_plane planes[1] = {};
> +    struct v4l2_buffer buffer = {
> +        .type = buf->buffer.type,
> +        .memory = buf->buffer.memory,
> +        .index = buf->index,
> +    };
> +
> +    if (V4L2_TYPE_IS_MULTIPLANAR(buf->buffer.type)) {
> +        buffer.length = 1;
> +        buffer.m.planes = planes;
> +    }
> +
> +    ret = ioctl(ctx->video_fd, VIDIOC_DQBUF, &buffer);
> +    if (ret < 0)
> +        return ret;
> +
> +    buf->buffer.flags = buffer.flags;
> +    buf->buffer.timestamp = buffer.timestamp;
> +    return 0;
> +}
> +
> +const uint32_t v4l2_request_capture_pixelformats[] = {
> +    V4L2_PIX_FMT_NV12,
> +#ifdef DRM_FORMAT_MOD_ALLWINNER_TILED
> +    V4L2_PIX_FMT_SUNXI_TILED_NV12,
> +#endif
> +};
> +
> +static int v4l2_request_set_drm_descriptor(V4L2RequestDescriptor *req, struct v4l2_format *format)
> +{
> +    AVDRMFrameDescriptor *desc = &req->drm;
> +    AVDRMLayerDescriptor *layer = &desc->layers[0];
> +    uint32_t pixelformat = V4L2_TYPE_IS_MULTIPLANAR(format->type) ? format->fmt.pix_mp.pixelformat : format->fmt.pix.pixelformat;
> +
> +    switch (pixelformat) {
> +    case V4L2_PIX_FMT_NV12:
> +        layer->format = DRM_FORMAT_NV12;
> +        desc->objects[0].format_modifier = DRM_FORMAT_MOD_LINEAR;
> +        break;
> +#ifdef DRM_FORMAT_MOD_ALLWINNER_TILED
> +    case V4L2_PIX_FMT_SUNXI_TILED_NV12:
> +        layer->format = DRM_FORMAT_NV12;
> +        desc->objects[0].format_modifier = DRM_FORMAT_MOD_ALLWINNER_TILED;
> +        break;
> +#endif
> +    default:
> +        return -1;

If returning an error please use an AVERROR() code, not -1.  (Also in more places below.)

> +    }
> +
> +    desc->nb_objects = 1;
> +    desc->objects[0].fd = req->capture.fd;
> +    desc->objects[0].size = req->capture.size;
> +
> +    desc->nb_layers = 1;
> +    layer->nb_planes = 2;
> +
> +    layer->planes[0].object_index = 0;
> +    layer->planes[0].offset = 0;
> +    layer->planes[0].pitch = V4L2_TYPE_IS_MULTIPLANAR(format->type) ? format->fmt.pix_mp.plane_fmt[0].bytesperline : format->fmt.pix.bytesperline;
> +
> +    layer->planes[1].object_index = 0;
> +    layer->planes[1].offset = layer->planes[0].pitch * (V4L2_TYPE_IS_MULTIPLANAR(format->type) ? format->fmt.pix_mp.height : format->fmt.pix.height);

If you have a MULTIPLANAR device then ignore the planes and just assume the chroma plane immediately follows the luma plane?  That seems dubious.

> +    layer->planes[1].pitch = layer->planes[0].pitch;
> +
> +    return 0;
> +}
> +
> +static int v4l2_request_queue_decode(AVCodecContext *avctx, AVFrame *frame, struct v4l2_ext_control *control, int count, int first_slice, int last_slice)
> +{
> +    V4L2RequestContext *ctx = avctx->internal->hwaccel_priv_data;
> +    V4L2RequestDescriptor *req = (V4L2RequestDescriptor*)frame->data[0];
> +    struct timeval tv = { 2, 0 };

So you are going to use this below to wait two seconds in select().  Where did that number come from?

> +    fd_set except_fds;
> +    int ret;
> +
> +    av_log(avctx, AV_LOG_DEBUG, "%s: avctx=%p used=%u controls=%d index=%d fd=%d request_fd=%d first_slice=%d last_slice=%d\n", __func__, avctx, req->output.used, count, req->capture.index, req->capture.fd, req->request_fd, first_slice, last_slice);
> +
> +    if (first_slice)
> +        ctx->timestamp++;
> +
> +    ret = v4l2_request_set_controls(ctx, req->request_fd, control, count);
> +    if (ret < 0) {
> +        av_log(avctx, AV_LOG_ERROR, "%s: set controls failed for request %d, %s (%d)\n", __func__, req->request_fd, strerror(errno), errno);
> +        return -1;
> +    }
> +
> +    memset(req->output.addr + req->output.used, 0, OUTPUT_BUFFER_PADDING_SIZE);
> +
> +    ret = v4l2_request_queue_buffer(ctx, req->request_fd, &req->output, last_slice ? 0 : V4L2_BUF_FLAG_M2M_HOLD_CAPTURE_BUF);
> +    if (ret < 0) {
> +        av_log(avctx, AV_LOG_ERROR, "%s: queue output buffer %d failed for request %d, %s (%d)\n", __func__, req->output.index, req->request_fd, strerror(errno), errno);
> +        return -1;
> +    }
> +
> +    if (first_slice) {
> +        ret = v4l2_request_queue_buffer(ctx, -1, &req->capture, 0);
> +        if (ret < 0) {
> +            av_log(avctx, AV_LOG_ERROR, "%s: queue capture buffer %d failed for request %d, %s (%d)\n", __func__, req->capture.index, req->request_fd, strerror(errno), errno);
> +            return -1;
> +        }
> +    }
> +
> +    ret = ioctl(req->request_fd, MEDIA_REQUEST_IOC_QUEUE, NULL);
> +    if (ret < 0) {
> +        av_log(avctx, AV_LOG_ERROR, "%s: queue request %d failed, %s (%d)\n", __func__, req->request_fd, strerror(errno), errno);
> +        goto fail;
> +    }
> +
> +    FD_ZERO(&except_fds);
> +    FD_SET(req->request_fd, &except_fds);
> +
> +    ret = select(req->request_fd + 1, NULL, NULL, &except_fds, &tv);
> +    if (ret == 0) {
> +        av_log(avctx, AV_LOG_ERROR, "%s: request %d timeout\n", __func__, req->request_fd);
> +        goto fail;
> +    } else if (ret < 0) {
> +        av_log(avctx, AV_LOG_ERROR, "%s: select request %d failed, %s (%d)\n", __func__, req->request_fd, strerror(errno), errno);

EINTR?

> +        goto fail;
> +    }
> +
> +    ret = v4l2_request_dequeue_buffer(ctx, &req->output);
> +    if (ret < 0) {
> +        av_log(avctx, AV_LOG_ERROR, "%s: dequeue output buffer %d failed for request %d, %s (%d)\n", __func__, req->output.index, req->request_fd, strerror(errno), errno);
> +        return -1;
> +    }
> +
> +    if (last_slice) {
> +        ret = v4l2_request_dequeue_buffer(ctx, &req->capture);
> +        if (ret < 0) {
> +            av_log(avctx, AV_LOG_ERROR, "%s: dequeue capture buffer %d failed for request %d, %s (%d)\n", __func__, req->capture.index, req->request_fd, strerror(errno), errno);
> +            return -1;
> +        }
> +
> +        if (req->capture.buffer.flags & V4L2_BUF_FLAG_ERROR) {
> +            av_log(avctx, AV_LOG_WARNING, "%s: capture buffer %d flagged with error for request %d\n", __func__, req->capture.index, req->request_fd);
> +            frame->flags |= AV_FRAME_FLAG_CORRUPT;
> +        }
> +    }
> +
> +    ret = ioctl(req->request_fd, MEDIA_REQUEST_IOC_REINIT, NULL);
> +    if (ret < 0) {
> +        av_log(avctx, AV_LOG_ERROR, "%s: reinit request %d failed, %s (%d)\n", __func__, req->request_fd, strerror(errno), errno);
> +        return -1;
> +    }
> +
> +    if (last_slice)
> +        return v4l2_request_set_drm_descriptor(req, &ctx->format);
> +
> +    return 0;
> +
> +fail:
> +    ret = v4l2_request_dequeue_buffer(ctx, &req->output);
> +    if (ret < 0)
> +        av_log(avctx, AV_LOG_ERROR, "%s: dequeue output buffer %d failed for request %d, %s (%d)\n", __func__, req->output.index, req->request_fd, strerror(errno), errno);
> +
> +    ret = v4l2_request_dequeue_buffer(ctx, &req->capture);
> +    if (ret < 0)
> +        av_log(avctx, AV_LOG_ERROR, "%s: dequeue capture buffer %d failed for request %d, %s (%d)\n", __func__, req->capture.index, req->request_fd, strerror(errno), errno);
> +
> +    ret = ioctl(req->request_fd, MEDIA_REQUEST_IOC_REINIT, NULL);
> +    if (ret < 0)
> +        av_log(avctx, AV_LOG_ERROR, "%s: reinit request %d failed, %s (%d)\n", __func__, req->request_fd, strerror(errno), errno);
> +
> +    return -1;
> +}
> +
> +int ff_v4l2_request_decode_slice(AVCodecContext *avctx, AVFrame *frame, struct v4l2_ext_control *control, int count, int first_slice, int last_slice)
> +{
> +    V4L2RequestDescriptor *req = (V4L2RequestDescriptor*)frame->data[0];
> +
> +    /* fall back to queue each slice as a full frame */
> +    if ((req->output.capabilities & V4L2_BUF_CAP_SUPPORTS_M2M_HOLD_CAPTURE_BUF) != V4L2_BUF_CAP_SUPPORTS_M2M_HOLD_CAPTURE_BUF)
> +        return v4l2_request_queue_decode(avctx, frame, control, count, 1, 1);
> +
> +    return v4l2_request_queue_decode(avctx, frame, control, count, first_slice, last_slice);
> +}
> +
> +int ff_v4l2_request_decode_frame(AVCodecContext *avctx, AVFrame *frame, struct v4l2_ext_control *control, int count)
> +{
> +    return v4l2_request_queue_decode(avctx, frame, control, count, 1, 1);
> +}

How does the synchronisation work in these decode operations?

(In other hwaccels it generally means that the decode has been submitted, but some (possibly-implicit) API-dependent method will be used later to check the result.  Not sure how that would work on a bare DRM object fd.)

> +
> ...
> +
> +static int v4l2_request_buffer_alloc(AVCodecContext *avctx, V4L2RequestBuffer *buf, enum v4l2_buf_type type)
> +{
> +    V4L2RequestContext *ctx = avctx->internal->hwaccel_priv_data;
> +    int ret;
> +    struct v4l2_plane planes[1] = {};
> +    struct v4l2_create_buffers buffers = {
> +        .count = 1,
> +        .memory = V4L2_MEMORY_MMAP,

Shouldn't this be V4L2_MEMORY_DMABUF for capture, since you are never actually mmap()ping the frames at all?  (It doesn't have to be in CPU-addressable memory at all.)

> +        .format.type = type,
> +    };
> +
> +    av_log(avctx, AV_LOG_DEBUG, "%s: avctx=%p buf=%p type=%u\n", __func__, avctx, buf, type);
> +
> +    ret = ioctl(ctx->video_fd, VIDIOC_G_FMT, &buffers.format);
> +    if (ret < 0) {
> +        av_log(avctx, AV_LOG_ERROR, "%s: get format failed for type %u, %s (%d)\n", __func__, type, strerror(errno), errno);
> +        return ret;
> +    }
> +
> +    if (V4L2_TYPE_IS_MULTIPLANAR(buffers.format.type)) {
> +        av_log(avctx, AV_LOG_DEBUG, "%s: pixelformat=%d width=%u height=%u bytesperline=%u sizeimage=%u num_planes=%u\n", __func__, buffers.format.fmt.pix_mp.pixelformat, buffers.format.fmt.pix_mp.width, buffers.format.fmt.pix_mp.height, buffers.format.fmt.pix_mp.plane_fmt[0].bytesperline, buffers.format.fmt.pix_mp.plane_fmt[0].sizeimage, buffers.format.fmt.pix_mp.num_planes);
> +    } else {
> +        av_log(avctx, AV_LOG_DEBUG, "%s: pixelformat=%d width=%u height=%u bytesperline=%u sizeimage=%u\n", __func__, buffers.format.fmt.pix.pixelformat, buffers.format.fmt.pix.width, buffers.format.fmt.pix.height, buffers.format.fmt.pix.bytesperline, buffers.format.fmt.pix.sizeimage);
> +    }
> +
> +    ret = ioctl(ctx->video_fd, VIDIOC_CREATE_BUFS, &buffers);
> +    if (ret < 0) {
> +        av_log(avctx, AV_LOG_ERROR, "%s: create buffers failed for type %u, %s (%d)\n", __func__, type, strerror(errno), errno);
> +        return ret;
> +    }
> +
> +    if (V4L2_TYPE_IS_MULTIPLANAR(type)) {
> +        buf->width = buffers.format.fmt.pix_mp.width;
> +        buf->height = buffers.format.fmt.pix_mp.height;
> +        buf->size = buffers.format.fmt.pix_mp.plane_fmt[0].sizeimage;
> +        buf->buffer.length = 1;
> +        buf->buffer.m.planes = planes;

This is one-plane NV12 again?

> +    } else {
> +        buf->width = buffers.format.fmt.pix.width;
> +        buf->height = buffers.format.fmt.pix.height;
> +        buf->size = buffers.format.fmt.pix.sizeimage;
> +    }
> +
> +    buf->index = buffers.index;
> +    buf->capabilities = buffers.capabilities;
> +    buf->used = 0;
> +
> +    buf->buffer.type = type;
> +    buf->buffer.memory = V4L2_MEMORY_MMAP;
> +    buf->buffer.index = buf->index;
> +
> +    ret = ioctl(ctx->video_fd, VIDIOC_QUERYBUF, &buf->buffer);
> +    if (ret < 0) {
> +        av_log(avctx, AV_LOG_ERROR, "%s: query buffer %d failed, %s (%d)\n", __func__, buf->index, strerror(errno), errno);
> +        return ret;
> +    }
> +
> +    if (V4L2_TYPE_IS_OUTPUT(type)) {
> +        void *addr = mmap(NULL, buf->size, PROT_READ | PROT_WRITE, MAP_SHARED, ctx->video_fd, V4L2_TYPE_IS_MULTIPLANAR(type) ? buf->buffer.m.planes[0].m.mem_offset : buf->buffer.m.offset);
> +        if (addr == MAP_FAILED) {
> +            av_log(avctx, AV_LOG_ERROR, "%s: mmap failed, %s (%d)\n", __func__, strerror(errno), errno);
> +            return -1;
> +        }
> +
> +        buf->addr = (uint8_t*)addr;
> +    } else {
> +        struct v4l2_exportbuffer exportbuffer = {
> +            .type = type,
> +            .index = buf->index,
> +            .flags = O_RDONLY,
> +        };
> +
> +        ret = ioctl(ctx->video_fd, VIDIOC_EXPBUF, &exportbuffer);
> +        if (ret < 0) {
> +            av_log(avctx, AV_LOG_ERROR, "%s: export buffer %d failed, %s (%d)\n", __func__, buf->index, strerror(errno), errno);
> +            return ret;
> +        }
> +
> +        buf->fd = exportbuffer.fd;
> +    }
> +
> +    av_log(avctx, AV_LOG_DEBUG, "%s: buf=%p index=%d fd=%d addr=%p width=%u height=%u size=%u\n", __func__, buf, buf->index, buf->fd, buf->addr, buf->width, buf->height, buf->size);
> +    return 0;
> +}
> +
> +static void v4l2_request_buffer_free(V4L2RequestBuffer *buf)
> +{
> +    av_log(NULL, AV_LOG_DEBUG, "%s: buf=%p index=%d fd=%d addr=%p width=%u height=%u size=%u\n", __func__, buf, buf->index, buf->fd, buf->addr, buf->width, buf->height, buf->size);
> +
> +    if (buf->addr)
> +        munmap(buf->addr, buf->size);
> +
> +    if (buf->fd >= 0)
> +        close(buf->fd);
> +}
> +
> +static void v4l2_request_frame_free(void *opaque, uint8_t *data)
> +{
> +    AVCodecContext *avctx = opaque;
> +    V4L2RequestDescriptor *req = (V4L2RequestDescriptor*)data;
> +
> +    av_log(NULL, AV_LOG_DEBUG, "%s: avctx=%p data=%p request_fd=%d\n", __func__, avctx, data, req->request_fd);
> +
> +    if (req->request_fd >= 0)
> +        close(req->request_fd);
> +
> +    v4l2_request_buffer_free(&req->capture);
> +    v4l2_request_buffer_free(&req->output);
> +
> +    av_free(data);
> +}
> +
> +static AVBufferRef *v4l2_request_frame_alloc(void *opaque, int size)
> +{
> +    AVCodecContext *avctx = opaque;
> +    V4L2RequestContext *ctx = avctx->internal->hwaccel_priv_data;
> +    V4L2RequestDescriptor *req;
> +    AVBufferRef *ref;
> +    uint8_t *data;
> +    int ret;
> +
> +    data = av_mallocz(size);
> +    if (!data)
> +        return NULL;
> +
> +    av_log(avctx, AV_LOG_DEBUG, "%s: avctx=%p size=%d data=%p\n", __func__, avctx, size, data);
> +
> +    ref = av_buffer_create(data, size, v4l2_request_frame_free, avctx, 0);
> +    if (!ref) {
> +        av_freep(&data);
> +        return NULL;
> +    }
> +
> +    req = (V4L2RequestDescriptor*)data;
> +    req->request_fd = -1;
> +    req->output.fd = -1;
> +    req->capture.fd = -1;
> +
> +    ret = v4l2_request_buffer_alloc(avctx, &req->output, ctx->output_type);
> +    if (ret < 0) {
> +        av_buffer_unref(&ref);
> +        return NULL;
> +    }
> +
> +    ret = v4l2_request_buffer_alloc(avctx, &req->capture, ctx->format.type);
> +    if (ret < 0) {
> +        av_buffer_unref(&ref);
> +        return NULL;
> +    }

I'm a bit unclear on the logic of tying the output and capture buffers together; is there some synchronisation reason for this?

(The decoded frames may be very long-lived as reference frames, while the input bitstream changes every time.)

> +
> +    ret = ioctl(ctx->media_fd, MEDIA_IOC_REQUEST_ALLOC, &req->request_fd);
> +    if (ret < 0) {
> +        av_log(avctx, AV_LOG_ERROR, "%s: request alloc failed, %s (%d)\n", __func__, strerror(errno), errno);
> +        av_buffer_unref(&ref);
> +        return NULL;
> +    }
> +
> +    av_log(avctx, AV_LOG_DEBUG, "%s: avctx=%p size=%d data=%p request_fd=%d\n", __func__, avctx, size, data, req->request_fd);
> +    return ref;
> +}
> +
> +static void v4l2_request_pool_free(void *opaque)
> +{
> +    av_log(NULL, AV_LOG_DEBUG, "%s: opaque=%p\n", __func__, opaque);
> +}
> +
> +static void v4l2_request_hwframe_ctx_free(AVHWFramesContext *hwfc)
> +{
> +    av_log(NULL, AV_LOG_DEBUG, "%s: hwfc=%p pool=%p\n", __func__, hwfc, hwfc->pool);
> +
> +    av_buffer_pool_flush(hwfc->pool);
> +    av_buffer_pool_uninit(&hwfc->pool);
> +}
> +
> +int ff_v4l2_request_frame_params(AVCodecContext *avctx, AVBufferRef *hw_frames_ctx)
> +{
> +    V4L2RequestContext *ctx = avctx->internal->hwaccel_priv_data;
> +    AVHWFramesContext *hwfc = (AVHWFramesContext*)hw_frames_ctx->data;
> +
> +    hwfc->format = AV_PIX_FMT_DRM_PRIME;
> +    hwfc->sw_format = AV_PIX_FMT_NV12;
> +    if (V4L2_TYPE_IS_MULTIPLANAR(ctx->format.type)) {
> +        hwfc->width = ctx->format.fmt.pix_mp.width;
> +        hwfc->height = ctx->format.fmt.pix_mp.height;
> +    } else {
> +        hwfc->width = ctx->format.fmt.pix.width;
> +        hwfc->height = ctx->format.fmt.pix.height;
> +    }
> +
> +    hwfc->pool = av_buffer_pool_init2(sizeof(V4L2RequestDescriptor), avctx, v4l2_request_frame_alloc, v4l2_request_pool_free);
> +    if (!hwfc->pool)
> +        return AVERROR(ENOMEM);
> +
> +    hwfc->free = v4l2_request_hwframe_ctx_free;
> +
> +    hwfc->initial_pool_size = 1;
> +
> +    switch (avctx->codec_id) {
> +    case AV_CODEC_ID_VP9:
> +        hwfc->initial_pool_size += 8;
> +        break;
> +    case AV_CODEC_ID_VP8:
> +        hwfc->initial_pool_size += 3;
> +        break;

Maybe H.264 would want 16 frames for the DPB here?

> +    default:
> +        hwfc->initial_pool_size += 2;
> +    }

Does the pool size actually have to be fixed?  (Most streams will not use all of the space.)

> +
> +    av_log(avctx, AV_LOG_DEBUG, "%s: avctx=%p ctx=%p hw_frames_ctx=%p hwfc=%p pool=%p width=%d height=%d initial_pool_size=%d\n", __func__, avctx, ctx, hw_frames_ctx, hwfc, hwfc->pool, hwfc->width, hwfc->height, hwfc->initial_pool_size);
> +
> +    return 0;
> +}
> diff --git a/libavcodec/v4l2_request.h b/libavcodec/v4l2_request.h
> new file mode 100644
> index 0000000000..58d2aa70af
> --- /dev/null
> +++ b/libavcodec/v4l2_request.h
> @@ -0,0 +1,77 @@
> +/*
> + * This file is part of FFmpeg.
> + *
> + * FFmpeg is free software; you can redistribute it and/or
> + * modify it under the terms of the GNU Lesser General Public
> + * License as published by the Free Software Foundation; either
> + * version 2.1 of the License, or (at your option) any later version.
> + *
> + * FFmpeg is distributed in the hope that it will be useful,
> + * but WITHOUT ANY WARRANTY; without even the implied warranty of
> + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
> + * Lesser General Public License for more details.
> + *
> + * You should have received a copy of the GNU Lesser General Public
> + * License along with FFmpeg; if not, write to the Free Software
> + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
> + */
> +
> +#ifndef AVCODEC_V4L2_REQUEST_H
> +#define AVCODEC_V4L2_REQUEST_H
> +
> +#include <linux/videodev2.h>
> +
> +#include "libavutil/hwcontext_drm.h"
> +
> +typedef struct V4L2RequestContext {
> +    int video_fd;
> +    int media_fd;
> +    enum v4l2_buf_type output_type;
> +    struct v4l2_format format;
> +    int timestamp;
> +} V4L2RequestContext;
> +
> +typedef struct V4L2RequestBuffer {
> +    int index;
> +    int fd;
> +    uint8_t *addr;
> +    uint32_t width;
> +    uint32_t height;
> +    uint32_t size;
> +    uint32_t used;
> +    uint32_t capabilities;
> +    struct v4l2_buffer buffer;
> +} V4L2RequestBuffer;
> +
> +typedef struct V4L2RequestDescriptor {
> +    AVDRMFrameDescriptor drm;
> +    int request_fd;
> +    V4L2RequestBuffer output;
> +    V4L2RequestBuffer capture;
> +} V4L2RequestDescriptor;
> +
> +uint64_t ff_v4l2_request_get_capture_timestamp(AVFrame *frame);
> +
> +int ff_v4l2_request_reset_frame(AVCodecContext *avctx, AVFrame *frame);
> +
> +int ff_v4l2_request_append_output_buffer(AVCodecContext *avctx, AVFrame *frame, const uint8_t *data, uint32_t size);
> +
> +int ff_v4l2_request_set_controls(AVCodecContext *avctx, struct v4l2_ext_control *control, int count);
> +
> +int ff_v4l2_request_get_controls(AVCodecContext *avctx, struct v4l2_ext_control *control, int count);
> +
> +int ff_v4l2_request_query_control(AVCodecContext *avctx, struct v4l2_query_ext_ctrl *control);
> +
> +int ff_v4l2_request_query_control_default_value(AVCodecContext *avctx, uint32_t id);
> +
> +int ff_v4l2_request_decode_slice(AVCodecContext *avctx, AVFrame *frame, struct v4l2_ext_control *control, int count, int first_slice, int last_slice);
> +
> +int ff_v4l2_request_decode_frame(AVCodecContext *avctx, AVFrame *frame, struct v4l2_ext_control *control, int count);
> +
> +int ff_v4l2_request_init(AVCodecContext *avctx, uint32_t pixelformat, uint32_t buffersize, struct v4l2_ext_control *control, int count);
> +
> +int ff_v4l2_request_uninit(AVCodecContext *avctx);
> +
> +int ff_v4l2_request_frame_params(AVCodecContext *avctx, AVBufferRef *hw_frames_ctx);
> +
> +#endif /* AVCODEC_V4L2_REQUEST_H */
> 

- Mark


More information about the ffmpeg-devel mailing list