[FFmpeg-devel] [PATCH 2/4] libavcodec: v4l2m2m: output AVDRMFrameDescriptor

Sun Aug 5 00:43:32 EEST 2018

On 04/08/18 01:40, Lukas Rusak wrote:
> This allows for a zero-copy output by exporting the v4l2 buffer then wrapping that buffer
> in the AVDRMFrameDescriptor like it is done in rkmpp.
> 
> This has been in use for quite some time with great success on many platforms including:
>  - Amlogic S905
>  - Raspberry Pi
>  - i.MX6
>  - Dragonboard 410c
> 
> This was developed in conjunction with Kodi to allow handling the zero-copy buffer rendering.
> A simply utility for testing is also available here: https://github.com/BayLibre/ffmpeg-drm
> 
> todo:
>  - allow selecting pixel format output from decoder
>  - allow configuring amount of output and capture buffers
> 
> V2:
>  - allow selecting AV_PIX_FMT_DRM_PRIME
> 
> V3:
>  - use get_format to select AV_PIX_FMT_DRM_PRIME
>  - use hw_configs
>  - add handling of AV_PIX_FMT_YUV420P format (for raspberry pi)
>  - add handling of AV_PIX_FMT_YUYV422 format (for i.MX6 coda decoder)
> ---
>  libavcodec/v4l2_buffers.c | 216 ++++++++++++++++++++++++++++++++------
>  libavcodec/v4l2_buffers.h |   4 +
>  libavcodec/v4l2_context.c |  40 ++++++-
>  libavcodec/v4l2_m2m.c     |   4 +-
>  libavcodec/v4l2_m2m.h     |   3 +
>  libavcodec/v4l2_m2m_dec.c |  23 ++++
>  6 files changed, 253 insertions(+), 37 deletions(-)

The v4l2_m2m decoders need to depend on libdrm in configure for this.  (And if you don't want that as a hard dependency then you'll need #ifdefs everywhere.)

> diff --git a/libavcodec/v4l2_buffers.c b/libavcodec/v4l2_buffers.c
> index aef911f3bb..e5c46ac81e 100644
> --- a/libavcodec/v4l2_buffers.c
> +++ b/libavcodec/v4l2_buffers.c
> @@ -21,6 +21,7 @@
>   * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
>   */
>  
> +#include <drm/drm_fourcc.h>

Don't include the outer path, pkg-config deals with it.  (The path you've picked is wrong for a default install of current libdrm.)

>  #include <linux/videodev2.h>
>  #include <sys/ioctl.h>
>  #include <sys/mman.h>
> @@ -29,6 +30,7 @@
>  #include <poll.h>
>  #include "libavcodec/avcodec.h"
>  #include "libavcodec/internal.h"
> +#include "libavutil/hwcontext.h"
>  #include "v4l2_context.h"
>  #include "v4l2_buffers.h"
>  #include "v4l2_m2m.h"
> @@ -203,7 +205,79 @@ static enum AVColorTransferCharacteristic v4l2_get_color_trc(V4L2Buffer *buf)
>      return AVCOL_TRC_UNSPECIFIED;
>  }
>  
> -static void v4l2_free_buffer(void *opaque, uint8_t *unused)
> +static uint8_t * v4l2_get_drm_frame(V4L2Buffer *avbuf)
> +{
> +    AVDRMFrameDescriptor *drm_desc = &avbuf->drm_frame;
> +    AVDRMLayerDescriptor *layer;
> +
> +    /* fill the DRM frame descriptor */
> +    drm_desc->nb_objects = avbuf->num_planes;
> +    drm_desc->nb_layers = 1;
> +
> +    layer = &drm_desc->layers[0];
> +    layer->nb_planes = avbuf->num_planes;
> +
> +    for (int i = 0; i < avbuf->num_planes; i++) {
> +        layer->planes[i].object_index = i;
> +        layer->planes[i].offset = 0;
> +        layer->planes[i].pitch = avbuf->plane_info[i].bytesperline;
> +    }
> +
> +    switch (avbuf->context->av_pix_fmt) {
> +    case AV_PIX_FMT_YUYV422:
> +
> +        layer->format = DRM_FORMAT_YUYV;
> +        layer->nb_planes = 1;
> +
> +        break;
> +
> +    case AV_PIX_FMT_NV12:
> +    case AV_PIX_FMT_NV21:
> +
> +        layer->format = avbuf->context->av_pix_fmt == AV_PIX_FMT_NV12 ?
> +            DRM_FORMAT_NV12 : DRM_FORMAT_NV21;
> +
> +        if (avbuf->num_planes > 1)
> +            break;
> +
> +        layer->nb_planes = 2;
> +
> +        layer->planes[1].object_index = 0;
> +        layer->planes[1].offset = avbuf->plane_info[0].bytesperline *
> +            avbuf->context->format.fmt.pix.height;
> +        layer->planes[1].pitch = avbuf->plane_info[0].bytesperline;

To confirm, it's necessarily true that there is no padding at all between the luma and chroma planes here?

> +        break;
> +
> +    case AV_PIX_FMT_YUV420P:
> +
> +        layer->format = DRM_FORMAT_YUV420;
> +
> +        if (avbuf->num_planes > 1)
> +            break;
> +
> +        layer->nb_planes = 3;
> +
> +        layer->planes[1].object_index = 0;
> +        layer->planes[1].offset = avbuf->plane_info[0].bytesperline *
> +            avbuf->context->format.fmt.pix.height;
> +        layer->planes[1].pitch = avbuf->plane_info[0].bytesperline >> 1;
> +
> +        layer->planes[2].object_index = 0;
> +        layer->planes[2].offset = layer->planes[1].offset +
> +            ((avbuf->plane_info[0].bytesperline *
> +              avbuf->context->format.fmt.pix.height) >> 2);
> +        layer->planes[2].pitch = avbuf->plane_info[0].bytesperline >> 1;
> +        break;
> +
> +    default:
> +        drm_desc->nb_layers = 0;
> +        break;
> +    }
> +
> +    return (uint8_t *) drm_desc;
> +}
> +
> +static void v4l2_free_buffer(void *opaque, uint8_t *data)
>  {
>      V4L2Buffer* avbuf = opaque;
>      V4L2m2mContext *s = buf_to_m2mctx(avbuf);
> @@ -227,27 +301,47 @@ static void v4l2_free_buffer(void *opaque, uint8_t *unused)
>      }
>  }
>  
> -static int v4l2_buf_to_bufref(V4L2Buffer *in, int plane, AVBufferRef **buf)
> +static int v4l2_buffer_export_drm(V4L2Buffer* avbuf)
>  {
> -    V4L2m2mContext *s = buf_to_m2mctx(in);
> +    struct v4l2_exportbuffer expbuf;
> +    int i, ret;
>  
> -    if (plane >= in->num_planes)
> -        return AVERROR(EINVAL);
> +    for (i = 0; i < avbuf->num_planes; i++) {
> +        memset(&expbuf, 0, sizeof(expbuf));
>  
> -    /* even though most encoders return 0 in data_offset encoding vp8 does require this value */
> -    *buf = av_buffer_create((char *)in->plane_info[plane].mm_addr + in->planes[plane].data_offset,
> -                            in->plane_info[plane].length, v4l2_free_buffer, in, 0);
> -    if (!*buf)
> -        return AVERROR(ENOMEM);
> +        expbuf.index = avbuf->buf.index;
> +        expbuf.type = avbuf->buf.type;
> +        expbuf.plane = i;
> +
> +        ret = ioctl(buf_to_m2mctx(avbuf)->fd, VIDIOC_EXPBUF, &expbuf);
> +        if (ret < 0)
> +            return AVERROR(errno);
> +
> +        if (V4L2_TYPE_IS_MULTIPLANAR(avbuf->buf.type)) {
> +            /* drm frame */
> +            avbuf->drm_frame.objects[i].size = avbuf->buf.m.planes[i].length;
> +            avbuf->drm_frame.objects[i].fd = expbuf.fd;
> +        } else {
> +            /* drm frame */
> +            avbuf->drm_frame.objects[0].size = avbuf->buf.length;
> +            avbuf->drm_frame.objects[0].fd = expbuf.fd;
> +        }

Is there actually a case where you get multiple fds here?  The handling in the previous function doesn't look right (object_index gets reset to zero for the later planes).

Do you know the format modifier here?  If it's necessarily linear then set the field explicitly to DRM_FORMAT_MOD_LINEAR; if you don't know then set it to DRM_FORMAT_MOD_INVALID to indicate that the consumer may need to guess.

> +    }
> +
> +    return 0;
> +}
> +
> +static int v4l2_buf_increase_ref(V4L2Buffer *in)
> +{
> +    V4L2m2mContext *s = buf_to_m2mctx(in);
>  
>      if (in->context_ref)
>          atomic_fetch_add(&in->context_refcount, 1);
>      else {
>          in->context_ref = av_buffer_ref(s->self_ref);
> -        if (!in->context_ref) {
> -            av_buffer_unref(buf);
> +        if (!in->context_ref)
>              return AVERROR(ENOMEM);
> -        }
> +
>          in->context_refcount = 1;
>      }
>  
> @@ -257,6 +351,46 @@ static int v4l2_buf_to_bufref(V4L2Buffer *in, int plane, AVBufferRef **buf)
>      return 0;
>  }
>  
> +static int v4l2_buf_to_bufref_drm(V4L2Buffer *in, AVBufferRef **buf)
> +{
> +    int ret;
> +
> +    *buf = av_buffer_create((uint8_t *) &in->drm_frame,
> +                            sizeof(in->drm_frame),
> +                            v4l2_free_buffer,
> +                            in, AV_BUFFER_FLAG_READONLY);
> +    if (!*buf)
> +        return AVERROR(ENOMEM);
> +
> +    ret = v4l2_buf_increase_ref(in);
> +    if (ret)
> +         av_buffer_unref(buf);
> +
> +    return ret;
> +}
> +
> +static int v4l2_buf_to_bufref(V4L2Buffer *in, int plane, AVBufferRef **buf)
> +{
> +    int ret;
> +
> +    if (plane >= in->num_planes)
> +        return AVERROR(EINVAL);
> +
> +    /* most encoders return 0 in data_offset but vp8 does require this value */
> +    *buf = av_buffer_create((char *)in->plane_info[plane].mm_addr + in->planes[plane].data_offset,
> +                            in->plane_info[plane].length,
> +                            v4l2_free_buffer,
> +                            in, 0);
> +    if (!*buf)
> +        return AVERROR(ENOMEM);
> +
> +    ret = v4l2_buf_increase_ref(in);
> +    if (ret)
> +        av_buffer_unref(buf);
> +
> +    return ret;
> +}
> +
>  static int v4l2_bufref_to_buf(V4L2Buffer *out, int plane, const uint8_t* data, int size, AVBufferRef* bref)
>  {
>      unsigned int bytesused, length;
> @@ -308,31 +442,43 @@ int ff_v4l2_buffer_buf_to_avframe(AVFrame *frame, V4L2Buffer *avbuf)
>  
>      av_frame_unref(frame);
>  
> -    /* 1. get references to the actual data */
> -    for (i = 0; i < avbuf->num_planes; i++) {
> -        ret = v4l2_buf_to_bufref(avbuf, i, &frame->buf[i]);
> +    if (buf_to_m2mctx(avbuf)->output_drm) {
> +        /* 1. get references to the actual data */
> +        ret = v4l2_buf_to_bufref_drm(avbuf, &frame->buf[0]);
>          if (ret)
>              return ret;
>  
> -        frame->linesize[i] = avbuf->plane_info[i].bytesperline;
> -        frame->data[i] = frame->buf[i]->data;
> -    }
> +        frame->data[0] = (uint8_t *) v4l2_get_drm_frame(avbuf);
> +        frame->format = AV_PIX_FMT_DRM_PRIME;

frame->hw_frames_ctx needs to be set here as well.  (I think you can use the same trivial device/frames context setup as in rkmppdec.c.)

> +    } else {
> +        /* 1. get references to the actual data */
> +        for (i = 0; i < avbuf->num_planes; i++) {
> +            ret = v4l2_buf_to_bufref(avbuf, i, &frame->buf[i]);
> +            if (ret)
> +                return ret;
> +
> +            frame->linesize[i] = avbuf->plane_info[i].bytesperline;
> +            frame->data[i] = frame->buf[i]->data;
> +        }
>  
> -    /* 1.1 fixup special cases */
> -    switch (avbuf->context->av_pix_fmt) {
> -    case AV_PIX_FMT_NV12:
> -        if (avbuf->num_planes > 1)
> +        /* 1.1 fixup special cases */
> +        switch (avbuf->context->av_pix_fmt) {
> +        case AV_PIX_FMT_NV12:
> +            if (avbuf->num_planes > 1)
> +                break;
> +            frame->linesize[1] = avbuf->plane_info[0].bytesperline;
> +            frame->data[1] = frame->buf[0]->data +
> +                avbuf->plane_info[0].bytesperline *
> +                avbuf->context->format.fmt.pix.height;
>              break;
> -        frame->linesize[1] = avbuf->plane_info[0].bytesperline;
> -        frame->data[1] = frame->buf[0]->data + avbuf->plane_info[0].bytesperline * avbuf->context->format.fmt.pix_mp.height;
> -        break;
> -    default:
> -        break;
> +        default:
> +            break;
> +        }
> +        frame->format = avbuf->context->av_pix_fmt;
>      }
>  
>      /* 2. get frame information */
>      frame->key_frame = !!(avbuf->buf.flags & V4L2_BUF_FLAG_KEYFRAME);
> -    frame->format = avbuf->context->av_pix_fmt;
>      frame->color_primaries = v4l2_get_color_primaries(avbuf);
>      frame->colorspace = v4l2_get_color_space(avbuf);
>      frame->color_range = v4l2_get_color_range(avbuf);
> @@ -447,9 +593,6 @@ int ff_v4l2_buffer_initialize(V4L2Buffer* avbuf, int index)
>  
>      avbuf->status = V4L2BUF_AVAILABLE;
>  
> -    if (V4L2_TYPE_IS_OUTPUT(ctx->type))
> -        return 0;
> -
>      if (V4L2_TYPE_IS_MULTIPLANAR(ctx->type)) {
>          avbuf->buf.m.planes = avbuf->planes;
>          avbuf->buf.length   = avbuf->num_planes;
> @@ -459,6 +602,15 @@ int ff_v4l2_buffer_initialize(V4L2Buffer* avbuf, int index)
>          avbuf->buf.length    = avbuf->planes[0].length;
>      }
>  
> +    if (V4L2_TYPE_IS_OUTPUT(ctx->type))
> +        return 0;
> +
> +    if (buf_to_m2mctx(avbuf)->output_drm) {
> +        ret = v4l2_buffer_export_drm(avbuf);
> +        if (ret)
> +                return ret;
> +    }

Above here, I don't think you want to call mmap() at all in the DRM object output case?  The mapped memory is never touched, so it just wastes virtual address space.

> +
>      return ff_v4l2_buffer_enqueue(avbuf);
>  }
>  
> diff --git a/libavcodec/v4l2_buffers.h b/libavcodec/v4l2_buffers.h
> index dc5cc9e267..a8a50ecc65 100644
> --- a/libavcodec/v4l2_buffers.h
> +++ b/libavcodec/v4l2_buffers.h
> @@ -27,6 +27,7 @@
>  #include <stdatomic.h>
>  #include <linux/videodev2.h>
>  
> +#include "libavutil/hwcontext_drm.h"
>  #include "avcodec.h"
>  
>  enum V4L2Buffer_status {
> @@ -42,6 +43,9 @@ typedef struct V4L2Buffer {
>      /* each buffer needs to have a reference to its context */
>      struct V4L2Context *context;
>  
> +    /* DRM descriptor */
> +    AVDRMFrameDescriptor drm_frame;
> +
>      /* This object is refcounted per-plane, so we need to keep track
>       * of how many context-refs we are holding. */
>      AVBufferRef *context_ref;
> diff --git a/libavcodec/v4l2_context.c b/libavcodec/v4l2_context.c
> index efcb0426e4..9457fadb1e 100644
> --- a/libavcodec/v4l2_context.c
> +++ b/libavcodec/v4l2_context.c
> @@ -393,22 +393,54 @@ static int v4l2_release_buffers(V4L2Context* ctx)
>      struct v4l2_requestbuffers req = {
>          .memory = V4L2_MEMORY_MMAP,
>          .type = ctx->type,
> -        .count = 0, /* 0 -> unmaps buffers from the driver */
> +        .count = 0, /* 0 -> unmap all buffers from the driver */
>      };
> -    int i, j;
> +    int ret, i, j;
>  
>      for (i = 0; i < ctx->num_buffers; i++) {
>          V4L2Buffer *buffer = &ctx->buffers[i];
>  
>          for (j = 0; j < buffer->num_planes; j++) {
>              struct V4L2Plane_info *p = &buffer->plane_info[j];
> +
> +            if (V4L2_TYPE_IS_OUTPUT(ctx->type)) {
> +                /* output buffers are not EXPORTED */
> +                goto unmap;
> +            }
> +
> +            if (ctx_to_m2mctx(ctx)->output_drm) {
> +                /* use the DRM frame to close */
> +                if (buffer->drm_frame.objects[j].fd >= 0) {
> +                    if (close(buffer->drm_frame.objects[j].fd) < 0) {
> +                        av_log(logger(ctx), AV_LOG_ERROR, "%s close drm fd "
> +                            "[buffer=%2d, plane=%d, fd=%2d] - %s \n",
> +                            ctx->name, i, j, buffer->drm_frame.objects[j].fd,
> +                            av_err2str(AVERROR(errno)));
> +                    }
> +                }
> +            }
> +unmap:
>              if (p->mm_addr && p->length)
>                  if (munmap(p->mm_addr, p->length) < 0)
> -                    av_log(logger(ctx), AV_LOG_ERROR, "%s unmap plane (%s))\n", ctx->name, av_err2str(AVERROR(errno)));
> +                    av_log(logger(ctx), AV_LOG_ERROR, "%s unmap plane (%s))\n",
> +                        ctx->name, av_err2str(AVERROR(errno)));
>          }

(This whole function feels like it might fit better in v4l2_buffers.c?)

To check my understanding here of what you've got currently (please correct me if I make a mistake here):
* When making a new set of buffers (on start or format change), VIDIOC_EXPBUF is called once for each V4L2 buffer to make a DRM PRIME fd for it.
* Whenever you want to return a buffer, return the fd instead if using DRM PRIME output.
* When returning a set of buffers (on close or format change), wait for all references to disappear and then close all of the fds before releasing the V4L2 buffers.

>      }
>  
> -    return ioctl(ctx_to_m2mctx(ctx)->fd, VIDIOC_REQBUFS, &req);
> +    ret = ioctl(ctx_to_m2mctx(ctx)->fd, VIDIOC_REQBUFS, &req);
> +    if (ret < 0) {
> +            av_log(logger(ctx), AV_LOG_ERROR, "release all %s buffers (%s)\n",
> +                ctx->name, av_err2str(AVERROR(errno)));
> +
> +            if (ctx_to_m2mctx(ctx)->output_drm)
> +                av_log(logger(ctx), AV_LOG_ERROR,
> +                    "Make sure the DRM client releases all FB/GEM objects before closing the codec (ie):\n"
> +                    "for all buffers: \n"
> +                    "  1. drmModeRmFB(..)\n"
> +                    "  2. drmIoctl(.., DRM_IOCTL_GEM_CLOSE,... )\n");

Is it possible to hit this case?  Waiting for all references to disappear seems like it should cover it.  (And if the user has freed an object they are still using then that's certainly undefined behaviour, so if that's the only case here it would probably be better to abort() so that it's caught immediately.)

> +    }
> +
> +    return ret;
>  }
>  
>  static inline int v4l2_try_raw_format(V4L2Context* ctx, enum AVPixelFormat pixfmt)
> diff --git a/libavcodec/v4l2_m2m.c b/libavcodec/v4l2_m2m.c
> index 427e165f58..7896326e80 100644
> --- a/libavcodec/v4l2_m2m.c
> +++ b/libavcodec/v4l2_m2m.c
> @@ -159,7 +159,9 @@ static int v4l2_configure_contexts(V4L2m2mContext* s)
>          goto error;
>      }
>  
> -    /* decoder's buffers need to be updated at a later stage */
> +    /* decoder's capture buffers are updated during v4l2_try_start once we find
> +     * the valid format.
> +     */
>      if (!av_codec_is_decoder(s->avctx->codec)) {
>          ret = ff_v4l2_context_init(&s->capture);
>          if (ret) {
> diff --git a/libavcodec/v4l2_m2m.h b/libavcodec/v4l2_m2m.h
> index 452bf0d9bc..9ac5a2448d 100644
> --- a/libavcodec/v4l2_m2m.h
> +++ b/libavcodec/v4l2_m2m.h
> @@ -59,6 +59,9 @@ typedef struct V4L2m2mContext {
>  
>      /* Reference to self; only valid while codec is active. */
>      AVBufferRef *self_ref;
> +
> +    /* generate DRM frames */
> +    int output_drm;
>  } V4L2m2mContext;
>  
>  typedef struct V4L2m2mPriv
> diff --git a/libavcodec/v4l2_m2m_dec.c b/libavcodec/v4l2_m2m_dec.c
> index 7926e25efa..29d894492f 100644
> --- a/libavcodec/v4l2_m2m_dec.c
> +++ b/libavcodec/v4l2_m2m_dec.c
> @@ -23,12 +23,18 @@
>  
>  #include <linux/videodev2.h>
>  #include <sys/ioctl.h>
> +
> +#include "libavutil/hwcontext.h"
> +#include "libavutil/hwcontext_drm.h"
>  #include "libavutil/pixfmt.h"
>  #include "libavutil/pixdesc.h"
>  #include "libavutil/opt.h"
>  #include "libavcodec/avcodec.h"
>  #include "libavcodec/decode.h"
>  
> +#include "libavcodec/hwaccel.h"
> +#include "libavcodec/internal.h"
> +
>  #include "v4l2_context.h"
>  #include "v4l2_m2m.h"
>  #include "v4l2_fmt.h"
> @@ -186,6 +192,15 @@ static av_cold int v4l2_decode_init(AVCodecContext *avctx)
>      capture->av_codec_id = AV_CODEC_ID_RAWVIDEO;
>      capture->av_pix_fmt = avctx->pix_fmt;
>  
> +    /* the client requests the codec to generate DRM frames:
> +     *   - data[0] will therefore point to the returned AVDRMFrameDescriptor
> +     *       check the ff_v4l2_buffer_to_avframe conversion function.
> +     *   - the DRM frame format is passed in the DRM frame descriptor layer.
> +     *       check the v4l2_get_drm_frame function.
> +     */
> +    if (ff_get_format(avctx, avctx->codec->pix_fmts) == AV_PIX_FMT_DRM_PRIME)
> +        s->output_drm = 1;

This list needs to contain the software pixfmts as well, so that the user can pick from the correct list.

(If ff_get_format() returns AV_PIX_FMT_NONE it means the user has declined to use any of the available pixfmts, and the decoder should exit cleanly in that case.)

> +
>      ret = ff_v4l2_m2m_codec_init(avctx);
>      if (ret) {
>          av_log(avctx, AV_LOG_ERROR, "can't configure decoder\n");
> @@ -205,6 +220,11 @@ static const AVOption options[] = {
>      { NULL},
>  };
>  
> +static const AVCodecHWConfigInternal *v4l2_m2m_hw_configs[] = {
> +    HW_CONFIG_INTERNAL(DRM_PRIME),
> +    NULL
> +};
> +
>  #define M2MDEC_CLASS(NAME) \
>      static const AVClass v4l2_m2m_ ## NAME ## _dec_class = { \
>          .class_name = #NAME "_v4l2_m2m_decoder", \
> @@ -225,7 +245,10 @@ static const AVOption options[] = {
>          .init           = v4l2_decode_init, \
>          .receive_frame  = v4l2_receive_frame, \
>          .close          = ff_v4l2_m2m_codec_end, \
> +        .pix_fmts       = (const enum AVPixelFormat[]) { AV_PIX_FMT_DRM_PRIME, \
> +                                                         AV_PIX_FMT_NONE}, \

I'm not entirely sure, but I think this list is meant to be exhaustive if provided?

>          .bsfs           = bsf_name, \
> +        .hw_configs     = v4l2_m2m_hw_configs, \
>          .capabilities   = AV_CODEC_CAP_HARDWARE | AV_CODEC_CAP_DELAY, \
>  	                      AV_CODEC_CAP_AVOID_PROBING, \
>          .wrapper_name   = "v4l2m2m", \
> 

I had a go at using this on an Exynos device (Odroid XU4) with OpenCL in the ffmpeg utility (using cl_arm_import_memory, which works with kmsgrab on this device and can be used with the decoder on Rockchip).

The hacky patch below let me get past the initial setup so "-hwaccel drm -hwaccel_output_format drm_prime" would work (with a dummy device, I'll try to make the setup better here), and I was able to decode and discard the result with that.  For then mapping to OpenCL I ended up stuck on the lack of hw_frames_ctx for hwmap, though.

Some other observations:
* The ff_get_format() call needs all of the pixfmts so that software output is still selectable.
* The AVCodecContext fields need to be set - it spams "Frame parameters mismatch context 1920,1080,181 != 1920,1080,24" currently.
* It hangs forever at "waiting for user to release AVBufferRefs" when run on a file with a format change (e.g. fate/h264/reinit-large_420_8-to-small_420_8.h264).

Thanks,

- Mark

diff --git a/fftools/ffmpeg.c b/fftools/ffmpeg.c
index 55faec8..54839dd 100644
--- a/fftools/ffmpeg.c
+++ b/fftools/ffmpeg.c
@@ -2815,29 +2815,32 @@ static enum AVPixelFormat get_format(AVCodecContext *s, const enum AVPixelFormat
                 if (!config)
                     break;
                 if (!(config->methods &
-                      AV_CODEC_HW_CONFIG_METHOD_HW_DEVICE_CTX))
+                      (AV_CODEC_HW_CONFIG_METHOD_HW_DEVICE_CTX |
+                       AV_CODEC_HW_CONFIG_METHOD_INTERNAL)))
                     continue;
                 if (config->pix_fmt == *p)
                     break;
             }
         }
         if (config) {
-            if (config->device_type != ist->hwaccel_device_type) {
-                // Different hwaccel offered, ignore.
-                continue;
-            }
+            if (config->methods & AV_CODEC_HW_CONFIG_METHOD_HW_DEVICE_CTX) {
+                if (config->device_type != ist->hwaccel_device_type) {
+                    // Different hwaccel offered, ignore.
+                    continue;
+                }
 
-            ret = hwaccel_decode_init(s);
-            if (ret < 0) {
-                if (ist->hwaccel_id == HWACCEL_GENERIC) {
-                    av_log(NULL, AV_LOG_FATAL,
-                           "%s hwaccel requested for input stream #%d:%d, "
-                           "but cannot be initialized.\n",
-                           av_hwdevice_get_type_name(config->device_type),
-                           ist->file_index, ist->st->index);
-                    return AV_PIX_FMT_NONE;
+                ret = hwaccel_decode_init(s);
+                if (ret < 0) {
+                    if (ist->hwaccel_id == HWACCEL_GENERIC) {
+                        av_log(NULL, AV_LOG_FATAL,
+                               "%s hwaccel requested for input stream #%d:%d, "
+                               "but cannot be initialized.\n",
+                               av_hwdevice_get_type_name(config->device_type),
+                               ist->file_index, ist->st->index);
+                        return AV_PIX_FMT_NONE;
+                    }
+                    continue;
                 }
-                continue;
             }
         } else {
             const HWAccel *hwaccel = NULL;