[FFmpeg-devel] [PATCH 2/4] libavcodec: v4l2m2m: output AVDRMFrameDescriptor

Tue Sep 4 00:55:30 EEST 2018

On 17/08/18 04:36, Lukas Rusak wrote:
> On Sat, 2018-08-04 at 22:43 +0100, Mark Thompson wrote:
>> On 04/08/18 01:40, Lukas Rusak wrote:
>>> This allows for a zero-copy output by exporting the v4l2 buffer
>>> then wrapping that buffer
>>> in the AVDRMFrameDescriptor like it is done in rkmpp.
>>>
>>> This has been in use for quite some time with great success on many
>>> platforms including:
>>>  - Amlogic S905
>>>  - Raspberry Pi
>>>  - i.MX6
>>>  - Dragonboard 410c
>>>
>>> This was developed in conjunction with Kodi to allow handling the
>>> zero-copy buffer rendering.
>>> A simple utility for testing is also available here:
>>> https://github.com/BayLibre/ffmpeg-drm
>>>
>>> todo:
>>>  - allow selecting pixel format output from decoder
>>>  - allow configuring amount of output and capture buffers
>>>
>>> V2:
>>>  - allow selecting AV_PIX_FMT_DRM_PRIME
>>>
>>> V3:
>>>  - use get_format to select AV_PIX_FMT_DRM_PRIME
>>>  - use hw_configs
>>>  - add handling of AV_PIX_FMT_YUV420P format (for raspberry pi)
>>>  - add handling of AV_PIX_FMT_YUYV422 format (for i.MX6 coda
>>> decoder)
>>> ---
>>>  libavcodec/v4l2_buffers.c | 216 ++++++++++++++++++++++++++++++++
>>> ------
>>>  libavcodec/v4l2_buffers.h |   4 +
>>>  libavcodec/v4l2_context.c |  40 ++++++-
>>>  libavcodec/v4l2_m2m.c     |   4 +-
>>>  libavcodec/v4l2_m2m.h     |   3 +
>>>  libavcodec/v4l2_m2m_dec.c |  23 ++++
>>>  6 files changed, 253 insertions(+), 37 deletions(-)
>>> ...
>>> +
>>>  static int v4l2_bufref_to_buf(V4L2Buffer *out, int plane, const
>>> uint8_t* data, int size, AVBufferRef* bref)
>>>  {
>>>      unsigned int bytesused, length;
>>> @@ -308,31 +442,43 @@ int ff_v4l2_buffer_buf_to_avframe(AVFrame
>>> *frame, V4L2Buffer *avbuf)
>>>  
>>>      av_frame_unref(frame);
>>>  
>>> -    /* 1. get references to the actual data */
>>> -    for (i = 0; i < avbuf->num_planes; i++) {
>>> -        ret = v4l2_buf_to_bufref(avbuf, i, &frame->buf[i]);
>>> +    if (buf_to_m2mctx(avbuf)->output_drm) {
>>> +        /* 1. get references to the actual data */
>>> +        ret = v4l2_buf_to_bufref_drm(avbuf, &frame->buf[0]);
>>>          if (ret)
>>>              return ret;
>>>  
>>> -        frame->linesize[i] = avbuf->plane_info[i].bytesperline;
>>> -        frame->data[i] = frame->buf[i]->data;
>>> -    }
>>> +        frame->data[0] = (uint8_t *) v4l2_get_drm_frame(avbuf);
>>> +        frame->format = AV_PIX_FMT_DRM_PRIME;
>>
>> frame->hw_frames_ctx needs to be set here as well.  (I think you can
>> use the same trivial device/frames context setup as in rkmppdec.c.)
>>
> 
> Can you help me with this? This is the part I'm confused about. The
> v4l2 code here seems to use its own reference counting, so are we
> wanting to convert that to use the hw ctx instead, or what is actually
> happening?

Working test patch below without changing anything else.  Given that the V4L2 code already has its own custom reference counting I don't think there is any point in changing that part.  (Todo: put the initialisation in a more sensible place, actually do cleanup, make the failure modes something other than abort().)

With that change (and the other hacky patch), hwdownload does work on the Odroid XU4 as, e.g.:

$ ./ffmpeg_g -y -c:v h264_v4l2m2m -hwaccel drm -hwaccel_device /dev/dri/renderD128 -hwaccel_output_format drm_prime -i in.mp4 -an -vf 'hwdownload,format=nv21' -c:v libx264 out.mp4

(The output DRM objects are apparently both linear and CPU-mappable.)

Then trying to map to OpenCL to do something with it like:

$ ./ffmpeg_g -y -c:v h264_v4l2m2m -hwaccel drm -hwaccel_device /dev/dri/renderD128 -hwaccel_output_format drm_prime -init_hw_device opencl=cl:0.0 -filter_hw_device cl -i in.mp4 -an -vf hwmap,unsharp_opencl -f null -

almost works, but not quite, because the Mali T628 driver lacks the standard cl_khr_image2d_from_buffer extension.  Removing the extension check does get it right up to a failure on creating the image from the imported buffer, though, so the import is indeed working.  (The same case is working with the T760 in the RK3288; clearly I need a slightly newer test setup to get this working with V4L2.)

>>> ...
>>> @@ -186,6 +192,15 @@ static av_cold int
>>> v4l2_decode_init(AVCodecContext *avctx)
>>>      capture->av_codec_id = AV_CODEC_ID_RAWVIDEO;
>>>      capture->av_pix_fmt = avctx->pix_fmt;
>>>  
>>> +    /* the client requests the codec to generate DRM frames:
>>> +     *   - data[0] will therefore point to the returned
>>> AVDRMFrameDescriptor
>>> +     *       check the ff_v4l2_buffer_buf_to_avframe conversion
>>> function.
>>> +     *   - the DRM frame format is passed in the DRM frame
>>> descriptor layer.
>>> +     *       check the v4l2_get_drm_frame function.
>>> +     */
>>> +    if (ff_get_format(avctx, avctx->codec->pix_fmts) ==
>>> AV_PIX_FMT_DRM_PRIME)
>>> +        s->output_drm = 1;
>>
>> This list needs to contain the software pixfmts as well, so that the
>> user can pick from the correct list.
>>
> 
> Is there a simple way to do this or a list that stays updated with all
> the possible formats?

Not really - you will need to query what software pixfmts are actually supported by the hardware to make the list, so it's completely dependent on V4L2.

>> (If ff_get_format() returns AV_PIX_FMT_NONE it means the user has
>> declined to use any of the available pixfmts, and the decoder should
>> exit cleanly in that case.)
>>
> 
> makes sense.
> 
>>> +
>>>      ret = ff_v4l2_m2m_codec_init(avctx);
>>>      if (ret) {
>>>          av_log(avctx, AV_LOG_ERROR, "can't configure decoder\n");
>>> @@ -205,6 +220,11 @@ static const AVOption options[] = {
>>>      { NULL},
>>>  };
>>>  
>>> +static const AVCodecHWConfigInternal *v4l2_m2m_hw_configs[] = {
>>> +    HW_CONFIG_INTERNAL(DRM_PRIME),
>>> +    NULL
>>> +};
>>> +
>>>  #define M2MDEC_CLASS(NAME) \
>>>      static const AVClass v4l2_m2m_ ## NAME ## _dec_class = { \
>>>          .class_name = #NAME "_v4l2_m2m_decoder", \
>>> @@ -225,7 +245,10 @@ static const AVOption options[] = {
>>>          .init           = v4l2_decode_init, \
>>>          .receive_frame  = v4l2_receive_frame, \
>>>          .close          = ff_v4l2_m2m_codec_end, \
>>> +        .pix_fmts       = (const enum AVPixelFormat[]) {
>>> AV_PIX_FMT_DRM_PRIME, \
>>> +                                                         AV_PIX_FM
>>> T_NONE}, \
>>
>> I'm not entirely sure, but I think this list is meant to be
>> exhaustive if provided?
>>
>>>          .bsfs           = bsf_name, \
>>> +        .hw_configs     = v4l2_m2m_hw_configs, \
>>>          .capabilities   = AV_CODEC_CAP_HARDWARE |
>>> AV_CODEC_CAP_DELAY, \
>>>  	                      AV_CODEC_CAP_AVOID_PROBING, \
>>>          .wrapper_name   = "v4l2m2m", \
>>>
>>
>> I had a go at using this on an Exynos device (Odroid XU4) with OpenCL
>> in the ffmpeg utility (using cl_arm_import_memory, which works with
>> kmsgrab on this device and can be used with the decoder on Rockchip).
>>
> 
> Unfortunately the Exynos devices aren't the best to test on: the v4l2
> device can decode to the correct formats, but the drm plane cannot
> accept any of the formats the decoder outputs. So it can be used for
> pure conversion testing if you want. I recommend using an RPi with Dave
> Stevenson's v4l2 patches.

I'm not sure what you mean by "the drm plane" here?  The DRM object export seems fine and it imports to Mali OpenCL - being unable to actually do anything with it because of missing support in the OpenCL driver is mostly orthogonal.

Thanks,

- Mark

diff --git a/libavcodec/v4l2_buffers.c b/libavcodec/v4l2_buffers.c
index 897c3c4..45f7bcf 100644
--- a/libavcodec/v4l2_buffers.c
+++ b/libavcodec/v4l2_buffers.c
@@ -30,6 +30,7 @@
 #include <poll.h>
 #include "libavcodec/avcodec.h"
 #include "libavcodec/internal.h"
+#include "libavutil/avassert.h"
 #include "libavutil/hwcontext.h"
 #include "v4l2_context.h"
 #include "v4l2_buffers.h"
@@ -446,6 +447,31 @@ int ff_v4l2_buffer_buf_to_avframe(AVFrame *frame, V4L2Buffer *avbuf)
 
     av_frame_unref(frame);
 
+    if (!s->device_ref) {
+        AVHWDeviceContext *dev;
+        AVDRMDeviceContext *drm_dev;
+        s->device_ref = av_hwdevice_ctx_alloc(AV_HWDEVICE_TYPE_DRM);
+        av_assert0(s->device_ref);
+        dev = (AVHWDeviceContext*)s->device_ref->data;
+        drm_dev = dev->hwctx;
+        // V4L2 is not associated with any DRM device.
+        drm_dev->fd = -1;
+        ret = av_hwdevice_ctx_init(s->device_ref);
+        av_assert0(ret == 0);
+    }
+    if (!s->frames_ref) {
+        AVHWFramesContext *frames;
+        s->frames_ref = av_hwframe_ctx_alloc(s->device_ref);
+        av_assert0(s->frames_ref);
+        frames = (AVHWFramesContext*)s->frames_ref->data;
+        frames->width     = s->output.width;
+        frames->height    = s->output.height;
+        frames->format    = AV_PIX_FMT_DRM_PRIME;
+        frames->sw_format = avbuf->context->av_pix_fmt;
+        ret = av_hwframe_ctx_init(s->frames_ref);
+        av_assert0(ret == 0);
+    }
+
     if (buf_to_m2mctx(avbuf)->output_drm) {
         /* 1. get references to the actual data */
         ret = v4l2_buf_to_bufref_drm(avbuf, &frame->buf[0]);
@@ -454,6 +480,9 @@ int ff_v4l2_buffer_buf_to_avframe(AVFrame *frame, V4L2Buffer *avbuf)
 
         frame->data[0] = (uint8_t *) v4l2_get_drm_frame(avbuf);
         frame->format = AV_PIX_FMT_DRM_PRIME;
+
+        frame->hw_frames_ctx = av_buffer_ref(s->frames_ref);
+        av_assert0(frame->hw_frames_ctx);
     } else {
         /* 1. get references to the actual data */
         for (i = 0; i < avbuf->num_planes; i++) {
diff --git a/libavcodec/v4l2_m2m.h b/libavcodec/v4l2_m2m.h
index 9ac5a24..7b87db6 100644
--- a/libavcodec/v4l2_m2m.h
+++ b/libavcodec/v4l2_m2m.h
@@ -62,6 +62,9 @@ typedef struct V4L2m2mContext {
 
     /* generate DRM frames */
     int output_drm;
+
+    AVBufferRef *device_ref;
+    AVBufferRef *frames_ref;
 } V4L2m2mContext;
 
 typedef struct V4L2m2mPriv