[FFmpeg-devel] [PATCH] videotoolbox: allow to enable the async decoding.

wm4 nfxjfg at googlemail.com
Sun Aug 9 13:30:09 CEST 2015


On Sun,  9 Aug 2015 13:11:44 +0200
Sebastien Zwickert <dilaroga at gmail.com> wrote:

> This patch allows to use the Videotoolbox API in asynchronous mode.

> Note that when using async decoding the user is responsible for
> releasing the async frame.

What does this mean?

> Moreover, an option called videotoolbox_async was added to enable
> async decoding with ffmpeg CLI.
> 
> ---
>  ffmpeg.h                  |   1 +
>  ffmpeg_opt.c              |   1 +
>  ffmpeg_videotoolbox.c     |  69 +++++++++++++----
>  libavcodec/videotoolbox.c | 186 ++++++++++++++++++++++++++++++++++++++++------
>  libavcodec/videotoolbox.h |  73 ++++++++++++++++++
>  5 files changed, 294 insertions(+), 36 deletions(-)
> 
> diff --git a/ffmpeg.h b/ffmpeg.h
> index 6544e6f..73a1031 100644
> --- a/ffmpeg.h
> +++ b/ffmpeg.h
> @@ -522,6 +522,7 @@ extern AVIOContext *progress_avio;
>  extern float max_error_rate;
>  extern int vdpau_api_ver;
>  extern char *videotoolbox_pixfmt;
> +extern int videotoolbox_async;
>  
>  extern const AVIOInterruptCB int_cb;
>  
> diff --git a/ffmpeg_opt.c b/ffmpeg_opt.c
> index 28d3051..91be9b9 100644
> --- a/ffmpeg_opt.c
> +++ b/ffmpeg_opt.c
> @@ -3238,6 +3238,7 @@ const OptionDef options[] = {
>  #endif
>  #if CONFIG_VDA || CONFIG_VIDEOTOOLBOX
>      { "videotoolbox_pixfmt", HAS_ARG | OPT_STRING | OPT_EXPERT, { &videotoolbox_pixfmt}, "" },
> +    { "videotoolbox_async", HAS_ARG | OPT_INT | OPT_EXPERT, { &videotoolbox_async}, "" },
>  #endif
>      { "autorotate",       HAS_ARG | OPT_BOOL | OPT_SPEC |
>                            OPT_EXPERT | OPT_INPUT,                                { .off = OFFSET(autorotate) },
> diff --git a/ffmpeg_videotoolbox.c b/ffmpeg_videotoolbox.c
> index 6688452..0bb0600 100644
> --- a/ffmpeg_videotoolbox.c
> +++ b/ffmpeg_videotoolbox.c
> @@ -34,21 +34,42 @@ typedef struct VTContext {
>  } VTContext;
>  
>  char *videotoolbox_pixfmt;
> +int videotoolbox_async;
>  
>  static int videotoolbox_retrieve_data(AVCodecContext *s, AVFrame *frame)
>  {
>      InputStream *ist = s->opaque;
>      VTContext  *vt = ist->hwaccel_ctx;
> -    CVPixelBufferRef pixbuf = (CVPixelBufferRef)frame->data[3];
> -    OSType pixel_format = CVPixelBufferGetPixelFormatType(pixbuf);
> +    AVVideotoolboxContext *videotoolbox = s->hwaccel_context;
> +    AVVideotoolboxAsyncFrame *async_frame = NULL;
> +    CVPixelBufferRef pixbuf;
> +    OSType pixel_format;
>      CVReturn err;
>      uint8_t *data[4] = { 0 };
>      int linesize[4] = { 0 };
>      int planes, ret, i;
>      char codec_str[32];
> +    int width, height;
>  
>      av_frame_unref(vt->tmp_frame);
>  
> +    if (videotoolbox->useAsyncDecoding) {
> +        async_frame = av_videotoolbox_pop_async_frame(videotoolbox);
> +
> +        if (!async_frame)
> +            return -1;
> +
> +        pixbuf = async_frame->cv_buffer;
> +        width  = CVPixelBufferGetWidth(pixbuf);
> +        height = CVPixelBufferGetHeight(pixbuf);
> +    } else {
> +        pixbuf = (CVPixelBufferRef)frame->data[3];
> +        width  = frame->width;
> +        height = frame->height;
> +    }
> +
> +    pixel_format = CVPixelBufferGetPixelFormatType(pixbuf);
> +
>      switch (pixel_format) {
>      case kCVPixelFormatType_420YpCbCr8Planar: vt->tmp_frame->format = AV_PIX_FMT_YUV420P; break;
>      case kCVPixelFormatType_422YpCbCr8:       vt->tmp_frame->format = AV_PIX_FMT_UYVY422; break;
> @@ -60,19 +81,21 @@ static int videotoolbox_retrieve_data(AVCodecContext *s, AVFrame *frame)
>          av_get_codec_tag_string(codec_str, sizeof(codec_str), s->codec_tag);
>          av_log(NULL, AV_LOG_ERROR,
>                 "%s: Unsupported pixel format: %s\n", codec_str, videotoolbox_pixfmt);
> -        return AVERROR(ENOSYS);
> +        ret = AVERROR(ENOSYS);
> +        goto fail;
>      }
>  
> -    vt->tmp_frame->width  = frame->width;
> -    vt->tmp_frame->height = frame->height;
> +    vt->tmp_frame->width  = width;
> +    vt->tmp_frame->height = height;
>      ret = av_frame_get_buffer(vt->tmp_frame, 32);
> -    if (ret < 0)
> -        return ret;
> -
> +    if (ret < 0) {
> +        goto fail;
> +    }
>      err = CVPixelBufferLockBaseAddress(pixbuf, kCVPixelBufferLock_ReadOnly);
>      if (err != kCVReturnSuccess) {
>          av_log(NULL, AV_LOG_ERROR, "Error locking the pixel buffer.\n");
> -        return AVERROR_UNKNOWN;
> +        ret = AVERROR_UNKNOWN;
> +        goto fail;
>      }
>  
>      if (CVPixelBufferIsPlanar(pixbuf)) {
> @@ -89,17 +112,27 @@ static int videotoolbox_retrieve_data(AVCodecContext *s, AVFrame *frame)
>  
>      av_image_copy(vt->tmp_frame->data, vt->tmp_frame->linesize,
>                    (const uint8_t **)data, linesize, vt->tmp_frame->format,
> -                  frame->width, frame->height);
> +                  width, height);
>  
>      ret = av_frame_copy_props(vt->tmp_frame, frame);
>      CVPixelBufferUnlockBaseAddress(pixbuf, kCVPixelBufferLock_ReadOnly);
> -    if (ret < 0)
> -        return ret;
> +    if (ret < 0) {
> +        goto fail;
> +    }
>  
>      av_frame_unref(frame);
>      av_frame_move_ref(frame, vt->tmp_frame);
>  
> +    if (videotoolbox->useAsyncDecoding) {
> +        av_videotoolbox_release_async_frame(async_frame);
> +    }
> +
>      return 0;
> +fail:
> +    if (videotoolbox->useAsyncDecoding) {
> +        av_videotoolbox_release_async_frame(async_frame);
> +    }
> +    return ret;
>  }
>  
>  static void videotoolbox_uninit(AVCodecContext *s)
> @@ -147,10 +180,18 @@ int videotoolbox_init(AVCodecContext *s)
>  
>      if (ist->hwaccel_id == HWACCEL_VIDEOTOOLBOX) {
>  #if CONFIG_VIDEOTOOLBOX
> +        AVVideotoolboxContext *vtctx = NULL;
>          if (!videotoolbox_pixfmt) {
> -            ret = av_videotoolbox_default_init(s);
> +            if (videotoolbox_async) {
> +                vtctx = av_videotoolbox_alloc_async_context();
> +            }
> +            ret = av_videotoolbox_default_init2(s, vtctx);
>          } else {
> -            AVVideotoolboxContext *vtctx = av_videotoolbox_alloc_context();
> +            if (videotoolbox_async) {
> +                vtctx = av_videotoolbox_alloc_async_context();
> +            } else {
> +                vtctx = av_videotoolbox_alloc_context();
> +            }
>              CFStringRef pixfmt_str = CFStringCreateWithCString(kCFAllocatorDefault,
>                                                                 videotoolbox_pixfmt,
>                                                                 kCFStringEncodingUTF8);
> diff --git a/libavcodec/videotoolbox.c b/libavcodec/videotoolbox.c
> index b78238a..7047257 100644
> --- a/libavcodec/videotoolbox.c
> +++ b/libavcodec/videotoolbox.c
> @@ -22,6 +22,7 @@
>  
>  #include "config.h"
>  #if CONFIG_VIDEOTOOLBOX
> +#  include <pthread.h>
>  #  include "videotoolbox.h"
>  #else
>  #  include "vda.h"
> @@ -177,6 +178,41 @@ int ff_videotoolbox_uninit(AVCodecContext *avctx)
>  }
>  
>  #if CONFIG_VIDEOTOOLBOX
> +static int videotoolbox_lock_operation(void **mtx, enum AVLockOp op)
> +{
> +    switch(op) {
> +    case AV_LOCK_CREATE:
> +        *mtx = av_malloc(sizeof(pthread_mutex_t));
> +        if(!*mtx)
> +            return 1;
> +        return !!pthread_mutex_init(*mtx, NULL);
> +    case AV_LOCK_OBTAIN:
> +        return !!pthread_mutex_lock(*mtx);
> +    case AV_LOCK_RELEASE:
> +        return !!pthread_mutex_unlock(*mtx);
> +    case AV_LOCK_DESTROY:
> +        pthread_mutex_destroy(*mtx);
> +        av_freep(mtx);
> +        return 0;
> +    }
> +    return 1;
> +}

This is ugly and seems to serve no purpose as far as I can see. Use
pthread directly.

> +
> +static void videotoolbox_clear_queue(struct AVVideotoolboxContext *videotoolbox)
> +{
> +    AVVideotoolboxAsyncFrame *top_frame;
> +
> +    videotoolbox_lock_operation(&videotoolbox->queue_mutex, AV_LOCK_OBTAIN);
> +
> +    while (videotoolbox->queue != NULL) {
> +        top_frame = videotoolbox->queue;
> +        videotoolbox->queue = top_frame->next_frame;
> +        av_videotoolbox_release_async_frame(top_frame);
> +    }
> +
> +    videotoolbox_lock_operation(&videotoolbox->queue_mutex, AV_LOCK_RELEASE);
> +}
> +
>  static void videotoolbox_write_mp4_descr_length(PutByteContext *pb, int length)
>  {
>      int i;
> @@ -244,11 +280,17 @@ static CFDataRef videotoolbox_esds_extradata_create(AVCodecContext *avctx)
>  
>  static CMSampleBufferRef videotoolbox_sample_buffer_create(CMFormatDescriptionRef fmt_desc,
>                                                             void *buffer,
> -                                                           int size)
> +                                                           int size,
> +                                                           int64_t frame_pts)
>  {
>      OSStatus status;
>      CMBlockBufferRef  block_buf;
>      CMSampleBufferRef sample_buf;
> +    CMSampleTimingInfo timeInfo;
> +    CMSampleTimingInfo timeInfoArray[1];
> +
> +    timeInfo.presentationTimeStamp = CMTimeMake(frame_pts, 1);
> +    timeInfoArray[0] = timeInfo;
>  
>      block_buf  = NULL;
>      sample_buf = NULL;
> @@ -271,8 +313,8 @@ static CMSampleBufferRef videotoolbox_sample_buffer_create(CMFormatDescriptionRe
>                                        0,                    // makeDataReadyRefcon
>                                        fmt_desc,             // formatDescription
>                                        1,                    // numSamples
> -                                      0,                    // numSampleTimingEntries
> -                                      NULL,                 // sampleTimingArray
> +                                      1,                    // numSampleTimingEntries
> +                                      timeInfoArray,        // sampleTimingArray
>                                        0,                    // numSampleSizeEntries
>                                        NULL,                 // sampleSizeArray
>                                        &sample_buf);
> @@ -293,41 +335,88 @@ static void videotoolbox_decoder_callback(void *opaque,
>                                            CMTime duration)
>  {
>      AVCodecContext *avctx = opaque;
> -    VTContext *vtctx = avctx->internal->hwaccel_priv_data;
> +    AVVideotoolboxContext *videotoolbox = avctx->hwaccel_context;
>  
> -    if (vtctx->frame) {
> -        CVPixelBufferRelease(vtctx->frame);
> -        vtctx->frame = NULL;
> -    }
> +    if (!videotoolbox->useAsyncDecoding) {
> +        VTContext *vtctx = avctx->internal->hwaccel_priv_data;
>  
> -    if (!image_buffer) {
> -        av_log(NULL, AV_LOG_DEBUG, "vt decoder cb: output image buffer is null\n");
> -        return;
> -    }
> +        if (vtctx->frame) {
> +            CVPixelBufferRelease(vtctx->frame);
> +            vtctx->frame = NULL;
> +        }
>  
> -    vtctx->frame = CVPixelBufferRetain(image_buffer);
> +        if (!image_buffer) {
> +            av_log(NULL, AV_LOG_DEBUG, "vt decoder cb: output image buffer is null\n");
> +            return;
> +        }
> +
> +        vtctx->frame = CVPixelBufferRetain(image_buffer);
> +    } else { // async decoding
> +        AVVideotoolboxAsyncFrame *new_frame;
> +        AVVideotoolboxAsyncFrame *queue_walker;
> +
> +        if (!image_buffer) {
> +            av_log(NULL, AV_LOG_DEBUG, "vt decoder cb: output image buffer is null\n");
> +            return;
> +        }
> +
> +        new_frame = (AVVideotoolboxAsyncFrame *)av_mallocz(sizeof(AVVideotoolboxAsyncFrame));
> +        new_frame->next_frame = NULL;

Unchecked malloc.

> +        new_frame->cv_buffer = CVPixelBufferRetain(image_buffer);
> +        new_frame->pts = pts.value;
> +
> +        videotoolbox_lock_operation(&videotoolbox->queue_mutex, AV_LOCK_OBTAIN);
> +
> +        queue_walker = videotoolbox->queue;
> +
> +        if (!queue_walker || (new_frame->pts < queue_walker->pts)) {
> +            /* we have an empty queue, or this frame earlier than the current queue head */
> +            new_frame->next_frame = queue_walker;
> +            videotoolbox->queue = new_frame;
> +        } else {
> +            /* walk the queue and insert this frame where it belongs in display order */
> +            AVVideotoolboxAsyncFrame *next_frame;
> +
> +            while (1) {
> +                next_frame = queue_walker->next_frame;
> +
> +                if (!next_frame || (new_frame->pts < next_frame->pts)) {
> +                    new_frame->next_frame = next_frame;
> +                    queue_walker->next_frame = new_frame;
> +                    break;
> +                }

As Hendrik Leppkes said, this is fragile.

> +                queue_walker = next_frame;
> +            }
> +        }
> +
> +        videotoolbox_lock_operation(&videotoolbox->queue_mutex, AV_LOCK_RELEASE);
> +    }
>  }
>  
> -static OSStatus videotoolbox_session_decode_frame(AVCodecContext *avctx)
> +static OSStatus videotoolbox_session_decode_frame(AVCodecContext *avctx, AVFrame *frame)
>  {
>      OSStatus status;
>      CMSampleBufferRef sample_buf;
>      AVVideotoolboxContext *videotoolbox = avctx->hwaccel_context;
>      VTContext *vtctx = avctx->internal->hwaccel_priv_data;
> +    VTDecodeFrameFlags decodeFlags = videotoolbox->useAsyncDecoding ?
> +                                         kVTDecodeFrame_EnableAsynchronousDecompression : 0;
>  
>      sample_buf = videotoolbox_sample_buffer_create(videotoolbox->cm_fmt_desc,
>                                                     vtctx->bitstream,
> -                                                   vtctx->bitstream_size);
> +                                                   vtctx->bitstream_size,
> +                                                   frame->pkt_pts);
>  
>      if (!sample_buf)
>          return -1;
>  
>      status = VTDecompressionSessionDecodeFrame(videotoolbox->session,
>                                                 sample_buf,
> -                                               0,       // decodeFlags
> +                                               decodeFlags,
>                                                 NULL,    // sourceFrameRefCon
>                                                 0);      // infoFlagsOut
> -    if (status == noErr)
> +
> +    if (status == noErr && !videotoolbox->useAsyncDecoding)
>          status = VTDecompressionSessionWaitForAsynchronousFrames(videotoolbox->session);
>  
>      CFRelease(sample_buf);
> @@ -344,17 +433,21 @@ static int videotoolbox_common_end_frame(AVCodecContext *avctx, AVFrame *frame)
>      if (!videotoolbox->session || !vtctx->bitstream)
>          return AVERROR_INVALIDDATA;
>  
> -    status = videotoolbox_session_decode_frame(avctx);
> +    status = videotoolbox_session_decode_frame(avctx, frame);
>  
>      if (status) {
>          av_log(avctx, AV_LOG_ERROR, "Failed to decode frame (%d)\n", status);
>          return AVERROR_UNKNOWN;
>      }
>  
> -    if (!vtctx->frame)
> -        return AVERROR_UNKNOWN;
> +    if (!videotoolbox->useAsyncDecoding) {
> +        if (!vtctx->frame)
> +            return AVERROR_UNKNOWN;
>  
> -    return ff_videotoolbox_buffer_create(vtctx, frame);
> +        status = ff_videotoolbox_buffer_create(vtctx, frame);
> +    }
> +
> +    return status;
>  }
>  
>  static int videotoolbox_h264_end_frame(AVCodecContext *avctx)
> @@ -508,6 +601,13 @@ static int videotoolbox_default_init(AVCodecContext *avctx)
>          return -1;
>      }
>  
> +    if (videotoolbox->useAsyncDecoding) {
> +        if (av_lockmgr_register(videotoolbox_lock_operation))
> +            return -1;
> +
> +        videotoolbox_lock_operation(&videotoolbox->queue_mutex, AV_LOCK_CREATE);
> +    }
> +
>      switch( avctx->codec_id ) {
>      case AV_CODEC_ID_H263 :
>          videotoolbox->cm_codec_type = kCMVideoCodecType_H263;
> @@ -586,6 +686,15 @@ static void videotoolbox_default_free(AVCodecContext *avctx)
>          if (videotoolbox->cm_fmt_desc)
>              CFRelease(videotoolbox->cm_fmt_desc);
>  
> +        if (videotoolbox->useAsyncDecoding) {
> +            VTDecompressionSessionWaitForAsynchronousFrames(videotoolbox->session);
> +
> +            videotoolbox_clear_queue(videotoolbox);
> +
> +            if (videotoolbox->queue_mutex != NULL)
> +                videotoolbox_lock_operation(&videotoolbox->queue_mutex, AV_LOCK_DESTROY);
> +        }
> +
>          if (videotoolbox->session)
>              VTDecompressionSessionInvalidate(videotoolbox->session);
>      }
> @@ -668,6 +777,17 @@ AVVideotoolboxContext *av_videotoolbox_alloc_context(void)
>      return ret;
>  }
>  
> +AVVideotoolboxContext *av_videotoolbox_alloc_async_context(void)
> +{
> +    AVVideotoolboxContext *ret = av_videotoolbox_alloc_context();
> +
> +    if (ret) {
> +        ret->useAsyncDecoding = 1;
> +    }
> +
> +    return ret;
> +}
> +
>  int av_videotoolbox_default_init(AVCodecContext *avctx)
>  {
>      return av_videotoolbox_default_init2(avctx, NULL);
> @@ -683,8 +803,30 @@ int av_videotoolbox_default_init2(AVCodecContext *avctx, AVVideotoolboxContext *
>  
>  void av_videotoolbox_default_free(AVCodecContext *avctx)
>  {
> -
>      videotoolbox_default_free(avctx);
>      av_freep(&avctx->hwaccel_context);
>  }
> +
> +AVVideotoolboxAsyncFrame *av_videotoolbox_pop_async_frame(AVVideotoolboxContext *videotoolbox)
> +{
> +    AVVideotoolboxAsyncFrame *top_frame;
> +
> +    if (!videotoolbox->queue)
> +        return NULL;
> +
> +    videotoolbox_lock_operation(&videotoolbox->queue_mutex, AV_LOCK_OBTAIN);
> +    top_frame = videotoolbox->queue;
> +    videotoolbox->queue = top_frame->next_frame;
> +    videotoolbox_lock_operation(&videotoolbox->queue_mutex, AV_LOCK_RELEASE);
> +
> +    return top_frame;
> +}
> +
> +void av_videotoolbox_release_async_frame(AVVideotoolboxAsyncFrame *frame)
> +{
> +    if (frame != NULL) {
> +        CVPixelBufferRelease(frame->cv_buffer);
> +        av_freep(&frame);
> +    }
> +}
>  #endif /* CONFIG_VIDEOTOOLBOX */
> diff --git a/libavcodec/videotoolbox.h b/libavcodec/videotoolbox.h
> index a48638e..b5bf030 100644
> --- a/libavcodec/videotoolbox.h
> +++ b/libavcodec/videotoolbox.h
> @@ -38,6 +38,29 @@
>  #include "libavcodec/avcodec.h"
>  
>  /**
> + *  This structure is used to store a decoded frame information and data
> + *  when using the Videotoolbox Async API.
> + */
> +typedef struct AVVideotoolboxAsyncFrame
> +{
> +    /**
> +     * The PTS of the frame.
> +     */
> +    int64_t             pts;
> +
> +    /**
> +     * The CoreVideo buffer that contains the decoded data.
> +     */
> +    CVPixelBufferRef    cv_buffer;
> +
> +    /**
> +     * A pointer to the next frame.
> +     */
> +    struct AVVideotoolboxAsyncFrame *next_frame;
> +
> +} AVVideotoolboxAsyncFrame;
> +
> +/**
>   * This struct holds all the information that needs to be passed
>   * between the caller and libavcodec for initializing Videotoolbox decoding.
>   * Its size is not a part of the public ABI, it must be allocated with
> @@ -73,6 +96,23 @@ typedef struct AVVideotoolboxContext {
>       * Set by the caller.
>       */
>      int cm_codec_type;
> +
> +    /**
> +     * Enable the async decoding mode.
> +     * Set by av_videotoolbox_alloc_async_context()
> +     */
> +    int useAsyncDecoding;
> +
> +    /**
> +     * Videotoolbox async frames queue ordered by presentation timestamp.
> +     */
> +    AVVideotoolboxAsyncFrame *queue;
> +
> +    /**
> +     * Mutex for locking queue operations when async decoding is enabled.
> +     */
> +    void *queue_mutex;
> +
>  } AVVideotoolboxContext;
>  
>  /**
> @@ -91,6 +131,21 @@ typedef struct AVVideotoolboxContext {
>  AVVideotoolboxContext *av_videotoolbox_alloc_context(void);
>  
>  /**
> + * Allocate and initialize an async Videotoolbox context.
> + *
> + * This function should be called from the get_format() callback when the caller
> + * selects the AV_PIX_FMT_VIDETOOLBOX format. The caller must then create
> + * the decoder object (using the output callback provided by libavcodec) that
> + * will be used for Videotoolbox-accelerated decoding.
> + *
> + * When decoding with Videotoolbox is finished, the caller must destroy the decoder
> + * object and free the Videotoolbox context using av_free().
> + *
> + * @return the newly allocated context or NULL on failure
> + */
> +AVVideotoolboxContext *av_videotoolbox_alloc_async_context(void);
> +
> +/**
>   * This is a convenience function that creates and sets up the Videotoolbox context using
>   * an internal implementation.
>   *
> @@ -120,6 +175,24 @@ int av_videotoolbox_default_init2(AVCodecContext *avctx, AVVideotoolboxContext *
>  void av_videotoolbox_default_free(AVCodecContext *avctx);
>  
>  /**
> + * This function must be called to retrieve the top frame of the queue when async decoding
> + * is enabled.
> + *
> + * @param vtctx the corresponding videotoolbox context
> + *
> + * @return the top async frame from the queue.
> + */
> +AVVideotoolboxAsyncFrame *av_videotoolbox_pop_async_frame(AVVideotoolboxContext *vtctx);
> +
> +/**
> + * This function must be called to release the top frame returned by
> + * av_videotoolbox_pop_async_frame().
> + *
> + * @param frame the frame to release
> + */
> +void av_videotoolbox_release_async_frame(AVVideotoolboxAsyncFrame *frame);
> +
> +/**
>   * @}
>   */
>  

So I'm not sure if I understand this. Is the API user supposed to use
these functions to get decoded frames, instead of using the AVFrame?


More information about the ffmpeg-devel mailing list