[FFmpeg-devel] [PATCH]: Video Decoder Acceleration (VDA) HWAccel module for Mac OS X

Wed Nov 2 01:50:17 CET 2011

On Nov 1, 2011, at 8:15 PM, Michael Niedermayer wrote:

> On Tue, Nov 01, 2011 at 05:39:30PM +0100, Sebastien Zwickert wrote:
>> 
>> On Nov 1, 2011, at 4:43 PM, Carl Eugen Hoyos wrote:
>> 
>>> Sebastien Zwickert <dilaroga <at> gmail.com> writes:
>>> 
>>>> Change done in the upated patch in attachment.
>>> 
>>> Please bump libavutil and libavcodec version number and please add a line to the
>>> Changelog.
>> 
>> Done. patch updated.
>> 
>> Best regards,
>> 
>> Sebastien.
>> 
> 
> [...]
> 
>> +/* Mutex manager callback. */
>> +static int ff_vda_lock_operation(void **mtx, enum AVLockOp op)
> 
> the ff prefix is normally used for non static private functions
> within one library in ffmpeg.
> 

I removed the ff prefix for static functions.

> 
>> +{
>> +    switch(op) {
>> +        case AV_LOCK_CREATE:
>> +            *mtx = av_malloc(sizeof(pthread_mutex_t));
>> +            if(!*mtx)
>> +                return 1;
>> +            return !!pthread_mutex_init(*mtx, NULL);
>> +        case AV_LOCK_OBTAIN:
>> +            return !!pthread_mutex_lock(*mtx);
>> +        case AV_LOCK_RELEASE:
>> +            return !!pthread_mutex_unlock(*mtx);
>> +        case AV_LOCK_DESTROY:
>> +            pthread_mutex_destroy(*mtx);
> 
>> +            av_free(*mtx);
> 
> av_freep() is a bit safer as it zeros mtx

Done.

> 
> 
> [...]
>> +/* Removes and releases all frames from the queue. */
>> +static void ff_vda_clear_queue(struct vda_context *vda_ctx)
>> +{
>> +    vda_frame *top_frame;
>> +    top_frame = NULL;
>> +
>> +    ff_vda_lock_operation(&vda_ctx->queue_mutex, AV_LOCK_OBTAIN);
>> +
>> +    while (vda_ctx->queue != NULL)
>> +    {
>> +        top_frame = vda_ctx->queue;
> 
>> +        vda_ctx->queue = (vda_frame *)top_frame->next_frame;
> 
> the cast should be unneeded, the type seems to be matching

I removed the unneeded cast and another one in ff_vda_pop_queue.

> 
> 
>> +        vda_ctx->queue_depth--;
>> +        ff_vda_release_vda_frame(top_frame);
>> +    }
>> +
>> +    ff_vda_lock_operation(&vda_ctx->queue_mutex, AV_LOCK_RELEASE);
>> +}
>> +
>> +
>> +/* Decoder callback that adds the vda frame to the queue in display order. */
>> +static void ff_vda_decoder_callback (void *vda_hw_ctx,
>> +                                     CFDictionaryRef user_info,
>> +                                     OSStatus status,
>> +                                     uint32_t infoFlags,
>> +                                     CVImageBufferRef image_buffer)
>> +{
>> +    struct vda_context *vda_ctx = (struct vda_context*)vda_hw_ctx;
>> +    vda_frame *new_frame = NULL;
>> +    vda_frame *queue_walker = NULL;
>> +
>> +    if (NULL == image_buffer)
>> +        return;
>> +
>> +    if (kCVPixelFormatType_422YpCbCr8 != CVPixelBufferGetPixelFormatType(image_buffer))
>> +        return;
>> +
> 
>> +    new_frame = (vda_frame *)calloc(sizeof(vda_frame), 1);
> 
> this probably should be av_mallocz()
> av_free() cannot be used on calloced() data
> 

Done. I now use av_freep() to release this data.

> 
> 
>> +    new_frame->next_frame = NULL;
>> +    new_frame->cv_buffer = CVPixelBufferRetain(image_buffer);
>> +    new_frame->pts = ff_vda_pts_from_dictionary(user_info);
>> +
>> +    ff_vda_lock_operation(&vda_ctx->queue_mutex, AV_LOCK_OBTAIN);
>> +
>> +    queue_walker = vda_ctx->queue;
>> +
>> +    if (!queue_walker || (new_frame->pts < queue_walker->pts))
>> +    {
>> +        /* we have an empty queue, or this frame earlier than the current queue head */
>> +        new_frame->next_frame = queue_walker;
>> +        vda_ctx->queue = new_frame;
>> +    }
>> +    else
>> +    {
>> +        /* walk the queue and insert this frame where it belongs in display order */
>> +        bool inserted = false;
> 
>> +        vda_frame *next_frame = NULL;
> 
> the variable is always written before read so the init is unneeded
> 

I removed all this kind of useless init of the code.

> 
>> +
>> +        while (!inserted)
>> +        {
>> +            next_frame = queue_walker->next_frame;
>> +
>> +            if (!next_frame || (new_frame->pts < next_frame->pts))
>> +            {
>> +                new_frame->next_frame = next_frame;
>> +                queue_walker->next_frame = new_frame;
> 
>> +                inserted = true;
> 
> using break; here would avoid the need for the "inserted" variable

Done.

> 
> 
> [...]
>> +vda_frame *ff_vda_queue_pop(struct vda_context *vda_ctx)
>> +{
>> +    if (!vda_ctx->queue_depth)
>> +        return NULL;
> 
> this is the only case where queue_depth is read, can it be replaced
> by a   vda_ctx->queue == NULL check ?
> if so you could remove the queue_depth variable and all code
> maintaining its value
> 

You're right. Done.

> 
> [...]
>> +static int decode_slice(AVCodecContext *avctx,
>> +                        const uint8_t *buffer,
>> +                        uint32_t size)
>> +{
>> +    H264Context *h = avctx->priv_data;
>> +    struct vda_context *vda_ctx = avctx->hwaccel_context;
>> +    struct vda_picture_context *pic_ctx = h->s.current_picture_ptr->f.hwaccel_picture_private;
>> +    void *tmp;
>> +
>> +    if (!vda_ctx->decoder)
>> +        return -1;
>> +
> 
>> +    tmp = NULL;
>> +    tmp = av_realloc(pic_ctx->bitstream, pic_ctx->bitstream_size+size+4);
> 
> the tmp = NULL is unneeded
> also please consider using *av_fast_realloc() if its possible in this
> case here as its faster (*alloc is actually slow, so never downsizing
> and instead reusing buffers is faster)

It would be possible if i can get the bitstream total size.
This couldn't be done in start_frame ? buffer and size parameters sounds good candidates to get this info.

> 
> 
>> +    if (!tmp)
>> +        return AVERROR(ENOMEM);
>> +
>> +    pic_ctx->bitstream = tmp;
>> +
>> +    AV_WB32(pic_ctx->bitstream+pic_ctx->bitstream_size, size);
>> +    memcpy(pic_ctx->bitstream+pic_ctx->bitstream_size+4, buffer, size);
>> +
>> +    pic_ctx->bitstream_size += size + 4;
>> +
>> +    return 0;
>> +}
>> +
>> +static int end_frame(AVCodecContext *avctx)
>> +{
>> +    H264Context *h = avctx->priv_data;
>> +    struct vda_context *vda_ctx = avctx->hwaccel_context;
>> +    struct vda_picture_context *pic_ctx = h->s.current_picture_ptr->f.hwaccel_picture_private;
>> +    AVFrame *frame = (AVFrame*)h->s.current_picture_ptr;
>> +    int status;
>> +
>> +    if (!vda_ctx->decoder || !pic_ctx->bitstream)
>> +        return -1;
>> +
>> +    status = ff_vda_decoder_decode(vda_ctx, pic_ctx->bitstream,
>> +                                   pic_ctx->bitstream_size,
>> +                                   frame->reordered_opaque);
>> +
>> +    if (status)
>> +        av_log(avctx, AV_LOG_ERROR, "Failed to decode frame (%d)\n", status);
>> +
>> +    av_free(pic_ctx->bitstream);
> 
> av_freep() is safer (against double free bugs for example)

Done.

> 
> 
> [...]
> 
> also, if you want to maintain this code in ffmpeg,
> please add yourself to the MAINTAINERS file

Of course. Done.

Updated patch in attachment.

Thanks for reviewing.

Best regards,

Sebastien.

-------------- next part --------------
A non-text attachment was scrubbed...
Name: 0001-HWAccel-adds-Video-Decoder-Acceleration-VDA-module-f.patch
Type: application/octet-stream
Size: 26769 bytes
Desc: not available
URL: <http://ffmpeg.org/pipermail/ffmpeg-devel/attachments/20111102/7be842b6/attachment.obj>
-------------- next part --------------