[FFmpeg-devel] [PATCH 2/2] vda: implement h264_vda decoder

Tue Aug 21 03:48:47 CEST 2012

---
 configure                 |   1 +
 libavcodec/Makefile       |   1 +
 libavcodec/allcodecs.c    |   1 +
 libavcodec/vda_h264_dec.c | 450 ++++++++++++++++++++++++++++++++++++++++++++++
 4 files changed, 453 insertions(+)
 create mode 100644 libavcodec/vda_h264_dec.c

diff --git a/configure b/configure
index 679fae0..ca2feab 100755
--- a/configure
+++ b/configure
@@ -1559,6 +1559,7 @@ h264_vaapi_hwaccel_select="vaapi h264_decoder"
 h264_vda_hwaccel_deps="VideoDecodeAcceleration_VDADecoder_h pthreads"
 h264_vda_hwaccel_select="vda h264_decoder"
 h264_vdpau_decoder_select="vdpau h264_decoder"
+h264_vda_decoder_select="vda h264_parser"
 iac_decoder_select="fft mdct sinewin"
 imc_decoder_select="fft mdct sinewin"
 jpegls_decoder_select="golomb"
diff --git a/libavcodec/Makefile b/libavcodec/Makefile
index 576ec5f..4188fb3 100644
--- a/libavcodec/Makefile
+++ b/libavcodec/Makefile
@@ -215,6 +215,7 @@ OBJS-$(CONFIG_H264_DECODER)            += h264.o                               \
 OBJS-$(CONFIG_H264_DXVA2_HWACCEL)      += dxva2_h264.o
 OBJS-$(CONFIG_H264_VAAPI_HWACCEL)      += vaapi_h264.o
 OBJS-$(CONFIG_H264_VDA_HWACCEL)        += vda_h264.o
+OBJS-$(CONFIG_H264_VDA_DECODER)        += vda_h264_dec.o
 OBJS-$(CONFIG_HUFFYUV_DECODER)         += huffyuv.o
 OBJS-$(CONFIG_HUFFYUV_ENCODER)         += huffyuv.o
 OBJS-$(CONFIG_IAC_DECODER)             += imc.o
diff --git a/libavcodec/allcodecs.c b/libavcodec/allcodecs.c
index 4a247c4..905aa18 100644
--- a/libavcodec/allcodecs.c
+++ b/libavcodec/allcodecs.c
@@ -134,6 +134,7 @@ void avcodec_register_all(void)
     REGISTER_DECODER (H264, h264);
     REGISTER_DECODER (H264_CRYSTALHD, h264_crystalhd);
     REGISTER_DECODER (H264_VDPAU, h264_vdpau);
+    REGISTER_DECODER (H264_VDA, h264_vda);
     REGISTER_ENCDEC  (HUFFYUV, huffyuv);
     REGISTER_DECODER (IDCIN, idcin);
     REGISTER_DECODER (IFF_BYTERUN1, iff_byterun1);
diff --git a/libavcodec/vda_h264_dec.c b/libavcodec/vda_h264_dec.c
new file mode 100644
index 0000000..1a097de
--- /dev/null
+++ b/libavcodec/vda_h264_dec.c
@@ -0,0 +1,450 @@
+/*
+ * Copyright (c) 2012, Xidorn Quan
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+/**
+ * @file
+ * H.264 decoder via VDA
+ * @author Xidorn Quan <quanxunzhen at gmail.com>
+ */
+
+#include <string.h>
+#include <libkern/OSAtomic.h>
+
+#include "vda.h"
+#include "h264.h"
+#include "golomb.h"
+#include "avcodec.h"
+/* One decoded picture held in the display (reorder) queue. */
+typedef struct display_frame {
+    AVFrame f;                  /* frame copied out to the caller; data[0]/data[3] refer to cv_buffer */
+    int mmco_reset, poc;        /* reorder barrier flag (see push_queue) and picture order count */
+    CVPixelBufferRef cv_buffer; /* pixel buffer, unlocked and released in release_frame() */
+    struct display_frame *next, *prev; /* links of the circular doubly linked queue */
+} DisplayFrame;
+/* Private decoder state, stored in AVCodecContext.priv_data. */
+typedef struct {
+    struct vda_context vda_ctx;   /* state handed to the ff_vda_* helpers */
+    AVCodecParserContext *parser; /* H.264 parser run over every packet in parse_frame() */
+    H264Context *h;               /* points at parser->priv_data; not owned separately */
+    DisplayFrame queue;           /* sentinel node of the circular display queue */
+    DisplayFrame *next_frame;     /* frame most recently handed to the caller, or NULL */
+    int queued_pics;              /* number of frames currently linked into the queue */
+    OSSpinLock queue_lock;        /* taken by push_queue()/pop_queue() around list updates */
+} VDADecoderContext;
+/* Wrap a pixel buffer in a zeroed DisplayFrame and lock it for CPU access; NULL on allocation failure. */
+static inline DisplayFrame *alloc_frame(CVPixelBufferRef buffer)
+{
+    DisplayFrame *df = av_calloc(1, sizeof(*df));
+
+    if (df) {
+        df->cv_buffer = buffer;
+        /* lock before querying the base address; release_frame() undoes the lock */
+        CVPixelBufferLockBaseAddress(buffer, 0);
+        df->f.linesize[0] = CVPixelBufferGetBytesPerRow(buffer);
+        df->f.data[0]     = CVPixelBufferGetBaseAddress(buffer);
+        df->f.data[3]     = (void *)buffer;
+    }
+    return df;
+}
+/* Undo alloc_frame(): unlock and release the pixel buffer, then free the node. NULL is a no-op. */
+static inline void release_frame(DisplayFrame *df)
+{
+    if (df) {
+        CVPixelBufferUnlockBaseAddress(df->cv_buffer, 0);
+        CVPixelBufferRelease(df->cv_buffer);
+        av_free(df);
+    }
+}
+/* Insert df into the display queue, kept in ascending POC order between key frames / MMCO resets. */
+static inline void push_queue(VDADecoderContext *ctx, DisplayFrame *df)
+{
+    DisplayFrame *after, *before;
+    const int is_barrier = df->f.key_frame || df->mmco_reset;
+
+    OSSpinLockLock(&ctx->queue_lock);
+    /* Scan back from the tail; the sentinel is flagged as a key frame, so the scan always stops. */
+    for (after = ctx->queue.prev; !is_barrier; after = after->prev)
+        if (after->poc <= df->poc || after->f.key_frame || after->mmco_reset)
+            break;
+    before = after->next;
+    df->prev     = after;
+    df->next     = before;
+    before->prev = df;
+    after->next  = df;
+    ctx->queued_pics++;
+    OSSpinLockUnlock(&ctx->queue_lock);
+}
+/* Detach and return the head of the display queue, or NULL when the queue is empty. */
+static inline DisplayFrame *pop_queue(VDADecoderContext *ctx)
+{
+    DisplayFrame *head;
+
+    OSSpinLockLock(&ctx->queue_lock);
+
+    head = ctx->queue.next;
+    if (head != &ctx->queue) {
+        DisplayFrame *succ = head->next;
+        /* unlink the head; its successor becomes the first element */
+        ctx->queue.next = succ;
+        succ->prev      = &ctx->queue;
+        ctx->queued_pics--;
+    } else {
+        head = NULL; /* only the sentinel is left */
+    }
+
+    OSSpinLockUnlock(&ctx->queue_lock);
+    return head;
+}
+/* Empty the display queue, releasing every frame still waiting in it. */
+static inline void flush_queue(VDADecoderContext *ctx)
+{
+    DisplayFrame *df;
+    for (df = pop_queue(ctx); df; df = pop_queue(ctx))
+        release_frame(df);
+}
+
+/**
+ * Compute the picture order count of the current picture into df->poc and roll
+ * the prev_* POC/frame_num state in h forward. Adapted from init_poc() in h264.c.
+ */
+static inline void init_poc(H264Context *h, DisplayFrame *df)
+{
+    MpegEncContext *const s = &h->s;
+    const int max_frame_num = 1 << h->sps.log2_max_frame_num;
+    int field_poc[2]; /* [0] = top field, [1] = bottom field */
+    int top_foc = INT_MAX, bottom_foc = INT_MAX; /* INT_MAX = field absent, so FFMIN() below skips it */
+
+    h->frame_num_offset = h->prev_frame_num_offset;
+    if (h->frame_num < h->prev_frame_num) /* frame_num went backwards: count it as a wrap-around */
+        h->frame_num_offset += max_frame_num;
+
+    if (h->sps.poc_type == 0) { /* POC is sent as LSBs; rebuild the MSB part from the previous picture */
+        const int max_poc_lsb = 1 << h->sps.log2_max_poc_lsb;
+
+        if (h->poc_lsb < h->prev_poc_lsb && h->prev_poc_lsb - h->poc_lsb >= max_poc_lsb / 2)
+            h->poc_msb = h->prev_poc_msb + max_poc_lsb; /* LSB wrapped forwards */
+        else if (h->poc_lsb > h->prev_poc_lsb && h->prev_poc_lsb - h->poc_lsb < -max_poc_lsb / 2)
+            h->poc_msb = h->prev_poc_msb - max_poc_lsb; /* LSB wrapped backwards */
+        else
+            h->poc_msb = h->prev_poc_msb;
+        field_poc[0] =
+        field_poc[1] = h->poc_msb + h->poc_lsb;
+        if (s->picture_structure == PICT_FRAME)
+            field_poc[1] += h->delta_poc_bottom;
+    } else if (h->sps.poc_type == 1) { /* POC derived from frame_num, the SPS offset table and slice deltas */
+        int abs_frame_num, expected_delta_per_poc_cycle, expectedpoc;
+        int i;
+
+        if (h->sps.poc_cycle_length != 0)
+            abs_frame_num = h->frame_num_offset + h->frame_num;
+        else
+            abs_frame_num = 0;
+
+        if (h->nal_ref_idc == 0 && abs_frame_num > 0)
+            abs_frame_num--;
+
+        expected_delta_per_poc_cycle = 0;
+        for (i = 0; i < h->sps.poc_cycle_length; i++)
+            // FIXME integrate during sps parse
+            expected_delta_per_poc_cycle += h->sps.offset_for_ref_frame[i];
+
+        if (abs_frame_num > 0) { /* implies poc_cycle_length != 0, so the divisions below are safe */
+            int poc_cycle_cnt          = (abs_frame_num - 1) / h->sps.poc_cycle_length;
+            int frame_num_in_poc_cycle = (abs_frame_num - 1) % h->sps.poc_cycle_length;
+
+            expectedpoc = poc_cycle_cnt * expected_delta_per_poc_cycle;
+            for (i = 0; i <= frame_num_in_poc_cycle; i++)
+                expectedpoc = expectedpoc + h->sps.offset_for_ref_frame[i];
+        } else
+            expectedpoc = 0;
+
+        if (h->nal_ref_idc == 0)
+            expectedpoc = expectedpoc + h->sps.offset_for_non_ref_pic;
+
+        field_poc[0] = expectedpoc + h->delta_poc[0];
+        field_poc[1] = field_poc[0] + h->sps.offset_for_top_to_bottom_field;
+
+        if (s->picture_structure == PICT_FRAME)
+            field_poc[1] += h->delta_poc[1];
+    } else { /* any other poc_type: POC is twice the extended frame number, minus one if non-reference */
+        int poc = 2 * (h->frame_num_offset + h->frame_num);
+
+        if (!h->nal_ref_idc)
+            poc--;
+
+        field_poc[0] = poc;
+        field_poc[1] = poc;
+    }
+
+    if (s->picture_structure != PICT_BOTTOM_FIELD)
+        top_foc = field_poc[0];
+    if (s->picture_structure != PICT_TOP_FIELD)
+        bottom_foc = field_poc[1];
+    df->poc = FFMIN(top_foc, bottom_foc); /* frame POC = smaller POC of the fields that are present */
+
+    /* remember this picture's values as the "previous" ones for the next call */
+    h->prev_poc_msb = h->poc_msb;
+    h->prev_poc_lsb = h->poc_lsb;
+    h->prev_frame_num_offset = h->frame_num_offset;
+    h->prev_frame_num = h->frame_num;
+}
+/* Reset the POC / frame_num prediction state and the recent-POC history, as done for an IDR slice. */
+static void idr(H264Context *h)
+{
+    int slot = MAX_DELAYED_PIC_COUNT;
+    /* INT_MIN marks a history slot as unused (parse_frame() tests for it) */
+    while (slot-- > 0)
+        h->last_pocs[slot] = INT_MIN;
+    h->prev_frame_num = h->prev_frame_num_offset = 0;
+    h->prev_poc_lsb   = 0;
+    h->prev_poc_msb   = 1 << 16;
+}
+
+/**
+ * Parse the packet's slice header, compute df's POC and flags, update the reorder depth
+ * and queue df for display. df must be non-NULL. Mostly copied from decode_slice_header in h264.c.
+ */
+static inline void parse_frame(AVCodecContext *avctx, void *buf, int len, DisplayFrame *df)
+{
+    VDADecoderContext *ctx = avctx->priv_data;
+    H264Context *h = ctx->h;
+    MpegEncContext *s = &h->s;
+    uint8_t *pout; /* parser outputs; not used afterwards */
+    int psize;
+    int i;
+    int out_of_order;
+
+    /* run the H.264 parser over the packet; its return value in i is not checked and is overwritten below */
+    i = av_parser_parse2(ctx->parser, avctx, &pout, &psize, buf, len,
+            avctx->pkt->pts, avctx->pkt->dts, 0);
+
+    /* continue reading from the parser's bit reader -- assumes it stopped right before these fields; TODO confirm */
+    if (h->nal_unit_type == NAL_IDR_SLICE)
+        get_ue_golomb(&s->gb); /* value discarded; presumably idr_pic_id */
+    if (h->sps.poc_type == 0) {
+        h->poc_lsb = get_bits(&s->gb, h->sps.log2_max_poc_lsb);
+        if (h->pps.pic_order_present == 1 && s->picture_structure == PICT_FRAME)
+            h->delta_poc_bottom = get_se_golomb(&s->gb);
+    }
+    if (h->sps.poc_type == 1 && !h->sps.delta_pic_order_always_zero_flag) {
+        h->delta_poc[0] = get_se_golomb(&s->gb);
+        if (h->pps.pic_order_present == 1 && s->picture_structure == PICT_FRAME)
+            h->delta_poc[1] = get_se_golomb(&s->gb);
+    }
+
+    /* calculate poc; an IDR slice first resets the prediction state */
+    if (h->nal_unit_type == NAL_IDR_SLICE)
+        idr(h);
+    init_poc(h, df);
+
+    /* FIXME: memory_management_control_operation is not supported yet, so mmco_reset is never set */
+    df->mmco_reset = 0;
+
+    /* copy the parser results that the display queue relies on */
+    df->f.pict_type = ctx->parser->pict_type;
+    df->f.key_frame = ctx->parser->key_frame;
+
+    /* update has_b_frames: insert df->poc into the sorted last_pocs history, dropping the smallest entry */
+    for (i = 0; i < MAX_DELAYED_PIC_COUNT; i++) {
+        if (df->poc < h->last_pocs[i])
+            break;
+        if (i)
+            h->last_pocs[i - 1] = h->last_pocs[i];
+    }
+    if (i)
+        h->last_pocs[i - 1] = df->poc;
+    out_of_order = MAX_DELAYED_PIC_COUNT - i; /* number of history entries larger than this POC */
+    if (df->f.pict_type == AV_PICTURE_TYPE_B ||
+            (h->last_pocs[MAX_DELAYED_PIC_COUNT - 2] > INT_MIN &&
+             h->last_pocs[MAX_DELAYED_PIC_COUNT - 1] -
+             h->last_pocs[MAX_DELAYED_PIC_COUNT - 2] > 2))
+        out_of_order = FFMAX(out_of_order, 1);
+    if (avctx->has_b_frames < out_of_order) /* the reorder depth only ever grows here */
+        avctx->has_b_frames = out_of_order;
+
+    /* push the picture into the display queue; the queue owns df from here on */
+    push_queue(ctx, df);
+}
+/* Release the previously returned frame and pick the next one to output into ctx->next_frame (may stay NULL). */
+static inline void prepare_next_frame(AVCodecContext *avctx)
+{
+    VDADecoderContext *ctx = avctx->priv_data;
+    H264Context *h = ctx->h;
+    int out_of_order;
+    int pics = ctx->queued_pics; /* queue length sampled before anything is popped below */
+    DisplayFrame *first;
+
+    /* release the frame that was output by the previous call */
+    if (ctx->next_frame) {
+        release_frame(ctx->next_frame);
+        ctx->next_frame = NULL;
+    }
+
+    first = ctx->queue.next; /* peek at the head; read without taking queue_lock */
+    if (first == &ctx->queue) /* queue is empty */
+        return;
+    if (first->f.key_frame || first->mmco_reset)
+        h->next_outputed_poc = INT_MIN; /* a barrier frame is never treated as out of order */
+    out_of_order = first->poc < h->next_outputed_poc;
+    if (out_of_order) {
+        release_frame(pop_queue(ctx)); /* POC is below the last output POC: drop it, output nothing this call */
+        av_log(avctx, AV_LOG_VERBOSE, "Dropped frame due to out_of_order.\n");
+    } else if (pics > avctx->has_b_frames) { /* output only once more frames are queued than the reorder depth */
+        first = ctx->next_frame = pop_queue(ctx);
+        if (first->f.key_frame || first->mmco_reset)
+            h->next_outputed_poc = INT_MIN;
+        else
+            h->next_outputed_poc = first->poc;
+    }
+}
+/* Decode one packet via VDA, queue the picture by POC and output the next displayable frame, if any. */
+static int vdadec_decode(AVCodecContext *avctx,
+        void *data, int *data_size, AVPacket *avpkt)
+{
+    VDADecoderContext *ctx = avctx->priv_data;
+    struct vda_context *vda_ctx = &ctx->vda_ctx;
+    AVFrame *pic = data;
+    OSStatus status;
+
+    if (avpkt->size) {
+        vda_ctx->bitstream = avpkt->data;
+        vda_ctx->bitstream_size = avpkt->size;
+        status = ff_vda_sync_decode(vda_ctx);
+        if (status != kVDADecoderNoErr) {
+            av_log(avctx, AV_LOG_ERROR, "Failed to decode frame: %d\n", status);
+            return -1;
+        }
+        if (vda_ctx->cv_buffer) {
+            DisplayFrame *df = alloc_frame(vda_ctx->cv_buffer);
+            if (!df) { /* parse_frame() dereferences df; drop the buffer the frame would have owned */
+                CVPixelBufferRelease(vda_ctx->cv_buffer);
+                return AVERROR(ENOMEM);
+            }
+            parse_frame(avctx, avpkt->data, avpkt->size, df);
+        }
+    } else
+        avctx->has_b_frames = 0; /* empty packet (draining): stop holding frames back for reordering */
+
+    *data_size = 0; /* stays 0 when no frame is ready for output */
+    prepare_next_frame(avctx);
+    if (ctx->next_frame) {
+        *pic = ctx->next_frame->f; /* shallow copy; the pixel buffer stays owned by next_frame */
+        *data_size = sizeof(AVFrame);
+    }
+    return avpkt->size; /* the whole packet is reported as consumed */
+}
+/* Close callback: free pending frames, the VDA decoder and the parser; also the vdadec_init() failure path. */
+static av_cold int vdadec_close(AVCodecContext *avctx)
+{
+    VDADecoderContext *ctx = avctx->priv_data;
+    /* flush display queue & free buffer */
+    release_frame(ctx->next_frame); /* NULL-safe; next_frame itself is not reset here */
+    flush_queue(ctx);
+    /* release buffers and decoder */
+    ff_vda_destroy_decoder(&ctx->vda_ctx); /* NOTE(review): also reached when creation failed -- confirm that is safe */
+    /* release parser */
+    av_parser_close(ctx->parser); /* NOTE(review): parser is NULL if av_parser_init() failed -- confirm NULL-safety */
+    return 0;
+}
+/* Init callback: set up the display queue, H.264 parser and VDA decoder. Returns 0, or -1 after cleanup. */
+static av_cold int vdadec_init(AVCodecContext *avctx)
+{
+    VDADecoderContext *ctx;
+    struct vda_context *vda_ctx;
+    H264Context *h;
+    OSStatus status;
+
+    ctx = avctx->priv_data;
+    memset(ctx, 0, sizeof(VDADecoderContext)); /* leaves next_frame and parser NULL for the failure path */
+
+    /* init display queue: an empty circular list whose sentinel acts as a reorder barrier */
+    ctx->queue.mmco_reset = 1;
+    ctx->queue.f.key_frame = 1; /* stops the backwards scan in push_queue() */
+    ctx->queue.poc = INT_MIN;
+    ctx->queue.next = &ctx->queue;
+    ctx->queue.prev = &ctx->queue;
+    ctx->queued_pics = 0;
+
+    /* init H.264 parser */
+    ctx->parser = av_parser_init(avctx->codec->id);
+    if (!ctx->parser) {
+        av_log(avctx, AV_LOG_ERROR, "Failed to open H.264 parser.\n");
+        goto failed;
+    }
+    ctx->parser->flags = PARSER_FLAG_COMPLETE_FRAMES;
+    ctx->h = ctx->parser->priv_data; /* borrow the parser's H264Context for POC bookkeeping */
+
+    /* init output ordering related fields */
+    h = ctx->h;
+    h->outputed_poc = h->next_outputed_poc = INT_MIN;
+    idr(h);
+    h->prev_frame_num = -1; /* overrides the 0 that idr() just stored */
+    avctx->has_b_frames = 2; /* initial reorder depth; parse_frame() may raise it */
+
+    /* init vda */
+    vda_ctx = &ctx->vda_ctx;
+    vda_ctx->width = avctx->width;
+    vda_ctx->height = avctx->height;
+    vda_ctx->format = 'avc1';
+    vda_ctx->cv_pix_fmt_type = kCVPixelFormatType_422YpCbCr8_yuvs; /* paired with PIX_FMT_YUYV422 below */
+    vda_ctx->use_sync_decoding = 1; /* vdadec_decode() reads vda_ctx->cv_buffer right after decoding */
+    avctx->pix_fmt = PIX_FMT_YUYV422;
+    status = ff_vda_create_decoder(vda_ctx,
+                                   avctx->extradata, avctx->extradata_size);
+    if (status != kVDADecoderNoErr) {
+        av_log(avctx, AV_LOG_ERROR,
+                "Failed to init VDA decoder: %d.\n", status);
+        goto failed;
+    }
+
+    return 0;
+
+failed: /* shared cleanup: vdadec_close() tears down whatever was set up so far */
+    vdadec_close(avctx);
+    return -1;
+}
+/* Flush callback: drop all pending frames and reset the POC / output-order fields (has_b_frames is kept). */
+static void vdadec_flush(AVCodecContext *avctx)
+{
+    VDADecoderContext *ctx = avctx->priv_data;
+    H264Context *h = ctx->h;
+    /* flush display queue & free buffer */
+    release_frame(ctx->next_frame);
+    ctx->next_frame = NULL; /* prevents a second release in prepare_next_frame() / vdadec_close() */
+    flush_queue(ctx);
+    /* flush h.264 context: same three steps as in vdadec_init() */
+    h->outputed_poc = h->next_outputed_poc = INT_MIN;
+    idr(h);
+    h->prev_frame_num = -1; /* overrides the 0 that idr() just stored */
+}
+/* Codec table entry for the standalone "h264_vda" decoder. */
+AVCodec ff_h264_vda_decoder = {
+    .name           = "h264_vda",
+    .type           = AVMEDIA_TYPE_VIDEO,
+    .id             = AV_CODEC_ID_H264,
+    .priv_data_size = sizeof(VDADecoderContext),
+    .init           = vdadec_init,
+    .close          = vdadec_close,
+    .decode         = vdadec_decode,
+    .capabilities   = CODEC_CAP_DELAY, /* vdadec_decode() holds frames back for reordering */
+    .flush          = vdadec_flush,
+    .long_name      = NULL_IF_CONFIG_SMALL("H.264 (VDA acceleration)"),
+    .pix_fmts       = (const enum PixelFormat[]){ PIX_FMT_YUYV422, /* same format vdadec_init() sets */
+                                                  PIX_FMT_NONE },
+};
-- 
1.7.11.4