[FFmpeg-devel] [PATCH] vp9: add superframe merging bitstream filter.

Fri Feb 19 17:04:31 CET 2016

Fixes ticket 4313.

There is still an issue after this patch: it works when you use -bsf
vp9_superframe, but if you let the auto-bsf system work on its own,
ffmpeg.c triggers these warnings:

[ivf @ 0x7fc97a02fc00] Non-monotonous DTS in output stream 0:0;
previous: 900, current: 900; changing to 901. This may result in
incorrect timestamps in the output file.

This warning code lives in ffmpeg.c and I'm not entirely sure how to
prevent this from running. Suggestions welcome.
---
 ffmpeg.c                        |   2 +
 libavcodec/Makefile             |   1 +
 libavcodec/allcodecs.c          |   1 +
 libavcodec/vp9_superframe_bsf.c | 189 ++++++++++++++++++++++++++++++++++++++++
 libavformat/ivfenc.c            |  13 +++
 libavformat/matroskaenc.c       |   2 +
 libavformat/mux.c               |   2 +
 libavformat/utils.c             |   5 ++
 8 files changed, 215 insertions(+)
 create mode 100644 libavcodec/vp9_superframe_bsf.c

diff --git a/ffmpeg.c b/ffmpeg.c
index a5ec3c3..bb7f138 100644
--- a/ffmpeg.c
+++ b/ffmpeg.c
@@ -694,6 +694,8 @@ static void write_frame(AVFormatContext *s, AVPacket *pkt, OutputStream *ost)
         if (exit_on_error)
             exit_program(1);
     }
+    if (pkt->size == 0)
+        return;
 
     if (!(s->oformat->flags & AVFMT_NOTIMESTAMPS)) {
         if (pkt->dts != AV_NOPTS_VALUE &&
diff --git a/libavcodec/Makefile b/libavcodec/Makefile
index f6a4fbb..041684d 100644
--- a/libavcodec/Makefile
+++ b/libavcodec/Makefile
@@ -916,6 +916,7 @@ OBJS-$(CONFIG_MP3_HEADER_DECOMPRESS_BSF)  += mp3_header_decompress_bsf.o \
 OBJS-$(CONFIG_NOISE_BSF)                  += noise_bsf.o
 OBJS-$(CONFIG_REMOVE_EXTRADATA_BSF)       += remove_extradata_bsf.o
 OBJS-$(CONFIG_TEXT2MOVSUB_BSF)            += movsub_bsf.o
+OBJS-$(CONFIG_VP9_SUPERFRAME_BSF)         += vp9_superframe_bsf.o
 
 # thread libraries
 OBJS-$(HAVE_LIBC_MSVCRT)               += file_open.o
diff --git a/libavcodec/allcodecs.c b/libavcodec/allcodecs.c
index 2097db0..96f5c5c 100644
--- a/libavcodec/allcodecs.c
+++ b/libavcodec/allcodecs.c
@@ -666,4 +666,5 @@ void avcodec_register_all(void)
     REGISTER_BSF(NOISE,                 noise);
     REGISTER_BSF(REMOVE_EXTRADATA,      remove_extradata);
     REGISTER_BSF(TEXT2MOVSUB,           text2movsub);
+    REGISTER_BSF(VP9_SUPERFRAME,        vp9_superframe);
 }
diff --git a/libavcodec/vp9_superframe_bsf.c b/libavcodec/vp9_superframe_bsf.c
new file mode 100644
index 0000000..f991a80
--- /dev/null
+++ b/libavcodec/vp9_superframe_bsf.c
@@ -0,0 +1,189 @@
+/*
+ * VP9 invisible (alt-ref) frame to superframe merge bitstream filter
+ * Copyright (c) 2016 Ronald S. Bultje <rsbultje at gmail.com>
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include "libavutil/avassert.h"
+#include "avcodec.h"
+#include "get_bits.h"
+
+#define MAX_CACHE 8 // number of slots in VP9BSFContext.cache
+typedef struct VP9BSFContext {
+    int n_cache; // count of cache[] slots currently holding a copied frame
+    struct CachedBuf {
+        uint8_t *data; // frame bytes
+        int size; // length of data in bytes
+    } cache[MAX_CACHE];
+} VP9BSFContext;
+
+/* Report the largest entry size (*_max) and the sum of all sizes (*_sum). */
+static void stats(const struct CachedBuf *in, int n_in,
+                  unsigned *_max, unsigned *_sum)
+{
+    unsigned largest = 0, total = 0;
+    int idx = 0;
+
+    while (idx < n_in) {
+        // the int size is converted to unsigned before comparing and summing
+        unsigned cur = in[idx++].size;
+        total += cur;
+        largest = cur > largest ? cur : largest;
+    }
+
+    *_max = largest;
+    *_sum = total;
+}
+
+/*
+ * Concatenate n_in frames and append the VP9 superframe index. On success
+ * *poutbuf is a new av_malloc()ed buffer of *poutbuf_size bytes; returns 0,
+ * or AVERROR(ENOMEM) if that allocation fails.
+ */
+static int merge_superframe(const struct CachedBuf *in, int n_in,
+                            uint8_t **poutbuf, int *poutbuf_size)
+{
+    unsigned max, sum, mag, marker, n, sz;
+    uint8_t *ptr;
+
+    stats(in, n_in, &max, &sum);
+    mag = av_log2(max) >> 3; // bytes per index entry, minus one
+    marker = 0xC0 + (mag << 3) + (n_in - 1);
+    sz = *poutbuf_size = sum + 2 + (mag + 1) * n_in; // frames + index
+    ptr = *poutbuf = av_malloc(sz);
+    if (!ptr)
+        return AVERROR(ENOMEM);
+
+    for (n = 0; n < n_in; n++) {
+        memcpy(ptr, in[n].data, in[n].size);
+        ptr += in[n].size;
+    }
+
+#define wloop(mag, wr) \
+    for (n = 0; n < n_in; n++) { \
+        wr; \
+        ptr += mag + 1; \
+    }
+
+    // write superframe with marker 110[mag:2][nframes:3]; the frame sizes in
+    // the index are little-endian, hence AV_WL* rather than AV_WB*
+    *ptr++ = marker;
+    switch (mag) {
+    case 0: wloop(mag, *ptr = in[n].size);        break;
+    case 1: wloop(mag, AV_WL16(ptr, in[n].size)); break;
+    case 2: wloop(mag, AV_WL24(ptr, in[n].size)); break;
+    case 3: wloop(mag, AV_WL32(ptr, in[n].size)); break;
+    }
+#undef wloop
+    *ptr++ = marker;
+    // the write cursor must land exactly on the end of the allocation
+    av_assert0(ptr == &(*poutbuf)[*poutbuf_size]);
+
+    return 0;
+}
+
+/* Cache invisible (alt-ref) VP9 frames and merge them with the next visible
+ * frame into one superframe; cached input yields *poutbuf = NULL, size 0.
+ * Returns 0 on success, a negative AVERROR code on failure. */
+static int vp9_superframe_filter(AVBitStreamFilterContext *bsfc,
+                                 AVCodecContext *avctx, const char *args,
+                                 uint8_t  **poutbuf, int *poutbuf_size,
+                                 const uint8_t *buf, int      buf_size,
+                                 int keyframe)
+{
+    GetBitContext gb;
+    VP9BSFContext *ctx = bsfc->priv_data;
+    int res, invisible, profile, marker, uses_superframe_syntax = 0, n;
+
+    if (buf_size <= 0) // buf[buf_size - 1] below would read out of bounds
+        return AVERROR_INVALIDDATA;
+    marker = buf[buf_size - 1];
+    if ((marker & 0xe0) == 0xc0) {
+        int nbytes = 1 + ((marker >> 3) & 0x3);
+        int n_frames = 1 + (marker & 0x7), idx_sz = 2 + n_frames * nbytes;
+        // a superframe index starts and ends with the same marker byte
+        uses_superframe_syntax = buf_size >= idx_sz && buf[buf_size - idx_sz] == marker;
+    }
+
+    if ((res = init_get_bits8(&gb, buf, buf_size)) < 0)
+        return res;
+    get_bits(&gb, 2); // frame marker
+    profile  = get_bits1(&gb);
+    profile |= get_bits1(&gb) << 1;
+    if (profile == 3) profile += get_bits1(&gb);
+    if (get_bits1(&gb)) { // show_existing_frame
+        invisible = 0;
+    } else {
+        get_bits1(&gb); // keyframe
+        invisible = !get_bits1(&gb);
+    }
+
+    if (uses_superframe_syntax && ctx->n_cache > 0) {
+        av_log(avctx, AV_LOG_ERROR,
+               "Mixing of superframe syntax and naked VP9 frames not supported\n");
+        return AVERROR_INVALIDDATA;
+    } else if ((!invisible || uses_superframe_syntax) && !ctx->n_cache) {
+        // passthrough
+        *poutbuf = (uint8_t *) buf;
+        *poutbuf_size = buf_size;
+        return 0;
+    } else if (ctx->n_cache + 1 >= MAX_CACHE) {
+        av_log(avctx, AV_LOG_ERROR,
+               "Too many invisible frames\n");
+        return AVERROR_INVALIDDATA;
+    }
+
+    ctx->cache[ctx->n_cache].size = buf_size;
+    if (invisible && !uses_superframe_syntax) {
+        ctx->cache[ctx->n_cache].data = av_malloc(buf_size);
+        if (!ctx->cache[ctx->n_cache].data)
+            return AVERROR(ENOMEM);
+        memcpy(ctx->cache[ctx->n_cache++].data, buf, buf_size);
+        *poutbuf = NULL;
+        *poutbuf_size = 0;
+        return 0;
+    }
+    av_assert0(ctx->n_cache > 0);
+    ctx->cache[ctx->n_cache].data = (uint8_t *) buf; // borrowed, not copied
+    // build superframe
+    if ((res = merge_superframe(ctx->cache, ctx->n_cache + 1,
+                                poutbuf, poutbuf_size)) < 0)
+        return res;
+    for (n = 0; n < ctx->n_cache; n++) // only the copied frames are freed
+        av_freep(&ctx->cache[n].data);
+    ctx->n_cache = 0;
+
+    return 0;
+}
+
+/* Release the frame copies still held in the cache when the filter closes. */
+static void vp9_superframe_close(AVBitStreamFilterContext *bsfc)
+{
+    VP9BSFContext *state = bsfc->priv_data;
+    int left = state->n_cache;
+
+    while (left-- > 0) // free cached data
+        av_freep(&state->cache[left].data);
+}
+
+AVBitStreamFilter ff_vp9_superframe_bsf = { // the "vp9_superframe" filter
+    .filter         = vp9_superframe_filter,
+    .close          = vp9_superframe_close,  // frees frames left in the cache
+    .priv_data_size = sizeof(VP9BSFContext),
+    .name           = "vp9_superframe",
+};
diff --git a/libavformat/ivfenc.c b/libavformat/ivfenc.c
index 484d87d..490b0a9 100644
--- a/libavformat/ivfenc.c
+++ b/libavformat/ivfenc.c
@@ -18,6 +18,7 @@
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 #include "avformat.h"
+#include "internal.h"
 #include "libavutil/intreadwrite.h"
 
 typedef struct IVFEncContext {
@@ -85,6 +86,17 @@ static int ivf_write_trailer(AVFormatContext *s)
     return 0;
 }
 
+/* Attach the vp9_superframe bitstream filter to VP9 streams; returns the
+ * result of ff_stream_add_bitstream_filter(), or 1 for any other codec. */
+static int ivf_check_bitstream(struct AVFormatContext *s, const AVPacket *pkt)
+{
+    AVStream *stream = s->streams[pkt->stream_index];
+
+    if (stream->codec->codec_id != AV_CODEC_ID_VP9)
+        return 1;
+    return ff_stream_add_bitstream_filter(stream, "vp9_superframe", NULL);
+}
+
 AVOutputFormat ff_ivf_muxer = {
     .priv_data_size = sizeof(IVFEncContext),
     .name         = "ivf",
@@ -95,4 +107,5 @@ AVOutputFormat ff_ivf_muxer = {
     .write_header = ivf_write_header,
     .write_packet = ivf_write_packet,
     .write_trailer = ivf_write_trailer,
+    .check_bitstream = ivf_check_bitstream,
 };
diff --git a/libavformat/matroskaenc.c b/libavformat/matroskaenc.c
index f42434b..86d3415 100644
--- a/libavformat/matroskaenc.c
+++ b/libavformat/matroskaenc.c
@@ -2121,6 +2121,8 @@ static int mkv_check_bitstream(struct AVFormatContext *s, const AVPacket *pkt)
     if (st->codec->codec_id == AV_CODEC_ID_AAC)
         if (pkt->size > 2 && (AV_RB16(pkt->data) & 0xfff0) == 0xfff0)
             ret = ff_stream_add_bitstream_filter(st, "aac_adtstoasc", NULL);
+    if (st->codec->codec_id == AV_CODEC_ID_VP9) // not "else if": an else here would bind to the inner AAC size check
+        ret = ff_stream_add_bitstream_filter(st, "vp9_superframe", NULL);
 
     return ret;
 }
diff --git a/libavformat/mux.c b/libavformat/mux.c
index 789c811..d2029d3 100644
--- a/libavformat/mux.c
+++ b/libavformat/mux.c
@@ -1048,6 +1048,8 @@ int av_interleaved_write_frame(AVFormatContext *s, AVPacket *pkt)
         }
 
         av_apply_bitstream_filters(st->codec, pkt, st->internal->bsfc);
+        if (pkt->size == 0)
+            return 0;
     } else {
         av_log(s, AV_LOG_TRACE, "av_interleaved_write_frame FLUSH\n");
         flush = 1;
diff --git a/libavformat/utils.c b/libavformat/utils.c
index 129a49d..2e45136 100644
--- a/libavformat/utils.c
+++ b/libavformat/utils.c
@@ -4694,6 +4694,11 @@ int av_apply_bitstream_filters(AVCodecContext *codec, AVPacket *pkt,
                                            &new_pkt.data, &new_pkt.size,
                                            pkt->data, pkt->size,
                                            pkt->flags & AV_PKT_FLAG_KEY);
+        if (a == 0 && new_pkt.size == 0) {
+            av_packet_unref(pkt);
+            memset(pkt, 0, sizeof(*pkt));
+            return 0;
+        }
         if(a == 0 && new_pkt.data != pkt->data) {
             uint8_t *t = av_malloc(new_pkt.size + AV_INPUT_BUFFER_PADDING_SIZE); //the new should be a subset of the old so cannot overflow
             if (t) {
-- 
2.1.2