[FFmpeg-devel] [PATCH] avcodec: add a bsf to reorder DTS into PTS

James Almer jamrial at gmail.com
Tue Aug 30 04:46:53 EEST 2022


Starting with an h264 implementation. Can be extended to support other codecs.

Addresses ticket #502.

Signed-off-by: James Almer <jamrial at gmail.com>
---
 configure                      |   1 +
 libavcodec/Makefile            |   1 +
 libavcodec/bitstream_filters.c |   1 +
 libavcodec/dts2pts_bsf.c       | 477 +++++++++++++++++++++++++++++++++
 4 files changed, 480 insertions(+)
 create mode 100644 libavcodec/dts2pts_bsf.c

diff --git a/configure b/configure
index 932ea5b553..91ee5eb303 100755
--- a/configure
+++ b/configure
@@ -3275,6 +3275,7 @@ aac_adtstoasc_bsf_select="adts_header mpeg4audio"
 av1_frame_merge_bsf_select="cbs_av1"
 av1_frame_split_bsf_select="cbs_av1"
 av1_metadata_bsf_select="cbs_av1"
+dts2pts_bsf_select="cbs_h264 h264parse"
 eac3_core_bsf_select="ac3_parser"
 filter_units_bsf_select="cbs"
 h264_metadata_bsf_deps="const_nan"
diff --git a/libavcodec/Makefile b/libavcodec/Makefile
index cb80f73d99..858e110b79 100644
--- a/libavcodec/Makefile
+++ b/libavcodec/Makefile
@@ -1176,6 +1176,7 @@ OBJS-$(CONFIG_AV1_FRAME_SPLIT_BSF)        += av1_frame_split_bsf.o
 OBJS-$(CONFIG_CHOMP_BSF)                  += chomp_bsf.o
 OBJS-$(CONFIG_DUMP_EXTRADATA_BSF)         += dump_extradata_bsf.o
 OBJS-$(CONFIG_DCA_CORE_BSF)               += dca_core_bsf.o
+OBJS-$(CONFIG_DTS2PTS_BSF)                += dts2pts_bsf.o
 OBJS-$(CONFIG_DV_ERROR_MARKER_BSF)        += dv_error_marker_bsf.o
 OBJS-$(CONFIG_EAC3_CORE_BSF)              += eac3_core_bsf.o
 OBJS-$(CONFIG_EXTRACT_EXTRADATA_BSF)      += extract_extradata_bsf.o    \
diff --git a/libavcodec/bitstream_filters.c b/libavcodec/bitstream_filters.c
index 444423ae93..a3bebefe5f 100644
--- a/libavcodec/bitstream_filters.c
+++ b/libavcodec/bitstream_filters.c
@@ -31,6 +31,7 @@ extern const FFBitStreamFilter ff_av1_metadata_bsf;
 extern const FFBitStreamFilter ff_chomp_bsf;
 extern const FFBitStreamFilter ff_dump_extradata_bsf;
 extern const FFBitStreamFilter ff_dca_core_bsf;
+extern const FFBitStreamFilter ff_dts2pts_bsf;
 extern const FFBitStreamFilter ff_dv_error_marker_bsf;
 extern const FFBitStreamFilter ff_eac3_core_bsf;
 extern const FFBitStreamFilter ff_extract_extradata_bsf;
diff --git a/libavcodec/dts2pts_bsf.c b/libavcodec/dts2pts_bsf.c
new file mode 100644
index 0000000000..f600150a6b
--- /dev/null
+++ b/libavcodec/dts2pts_bsf.c
@@ -0,0 +1,477 @@
+/*
+ * Copyright (c) 2022 James Almer
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+/**
+ * @file
+ * Derive PTS by reordering DTS from supported streams
+ */
+
+#include "libavutil/avassert.h"
+#include "libavutil/eval.h"
+#include "libavutil/fifo.h"
+#include "libavutil/opt.h"
+#include "libavutil/tree.h"
+
+#include "bsf.h"
+#include "bsf_internal.h"
+#include "cbs.h"
+#include "cbs_h264.h"
+#include "h264_parse.h"
+#include "h264_ps.h"
+
+typedef struct DTS2PTSNode {
+    int64_t      dts;
+    int64_t duration;
+    int          poc;
+} DTS2PTSNode;
+
+typedef struct DTS2PTSFrame {
+    AVPacket    *pkt;
+    int          poc;
+    int     poc_diff;
+} DTS2PTSFrame;
+
+typedef struct DTS2PTSH264Context {
+    H264POCContext poc;
+    SPS sps;
+    int last_poc;
+    int highest_poc;
+    int picture_structure;
+} DTS2PTSH264Context;
+
+typedef struct DTS2PTSContext {
+    struct AVTreeNode *root;
+    AVFifo *fifo;
+
+    // Codec specific function pointers
+    int (*init)(AVBSFContext *ctx);
+    int (*filter)(AVBSFContext *ctx);
+    void (*flush)(AVBSFContext *ctx);
+
+    CodedBitstreamContext *cbc;
+    CodedBitstreamFragment au;
+
+    union {
+        DTS2PTSH264Context h264;
+    } u;
+
+    int nb_frame;
+    int eof;
+} DTS2PTSContext;
+
+// AVTreeNode callbacks
+static int cmp_insert(const void *key, const void *node)
+{
+    return ((const DTS2PTSNode *) key)->poc - ((const DTS2PTSNode *) node)->poc;
+}
+
+static int cmp_find(const void *key, const void *node)
+{
+    return *(const int *)key - ((const DTS2PTSNode *) node)->poc;
+}
+
+static int dec_poc(void *opaque, void *elem)
+{
+    DTS2PTSNode *node = elem;
+    int dec = *(int *)opaque;
+    node->poc -= dec;
+    return 0;
+}
+
+static int free_node(void *opaque, void *elem)
+{
+    DTS2PTSNode *node = elem;
+    av_free(node);
+    return 0;
+}
+
+// Shared functions
+static int alloc_and_insert_node(AVBSFContext *ctx, int64_t ts, int64_t duration,
+                                 int poc, int poc_diff)
+{
+    DTS2PTSContext *s = ctx->priv_data;
+    for (int i = 0; i < poc_diff; i++) {
+        struct AVTreeNode *node = av_tree_node_alloc();
+        DTS2PTSNode *poc_node, *ret;
+        if (!node)
+            return AVERROR(ENOMEM);
+        poc_node = av_malloc(sizeof(*poc_node));
+        if (!poc_node) {
+            av_free(node);
+            return AVERROR(ENOMEM);
+        }
+        *poc_node = (DTS2PTSNode) { ts, duration, poc++ };
+        ret = av_tree_insert(&s->root, poc_node, cmp_insert, &node);
+        if (ret && ret != poc_node) {
+            *ret = *poc_node;
+            av_free(poc_node);
+            av_free(node);
+        }
+    }
+    return 0;
+}
+
+// H.264
+static const CodedBitstreamUnitType h264_decompose_unit_types[] = {
+    H264_NAL_SPS,
+    H264_NAL_PPS,
+    H264_NAL_IDR_SLICE,
+    H264_NAL_SLICE,
+};
+
+static int h264_init(AVBSFContext *ctx)
+{
+    DTS2PTSContext *s = ctx->priv_data;
+    DTS2PTSH264Context *h264 = &s->u.h264;
+
+    s->fifo = av_fifo_alloc2(H264_MAX_DPB_FRAMES, sizeof(DTS2PTSFrame), 0);
+    if (!s->fifo)
+        return AVERROR(ENOMEM);
+
+    s->cbc->decompose_unit_types    = h264_decompose_unit_types;
+    s->cbc->nb_decompose_unit_types = FF_ARRAY_ELEMS(h264_decompose_unit_types);
+
+    h264->last_poc = h264->highest_poc = INT_MIN;
+
+    return 0;
+}
+
+static int get_mmco_reset(const H264RawSliceHeader *header)
+{
+    if (header->nal_unit_header.nal_ref_idc == 0 ||
+        !header->adaptive_ref_pic_marking_mode_flag)
+        return 0;
+
+    for (int i = 0; i < H264_MAX_MMCO_COUNT; i++) {
+        if (header->mmco[i].memory_management_control_operation == 0)
+            return 0;
+        else if (header->mmco[i].memory_management_control_operation == 5)
+            return 1;
+    }
+
+    return 0;
+}
+
+static int h264_queue_frame(AVBSFContext *ctx, AVPacket *pkt, int poc, int *queued)
+{
+    DTS2PTSContext *s = ctx->priv_data;
+    DTS2PTSH264Context *h264 = &s->u.h264;
+    DTS2PTSFrame frame;
+    int poc_diff, ret;
+
+    poc_diff = (h264->picture_structure == 3) + 1;
+    if (pkt->dts != AV_NOPTS_VALUE && pkt->dts < 0 && !s->nb_frame) {
+        av_tree_enumerate(s->root, &poc_diff, NULL, dec_poc);
+        s->nb_frame -= poc_diff;
+    }
+    if (poc < 0) {
+        av_tree_enumerate(s->root, &poc_diff, NULL, dec_poc);
+        s->nb_frame -= poc_diff;
+    }
+    // Check if there was a POC reset (Like an IDR slice)
+    if (s->nb_frame > h264->highest_poc) {
+        s->nb_frame = 0;
+        h264->highest_poc = h264->last_poc;
+    }
+
+    ret = alloc_and_insert_node(ctx, pkt->dts, pkt->duration, s->nb_frame, poc_diff);
+    if (ret < 0)
+        return ret;
+    av_log(ctx, AV_LOG_DEBUG, "Queueing frame with POC %d, dts %"PRId64"\n",
+           poc, pkt->dts);
+    s->nb_frame += poc_diff;
+
+    // Add frame to output FIFO only once
+    if (*queued)
+        return 0;
+
+    frame = (DTS2PTSFrame) { pkt, poc, poc_diff };
+    ret = av_fifo_write(s->fifo, &frame, 1);
+    av_assert2(ret >= 0);
+    *queued = 1;
+
+    return 0;
+}
+
+static int h264_filter(AVBSFContext *ctx)
+{
+    DTS2PTSContext *s = ctx->priv_data;
+    DTS2PTSH264Context *h264 = &s->u.h264;
+    CodedBitstreamFragment *au = &s->au;
+    AVPacket *in;
+    int output_picture_number = INT_MIN;
+    int field_poc[2];
+    int queued = 0, ret;
+
+    ret = ff_bsf_get_packet(ctx, &in);
+    if (ret < 0)
+        return ret;
+
+    ret = ff_cbs_read_packet(s->cbc, au, in);
+    if (ret < 0) {
+        av_log(ctx, AV_LOG_WARNING, "Failed to parse access unit.\n");
+        goto fail;
+    }
+
+    for (int i = 0; i < au->nb_units; i++) {
+        CodedBitstreamUnit *unit = &au->units[i];
+
+        switch (unit->type) {
+        case H264_NAL_IDR_SLICE:
+            h264->poc.prev_frame_num        = 0;
+            h264->poc.prev_frame_num_offset = 0;
+            h264->poc.prev_poc_msb          =
+            h264->poc.prev_poc_lsb          = 0;
+        // fall-through
+        case H264_NAL_SLICE: {
+            const H264RawSlice *slice = unit->content;
+            const H264RawSliceHeader *header = &slice->header;
+            const CodedBitstreamH264Context *cbs_h264 = s->cbc->priv_data;
+            const H264RawSPS *sps = cbs_h264->active_sps;
+            int got_reset;
+
+            // Initialize the SPS struct with the fields ff_h264_init_poc() cares about
+            h264->sps.log2_max_frame_num             = sps->log2_max_frame_num_minus4 + 4;
+            h264->sps.poc_type                       = sps->pic_order_cnt_type;
+            h264->sps.log2_max_poc_lsb               = sps->log2_max_pic_order_cnt_lsb_minus4 + 4;
+            h264->sps.offset_for_non_ref_pic         = sps->offset_for_non_ref_pic;
+            h264->sps.offset_for_top_to_bottom_field = sps->offset_for_top_to_bottom_field;
+            h264->sps.poc_cycle_length               = sps->num_ref_frames_in_pic_order_cnt_cycle;
+            for (int i = 0; i < h264->sps.poc_cycle_length; i++)
+                h264->sps.offset_for_ref_frame[i] = sps->offset_for_ref_frame[i];
+
+            h264->picture_structure = sps->frame_mbs_only_flag ? 3 :
+                                      (header->field_pic_flag ?
+                                       header->field_pic_flag + header->bottom_field_flag : 3);
+
+            h264->poc.frame_num = header->frame_num;
+            h264->poc.poc_lsb = header->pic_order_cnt_lsb;
+            h264->poc.delta_poc_bottom = header->delta_pic_order_cnt_bottom;
+            h264->poc.delta_poc[0] = header->delta_pic_order_cnt[0];
+            h264->poc.delta_poc[1] = header->delta_pic_order_cnt[1];
+
+            field_poc[0] = field_poc[1] = INT_MAX;
+            ret = ff_h264_init_poc(field_poc, &output_picture_number, &h264->sps,
+                                   &h264->poc, h264->picture_structure,
+                                   header->nal_unit_header.nal_ref_idc);
+            if (ret < 0) {
+                av_log(ctx, AV_LOG_ERROR, "ff_h264_init_poc() failure\n");
+                goto fail;
+            }
+
+            got_reset = get_mmco_reset(header);
+            h264->poc.prev_frame_num        = got_reset ? 0 : h264->poc.frame_num;
+            h264->poc.prev_frame_num_offset = got_reset ? 0 : h264->poc.frame_num_offset;
+            if (header->nal_unit_header.nal_ref_idc != 0) {
+                h264->poc.prev_poc_msb      = got_reset ? 0 : h264->poc.poc_msb;
+                if (got_reset)
+                    h264->poc.prev_poc_lsb = h264->picture_structure == 2 ? 0 : field_poc[0];
+                else
+                    h264->poc.prev_poc_lsb = h264->poc.poc_lsb;
+            }
+
+            if (output_picture_number != h264->last_poc) {
+                h264->last_poc = output_picture_number;
+                h264->highest_poc = FFMAX(h264->highest_poc, output_picture_number);
+
+                ret = h264_queue_frame(ctx, in, output_picture_number, &queued);
+                if (ret < 0)
+                    goto fail;
+            }
+        }
+        default:
+            break;
+        }
+    }
+
+    if (output_picture_number == INT_MIN) {
+        ret = AVERROR_INVALIDDATA;
+        goto fail;
+    }
+
+    ret = AVERROR(EAGAIN);
+fail:
+    ff_cbs_fragment_reset(au);
+    if (!queued)
+        av_packet_free(&in);
+
+    return ret;
+}
+
+static void h264_flush(AVBSFContext *ctx)
+{
+    DTS2PTSContext *s = ctx->priv_data;
+    DTS2PTSH264Context *h264 = &s->u.h264;
+
+    memset(&h264->sps, 0, sizeof(h264->sps));
+    memset(&h264->poc, 0, sizeof(h264->poc));
+    h264->last_poc = h264->highest_poc = INT_MIN;
+}
+
+// Core functions
+static const struct {
+    enum AVCodecID id;
+    int (*init)(AVBSFContext *ctx);
+    int (*filter)(AVBSFContext *ctx);
+    void (*flush)(AVBSFContext *ctx);
+} func_tab[] = {
+    { AV_CODEC_ID_H264, h264_init, h264_filter, h264_flush },
+};
+
+static int dts2pts_init(AVBSFContext *ctx)
+{
+    DTS2PTSContext *s = ctx->priv_data;
+    CodedBitstreamFragment *au = &s->au;
+    int i, ret;
+
+    for (i = 0; i < FF_ARRAY_ELEMS(func_tab); i++) {
+        if (func_tab[i].id == ctx->par_in->codec_id) {
+            s->init   = func_tab[i].init;
+            s->filter = func_tab[i].filter;
+            s->flush  = func_tab[i].flush;
+            break;
+        }
+    }
+    if (i == FF_ARRAY_ELEMS(func_tab))
+        return AVERROR_BUG;
+
+    ret = ff_cbs_init(&s->cbc, ctx->par_in->codec_id, ctx);
+    if (ret < 0)
+        return ret;
+
+    ret = s->init(ctx);
+    if (ret < 0)
+        return ret;
+
+    if (!ctx->par_in->extradata_size)
+        return 0;
+
+    ret = ff_cbs_read_extradata(s->cbc, au, ctx->par_in);
+    if (ret < 0)
+        av_log(ctx, AV_LOG_WARNING, "Failed to parse extradata.\n");
+
+    ff_cbs_fragment_reset(au);
+
+    return 0;
+}
+
+static int dts2pts_filter(AVBSFContext *ctx, AVPacket *out)
+{
+    DTS2PTSContext *s = ctx->priv_data;
+    DTS2PTSNode *poc_node = NULL;
+    DTS2PTSFrame frame;
+    int poc, ret;
+
+    // Fill up the FIFO and POC tree
+    if (!s->eof && av_fifo_can_write(s->fifo)) {
+        ret = s->filter(ctx);
+        if (ret != AVERROR_EOF)
+            return ret;
+        s->eof = 1;
+    }
+
+    if (!av_fifo_can_read(s->fifo))
+        return AVERROR_EOF;
+
+    // Fetch a packet from the FIFO
+    ret = av_fifo_read(s->fifo, &frame, 1);
+    av_assert2(ret >= 0);
+    av_packet_move_ref(out, frame.pkt);
+    av_packet_free(&frame.pkt);
+
+    // Search the timestamp for the requested POC and set PTS
+    poc = frame.poc;
+    poc_node = av_tree_find(s->root, &poc, cmp_find, NULL);
+    if (poc_node) {
+        out->pts = poc_node->dts;
+        if (!s->eof) {
+            // Remove the found entry from the tree
+            struct AVTreeNode *node = NULL;
+            av_tree_insert(&s->root, poc_node, cmp_insert, &node);
+            av_freep(&poc_node);
+            av_free(node);
+        }
+    } else {
+        poc--;
+        if (s->eof && (poc_node = av_tree_find(s->root, &poc, cmp_find, NULL))) {
+            out->pts = poc_node->dts + poc_node->duration;
+            ret = alloc_and_insert_node(ctx, out->pts, out->duration,
+                                        frame.poc, frame.poc_diff);
+            if (ret < 0) {
+                av_packet_unref(out);
+                return ret;
+            }
+            if (!ret)
+                av_log(ctx, AV_LOG_DEBUG, "Queueing frame with POC %d, dts %"PRId64"\n",
+                       frame.poc, out->pts);
+        } else
+            av_log(ctx, AV_LOG_WARNING, "No timestamp for POC %d in tree\n", frame.poc);
+    }
+    av_log(ctx, AV_LOG_DEBUG, "Returning frame for POC %d, dts %"PRId64", pts %"PRId64"\n",
+           frame.poc, out->dts, out->pts);
+
+    return 0;
+}
+
+static void dts2pts_flush(AVBSFContext *ctx)
+{
+    DTS2PTSContext *s = ctx->priv_data;
+    DTS2PTSFrame frame;
+
+    s->flush(ctx);
+    s->eof = 0;
+
+    while (av_fifo_can_read(s->fifo)) {
+        av_fifo_read(s->fifo, &frame, 1);
+        av_packet_free(&frame.pkt);
+    }
+
+    av_tree_enumerate(s->root, NULL, NULL, free_node);
+    av_tree_destroy(s->root);
+
+    ff_cbs_fragment_reset(&s->au);
+    ff_cbs_flush(s->cbc);
+}
+
+static void dts2pts_close(AVBSFContext *ctx)
+{
+    DTS2PTSContext *s = ctx->priv_data;
+
+    dts2pts_flush(ctx);
+
+    av_fifo_freep2(&s->fifo);
+    ff_cbs_fragment_free(&s->au);
+    ff_cbs_close(&s->cbc);
+}
+
+static const enum AVCodecID dts2pts_codec_ids[] = {
+    AV_CODEC_ID_H264,
+    AV_CODEC_ID_NONE,
+};
+
+const FFBitStreamFilter ff_dts2pts_bsf = {
+    .p.name         = "dts2pts",
+    .p.codec_ids    = dts2pts_codec_ids,
+    .priv_data_size = sizeof(DTS2PTSContext),
+    .init           = dts2pts_init,
+    .flush          = dts2pts_flush,
+    .close          = dts2pts_close,
+    .filter         = dts2pts_filter,
+};
-- 
2.37.2



More information about the ffmpeg-devel mailing list