[FFmpeg-devel] [PATCH v1 3/4] avformat/imf: fix packet pts, dts and muxing
Andreas Rheinhardt
andreas.rheinhardt at outlook.com
Tue Feb 1 06:36:38 EET 2022
Pierre-Anthony Lemieux:
> On Sun, Jan 30, 2022 at 2:16 PM Andreas Rheinhardt
> <andreas.rheinhardt at outlook.com> wrote:
>>
>> pal at sandflow.com:
>>> From: Pierre-Anthony Lemieux <pal at palemieux.com>
>>>
>>> The IMF demuxer does not set the DTS and PTS of packets accurately in all
>>> scenarios. Moreover, audio packets are not trimmed when they exceed the
>>> duration of the underlying resource.
>>>
>>> Closes https://trac.ffmpeg.org/ticket/9611
>>>
>>> ---
>>> libavformat/imfdec.c | 225 +++++++++++++++++++++++++------------------
>>> 1 file changed, 132 insertions(+), 93 deletions(-)
>>>
>>> diff --git a/libavformat/imfdec.c b/libavformat/imfdec.c
>>> index 6b50b582f6..05dcb6ff31 100644
>>> --- a/libavformat/imfdec.c
>>> +++ b/libavformat/imfdec.c
>>> @@ -65,6 +65,7 @@
>>> #include "avio_internal.h"
>>> #include "imf.h"
>>> #include "internal.h"
>>> +#include "libavcodec/packet.h"
>>> #include "libavutil/avstring.h"
>>> #include "libavutil/bprint.h"
>>> #include "libavutil/opt.h"
>>> @@ -97,6 +98,9 @@ typedef struct IMFVirtualTrackResourcePlaybackCtx {
>>> IMFAssetLocator *locator;
>>> FFIMFTrackFileResource *resource;
>>> AVFormatContext *ctx;
>>> + AVRational start_time;
>>> + AVRational end_time;
>>> + AVRational ts_offset;
>>> } IMFVirtualTrackResourcePlaybackCtx;
>>>
>>> typedef struct IMFVirtualTrackPlaybackCtx {
>>> @@ -108,7 +112,6 @@ typedef struct IMFVirtualTrackPlaybackCtx {
>>> IMFVirtualTrackResourcePlaybackCtx *resources; /**< Buffer holding the resources */
>>> int32_t current_resource_index; /**< Index of the current resource in resources,
>>> or < 0 if a current resource has yet to be selected */
>>> - int64_t last_pts; /**< Last timestamp */
>>> } IMFVirtualTrackPlaybackCtx;
>>>
>>> typedef struct IMFContext {
>>> @@ -342,6 +345,7 @@ static int open_track_resource_context(AVFormatContext *s,
>>> int ret = 0;
>>> int64_t entry_point;
>>> AVDictionary *opts = NULL;
>>> + AVStream *st;
>>>
>>> if (track_resource->ctx) {
>>> av_log(s,
>>> @@ -383,23 +387,28 @@ static int open_track_resource_context(AVFormatContext *s,
>>> }
>>> av_dict_free(&opts);
>>>
>>> - /* Compare the source timebase to the resource edit rate,
>>> - * considering the first stream of the source file
>>> - */
>>> - if (av_cmp_q(track_resource->ctx->streams[0]->time_base,
>>> - av_inv_q(track_resource->resource->base.edit_rate)))
>>> + /* make sure there is only one stream in the file */
>>> +
>>> + if (track_resource->ctx->nb_streams != 1) {
>>> + ret = AVERROR_INVALIDDATA;
>>> + goto cleanup;
>>> + }
>>> +
>>> + st = track_resource->ctx->streams[0];
>>> +
>>> + /* Warn if the resource time base does not match the file time base */
>>> + if (av_cmp_q(st->time_base, av_inv_q(track_resource->resource->base.edit_rate)))
>>> av_log(s,
>>> AV_LOG_WARNING,
>>> - "Incoherent source stream timebase %d/%d regarding resource edit rate: %d/%d",
>>> - track_resource->ctx->streams[0]->time_base.num,
>>> - track_resource->ctx->streams[0]->time_base.den,
>>> + "Incoherent source stream timebase " AVRATIONAL_FORMAT
>>> + "regarding resource edit rate: " AVRATIONAL_FORMAT,
>>> + st->time_base.num,
>>> + st->time_base.den,
>>> track_resource->resource->base.edit_rate.den,
>>> track_resource->resource->base.edit_rate.num);
>>>
>>> - entry_point = (int64_t)track_resource->resource->base.entry_point
>>> - * track_resource->resource->base.edit_rate.den
>>> - * AV_TIME_BASE
>>> - / track_resource->resource->base.edit_rate.num;
>>> + entry_point = av_rescale_q(track_resource->resource->base.entry_point, st->time_base,
>>> + av_inv_q(track_resource->resource->base.edit_rate));
>>>
>>> if (entry_point) {
>>> av_log(s,
>>> @@ -407,7 +416,7 @@ static int open_track_resource_context(AVFormatContext *s,
>>> "Seek at resource %s entry point: %" PRIu32 "\n",
>>> track_resource->locator->absolute_uri,
>>> track_resource->resource->base.entry_point);
>>> - ret = avformat_seek_file(track_resource->ctx, -1, entry_point, entry_point, entry_point, 0);
>>> + ret = avformat_seek_file(track_resource->ctx, 0, entry_point, entry_point, entry_point, 0);
>>> if (ret < 0) {
>>> av_log(s,
>>> AV_LOG_ERROR,
>>> @@ -470,11 +479,16 @@ static int open_track_file_resource(AVFormatContext *s,
>>> vt_ctx.locator = asset_locator;
>>> vt_ctx.resource = track_file_resource;
>>> vt_ctx.ctx = NULL;
>>> - track->resources[track->resource_count++] = vt_ctx;
>>> - track->duration = av_add_q(track->duration,
>>> + vt_ctx.start_time = track->duration;
>>> + vt_ctx.ts_offset = av_sub_q(vt_ctx.start_time,
>>> + av_div_q(av_make_q((int)track_file_resource->base.entry_point, 1),
>>> + track_file_resource->base.edit_rate));
>>> + vt_ctx.end_time = av_add_q(track->duration,
>>> av_make_q((int)track_file_resource->base.duration
>>> * track_file_resource->base.edit_rate.den,
>>> track_file_resource->base.edit_rate.num));
>>> + track->resources[track->resource_count++] = vt_ctx;
>>> + track->duration = vt_ctx.end_time;
>>> }
>>>
>>> return 0;
>>> @@ -701,11 +715,14 @@ static IMFVirtualTrackPlaybackCtx *get_next_track_with_minimum_timestamp(AVForma
>>> return track;
>>> }
>>>
>>> -static IMFVirtualTrackResourcePlaybackCtx *get_resource_context_for_timestamp(AVFormatContext *s,
>>> - IMFVirtualTrackPlaybackCtx *track)
>>> +static int get_resource_context_for_timestamp(AVFormatContext *s, IMFVirtualTrackPlaybackCtx *track, IMFVirtualTrackResourcePlaybackCtx **resource)
>>> {
>>> - AVRational edit_unit_duration = av_inv_q(track->resources[0].resource->base.edit_rate);
>>> - AVRational cumulated_duration = av_make_q(0, edit_unit_duration.den);
>>> + *resource = NULL;
>>> +
>>> + if (av_cmp_q(track->current_timestamp, track->duration) >= 0) {
>>> + av_log(s, AV_LOG_DEBUG, "Reached the end of the virtual track\n");
>>> + return AVERROR_EOF;
>>> + }
>>>
>>> av_log(s,
>>> AV_LOG_DEBUG,
>>> @@ -714,119 +731,141 @@ static IMFVirtualTrackResourcePlaybackCtx *get_resource_context_for_timestamp(AV
>>> av_q2d(track->current_timestamp),
>>> av_q2d(track->duration));
>>> for (uint32_t i = 0; i < track->resource_count; ++i) {
>>> - cumulated_duration = av_add_q(cumulated_duration,
>>> - av_make_q((int)track->resources[i].resource->base.duration
>>> - * edit_unit_duration.num,
>>> - edit_unit_duration.den));
>>>
>>> - if (av_cmp_q(av_add_q(track->current_timestamp, edit_unit_duration), cumulated_duration) <= 0) {
>>> + if (av_cmp_q(track->resources[i].end_time, track->current_timestamp) > 0) {
>>> av_log(s,
>>> AV_LOG_DEBUG,
>>> - "Found resource %d in track %d to read for timestamp %lf "
>>> - "(on cumulated=%lf): entry=%" PRIu32
>>> + "Found resource %d in track %d to read at timestamp %lf: "
>>> + "entry=%" PRIu32
>>> ", duration=%" PRIu32
>>> - ", editrate=" AVRATIONAL_FORMAT
>>> - " | edit_unit_duration=%lf\n",
>>> + ", editrate=" AVRATIONAL_FORMAT,
>>> i,
>>> track->index,
>>> av_q2d(track->current_timestamp),
>>> - av_q2d(cumulated_duration),
>>> track->resources[i].resource->base.entry_point,
>>> track->resources[i].resource->base.duration,
>>> - AVRATIONAL_ARG(track->resources[i].resource->base.edit_rate),
>>> - av_q2d(edit_unit_duration));
>>> + AVRATIONAL_ARG(track->resources[i].resource->base.edit_rate));
>>>
>>> if (track->current_resource_index != i) {
>>> + int ret;
>>> +
>>> av_log(s,
>>> AV_LOG_DEBUG,
>>> "Switch resource on track %d: re-open context\n",
>>> track->index);
>>> - if (open_track_resource_context(s, &(track->resources[i])) != 0)
>>> - return NULL;
>>> +
>>> + ret = open_track_resource_context(s, &(track->resources[i]));
>>> + if (ret != 0)
>>> + return ret;
>>> if (track->current_resource_index > 0)
>>> avformat_close_input(&track->resources[track->current_resource_index].ctx);
>>> track->current_resource_index = i;
>>> }
>>>
>>> - return &(track->resources[track->current_resource_index]);
>>> + *resource = &(track->resources[track->current_resource_index]);
>>> + return 0;
>>> }
>>> }
>>> - return NULL;
>>> +
>>> + av_log(s, AV_LOG_ERROR, "Could not find IMF track resource to read\n");
>>> + return AVERROR_STREAM_NOT_FOUND;
>>> +}
>>> +
>>> +static int imf_time_to_ts(int64_t *ts, AVRational t, AVRational time_base)
>>> +{
>>> + int dst_num;
>>> + int dst_den;
>>> + AVRational r;
>>> +
>>> + r = av_div_q(t, time_base);
>>> +
>>> + if ((av_reduce(&dst_num, &dst_den, r.num, r.den, INT64_MAX) != 1))
>>> + return 0;
>>> +
>>> + if (dst_den != 1)
>>> + return 0;
>>> +
>>> + *ts = dst_num;
>>> +
>>> + return 1;
>>> }
>>>
>>> static int imf_read_packet(AVFormatContext *s, AVPacket *pkt)
>>> {
>>> - IMFContext *c = s->priv_data;
>>> - IMFVirtualTrackResourcePlaybackCtx *resource_to_read = NULL;
>>> - AVRational edit_unit_duration;
>>> + IMFVirtualTrackResourcePlaybackCtx *resource = NULL;
>>> int ret = 0;
>>> IMFVirtualTrackPlaybackCtx *track;
>>> - FFStream *track_stream;
>>> + int64_t delta_ts;
>>> + AVStream *st;
>>> + AVRational next_timestamp;
>>>
>>> track = get_next_track_with_minimum_timestamp(s);
>>>
>>> - if (av_cmp_q(track->current_timestamp, track->duration) == 0)
>>> - return AVERROR_EOF;
>>> + ret = get_resource_context_for_timestamp(s, track, &resource);
>>> + if (ret)
>>> + return ret;
>>>
>>> - resource_to_read = get_resource_context_for_timestamp(s, track);
>>> + ret = av_read_frame(resource->ctx, pkt);
>>> + if (ret) {
>>> + av_log(s, AV_LOG_ERROR, "Failed to read frame\n");
>>> + return ret;
>>> + }
>>>
>>> - if (!resource_to_read) {
>>> - edit_unit_duration
>>> - = av_inv_q(track->resources[track->current_resource_index].resource->base.edit_rate);
>>> + av_log(s, AV_LOG_DEBUG, "Got packet: pts=%" PRId64 ", dts=%" PRId64
>>> + ", duration=%" PRId64 ", stream_index=%d, pos=%" PRId64
>>> + ", time_base=" AVRATIONAL_FORMAT "\n", pkt->pts, pkt->dts, pkt->duration,
>>> + pkt->stream_index, pkt->pos, pkt->time_base.num, pkt->time_base.den);
>>>
>>> - if (av_cmp_q(av_add_q(track->current_timestamp, edit_unit_duration), track->duration) > 0)
>>> - return AVERROR_EOF;
>>> + /* IMF resources contain only one stream */
>>>
>>> - av_log(s, AV_LOG_ERROR, "Could not find IMF track resource to read\n");
>>> - return AVERROR_STREAM_NOT_FOUND;
>>> - }
>>> + if (pkt->stream_index != 0)
>>> + return AVERROR_INVALIDDATA;
>>> + st = resource->ctx->streams[0];
>>>
>>> - while (!ff_check_interrupt(c->interrupt_callback) && !ret) {
>>> - ret = av_read_frame(resource_to_read->ctx, pkt);
>>> - av_log(s,
>>> - AV_LOG_DEBUG,
>>> - "Got packet: pts=%" PRId64
>>> - ", dts=%" PRId64
>>> - ", duration=%" PRId64
>>> - ", stream_index=%d, pos=%" PRId64
>>> - "\n",
>>> - pkt->pts,
>>> - pkt->dts,
>>> - pkt->duration,
>>> - pkt->stream_index,
>>> - pkt->pos);
>>> -
>>> - track_stream = ffstream(s->streams[track->index]);
>>> - if (ret >= 0) {
>>> - /* Update packet info from track */
>>> - if (pkt->dts < track_stream->cur_dts && track->last_pts > 0)
>>> - pkt->dts = track_stream->cur_dts;
>>> -
>>> - pkt->pts = track->last_pts;
>>> - pkt->dts = pkt->dts
>>> - - (int64_t)track->resources[track->current_resource_index].resource->base.entry_point;
>>> - pkt->stream_index = track->index;
>>> -
>>> - /* Update track cursors */
>>> - track->current_timestamp
>>> - = av_add_q(track->current_timestamp,
>>> - av_make_q((int)pkt->duration
>>> - * resource_to_read->ctx->streams[0]->time_base.num,
>>> - resource_to_read->ctx->streams[0]->time_base.den));
>>> - track->last_pts += pkt->duration;
>>> + pkt->stream_index = track->index;
>>>
>>> - return 0;
>>> - } else if (ret != AVERROR_EOF) {
>>> - av_log(s,
>>> - AV_LOG_ERROR,
>>> - "Could not get packet from track %d: %s\n",
>>> - track->index,
>>> - av_err2str(ret));
>>> - return ret;
>>> + /* adjust the packet PTS and DTS based on the temporal position of the resource within the timeline */
>>> +
>>> + if ((imf_time_to_ts(&delta_ts, resource->ts_offset, st->time_base) == 0))
>>> + av_log(s, AV_LOG_WARNING, "Incoherent time stamp " AVRATIONAL_FORMAT " for time base " AVRATIONAL_FORMAT,
>>> + resource->ts_offset.num, resource->ts_offset.den, pkt->time_base.num,
>>> + pkt->time_base.den);
>>> + if (pkt->pts != AV_NOPTS_VALUE)
>>> + pkt->pts += delta_ts;
>>> + if (pkt->dts != AV_NOPTS_VALUE)
>>> + pkt->dts += delta_ts;
>>> +
>>> + /* advance the track timestamp by the packet duration */
>>> +
>>> + next_timestamp = av_add_q(track->current_timestamp,
>>> + av_mul_q(av_make_q((int)pkt->duration, 1), st->time_base));
>>> +
>>> + /* if necessary, clamp the next timestamp to the end of the current resource */
>>> +
>>> + if (av_cmp_q(next_timestamp, resource->end_time) > 0) {
>>> +
>>> + next_timestamp = resource->end_time;
>>> +
>>> + /* shrink the packet duration */
>>> +
>>> + if ((imf_time_to_ts(&pkt->duration, av_sub_q(resource->end_time, track->current_timestamp), st->time_base) == 0))
>>> + av_log(s, AV_LOG_WARNING, "Incoherent time base during packet duration calculation");
>>> +
>>> + /* shrink the packet size itself for audio samples */
>>> + /* only AV_CODEC_ID_PCM_S24LE is supported in IMF */
>>> +
>>> + if (st->codecpar->codec_id == AV_CODEC_ID_PCM_S24LE) {
>>> + int bytes_per_sample = av_get_exact_bits_per_sample(st->codecpar->codec_id) >> 3;
>>> + int64_t nbsamples = av_rescale_q(pkt->duration, st->time_base, av_make_q(1, st->codecpar->sample_rate));
>>> + av_shrink_packet(pkt, nbsamples * st->codecpar->channels * bytes_per_sample);
>>> + } else {
>>> + av_log(s, AV_LOG_WARNING, "Cannot shrink packets for non-PCM essence");
>>
>> AV_PKT_DATA_SKIP_SAMPLES
>
> Ok. Would the "reason for end skip" be 1 (convergence)?
>
I guess so given that this is not the end of the logical track.
- Andreas
More information about the ffmpeg-devel
mailing list