[FFmpeg-devel] [PATCH v1 1/2] avformat/demux: use per-packet repeat_pict to calculate frame duration

Mon Jan 27 05:54:05 EET 2025

For h264/hevc, when using the raw demuxer, parse_packet() may parse
multiple frames from a single input packet, depending on the setting of
-raw_packet_size. The first parsed packet is returned from
read_frame_internal(), while the subsequent parsed packets are stored in
the parse queue, and returned in later calls to read_frame_internal().

However, when avformat_find_stream_info() increments
sti->info->codec_info_duration_fields, the repeat_pict value of the
first parsed packet is reused for the rest of the packets in the parse
queue. This makes the field count inaccurate for interlaced video with
varying pic_struct values, particularly when -raw_packet_size is large.
This can be demonstrated with the FATE sample
h264-conformance/CVSE2_Sony_B.jsv: with the default -raw_packet_size of
1024, avg_frame_rate is incorrectly detected as 29.67 fps, while with a
smaller -raw_packet_size of 128, avg_frame_rate is correctly detected as
29.97 fps.

To fix this, copy the repeat_pict value into each parsed AVPacket
instead of reading it later from AVCodecParserContext (where it gets
overwritten when subsequent packets are parsed), and use the per-packet
value to calculate the frame duration and the field count.

Signed-off-by: Gavin Li <git at thegavinli.com>
---
 libavcodec/packet.h |  5 +++++
 libavformat/demux.c | 11 +++--------
 2 files changed, 8 insertions(+), 8 deletions(-)

diff --git a/libavcodec/packet.h b/libavcodec/packet.h
index c1f1ad7b43..c28174d7fb 100644
--- a/libavcodec/packet.h
+++ b/libavcodec/packet.h
@@ -581,6 +581,11 @@ typedef struct AVPacket {
      * or muxers.
      */
     AVRational time_base;
+
+    /**
+     * See AVFrame.repeat_pict for details.
+     */
+    int repeat_pict;
 } AVPacket;
 
 #if FF_API_INIT_PACKET
diff --git a/libavformat/demux.c b/libavformat/demux.c
index d8ab29431e..5ac79dae49 100644
--- a/libavformat/demux.c
+++ b/libavformat/demux.c
@@ -710,16 +710,10 @@ static void compute_frame_duration(AVFormatContext *s, int *pnum, int *pden,
             int ticks_per_frame = (sti->codec_desc &&
                                    (sti->codec_desc->props & AV_CODEC_PROP_FIELDS)) ? 2 : 1;
             av_reduce(pnum, pden,
-                      codec_framerate.den,
+                      codec_framerate.den * (1LL + (pc ? pkt->repeat_pict : 0)),
                       codec_framerate.num * (int64_t)ticks_per_frame,
                       INT_MAX);
 
-            if (pc && pc->repeat_pict) {
-                av_reduce(pnum, pden,
-                          (*pnum) * (1LL + pc->repeat_pict),
-                          (*pden),
-                          INT_MAX);
-            }
             /* If this codec can be interlaced or progressive then we need
              * a parser to compute duration of a packet. Thus if we have
              * no parser in such case leave duration undefined. */
@@ -1241,6 +1235,7 @@ static int parse_packet(AVFormatContext *s, AVPacket *pkt,
         out_pkt->pts          = sti->parser->pts;
         out_pkt->dts          = sti->parser->dts;
         out_pkt->pos          = sti->parser->pos;
+        out_pkt->repeat_pict  = sti->parser->repeat_pict;
         out_pkt->flags       |= pkt->flags & (AV_PKT_FLAG_DISCARD | AV_PKT_FLAG_CORRUPT);
 
         if (sti->need_parsing == AVSTREAM_PARSE_FULL_RAW)
@@ -2819,7 +2814,7 @@ int avformat_find_stream_info(AVFormatContext *ic, AVDictionary **options)
                 } else
                     sti->info->codec_info_duration += pkt->duration;
                 sti->info->codec_info_duration_fields += sti->parser && sti->need_parsing && fields
-                                                         ? sti->parser->repeat_pict + 1 : 2;
+                                                         ? pkt->repeat_pict + 1 : 2;
             }
         }
         if (st->codecpar->codec_type == AVMEDIA_TYPE_VIDEO) {
-- 
2.47.1