[FFmpeg-devel] [PATCH 2/4] fftools/ffmpeg: stop using av_stream_get_parser()

Thu Aug 18 20:08:12 EEST 2022

James Almer:
> On 8/18/2022 10:58 AM, Andreas Rheinhardt wrote:
>> Anton Khirnov:
>>> The parser is internal to the demuxer, so its state at any particular
>>> point is not well-defined for the caller. Additionally, it is being
>>> accessed from the main thread, while demuxing runs in a separate thread.
>>>
>>> Use a separate parser owned by ffmpeg.c to retrieve the same
>>> information.
>>>
>>> Fixes races, e.g. in:
>>> - fate-h264-brokensps-2580
>>> - fate-h264-extradata-reload
>>> - fate-iv8-demux
>>> - fate-m4v-cfr
>>> - fate-m4v
>>> ---
>>>   fftools/ffmpeg.c     | 33 +++++++++++++++++++++++++++++++--
>>>   fftools/ffmpeg.h     |  9 +++++++++
>>>   fftools/ffmpeg_opt.c |  9 +++++++++
>>>   3 files changed, 49 insertions(+), 2 deletions(-)
>>>
>>> diff --git a/fftools/ffmpeg.c b/fftools/ffmpeg.c
>>> index ef7177fc33..580df0443a 100644
>>> --- a/fftools/ffmpeg.c
>>> +++ b/fftools/ffmpeg.c
>>> @@ -610,6 +610,9 @@ static void ffmpeg_cleanup(int ret)
>>>           avcodec_free_context(&ist->dec_ctx);
>>>           avcodec_parameters_free(&ist->par);
>>>   +        avcodec_free_context(&ist->parser_dec);
>>> +        av_parser_close(ist->parser);
>>> +
>>>           av_freep(&input_streams[i]);
>>>       }
>>>   @@ -2421,6 +2424,15 @@ static int process_input_packet(InputStream
>>> *ist, const AVPacket *pkt, int no_eo
>>>           ret = av_packet_ref(avpkt, pkt);
>>>           if (ret < 0)
>>>               return ret;
>>> +
>>> +        if (ist->parser) {
>>> +            // we do not use the parsed output, only the
>>> +            // filled codec context fields
>>> +            uint8_t *dummy;
>>> +            int      dummy_size;
>>> +            av_parser_parse2(ist->parser, ist->parser_dec, &dummy,
>>> &dummy_size,
>>> +                             pkt->data, pkt->size, pkt->pts,
>>> pkt->dts, pkt->pos);
>>> +        }
>>>       }
>>>         if (pkt && pkt->dts != AV_NOPTS_VALUE) {
>>> @@ -2452,7 +2464,8 @@ static int process_input_packet(InputStream
>>> *ist, const AVPacket *pkt, int no_eo
>>>                   if (pkt && pkt->duration) {
>>>                       duration_dts = av_rescale_q(pkt->duration,
>>> ist->st->time_base, AV_TIME_BASE_Q);
>>>                   } else if(ist->dec_ctx->framerate.num != 0 &&
>>> ist->dec_ctx->framerate.den != 0) {
>>> -                    int ticks= av_stream_get_parser(ist->st) ?
>>> av_stream_get_parser(ist->st)->repeat_pict+1 :
>>> ist->dec_ctx->ticks_per_frame;
>>> +                    int ticks = ist->parser ?
>>> ist->parser->repeat_pict + 1 :
>>> +                                             
>>> ist->dec_ctx->ticks_per_frame;
>>>                       duration_dts = ((int64_t)AV_TIME_BASE *
>>>                                       ist->dec_ctx->framerate.den *
>>> ticks) /
>>>                                       ist->dec_ctx->framerate.num /
>>> ist->dec_ctx->ticks_per_frame;
>>> @@ -2555,7 +2568,8 @@ static int process_input_packet(InputStream
>>> *ist, const AVPacket *pkt, int no_eo
>>>               } else if (pkt->duration) {
>>>                   ist->next_dts += av_rescale_q(pkt->duration,
>>> ist->st->time_base, AV_TIME_BASE_Q);
>>>               } else if(ist->dec_ctx->framerate.num != 0) {
>>> -                int ticks= av_stream_get_parser(ist->st) ?
>>> av_stream_get_parser(ist->st)->repeat_pict + 1 :
>>> ist->dec_ctx->ticks_per_frame;
>>> +                int ticks = ist->parser ? ist->parser->repeat_pict +
>>> 1 :
>>> +                                         
>>> ist->dec_ctx->ticks_per_frame;
>>>                   ist->next_dts += ((int64_t)AV_TIME_BASE *
>>>                                     ist->dec_ctx->framerate.den *
>>> ticks) /
>>>                                     ist->dec_ctx->framerate.num /
>>> ist->dec_ctx->ticks_per_frame;
>>> @@ -2688,6 +2702,21 @@ static int init_input_stream(int ist_index,
>>> char *error, int error_len)
>>>           assert_avoptions(ist->decoder_opts);
>>>       }
>>>   +    if (ist->parser) {
>>> +        AVCodecParameters *par_tmp;
>>> +
>>> +        par_tmp = avcodec_parameters_alloc();
>>> +        if (!par_tmp)
>>> +            return AVERROR(ENOMEM);
>>> +
>>> +        ret = avcodec_parameters_from_context(par_tmp, ist->dec_ctx);
>>> +        if (ret >= 0)
>>> +            ret = avcodec_parameters_to_context(ist->parser_dec,
>>> par_tmp);
>>> +        avcodec_parameters_free(&par_tmp);
>>> +        if (ret < 0)
>>> +            return ret;
>>> +    }
>>> +
>>>       ist->next_pts = AV_NOPTS_VALUE;
>>>       ist->next_dts = AV_NOPTS_VALUE;
>>>   diff --git a/fftools/ffmpeg.h b/fftools/ffmpeg.h
>>> index 44cc23fa84..f67a2f1d1d 100644
>>> --- a/fftools/ffmpeg.h
>>> +++ b/fftools/ffmpeg.h
>>> @@ -334,6 +334,15 @@ typedef struct InputStream {
>>>       AVFrame *decoded_frame;
>>>       AVPacket *pkt;
>>>   +    /**
>>> +     * Parser for timestamp processing.
>>> +     */
>>> +    AVCodecParserContext *parser;
>>> +    /**
>>> +     * Codec context needed by parsing.
>>> +     */
>>> +    AVCodecContext       *parser_dec;
>>> +
>>>       int64_t       prev_pkt_pts;
>>>       int64_t       start;     /* time when read started */
>>>       /* predicted dts of the next packet read for this stream or
>>> (when there are
>>> diff --git a/fftools/ffmpeg_opt.c b/fftools/ffmpeg_opt.c
>>> index 30ca5cd609..a505c7b26f 100644
>>> --- a/fftools/ffmpeg_opt.c
>>> +++ b/fftools/ffmpeg_opt.c
>>> @@ -1065,6 +1065,15 @@ static void add_input_streams(OptionsContext
>>> *o, AVFormatContext *ic)
>>>               ist->top_field_first = -1;
>>>               MATCH_PER_STREAM_OPT(top_field_first, i,
>>> ist->top_field_first, ic, st);
>>>   +            ist->parser = av_parser_init(ist->dec->id);
>>> +            if (ist->parser) {
>>> +                ist->parser->flags |= PARSER_FLAG_COMPLETE_FRAMES;
>>> +
>>> +                ist->parser_dec = avcodec_alloc_context3(NULL);
>>> +                if (!ist->parser_dec)
>>> +                    exit_program(1);
>>> +            }
>>> +
>>>               break;
>>>           case AVMEDIA_TYPE_AUDIO:
>>>               ist->guess_layout_max = INT_MAX;
>>
>> Are you aware that some parsers (e.g. the HEVC one) are still slow even
>> with the PARSER_FLAG_COMPLETE_FRAMES flag and consume non-negligible
>> amounts of memory?
> 
> Is it because it uses ff_h2645_packet_split()? It used to do like
> h264_parser and feature a custom NAL splitting implementation in order
> to not bother unscaping entire slice NALs (For raw annexb) since it only
> cares about their headers.
> 
> Maybe adding a max_read_size parameter to it so it stops parsing NALs
> after that point would help. It would also allow us to use it in
> h264_parser, simplifying it considerably.
> 

1. It is because ff_h2645_packet_split().
2. And even if unescaping is stopped after the first few bytes (which is
e.g. not possible for SEIs as it might be the last SEI message that
contains important information), one would still need a buffer to hold
the complete input packet (because it might be that it contains a
billion little NALUs).
3. The simplifications due to ceb085906623dcc64b82151d433d1be2b0c71f73
were in part due to the HEVC parser retaining a list of NALUs instead of
only retaining the current NALU (it only looks at the current NALU
anyway). The H.264 parser does not make this mistake, so any
simplifications (if any) will be smaller.

- Andreas