[FFmpeg-devel] [PATCH 2/2] libfdk-aacdec: Flush delayed samples at the end

Andreas Rheinhardt andreas.rheinhardt at outlook.com
Fri Jan 21 16:42:36 EET 2022


Martin Storsjö:
> On Fri, 21 Jan 2022, Andreas Rheinhardt wrote:
> 
>> Martin Storsjö:
>>> Also trim off delay samples at the start instead of adjusting pts
>>> to compensate for them; this avoids unwanted offsets if working
>>> with raw samples without considering their pts.
>>> ---
>>>  libavcodec/libfdk-aacdec.c | 80 +++++++++++++++++++++++++++++++-------
>>>  1 file changed, 65 insertions(+), 15 deletions(-)
>>>
>>> diff --git a/libavcodec/libfdk-aacdec.c b/libavcodec/libfdk-aacdec.c
>>> index 93b52023b0..d560e313ca 100644
>>> --- a/libavcodec/libfdk-aacdec.c
>>> +++ b/libavcodec/libfdk-aacdec.c
>>> @@ -58,7 +58,11 @@ typedef struct FDKAACDecContext {
>>>      int drc_cut;
>>>      int album_mode;
>>>      int level_limit;
>>> -    int output_delay;
>>> +#if FDKDEC_VER_AT_LEAST(2, 5) // 2.5.10
>>> +    int output_delay_set;
>>> +    int flush_samples;
>>> +    int delay_samples;
>>> +#endif
>>>  } FDKAACDecContext;
>>>
>>>
>>> @@ -123,7 +127,12 @@ static int get_stream_info(AVCodecContext *avctx)
>>>      avctx->sample_rate = info->sampleRate;
>>>      avctx->frame_size  = info->frameSize;
>>>  #if FDKDEC_VER_AT_LEAST(2, 5) // 2.5.10
>>> -    s->output_delay    = info->outputDelay;
>>> +    if (!s->output_delay_set && info->outputDelay) {
>>> +        // Set this only once.
>>> +        s->flush_samples    = info->outputDelay;
>>> +        s->delay_samples    = info->outputDelay;
>>> +        s->output_delay_set = 1;
>>> +    }
>>>  #endif
>>>
>>>      for (i = 0; i < info->numChannels; i++) {
>>> @@ -367,14 +376,31 @@ static int fdk_aac_decode_frame(AVCodecContext
>>> *avctx, void *data,
>>>      int ret;
>>>      AAC_DECODER_ERROR err;
>>>      UINT valid = avpkt->size;
>>> +    UINT flags = 0;
>>> +    int input_offset = 0;
>>>
>>> -    err = aacDecoder_Fill(s->handle, &avpkt->data, &avpkt->size,
>>> &valid);
>>> -    if (err != AAC_DEC_OK) {
>>> -        av_log(avctx, AV_LOG_ERROR, "aacDecoder_Fill() failed:
>>> %x\n", err);
>>> -        return AVERROR_INVALIDDATA;
>>> +    if (avpkt->size) {
>>> +        err = aacDecoder_Fill(s->handle, &avpkt->data, &avpkt->size,
>>> &valid);
>>> +        if (err != AAC_DEC_OK) {
>>> +            av_log(avctx, AV_LOG_ERROR, "aacDecoder_Fill() failed:
>>> %x\n", err);
>>> +            return AVERROR_INVALIDDATA;
>>> +        }
>>> +    } else {
>>> +#if FDKDEC_VER_AT_LEAST(2, 5) // 2.5.10
>>> +        /* Handle decoder draining */
>>> +        if (s->flush_samples > 0) {
>>> +            flags |= AACDEC_FLUSH;
>>> +        } else {
>>> +            return AVERROR_EOF;
>>> +        }
>>> +#else
>>> +        return AVERROR_EOF;
>>> +#endif
>>>      }
>>>
>>> -    err = aacDecoder_DecodeFrame(s->handle, (INT_PCM *)
>>> s->decoder_buffer, s->decoder_buffer_size / sizeof(INT_PCM), 0);
>>> +    err = aacDecoder_DecodeFrame(s->handle, (INT_PCM *)
>>> s->decoder_buffer,
>>> +                                 s->decoder_buffer_size /
>>> sizeof(INT_PCM),
>>> +                                 flags);
>>>      if (err == AAC_DEC_NOT_ENOUGH_BITS) {
>>>          ret = avpkt->size - valid;
>>>          goto end;
>>> @@ -390,16 +416,36 @@ static int fdk_aac_decode_frame(AVCodecContext
>>> *avctx, void *data,
>>>          goto end;
>>>      frame->nb_samples = avctx->frame_size;
>>>
>>> +#if FDKDEC_VER_AT_LEAST(2, 5) // 2.5.10
>>> +    if (flags & AACDEC_FLUSH) {
>>> +        // Only return the right amount of samples at the end; if
>>> calling the
>>> +        // decoder with AACDEC_FLUSH, it will keep returning frames
>>> indefinitely.
>>> +        frame->nb_samples = FFMIN(s->flush_samples, frame->nb_samples);
>>> +        av_log(s, AV_LOG_DEBUG, "Returning %d/%d delayed samples.\n",
>>> +                                frame->nb_samples, s->flush_samples);
>>> +        s->flush_samples -= frame->nb_samples;
>>> +    } else {
>>> +        // Trim off samples from the start to compensate for extra
>>> decoder
>>> +        // delay. We could also just adjust the pts, but this avoids
>>> +        // including the extra samples in the output altogether.
>>> +        if (s->delay_samples) {
>>> +            int drop_samples = FFMIN(s->delay_samples,
>>> frame->nb_samples);
>>> +            av_log(s, AV_LOG_DEBUG, "Dropping %d/%d delayed
>>> samples.\n",
>>> +                                    drop_samples, s->delay_samples);
>>> +            s->delay_samples  -= drop_samples;
>>> +            frame->nb_samples -= drop_samples;
>>> +            input_offset = drop_samples * avctx->channels;
>>> +            if (frame->nb_samples <= 0)
>>> +                return 0;
>>> +        }
>>> +    }
>>> +#endif
>>> +
>>>      if ((ret = ff_get_buffer(avctx, frame, 0)) < 0)
>>>          goto end;
>>>
>>> -    if (frame->pts != AV_NOPTS_VALUE)
>>> -        frame->pts -= av_rescale_q(s->output_delay,
>>> -                                   (AVRational){1, avctx->sample_rate},
>>> -                                   avctx->time_base);
>>> -
>>> -    memcpy(frame->extended_data[0], s->decoder_buffer,
>>> -           avctx->channels * avctx->frame_size *
>>> +    memcpy(frame->extended_data[0], s->decoder_buffer + input_offset,
>>> +           avctx->channels * frame->nb_samples *
>>>             av_get_bytes_per_sample(avctx->sample_fmt));
>>>
>>>      *got_frame_ptr = 1;
>>> @@ -432,7 +478,11 @@ const AVCodec ff_libfdk_aac_decoder = {
>>>      .decode         = fdk_aac_decode_frame,
>>>      .close          = fdk_aac_decode_close,
>>>      .flush          = fdk_aac_decode_flush,
>>> -    .capabilities   = AV_CODEC_CAP_DR1 | AV_CODEC_CAP_CHANNEL_CONF,
>>> +    .capabilities   = AV_CODEC_CAP_DR1 | AV_CODEC_CAP_CHANNEL_CONF
>>> +#if FDKDEC_VER_AT_LEAST(2, 5) // 2.5.10
>>> +                      | AV_CODEC_CAP_DELAY
>>> +#endif
>>> +    ,
>>>      .priv_class     = &fdk_aac_dec_class,
>>>      .caps_internal  = FF_CODEC_CAP_INIT_THREADSAFE |
>>>                        FF_CODEC_CAP_INIT_CLEANUP,
>>>
>>
>> When I use the libfdk-aac decoder I get exactly the same number of
>> samples as with the native aac decoder (namely number of frames * 1024,
>> as expected). What makes you believe this is necessary?
> 
> The fdk-aac decoder can have, depending on combination of options, some
> amount of extra internal delay, that the libavcodec internal aac decoder
> doesn't have. (It's also possible to set the options in a state where
> the fdk-aac decoder doesn't induce any extra delay.)
> 
> Currently, we compensate for that extra delay by just offsetting pts
> backwards, so for a stream with N packets, we return samples with
> timestamps [-delay,N*framesize-delay].
> 
> In order not to lose data at the end, we must make the decoder flushable
> and flush up to (delay) samples at the end. And since one doesn't
> normally expect extra delay samples at the start of an AAC decoder
> output, we also trim out the same amount of samples at the start (to
> simplify for users that don't observe the pts, who otherwise are
> surprised by the stream starting from pts -delay instead of at pts 0).
> 

Interesting: There is indeed a delay at the start (720 samples in a
quick test) compared to the native AAC decoder.
Furthermore, the current code is buggy, as it believes
avcodec->time_base to be the time base of the returned AVFrames (in
reality it is avcodec->pkt_timebase; just test with AAC-in-Matroska to see this).
I haven't tested your patches, but I have now realized that there is
indeed an issue. And your patch should also fix the wrong timebase issue.

- Andreas


More information about the ffmpeg-devel mailing list