[FFmpeg-devel] [PATCH 2/2] libfdk-aacdec: Flush delayed samples at the end

Martin Storsjö martin at martin.st
Fri Jan 21 15:58:06 EET 2022


On Fri, 21 Jan 2022, Andreas Rheinhardt wrote:

> Martin Storsjö:
>> Also trim off delay samples at the start instead of adjusting pts
>> to compensate for them; this avoids unwanted offsets if working
>> with raw samples without considering their pts.
>> ---
>>  libavcodec/libfdk-aacdec.c | 80 +++++++++++++++++++++++++++++++-------
>>  1 file changed, 65 insertions(+), 15 deletions(-)
>> 
>> diff --git a/libavcodec/libfdk-aacdec.c b/libavcodec/libfdk-aacdec.c
>> index 93b52023b0..d560e313ca 100644
>> --- a/libavcodec/libfdk-aacdec.c
>> +++ b/libavcodec/libfdk-aacdec.c
>> @@ -58,7 +58,11 @@ typedef struct FDKAACDecContext {
>>      int drc_cut;
>>      int album_mode;
>>      int level_limit;
>> -    int output_delay;
>> +#if FDKDEC_VER_AT_LEAST(2, 5) // 2.5.10
>> +    int output_delay_set;
>> +    int flush_samples;
>> +    int delay_samples;
>> +#endif
>>  } FDKAACDecContext;
>>
>> 
>> @@ -123,7 +127,12 @@ static int get_stream_info(AVCodecContext *avctx)
>>      avctx->sample_rate = info->sampleRate;
>>      avctx->frame_size  = info->frameSize;
>>  #if FDKDEC_VER_AT_LEAST(2, 5) // 2.5.10
>> -    s->output_delay    = info->outputDelay;
>> +    if (!s->output_delay_set && info->outputDelay) {
>> +        // Set this only once.
>> +        s->flush_samples    = info->outputDelay;
>> +        s->delay_samples    = info->outputDelay;
>> +        s->output_delay_set = 1;
>> +    }
>>  #endif
>>
>>      for (i = 0; i < info->numChannels; i++) {
>> @@ -367,14 +376,31 @@ static int fdk_aac_decode_frame(AVCodecContext *avctx, void *data,
>>      int ret;
>>      AAC_DECODER_ERROR err;
>>      UINT valid = avpkt->size;
>> +    UINT flags = 0;
>> +    int input_offset = 0;
>> 
>> -    err = aacDecoder_Fill(s->handle, &avpkt->data, &avpkt->size, &valid);
>> -    if (err != AAC_DEC_OK) {
>> -        av_log(avctx, AV_LOG_ERROR, "aacDecoder_Fill() failed: %x\n", err);
>> -        return AVERROR_INVALIDDATA;
>> +    if (avpkt->size) {
>> +        err = aacDecoder_Fill(s->handle, &avpkt->data, &avpkt->size, &valid);
>> +        if (err != AAC_DEC_OK) {
>> +            av_log(avctx, AV_LOG_ERROR, "aacDecoder_Fill() failed: %x\n", err);
>> +            return AVERROR_INVALIDDATA;
>> +        }
>> +    } else {
>> +#if FDKDEC_VER_AT_LEAST(2, 5) // 2.5.10
>> +        /* Handle decoder draining */
>> +        if (s->flush_samples > 0) {
>> +            flags |= AACDEC_FLUSH;
>> +        } else {
>> +            return AVERROR_EOF;
>> +        }
>> +#else
>> +        return AVERROR_EOF;
>> +#endif
>>      }
>> 
>> -    err = aacDecoder_DecodeFrame(s->handle, (INT_PCM *) s->decoder_buffer, s->decoder_buffer_size / sizeof(INT_PCM), 0);
>> +    err = aacDecoder_DecodeFrame(s->handle, (INT_PCM *) s->decoder_buffer,
>> +                                 s->decoder_buffer_size / sizeof(INT_PCM),
>> +                                 flags);
>>      if (err == AAC_DEC_NOT_ENOUGH_BITS) {
>>          ret = avpkt->size - valid;
>>          goto end;
>> @@ -390,16 +416,36 @@ static int fdk_aac_decode_frame(AVCodecContext *avctx, void *data,
>>          goto end;
>>      frame->nb_samples = avctx->frame_size;
>> 
>> +#if FDKDEC_VER_AT_LEAST(2, 5) // 2.5.10
>> +    if (flags & AACDEC_FLUSH) {
>> +        // Only return the right amount of samples at the end; if calling the
>> +        // decoder with AACDEC_FLUSH, it will keep returning frames indefinitely.
>> +        frame->nb_samples = FFMIN(s->flush_samples, frame->nb_samples);
>> +        av_log(s, AV_LOG_DEBUG, "Returning %d/%d delayed samples.\n",
>> +                                frame->nb_samples, s->flush_samples);
>> +        s->flush_samples -= frame->nb_samples;
>> +    } else {
>> +        // Trim off samples from the start to compensate for extra decoder
>> +        // delay. We could also just adjust the pts, but this avoids
>> +        // including the extra samples in the output altogether.
>> +        if (s->delay_samples) {
>> +            int drop_samples = FFMIN(s->delay_samples, frame->nb_samples);
>> +            av_log(s, AV_LOG_DEBUG, "Dropping %d/%d delayed samples.\n",
>> +                                    drop_samples, s->delay_samples);
>> +            s->delay_samples  -= drop_samples;
>> +            frame->nb_samples -= drop_samples;
>> +            input_offset = drop_samples * avctx->channels;
>> +            if (frame->nb_samples <= 0)
>> +                return 0;
>> +        }
>> +    }
>> +#endif
>> +
>>      if ((ret = ff_get_buffer(avctx, frame, 0)) < 0)
>>          goto end;
>> 
>> -    if (frame->pts != AV_NOPTS_VALUE)
>> -        frame->pts -= av_rescale_q(s->output_delay,
>> -                                   (AVRational){1, avctx->sample_rate},
>> -                                   avctx->time_base);
>> -
>> -    memcpy(frame->extended_data[0], s->decoder_buffer,
>> -           avctx->channels * avctx->frame_size *
>> +    memcpy(frame->extended_data[0], s->decoder_buffer + input_offset,
>> +           avctx->channels * frame->nb_samples *
>>             av_get_bytes_per_sample(avctx->sample_fmt));
>>
>>      *got_frame_ptr = 1;
>> @@ -432,7 +478,11 @@ const AVCodec ff_libfdk_aac_decoder = {
>>      .decode         = fdk_aac_decode_frame,
>>      .close          = fdk_aac_decode_close,
>>      .flush          = fdk_aac_decode_flush,
>> -    .capabilities   = AV_CODEC_CAP_DR1 | AV_CODEC_CAP_CHANNEL_CONF,
>> +    .capabilities   = AV_CODEC_CAP_DR1 | AV_CODEC_CAP_CHANNEL_CONF
>> +#if FDKDEC_VER_AT_LEAST(2, 5) // 2.5.10
>> +                      | AV_CODEC_CAP_DELAY
>> +#endif
>> +    ,
>>      .priv_class     = &fdk_aac_dec_class,
>>      .caps_internal  = FF_CODEC_CAP_INIT_THREADSAFE |
>>                        FF_CODEC_CAP_INIT_CLEANUP,
>> 
>
> When I use the libfdk-aac decoder I get the exact number of samples like
> with the native aac decoder (namely number of frames * 1024, as
> expected). What makes you believe this is necessary?

The fdk-aac decoder can, depending on the combination of options, have some
amount of extra internal delay that the libavcodec internal AAC decoder
doesn't have. (It's also possible to set the options so that the
fdk-aac decoder doesn't introduce any extra delay.)

Currently, we compensate for that extra delay by just offsetting pts 
backwards, so for a stream with N packets, we return samples with 
timestamps [-delay,N*framesize-delay].

In order not to lose data at the end, we must make the decoder flushable
and flush up to (delay) samples at the end. And since one doesn't normally
expect extra delay samples at the start of an AAC decoder's output, we also
trim off the same number of samples at the start (to simplify things for
users who don't observe the pts, and who would otherwise be surprised by the
stream starting at pts -delay instead of at pts 0).

// Martin


More information about the ffmpeg-devel mailing list