[Libav-user] Resampling pcm audio.
Hristo Ivanov
hivanov.ffmailing at gmail.com
Wed Jun 13 20:03:34 EEST 2018
Hi.
I am capturing video and audio from an IP camera (RTSP), transcoding them, and
saving the result to a file.
The input looks like this:
Input #0, rtsp, from 'rtsp://10.1.1.22/?line=1&enableaudio=1&audio_line=1':
Metadata:
title : LIVE VIEW
Duration: N/A, start: -0.001000, bitrate: N/A
Stream #0:0: Video: h264 (Main), yuv420p(tv, bt470bg/bt470bg/bt709,
progressive), 1280x720 [SAR 1:1 DAR 16:9], 30 tbr, 90k tbn, 180k tbc
Stream #0:1: Audio: pcm_mulaw, 8000 Hz, mono, s16, 64 kb/s
Logging the pts, pkt_duration and nb_samples of each decoded audio frame with
the code below, I get the output that follows it:
if (_frame->frame->height == 0) { // Easy way to check for audio frame.
std::cout << _frame->frame->pts << ", " << _frame->frame->pkt_duration <<
", " << _frame->frame->nb_samples << std::endl;
}
4472, 640, 640 // Relative pts: unknown
5112, 640, 640 // Relative pts: 640
5784, 640, 640 // Relative pts: 672
6424, 640, 640 // Relative pts: 640
7064, 640, 640 // Relative pts: 640
7704, 640, 640 // Relative pts: 640
8344, 640, 640 // Relative pts: 640
8952, 640, 640 // Relative pts: 608
9592, 640, 640 // Relative pts: 640
10232, 640, 640 // Relative pts: 640
10904, 640, 640 // Relative pts: 672
It can be seen that all the frames have the same pkt_duration and
nb_samples, but not the same relative pts (the difference from the previous
frame's pts). Most of the frames have the expected relative pts of 640, but
some have 672 (+32) and others have 608 (-32).
Is this behavior normal, or is it an issue with the IP camera?
When resampling the audio (with swr_convert()), I check for lost audio frames
and insert silence samples whenever I detect one. The code looks like this:
// Abbreviated view of putFrame(): shows only the gap check and the update of
// the expected pts; the conversion itself is elided.
void AudioResampler::putFrame(Frame* frame) {
// A pts later than the one predicted from the previous frame is treated as
// lost audio, and fillSilenceSamples() is called to cover it. The comparison
// is strict, so any positive difference triggers it.
if (frame->frame->pts > expectedInputPts) {
this->fillSilenceSamples(frame);
}
// Rest of the function.....
// Predict where the next frame should start: this frame's pts plus its
// pkt_duration.
this->expectedInputPts = frame->frame->pts + frame->frame->pkt_duration;
return;
}
This code works fine when the pts difference between consecutive frames is
constant and equal to pkt_duration, but that is not the case with the IP
camera I am using.
Is there a better approach to audio resampling?
Thanks.
The full code:
/**
 * Sets up a resampler that converts audio from the decoder's channel layout,
 * sample format and sample rate to the encoder's, together with an audio FIFO
 * (in the encoder's sample format and channel count) for the converted
 * samples.
 *
 * @param decoder  Only decoder->codec_ctx is read; it describes the input.
 * @param encoder  Only encoder->codec_ctx is read; it describes the output.
 * @throws std::exception*  Heap-allocated exception (thrown with `new`) when
 *         swr_alloc_set_opts(), swr_init() or av_audio_fifo_alloc() fails.
 *         NOTE(review): std::exception(const char*) is not a standard
 *         constructor (it is an MSVC extension); kept so that existing
 *         handlers continue to match.
 */
AudioResampler::AudioResampler(
    Decoder* decoder,
    EncoderAudio* encoder
) {
    this->decoder_ctx = decoder->codec_ctx;
    this->encoder_ctx = encoder->codec_ctx;

    this->resample_context = swr_alloc_set_opts(NULL,
        this->encoder_ctx->channel_layout, // out_ch_layout
        this->encoder_ctx->sample_fmt,     // out_sample_fmt
        this->encoder_ctx->sample_rate,    // out_sample_rate
        this->decoder_ctx->channel_layout, // in_ch_layout
        this->decoder_ctx->sample_fmt,     // in_sample_fmt
        this->decoder_ctx->sample_rate,    // in_sample_rate
        0,                                 // log_offset
        NULL);                             // log_ctx
    if (this->resample_context == NULL)
        throw new std::exception("Error swr_alloc_set_opts().");

    int ret;
    ret = swr_init(this->resample_context);
    if (ret < 0) {
        // The destructor does not run when a constructor throws, so the
        // context allocated above has to be released here.
        swr_free(&this->resample_context);
        throw new std::exception("Error swr_init().");
    }

    // Start with a one-sample FIFO; it is grown with av_audio_fifo_realloc()
    // before each write.
    this->fifo = av_audio_fifo_alloc(
        this->encoder_ctx->sample_fmt,
        this->encoder_ctx->channels,
        1);
    if (this->fifo == NULL) {
        // Same reasoning as above: do not leak the resample context.
        swr_free(&this->resample_context);
        throw new std::exception("Error av_audio_fifo_alloc().");
    }

    /* Others.. */
    this->pts = 0;
    this->expectedInputPts = 0;
    this->flushed = false;
}
/**
 * Resamples one decoded audio frame to the encoder's format and appends the
 * converted samples to the FIFO.
 *
 * If the frame's pts is later than the pts predicted from the previous frame
 * (previous pts + pkt_duration), fillSilenceSamples() is called first.
 *
 * @param frame  Decoded audio frame; reads frame->frame->pts, pkt_duration,
 *               nb_samples and extended_data.
 * @throws std::exception*  Heap-allocated exception when an allocation, the
 *         conversion, or the FIFO resize/write fails. Temporary buffers are
 *         released before throwing.
 */
void AudioResampler::putFrame(Frame* frame) {
    // NOTE(review): the comparison is strict, so even a few ticks of
    // timestamp jitter on the input are treated as lost audio. A tolerance
    // may be needed for sources whose pts are not perfectly regular.
    if (frame->frame->pts > expectedInputPts) {
        this->fillSilenceSamples(frame);
    }

    // One data pointer per output channel, zero-initialised by calloc().
    uint8_t** converted_input_samples = (uint8_t**)calloc(
        this->encoder_ctx->channels,
        sizeof(*converted_input_samples)
    );
    if (converted_input_samples == NULL)
        throw new std::exception("Error calloc().");

    // Upper bound for the output size: samples still held inside the
    // resampler plus this frame's samples, rescaled to the output rate and
    // rounded up. The real count comes from swr_convert() below.
    int out_samples = (int)av_rescale_rnd(
        swr_get_delay(this->resample_context, this->decoder_ctx->sample_rate) +
            frame->frame->nb_samples,
        this->encoder_ctx->sample_rate,
        this->decoder_ctx->sample_rate,
        AV_ROUND_UP);

    int ret;
    ret = av_samples_alloc(
        converted_input_samples,
        NULL,
        this->encoder_ctx->channels,
        out_samples,
        this->encoder_ctx->sample_fmt,
        0
    );
    if (ret < 0) {
        // No sample buffer was obtained; only the pointer array is owned.
        free(converted_input_samples);
        throw new std::exception("Error av_samples_alloc().");
    }

    ret = swr_convert(
        this->resample_context,
        converted_input_samples,
        out_samples,
        (const uint8_t**)frame->frame->extended_data,
        frame->frame->nb_samples
    );
    if (ret < 0) {
        av_freep(&converted_input_samples[0]);
        free(converted_input_samples);
        throw new std::exception("Error swr_convert().");
    }
    // swr_convert() reports how many samples per channel it produced.
    out_samples = ret;

    // Grow the FIFO so it can hold its current content plus the new samples.
    ret = av_audio_fifo_realloc(
        this->fifo,
        av_audio_fifo_size(this->fifo) + out_samples
    );
    if (ret < 0) {
        av_freep(&converted_input_samples[0]);
        free(converted_input_samples);
        throw new std::exception("Error av_audio_fifo_realloc().");
    }

    ret = av_audio_fifo_write(
        this->fifo,
        (void**)converted_input_samples,
        out_samples
    );
    if (ret < 0) {
        av_freep(&converted_input_samples[0]);
        free(converted_input_samples);
        throw new std::exception("Error av_audio_fifo_write().");
    }

    // All planes come from the single allocation made by av_samples_alloc(),
    // so freeing plane 0 releases the sample data.
    av_freep(&converted_input_samples[0]);
    free(converted_input_samples);

    // Predict where the next frame should start.
    this->expectedInputPts = frame->frame->pts + frame->frame->pkt_duration;
}
/**
 * Appends silence to the FIFO to cover the gap between the expected input
 * pts and the pts of `frame`.
 *
 * The gap is converted to input samples and then to output samples at the
 * encoder's sample rate. The silence is written directly to the FIFO and
 * does not pass through the resampler, so the resampler's internal delay is
 * deliberately not added to its length. If the gap rounds to zero output
 * samples, nothing is written.
 *
 * @param frame  Frame that follows the gap; reads frame->frame->pts and
 *               frame->frame->sample_rate. The caller only invokes this when
 *               frame->frame->pts > expectedInputPts.
 * @throws std::exception*  Heap-allocated exception when an allocation or the
 *         FIFO resize/write fails. Temporary buffers are released before
 *         throwing.
 */
void AudioResampler::fillSilenceSamples(Frame* frame) {
    uint64_t missingTime = frame->frame->pts - expectedInputPts;

    // NOTE(review): this conversion uses only the denominator of the decoder
    // context's time base. It presumes the frame pts are expressed in units
    // of 1/time_base.den seconds -- confirm against how decoder_ctx->time_base
    // is set up (frame pts normally follow the stream time base).
    uint64_t missingSamples = av_rescale(
        missingTime,
        frame->frame->sample_rate,
        this->decoder_ctx->time_base.den
    );

    // Gap length expressed in output samples.
    int out_missingSamples = (int)av_rescale_rnd(
        missingSamples,
        this->encoder_ctx->sample_rate,
        this->decoder_ctx->sample_rate,
        AV_ROUND_NEAR_INF);

    // A gap shorter than half an output sample needs no padding, and asking
    // av_samples_alloc() for a non-positive sample count would fail.
    if (out_missingSamples <= 0)
        return;

    // One data pointer per output channel, zero-initialised by calloc().
    uint8_t** silence_samples = (uint8_t**)calloc(
        this->encoder_ctx->channels,
        sizeof(*silence_samples)
    );
    if (silence_samples == NULL)
        throw new std::exception("Error calloc().");

    // NOTE(review): the buffer is written to the FIFO as returned by
    // av_samples_alloc(); this relies on that function handing back samples
    // that are already silent -- confirm for the libavutil version in use.
    int ret;
    ret = av_samples_alloc(
        silence_samples,
        NULL,
        this->encoder_ctx->channels,
        out_missingSamples,
        this->encoder_ctx->sample_fmt,
        0
    );
    if (ret < 0) {
        // No sample buffer was obtained; only the pointer array is owned.
        free(silence_samples);
        throw new std::exception("Error av_samples_alloc().");
    }

    // Grow the FIFO so it can hold its current content plus the silence.
    ret = av_audio_fifo_realloc(
        this->fifo,
        av_audio_fifo_size(this->fifo) + out_missingSamples
    );
    if (ret < 0) {
        av_freep(&silence_samples[0]);
        free(silence_samples);
        throw new std::exception("Error av_audio_fifo_realloc().");
    }

    ret = av_audio_fifo_write(
        this->fifo,
        (void**)silence_samples,
        out_missingSamples
    );
    if (ret < 0) {
        av_freep(&silence_samples[0]);
        free(silence_samples);
        throw new std::exception("Error av_audio_fifo_write().");
    }

    av_freep(&silence_samples[0]);
    free(silence_samples);
}
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://ffmpeg.org/pipermail/libav-user/attachments/20180613/5ac6e83b/attachment.html>
More information about the Libav-user
mailing list