[FFmpeg-devel] [PATCH v8 1/3] libavdevice/avfoundation.m: use AudioConvert, extend supported formats
Marvin Scholz
epirat07 at gmail.com
Wed Jan 5 16:51:54 EET 2022
On 31 Dec 2021, at 18:42, Romain Beauxis wrote:
> * Implement support for AudioConverter
> * Switch to AudioConverter's API to convert unsupported PCM
> formats (non-interleaved, non-packed) to supported formats
> * Minimize data copy.
>
> This fixes: https://trac.ffmpeg.org/ticket/9502
>
> API ref:
> https://developer.apple.com/documentation/audiotoolbox/audio_converter_services
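>
> For readers unfamiliar with Audio Converter Services, here is a minimal,
> self-contained sketch of the conversion flow this patch builds on. It is
> illustrative only: convert_audio() is not a function from the patch, and
> the patch itself keeps one long-lived converter per stream instead of
> creating one per call.
>
> #include <AudioToolbox/AudioToolbox.h>
>
> /* Convert nb_frames of PCM described by in_fmt (possibly non-interleaved
>  * and/or non-packed) into packed, interleaved signed 32-bit PCM. */
> static OSStatus convert_audio(const AudioStreamBasicDescription *in_fmt,
>                               const AudioBufferList *in_list,
>                               UInt32 nb_frames,
>                               AudioBufferList *out_list)
> {
>     AudioStreamBasicDescription out_fmt = {
>         .mFormatID         = kAudioFormatLinearPCM,
>         .mFormatFlags      = kAudioFormatFlagIsSignedInteger | kAudioFormatFlagIsPacked,
>         .mSampleRate       = in_fmt->mSampleRate,
>         .mChannelsPerFrame = in_fmt->mChannelsPerFrame,
>         .mBitsPerChannel   = 32,
>         .mFramesPerPacket  = 1,
>         .mBytesPerFrame    = in_fmt->mChannelsPerFrame * 4,
>         .mBytesPerPacket   = in_fmt->mChannelsPerFrame * 4,
>     };
>     AudioConverterRef converter = NULL;
>     OSStatus err = AudioConverterNew(in_fmt, &out_fmt, &converter);
>     if (err != noErr)
>         return err;
>     /* Input and output share a sample rate, so the whole buffer can be
>      * converted with a single call. */
>     err = AudioConverterConvertComplexBuffer(converter, nb_frames, in_list, out_list);
>     AudioConverterDispose(converter);
>     return err;
> }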
>
> Signed-off-by: Romain Beauxis <toots at rastageeks.org>
> ---
> [Sorry for the noise but an issue came up with the previous set]
>
> This is the first patch of a series of 3 that fix, clean up, and enhance
> the avfoundation implementation for libavdevice.
>
> These patches come from an actual user-facing application relying on
> libavdevice’s implementation of avfoundation audio input. Without them,
> avfoundation is practically unusable, as it will:
> * Refuse to process certain audio input formats that are actually
> returned by the OS for some users (packed PCM audio)
> * Drop audio frames, resulting in corrupted audio input. This might go
> unnoticed with video frames, but it makes avfoundation essentially
> unusable for audio.
>
> The patches are now included in our production build, so they are
> tested and usable in production.
>
> Changelog for this patch:
> * v2: None
> * v3: None
> * v4: None
> * v5: Fix indentation/wrapping
> * v6: None
> * v7: Removed use of kAudioConverterPropertyCalculateOutputBufferSize
> to calculate the output buffer size. The calculation is trivial (see the
> sketch after this changelog) and the call was failing intermittently for
> no apparent reason
> * v8: None
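>
> For reference, the buffer-size arithmetic that replaces the property query
> boils down to the following (a sketch using the field names this patch
> introduces; it mirrors the hunk in avf_read_packet below):
>
>     nb_samples  = input_size / (ctx->audio_channels * ctx->input_bytes_per_sample);
>     output_size = nb_samples * ctx->output_bytes_per_sample * ctx->audio_channels;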
>
The patchset fails to apply for me:
Applying: libavdevice/avfoundation.m: use AudioConvert, extend supported formats
error: corrupt patch at line 191
Patch failed at 0001 libavdevice/avfoundation.m: use AudioConvert, extend supported formats
>
> libavdevice/avfoundation.m | 255 +++++++++++++++++++++----------------
> 1 file changed, 145 insertions(+), 110 deletions(-)
>
> diff --git a/libavdevice/avfoundation.m b/libavdevice/avfoundation.m
> index 0cd6e646d5..738cd93375 100644
> --- a/libavdevice/avfoundation.m
> +++ b/libavdevice/avfoundation.m
> @@ -111,16 +111,11 @@
>
> int num_video_devices;
>
> - int audio_channels;
> - int audio_bits_per_sample;
> - int audio_float;
> - int audio_be;
> - int audio_signed_integer;
> - int audio_packed;
> - int audio_non_interleaved;
> -
> - int32_t *audio_buffer;
> - int audio_buffer_size;
> + UInt32 audio_buffers;
> + UInt32 audio_channels;
> + UInt32 input_bytes_per_sample;
> + UInt32 output_bytes_per_sample;
> + AudioConverterRef audio_converter;
>
> enum AVPixelFormat pixel_format;
>
> @@ -299,7 +294,10 @@ static void destroy_context(AVFContext* ctx)
> ctx->avf_delegate = NULL;
> ctx->avf_audio_delegate = NULL;
>
> - av_freep(&ctx->audio_buffer);
> + if (ctx->audio_converter) {
> + AudioConverterDispose(ctx->audio_converter);
> + ctx->audio_converter = NULL;
> + }
>
> pthread_mutex_destroy(&ctx->frame_lock);
>
> @@ -673,6 +671,10 @@ static int get_audio_config(AVFormatContext *s)
> AVFContext *ctx = (AVFContext*)s->priv_data;
> CMFormatDescriptionRef format_desc;
> AVStream* stream = avformat_new_stream(s, NULL);
> + AudioStreamBasicDescription output_format = {0};
> + int audio_bits_per_sample, audio_float, audio_be;
> + int audio_signed_integer, audio_packed, audio_non_interleaved;
> + int must_convert = 0;
>
> if (!stream) {
> return 1;
> @@ -690,60 +692,97 @@ static int get_audio_config(AVFormatContext *s)
> avpriv_set_pts_info(stream, 64, 1, avf_time_base);
>
> format_desc = CMSampleBufferGetFormatDescription(ctx->current_audio_frame);
> - const AudioStreamBasicDescription *basic_desc = CMAudioFormatDescriptionGetStreamBasicDescription(format_desc);
> + const AudioStreamBasicDescription *input_format = CMAudioFormatDescriptionGetStreamBasicDescription(format_desc);
>
> - if (!basic_desc) {
> + if (!input_format) {
> unlock_frames(ctx);
> av_log(s, AV_LOG_ERROR, "audio format not available\n");
> return 1;
> }
>
> + if (input_format->mFormatID != kAudioFormatLinearPCM) {
> + unlock_frames(ctx);
> + av_log(s, AV_LOG_ERROR, "only PCM audio format are supported at the moment\n");
> + return 1;
> + }
> +
> stream->codecpar->codec_type = AVMEDIA_TYPE_AUDIO;
> - stream->codecpar->sample_rate = basic_desc->mSampleRate;
> - stream->codecpar->channels = basic_desc->mChannelsPerFrame;
> + stream->codecpar->sample_rate = input_format->mSampleRate;
> + stream->codecpar->channels = input_format->mChannelsPerFrame;
> stream->codecpar->channel_layout = av_get_default_channel_layout(stream->codecpar->channels);
>
> - ctx->audio_channels = basic_desc->mChannelsPerFrame;
> - ctx->audio_bits_per_sample = basic_desc->mBitsPerChannel;
> - ctx->audio_float = basic_desc->mFormatFlags & kAudioFormatFlagIsFloat;
> - ctx->audio_be = basic_desc->mFormatFlags & kAudioFormatFlagIsBigEndian;
> - ctx->audio_signed_integer = basic_desc->mFormatFlags & kAudioFormatFlagIsSignedInteger;
> - ctx->audio_packed = basic_desc->mFormatFlags & kAudioFormatFlagIsPacked;
> - ctx->audio_non_interleaved = basic_desc->mFormatFlags & kAudioFormatFlagIsNonInterleaved;
> -
> - if (basic_desc->mFormatID == kAudioFormatLinearPCM &&
> - ctx->audio_float &&
> - ctx->audio_bits_per_sample == 32 &&
> - ctx->audio_packed) {
> - stream->codecpar->codec_id = ctx->audio_be ? AV_CODEC_ID_PCM_F32BE : AV_CODEC_ID_PCM_F32LE;
> - } else if (basic_desc->mFormatID == kAudioFormatLinearPCM &&
> - ctx->audio_signed_integer &&
> - ctx->audio_bits_per_sample == 16 &&
> - ctx->audio_packed) {
> - stream->codecpar->codec_id = ctx->audio_be ? AV_CODEC_ID_PCM_S16BE : AV_CODEC_ID_PCM_S16LE;
> - } else if (basic_desc->mFormatID == kAudioFormatLinearPCM &&
> - ctx->audio_signed_integer &&
> - ctx->audio_bits_per_sample == 24 &&
> - ctx->audio_packed) {
> - stream->codecpar->codec_id = ctx->audio_be ? AV_CODEC_ID_PCM_S24BE : AV_CODEC_ID_PCM_S24LE;
> - } else if (basic_desc->mFormatID == kAudioFormatLinearPCM &&
> - ctx->audio_signed_integer &&
> - ctx->audio_bits_per_sample == 32 &&
> - ctx->audio_packed) {
> - stream->codecpar->codec_id = ctx->audio_be ? AV_CODEC_ID_PCM_S32BE : AV_CODEC_ID_PCM_S32LE;
> + audio_bits_per_sample = input_format->mBitsPerChannel;
> + audio_float = input_format->mFormatFlags & kAudioFormatFlagIsFloat;
> + audio_be = input_format->mFormatFlags & kAudioFormatFlagIsBigEndian;
> + audio_signed_integer = input_format->mFormatFlags & kAudioFormatFlagIsSignedInteger;
> + audio_packed = input_format->mFormatFlags & kAudioFormatFlagIsPacked;
> + audio_non_interleaved = input_format->mFormatFlags & kAudioFormatFlagIsNonInterleaved;
> +
> + ctx->input_bytes_per_sample = input_format->mBitsPerChannel >> 3;
> + ctx->output_bytes_per_sample = ctx->input_bytes_per_sample;
> + ctx->audio_channels = input_format->mChannelsPerFrame;
> +
> + if (audio_non_interleaved) {
> + ctx->audio_buffers = input_format->mChannelsPerFrame;
> } else {
> - unlock_frames(ctx);
> - av_log(s, AV_LOG_ERROR, "audio format is not supported\n");
> - return 1;
> + ctx->audio_buffers = 1;
> + }
> +
> + if (audio_non_interleaved || !audio_packed) {
> + must_convert = 1;
> + }
> +
> + output_format.mBitsPerChannel = input_format->mBitsPerChannel;
> + output_format.mChannelsPerFrame = ctx->audio_channels;
> + output_format.mFramesPerPacket = 1;
> + output_format.mBytesPerFrame = output_format.mChannelsPerFrame * ctx->input_bytes_per_sample;
> + output_format.mBytesPerPacket = output_format.mFramesPerPacket * output_format.mBytesPerFrame;
> + output_format.mFormatFlags = kAudioFormatFlagIsPacked | audio_be;
> + output_format.mFormatID = kAudioFormatLinearPCM;
> + output_format.mReserved = 0;
> + output_format.mSampleRate = input_format->mSampleRate;
> +
> + if (audio_float &&
> + audio_bits_per_sample == 32) {
> + stream->codecpar->codec_id = audio_be ? AV_CODEC_ID_PCM_F32BE : AV_CODEC_ID_PCM_F32LE;
> + output_format.mFormatFlags |= kAudioFormatFlagIsFloat;
> + } else if (audio_float &&
> + audio_bits_per_sample == 64) {
> + stream->codecpar->codec_id = audio_be ? AV_CODEC_ID_PCM_F64BE : AV_CODEC_ID_PCM_F64LE;
> + output_format.mFormatFlags |= kAudioFormatFlagIsFloat;
> + } else if (audio_signed_integer &&
> + audio_bits_per_sample == 8) {
> + stream->codecpar->codec_id = AV_CODEC_ID_PCM_S8;
> + output_format.mFormatFlags |= kAudioFormatFlagIsSignedInteger;
> + } else if (audio_signed_integer &&
> + audio_bits_per_sample == 16) {
> + stream->codecpar->codec_id = audio_be ? AV_CODEC_ID_PCM_S16BE : AV_CODEC_ID_PCM_S16LE;
> + output_format.mFormatFlags |= kAudioFormatFlagIsSignedInteger;
> + } else if (audio_signed_integer &&
> + audio_bits_per_sample == 24) {
> + stream->codecpar->codec_id = audio_be ? AV_CODEC_ID_PCM_S24BE : AV_CODEC_ID_PCM_S24LE;
> + output_format.mFormatFlags |= kAudioFormatFlagIsSignedInteger;
> + } else if (audio_signed_integer &&
> + audio_bits_per_sample == 32) {
> + stream->codecpar->codec_id = audio_be ? AV_CODEC_ID_PCM_S32BE : AV_CODEC_ID_PCM_S32LE;
> + output_format.mFormatFlags |= kAudioFormatFlagIsSignedInteger;
> + } else if (audio_signed_integer &&
> + audio_bits_per_sample == 64) {
> + stream->codecpar->codec_id = audio_be ? AV_CODEC_ID_PCM_S64BE : AV_CODEC_ID_PCM_S64LE;
> + output_format.mFormatFlags |= kAudioFormatFlagIsSignedInteger;
> + } else {
> + stream->codecpar->codec_id = audio_be ? AV_CODEC_ID_PCM_S32BE : AV_CODEC_ID_PCM_S32LE;
> + ctx->output_bytes_per_sample = 4;
> + output_format.mBitsPerChannel = 32;
> + output_format.mFormatFlags |= kAudioFormatFlagIsSignedInteger;
> + must_convert = 1;
> }
>
> - if (ctx->audio_non_interleaved) {
> - CMBlockBufferRef block_buffer = CMSampleBufferGetDataBuffer(ctx->current_audio_frame);
> - ctx->audio_buffer_size = CMBlockBufferGetDataLength(block_buffer);
> - ctx->audio_buffer = av_malloc(ctx->audio_buffer_size);
> - if (!ctx->audio_buffer) {
> + if (must_convert) {
> + OSStatus ret = AudioConverterNew(input_format, &output_format, &ctx->audio_converter);
> + if (ret != noErr) {
> unlock_frames(ctx);
> - av_log(s, AV_LOG_ERROR, "error allocating audio buffer\n");
> + av_log(s, AV_LOG_ERROR, "Error while allocating audio converter\n");
> return 1;
> }
> }
> @@ -1048,6 +1087,7 @@ static int copy_cvpixelbuffer(AVFormatContext *s,
>
> static int avf_read_packet(AVFormatContext *s, AVPacket *pkt)
> {
> + OSStatus ret;
> AVFContext* ctx = (AVFContext*)s->priv_data;
>
> do {
> @@ -1091,7 +1131,7 @@ static int avf_read_packet(AVFormatContext *s, AVPacket *pkt)
> status = copy_cvpixelbuffer(s, image_buffer, pkt);
> } else {
> status = 0;
> - OSStatus ret = CMBlockBufferCopyDataBytes(block_buffer, 0, pkt->size, pkt->data);
> + ret = CMBlockBufferCopyDataBytes(block_buffer, 0, pkt->size, pkt->data);
> if (ret != kCMBlockBufferNoErr) {
> status = AVERROR(EIO);
> }
> @@ -1105,21 +1145,60 @@ static int avf_read_packet(AVFormatContext *s, AVPacket *pkt)
> }
> } else if (ctx->current_audio_frame != nil) {
> CMBlockBufferRef block_buffer = CMSampleBufferGetDataBuffer(ctx->current_audio_frame);
> - int block_buffer_size = CMBlockBufferGetDataLength(block_buffer);
>
> - if (!block_buffer || !block_buffer_size) {
> - unlock_frames(ctx);
> - return AVERROR(EIO);
> - }
> + size_t input_size = CMBlockBufferGetDataLength(block_buffer);
> + int buffer_size = input_size / ctx->audio_buffers;
> + int nb_samples = input_size / (ctx->audio_channels * ctx->input_bytes_per_sample);
> + int output_size = nb_samples * ctx->output_bytes_per_sample * ctx->audio_channels;
>
> - if (ctx->audio_non_interleaved && block_buffer_size > ctx->audio_buffer_size) {
> - unlock_frames(ctx);
> - return AVERROR_BUFFER_TOO_SMALL;
> + status = av_new_packet(pkt, output_size);
> + if (status < 0) {
> + CFRelease(audio_frame);
> + return status;
> }
>
> - if (av_new_packet(pkt, block_buffer_size) < 0) {
> - unlock_frames(ctx);
> - return AVERROR(EIO);
> + if (ctx->audio_converter) {
> + size_t input_buffer_size = offsetof(AudioBufferList, mBuffers[0]) + (sizeof(AudioBuffer) * ctx->audio_buffers);
> + AudioBufferList *input_buffer = av_malloc(input_buffer_size);
> +
> + input_buffer->mNumberBuffers = ctx->audio_buffers;
> +
> + for (int c = 0; c < ctx->audio_buffers; c++) {
> + input_buffer->mBuffers[c].mNumberChannels = 1;
> +
> + ret = CMBlockBufferGetDataPointer(block_buffer, c * buffer_size, (size_t *)&input_buffer->mBuffers[c].mDataByteSize, NULL, (void *)&input_buffer->mBuffers[c].mData);
> +
> + if (ret != kCMBlockBufferNoErr) {
> + av_free(input_buffer);
> + unlock_frames(ctx);
> + return AVERROR(EIO);
> + }
> + }
> +
> + AudioBufferList output_buffer = {
> + .mNumberBuffers = 1,
> + .mBuffers[0] = {
> + .mNumberChannels = ctx->audio_channels,
> + .mDataByteSize = pkt->size,
> + .mData = pkt->data
> + }
> + };
> +
> + ret = AudioConverterConvertComplexBuffer(ctx->audio_converter, nb_samples, input_buffer, &output_buffer);
> + av_free(input_buffer);
> +
> + if (ret != noErr) {
> + unlock_frames(ctx);
> + return AVERROR(EIO);
> + }
> +
> + pkt->size = output_buffer.mBuffers[0].mDataByteSize;
> + } else {
> + ret = CMBlockBufferCopyDataBytes(block_buffer, 0, pkt->size, pkt->data);
> + if (ret != kCMBlockBufferNoErr) {
> + unlock_frames(ctx);
> + return AVERROR(EIO);
> + }
> }
>
> CMItemCount count;
> @@ -1133,54 +1212,10 @@ static int avf_read_packet(AVFormatContext *s, AVPacket *pkt)
> pkt->stream_index = ctx->audio_stream_index;
> pkt->flags |= AV_PKT_FLAG_KEY;
>
> - if (ctx->audio_non_interleaved) {
> - int sample, c, shift, num_samples;
> -
> - OSStatus ret = CMBlockBufferCopyDataBytes(block_buffer, 0, pkt->size, ctx->audio_buffer);
> - if (ret != kCMBlockBufferNoErr) {
> - unlock_frames(ctx);
> - return AVERROR(EIO);
> - }
> -
> - num_samples = pkt->size / (ctx->audio_channels * (ctx->audio_bits_per_sample >> 3));
> -
> - // transform decoded frame into output format
> - #define INTERLEAVE_OUTPUT(bps) \
> - { \
> - int##bps##_t **src; \
> - int##bps##_t *dest; \
> - src = av_malloc(ctx->audio_channels * sizeof(int##bps##_t*)); \
> - if (!src) { \
> - unlock_frames(ctx); \
> - return AVERROR(EIO); \
> - } \
> - \
> - for (c = 0; c < ctx->audio_channels; c++) { \
> - src[c] = ((int##bps##_t*)ctx->audio_buffer) + c * num_samples; \
> - } \
> - dest = (int##bps##_t*)pkt->data; \
> - shift = bps - ctx->audio_bits_per_sample; \
> - for (sample = 0; sample < num_samples; sample++) \
> - for (c = 0; c < ctx->audio_channels; c++) \
> - *dest++ = src[c][sample] << shift; \
> - av_freep(&src); \
> - }
> -
> - if (ctx->audio_bits_per_sample <= 16) {
> - INTERLEAVE_OUTPUT(16)
> - } else {
> - INTERLEAVE_OUTPUT(32)
> - }
> - } else {
> - OSStatus ret = CMBlockBufferCopyDataBytes(block_buffer, 0, pkt->size, pkt->data);
> - if (ret != kCMBlockBufferNoErr) {
> - unlock_frames(ctx);
> - return AVERROR(EIO);
> - }
> - }
> -
> CFRelease(ctx->current_audio_frame);
> ctx->current_audio_frame = nil;
> +
> + unlock_frames(ctx);
> } else {
> pkt->data = NULL;
> unlock_frames(ctx);
> --
> 2.32.0 (Apple Git-132)
>