[FFmpeg-devel] [PATCH] libavdevice/avfoundation.m: use AudioConvert, extend supported formats
Thilo Borgmann
thilo.borgmann at mail.de
Tue Dec 7 15:21:01 EET 2021
Hi,
I will look at this soon (tm); ping me if I don't.
Thanks,
Thilo
> Am 02.12.2021 um 00:08 schrieb Romain Beauxis <toots at rastageeks.org>:
>
> Hi there!
>
> Anyone interested in the patch below? It fixes an issue with some
> macOS AVFoundation input devices that return formats currently not
> supported by the implementation. I also have another important bugfix
> in the concurrency model of the implementation waiting for this to be
> merged first.
>
> Otherwise, is there anything I can do to help get this merged?
>
> Thanks for any insight!
> -- Romain
>
>> On Nov 30, 2021, at 12:02 AM, Romain Beauxis <romain.beauxis at gmail.com> wrote:
>>
>> * Implement support for AudioConverter
>> * Switch to AudioConverter's API to convert unsupported PCM
>> formats (non-interleaved, non-packed) to supported formats
>> * Minimize data copy.
>>
>> This fixes: https://trac.ffmpeg.org/ticket/9502
>>
>> API ref: https://developer.apple.com/documentation/audiotoolbox/audio_converter_services
>>
>> Signed-off-by: Romain Beauxis <toots at rastageeks.org>
>> ---
>> libavdevice/avfoundation.m | 250 +++++++++++++++++++++----------------
>> 1 file changed, 144 insertions(+), 106 deletions(-)
>>
>> diff --git a/libavdevice/avfoundation.m b/libavdevice/avfoundation.m
>> index 0cd6e646d5..79c9207cfa 100644
>> --- a/libavdevice/avfoundation.m
>> +++ b/libavdevice/avfoundation.m
>> @@ -111,16 +111,10 @@
>>
>> int num_video_devices;
>>
>> - int audio_channels;
>> - int audio_bits_per_sample;
>> - int audio_float;
>> - int audio_be;
>> - int audio_signed_integer;
>> - int audio_packed;
>> - int audio_non_interleaved;
>> -
>> - int32_t *audio_buffer;
>> - int audio_buffer_size;
>> + UInt32 audio_buffers;
>> + UInt32 audio_channels;
>> + UInt32 bytes_per_sample;
>> + AudioConverterRef audio_converter;
>>
>> enum AVPixelFormat pixel_format;
>>
>> @@ -299,7 +293,10 @@ static void destroy_context(AVFContext* ctx)
>> ctx->avf_delegate = NULL;
>> ctx->avf_audio_delegate = NULL;
>>
>> - av_freep(&ctx->audio_buffer);
>> + if (ctx->audio_converter) {
>> + AudioConverterDispose(ctx->audio_converter);
>> + ctx->audio_converter = NULL;
>> + }
>>
>> pthread_mutex_destroy(&ctx->frame_lock);
>>
>> @@ -673,6 +670,10 @@ static int get_audio_config(AVFormatContext *s)
>> AVFContext *ctx = (AVFContext*)s->priv_data;
>> CMFormatDescriptionRef format_desc;
>> AVStream* stream = avformat_new_stream(s, NULL);
>> + AudioStreamBasicDescription output_format = {0};
>> + int audio_bits_per_sample, audio_float, audio_be;
>> + int audio_signed_integer, audio_packed, audio_non_interleaved;
>> + int must_convert = 0;
>>
>> if (!stream) {
>> return 1;
>> @@ -690,60 +691,95 @@ static int get_audio_config(AVFormatContext *s)
>> avpriv_set_pts_info(stream, 64, 1, avf_time_base);
>>
>> format_desc = CMSampleBufferGetFormatDescription(ctx->current_audio_frame);
>> - const AudioStreamBasicDescription *basic_desc = CMAudioFormatDescriptionGetStreamBasicDescription(format_desc);
>> + const AudioStreamBasicDescription *input_format = CMAudioFormatDescriptionGetStreamBasicDescription(format_desc);
>>
>> - if (!basic_desc) {
>> + if (!input_format) {
>> unlock_frames(ctx);
>> av_log(s, AV_LOG_ERROR, "audio format not available\n");
>> return 1;
>> }
>>
>> + if (input_format->mFormatID != kAudioFormatLinearPCM) {
>> + unlock_frames(ctx);
>> + av_log(s, AV_LOG_ERROR, "only PCM audio format are supported at the moment\n");
>> + return 1;
>> + }
>> +
>> stream->codecpar->codec_type = AVMEDIA_TYPE_AUDIO;
>> - stream->codecpar->sample_rate = basic_desc->mSampleRate;
>> - stream->codecpar->channels = basic_desc->mChannelsPerFrame;
>> + stream->codecpar->sample_rate = input_format->mSampleRate;
>> + stream->codecpar->channels = input_format->mChannelsPerFrame;
>> stream->codecpar->channel_layout = av_get_default_channel_layout(stream->codecpar->channels);
>>
>> - ctx->audio_channels = basic_desc->mChannelsPerFrame;
>> - ctx->audio_bits_per_sample = basic_desc->mBitsPerChannel;
>> - ctx->audio_float = basic_desc->mFormatFlags & kAudioFormatFlagIsFloat;
>> - ctx->audio_be = basic_desc->mFormatFlags & kAudioFormatFlagIsBigEndian;
>> - ctx->audio_signed_integer = basic_desc->mFormatFlags & kAudioFormatFlagIsSignedInteger;
>> - ctx->audio_packed = basic_desc->mFormatFlags & kAudioFormatFlagIsPacked;
>> - ctx->audio_non_interleaved = basic_desc->mFormatFlags & kAudioFormatFlagIsNonInterleaved;
>> -
>> - if (basic_desc->mFormatID == kAudioFormatLinearPCM &&
>> - ctx->audio_float &&
>> - ctx->audio_bits_per_sample == 32 &&
>> - ctx->audio_packed) {
>> - stream->codecpar->codec_id = ctx->audio_be ? AV_CODEC_ID_PCM_F32BE : AV_CODEC_ID_PCM_F32LE;
>> - } else if (basic_desc->mFormatID == kAudioFormatLinearPCM &&
>> - ctx->audio_signed_integer &&
>> - ctx->audio_bits_per_sample == 16 &&
>> - ctx->audio_packed) {
>> - stream->codecpar->codec_id = ctx->audio_be ? AV_CODEC_ID_PCM_S16BE : AV_CODEC_ID_PCM_S16LE;
>> - } else if (basic_desc->mFormatID == kAudioFormatLinearPCM &&
>> - ctx->audio_signed_integer &&
>> - ctx->audio_bits_per_sample == 24 &&
>> - ctx->audio_packed) {
>> - stream->codecpar->codec_id = ctx->audio_be ? AV_CODEC_ID_PCM_S24BE : AV_CODEC_ID_PCM_S24LE;
>> - } else if (basic_desc->mFormatID == kAudioFormatLinearPCM &&
>> - ctx->audio_signed_integer &&
>> - ctx->audio_bits_per_sample == 32 &&
>> - ctx->audio_packed) {
>> - stream->codecpar->codec_id = ctx->audio_be ? AV_CODEC_ID_PCM_S32BE : AV_CODEC_ID_PCM_S32LE;
>> + audio_bits_per_sample = input_format->mBitsPerChannel;
>> + audio_float = input_format->mFormatFlags & kAudioFormatFlagIsFloat;
>> + audio_be = input_format->mFormatFlags & kAudioFormatFlagIsBigEndian;
>> + audio_signed_integer = input_format->mFormatFlags & kAudioFormatFlagIsSignedInteger;
>> + audio_packed = input_format->mFormatFlags & kAudioFormatFlagIsPacked;
>> + audio_non_interleaved = input_format->mFormatFlags & kAudioFormatFlagIsNonInterleaved;
>> +
>> + ctx->bytes_per_sample = input_format->mBitsPerChannel >> 3;
>> + ctx->audio_channels = input_format->mChannelsPerFrame;
>> +
>> + if (audio_non_interleaved) {
>> + ctx->audio_buffers = input_format->mChannelsPerFrame;
>> } else {
>> - unlock_frames(ctx);
>> - av_log(s, AV_LOG_ERROR, "audio format is not supported\n");
>> - return 1;
>> + ctx->audio_buffers = 1;
>> + }
>> +
>> + if (audio_non_interleaved || !audio_packed) {
>> + must_convert = 1;
>> + }
>> +
>> + output_format.mBitsPerChannel = input_format->mBitsPerChannel;
>> + output_format.mChannelsPerFrame = ctx->audio_channels;
>> + output_format.mFramesPerPacket = 1;
>> + output_format.mBytesPerFrame = output_format.mChannelsPerFrame * ctx->bytes_per_sample;
>> + output_format.mBytesPerPacket = output_format.mFramesPerPacket * output_format.mBytesPerFrame;
>> + output_format.mFormatFlags = kAudioFormatFlagIsPacked | audio_be;
>> + output_format.mFormatID = kAudioFormatLinearPCM;
>> + output_format.mReserved = 0;
>> + output_format.mSampleRate = input_format->mSampleRate;
>> +
>> + if (audio_float &&
>> + audio_bits_per_sample == 32) {
>> + stream->codecpar->codec_id = audio_be ? AV_CODEC_ID_PCM_F32BE : AV_CODEC_ID_PCM_F32LE;
>> + output_format.mFormatFlags |= kAudioFormatFlagIsFloat;
>> + } else if (audio_float &&
>> + audio_bits_per_sample == 64) {
>> + stream->codecpar->codec_id = audio_be ? AV_CODEC_ID_PCM_F64BE : AV_CODEC_ID_PCM_F64LE;
>> + output_format.mFormatFlags |= kAudioFormatFlagIsFloat;
>> + } else if (audio_signed_integer &&
>> + audio_bits_per_sample == 8) {
>> + stream->codecpar->codec_id = AV_CODEC_ID_PCM_S8;
>> + output_format.mFormatFlags |= kAudioFormatFlagIsSignedInteger;
>> + } else if (audio_signed_integer &&
>> + audio_bits_per_sample == 16) {
>> + stream->codecpar->codec_id = audio_be ? AV_CODEC_ID_PCM_S16BE : AV_CODEC_ID_PCM_S16LE;
>> + output_format.mFormatFlags |= kAudioFormatFlagIsSignedInteger;
>> + } else if (audio_signed_integer &&
>> + audio_bits_per_sample == 24) {
>> + stream->codecpar->codec_id = audio_be ? AV_CODEC_ID_PCM_S24BE : AV_CODEC_ID_PCM_S24LE;
>> + output_format.mFormatFlags |= kAudioFormatFlagIsSignedInteger;
>> + } else if (audio_signed_integer &&
>> + audio_bits_per_sample == 32) {
>> + stream->codecpar->codec_id = audio_be ? AV_CODEC_ID_PCM_S32BE : AV_CODEC_ID_PCM_S32LE;
>> + output_format.mFormatFlags |= kAudioFormatFlagIsSignedInteger;
>> + } else if (audio_signed_integer &&
>> + audio_bits_per_sample == 64) {
>> + stream->codecpar->codec_id = audio_be ? AV_CODEC_ID_PCM_S64BE : AV_CODEC_ID_PCM_S64LE;
>> + output_format.mFormatFlags |= kAudioFormatFlagIsSignedInteger;
>> + } else {
>> + stream->codecpar->codec_id = audio_be ? AV_CODEC_ID_PCM_S32BE : AV_CODEC_ID_PCM_S32LE;
>> + output_format.mBitsPerChannel = 32;
>> + output_format.mFormatFlags |= kAudioFormatFlagIsSignedInteger;
>> + must_convert = 1;
>> }
>>
>> - if (ctx->audio_non_interleaved) {
>> - CMBlockBufferRef block_buffer = CMSampleBufferGetDataBuffer(ctx->current_audio_frame);
>> - ctx->audio_buffer_size = CMBlockBufferGetDataLength(block_buffer);
>> - ctx->audio_buffer = av_malloc(ctx->audio_buffer_size);
>> - if (!ctx->audio_buffer) {
>> + if (must_convert) {
>> + OSStatus ret = AudioConverterNew(input_format, &output_format, &ctx->audio_converter);
>> + if (ret != noErr) {
>> unlock_frames(ctx);
>> - av_log(s, AV_LOG_ERROR, "error allocating audio buffer\n");
>> + av_log(s, AV_LOG_ERROR, "Error while allocating audio converter\n");
>> return 1;
>> }
>> }
>> @@ -1048,6 +1084,7 @@ static int copy_cvpixelbuffer(AVFormatContext *s,
>>
>> static int avf_read_packet(AVFormatContext *s, AVPacket *pkt)
>> {
>> + OSStatus ret;
>> AVFContext* ctx = (AVFContext*)s->priv_data;
>>
>> do {
>> @@ -1091,7 +1128,7 @@ static int avf_read_packet(AVFormatContext *s, AVPacket *pkt)
>> status = copy_cvpixelbuffer(s, image_buffer, pkt);
>> } else {
>> status = 0;
>> - OSStatus ret = CMBlockBufferCopyDataBytes(block_buffer, 0, pkt->size, pkt->data);
>> + ret = CMBlockBufferCopyDataBytes(block_buffer, 0, pkt->size, pkt->data);
>> if (ret != kCMBlockBufferNoErr) {
>> status = AVERROR(EIO);
>> }
>> @@ -1105,82 +1142,83 @@ static int avf_read_packet(AVFormatContext *s, AVPacket *pkt)
>> }
>> } else if (ctx->current_audio_frame != nil) {
>> CMBlockBufferRef block_buffer = CMSampleBufferGetDataBuffer(ctx->current_audio_frame);
>> - int block_buffer_size = CMBlockBufferGetDataLength(block_buffer);
>>
>> - if (!block_buffer || !block_buffer_size) {
>> - unlock_frames(ctx);
>> - return AVERROR(EIO);
>> - }
>> + size_t input_size = CMBlockBufferGetDataLength(block_buffer);
>> + int buffer_size = input_size / ctx->audio_buffers;
>> + int nb_samples = input_size / (ctx->audio_channels * ctx->bytes_per_sample);
>> + int output_size = buffer_size;
>>
>> - if (ctx->audio_non_interleaved && block_buffer_size > ctx->audio_buffer_size) {
>> + UInt32 size = sizeof(output_size);
>> + ret = AudioConverterGetProperty(ctx->audio_converter, kAudioConverterPropertyCalculateOutputBufferSize, &size, &output_size);
>> + if (ret != noErr) {
>> unlock_frames(ctx);
>> - return AVERROR_BUFFER_TOO_SMALL;
>> + return AVERROR(EIO);
>> }
>>
>> - if (av_new_packet(pkt, block_buffer_size) < 0) {
>> + if (av_new_packet(pkt, output_size) < 0) {
>> unlock_frames(ctx);
>> return AVERROR(EIO);
>> }
>>
>> - CMItemCount count;
>> - CMSampleTimingInfo timing_info;
>> + if (ctx->audio_converter) {
>> + size_t input_buffer_size = offsetof(AudioBufferList, mBuffers[0]) + (sizeof(AudioBuffer) * ctx->audio_buffers);
>> + AudioBufferList *input_buffer = av_malloc(input_buffer_size);
>>
>> - if (CMSampleBufferGetOutputSampleTimingInfoArray(ctx->current_audio_frame, 1, &timing_info, &count) == noErr) {
>> - AVRational timebase_q = av_make_q(1, timing_info.presentationTimeStamp.timescale);
>> - pkt->pts = pkt->dts = av_rescale_q(timing_info.presentationTimeStamp.value, timebase_q, avf_time_base_q);
>> - }
>> + input_buffer->mNumberBuffers = ctx->audio_buffers;
>>
>> - pkt->stream_index = ctx->audio_stream_index;
>> - pkt->flags |= AV_PKT_FLAG_KEY;
>> + for (int c = 0; c < ctx->audio_buffers; c++) {
>> + input_buffer->mBuffers[c].mNumberChannels = 1;
>>
>> - if (ctx->audio_non_interleaved) {
>> - int sample, c, shift, num_samples;
>> + ret = CMBlockBufferGetDataPointer(block_buffer, c * buffer_size, (size_t *)&input_buffer->mBuffers[c].mDataByteSize, NULL, (void *)&input_buffer->mBuffers[c].mData);
>>
>> - OSStatus ret = CMBlockBufferCopyDataBytes(block_buffer, 0, pkt->size, ctx->audio_buffer);
>> - if (ret != kCMBlockBufferNoErr) {
>> - unlock_frames(ctx);
>> - return AVERROR(EIO);
>> + if (ret != kCMBlockBufferNoErr) {
>> + av_free(input_buffer);
>> + unlock_frames(ctx);
>> + return AVERROR(EIO);
>> + }
>> }
>>
>> - num_samples = pkt->size / (ctx->audio_channels * (ctx->audio_bits_per_sample >> 3));
>> -
>> - // transform decoded frame into output format
>> - #define INTERLEAVE_OUTPUT(bps) \
>> - { \
>> - int##bps##_t **src; \
>> - int##bps##_t *dest; \
>> - src = av_malloc(ctx->audio_channels * sizeof(int##bps##_t*)); \
>> - if (!src) { \
>> - unlock_frames(ctx); \
>> - return AVERROR(EIO); \
>> - } \
>> - \
>> - for (c = 0; c < ctx->audio_channels; c++) { \
>> - src[c] = ((int##bps##_t*)ctx->audio_buffer) + c * num_samples; \
>> - } \
>> - dest = (int##bps##_t*)pkt->data; \
>> - shift = bps - ctx->audio_bits_per_sample; \
>> - for (sample = 0; sample < num_samples; sample++) \
>> - for (c = 0; c < ctx->audio_channels; c++) \
>> - *dest++ = src[c][sample] << shift; \
>> - av_freep(&src); \
>> - }
>> + AudioBufferList output_buffer = {
>> + .mNumberBuffers = 1,
>> + .mBuffers[0] = {
>> + .mNumberChannels = ctx->audio_channels,
>> + .mDataByteSize = pkt->size,
>> + .mData = pkt->data
>> + }
>> + };
>>
>> - if (ctx->audio_bits_per_sample <= 16) {
>> - INTERLEAVE_OUTPUT(16)
>> - } else {
>> - INTERLEAVE_OUTPUT(32)
>> - }
>> - } else {
>> - OSStatus ret = CMBlockBufferCopyDataBytes(block_buffer, 0, pkt->size, pkt->data);
>> - if (ret != kCMBlockBufferNoErr) {
>> + ret = AudioConverterConvertComplexBuffer(ctx->audio_converter, nb_samples, input_buffer, &output_buffer);
>> + av_free(input_buffer);
>> +
>> + if (ret != noErr) {
>> unlock_frames(ctx);
>> return AVERROR(EIO);
>> }
>> +
>> + pkt->size = output_buffer.mBuffers[0].mDataByteSize;
>> + } else {
>> + ret = CMBlockBufferCopyDataBytes(block_buffer, 0, pkt->size, pkt->data);
>> + if (ret != kCMBlockBufferNoErr) {
>> + unlock_frames(ctx);
>> + return AVERROR(EIO);
>> + }
>> }
>>
>> + CMItemCount count;
>> + CMSampleTimingInfo timing_info;
>> +
>> + if (CMSampleBufferGetOutputSampleTimingInfoArray(ctx->current_audio_frame, 1, &timing_info, &count) == noErr) {
>> + AVRational timebase_q = av_make_q(1, timing_info.presentationTimeStamp.timescale);
>> + pkt->pts = pkt->dts = av_rescale_q(timing_info.presentationTimeStamp.value, timebase_q, avf_time_base_q);
>> + }
>> +
>> + pkt->stream_index = ctx->audio_stream_index;
>> + pkt->flags |= AV_PKT_FLAG_KEY;
>> +
>> CFRelease(ctx->current_audio_frame);
>> ctx->current_audio_frame = nil;
>> +
>> + unlock_frames(ctx);
>> } else {
>> pkt->data = NULL;
>> unlock_frames(ctx);
>> --
>> 2.30.1 (Apple Git-130)
>>
>
> _______________________________________________
> ffmpeg-devel mailing list
> ffmpeg-devel at ffmpeg.org
> https://ffmpeg.org/mailman/listinfo/ffmpeg-devel
>
> To unsubscribe, visit link above, or email
> ffmpeg-devel-request at ffmpeg.org with subject "unsubscribe".
More information about the ffmpeg-devel
mailing list