[FFmpeg-devel] [PATCH] libavdevice/avfoundation.m: use AudioConvert, extend supported formats

Thilo Borgmann thilo.borgmann at mail.de
Tue Dec 7 15:21:01 EET 2021


Hi,

will look at this soon (tm), ping me if I don't.

Thanks,
Thilo

> Am 02.12.2021 um 00:08 schrieb Romain Beauxis <toots at rastageeks.org>:
> 
> Hi there!
> 
> Anyone interested in the patch below? It fixes an issue with some
> macos avfoundation input devices that return formats currently not
> supported by the implementation. I also have another important bugfix
> in the concurrency model of the implementation waiting for this to be
> merged first.
> 
> Otherwise, is there any appropriate step to help get this merged?
> 
> Thanks for any insight!
> -- Romain
> 
>> On Nov 30, 2021, at 12:02 AM, Romain Beauxis <romain.beauxis at gmail.com> wrote:
>> 
>> * Implement support for AudioConverter
>> * Switch to AudioConverter's API to convert unsupported PCM
>> formats (non-interleaved, non-packed) to supported formats
>> * Minimize data copy.
>> 
>> This fixes: https://trac.ffmpeg.org/ticket/9502
>> 
>> API ref: https://developer.apple.com/documentation/audiotoolbox/audio_converter_services
>> 
>> Signed-off-by: Romain Beauxis <toots at rastageeks.org>
>> ---
>> libavdevice/avfoundation.m | 250 +++++++++++++++++++++----------------
>> 1 file changed, 144 insertions(+), 106 deletions(-)
>> 
>> diff --git a/libavdevice/avfoundation.m b/libavdevice/avfoundation.m
>> index 0cd6e646d5..79c9207cfa 100644
>> --- a/libavdevice/avfoundation.m
>> +++ b/libavdevice/avfoundation.m
>> @@ -111,16 +111,10 @@
>> 
>>   int             num_video_devices;
>> 
>> -    int             audio_channels;
>> -    int             audio_bits_per_sample;
>> -    int             audio_float;
>> -    int             audio_be;
>> -    int             audio_signed_integer;
>> -    int             audio_packed;
>> -    int             audio_non_interleaved;
>> -
>> -    int32_t         *audio_buffer;
>> -    int             audio_buffer_size;
>> +    UInt32            audio_buffers;
>> +    UInt32            audio_channels;
>> +    UInt32            bytes_per_sample;
>> +    AudioConverterRef audio_converter;
>> 
>>   enum AVPixelFormat pixel_format;
>> 
>> @@ -299,7 +293,10 @@ static void destroy_context(AVFContext* ctx)
>>   ctx->avf_delegate    = NULL;
>>   ctx->avf_audio_delegate = NULL;
>> 
>> -    av_freep(&ctx->audio_buffer);
>> +    if (ctx->audio_converter) {
>> +      AudioConverterDispose(ctx->audio_converter);
>> +      ctx->audio_converter = NULL;
>> +    }
>> 
>>   pthread_mutex_destroy(&ctx->frame_lock);
>> 
>> @@ -673,6 +670,10 @@ static int get_audio_config(AVFormatContext *s)
>>   AVFContext *ctx = (AVFContext*)s->priv_data;
>>   CMFormatDescriptionRef format_desc;
>>   AVStream* stream = avformat_new_stream(s, NULL);
>> +    AudioStreamBasicDescription output_format = {0};
>> +    int audio_bits_per_sample, audio_float, audio_be;
>> +    int audio_signed_integer, audio_packed, audio_non_interleaved;
>> +    int must_convert = 0;
>> 
>>   if (!stream) {
>>       return 1;
>> @@ -690,60 +691,95 @@ static int get_audio_config(AVFormatContext *s)
>>   avpriv_set_pts_info(stream, 64, 1, avf_time_base);
>> 
>>   format_desc = CMSampleBufferGetFormatDescription(ctx->current_audio_frame);
>> -    const AudioStreamBasicDescription *basic_desc = CMAudioFormatDescriptionGetStreamBasicDescription(format_desc);
>> +    const AudioStreamBasicDescription *input_format = CMAudioFormatDescriptionGetStreamBasicDescription(format_desc);
>> 
>> -    if (!basic_desc) {
>> +    if (!input_format) {
>>       unlock_frames(ctx);
>>       av_log(s, AV_LOG_ERROR, "audio format not available\n");
>>       return 1;
>>   }
>> 
>> +    if (input_format->mFormatID != kAudioFormatLinearPCM) {
>> +        unlock_frames(ctx);
>> +        av_log(s, AV_LOG_ERROR, "only PCM audio format are supported at the moment\n");
>> +        return 1;
>> +    }
>> +
>>   stream->codecpar->codec_type     = AVMEDIA_TYPE_AUDIO;
>> -    stream->codecpar->sample_rate    = basic_desc->mSampleRate;
>> -    stream->codecpar->channels       = basic_desc->mChannelsPerFrame;
>> +    stream->codecpar->sample_rate    = input_format->mSampleRate;
>> +    stream->codecpar->channels       = input_format->mChannelsPerFrame;
>>   stream->codecpar->channel_layout = av_get_default_channel_layout(stream->codecpar->channels);
>> 
>> -    ctx->audio_channels        = basic_desc->mChannelsPerFrame;
>> -    ctx->audio_bits_per_sample = basic_desc->mBitsPerChannel;
>> -    ctx->audio_float           = basic_desc->mFormatFlags & kAudioFormatFlagIsFloat;
>> -    ctx->audio_be              = basic_desc->mFormatFlags & kAudioFormatFlagIsBigEndian;
>> -    ctx->audio_signed_integer  = basic_desc->mFormatFlags & kAudioFormatFlagIsSignedInteger;
>> -    ctx->audio_packed          = basic_desc->mFormatFlags & kAudioFormatFlagIsPacked;
>> -    ctx->audio_non_interleaved = basic_desc->mFormatFlags & kAudioFormatFlagIsNonInterleaved;
>> -
>> -    if (basic_desc->mFormatID == kAudioFormatLinearPCM &&
>> -        ctx->audio_float &&
>> -        ctx->audio_bits_per_sample == 32 &&
>> -        ctx->audio_packed) {
>> -        stream->codecpar->codec_id = ctx->audio_be ? AV_CODEC_ID_PCM_F32BE : AV_CODEC_ID_PCM_F32LE;
>> -    } else if (basic_desc->mFormatID == kAudioFormatLinearPCM &&
>> -        ctx->audio_signed_integer &&
>> -        ctx->audio_bits_per_sample == 16 &&
>> -        ctx->audio_packed) {
>> -        stream->codecpar->codec_id = ctx->audio_be ? AV_CODEC_ID_PCM_S16BE : AV_CODEC_ID_PCM_S16LE;
>> -    } else if (basic_desc->mFormatID == kAudioFormatLinearPCM &&
>> -        ctx->audio_signed_integer &&
>> -        ctx->audio_bits_per_sample == 24 &&
>> -        ctx->audio_packed) {
>> -        stream->codecpar->codec_id = ctx->audio_be ? AV_CODEC_ID_PCM_S24BE : AV_CODEC_ID_PCM_S24LE;
>> -    } else if (basic_desc->mFormatID == kAudioFormatLinearPCM &&
>> -        ctx->audio_signed_integer &&
>> -        ctx->audio_bits_per_sample == 32 &&
>> -        ctx->audio_packed) {
>> -        stream->codecpar->codec_id = ctx->audio_be ? AV_CODEC_ID_PCM_S32BE : AV_CODEC_ID_PCM_S32LE;
>> +    audio_bits_per_sample = input_format->mBitsPerChannel;
>> +    audio_float           = input_format->mFormatFlags & kAudioFormatFlagIsFloat;
>> +    audio_be              = input_format->mFormatFlags & kAudioFormatFlagIsBigEndian;
>> +    audio_signed_integer  = input_format->mFormatFlags & kAudioFormatFlagIsSignedInteger;
>> +    audio_packed          = input_format->mFormatFlags & kAudioFormatFlagIsPacked;
>> +    audio_non_interleaved = input_format->mFormatFlags & kAudioFormatFlagIsNonInterleaved;
>> +
>> +    ctx->bytes_per_sample = input_format->mBitsPerChannel >> 3;
>> +    ctx->audio_channels   = input_format->mChannelsPerFrame;
>> +
>> +    if (audio_non_interleaved) {
>> +        ctx->audio_buffers = input_format->mChannelsPerFrame;
>>   } else {
>> -        unlock_frames(ctx);
>> -        av_log(s, AV_LOG_ERROR, "audio format is not supported\n");
>> -        return 1;
>> +        ctx->audio_buffers = 1;
>> +    }
>> +
>> +    if (audio_non_interleaved || !audio_packed) {
>> +      must_convert = 1;
>> +    }
>> +
>> +    output_format.mBitsPerChannel   = input_format->mBitsPerChannel;
>> +    output_format.mChannelsPerFrame = ctx->audio_channels;
>> +    output_format.mFramesPerPacket  = 1;
>> +    output_format.mBytesPerFrame    = output_format.mChannelsPerFrame * ctx->bytes_per_sample;
>> +    output_format.mBytesPerPacket   = output_format.mFramesPerPacket * output_format.mBytesPerFrame;
>> +    output_format.mFormatFlags      = kAudioFormatFlagIsPacked | audio_be;
>> +    output_format.mFormatID         = kAudioFormatLinearPCM;
>> +    output_format.mReserved         = 0;
>> +    output_format.mSampleRate       = input_format->mSampleRate;
>> +
>> +    if (audio_float &&
>> +        audio_bits_per_sample == 32) {
>> +        stream->codecpar->codec_id = audio_be ? AV_CODEC_ID_PCM_F32BE : AV_CODEC_ID_PCM_F32LE;
>> +        output_format.mFormatFlags |= kAudioFormatFlagIsFloat;
>> +    } else if (audio_float &&
>> +        audio_bits_per_sample == 64) {
>> +        stream->codecpar->codec_id = audio_be ? AV_CODEC_ID_PCM_F64BE : AV_CODEC_ID_PCM_F64LE;
>> +        output_format.mFormatFlags |= kAudioFormatFlagIsFloat;
>> +    } else if (audio_signed_integer &&
>> +        audio_bits_per_sample == 8) {
>> +        stream->codecpar->codec_id = AV_CODEC_ID_PCM_S8;
>> +        output_format.mFormatFlags |= kAudioFormatFlagIsSignedInteger;
>> +    } else if (audio_signed_integer &&
>> +        audio_bits_per_sample == 16) {
>> +        stream->codecpar->codec_id = audio_be ? AV_CODEC_ID_PCM_S16BE : AV_CODEC_ID_PCM_S16LE;
>> +        output_format.mFormatFlags |= kAudioFormatFlagIsSignedInteger;
>> +    } else if (audio_signed_integer &&
>> +        audio_bits_per_sample == 24) {
>> +        stream->codecpar->codec_id = audio_be ? AV_CODEC_ID_PCM_S24BE : AV_CODEC_ID_PCM_S24LE;
>> +        output_format.mFormatFlags |= kAudioFormatFlagIsSignedInteger;
>> +    } else if (audio_signed_integer &&
>> +        audio_bits_per_sample == 32) {
>> +        stream->codecpar->codec_id = audio_be ? AV_CODEC_ID_PCM_S32BE : AV_CODEC_ID_PCM_S32LE;
>> +        output_format.mFormatFlags |= kAudioFormatFlagIsSignedInteger;
>> +    } else if (audio_signed_integer &&
>> +        audio_bits_per_sample == 64) {
>> +        stream->codecpar->codec_id = audio_be ? AV_CODEC_ID_PCM_S64BE : AV_CODEC_ID_PCM_S64LE;
>> +        output_format.mFormatFlags |= kAudioFormatFlagIsSignedInteger;
>> +    } else {
>> +        stream->codecpar->codec_id = audio_be ? AV_CODEC_ID_PCM_S32BE : AV_CODEC_ID_PCM_S32LE;
>> +        output_format.mBitsPerChannel = 32;
>> +        output_format.mFormatFlags |= kAudioFormatFlagIsSignedInteger;
>> +        must_convert = 1;
>>   }
>> 
>> -    if (ctx->audio_non_interleaved) {
>> -        CMBlockBufferRef block_buffer = CMSampleBufferGetDataBuffer(ctx->current_audio_frame);
>> -        ctx->audio_buffer_size        = CMBlockBufferGetDataLength(block_buffer);
>> -        ctx->audio_buffer             = av_malloc(ctx->audio_buffer_size);
>> -        if (!ctx->audio_buffer) {
>> +    if (must_convert) {
>> +        OSStatus ret = AudioConverterNew(input_format, &output_format, &ctx->audio_converter);
>> +        if (ret != noErr) {
>>           unlock_frames(ctx);
>> -            av_log(s, AV_LOG_ERROR, "error allocating audio buffer\n");
>> +            av_log(s, AV_LOG_ERROR, "Error while allocating audio converter\n");
>>           return 1;
>>       }
>>   }
>> @@ -1048,6 +1084,7 @@ static int copy_cvpixelbuffer(AVFormatContext *s,
>> 
>> static int avf_read_packet(AVFormatContext *s, AVPacket *pkt)
>> {
>> +    OSStatus ret;
>>   AVFContext* ctx = (AVFContext*)s->priv_data;
>> 
>>   do {
>> @@ -1091,7 +1128,7 @@ static int avf_read_packet(AVFormatContext *s, AVPacket *pkt)
>>               status = copy_cvpixelbuffer(s, image_buffer, pkt);
>>           } else {
>>               status = 0;
>> -                OSStatus ret = CMBlockBufferCopyDataBytes(block_buffer, 0, pkt->size, pkt->data);
>> +                ret = CMBlockBufferCopyDataBytes(block_buffer, 0, pkt->size, pkt->data);
>>               if (ret != kCMBlockBufferNoErr) {
>>                   status = AVERROR(EIO);
>>               }
>> @@ -1105,82 +1142,83 @@ static int avf_read_packet(AVFormatContext *s, AVPacket *pkt)
>>           }
>>       } else if (ctx->current_audio_frame != nil) {
>>           CMBlockBufferRef block_buffer = CMSampleBufferGetDataBuffer(ctx->current_audio_frame);
>> -            int block_buffer_size         = CMBlockBufferGetDataLength(block_buffer);
>> 
>> -            if (!block_buffer || !block_buffer_size) {
>> -                unlock_frames(ctx);
>> -                return AVERROR(EIO);
>> -            }
>> +            size_t input_size = CMBlockBufferGetDataLength(block_buffer);
>> +            int buffer_size = input_size / ctx->audio_buffers;
>> +            int nb_samples = input_size / (ctx->audio_channels * ctx->bytes_per_sample);
>> +            int output_size = buffer_size;
>> 
>> -            if (ctx->audio_non_interleaved && block_buffer_size > ctx->audio_buffer_size) {
>> +            UInt32 size = sizeof(output_size);
>> +            ret = AudioConverterGetProperty(ctx->audio_converter, kAudioConverterPropertyCalculateOutputBufferSize, &size, &output_size);
>> +            if (ret != noErr) {
>>               unlock_frames(ctx);
>> -                return AVERROR_BUFFER_TOO_SMALL;
>> +                return AVERROR(EIO);
>>           }
>> 
>> -            if (av_new_packet(pkt, block_buffer_size) < 0) {
>> +            if (av_new_packet(pkt, output_size) < 0) {
>>               unlock_frames(ctx);
>>               return AVERROR(EIO);
>>           }
>> 
>> -            CMItemCount count;
>> -            CMSampleTimingInfo timing_info;
>> +            if (ctx->audio_converter) {
>> +                size_t input_buffer_size = offsetof(AudioBufferList, mBuffers[0]) + (sizeof(AudioBuffer) * ctx->audio_buffers);
>> +                AudioBufferList *input_buffer = av_malloc(input_buffer_size);
>> 
>> -            if (CMSampleBufferGetOutputSampleTimingInfoArray(ctx->current_audio_frame, 1, &timing_info, &count) == noErr) {
>> -                AVRational timebase_q = av_make_q(1, timing_info.presentationTimeStamp.timescale);
>> -                pkt->pts = pkt->dts = av_rescale_q(timing_info.presentationTimeStamp.value, timebase_q, avf_time_base_q);
>> -            }
>> +                input_buffer->mNumberBuffers = ctx->audio_buffers;
>> 
>> -            pkt->stream_index  = ctx->audio_stream_index;
>> -            pkt->flags        |= AV_PKT_FLAG_KEY;
>> +                for (int c = 0; c < ctx->audio_buffers; c++) {
>> +                    input_buffer->mBuffers[c].mNumberChannels = 1;
>> 
>> -            if (ctx->audio_non_interleaved) {
>> -                int sample, c, shift, num_samples;
>> +                    ret = CMBlockBufferGetDataPointer(block_buffer, c * buffer_size, (size_t *)&input_buffer->mBuffers[c].mDataByteSize, NULL, (void *)&input_buffer->mBuffers[c].mData);
>> 
>> -                OSStatus ret = CMBlockBufferCopyDataBytes(block_buffer, 0, pkt->size, ctx->audio_buffer);
>> -                if (ret != kCMBlockBufferNoErr) {
>> -                    unlock_frames(ctx);
>> -                    return AVERROR(EIO);
>> +                    if (ret != kCMBlockBufferNoErr) {
>> +                        av_free(input_buffer);
>> +                        unlock_frames(ctx);
>> +                        return AVERROR(EIO);
>> +                    }
>>               }
>> 
>> -                num_samples = pkt->size / (ctx->audio_channels * (ctx->audio_bits_per_sample >> 3));
>> -
>> -                // transform decoded frame into output format
>> -                #define INTERLEAVE_OUTPUT(bps)                                         \
>> -                {                                                                      \
>> -                    int##bps##_t **src;                                                \
>> -                    int##bps##_t *dest;                                                \
>> -                    src = av_malloc(ctx->audio_channels * sizeof(int##bps##_t*));      \
>> -                    if (!src) {                                                        \
>> -                        unlock_frames(ctx);                                            \
>> -                        return AVERROR(EIO);                                           \
>> -                    }                                                                  \
>> -                                                                                       \
>> -                    for (c = 0; c < ctx->audio_channels; c++) {                        \
>> -                        src[c] = ((int##bps##_t*)ctx->audio_buffer) + c * num_samples; \
>> -                    }                                                                  \
>> -                    dest  = (int##bps##_t*)pkt->data;                                  \
>> -                    shift = bps - ctx->audio_bits_per_sample;                          \
>> -                    for (sample = 0; sample < num_samples; sample++)                   \
>> -                        for (c = 0; c < ctx->audio_channels; c++)                      \
>> -                            *dest++ = src[c][sample] << shift;                         \
>> -                    av_freep(&src);                                                    \
>> -                }
>> +                AudioBufferList output_buffer = {
>> +                   .mNumberBuffers = 1,
>> +                   .mBuffers[0]    = {
>> +                       .mNumberChannels = ctx->audio_channels,
>> +                       .mDataByteSize   = pkt->size,
>> +                       .mData           = pkt->data
>> +                   }
>> +                };
>> 
>> -                if (ctx->audio_bits_per_sample <= 16) {
>> -                    INTERLEAVE_OUTPUT(16)
>> -                } else {
>> -                    INTERLEAVE_OUTPUT(32)
>> -                }
>> -            } else {
>> -                OSStatus ret = CMBlockBufferCopyDataBytes(block_buffer, 0, pkt->size, pkt->data);
>> -                if (ret != kCMBlockBufferNoErr) {
>> +                ret = AudioConverterConvertComplexBuffer(ctx->audio_converter, nb_samples, input_buffer, &output_buffer);
>> +                av_free(input_buffer);
>> +
>> +                if (ret != noErr) {
>>                   unlock_frames(ctx);
>>                   return AVERROR(EIO);
>>               }
>> +
>> +                pkt->size = output_buffer.mBuffers[0].mDataByteSize;
>> +            } else {
>> +                 ret = CMBlockBufferCopyDataBytes(block_buffer, 0, pkt->size, pkt->data);
>> +                 if (ret != kCMBlockBufferNoErr) {
>> +                     unlock_frames(ctx);
>> +                     return AVERROR(EIO);
>> +                 }
>>           }
>> 
>> +            CMItemCount count;
>> +            CMSampleTimingInfo timing_info;
>> +
>> +            if (CMSampleBufferGetOutputSampleTimingInfoArray(ctx->current_audio_frame, 1, &timing_info, &count) == noErr) {
>> +                AVRational timebase_q = av_make_q(1, timing_info.presentationTimeStamp.timescale);
>> +                pkt->pts = pkt->dts = av_rescale_q(timing_info.presentationTimeStamp.value, timebase_q, avf_time_base_q);
>> +            }
>> +
>> +            pkt->stream_index  = ctx->audio_stream_index;
>> +            pkt->flags        |= AV_PKT_FLAG_KEY;
>> +
>>           CFRelease(ctx->current_audio_frame);
>>           ctx->current_audio_frame = nil;
>> +
>> +            unlock_frames(ctx);
>>       } else {
>>           pkt->data = NULL;
>>           unlock_frames(ctx);
>> -- 
>> 2.30.1 (Apple Git-130)
>> 
> 
> _______________________________________________
> ffmpeg-devel mailing list
> ffmpeg-devel at ffmpeg.org
> https://ffmpeg.org/mailman/listinfo/ffmpeg-devel
> 
> To unsubscribe, visit link above, or email
> ffmpeg-devel-request at ffmpeg.org with subject "unsubscribe".



More information about the ffmpeg-devel mailing list