Re: [FFmpeg-devel] [PATCH] libavdevice/avfoundation.m: use AudioConvert, extend supported formats

Romain Beauxis Wed, 01 Dec 2021 15:08:04 -0800

Hi there!

Is anyone interested in the patch below? It fixes an issue with some
macOS AVFoundation input devices that return formats not currently
supported by the implementation. I also have another important bugfix
for the implementation's concurrency model, which is waiting for this
patch to be merged first.


Otherwise, is there any appropriate step I can take to help get this merged?

Thanks for any insight!
-- Romain

> On Nov 30, 2021, at 12:02 AM, Romain Beauxis <romain.beau...@gmail.com> wrote:
> 
> * Implement support for AudioConverter
> * Switch to AudioConverter's API to convert unsupported PCM
> formats (non-interleaved, non-packed) to supported formats
> * Minimize data copy.
> 
> This fixes: https://trac.ffmpeg.org/ticket/9502
> 
> API ref: 
> https://developer.apple.com/documentation/audiotoolbox/audio_converter_services
> 
> Signed-off-by: Romain Beauxis <to...@rastageeks.org>
> ---
> libavdevice/avfoundation.m | 250 +++++++++++++++++++++----------------
> 1 file changed, 144 insertions(+), 106 deletions(-)
> 
> diff --git a/libavdevice/avfoundation.m b/libavdevice/avfoundation.m
> index 0cd6e646d5..79c9207cfa 100644
> --- a/libavdevice/avfoundation.m
> +++ b/libavdevice/avfoundation.m
> @@ -111,16 +111,10 @@
> 
>    int             num_video_devices;
> 
> -    int             audio_channels;
> -    int             audio_bits_per_sample;
> -    int             audio_float;
> -    int             audio_be;
> -    int             audio_signed_integer;
> -    int             audio_packed;
> -    int             audio_non_interleaved;
> -
> -    int32_t         *audio_buffer;
> -    int             audio_buffer_size;
> +    UInt32            audio_buffers;
> +    UInt32            audio_channels;
> +    UInt32            bytes_per_sample;
> +    AudioConverterRef audio_converter;
> 
>    enum AVPixelFormat pixel_format;
> 
> @@ -299,7 +293,10 @@ static void destroy_context(AVFContext* ctx)
>    ctx->avf_delegate    = NULL;
>    ctx->avf_audio_delegate = NULL;
> 
> -    av_freep(&ctx->audio_buffer);
> +    if (ctx->audio_converter) {
> +      AudioConverterDispose(ctx->audio_converter);
> +      ctx->audio_converter = NULL;
> +    }
> 
>    pthread_mutex_destroy(&ctx->frame_lock);
> 
> @@ -673,6 +670,10 @@ static int get_audio_config(AVFormatContext *s)
>    AVFContext *ctx = (AVFContext*)s->priv_data;
>    CMFormatDescriptionRef format_desc;
>    AVStream* stream = avformat_new_stream(s, NULL);
> +    AudioStreamBasicDescription output_format = {0};
> +    int audio_bits_per_sample, audio_float, audio_be;
> +    int audio_signed_integer, audio_packed, audio_non_interleaved;
> +    int must_convert = 0;
> 
>    if (!stream) {
>        return 1;
> @@ -690,60 +691,95 @@ static int get_audio_config(AVFormatContext *s)
>    avpriv_set_pts_info(stream, 64, 1, avf_time_base);
> 
>    format_desc = CMSampleBufferGetFormatDescription(ctx->current_audio_frame);
> -    const AudioStreamBasicDescription *basic_desc = 
> CMAudioFormatDescriptionGetStreamBasicDescription(format_desc);
> +    const AudioStreamBasicDescription *input_format = 
> CMAudioFormatDescriptionGetStreamBasicDescription(format_desc);
> 
> -    if (!basic_desc) {
> +    if (!input_format) {
>        unlock_frames(ctx);
>        av_log(s, AV_LOG_ERROR, "audio format not available\n");
>        return 1;
>    }
> 
> +    if (input_format->mFormatID != kAudioFormatLinearPCM) {
> +        unlock_frames(ctx);
> +        av_log(s, AV_LOG_ERROR, "only PCM audio format are supported at the 
> moment\n");
> +        return 1;
> +    }
> +
>    stream->codecpar->codec_type     = AVMEDIA_TYPE_AUDIO;
> -    stream->codecpar->sample_rate    = basic_desc->mSampleRate;
> -    stream->codecpar->channels       = basic_desc->mChannelsPerFrame;
> +    stream->codecpar->sample_rate    = input_format->mSampleRate;
> +    stream->codecpar->channels       = input_format->mChannelsPerFrame;
>    stream->codecpar->channel_layout = 
> av_get_default_channel_layout(stream->codecpar->channels);
> 
> -    ctx->audio_channels        = basic_desc->mChannelsPerFrame;
> -    ctx->audio_bits_per_sample = basic_desc->mBitsPerChannel;
> -    ctx->audio_float           = basic_desc->mFormatFlags & 
> kAudioFormatFlagIsFloat;
> -    ctx->audio_be              = basic_desc->mFormatFlags & 
> kAudioFormatFlagIsBigEndian;
> -    ctx->audio_signed_integer  = basic_desc->mFormatFlags & 
> kAudioFormatFlagIsSignedInteger;
> -    ctx->audio_packed          = basic_desc->mFormatFlags & 
> kAudioFormatFlagIsPacked;
> -    ctx->audio_non_interleaved = basic_desc->mFormatFlags & 
> kAudioFormatFlagIsNonInterleaved;
> -
> -    if (basic_desc->mFormatID == kAudioFormatLinearPCM &&
> -        ctx->audio_float &&
> -        ctx->audio_bits_per_sample == 32 &&
> -        ctx->audio_packed) {
> -        stream->codecpar->codec_id = ctx->audio_be ? AV_CODEC_ID_PCM_F32BE : 
> AV_CODEC_ID_PCM_F32LE;
> -    } else if (basic_desc->mFormatID == kAudioFormatLinearPCM &&
> -        ctx->audio_signed_integer &&
> -        ctx->audio_bits_per_sample == 16 &&
> -        ctx->audio_packed) {
> -        stream->codecpar->codec_id = ctx->audio_be ? AV_CODEC_ID_PCM_S16BE : 
> AV_CODEC_ID_PCM_S16LE;
> -    } else if (basic_desc->mFormatID == kAudioFormatLinearPCM &&
> -        ctx->audio_signed_integer &&
> -        ctx->audio_bits_per_sample == 24 &&
> -        ctx->audio_packed) {
> -        stream->codecpar->codec_id = ctx->audio_be ? AV_CODEC_ID_PCM_S24BE : 
> AV_CODEC_ID_PCM_S24LE;
> -    } else if (basic_desc->mFormatID == kAudioFormatLinearPCM &&
> -        ctx->audio_signed_integer &&
> -        ctx->audio_bits_per_sample == 32 &&
> -        ctx->audio_packed) {
> -        stream->codecpar->codec_id = ctx->audio_be ? AV_CODEC_ID_PCM_S32BE : 
> AV_CODEC_ID_PCM_S32LE;
> +    audio_bits_per_sample = input_format->mBitsPerChannel;
> +    audio_float           = input_format->mFormatFlags & 
> kAudioFormatFlagIsFloat;
> +    audio_be              = input_format->mFormatFlags & 
> kAudioFormatFlagIsBigEndian;
> +    audio_signed_integer  = input_format->mFormatFlags & 
> kAudioFormatFlagIsSignedInteger;
> +    audio_packed          = input_format->mFormatFlags & 
> kAudioFormatFlagIsPacked;
> +    audio_non_interleaved = input_format->mFormatFlags & 
> kAudioFormatFlagIsNonInterleaved;
> +
> +    ctx->bytes_per_sample = input_format->mBitsPerChannel >> 3;
> +    ctx->audio_channels   = input_format->mChannelsPerFrame;
> +
> +    if (audio_non_interleaved) {
> +        ctx->audio_buffers = input_format->mChannelsPerFrame;
>    } else {
> -        unlock_frames(ctx);
> -        av_log(s, AV_LOG_ERROR, "audio format is not supported\n");
> -        return 1;
> +        ctx->audio_buffers = 1;
> +    }
> +
> +    if (audio_non_interleaved || !audio_packed) {
> +      must_convert = 1;
> +    }
> +
> +    output_format.mBitsPerChannel   = input_format->mBitsPerChannel;
> +    output_format.mChannelsPerFrame = ctx->audio_channels;
> +    output_format.mFramesPerPacket  = 1;
> +    output_format.mBytesPerFrame    = output_format.mChannelsPerFrame * 
> ctx->bytes_per_sample;
> +    output_format.mBytesPerPacket   = output_format.mFramesPerPacket * 
> output_format.mBytesPerFrame;
> +    output_format.mFormatFlags      = kAudioFormatFlagIsPacked | audio_be;
> +    output_format.mFormatID         = kAudioFormatLinearPCM;
> +    output_format.mReserved         = 0;
> +    output_format.mSampleRate       = input_format->mSampleRate;
> +
> +    if (audio_float &&
> +        audio_bits_per_sample == 32) {
> +        stream->codecpar->codec_id = audio_be ? AV_CODEC_ID_PCM_F32BE : 
> AV_CODEC_ID_PCM_F32LE;
> +        output_format.mFormatFlags |= kAudioFormatFlagIsFloat;
> +    } else if (audio_float &&
> +        audio_bits_per_sample == 64) {
> +        stream->codecpar->codec_id = audio_be ? AV_CODEC_ID_PCM_F64BE : 
> AV_CODEC_ID_PCM_F64LE;
> +        output_format.mFormatFlags |= kAudioFormatFlagIsFloat;
> +    } else if (audio_signed_integer &&
> +        audio_bits_per_sample == 8) {
> +        stream->codecpar->codec_id = AV_CODEC_ID_PCM_S8;
> +        output_format.mFormatFlags |= kAudioFormatFlagIsSignedInteger;
> +    } else if (audio_signed_integer &&
> +        audio_bits_per_sample == 16) {
> +        stream->codecpar->codec_id = audio_be ? AV_CODEC_ID_PCM_S16BE : 
> AV_CODEC_ID_PCM_S16LE;
> +        output_format.mFormatFlags |= kAudioFormatFlagIsSignedInteger;
> +    } else if (audio_signed_integer &&
> +        audio_bits_per_sample == 24) {
> +        stream->codecpar->codec_id = audio_be ? AV_CODEC_ID_PCM_S24BE : 
> AV_CODEC_ID_PCM_S24LE;
> +        output_format.mFormatFlags |= kAudioFormatFlagIsSignedInteger;
> +    } else if (audio_signed_integer &&
> +        audio_bits_per_sample == 32) {
> +        stream->codecpar->codec_id = audio_be ? AV_CODEC_ID_PCM_S32BE : 
> AV_CODEC_ID_PCM_S32LE;
> +        output_format.mFormatFlags |= kAudioFormatFlagIsSignedInteger;
> +    } else if (audio_signed_integer &&
> +        audio_bits_per_sample == 64) {
> +        stream->codecpar->codec_id = audio_be ? AV_CODEC_ID_PCM_S64BE : 
> AV_CODEC_ID_PCM_S64LE;
> +        output_format.mFormatFlags |= kAudioFormatFlagIsSignedInteger;
> +    } else {
> +        stream->codecpar->codec_id = audio_be ? AV_CODEC_ID_PCM_S32BE : 
> AV_CODEC_ID_PCM_S32LE;
> +        output_format.mBitsPerChannel = 32;
> +        output_format.mFormatFlags |= kAudioFormatFlagIsSignedInteger;
> +        must_convert = 1;
>    }
> 
> -    if (ctx->audio_non_interleaved) {
> -        CMBlockBufferRef block_buffer = 
> CMSampleBufferGetDataBuffer(ctx->current_audio_frame);
> -        ctx->audio_buffer_size        = 
> CMBlockBufferGetDataLength(block_buffer);
> -        ctx->audio_buffer             = av_malloc(ctx->audio_buffer_size);
> -        if (!ctx->audio_buffer) {
> +    if (must_convert) {
> +        OSStatus ret = AudioConverterNew(input_format, &output_format, 
> &ctx->audio_converter);
> +        if (ret != noErr) {
>            unlock_frames(ctx);
> -            av_log(s, AV_LOG_ERROR, "error allocating audio buffer\n");
> +            av_log(s, AV_LOG_ERROR, "Error while allocating audio 
> converter\n");
>            return 1;
>        }
>    }
> @@ -1048,6 +1084,7 @@ static int copy_cvpixelbuffer(AVFormatContext *s,
> 
> static int avf_read_packet(AVFormatContext *s, AVPacket *pkt)
> {
> +    OSStatus ret;
>    AVFContext* ctx = (AVFContext*)s->priv_data;
> 
>    do {
> @@ -1091,7 +1128,7 @@ static int avf_read_packet(AVFormatContext *s, AVPacket 
> *pkt)
>                status = copy_cvpixelbuffer(s, image_buffer, pkt);
>            } else {
>                status = 0;
> -                OSStatus ret = CMBlockBufferCopyDataBytes(block_buffer, 0, 
> pkt->size, pkt->data);
> +                ret = CMBlockBufferCopyDataBytes(block_buffer, 0, pkt->size, 
> pkt->data);
>                if (ret != kCMBlockBufferNoErr) {
>                    status = AVERROR(EIO);
>                }
> @@ -1105,82 +1142,83 @@ static int avf_read_packet(AVFormatContext *s, 
> AVPacket *pkt)
>            }
>        } else if (ctx->current_audio_frame != nil) {
>            CMBlockBufferRef block_buffer = 
> CMSampleBufferGetDataBuffer(ctx->current_audio_frame);
> -            int block_buffer_size         = 
> CMBlockBufferGetDataLength(block_buffer);
> 
> -            if (!block_buffer || !block_buffer_size) {
> -                unlock_frames(ctx);
> -                return AVERROR(EIO);
> -            }
> +            size_t input_size = CMBlockBufferGetDataLength(block_buffer);
> +            int buffer_size = input_size / ctx->audio_buffers;
> +            int nb_samples = input_size / (ctx->audio_channels * 
> ctx->bytes_per_sample);
> +            int output_size = buffer_size;
> 
> -            if (ctx->audio_non_interleaved && block_buffer_size > 
> ctx->audio_buffer_size) {
> +            UInt32 size = sizeof(output_size);
> +            ret = AudioConverterGetProperty(ctx->audio_converter, 
> kAudioConverterPropertyCalculateOutputBufferSize, &size, &output_size);
> +            if (ret != noErr) {
>                unlock_frames(ctx);
> -                return AVERROR_BUFFER_TOO_SMALL;
> +                return AVERROR(EIO);
>            }
> 
> -            if (av_new_packet(pkt, block_buffer_size) < 0) {
> +            if (av_new_packet(pkt, output_size) < 0) {
>                unlock_frames(ctx);
>                return AVERROR(EIO);
>            }
> 
> -            CMItemCount count;
> -            CMSampleTimingInfo timing_info;
> +            if (ctx->audio_converter) {
> +                size_t input_buffer_size = offsetof(AudioBufferList, 
> mBuffers[0]) + (sizeof(AudioBuffer) * ctx->audio_buffers);
> +                AudioBufferList *input_buffer = av_malloc(input_buffer_size);
> 
> -            if 
> (CMSampleBufferGetOutputSampleTimingInfoArray(ctx->current_audio_frame, 1, 
> &timing_info, &count) == noErr) {
> -                AVRational timebase_q = av_make_q(1, 
> timing_info.presentationTimeStamp.timescale);
> -                pkt->pts = pkt->dts = 
> av_rescale_q(timing_info.presentationTimeStamp.value, timebase_q, 
> avf_time_base_q);
> -            }
> +                input_buffer->mNumberBuffers = ctx->audio_buffers;
> 
> -            pkt->stream_index  = ctx->audio_stream_index;
> -            pkt->flags        |= AV_PKT_FLAG_KEY;
> +                for (int c = 0; c < ctx->audio_buffers; c++) {
> +                    input_buffer->mBuffers[c].mNumberChannels = 1;
> 
> -            if (ctx->audio_non_interleaved) {
> -                int sample, c, shift, num_samples;
> +                    ret = CMBlockBufferGetDataPointer(block_buffer, c * 
> buffer_size, (size_t *)&input_buffer->mBuffers[c].mDataByteSize, NULL, (void 
> *)&input_buffer->mBuffers[c].mData);
> 
> -                OSStatus ret = CMBlockBufferCopyDataBytes(block_buffer, 0, 
> pkt->size, ctx->audio_buffer);
> -                if (ret != kCMBlockBufferNoErr) {
> -                    unlock_frames(ctx);
> -                    return AVERROR(EIO);
> +                    if (ret != kCMBlockBufferNoErr) {
> +                        av_free(input_buffer);
> +                        unlock_frames(ctx);
> +                        return AVERROR(EIO);
> +                    }
>                }
> 
> -                num_samples = pkt->size / (ctx->audio_channels * 
> (ctx->audio_bits_per_sample >> 3));
> -
> -                // transform decoded frame into output format
> -                #define INTERLEAVE_OUTPUT(bps)                               
>           \
> -                {                                                            
>           \
> -                    int##bps##_t **src;                                      
>           \
> -                    int##bps##_t *dest;                                      
>           \
> -                    src = av_malloc(ctx->audio_channels * 
> sizeof(int##bps##_t*));      \
> -                    if (!src) {                                              
>           \
> -                        unlock_frames(ctx);                                  
>           \
> -                        return AVERROR(EIO);                                 
>           \
> -                    }                                                        
>           \
> -                                                                             
>           \
> -                    for (c = 0; c < ctx->audio_channels; c++) {              
>           \
> -                        src[c] = ((int##bps##_t*)ctx->audio_buffer) + c * 
> num_samples; \
> -                    }                                                        
>           \
> -                    dest  = (int##bps##_t*)pkt->data;                        
>           \
> -                    shift = bps - ctx->audio_bits_per_sample;                
>           \
> -                    for (sample = 0; sample < num_samples; sample++)         
>           \
> -                        for (c = 0; c < ctx->audio_channels; c++)            
>           \
> -                            *dest++ = src[c][sample] << shift;               
>           \
> -                    av_freep(&src);                                          
>           \
> -                }
> +                AudioBufferList output_buffer = {
> +                   .mNumberBuffers = 1,
> +                   .mBuffers[0]    = {
> +                       .mNumberChannels = ctx->audio_channels,
> +                       .mDataByteSize   = pkt->size,
> +                       .mData           = pkt->data
> +                   }
> +                };
> 
> -                if (ctx->audio_bits_per_sample <= 16) {
> -                    INTERLEAVE_OUTPUT(16)
> -                } else {
> -                    INTERLEAVE_OUTPUT(32)
> -                }
> -            } else {
> -                OSStatus ret = CMBlockBufferCopyDataBytes(block_buffer, 0, 
> pkt->size, pkt->data);
> -                if (ret != kCMBlockBufferNoErr) {
> +                ret = 
> AudioConverterConvertComplexBuffer(ctx->audio_converter, nb_samples, 
> input_buffer, &output_buffer);
> +                av_free(input_buffer);
> +
> +                if (ret != noErr) {
>                    unlock_frames(ctx);
>                    return AVERROR(EIO);
>                }
> +
> +                pkt->size = output_buffer.mBuffers[0].mDataByteSize;
> +            } else {
> +                 ret = CMBlockBufferCopyDataBytes(block_buffer, 0, 
> pkt->size, pkt->data);
> +                 if (ret != kCMBlockBufferNoErr) {
> +                     unlock_frames(ctx);
> +                     return AVERROR(EIO);
> +                 }
>            }
> 
> +            CMItemCount count;
> +            CMSampleTimingInfo timing_info;
> +
> +            if 
> (CMSampleBufferGetOutputSampleTimingInfoArray(ctx->current_audio_frame, 1, 
> &timing_info, &count) == noErr) {
> +                AVRational timebase_q = av_make_q(1, 
> timing_info.presentationTimeStamp.timescale);
> +                pkt->pts = pkt->dts = 
> av_rescale_q(timing_info.presentationTimeStamp.value, timebase_q, 
> avf_time_base_q);
> +            }
> +
> +            pkt->stream_index  = ctx->audio_stream_index;
> +            pkt->flags        |= AV_PKT_FLAG_KEY;
> +
>            CFRelease(ctx->current_audio_frame);
>            ctx->current_audio_frame = nil;
> +
> +            unlock_frames(ctx);
>        } else {
>            pkt->data = NULL;
>            unlock_frames(ctx);
> -- 
> 2.30.1 (Apple Git-130)
> 

_______________________________________________
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel

To unsubscribe, visit link above, or email
ffmpeg-devel-requ...@ffmpeg.org with subject "unsubscribe".

Re: [FFmpeg-devel] [PATCH] libavdevice/avfoundation.m: use AudioConvert, extend supported formats

Reply via email to