On Wed, Dec 28, 2016 at 9:51 AM, Kyle Swanson <k...@ylo.ph> wrote: > On Mon, Nov 28, 2016 at 10:59 PM, Marton Balint <c...@passwd.hu> wrote: > >> >> On Mon, 28 Nov 2016, Kyle Swanson wrote: >> >> On Thu, Nov 17, 2016 at 11:04 AM, Kyle Swanson <k...@ylo.ph> wrote: >>> >>>> Hi, >>>> >>>> Here's a couple of patches which update the ebur128 filter to use the >>>> recently added ebur128 API. This updated filter allows fine-tuned >>>> control over which EBU R128 parameters are measured, and provides >>>> modest speed increases over the previous ebur128 filter. Also >>>> noteworthy: this removes the video output option of the ebur128 >>>> filter. This is extraneous for an ebur128 measurement filter IMHO, but >>>> if we wanted to keep similar functionality in FFmpeg, we'd be better >>>> served by a new video source filter where custom meters could be >>>> created via exported frame metadata. >>>> >>>> The first patch adds true peak functionality to the ebur128 API using >>>> swresample (this was already discussed a little bit: >>>> http://ffmpeg.org/pipermail/ffmpeg-devel/2016-November/202583.html) >>>> The second patch is an update to the ebur128 filter. >>>> >>>> Kyle >>>> >>> >>> Does anyone have any problems with the first patch? >>> >> >> From 6912ed3a03cd19f46e96f1f4b9eb3aa69b7ce4df Mon Sep 17 00:00:00 2001 >>> From: Kyle Swanson <k...@ylo.ph> >>> Date: Thu, 17 Nov 2016 10:32:45 -0600 >>> Subject: [PATCH 1/2] lavfi/ebur128: add ebur128_check_true_peak() >>> >>> Signed-off-by: Kyle Swanson <k...@ylo.ph> >>> --- >>> libavfilter/ebur128.c | 162 ++++++++++++++++++++++++++++++ >>> +++++++++++++++++++- >>> libavfilter/ebur128.h | 17 ++++++ >>> 2 files changed, 177 insertions(+), 2 deletions(-) >>> >>> diff --git a/libavfilter/ebur128.c b/libavfilter/ebur128.c >>> index a46692e..dc16647 100644 >>> --- a/libavfilter/ebur128.c >>> +++ b/libavfilter/ebur128.c >>> @@ -50,6 +50,9 @@ >>> #include "libavutil/common.h" >>> #include "libavutil/mem.h" >>> #include "libavutil/thread.h" >>> +#include "libavutil/channel_layout.h" >>> +#include "libswresample/swresample.h" >>> >> >> Isn't this include needs an ifdef as well? >> >> +#include "libavutil/opt.h" >>> >>> #define CHECK_ERROR(condition, errorcode, goto_point) >>> \ >>> if ((condition)) { >>> \ >>> @@ -91,6 +94,16 @@ struct FFEBUR128StateInternal { >>> size_t short_term_frame_counter; >>> /** Maximum sample peak, one per channel */ >>> double *sample_peak; >>> + /** Maximum true peak, one per channel */ >>> + double* true_peak; >>> +#if CONFIG_SWRESAMPLE >>> + SwrContext *resampler; >>> + size_t oversample_factor; >>> + float* resampler_buffer_input; >>> + size_t resampler_buffer_input_frames; >>> + float* resampler_buffer_output; >>> + size_t resampler_buffer_output_frames; >>> +#endif >>> /** The maximum window duration in ms. */ >>> unsigned long window; >>> /** Data pointer array for interleaved data */ >>> @@ -214,12 +227,78 @@ static inline void init_histogram(void) >>> } >>> } >>> >>> +#if CONFIG_SWRESAMPLE >>> +static int ebur128_init_resampler(FFEBUR128State* st) { >>> + int64_t channel_layout; >>> + int errcode; >>> + >>> + if (st->samplerate < 96000) { >>> + st->d->oversample_factor = 4; >>> + } else if (st->samplerate < 192000) { >>> + st->d->oversample_factor = 2; >>> + } else { >>> + st->d->oversample_factor = 1; >>> + st->d->resampler_buffer_input = NULL; >>> + st->d->resampler_buffer_output = NULL; >>> + st->d->resampler = NULL; >>> + } >>> + >>> + st->d->resampler_buffer_input_frames = st->d->samples_in_100ms * 4; >>> + st->d->resampler_buffer_input = >>> av_malloc(st->d->resampler_buffer_input_frames >>> * >>> + st->channels * >>> + sizeof(float)); >>> >> >> av_malloc_array >> >> + CHECK_ERROR(!st->d->resampler_buffer_input, 0, exit) >>> + >>> + st->d->resampler_buffer_output_frames = >>> + st->d->resampler_buffer_input_frames * >>> + st->d->oversample_factor; >>> + st->d->resampler_buffer_output = >>> av_malloc(st->d->resampler_buffer_output_frames >>> * >>> + st->channels * >>> + sizeof(float)); >>> >> >> av_malloc_array >> >> + CHECK_ERROR(!st->d->resampler_buffer_output, 0, free_input) >>> + >>> + st->d->resampler = swr_alloc(); >>> + CHECK_ERROR(!st->d->resampler, 0, free_output) >>> + >>> + channel_layout = av_get_default_channel_layout(st->channels); >>> + >>> + av_opt_set_int(st->d->resampler, "in_channel_layout", >>> channel_layout, 0); >>> + av_opt_set_int(st->d->resampler, "in_sample_rate", st->samplerate, >>> 0); >>> + av_opt_set_sample_fmt(st->d->resampler, "in_sample_fmt", >>> AV_SAMPLE_FMT_FLT, 0); >>> + av_opt_set_int(st->d->resampler, "out_channel_layout", >>> channel_layout, 0); >>> + av_opt_set_int(st->d->resampler, "out_sample_rate", st->samplerate >>> * st->d->oversample_factor, 0); >>> + av_opt_set_sample_fmt(st->d->resampler, "out_sample_fmt", >>> AV_SAMPLE_FMT_FLT, 0); >>> + >>> + swr_init(st->d->resampler); >>> + return 0; >>> + >>> +free_output: >>> + av_free(st->d->resampler_buffer_output); >>> + st->d->resampler_buffer_output = NULL; >>> >> >> av_freep >> >> +free_input: >>> + av_free(st->d->resampler_buffer_input); >>> + st->d->resampler_buffer_input = NULL; >>> >> >> av_freep >> >> +exit: >>> + return AVERROR(ENOMEM); >>> +} >>> + >>> +static void ebur128_destroy_resampler(FFEBUR128State* st) { >>> + av_free(st->d->resampler_buffer_input); >>> + st->d->resampler_buffer_input = NULL; >>> >> >> av_freep >> >> + av_free(st->d->resampler_buffer_output); >>> + st->d->resampler_buffer_output = NULL; >>> >> >> av_freep >> >> + swr_free(&st->d->resampler); >>> + st->d->resampler = NULL; >>> >> >> swr_free already sets resampler to NULL. >> >> +} >>> +#endif >>> + >>> FFEBUR128State *ff_ebur128_init(unsigned int channels, >>> unsigned long samplerate, >>> unsigned long window, int mode) >>> { >>> int errcode; >>> FFEBUR128State *st; >>> + unsigned int i; >>> >>> st = (FFEBUR128State *) av_malloc(sizeof(FFEBUR128State)); >>> CHECK_ERROR(!st, 0, exit) >>> @@ -233,6 +312,14 @@ FFEBUR128State *ff_ebur128_init(unsigned int >>> channels, >>> st->d->sample_peak = >>> (double *) av_mallocz_array(channels, sizeof(double)); >>> CHECK_ERROR(!st->d->sample_peak, 0, free_channel_map) >>> + st->d->true_peak = >>> + (double*) malloc(channels * sizeof(double)); >>> >> >> av_mallocz_array >> >> + CHECK_ERROR(!st->d->true_peak, 0, free_sample_peak) >>> + >>> + for (i = 0; i < channels; ++i) { >>> + st->d->sample_peak[i] = 0.0; >>> + st->d->true_peak[i] = 0.0; >>> + } >>> >> >> Technically not portable (AFAIK), but we assume in a lot of places that a >> mallocz-ed double is 0.0, so this initialization is unneeded. >> >> >>> st->samplerate = samplerate; >>> st->d->samples_in_100ms = (st->samplerate + 5) / 10; >>> @@ -242,7 +329,7 @@ FFEBUR128State *ff_ebur128_init(unsigned int >>> channels, >>> } else if ((mode & FF_EBUR128_MODE_M) == FF_EBUR128_MODE_M) { >>> st->d->window = FFMAX(window, 400); >>> } else { >>> - goto free_sample_peak; >>> + goto free_true_peak; >>> } >>> st->d->audio_data_frames = st->samplerate * st->d->window / 1000; >>> if (st->d->audio_data_frames % st->d->samples_in_100ms) { >>> @@ -254,7 +341,7 @@ FFEBUR128State *ff_ebur128_init(unsigned int >>> channels, >>> st->d->audio_data = >>> (double *) av_mallocz_array(st->d->audio_data_frames, >>> st->channels * sizeof(double)); >>> - CHECK_ERROR(!st->d->audio_data, 0, free_sample_peak) >>> + CHECK_ERROR(!st->d->audio_data, 0, free_true_peak) >>> >>> ebur128_init_filter(st); >>> >>> @@ -267,6 +354,11 @@ FFEBUR128State *ff_ebur128_init(unsigned int >>> channels, >>> free_block_energy_histogram) >>> st->d->short_term_frame_counter = 0; >>> >>> +#if CONFIG_SWRESAMPLE >>> + unsigned int result = ebur128_init_resampler(st); >>> >> >> Why unsigned? >> >> + CHECK_ERROR(result, 0, free_short_term_block_energy_histogram) >>> +#endif >>> + >>> /* the first block needs 400ms of audio data */ >>> st->d->needed_frames = st->d->samples_in_100ms * 4; >>> /* start at the beginning of the buffer */ >>> @@ -287,6 +379,8 @@ free_block_energy_histogram: >>> av_free(st->d->block_energy_histogram); >>> free_audio_data: >>> av_free(st->d->audio_data); >>> +free_true_peak: >>> + av_free(st->d->true_peak); >>> free_sample_peak: >>> av_free(st->d->sample_peak); >>> free_channel_map: >>> @@ -306,12 +400,53 @@ void ff_ebur128_destroy(FFEBUR128State ** st) >>> av_free((*st)->d->audio_data); >>> av_free((*st)->d->channel_map); >>> av_free((*st)->d->sample_peak); >>> + av_free((*st)->d->true_peak); >>> av_free((*st)->d->data_ptrs); >>> +#if CONFIG_SWRESAMPLE >>> + ebur128_destroy_resampler(*st); >>> +#endif >>> av_free((*st)->d); >>> av_free(*st); >>> *st = NULL; >>> } >>> >>> +static int ebur128_use_swresample(FFEBUR128State* st) { >>> +#if CONFIG_SWRESAMPLE >>> + return ((st->mode & FF_EBUR128_MODE_TRUE_PEAK) == >>> FF_EBUR128_MODE_TRUE_PEAK); >>> +#else >>> + (void) st; >>> + return 0; >>> +#endif >>> +} >>> + >>> +static void ebur128_check_true_peak(FFEBUR128State* st, size_t frames) >>> { >>> +#if CONFIG_SWRESAMPLE >>> + size_t c, i; >>> + >>> + const int in_len = frames; >>> + const int out_len = st->d->resampler_buffer_output_frames; >>> + swr_convert(st->d->resampler, (uint8_t >>> **)&st->d->resampler_buffer_output, out_len, >>> + (const uint8_t **)&st->d->resampler_buffer_input, >>> in_len); >>> + >>> + for (c = 0; c < st->channels; ++c) { >>> + for (i = 0; i < out_len; ++i) { >>> + if (st->d->resampler_buffer_output[i * st->channels + c] > >>> + >>> st->d->true_peak[c]) { >>> + st->d->true_peak[c] = >>> + st->d->resampler_buffer_output[i * st->channels + c]; >>> + } else if (-st->d->resampler_buffer_output[i * >>> st->channels + c] > >>> + >>> st->d->true_peak[c]) { >>> + st->d->true_peak[c] = >>> + -st->d->resampler_buffer_output[i * st->channels + c]; >>> + } >>> + } >>> + } >>> +#else >>> + (void) st; (void) frames; >>> +#endif >>> +} >>> + >>> + >>> #define EBUR128_FILTER(type, scaling_factor) >>> \ >>> static void ebur128_filter_##type(FFEBUR128State* st, const type** >>> srcs, \ >>> size_t src_index, size_t frames, >>> \ >>> @@ -334,6 +469,15 @@ static void ebur128_filter_##type(FFEBUR128State* >>> st, const type** srcs, >>> if (max > st->d->sample_peak[c]) st->d->sample_peak[c] = >>> max; \ >>> } >>> \ >>> } >>> \ >>> + if (ebur128_use_swresample(st)) { >>> \ >>> + for (c = 0; c < st->channels; ++c) { >>> \ >>> + for (i = 0; i < frames; ++i) { >>> \ >>> + st->d->resampler_buffer_input[i * st->channels + c] = >>> \ >>> + (float) (srcs[c][src_index + i * stride] / >>> scaling_factor); \ >>> + } >>> \ >>> + } >>> \ >>> + ebur128_check_true_peak(st, frames); >>> \ >>> + } >>> \ >>> for (c = 0; c < st->channels; ++c) { >>> \ >>> int ci = st->d->channel_map[c] - 1; >>> \ >>> if (ci < 0) continue; >>> \ >>> @@ -781,3 +925,17 @@ int ff_ebur128_sample_peak(FFEBUR128State * st, >>> *out = st->d->sample_peak[channel_number]; >>> return 0; >>> >> >> Hmm, okay, I got a bit of a problem with this performance-wise. The way I >> see it first we convert everything to float, then we resample, then we find >> the maximum in an interleaved output. I'd say performance-wise it would be >> alot better if we could resample directly the input data to a planar float, >> and then measure the maximum there, so no intermediate conversions. >> >> This can be a second step, if you are not interested in this now. >> >> } >> >>> + >>> +int ff_ebur128_true_peak(FFEBUR128State * st, >>> + unsigned int channel_number, >>> + double* out) { >>> + if ((st->mode & FF_EBUR128_MODE_TRUE_PEAK) != >>> FF_EBUR128_MODE_TRUE_PEAK) { >>> + return AVERROR(EINVAL); >>> + } else if (channel_number >= st->channels) { >>> + return AVERROR(EINVAL); >>> + } >>> + *out = st->d->true_peak[channel_number] > >>> st->d->sample_peak[channel_number] >>> + ? st->d->true_peak[channel_number] >>> + : st->d->sample_peak[channel_number]; >>> + return 0; >>> +} >>> diff --git a/libavfilter/ebur128.h b/libavfilter/ebur128.h >>> index b94cd24..ca6dd62 100644 >>> --- a/libavfilter/ebur128.h >>> +++ b/libavfilter/ebur128.h >>> @@ -91,6 +91,9 @@ enum mode { >>> FF_EBUR128_MODE_LRA = (1 << 3) | FF_EBUR128_MODE_S, >>> /** can call ff_ebur128_sample_peak */ >>> FF_EBUR128_MODE_SAMPLE_PEAK = (1 << 4) | FF_EBUR128_MODE_M, >>> + /** can call ff_ebur128_true_peak */ >>> + FF_EBUR128_MODE_TRUE_PEAK = (1 << 5) | FF_EBUR128_MODE_M >>> + | FF_EBUR128_MODE_SAMPLE_PEAK >>> >> >> I'd rather not set implicitly MODE_M, because I don't want to give >> loudness measurement to the user, who only wants true peak. >> >> }; >>> >>> /** forward declaration of FFEBUR128StateInternal */ >>> @@ -283,6 +286,20 @@ int ff_ebur128_loudness_range_multiple(FFEBUR128State >>> ** sts, >>> int ff_ebur128_sample_peak(FFEBUR128State * st, >>> unsigned int channel_number, double *out); >>> >>> +/** \brief Get maximum true peak from all frames that have been >>> processed. >>> + * >>> + * @param st library state >>> + * @param channel_number channel to analyse >>> + * @param out maximum true peak in float format (1.0 is 0 dBTP) >>> + * @return >>> + * - 0 on success. >>> + * - AVERROR(EINVAL) if mode "FF_EBUR128_MODE_TRUE_PEAK" has not >>> + * been set. >>> + * - AVERROR(EINVAL) if invalid channel index. >>> + */ >>> +int ff_ebur128_true_peak(FFEBUR128State* st, >>> + unsigned int channel_number, double* out); >>> + >>> /** \brief Get relative threshold in LUFS. >>> * >>> * @param st library state >>> -- >>> 2.10.1 >>> >>> >> Regards, >> Marton > > > Finally had a minute to look at this again. Attached patch addresses > Michael's and Marton's comments. >
If no one has anything else, I'll push this in the next couple of days. > > >> >> _______________________________________________ >> ffmpeg-devel mailing list >> ffmpeg-devel@ffmpeg.org >> http://ffmpeg.org/mailman/listinfo/ffmpeg-devel >> > > _______________________________________________ ffmpeg-devel mailing list ffmpeg-devel@ffmpeg.org http://ffmpeg.org/mailman/listinfo/ffmpeg-devel