Fixes #9613 --- libavfilter/af_volumedetect.c | 234 +++++++++++++++++++++++++--------- 1 file changed, 172 insertions(+), 62 deletions(-)
diff --git a/libavfilter/af_volumedetect.c b/libavfilter/af_volumedetect.c index 8b001d1cf2..d13d043f88 100644 --- a/libavfilter/af_volumedetect.c +++ b/libavfilter/af_volumedetect.c @@ -24,94 +24,193 @@ #include "avfilter.h" #include "internal.h" +#define NOISE_FLOOR_DB_FLT -758 +#define MAX_DB_FLT 770 +#define MAX_DB 91 +#define HISTOGRAM_SIZE 0x10000 + typedef struct VolDetectContext { - /** - * Number of samples at each PCM value. - * histogram[0x8000 + i] is the number of samples at value i. - * The extra element is there for symmetry. - */ - uint64_t histogram[0x10001]; + uint64_t* histogram; ///< for integer number of samples at each PCM value, for float number of samples at each dB + uint64_t nb_samples; ///< number of samples + double sum2; ///< sum of the squares of the samples + double max; ///< maximum sample value + int is_float; ///< true if the input is in floating point } VolDetectContext; +static inline double logdb(double v, enum AVSampleFormat sample_fmt) +{ + /* + * Since it is a not a power value, able to use 20.0 * log10(v) + */ + if (sample_fmt == AV_SAMPLE_FMT_FLT) { + if (!v) + return MAX_DB_FLT; + return 20.0 * log10(v); + } else { + double d = v / (double)(0x8000 * 0x8000); + if (!v) + return MAX_DB; + return -log10(d) * 10; + } +} + +static void update_float_stats(VolDetectContext *vd, float *audio_data) +{ + double max_sample; + max_sample = fabsf(*audio_data); + if (max_sample > vd->max) + vd->max = max_sample; + vd->sum2 += *audio_data * *audio_data; + vd->histogram[(int)logdb(max_sample, AV_SAMPLE_FMT_FLT) + MAX_DB_FLT]++; + vd->nb_samples++; +} + static int filter_frame(AVFilterLink *inlink, AVFrame *samples) { AVFilterContext *ctx = inlink->dst; VolDetectContext *vd = ctx->priv; - int nb_samples = samples->nb_samples; int nb_channels = samples->ch_layout.nb_channels; int nb_planes = nb_channels; + int planar = 0; int plane, i; - int16_t *pcm; - if (!av_sample_fmt_is_planar(samples->format)) { - nb_samples *= nb_channels; + planar = 
av_sample_fmt_is_planar(samples->format); + if (!planar) nb_planes = 1; + if (vd->is_float) { + float *audio_data; + for (plane = 0; plane < nb_planes; plane++) { + audio_data = (float *)samples->extended_data[plane]; + for (i = 0; i < samples->nb_samples; i++) { + /* + * If the input is planar, the samples are in the seperated planes. + * if the input is not planar, the samples are interleaved. + * if the input is not planar, split the samples into the planes. + */ + if (planar) { + update_float_stats(vd, &audio_data[i]); + } else { + for (int j = 0; j < nb_channels; j++) + update_float_stats(vd, &audio_data[i * nb_channels + j]); + } + } + } + } else { + int16_t *pcm; + for (plane = 0; plane < nb_planes; plane++) { + pcm = (int16_t *)samples->extended_data[plane]; + for (i = 0; i < samples->nb_samples; i++) { + if (planar) { + vd->histogram[pcm[i] + 0x8000]++; + vd->nb_samples++; + } else { + for (int j = 0; j < nb_channels; j++) { + vd->histogram[pcm[i * nb_channels + j] + 0x8000]++; + vd->nb_samples++; + } + } + } + } } - for (plane = 0; plane < nb_planes; plane++) { - pcm = (int16_t *)samples->extended_data[plane]; - for (i = 0; i < nb_samples; i++) - vd->histogram[pcm[i] + 0x8000]++; - } - return ff_filter_frame(inlink->dst->outputs[0], samples); } -#define MAX_DB 91 - -static inline double logdb(uint64_t v) +static void print_stats(AVFilterContext *ctx) { - double d = v / (double)(0x8000 * 0x8000); - if (!v) - return MAX_DB; - return -log10(d) * 10; + VolDetectContext *vd = ctx->priv; + + if (!vd->nb_samples) + return; + if (vd->is_float) { + double rms; + int i, sum = 0; + av_log(ctx, AV_LOG_INFO, "n_samples: %" PRId64 "\n", vd->nb_samples); + rms = sqrt(vd->sum2 / vd->nb_samples); + av_log(ctx, AV_LOG_INFO, "mean_volume: %.1f dB\n", logdb(rms, AV_SAMPLE_FMT_FLT)); + av_log(ctx, AV_LOG_INFO, "max_volume: %.1f dB\n", logdb(vd->max, AV_SAMPLE_FMT_FLT)); + for (i = MAX_DB_FLT - NOISE_FLOOR_DB_FLT; i >= 0 && !vd->histogram[i]; i--); + for (; i >= 0 && sum < 
vd->nb_samples / 1000; i--) { + if (!vd->histogram[i]) + continue; + av_log(ctx, AV_LOG_INFO, "histogram_%ddb: %" PRId64 "\n", MAX_DB_FLT - i, vd->histogram[i]); + sum += vd->histogram[i]; + } + } else { + int i, max_volume, shift; + uint64_t nb_samples = 0, power = 0, nb_samples_shift = 0, sum = 0; + uint64_t histdb[MAX_DB + 1] = {0}; + for (i = 0; i < 0x10000; i++) + nb_samples += vd->histogram[i]; + av_log(ctx, AV_LOG_INFO, "n_samples: %" PRId64 "\n", nb_samples); + /* + * If nb_samples > 1<<34, there is a risk of overflow in the + * multiplication or the sum: shift all histogram values to avoid that. + * The total number of samples must be recomputed to avoid rounding + * errors. + */ + shift = av_log2(nb_samples >> 33); + for (i = 0; i < 0x10000; i++) { + nb_samples_shift += vd->histogram[i] >> shift; + power += (i - 0x8000) * (i - 0x8000) * (vd->histogram[i] >> shift); + } + if (!nb_samples_shift) + return; + power = (power + nb_samples_shift / 2) / nb_samples_shift; + av_assert0(power <= 0x8000 * 0x8000); + av_log(ctx, AV_LOG_INFO, "mean_volume: %.1f dB\n", -logdb((double)power, AV_SAMPLE_FMT_S16)); + max_volume = 0x8000; + while (max_volume > 0 && !vd->histogram[0x8000 + max_volume] && + !vd->histogram[0x8000 - max_volume]) + max_volume--; + av_log(ctx, AV_LOG_INFO, "max_volume: %.1f dB\n", -logdb((double)(max_volume * max_volume), AV_SAMPLE_FMT_S16)); + for (i = 0; i < 0x10000; i++) + histdb[(int)logdb((double)(i - 0x8000) * (i - 0x8000), AV_SAMPLE_FMT_S16)] += vd->histogram[i]; + for (i = 0; i <= MAX_DB && !histdb[i]; i++); + for (; i <= MAX_DB && sum < nb_samples / 1000; i++) { + av_log(ctx, AV_LOG_INFO, "histogram_%ddb: %" PRId64 "\n", i, histdb[i]); + sum += histdb[i]; + } + } } -static void print_stats(AVFilterContext *ctx) +static int config_output(AVFilterLink *outlink) { + AVFilterContext *ctx = outlink->src; VolDetectContext *vd = ctx->priv; - int i, max_volume, shift; - uint64_t nb_samples = 0, power = 0, nb_samples_shift = 0, sum = 0; - uint64_t 
histdb[MAX_DB + 1] = { 0 }; - - for (i = 0; i < 0x10000; i++) - nb_samples += vd->histogram[i]; - av_log(ctx, AV_LOG_INFO, "n_samples: %"PRId64"\n", nb_samples); - if (!nb_samples) - return; - /* If nb_samples > 1<<34, there is a risk of overflow in the - multiplication or the sum: shift all histogram values to avoid that. - The total number of samples must be recomputed to avoid rounding - errors. */ - shift = av_log2(nb_samples >> 33); - for (i = 0; i < 0x10000; i++) { - nb_samples_shift += vd->histogram[i] >> shift; - power += (i - 0x8000) * (i - 0x8000) * (vd->histogram[i] >> shift); - } - if (!nb_samples_shift) - return; - power = (power + nb_samples_shift / 2) / nb_samples_shift; - av_assert0(power <= 0x8000 * 0x8000); - av_log(ctx, AV_LOG_INFO, "mean_volume: %.1f dB\n", -logdb(power)); - - max_volume = 0x8000; - while (max_volume > 0 && !vd->histogram[0x8000 + max_volume] && - !vd->histogram[0x8000 - max_volume]) - max_volume--; - av_log(ctx, AV_LOG_INFO, "max_volume: %.1f dB\n", -logdb(max_volume * max_volume)); - - for (i = 0; i < 0x10000; i++) - histdb[(int)logdb((i - 0x8000) * (i - 0x8000))] += vd->histogram[i]; - for (i = 0; i <= MAX_DB && !histdb[i]; i++); - for (; i <= MAX_DB && sum < nb_samples / 1000; i++) { - av_log(ctx, AV_LOG_INFO, "histogram_%ddb: %"PRId64"\n", i, histdb[i]); - sum += histdb[i]; + vd->is_float = outlink->format == AV_SAMPLE_FMT_FLT || + outlink->format == AV_SAMPLE_FMT_FLTP; + + if (!vd->is_float) { + /* + * Number of samples at each PCM value. + * Only used for integer formats. + * For 16 bit signed PCM there are 65536. + * histogram[0x8000 + i] is the number of samples at value i. + * The extra element is there for symmetry. + */ + vd->histogram = av_calloc(HISTOGRAM_SIZE + 1, sizeof(uint64_t)); + if (!vd->histogram) + return AVERROR(ENOMEM); + } else { + /* + * The histogram is used to store the number of samples at each dB + * instead of the number of samples at each PCM value. + * The range of dB is from -758 to 770. 
+ */ + vd->histogram = av_calloc(MAX_DB_FLT - NOISE_FLOOR_DB_FLT + 1, sizeof(uint64_t)); + if (!vd->histogram) + return AVERROR(ENOMEM); } + return 0; } static av_cold void uninit(AVFilterContext *ctx) { + VolDetectContext *vd = ctx->priv; print_stats(ctx); + if (vd->histogram) + av_freep(&vd->histogram); } static const AVFilterPad volumedetect_inputs[] = { @@ -122,6 +221,14 @@ static const AVFilterPad volumedetect_inputs[] = { }, }; +static const AVFilterPad volumedetect_outputs[] = { + { + .name = "default", + .type = AVMEDIA_TYPE_AUDIO, + .config_props = config_output, + }, +}; + const AVFilter ff_af_volumedetect = { .name = "volumedetect", .description = NULL_IF_CONFIG_SMALL("Detect audio volume."), @@ -129,6 +236,9 @@ const AVFilter ff_af_volumedetect = { .uninit = uninit, .flags = AVFILTER_FLAG_METADATA_ONLY, FILTER_INPUTS(volumedetect_inputs), - FILTER_OUTPUTS(ff_audio_default_filterpad), - FILTER_SAMPLEFMTS(AV_SAMPLE_FMT_S16, AV_SAMPLE_FMT_S16P), + FILTER_OUTPUTS(volumedetect_outputs), + FILTER_SAMPLEFMTS(AV_SAMPLE_FMT_S16, + AV_SAMPLE_FMT_S16P, + AV_SAMPLE_FMT_FLT, + AV_SAMPLE_FMT_FLTP), }; -- 2.44.0 > On Mar 23, 2024, at 6:21 PM, Paul B Mahol <one...@gmail.com> wrote: > > On Sat, Mar 23, 2024 at 3:28 PM Yiğithan Yiğit <yigithanyigi...@gmail.com > <mailto:yigithanyigi...@gmail.com>> > wrote: > >> Hi, >> >> According to your advices, I made some changes of mine last patch. I feel >> like this one way more better. I removed trivial calculations but I want to >> say I am not proud of how I handled histogram in float despite 16 bit >> integer histogram. I am storing dB values instead of storing samples. I >> feel this one is more convenient. Still I am open to advices. >> >> > I see no patch at all. > > volumedetect displays histogram typically with 1dB steps, so build > histogram with 1dB range between each bin. > for float, only use normal values, no +inf/subnormals/nans etc. 
> I bet there are fewer than the current 2^16 entries in the histogram table of the filter > context to fill. > There is no need to convert each input sample to dB scale. Just > calculate the range of each 1dB entry in linear space, and > every sample that falls in such a range gets added to the corresponding histogram bin > entry. > Or if you calculate in dB scale anyway, then just round (ceilf/floorf/lrintf) the > dB value (removing fractional parts) and add it into the histogram table; do > not forget to count >+/-1.0 values too (ones with >0dB values). You can > use normal mean/max/peak calculations (do not use the histogram to calculate > them for float/double). > > > >> Thank you. >> Yigithan >> >> >>  >> >>> On Mar 21, 2024, at 11:30 PM, Paul B Mahol <one...@gmail.com >>> <mailto:one...@gmail.com>> wrote: >>> >>> On Wed, Mar 20, 2024 at 11:55 PM Yiğithan Yiğit < >> yigithanyigi...@gmail.com <mailto:yigithanyigi...@gmail.com> >> <mailto:yigithanyigi...@gmail.com>> >>> wrote: >>> >>>> >>>>> On Mar 21, 2024, at 12:10 AM, Paul B Mahol <one...@gmail.com >>>>> <mailto:one...@gmail.com>> wrote: >>>>> >>>>> Why? This is pointless. >>>>> >>>>> volumedetect has histogram output, the float patch does not have it at >> all. >>>>> Use astats filter. >>>>> >>>>> On Wed, Mar 20, 2024 at 9:47 PM Yiğithan Yiğit < >>>> yigithanyigi...@gmail.com> >>>>> wrote: >>>>> >>>>>> _______________________________________________ >>>>>> ffmpeg-devel mailing list >>>>>> ffmpeg-devel@ffmpeg.org >>>>>> https://ffmpeg.org/mailman/listinfo/ffmpeg-devel >>>>>> >>>>>> To unsubscribe, visit link above, or email >>>>>> ffmpeg-devel-requ...@ffmpeg.org with subject "unsubscribe". >>>>>> >>>>> _______________________________________________ >>>>> ffmpeg-devel mailing list >>>>> ffmpeg-devel@ffmpeg.org >>>>> https://ffmpeg.org/mailman/listinfo/ffmpeg-devel >>>>> >>>>> To unsubscribe, visit link above, or email >>>>> ffmpeg-devel-requ...@ffmpeg.org with subject "unsubscribe".
>>>> >>>> I am a beginner/student, also new to open source, but I love FFmpeg and >>>> use it in my daily life. From my perspective volumedetect is way more user >>>> friendly. I believe adding this patch would be useful to people such as >>>> #9613. The reason for the lack of histogram output for float is mostly my >>>> indecision about the range of the histogram. I am open to suggestions and >>>> after that I can make a new patch. >>>> >>> >>> It is trivial (to some people) to add histogram per dB for float/double >>> inputs. >>> But this patch just does some extremely trivial math calculations so that >>> float input has completely different output from integer ones. >>> That is very odd and unfriendly from my perspective. >>> >>> Besides, if you are only interested in a discrete sample audio peak finder for >>> audio input, use astats and the measure_overall=Peak_level option. >>> Yes, they are not on by default, because more statistics are more important >>> than a single number. >>> >>> I'm not against adding proper and useful and correct float/double support >>> to volumedetect, but it needs to have same/similar structure of output as >>> integer sample format input audio, otherwise it just looks lazy and prone >>> to leave users wondering what is going on when they use different sample >> formats >>> in their graphs. >>> >>> >>>> >>>> Best Regards >>>> Yigithan >>>> >>>> >>>> _______________________________________________ >>>> ffmpeg-devel mailing list >>>> ffmpeg-devel@ffmpeg.org <mailto:ffmpeg-devel@ffmpeg.org> >>>> <mailto:ffmpeg-devel@ffmpeg.org> >>>> https://ffmpeg.org/mailman/listinfo/ffmpeg-devel >>>> >>>> To unsubscribe, visit link above, or email >>>> ffmpeg-devel-requ...@ffmpeg.org <mailto:ffmpeg-devel-requ...@ffmpeg.org> >>>> <mailto:ffmpeg-devel-requ...@ffmpeg.org> >> with subject "unsubscribe".
>>>> >>> _______________________________________________ >>> ffmpeg-devel mailing list >>> ffmpeg-devel@ffmpeg.org <mailto:ffmpeg-devel@ffmpeg.org> >>> <mailto:ffmpeg-devel@ffmpeg.org> >>> https://ffmpeg.org/mailman/listinfo/ffmpeg-devel >>> >>> To unsubscribe, visit link above, or email >>> ffmpeg-devel-requ...@ffmpeg.org <mailto:ffmpeg-devel-requ...@ffmpeg.org> >>> <mailto:ffmpeg-devel-requ...@ffmpeg.org> >> with subject "unsubscribe". >> >> _______________________________________________ >> ffmpeg-devel mailing list >> ffmpeg-devel@ffmpeg.org >> https://ffmpeg.org/mailman/listinfo/ffmpeg-devel >> >> To unsubscribe, visit link above, or email >> ffmpeg-devel-requ...@ffmpeg.org with subject "unsubscribe". >> > _______________________________________________ > ffmpeg-devel mailing list > ffmpeg-devel@ffmpeg.org > https://ffmpeg.org/mailman/listinfo/ffmpeg-devel > > To unsubscribe, visit link above, or email > ffmpeg-devel-requ...@ffmpeg.org with subject "unsubscribe". _______________________________________________ ffmpeg-devel mailing list ffmpeg-devel@ffmpeg.org https://ffmpeg.org/mailman/listinfo/ffmpeg-devel To unsubscribe, visit link above, or email ffmpeg-devel-requ...@ffmpeg.org with subject "unsubscribe".