> On Jun 17, 2024, at 5:52 PM, Rémi Denis-Courmont <r...@remlab.net> wrote: > > > > Le 17 juin 2024 13:18:11 GMT+02:00, Yigithan Yigit > <yigithanyigitde...@gmail.com <mailto:yigithanyigitde...@gmail.com>> a écrit : >> --- >> libavfilter/af_volumedetect.c | 159 ++++++++++++++++++++++++++++------ >> 1 file changed, 133 insertions(+), 26 deletions(-) >> >> diff --git a/libavfilter/af_volumedetect.c b/libavfilter/af_volumedetect.c >> index 327801a7f9..dbbcd037a5 100644 >> --- a/libavfilter/af_volumedetect.c >> +++ b/libavfilter/af_volumedetect.c >> @@ -20,27 +20,51 @@ >> >> #include "libavutil/channel_layout.h" >> #include "libavutil/avassert.h" >> +#include "libavutil/mem.h" >> #include "audio.h" >> #include "avfilter.h" >> #include "internal.h" >> >> +#define MAX_DB_FLT 1024 >> #define MAX_DB 91 >> +#define HISTOGRAM_SIZE 0x10000 >> +#define HISTOGRAM_SIZE_FLT (MAX_DB_FLT*2) >> >> typedef struct VolDetectContext { >> - /** >> - * Number of samples at each PCM value. >> - * histogram[0x8000 + i] is the number of samples at value i. >> - * The extra element is there for symmetry. >> - */ >> - uint64_t histogram[0x10001]; >> + uint64_t* histogram; ///< for integer number of samples at each PCM >> value, for float number of samples at each dB >> + uint64_t nb_samples; ///< number of samples >> + double sum2; ///< sum of the squares of the samples >> + double max; ///< maximum sample value >> + int is_float; ///< true if the input is in floating point >> } VolDetectContext; >> >> -static inline double logdb(uint64_t v) >> +static inline double logdb(double v, enum AVSampleFormat sample_fmt) >> { >> - double d = v / (double)(0x8000 * 0x8000); >> - if (!v) >> - return MAX_DB; >> - return -log10(d) * 10; >> + if (sample_fmt == AV_SAMPLE_FMT_FLT) { >> + if (!v) >> + return MAX_DB_FLT; >> + return -log10(v) * 10; >> + } else { >> + double d = v / (double)(0x8000 * 0x8000); >> + if (!v) >> + return MAX_DB; >> + return -log10(d) * 10; >> + } >> +} >> + >> +static void update_float_stats(VolDetectContext *vd, float *audio_data) >> +{ >> + double sample; >> + int idx; >> + if(!isnormal(*audio_data)) >> + return; > > Do we really need to classify floats here? That's probably going to hurt > perfs badly, and makes an otherwise very vectorisable function not so easily > vectored.
You could be correct, we might consider checking NaN, Inf+/- values. Otherwise there is a risk of a crash if someone uses something like this “aelvalsrc=3.4e39”. > >> + sample = fabsf(*audio_data); >> + if (sample > vd->max) >> + vd->max = sample; >> + vd->sum2 += sample * sample; >> + idx = lrintf(floorf(logdb(sample * sample, AV_SAMPLE_FMT_FLT))) + >> MAX_DB_FLT; > > You're recomputing the same value again, and you seem to be rounding twice in > a row? I missed, fixed. > >> + vd->histogram[idx]++; >> + vd->nb_samples++; >> } >> >> static int filter_frame(AVFilterLink *inlink, AVFrame *samples) >> @@ -51,18 +75,41 @@ static int filter_frame(AVFilterLink *inlink, AVFrame >> *samples) >> int nb_channels = samples->ch_layout.nb_channels; >> int nb_planes = nb_channels; >> int plane, i; >> - int16_t *pcm; >> + int planar = 0; >> >> - if (!av_sample_fmt_is_planar(samples->format)) { >> - nb_samples *= nb_channels; >> + planar = av_sample_fmt_is_planar(samples->format); >> + if (!planar) >> nb_planes = 1; >> + if (vd->is_float) { >> + float *audio_data; >> + for (plane = 0; plane < nb_planes; plane++) { >> + audio_data = (float *)samples->extended_data[plane]; >> + for (i = 0; i < nb_samples; i++) { >> + if (planar) { >> + update_float_stats(vd, &audio_data[i]); >> + } else { >> + for (int j = 0; j < nb_channels; j++) >> + update_float_stats(vd, &audio_data[i * nb_channels >> + j]); >> + } >> + } >> + } >> + } else { >> + int16_t *pcm; >> + for (plane = 0; plane < nb_planes; plane++) { >> + pcm = (int16_t *)samples->extended_data[plane]; >> + for (i = 0; i < nb_samples; i++) { >> + if (planar) { >> + vd->histogram[pcm[i] + 0x8000]++; >> + vd->nb_samples++; >> + } else { >> + for (int j = 0; j < nb_channels; j++) { >> + vd->histogram[pcm[i * nb_channels + j] + 0x8000]++; >> + vd->nb_samples++; >> + } >> + } >> + } >> + } > > Can't we pick the correct implementation (planar/packed and float/int) once > and for all whilst configuring the filter? Actually sounds good, I am going to try. Thanks for the feedback! > >> } >> - for (plane = 0; plane < nb_planes; plane++) { >> - pcm = (int16_t *)samples->extended_data[plane]; >> - for (i = 0; i < nb_samples; i++) >> - vd->histogram[pcm[i] + 0x8000]++; >> - } >> - >> return ff_filter_frame(inlink->dst->outputs[0], samples); >> } >> >> @@ -73,6 +120,20 @@ static void print_stats(AVFilterContext *ctx) >> uint64_t nb_samples = 0, power = 0, nb_samples_shift = 0, sum = 0; >> uint64_t histdb[MAX_DB + 1] = { 0 }; >> >> + if (!vd->nb_samples) >> + return; >> + if (vd->is_float) { >> + av_log(ctx, AV_LOG_INFO, "n_samples: %" PRId64 "\n", >> vd->nb_samples); >> + av_log(ctx, AV_LOG_INFO, "mean_volume: %.1f dB\n", -logdb(vd->sum2 >> / vd->nb_samples, AV_SAMPLE_FMT_FLT)); >> + av_log(ctx, AV_LOG_INFO, "max_volume: %.1f dB\n", >> -2.0*logdb(vd->max, AV_SAMPLE_FMT_FLT)); >> + for (i = 0; i < HISTOGRAM_SIZE_FLT && !vd->histogram[i]; i++); >> + for (; i >= 0 && sum < vd->nb_samples / 1000; i++) { >> + if (!vd->histogram[i]) >> + continue; >> + av_log(ctx, AV_LOG_INFO, "histogram_%ddb: %" PRId64 "\n", >> MAX_DB_FLT - i, vd->histogram[i]); >> + sum += vd->histogram[i]; >> + } >> + } else { >> for (i = 0; i < 0x10000; i++) >> nb_samples += vd->histogram[i]; >> av_log(ctx, AV_LOG_INFO, "n_samples: %"PRId64"\n", nb_samples); >> @@ -92,26 +153,61 @@ static void print_stats(AVFilterContext *ctx) >> return; >> power = (power + nb_samples_shift / 2) / nb_samples_shift; >> av_assert0(power <= 0x8000 * 0x8000); >> - av_log(ctx, AV_LOG_INFO, "mean_volume: %.1f dB\n", -logdb(power)); >> + av_log(ctx, AV_LOG_INFO, "mean_volume: %.1f dB\n", >> -logdb((double)power, AV_SAMPLE_FMT_S16)); >> >> max_volume = 0x8000; >> while (max_volume > 0 && !vd->histogram[0x8000 + max_volume] && >> !vd->histogram[0x8000 - max_volume]) >> max_volume--; >> - av_log(ctx, AV_LOG_INFO, "max_volume: %.1f dB\n", -logdb(max_volume * >> max_volume)); >> + av_log(ctx, AV_LOG_INFO, "max_volume: %.1f dB\n", >> -logdb((double)(max_volume * max_volume), AV_SAMPLE_FMT_S16)); >> >> for (i = 0; i < 0x10000; i++) >> - histdb[(int)logdb((i - 0x8000) * (i - 0x8000))] += vd->histogram[i]; >> + histdb[(int)logdb((double)(i - 0x8000) * (i - 0x8000), >> AV_SAMPLE_FMT_S16)] += vd->histogram[i]; >> for (i = 0; i <= MAX_DB && !histdb[i]; i++); >> for (; i <= MAX_DB && sum < nb_samples / 1000; i++) { >> - av_log(ctx, AV_LOG_INFO, "histogram_%ddb: %"PRId64"\n", i, >> histdb[i]); >> + av_log(ctx, AV_LOG_INFO, "histogram_%ddb: %"PRId64"\n", -i, >> histdb[i]); >> sum += histdb[i]; >> } >> + } >> +} >> + >> +static int config_output(AVFilterLink *outlink) >> +{ >> + AVFilterContext *ctx = outlink->src; >> + VolDetectContext *vd = ctx->priv; >> + size_t histogram_size; >> + >> + vd->is_float = outlink->format == AV_SAMPLE_FMT_FLT || >> + outlink->format == AV_SAMPLE_FMT_FLTP; >> + >> + if (!vd->is_float) { >> + /* >> + * Number of samples at each PCM value. >> + * Only used for integer formats. >> + * For 16 bit signed PCM there are 65536. >> + * histogram[0x8000 + i] is the number of samples at value i. >> + * The extra element is there for symmetry. >> + */ >> + histogram_size = HISTOGRAM_SIZE + 1; >> + } else { >> + /* >> + * The histogram is used to store the number of samples at each dB >> + * instead of the number of samples at each PCM value. >> + */ >> + histogram_size = HISTOGRAM_SIZE_FLT + 1; >> + } >> + vd->histogram = av_calloc(histogram_size, sizeof(uint64_t)); >> + if (!vd->histogram) >> + return AVERROR(ENOMEM); >> + return 0; >> } >> >> static av_cold void uninit(AVFilterContext *ctx) >> { >> + VolDetectContext *vd = ctx->priv; >> print_stats(ctx); >> + if (vd->histogram) >> + av_freep(&vd->histogram); >> } >> >> static const AVFilterPad volumedetect_inputs[] = { >> @@ -122,6 +218,14 @@ static const AVFilterPad volumedetect_inputs[] = { >> }, >> }; >> >> +static const AVFilterPad volumedetect_outputs[] = { >> + { >> + .name = "default", >> + .type = AVMEDIA_TYPE_AUDIO, >> + .config_props = config_output, >> + }, >> +}; >> + >> const AVFilter ff_af_volumedetect = { >> .name = "volumedetect", >> .description = NULL_IF_CONFIG_SMALL("Detect audio volume."), >> @@ -129,6 +233,9 @@ const AVFilter ff_af_volumedetect = { >> .uninit = uninit, >> .flags = AVFILTER_FLAG_METADATA_ONLY, >> FILTER_INPUTS(volumedetect_inputs), >> - FILTER_OUTPUTS(ff_audio_default_filterpad), >> - FILTER_SAMPLEFMTS(AV_SAMPLE_FMT_S16, AV_SAMPLE_FMT_S16P), >> + FILTER_OUTPUTS(volumedetect_outputs), >> + FILTER_SAMPLEFMTS(AV_SAMPLE_FMT_S16, >> + AV_SAMPLE_FMT_S16P, >> + AV_SAMPLE_FMT_FLT, >> + AV_SAMPLE_FMT_FLTP), >> }; > _______________________________________________ > ffmpeg-devel mailing list > ffmpeg-devel@ffmpeg.org <mailto:ffmpeg-devel@ffmpeg.org> > https://ffmpeg.org/mailman/listinfo/ffmpeg-devel > > To unsubscribe, visit link above, or email > ffmpeg-devel-requ...@ffmpeg.org <mailto:ffmpeg-devel-requ...@ffmpeg.org> with > subject "unsubscribe". _______________________________________________ ffmpeg-devel mailing list ffmpeg-devel@ffmpeg.org https://ffmpeg.org/mailman/listinfo/ffmpeg-devel To unsubscribe, visit link above, or email ffmpeg-devel-requ...@ffmpeg.org with subject "unsubscribe".