Signed-off-by: Paul B Mahol <one...@gmail.com> --- doc/filters.texi | 35 +++ libavfilter/Makefile | 1 + libavfilter/af_declick.c | 586 +++++++++++++++++++++++++++++++++++++++++++++++ libavfilter/allfilters.c | 1 + 4 files changed, 623 insertions(+) create mode 100644 libavfilter/af_declick.c
diff --git a/doc/filters.texi b/doc/filters.texi index 1620ae1cfa..9a067ba9ea 100644 --- a/doc/filters.texi +++ b/doc/filters.texi @@ -2562,6 +2562,41 @@ Optional. It should have a value much less than 1 (e.g. 0.05 or 0.02) and is used to prevent clipping. @end table +@section declick +Remove impulsive noise from input audio. + +Samples detected as impulsive noise are replaced by interpolated samples using +autoregressive modeling. + +@table @option +@item w +Set window size, in milliseconds. Allowed range is from @code{10} to @code{100}. +Default value is @code{54} milliseconds. +This sets size of window which will be processed at once. + +@item o +Set window overlap, in percentage of window size. Allowed range is from @code{50} +to @code{95}. Default value is @code{75} percent. +Setting this to very high value increases impulsive noise removal but makes whole +processs much slower. + +@item a +Set autoregression order, in percentage of window size. Allowed range is from +@code{1} to @code{50}. Default value is @code{2} percent. This option also controls +quality of interpolated samples using neighbour good samples. + +@item t +Set threshold value. Allowed range is from @code{1} to @code{10}. +Default value is @code{2}. +This controls the strength of impulse noise which is going to be removed. + +@item b +Set burst fusion, in percentage of window size. Allowed range is @code{0} to +@code{40}. Default value is @code{10} percent. +This controls between how much samples, which are detected as impulsive noise, +any sample between 2 detected noise samples is considered also as noise sample. +@end table + @section drmeter Measure audio dynamic range. diff --git a/libavfilter/Makefile b/libavfilter/Makefile index 1043b41d80..978751d2a0 100644 --- a/libavfilter/Makefile +++ b/libavfilter/Makefile @@ -87,6 +87,7 @@ OBJS-$(CONFIG_COMPENSATIONDELAY_FILTER) += af_compensationdelay.o OBJS-$(CONFIG_CROSSFEED_FILTER) += af_crossfeed.o OBJS-$(CONFIG_CRYSTALIZER_FILTER) += af_crystalizer.o OBJS-$(CONFIG_DCSHIFT_FILTER) += af_dcshift.o +OBJS-$(CONFIG_DECLICK_FILTER) += af_declick.o OBJS-$(CONFIG_DRMETER_FILTER) += af_drmeter.o OBJS-$(CONFIG_DYNAUDNORM_FILTER) += af_dynaudnorm.o OBJS-$(CONFIG_EARWAX_FILTER) += af_earwax.o diff --git a/libavfilter/af_declick.c b/libavfilter/af_declick.c new file mode 100644 index 0000000000..0de4c35c95 --- /dev/null +++ b/libavfilter/af_declick.c @@ -0,0 +1,586 @@ +/* + * Copyright (c) 2018 Paul B Mahol + * + * This file is part of FFmpeg. + * + * FFmpeg is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * FFmpeg is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with FFmpeg; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ + +#include "libavutil/audio_fifo.h" +#include "libavutil/opt.h" +#include "avfilter.h" +#include "audio.h" +#include "formats.h" + +typedef struct DeclickContext { + const AVClass *class; + double w; + double overlap; + double threshold; + double ar; + double burst; + + int ar_order; + int nb_burst_samples; + int window_size; + int hop_size; + + AVFrame *in; + AVFrame *out; + AVFrame *buffer; + AVFrame *is; + double *auxiliary; + double *detection; + double *acoefficients; + double *acorrelation; + double *tmp; + double *interpolated; + double *matrix; + int matrix_size; + double *vector; + int vector_size; + uint8_t *click; + int *index; + + double *ltriangular; + int ltriangular_size; + double *diagonal; + int d_size; + double *y; + int y_size; + + int64_t pts; + uint64_t nb_samples; + uint64_t detected_clicks; + int samples_left; + + AVAudioFifo *fifo; + double *window_func_lut; +} DeclickContext; + +#define OFFSET(x) offsetof(DeclickContext, x) +#define AF AV_OPT_FLAG_AUDIO_PARAM|AV_OPT_FLAG_FILTERING_PARAM + +static const AVOption declick_options[] = { + { "w", "set window size", OFFSET(w), AV_OPT_TYPE_DOUBLE, {.dbl=54}, 10, 100, AF }, + { "o", "set window overlap", OFFSET(overlap), AV_OPT_TYPE_DOUBLE, {.dbl=75}, 50, 95, AF }, + { "a", "set autoregression order", OFFSET(ar), AV_OPT_TYPE_DOUBLE, {.dbl=2}, 1, 50, AF }, + { "t", "set threshold", OFFSET(threshold), AV_OPT_TYPE_DOUBLE, {.dbl=2}, 1, 10, AF }, + { "b", "set burst fusion", OFFSET(burst), AV_OPT_TYPE_DOUBLE, {.dbl=10}, 0, 40, AF }, + { NULL } +}; + +AVFILTER_DEFINE_CLASS(declick); + +static int query_formats(AVFilterContext *ctx) +{ + AVFilterFormats *formats = NULL; + AVFilterChannelLayouts *layouts = NULL; + static const enum AVSampleFormat sample_fmts[] = { + AV_SAMPLE_FMT_DBLP, + AV_SAMPLE_FMT_NONE + }; + int ret; + + formats = ff_make_format_list(sample_fmts); + if (!formats) + return AVERROR(ENOMEM); + ret = ff_set_common_formats(ctx, formats); + if (ret < 0) + return ret; + + layouts = ff_all_channel_counts(); + if (!layouts) + return AVERROR(ENOMEM); + + ret = ff_set_common_channel_layouts(ctx, layouts); + if (ret < 0) + return ret; + + formats = ff_all_samplerates(); + return ff_set_common_samplerates(ctx, formats); +} + +static int config_input(AVFilterLink *inlink) +{ + AVFilterContext *ctx = inlink->dst; + DeclickContext *s = ctx->priv; + int i; + + s->pts = AV_NOPTS_VALUE; + s->window_size = inlink->sample_rate * s->w / 1000.; + if (s->window_size < 100) + return AVERROR(EINVAL); + s->ar_order = s->window_size * s->ar / 100.; + s->nb_burst_samples = s->window_size * s->burst / 1000.; + s->hop_size = s->window_size * (1. - (s->overlap / 100.)); + if (s->hop_size < 1) + return AVERROR(EINVAL); + + s->fifo = av_audio_fifo_alloc(inlink->format, inlink->channels, s->window_size); + if (!s->fifo) + return AVERROR(ENOMEM); + + s->window_func_lut = av_realloc_f(s->window_func_lut, s->window_size, + sizeof(*s->window_func_lut)); + if (!s->window_func_lut) + return AVERROR(ENOMEM); + for (i = 0; i < s->window_size; i++) + s->window_func_lut[i] = sin(M_PI * i / s->window_size) * (1. - (s->overlap / 100.)) * M_PI_2; + + av_frame_free(&s->in); + av_frame_free(&s->out); + s->in = ff_get_audio_buffer(inlink, s->window_size); + s->out = ff_get_audio_buffer(inlink, s->window_size); + s->buffer = ff_get_audio_buffer(inlink, s->window_size * 2); + s->is = ff_get_audio_buffer(inlink, s->window_size); + if (!s->in || !s->out || !s->buffer || !s->is) + return AVERROR(ENOMEM); + + s->detection = av_calloc(s->window_size, sizeof(*s->detection)); + s->auxiliary = av_calloc(s->ar_order + 1, sizeof(*s->auxiliary)); + s->acoefficients = av_calloc(s->ar_order + 1, sizeof(*s->acoefficients)); + s->acorrelation = av_calloc(s->ar_order + 1, sizeof(*s->acorrelation)); + s->tmp = av_calloc(s->ar_order, sizeof(*s->tmp)); + s->click = av_calloc(s->window_size, sizeof(*s->click)); + s->index = av_calloc(s->window_size, sizeof(*s->index)); + s->interpolated = av_calloc(s->window_size, sizeof(*s->interpolated)); + if (!s->auxiliary || !s->acoefficients || !s->detection || !s->click || + !s->index || !s->interpolated || !s->acorrelation || !s->tmp) + return AVERROR(ENOMEM); + + return 0; +} + +static void autocorrelation(const double *input, int order, int size, double *output) +{ + double scale = 1. / size; + int i, j; + + for (i = 0; i <= order; i++) { + double value = 0.; + + for (j = i; j < size; j++) + value += input[j] * input[j - i]; + + output[i] = value * scale; + } +} + +static double autoregression(const double *samples, int ar_order, int nb_samples, double *k, double *r, double *a) +{ + double alpha; + int i, j; + + memset(a, 0, ar_order * sizeof(*a)); + + autocorrelation(samples, ar_order, nb_samples, r); + + /* Levinson-Durbin algorithm */ + k[0] = a[0] = -r[1] / r[0]; + alpha = r[0] * (1. - k[0] * k[0]); + for (i = 1; i < ar_order; i++) { + double epsilon = 0.; + + for (j = 0; j < i; j++) + epsilon += a[j] * r[i - j]; + epsilon += r[i + 1]; + + k[i] = -epsilon / alpha; + alpha *= (1. - k[i] * k[i]); + for (j = i - 1; j >= 0; j--) + k[j] = a[j] + k[i] * a[i - j - 1]; + for (j = 0; j <= i; j++) + a[j] = k[j]; + } + + k[0] = 1.; + for (i = 0; i < ar_order; i++) + k[i + 1] = a[i]; + + return sqrt(alpha); +} + +static int isfinite_array(double *samples, int nb_samples) +{ + int i; + + for (i = 0; i < nb_samples; i++) + if (!isfinite(samples[i])) + return 0; + + return 1; +} + +static int find_index(int *index, int value, int size) +{ + int i, start, end; + + if ((value < index[0]) || (value > index[size - 1])) + return 1; + + i = start = 0; + end = size - 1; + + while (start <= end) { + i = (end + start) / 2; + if (index[i] == value) + return 0; + if (value < index[i]) + end = i - 1; + if (value > index[i]) + start = i + 1; + } + + return 1; +} + +static int cholesky_decomposition(DeclickContext *s, double *matrix, double *vector, int n, double *out) +{ + double *ltriangular, *diagonal, *y; + int i, j, k; + + av_fast_malloc(&s->ltriangular, &s->ltriangular_size, n * n * sizeof(*s->ltriangular)); + ltriangular = s->ltriangular; + + av_fast_malloc(&s->diagonal, &s->d_size, n * sizeof(*s->diagonal)); + diagonal = s->diagonal; + + av_fast_malloc(&s->y, &s->y_size, n * sizeof(*s->y)); + y = s->y; + + if (!ltriangular || !diagonal || !y) + return AVERROR(ENOMEM); + + memset(s->ltriangular, 0, n * n * sizeof(*s->ltriangular)); + memset(s->diagonal, 0, n * sizeof(*s->diagonal)); + memset(s->y, 0, n * sizeof(*s->y)); + + for (i = 0; i < n; i++) { + const int in = i * n; + + diagonal[i] = matrix[in + i]; + for (j = 0; j < i; j++) + diagonal[i] -= diagonal[j] * ltriangular[in + j] * ltriangular[in + j]; + + if (diagonal[i] == 0.) { + return -1; + } + + for (j = i + 1; j < n; j++) { + const int jn = j * n; + double x; + + x = matrix[jn + i]; + for (k = 0; k < i; k++) + x -= diagonal[k] * ltriangular[in + k] * ltriangular[jn + k]; + ltriangular[in + j] = ltriangular[jn + i] = x / diagonal[i]; + } + } + + for (i = 0; i < n; i++) { + const int in = i * n; + + y[i] = vector[i]; + for (j = 0; j <= i; j++) + y[i] -= ltriangular[in + j] * y[j]; + } + + for (i = n - 1; i >= 0; i--) { + const int in = i * n; + + out[i] = y[i] / diagonal[i]; + for (j = i; j < n; j++) + out[i] -= ltriangular[in + j] * out[j]; + } + + return 0; +} + +static int interpolation(DeclickContext *s, const double *src, int ar_order, + double *acoefficients, int *index, int nb_clicks, + double *auxiliary, double *interpolated) +{ + double *vector, *matrix; + int i, j; + + av_fast_malloc(&s->matrix, &s->matrix_size, nb_clicks * nb_clicks * sizeof(*s->matrix)); + matrix = s->matrix; + if (!matrix) + return AVERROR(ENOMEM); + + av_fast_malloc(&s->vector, &s->vector_size, nb_clicks * sizeof(*s->vector)); + vector = s->vector; + if (!vector) + return AVERROR(ENOMEM); + + for (i = 0; i <= ar_order; i++) { + auxiliary[i] = 0.; + for (j = i; j <= ar_order; j++) + auxiliary[i] += acoefficients[j] * acoefficients[j - i]; + } + + for (i = 0; i < nb_clicks; i++) { + const int im = i * nb_clicks; + + for (j = i; j < nb_clicks; j++) { + if (abs(index[j] - index[i]) <= ar_order) { + matrix[j * nb_clicks + i] = matrix[im + j] = auxiliary[abs(index[j] - index[i])]; + } else { + matrix[j * nb_clicks + i] = matrix[im + j] = 0; + } + } + } + + for (i = 0; i < nb_clicks; i++) { + vector[i] = 0.; + for (j = -ar_order; j <= ar_order; j++) + if (find_index(index, index[i] - j, nb_clicks)) + vector[i] -= src[index[i] - j] * auxiliary[abs(j)]; + } + + return cholesky_decomposition(s, matrix, vector, nb_clicks, interpolated); +} + +static int detect_clicks(DeclickContext *s, double sigmae, double *detection, double *acoefficients, + uint8_t *click, int *index, + const double *src, double *dst) +{ + const double threshold = s->threshold; + int i, j, nb_clicks = 0, prev = -1; + + memset(detection, 0, s->window_size * sizeof(*detection)); + + for (i = s->ar_order; i < s->window_size; i++) { + for (j = 0; j <= s->ar_order; j++) { + detection[i] += acoefficients[j] * src[i - j]; + } + } + + for (i = 0; i < s->window_size; i++) { + click[i] = fabs(detection[i]) > sigmae * threshold; + dst[i] = src[i]; + } + + for (i = 0; i < s->window_size; i++) { + if (!click[i]) + continue; + + if (prev >= 0 && (i > prev + 1) && (i <= s->nb_burst_samples + prev)) + for (j = prev + 1; j < i; j++) + click[j] = 1; + prev = i; + } + + memset(click, 0, s->ar_order * sizeof(*click)); + memset(click + (s->window_size - s->ar_order), 0, s->ar_order * sizeof(*click)); + + for (i = s->ar_order; i < s->window_size - s->ar_order; i++) + if (click[i]) + index[nb_clicks++] = i; + + return nb_clicks; +} + +static int filter_frame(AVFilterLink *inlink, AVFrame *in) +{ + AVFilterContext *ctx = inlink->dst; + AVFilterLink *outlink = ctx->outputs[0]; + DeclickContext *s = ctx->priv; + AVFrame *out = NULL; + int ret = 0; + + if (s->pts == AV_NOPTS_VALUE) + s->pts = in->pts; + + ret = av_audio_fifo_write(s->fifo, (void **)in->extended_data, + in->nb_samples); + av_frame_free(&in); + if (ret < 0) { + av_frame_free(&out); + return ret; + } + + while (av_audio_fifo_size(s->fifo) >= s->window_size) { + int j, ch, detected_clicks = 0; + + out = ff_get_audio_buffer(outlink, s->hop_size); + if (!out) + return AVERROR(ENOMEM); + + ret = av_audio_fifo_peek(s->fifo, (void **)s->in->extended_data, + s->window_size); + if (ret < 0) + break; + + for (ch = 0; ch < s->in->channels; ch++) { + const double *src = (const double *)s->in->extended_data[ch]; + double *is = (double *)s->is->extended_data[ch]; + double *dst = (double *)s->out->extended_data[ch]; + double *ptr = (double *)out->extended_data[ch]; + double *buf = (double *)s->buffer->extended_data[ch]; + const double *w = s->window_func_lut; + double sigmae; + + sigmae = autoregression(src, s->ar_order, s->window_size, s->acoefficients, s->acorrelation, s->tmp); + + if (isfinite_array(s->acoefficients, s->ar_order + 1)) { + double *interpolated = s->interpolated; + int *index = s->index; + int nb_clicks; + + nb_clicks = detect_clicks(s, sigmae, s->detection, s->acoefficients, + s->click, index, src, dst); + if (nb_clicks > 0) { + ret = interpolation(s, src, s->ar_order, s->acoefficients, index, + nb_clicks, s->auxiliary, interpolated); + if (ret < 0) + goto fail; + + for (j = 0; j < nb_clicks; j++) { + dst[index[j]] = interpolated[j]; + is[index[j]] = 1; + } + } + } else { + memcpy(dst, src, s->window_size * sizeof(*dst)); + } + + for (j = 0; j < s->window_size; j++) + buf[j] += dst[j] * w[j]; + for (j = 0; j < s->hop_size; j++) + ptr[j] = buf[j]; + + memmove(buf, buf + s->hop_size, (s->window_size * 2 - s->hop_size) * sizeof(*buf)); + memmove(is, is + s->hop_size, (s->window_size - s->hop_size) * sizeof(*is)); + memset(buf + s->window_size * 2 - s->hop_size, 0, s->hop_size * sizeof(*buf)); + memset(is + s->window_size - s->hop_size, 0, s->hop_size * sizeof(*is)); + + for (j = 0; j < s->hop_size; j++) { + if (is[j]) + detected_clicks++; + } + } + + av_audio_fifo_drain(s->fifo, s->hop_size); + + out->pts = s->pts; + s->pts += s->hop_size; + + s->detected_clicks += detected_clicks; + s->nb_samples += s->hop_size * inlink->channels; + + ret = ff_filter_frame(outlink, out); + if (ret < 0) + break; + } + +fail: + if (ret < 0) + av_frame_free(&out); + return ret; +} + +static int request_frame(AVFilterLink *outlink) +{ + AVFilterContext *ctx = outlink->src; + DeclickContext *s = ctx->priv; + int ret = 0; + + ret = ff_request_frame(ctx->inputs[0]); + + if (ret == AVERROR_EOF && av_audio_fifo_size(s->fifo) > 0) { + AVFrame *in; + + if (!s->samples_left) + s->samples_left = av_audio_fifo_size(s->fifo); + + in = ff_get_audio_buffer(outlink, s->window_size); + if (!in) + return AVERROR(ENOMEM); + ret = filter_frame(ctx->inputs[0], in); + if (s->samples_left) { + s->samples_left -= s->hop_size; + if (s->samples_left <= 0) + av_audio_fifo_drain(s->fifo, s->window_size); + } + } + + return ret; +} + + +static av_cold void uninit(AVFilterContext *ctx) +{ + DeclickContext *s = ctx->priv; + + av_log(ctx, AV_LOG_INFO, "Detected clicks in %"PRId64" of %"PRId64" samples (%g%%).\n", + s->detected_clicks, s->nb_samples, 100. * s->detected_clicks / s->nb_samples); + + av_audio_fifo_free(s->fifo); + av_freep(&s->window_func_lut); + av_frame_free(&s->in); + av_frame_free(&s->out); + av_frame_free(&s->buffer); + av_frame_free(&s->is); + av_freep(&s->detection); + av_freep(&s->auxiliary); + av_freep(&s->acoefficients); + av_freep(&s->acorrelation); + av_freep(&s->tmp); + av_freep(&s->click); + av_freep(&s->index); + av_freep(&s->interpolated); + av_freep(&s->matrix); + s->matrix_size = 0; + av_freep(&s->vector); + s->vector_size = 0; + av_freep(&s->ltriangular); + s->ltriangular_size = 0; + av_freep(&s->diagonal); + s->d_size = 0; + av_freep(&s->y); + s->y_size = 0; +} + +static const AVFilterPad inputs[] = { + { + .name = "default", + .type = AVMEDIA_TYPE_AUDIO, + .filter_frame = filter_frame, + .config_props = config_input, + }, + { NULL } +}; + +static const AVFilterPad outputs[] = { + { + .name = "default", + .type = AVMEDIA_TYPE_AUDIO, + .request_frame = request_frame, + }, + { NULL } +}; + +AVFilter ff_af_declick = { + .name = "declick", + .description = NULL_IF_CONFIG_SMALL("Remove impulsive noise from input audio."), + .query_formats = query_formats, + .priv_size = sizeof(DeclickContext), + .priv_class = &declick_class, + .uninit = uninit, + .inputs = inputs, + .outputs = outputs, +}; diff --git a/libavfilter/allfilters.c b/libavfilter/allfilters.c index 3f67e321bf..cf5016f2c1 100644 --- a/libavfilter/allfilters.c +++ b/libavfilter/allfilters.c @@ -98,6 +98,7 @@ static void register_all(void) REGISTER_FILTER(CROSSFEED, crossfeed, af); REGISTER_FILTER(CRYSTALIZER, crystalizer, af); REGISTER_FILTER(DCSHIFT, dcshift, af); + REGISTER_FILTER(DECLICK, declick, af); REGISTER_FILTER(DRMETER, drmeter, af); REGISTER_FILTER(DYNAUDNORM, dynaudnorm, af); REGISTER_FILTER(EARWAX, earwax, af); -- 2.11.0 _______________________________________________ ffmpeg-devel mailing list ffmpeg-devel@ffmpeg.org http://ffmpeg.org/mailman/listinfo/ffmpeg-devel