From 88fd203d3b842f7db2cda34b72452a1ced711ed3 Mon Sep 17 00:00:00 2001
From: Ptits de Barbe <ettavolt@rambler.ru>
Date: Sun, 26 Aug 2018 11:45:28 +0300
Subject: [PATCH] AEC filter from Speex.

---
 configure                     |   4 +
 libavfilter/Makefile          |   1 +
 libavfilter/af_libspeex_aec.c | 293 ++++++++++++++++++++++++++++++++++
 libavfilter/allfilters.c      |   1 +
 4 files changed, 299 insertions(+)
 create mode 100644 libavfilter/af_libspeex_aec.c

diff --git a/configure b/configure
index e718c1531c..45c8cba724 100755
--- a/configure
+++ b/configure
@@ -259,6 +259,7 @@ External library support:
   --enable-libsnappy       enable Snappy compression, needed for hap encoding [no]
   --enable-libsoxr         enable Include libsoxr resampling [no]
   --enable-libspeex        enable Speex de/encoding via libspeex [no]
+  --enable-libspeexdsp     enable Speex AEC [no]
   --enable-libsrt          enable Haivision SRT protocol via libsrt [no]
   --enable-libssh          enable SFTP protocol via libssh [no]
   --enable-libtensorflow   enable TensorFlow as a DNN module backend
@@ -1717,6 +1718,7 @@ EXTERNAL_LIBRARY_LIST="
     libsnappy
     libsoxr
     libspeex
+    libspeexdsp
     libsrt
     libssh
     libtensorflow
@@ -3086,6 +3088,7 @@ libopus_encoder_select="audio_frame_queue"
 librsvg_decoder_deps="librsvg"
 libshine_encoder_deps="libshine"
 libshine_encoder_select="audio_frame_queue"
+libspeex_aec_filter_deps="libspeexdsp"
 libspeex_decoder_deps="libspeex"
 libspeex_encoder_deps="libspeex"
 libspeex_encoder_select="audio_frame_queue"
@@ -6079,6 +6082,7 @@ enabled libsnappy         && require libsnappy snappy-c.h snappy_compress -lsnap
 enabled libsoxr           && require libsoxr soxr.h soxr_create -lsoxr
 enabled libssh            && require_pkg_config libssh libssh libssh/sftp.h sftp_init
 enabled libspeex          && require_pkg_config libspeex speex speex/speex.h speex_decoder_init
+enabled libspeexdsp       && require_pkg_config libspeexdsp speexdsp speex/speex_echo.h speex_echo_state_init_mc
 enabled libsrt            && require_pkg_config libsrt "srt >= 1.2.0" srt/srt.h srt_socket
 enabled libtensorflow     && require libtensorflow tensorflow/c/c_api.h TF_Version -ltensorflow
 enabled libtesseract      && require_pkg_config libtesseract tesseract tesseract/capi.h TessBaseAPICreate
diff --git a/libavfilter/Makefile b/libavfilter/Makefile
index 245302bbe8..1c5502f9a6 100644
--- a/libavfilter/Makefile
+++ b/libavfilter/Makefile
@@ -108,6 +108,7 @@ OBJS-$(CONFIG_HIGHPASS_FILTER)               += af_biquads.o
 OBJS-$(CONFIG_HIGHSHELF_FILTER)              += af_biquads.o
 OBJS-$(CONFIG_JOIN_FILTER)                   += af_join.o
 OBJS-$(CONFIG_LADSPA_FILTER)                 += af_ladspa.o
+OBJS-$(CONFIG_LIBSPEEX_AEC_FILTER)           += af_libspeex_aec.o
 OBJS-$(CONFIG_LOUDNORM_FILTER)               += af_loudnorm.o ebur128.o
 OBJS-$(CONFIG_LOWPASS_FILTER)                += af_biquads.o
 OBJS-$(CONFIG_LOWSHELF_FILTER)               += af_biquads.o
diff --git a/libavfilter/af_libspeex_aec.c b/libavfilter/af_libspeex_aec.c
new file mode 100644
index 0000000000..eb96f4e533
--- /dev/null
+++ b/libavfilter/af_libspeex_aec.c
@@ -0,0 +1,293 @@
+/*
+ * Speex-based Acoustic Echo Canceller.
+ * Copyright (c) 2018 Arseniy Skvortsov <ettavolt@gmail.com>
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+/**
+ * @file
+ * Speex-based Acoustic Echo Canceller
+ *
+ * Tries to remove the second input from the first input
+ * as if the 1st was recorded during playback of the 2nd.
+ * No delay is assumed.
+ */
+
+#include <speex/speex_echo.h>
+
+#include "libavutil/attributes.h"
+#include "libavutil/opt.h"
+#include "libavutil/samplefmt.h"
+
+#include "audio.h"
+#include "avfilter.h"
+#include "filters.h"
+#include "formats.h"
+#include "internal.h"
+
+typedef struct AECContext {
+    const AVClass *class;       /**< class for AVOptions */
+
+    int frame_size;             /**< samples requested per input frame and processed per Speex call ("frame_size" option) */
+    int filter_length;          /**< length, in samples, of the echo tail to cancel ("filter_length" option) */
+    int nb_mics;                /**< channel count of the "record" input, set in config_record_input() */
+    int nb_speakers;            /**< channel count of the "playback" input, set in config_playback_input() */
+
+    SpeexEchoState *state;      /**< Speex echo canceller state, created in config_state(), destroyed in uninit() */
+} AECContext;
+
+#define OFFSET(x) offsetof(AECContext, x) /* byte offset of the AECContext field backing an option */
+#define A AV_OPT_FLAG_AUDIO_PARAM
+#define F AV_OPT_FLAG_FILTERING_PARAM
+static const AVOption libspeex_aec_options[] = { /* user options; both are ints in [1, INT_MAX] */
+    { "frame_size", "Number of samples to process at one time (should correspond to 20 ms, preferably 2ⁿ for FFT)",
+            OFFSET(frame_size), AV_OPT_TYPE_INT, { .i64 = 1024 }, 1, INT_MAX, A|F },
+    { "filter_length", "Number of samples of echo to cancel "
+                       "(should generally correspond to 100-500 ms or a ⅓ of room reverberation time)",
+            OFFSET(filter_length), AV_OPT_TYPE_INT, { .i64 = 5000 }, 1, INT_MAX, A|F },
+    { NULL } /* table terminator */
+};
+/* NOTE(review): expected to define libspeex_aec_class (used as .priv_class) from the table above - confirm in internal.h */
+AVFILTER_DEFINE_CLASS(libspeex_aec);
+
+/**
+ * Negotiate sample format, channel counts and sample rate for all links.
+ *
+ * Every link carries interleaved signed 16-bit samples because
+ * speex_echo_cancellation() expects multi-channel audio interleaved in a
+ * single buffer; the planar format would hand it only the first channel's
+ * plane. The playback input gets its own channel-count list so that it may
+ * negotiate a channel count different from the record input and the output.
+ *
+ * Each list is handed to the links right after it is created. The
+ * ff_set_common_*() and ff_*_ref() helpers are expected to release the list
+ * themselves when they fail, so no list is freed here; freeing it again in
+ * this function could leave links pointing at freed memory.
+ *
+ * @return 0 on success, a negative AVERROR code otherwise
+ */
+static int query_formats(AVFilterContext *ctx)
+{
+    AVFilterFormats *formats = NULL;
+    AVFilterFormats *sample_rates;
+    AVFilterChannelLayouts *mics_layouts;
+    AVFilterChannelLayouts *speakers_layouts;
+    int ret;
+
+    /* Sample format: packed (interleaved) S16 on every link. */
+    ret = ff_add_format(&formats, AV_SAMPLE_FMT_S16);
+    if (ret < 0) {
+        return ret;
+    }
+    ret = ff_set_common_formats(ctx, formats);
+    if (ret < 0) {
+        return ret;
+    }
+
+    /*
+     * Channel counts: reference the playback list first, so that the common
+     * list set below is applied only to the links that are still unset.
+     */
+    speakers_layouts = ff_all_channel_counts();
+    if (!speakers_layouts) {
+        return AVERROR(ENOMEM);
+    }
+    ret = ff_channel_layouts_ref(speakers_layouts, &ctx->inputs[1]->out_channel_layouts);
+    if (ret < 0) {
+        return ret;
+    }
+
+    mics_layouts = ff_all_channel_counts();
+    if (!mics_layouts) {
+        return AVERROR(ENOMEM);
+    }
+    ret = ff_set_common_channel_layouts(ctx, mics_layouts);
+    if (ret < 0) {
+        return ret;
+    }
+
+    /* Sample rate: any rate, shared by all links. */
+    sample_rates = ff_all_samplerates();
+    if (!sample_rates) {
+        return AVERROR(ENOMEM);
+    }
+
+    return ff_set_common_samplerates(ctx, sample_rates);
+}
+
+/* Create the Speex echo state once both inputs have reported their channel counts. */
+static int config_state(AVFilterContext *ctx)
+{
+    AECContext *self = ctx->priv;
+
+    if (!self->nb_mics || !self->nb_speakers) {
+        return 0; /* the other input is not configured yet; it will call here again */
+    }
+    self->state = speex_echo_state_init_mc(self->frame_size, self->filter_length, self->nb_mics, self->nb_speakers);
+    if (!self->state) { /* NULL test: an ordered comparison of a pointer with 0 is not valid ISO C */
+        av_log(ctx, AV_LOG_ERROR, "Cannot initialize libspeex AEC state!\n");
+        return AVERROR(ENOMEM);
+    }
+    av_log(ctx, AV_LOG_VERBOSE, "mics:%d speakers:%d\n", self->nb_mics, self->nb_speakers);
+    return 0;
+}
+
+/* Record-input setup: note the microphone channel count, then try to create the Speex state. */
+static int config_record_input(AVFilterLink *inlink)
+{
+    AECContext *self = inlink->dst->priv;
+    inlink->request_samples = self->frame_size;
+    self->nb_mics = inlink->channels;
+    return config_state(inlink->dst);
+}
+
+/* Playback-input setup: note the loudspeaker channel count, then try to create the Speex state. */
+static int config_playback_input(AVFilterLink *inlink)
+{
+    AECContext *self = inlink->dst->priv;
+    inlink->request_samples = self->frame_size;
+    self->nb_speakers = inlink->channels;
+    return config_state(inlink->dst);
+}
+
+/* Output setup: inherit the record input's time base when the output link has none set. */
+static int config_output(AVFilterLink *outlink)
+{
+    const AVFilterLink *record = outlink->src->inputs[0];
+    const int unset = !outlink->time_base.num && !outlink->time_base.den;
+
+    if (record && unset) {
+        outlink->time_base = record->time_base;
+    }
+    return 0;
+}
+
+/**
+ * Cancel echo in one pair of queued frames and send the result downstream.
+ * @return 1 if an input has no frame queued, else ff_filter_frame()'s result or a negative AVERROR
+ */
+static int filter_frames(const AVFilterContext *ctx) {
+    AVFilterLink *outlink = ctx->outputs[0];
+    AECContext *self = ctx->priv;
+    AVFrame *recorded = NULL, *played = NULL, *cleaned = NULL;
+    int ret;
+
+    if (!ff_inlink_check_available_frame(ctx->inputs[0]) ||
+        !ff_inlink_check_available_frame(ctx->inputs[1])) {
+        return 1;
+    }
+    if ((ret = ff_inlink_consume_frame(ctx->inputs[0], &recorded)) < 0 ||
+        (ret = ff_inlink_consume_frame(ctx->inputs[1], &played)) < 0) {
+        goto done;
+    }
+    cleaned = ff_get_audio_buffer(outlink, self->frame_size);
+    if (!cleaned) {
+        ret = AVERROR(ENOMEM);
+        goto done;
+    }
+    /* A fresh buffer has no timestamp; inherit pts and other metadata from the recording. */
+    if ((ret = av_frame_copy_props(cleaned, recorded)) < 0) {
+        goto done;
+    }
+    speex_echo_cancellation(self->state,
+                            (const spx_int16_t *) *recorded->extended_data,
+                            (const spx_int16_t *) *played->extended_data,
+                            (spx_int16_t *) *cleaned->extended_data);
+
+done:
+    av_frame_free(&recorded);
+    av_frame_free(&played);
+    if (ret < 0) {
+        av_frame_free(&cleaned);
+        return ret;
+    }
+    return ff_filter_frame(outlink, cleaned);
+}
+
+static int activate(AVFilterContext *ctx)
+{
+    AVFilterLink *outlink = ctx->outputs[0];
+    int ret;
+    /* NOTE(review): the FF_FILTER_FORWARD_* macros come from filters.h and may return from this function - confirm. */
+    FF_FILTER_FORWARD_STATUS_BACK_ALL(outlink, ctx);
+    /* Try to process one record/playback frame pair. */
+    ret = filter_frames(ctx);
+    if (ret <= 0) { /* frames were consumed: pass on ff_filter_frame()'s result or the error */
+        return ret;
+    }
+    /* filter_frames() returned 1: at least one input has no frame queued and nothing was consumed. */
+    FF_FILTER_FORWARD_STATUS(ctx->inputs[0], outlink);
+    FF_FILTER_FORWARD_STATUS(ctx->inputs[1], outlink);
+    if (ff_outlink_frame_wanted(outlink)) {
+        /* No pair was processed, so at least one input lacks a frame; request one from each input that has none. */
+        if (!ff_inlink_check_available_frame(ctx->inputs[0])) {
+            ff_inlink_request_frame(ctx->inputs[0]);
+        }
+        if (!ff_inlink_check_available_frame(ctx->inputs[1])) {
+            ff_inlink_request_frame(ctx->inputs[1]);
+        }
+        return 0;
+    }
+    return FFERROR_NOT_READY; /* nothing to process and the output does not want a frame */
+}
+
+/* Filter teardown: release the Speex echo state if one was created. */
+static av_cold void uninit(AVFilterContext *ctx)
+{
+    SpeexEchoState *state = ((AECContext *) ctx->priv)->state;
+    if (state) {
+        speex_echo_state_destroy(state);
+    }
+}
+
+static const AVFilterPad avfilter_af_libspeex_aec_inputs[] = { /* input pads; order defines ctx->inputs[] indices */
+    {
+        .name          = "record", /* inputs[0]: the signal to clean */
+        .type          = AVMEDIA_TYPE_AUDIO,
+        .needs_fifo    = 1, /* NOTE(review): presumably inserts a FIFO that honours request_samples - confirm */
+        .config_props  = config_record_input,
+    },
+    {
+        .name          = "playback", /* inputs[1]: the signal whose echo is removed from "record" */
+        .type          = AVMEDIA_TYPE_AUDIO,
+        .needs_fifo    = 1,
+        .config_props  = config_playback_input,
+    },
+    { NULL } /* array terminator */
+};
+
+static const AVFilterPad avfilter_af_libspeex_aec_outputs[] = { /* single output pad, ctx->outputs[0] */
+    {
+        .name          = "cleaned", /* receives the frames written by speex_echo_cancellation() */
+        .type          = AVMEDIA_TYPE_AUDIO,
+        .config_props  = config_output, /* inherits the record input's time base when unset */
+    },
+    { NULL } /* array terminator */
+};
+
+AVFilter ff_af_libspeex_aec = { /* filter definition; declared extern in allfilters.c */
+    .name           = "libspeex_aec",
+    .description    = NULL_IF_CONFIG_SMALL("Speex-based acoustic echo cancellation (AEC)."),
+    .priv_size      = sizeof(AECContext), /* size of the private context reachable as ctx->priv */
+    .priv_class     = &libspeex_aec_class, /* ties the AVOptions table to the private context */
+    .uninit         = uninit,
+    .activate       = activate, /* frames are pulled in activate(); the pads set no filter_frame callback */
+    .query_formats  = query_formats,
+    .inputs         = avfilter_af_libspeex_aec_inputs,
+    .outputs        = avfilter_af_libspeex_aec_outputs,
+};
diff --git a/libavfilter/allfilters.c b/libavfilter/allfilters.c
index 2d19929bdc..3db1436c82 100644
--- a/libavfilter/allfilters.c
+++ b/libavfilter/allfilters.c
@@ -101,6 +101,7 @@ extern AVFilter ff_af_highpass;
 extern AVFilter ff_af_highshelf;
 extern AVFilter ff_af_join;
 extern AVFilter ff_af_ladspa;
+extern AVFilter ff_af_libspeex_aec;
 extern AVFilter ff_af_loudnorm;
 extern AVFilter ff_af_lowpass;
 extern AVFilter ff_af_lowshelf;
-- 
2.18.0