Safe behavior of the initial echo removal in AEC3

This CL adds functionality for removing any echo that occurs before the
render and capture signals have been properly aligned. The removal is
implemented so that transparency to the near-end signal is preserved as
much as possible.


Bug: webrtc:8883
Change-Id: I813cbbc4c48822e7dffcd9ab6233be4c222089de
Reviewed-on: https://webrtc-review.googlesource.com/49941
Commit-Queue: Per Åhgren <peah@webrtc.org>
Reviewed-by: Gustaf Ullberg <gustaf@webrtc.org>
Cr-Commit-Position: refs/heads/master@{#22107}
diff --git a/api/audio/BUILD.gn b/api/audio/BUILD.gn
index 9fb963a..73973bd 100644
--- a/api/audio/BUILD.gn
+++ b/api/audio/BUILD.gn
@@ -38,6 +38,7 @@
 rtc_source_set("aec3_config") {
   visibility = [ "*" ]
   sources = [
+    "echo_canceller3_config.cc",
     "echo_canceller3_config.h",
   ]
 }
diff --git a/api/audio/echo_canceller3_config.cc b/api/audio/echo_canceller3_config.cc
new file mode 100644
index 0000000..d74d7a8
--- /dev/null
+++ b/api/audio/echo_canceller3_config.cc
@@ -0,0 +1,16 @@
+/*
+ *  Copyright (c) 2018 The WebRTC project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+#include "api/audio/echo_canceller3_config.h"
+
+namespace webrtc {
+
+EchoCanceller3Config::EchoCanceller3Config() = default;
+
+}  // namespace webrtc
diff --git a/api/audio/echo_canceller3_config.h b/api/audio/echo_canceller3_config.h
index 017e518..612c00f 100644
--- a/api/audio/echo_canceller3_config.h
+++ b/api/audio/echo_canceller3_config.h
@@ -17,6 +17,8 @@
 
 // Configuration struct for EchoCanceller3
 struct EchoCanceller3Config {
+  EchoCanceller3Config();
+
   struct Delay {
     size_t default_delay = 5;
     size_t down_sampling_factor = 4;
@@ -105,6 +107,14 @@
 
     float floor_first_increase = 0.00001f;
   } gain_updates;
+
+  struct EchoRemovalControl {
+    struct GainRampup {  // Ramp-up of the upper limit on suppression gain.
+      float first_non_zero_gain = 0.001f;  // Gain limit when ramp-up starts.
+      int non_zero_gain_blocks = 187;  // Countdown value where ramp-up starts.
+      int full_gain_blocks = 312;  // Countdown length until the limit is 1.
+    } gain_rampup;
+  } echo_removal_control;
 };
 }  // namespace webrtc
 
diff --git a/modules/audio_processing/aec3/aec_state.cc b/modules/audio_processing/aec3/aec_state.cc
index e225e03..60dd9d4 100644
--- a/modules/audio_processing/aec3/aec_state.cc
+++ b/modules/audio_processing/aec3/aec_state.cc
@@ -47,6 +47,11 @@
                        std::max_element(delays.begin(), delays.end()));
 }
 
+float ComputeGainRampupIncrease(const EchoCanceller3Config& config) {
+  const auto& c = config.echo_removal_control.gain_rampup;
+  return powf(1.f / c.first_non_zero_gain, 1.f / c.non_zero_gain_blocks);
+}  // Yields g where first_non_zero_gain * g^non_zero_gain_blocks == 1.
+
 }  // namespace
 
 int AecState::instance_count_ = 0;
@@ -57,7 +62,8 @@
       erle_estimator_(config.erle.min, config.erle.max_l, config.erle.max_h),
       config_(config),
       max_render_(config_.filter.main.length_blocks, 0.f),
-      reverb_decay_(config_.ep_strength.default_len) {}
+      reverb_decay_(config_.ep_strength.default_len),
+      gain_rampup_increase_(ComputeGainRampupIncrease(config_)) {}
 
 AecState::~AecState() = default;
 
@@ -71,12 +77,10 @@
     echo_saturation_ = false;
     previous_max_sample_ = 0.f;
     std::fill(max_render_.begin(), max_render_.end(), 0.f);
-    force_zero_gain_counter_ = 0;
     blocks_with_proper_filter_adaptation_ = 0;
     capture_block_counter_ = 0;
     filter_has_had_time_to_converge_ = false;
     render_received_ = false;
-    force_zero_gain_ = true;
     blocks_with_active_render_ = 0;
     initial_state_ = true;
   };
@@ -92,8 +96,8 @@
     full_reset();
   } else if (echo_path_variability.delay_change !=
              EchoPathVariability::DelayAdjustment::kBufferFlush) {
+    active_render_seen_ = false;
     full_reset();
-
   } else if (echo_path_variability.delay_change !=
              EchoPathVariability::DelayAdjustment::kDelayReset) {
     full_reset();
@@ -129,11 +133,9 @@
   blocks_with_proper_filter_adaptation_ +=
       active_render_block && !SaturatedCapture() ? 1 : 0;
 
-  // Force zero echo suppression gain after an echo path change to allow at
-  // least some render data to be collected in order to avoid an initial echo
-  // burst.
-  force_zero_gain_ = ++force_zero_gain_counter_ < kNumBlocksPerSecond / 5;
-
+  // Update the limit on the echo suppression after an echo path change to avoid
+  // an initial echo burst.
+  UpdateSuppressorGainLimit(render_buffer.GetRenderActivity());
 
   // Update the ERL and ERLE measures.
   if (converged_filter && capture_block_counter_ >= 2 * kNumBlocksPerSecond) {
@@ -264,6 +266,37 @@
                         kFftLengthBy2;
 }
 
+// Updates the gain limit: held at 0, then ramped to 1, once render is active.
+void AecState::UpdateSuppressorGainLimit(bool render_activity) {
+  const auto& rampup_conf = config_.echo_removal_control.gain_rampup;
+  if (!active_render_seen_ && render_activity) {
+    active_render_seen_ = true;
+    realignment_counter_ = rampup_conf.full_gain_blocks;  // Start countdown.
+  } else if (realignment_counter_ > 0) {
+    --realignment_counter_;
+  }
+
+  if (realignment_counter_ <= 0) {
+    suppressor_gain_limit_ = 1.f;  // Countdown over: gain is not limited.
+    return;
+  }
+
+  if (realignment_counter_ > rampup_conf.non_zero_gain_blocks) {
+    suppressor_gain_limit_ = 0.f;  // Early phase: the limit is zero.
+    return;
+  }
+
+  if (realignment_counter_ == rampup_conf.non_zero_gain_blocks) {
+    suppressor_gain_limit_ = rampup_conf.first_non_zero_gain;  // Ramp start.
+    return;
+  }
+
+  RTC_DCHECK_LT(0.f, suppressor_gain_limit_);
+  suppressor_gain_limit_ =
+      std::min(1.f, suppressor_gain_limit_ * gain_rampup_increase_);
+  RTC_DCHECK_GE(1.f, suppressor_gain_limit_);
+}
+
 bool AecState::DetectEchoSaturation(rtc::ArrayView<const float> x) {
   RTC_DCHECK_LT(0, x.size());
   const float max_sample = fabs(*std::max_element(
diff --git a/modules/audio_processing/aec3/aec_state.h b/modules/audio_processing/aec3/aec_state.h
index 9a1a82e..19e6ab1 100644
--- a/modules/audio_processing/aec3/aec_state.h
+++ b/modules/audio_processing/aec3/aec_state.h
@@ -87,8 +87,8 @@
   // Returns the decay factor for the echo reverberation.
   float ReverbDecay() const { return reverb_decay_; }
 
-  // Returns whether the echo suppression gain should be forced to zero.
-  bool ForcedZeroGain() const { return force_zero_gain_; }
+  // Returns the upper limit for the echo suppression gain.
+  float SuppressionGainLimit() const { return suppressor_gain_limit_; }
 
   // Returns whether the echo in the capture signal is audible.
   bool InaudibleEcho() const { return echo_audibility_.InaudibleEcho(); }
@@ -135,6 +135,7 @@
 
   void UpdateReverb(const std::vector<float>& impulse_response);
   bool DetectActiveRender(rtc::ArrayView<const float> x) const;
+  void UpdateSuppressorGainLimit(bool render_activity);
   bool DetectEchoSaturation(rtc::ArrayView<const float> x);
 
   static int instance_count_;
@@ -150,9 +151,10 @@
   bool echo_saturation_ = false;
   bool transparent_mode_ = false;
   float previous_max_sample_ = 0.f;
-  bool force_zero_gain_ = false;
   bool render_received_ = false;
-  size_t force_zero_gain_counter_ = 0;
+  int realignment_counter_ = 0;  // Countdown for the gain limit ramp-up.
+  float suppressor_gain_limit_ = 1.f;  // Upper limit for suppression gain.
+  bool active_render_seen_ = false;  // True once render activity is seen.
   int filter_delay_ = 0;
   size_t blocks_since_last_saturation_ = 1000;
   float reverb_decay_to_test_ = 0.9f;
@@ -165,6 +167,7 @@
   bool saturating_echo_path_ = false;
   bool filter_has_had_time_to_converge_ = false;
   bool initial_state_ = true;
+  const float gain_rampup_increase_;
 
   RTC_DISALLOW_COPY_AND_ASSIGN(AecState);
 };
diff --git a/modules/audio_processing/aec3/render_buffer.h b/modules/audio_processing/aec3/render_buffer.h
index db94e74..7789ffd 100644
--- a/modules/audio_processing/aec3/render_buffer.h
+++ b/modules/audio_processing/aec3/render_buffer.h
@@ -61,10 +61,17 @@
   void SpectralSum(size_t num_spectra,
                    std::array<float, kFftLengthBy2Plus1>* X2) const;
 
+  // Gets the recent activity seen in the render signal.
+  bool GetRenderActivity() const { return render_activity_; }
+
+  // Specifies the recent activity seen in the render signal.
+  void SetRenderActivity(bool activity) { render_activity_ = activity; }
+
  private:
   const MatrixBuffer* const block_buffer_;
   const VectorBuffer* const spectrum_buffer_;
   const FftBuffer* const fft_buffer_;
+  bool render_activity_ = false;
   RTC_DISALLOW_IMPLICIT_CONSTRUCTORS(RenderBuffer);
 };
 
diff --git a/modules/audio_processing/aec3/render_delay_buffer.cc b/modules/audio_processing/aec3/render_delay_buffer.cc
index 1373729..60606bf 100644
--- a/modules/audio_processing/aec3/render_delay_buffer.cc
+++ b/modules/audio_processing/aec3/render_delay_buffer.cc
@@ -12,6 +12,7 @@
 
 #include <string.h>
 #include <algorithm>
+#include <numeric>
 
 #include "modules/audio_processing/aec3/aec3_common.h"
 #include "modules/audio_processing/aec3/aec3_fft.h"
@@ -72,12 +73,15 @@
   int max_observed_jitter_ = 1;
   size_t capture_call_counter_ = 0;
   size_t render_call_counter_ = 0;
+  bool render_activity_ = false;
+  size_t render_activity_counter_ = 0;
 
   int LowRateBufferOffset() const { return DelayEstimatorOffset(config_) >> 1; }
   int MaxExternalDelayToInternalDelay(size_t delay) const;
   void ApplyDelay(int delay);
   void InsertBlock(const std::vector<std::vector<float>>& block,
                    int previous_write);
+  bool DetectActiveRender(rtc::ArrayView<const float> x) const;
 
   RTC_DISALLOW_IMPLICIT_CONSTRUCTORS(RenderDelayBufferImpl);
 };
@@ -230,6 +234,12 @@
                              ? BufferingEvent::kRenderOverrun
                              : BufferingEvent::kNone;
 
+  // Detect and update render activity.
+  if (!render_activity_) {
+    render_activity_counter_ += DetectActiveRender(block[0]) ? 1 : 0;
+    render_activity_ = render_activity_counter_ >= 20;
+  }
+
   // Insert the new render block into the specified position.
   InsertBlock(block, previous_write);
 
@@ -283,6 +293,12 @@
     Reset();
   }
 
+  echo_remover_buffer_.SetRenderActivity(render_activity_);
+  if (render_activity_) {
+    render_activity_counter_ = 0;
+    render_activity_ = false;
+  }
+
   return event;
 }
 
@@ -353,6 +369,14 @@
   f.buffer[f.write].Spectrum(optimization_, s.buffer[s.write]);
 }
 
+bool RenderDelayBufferImpl::DetectActiveRender(
+    rtc::ArrayView<const float> x) const {  // True when x has enough energy.
+  const float x_energy = std::inner_product(x.begin(), x.end(), x.begin(), 0.f);
+  return x_energy > (config_.render_levels.active_render_limit *
+                     config_.render_levels.active_render_limit) *
+                        kFftLengthBy2;  // Threshold on the sum of squares of x.
+}
+
 }  // namespace
 
 int RenderDelayBuffer::RenderDelayBuffer::DelayEstimatorOffset(
diff --git a/modules/audio_processing/aec3/suppression_gain.cc b/modules/audio_processing/aec3/suppression_gain.cc
index d1543c6..0962912 100644
--- a/modules/audio_processing/aec3/suppression_gain.cc
+++ b/modules/audio_processing/aec3/suppression_gain.cc
@@ -387,17 +387,9 @@
 
   const bool saturated_echo = aec_state.SaturatedEcho();
   const bool saturating_echo_path = aec_state.SaturatingEchoPath();
-  const bool force_zero_gain = aec_state.ForcedZeroGain();
+  const float gain_upper_bound = aec_state.SuppressionGainLimit();
   const bool linear_echo_estimate = aec_state.UsableLinearEstimate();
   const bool initial_state = aec_state.InitialState();
-  if (force_zero_gain) {
-    last_gain_.fill(0.f);
-    std::copy(comfort_noise.begin(), comfort_noise.end(), last_masker_.begin());
-    low_band_gain->fill(0.f);
-    gain_increase_.fill(1.f);
-    *high_bands_gain = 0.f;
-    return;
-  }
 
   bool low_noise_render = low_render_detector_.Detect(render);
 
@@ -408,6 +400,12 @@
                 saturating_echo_path, initial_state, linear_echo_estimate,
                 nearend, echo, comfort_noise, low_band_gain);
 
+  if (gain_upper_bound < 1.f) {
+    for (size_t k = 0; k < low_band_gain->size(); ++k) {
+      (*low_band_gain)[k] = std::min((*low_band_gain)[k], gain_upper_bound);
+    }
+  }
+
   // Compute the gain for the upper bands.
   *high_bands_gain =
       UpperBandsGain(narrow_peak_band, saturated_echo, render, *low_band_gain);
diff --git a/modules/audio_processing/aec3/suppression_gain_unittest.cc b/modules/audio_processing/aec3/suppression_gain_unittest.cc
index bcdcd23..0bfc558 100644
--- a/modules/audio_processing/aec3/suppression_gain_unittest.cc
+++ b/modules/audio_processing/aec3/suppression_gain_unittest.cc
@@ -64,25 +64,13 @@
   std::unique_ptr<RenderDelayBuffer> render_delay_buffer(
       RenderDelayBuffer::Create(config, 3));
 
-  // Verify the functionality for forcing a zero gain.
-  E2.fill(1000000000.f);
-  R2.fill(10000000000000.f);
-  N2.fill(0.f);
-  s.fill(10.f);
-  aec_state.Update(subtractor.FilterFrequencyResponse(),
-                   subtractor.FilterImpulseResponse(),
-                   subtractor.ConvergedFilter(),
-                   *render_delay_buffer->GetRenderBuffer(), E2, Y2, s, false);
-  suppression_gain.GetGain(E2, R2, N2, analyzer, aec_state, x, &high_bands_gain,
-                           &g);
-  std::for_each(g.begin(), g.end(), [](float a) { EXPECT_FLOAT_EQ(0.f, a); });
-  EXPECT_FLOAT_EQ(0.f, high_bands_gain);
-
   // Ensure that a strong noise is detected to mask any echoes.
   E2.fill(10.f);
   Y2.fill(10.f);
   R2.fill(0.1f);
   N2.fill(100.f);
+  s.fill(10.f);
+
   // Ensure that the gain is no longer forced to zero.
   for (int k = 0; k <= kNumBlocksPerSecond / 5 + 1; ++k) {
     aec_state.Update(subtractor.FilterFrequencyResponse(),
diff --git a/modules/audio_processing/include/audio_processing.cc b/modules/audio_processing/include/audio_processing.cc
index 8410a3d..e9c56e8 100644
--- a/modules/audio_processing/include/audio_processing.cc
+++ b/modules/audio_processing/include/audio_processing.cc
@@ -32,4 +32,5 @@
       target_direction(target_direction) {}
 
 Beamforming::~Beamforming() {}
+
 }  // namespace webrtc