Use the filter delay to select the proper render block in the AEC3 AecState

This CL corrects the way that the estimated filter delay is used in
AEC3. In particular:
- It uses the filter delay to choose the correct render block in AecState.
- It changes the code to reflect that the filter delay is always computed.
- It removes the parts of the code that formerly relied on the filter delay
  being an Optional.

Bug: webrtc:8671
Change-Id: I58135a5c174b404707e19a41c3617c09831e871d
Reviewed-on: https://webrtc-review.googlesource.com/35221
Reviewed-by: Gustaf Ullberg <gustaf@webrtc.org>
Commit-Queue: Per Åhgren <peah@webrtc.org>
Cr-Commit-Position: refs/heads/master@{#21557}
diff --git a/modules/audio_processing/aec3/adaptive_fir_filter_unittest.cc b/modules/audio_processing/aec3/adaptive_fir_filter_unittest.cc
index a58ff85..1f53f1e 100644
--- a/modules/audio_processing/aec3/adaptive_fir_filter_unittest.cc
+++ b/modules/audio_processing/aec3/adaptive_fir_filter_unittest.cc
@@ -390,13 +390,13 @@
           false, EchoPathVariability::DelayAdjustment::kNone, false));
       aec_state.Update(filter.FilterFrequencyResponse(),
                        filter.FilterImpulseResponse(), true, *render_buffer,
-                       E2_main, Y2, x[0], s, false);
+                       E2_main, Y2, s, false);
     }
     // Verify that the filter is able to perform well.
     EXPECT_LT(1000 * std::inner_product(e.begin(), e.end(), e.begin(), 0.f),
               std::inner_product(y.begin(), y.end(), y.begin(), 0.f));
-    ASSERT_TRUE(aec_state.FilterDelay());
-    EXPECT_EQ(delay_samples / kBlockSize, *aec_state.FilterDelay());
+    EXPECT_EQ(delay_samples / kBlockSize,
+              static_cast<size_t>(aec_state.FilterDelay()));
   }
 }
 }  // namespace aec3
diff --git a/modules/audio_processing/aec3/aec3_common.h b/modules/audio_processing/aec3/aec3_common.h
index d6cea8c..47f0784 100644
--- a/modules/audio_processing/aec3/aec3_common.h
+++ b/modules/audio_processing/aec3/aec3_common.h
@@ -39,7 +39,6 @@
 constexpr size_t kFftLength = 2 * kFftLengthBy2;
 
 constexpr int kMaxAdaptiveFilterLength = 50;
-constexpr int kUnknownDelayRenderWindowSize = 30;
 constexpr int kRenderTransferQueueSizeFrames = 100;
 
 constexpr size_t kMaxNumBands = 3;
diff --git a/modules/audio_processing/aec3/aec_state.cc b/modules/audio_processing/aec3/aec_state.cc
index 9d30713..25bb5be 100644
--- a/modules/audio_processing/aec3/aec_state.cc
+++ b/modules/audio_processing/aec3/aec_state.cc
@@ -112,12 +112,15 @@
     const RenderBuffer& render_buffer,
     const std::array<float, kFftLengthBy2Plus1>& E2_main,
     const std::array<float, kFftLengthBy2Plus1>& Y2,
-    rtc::ArrayView<const float> x,
     const std::array<float, kBlockSize>& s,
     bool echo_leakage_detected) {
   // Store input parameters.
   echo_leakage_detected_ = echo_leakage_detected;
 
+  // Estimate the filter delay.
+  filter_delay_ = EstimateFilterDelay(adaptive_filter_frequency_response);
+  const std::vector<float>& x = render_buffer.Block(-filter_delay_)[0];
+
   // Update counters.
   ++capture_block_counter_;
   const bool active_render_block = DetectActiveRender(x);
@@ -130,12 +133,10 @@
   // burst.
   force_zero_gain_ = ++force_zero_gain_counter_ < kNumBlocksPerSecond / 5;
 
-  // Estimate delays.
-  filter_delay_ = EstimateFilterDelay(adaptive_filter_frequency_response);
 
   // Update the ERL and ERLE measures.
   if (converged_filter && capture_block_counter_ >= 2 * kNumBlocksPerSecond) {
-    const auto& X2 = render_buffer.Spectrum(*filter_delay_);
+    const auto& X2 = render_buffer.Spectrum(filter_delay_);
     erle_estimator_.Update(X2, Y2, E2_main);
     erl_estimator_.Update(X2, Y2);
   }
@@ -174,7 +175,7 @@
 
 void AecState::UpdateReverb(const std::vector<float>& impulse_response) {
   if ((!(filter_delay_ && usable_linear_estimate_)) ||
-      (*filter_delay_ > config_.filter.length_blocks - 4)) {
+      (filter_delay_ > static_cast<int>(config_.filter.length_blocks) - 4)) {
     return;
   }
 
diff --git a/modules/audio_processing/aec3/aec_state.h b/modules/audio_processing/aec3/aec_state.h
index e39257d..98a78dd 100644
--- a/modules/audio_processing/aec3/aec_state.h
+++ b/modules/audio_processing/aec3/aec_state.h
@@ -62,7 +62,7 @@
   float ErlTimeDomain() const { return erl_estimator_.ErlTimeDomain(); }
 
   // Returns the delay estimate based on the linear filter.
-  rtc::Optional<size_t> FilterDelay() const { return filter_delay_; }
+  int FilterDelay() const { return filter_delay_; }
 
   // Returns whether the capture signal is saturated.
   bool SaturatedCapture() const { return capture_signal_saturation_; }
@@ -111,7 +111,6 @@
               const RenderBuffer& render_buffer,
               const std::array<float, kFftLengthBy2Plus1>& E2_main,
               const std::array<float, kFftLengthBy2Plus1>& Y2,
-              rtc::ArrayView<const float> x,
               const std::array<float, kBlockSize>& s_main,
               bool echo_leakage_detected);
 
@@ -151,7 +150,7 @@
   bool force_zero_gain_ = false;
   bool render_received_ = false;
   size_t force_zero_gain_counter_ = 0;
-  rtc::Optional<size_t> filter_delay_;
+  int filter_delay_ = 0;
   size_t blocks_since_last_saturation_ = 1000;
   float reverb_decay_to_test_ = 0.9f;
   float reverb_decay_candidate_ = 0.f;
diff --git a/modules/audio_processing/aec3/aec_state_unittest.cc b/modules/audio_processing/aec3/aec_state_unittest.cc
index 06fac20..9222a91 100644
--- a/modules/audio_processing/aec3/aec_state_unittest.cc
+++ b/modules/audio_processing/aec3/aec_state_unittest.cc
@@ -48,15 +48,15 @@
 
   // Verify that linear AEC usability is false when the filter is diverged.
   state.Update(diverged_filter_frequency_response, impulse_response, true,
-               *render_delay_buffer->GetRenderBuffer(), E2_main, Y2, x[0], s,
-               false);
+               *render_delay_buffer->GetRenderBuffer(), E2_main, Y2, s, false);
   EXPECT_FALSE(state.UsableLinearEstimate());
 
   // Verify that linear AEC usability is true when the filter is converged
   std::fill(x[0].begin(), x[0].end(), 101.f);
   for (int k = 0; k < 3000; ++k) {
+    render_delay_buffer->Insert(x);
     state.Update(converged_filter_frequency_response, impulse_response, true,
-                 *render_delay_buffer->GetRenderBuffer(), E2_main, Y2, x[0], s,
+                 *render_delay_buffer->GetRenderBuffer(), E2_main, Y2, s,
                  false);
   }
   EXPECT_TRUE(state.UsableLinearEstimate());
@@ -66,35 +66,33 @@
   state.HandleEchoPathChange(EchoPathVariability(
       true, EchoPathVariability::DelayAdjustment::kNone, false));
   state.Update(converged_filter_frequency_response, impulse_response, true,
-               *render_delay_buffer->GetRenderBuffer(), E2_main, Y2, x[0], s,
-               false);
+               *render_delay_buffer->GetRenderBuffer(), E2_main, Y2, s, false);
   EXPECT_FALSE(state.UsableLinearEstimate());
 
   // Verify that the active render detection works as intended.
   std::fill(x[0].begin(), x[0].end(), 101.f);
+  render_delay_buffer->Insert(x);
   state.HandleEchoPathChange(EchoPathVariability(
       true, EchoPathVariability::DelayAdjustment::kNewDetectedDelay, false));
   state.Update(converged_filter_frequency_response, impulse_response, true,
-               *render_delay_buffer->GetRenderBuffer(), E2_main, Y2, x[0], s,
-               false);
+               *render_delay_buffer->GetRenderBuffer(), E2_main, Y2, s, false);
   EXPECT_FALSE(state.ActiveRender());
 
   for (int k = 0; k < 1000; ++k) {
+    render_delay_buffer->Insert(x);
     state.Update(converged_filter_frequency_response, impulse_response, true,
-                 *render_delay_buffer->GetRenderBuffer(), E2_main, Y2, x[0], s,
+                 *render_delay_buffer->GetRenderBuffer(), E2_main, Y2, s,
                  false);
   }
   EXPECT_TRUE(state.ActiveRender());
 
   // Verify that echo leakage is properly reported.
   state.Update(converged_filter_frequency_response, impulse_response, true,
-               *render_delay_buffer->GetRenderBuffer(), E2_main, Y2, x[0], s,
-               false);
+               *render_delay_buffer->GetRenderBuffer(), E2_main, Y2, s, false);
   EXPECT_FALSE(state.EchoLeakageDetected());
 
   state.Update(converged_filter_frequency_response, impulse_response, true,
-               *render_delay_buffer->GetRenderBuffer(), E2_main, Y2, x[0], s,
-               true);
+               *render_delay_buffer->GetRenderBuffer(), E2_main, Y2, s, true);
   EXPECT_TRUE(state.EchoLeakageDetected());
 
   // Verify that the ERL is properly estimated
@@ -115,7 +113,7 @@
   Y2.fill(10.f * 10000.f * 10000.f);
   for (size_t k = 0; k < 1000; ++k) {
     state.Update(converged_filter_frequency_response, impulse_response, true,
-                 *render_delay_buffer->GetRenderBuffer(), E2_main, Y2, x[0], s,
+                 *render_delay_buffer->GetRenderBuffer(), E2_main, Y2, s,
                  false);
   }
 
@@ -132,7 +130,7 @@
   Y2.fill(10.f * E2_main[0]);
   for (size_t k = 0; k < 1000; ++k) {
     state.Update(converged_filter_frequency_response, impulse_response, true,
-                 *render_delay_buffer->GetRenderBuffer(), E2_main, Y2, x[0], s,
+                 *render_delay_buffer->GetRenderBuffer(), E2_main, Y2, s,
                  false);
   }
   ASSERT_TRUE(state.UsableLinearEstimate());
@@ -153,7 +151,7 @@
   Y2.fill(5.f * E2_main[0]);
   for (size_t k = 0; k < 1000; ++k) {
     state.Update(converged_filter_frequency_response, impulse_response, true,
-                 *render_delay_buffer->GetRenderBuffer(), E2_main, Y2, x[0], s,
+                 *render_delay_buffer->GetRenderBuffer(), E2_main, Y2, s,
                  false);
   }
 
@@ -203,9 +201,8 @@
     frequency_response[k][0] = 0.f;
     state.HandleEchoPathChange(echo_path_variability);
     state.Update(frequency_response, impulse_response, true,
-                 *render_delay_buffer->GetRenderBuffer(), E2_main, Y2, x, s,
+                 *render_delay_buffer->GetRenderBuffer(), E2_main, Y2, s,
                  false);
-    EXPECT_TRUE(k == (kFilterLength - 1) || state.FilterDelay());
     if (k != (kFilterLength - 1)) {
       EXPECT_EQ(k, state.FilterDelay());
     }
diff --git a/modules/audio_processing/aec3/echo_remover.cc b/modules/audio_processing/aec3/echo_remover.cc
index 7aa5189..4720503 100644
--- a/modules/audio_processing/aec3/echo_remover.cc
+++ b/modules/audio_processing/aec3/echo_remover.cc
@@ -178,7 +178,7 @@
   aec_state_.Update(subtractor_.FilterFrequencyResponse(),
                     subtractor_.FilterImpulseResponse(),
                     subtractor_.ConvergedFilter(), *render_buffer, E2_main, Y2,
-                    x0, subtractor_output.s_main, echo_leakage_detected_);
+                    subtractor_output.s_main, echo_leakage_detected_);
 
   // Choose the linear output.
   output_selector_.FormLinearOutput(!aec_state_.TransparentMode(), e_main, y0);
@@ -232,9 +232,7 @@
   data_dumper_->DumpRaw("aec3_erl", aec_state_.Erl());
   data_dumper_->DumpRaw("aec3_usable_linear_estimate",
                         aec_state_.UsableLinearEstimate());
-  data_dumper_->DumpRaw(
-      "aec3_filter_delay",
-      aec_state_.FilterDelay() ? *aec_state_.FilterDelay() : -1);
+  data_dumper_->DumpRaw("aec3_filter_delay", aec_state_.FilterDelay());
   data_dumper_->DumpRaw("aec3_capture_saturation",
                         aec_state_.SaturatedCapture() ? 1 : 0);
 }
diff --git a/modules/audio_processing/aec3/echo_remover_metrics.cc b/modules/audio_processing/aec3/echo_remover_metrics.cc
index 0544a5c..bc815eb 100644
--- a/modules/audio_processing/aec3/echo_remover_metrics.cc
+++ b/modules/audio_processing/aec3/echo_remover_metrics.cc
@@ -236,10 +236,8 @@
             "WebRTC.Audio.EchoCanceller.ActiveRender",
             static_cast<int>(
                 active_render_count_ > kMetricsCollectionBlocksBy2 ? 1 : 0));
-        RTC_HISTOGRAM_COUNTS_LINEAR(
-            "WebRTC.Audio.EchoCanceller.FilterDelay",
-            aec_state.FilterDelay() ? *aec_state.FilterDelay() + 1 : 0, 0, 30,
-            31);
+        RTC_HISTOGRAM_COUNTS_LINEAR("WebRTC.Audio.EchoCanceller.FilterDelay",
+                                    aec_state.FilterDelay(), 0, 30, 31);
         RTC_HISTOGRAM_BOOLEAN("WebRTC.Audio.EchoCanceller.CaptureSaturation",
                               static_cast<int>(saturated_capture_ ? 1 : 0));
         break;
diff --git a/modules/audio_processing/aec3/main_filter_update_gain_unittest.cc b/modules/audio_processing/aec3/main_filter_update_gain_unittest.cc
index 5ca7e5a..1339a39 100644
--- a/modules/audio_processing/aec3/main_filter_update_gain_unittest.cc
+++ b/modules/audio_processing/aec3/main_filter_update_gain_unittest.cc
@@ -158,8 +158,8 @@
         false, EchoPathVariability::DelayAdjustment::kNone, false));
     aec_state.Update(main_filter.FilterFrequencyResponse(),
                      main_filter.FilterImpulseResponse(), true,
-                     *render_delay_buffer->GetRenderBuffer(), E2_main, Y2, x[0],
-                     s, false);
+                     *render_delay_buffer->GetRenderBuffer(), E2_main, Y2, s,
+                     false);
   }
 
   std::copy(e_main.begin(), e_main.end(), e_last_block->begin());
diff --git a/modules/audio_processing/aec3/residual_echo_estimator.cc b/modules/audio_processing/aec3/residual_echo_estimator.cc
index 3155770..e34d7ab 100644
--- a/modules/audio_processing/aec3/residual_echo_estimator.cc
+++ b/modules/audio_processing/aec3/residual_echo_estimator.cc
@@ -96,10 +96,8 @@
 
   // Estimate the residual echo power.
   if (aec_state.UsableLinearEstimate()) {
-    RTC_DCHECK(aec_state.FilterDelay());
-    const int filter_delay = *aec_state.FilterDelay();
-    LinearEstimate(S2_linear, aec_state.Erle(), filter_delay, R2);
-    AddEchoReverb(S2_linear, aec_state.SaturatedEcho(), filter_delay,
+    LinearEstimate(S2_linear, aec_state.Erle(), aec_state.FilterDelay(), R2);
+    AddEchoReverb(S2_linear, aec_state.SaturatedEcho(), aec_state.FilterDelay(),
                   aec_state.ReverbDecay(), R2);
 
     // If the echo is saturated, estimate the echo power as the maximum echo
@@ -110,24 +108,14 @@
   } else {
     // Estimate the echo generating signal power.
     std::array<float, kFftLengthBy2Plus1> X2;
-    if (aec_state.FilterDelay()) {
-      const int delay_use = static_cast<int>(*aec_state.FilterDelay());
 
-      // Computes the spectral power over the blocks surrounding the delay.
-      constexpr int kKnownDelayRenderWindowSize = 5;
-      // TODO(peah): Add lookahead since that was what was there initially.
-      static_assert(
-          kUnknownDelayRenderWindowSize >= kKnownDelayRenderWindowSize,
-          "Requirement to ensure that the render buffer is overrun");
-      EchoGeneratingPower(
-          render_buffer, std::max(0, delay_use - 1),
-          std::min(kKnownDelayRenderWindowSize - 1, delay_use + 1), &X2);
-    } else {
-      // Computes the spectral power over the latest blocks.
-      // TODO(peah): Add lookahead since that was what was there initially.
-      EchoGeneratingPower(render_buffer, 0, kUnknownDelayRenderWindowSize - 1,
-                          &X2);
-    }
+    // Computes the spectral power over the blocks surrounding the delay.
+    constexpr int kKnownDelayRenderWindowSize = 5;
+    // TODO(peah): Add lookahead since that was what was there initially.
+    EchoGeneratingPower(
+        render_buffer, std::max(0, aec_state.FilterDelay() - 1),
+        std::min(kKnownDelayRenderWindowSize - 1, aec_state.FilterDelay() + 1),
+        &X2);
 
     // Subtract the stationary noise power to avoid stationary noise causing
     // excessive echo suppression.
@@ -140,7 +128,8 @@
                       config_.ep_strength.bounded_erl,
                       aec_state.TransparentMode(), X2, Y2, R2);
 
-    if (aec_state.FilterDelay() && aec_state.SaturatedEcho()) {
+    if (aec_state.SaturatedEcho()) {
+      // TODO(peah): Modify to make sense theoretically.
       AddEchoReverb(*R2, aec_state.SaturatedEcho(),
                     config_.filter.length_blocks, aec_state.ReverbDecay(), R2);
     }
diff --git a/modules/audio_processing/aec3/residual_echo_estimator_unittest.cc b/modules/audio_processing/aec3/residual_echo_estimator_unittest.cc
index 7b39d0a..80b6cd6 100644
--- a/modules/audio_processing/aec3/residual_echo_estimator_unittest.cc
+++ b/modules/audio_processing/aec3/residual_echo_estimator_unittest.cc
@@ -92,7 +92,7 @@
 
     aec_state.HandleEchoPathChange(echo_path_variability);
     aec_state.Update(H2, h, true, *render_delay_buffer->GetRenderBuffer(),
-                     E2_main, Y2, x[0], s, false);
+                     E2_main, Y2, s, false);
 
     estimator.Estimate(aec_state, *render_delay_buffer->GetRenderBuffer(),
                        S2_linear, Y2, &R2);
diff --git a/modules/audio_processing/aec3/subtractor_unittest.cc b/modules/audio_processing/aec3/subtractor_unittest.cc
index 62dc80b..11cb2e4 100644
--- a/modules/audio_processing/aec3/subtractor_unittest.cc
+++ b/modules/audio_processing/aec3/subtractor_unittest.cc
@@ -84,7 +84,7 @@
     aec_state.Update(subtractor.FilterFrequencyResponse(),
                      subtractor.FilterImpulseResponse(),
                      subtractor.ConvergedFilter(),
-                     *render_delay_buffer->GetRenderBuffer(), E2_main, Y2, x[0],
+                     *render_delay_buffer->GetRenderBuffer(), E2_main, Y2,
                      output.s_main, false);
   }
 
diff --git a/modules/audio_processing/aec3/suppression_gain_unittest.cc b/modules/audio_processing/aec3/suppression_gain_unittest.cc
index 6347645..bcdcd23 100644
--- a/modules/audio_processing/aec3/suppression_gain_unittest.cc
+++ b/modules/audio_processing/aec3/suppression_gain_unittest.cc
@@ -69,10 +69,10 @@
   R2.fill(10000000000000.f);
   N2.fill(0.f);
   s.fill(10.f);
-  aec_state.Update(
-      subtractor.FilterFrequencyResponse(), subtractor.FilterImpulseResponse(),
-      subtractor.ConvergedFilter(), *render_delay_buffer->GetRenderBuffer(), E2,
-      Y2, x[0], s, false);
+  aec_state.Update(subtractor.FilterFrequencyResponse(),
+                   subtractor.FilterImpulseResponse(),
+                   subtractor.ConvergedFilter(),
+                   *render_delay_buffer->GetRenderBuffer(), E2, Y2, s, false);
   suppression_gain.GetGain(E2, R2, N2, analyzer, aec_state, x, &high_bands_gain,
                            &g);
   std::for_each(g.begin(), g.end(), [](float a) { EXPECT_FLOAT_EQ(0.f, a); });
@@ -85,17 +85,17 @@
   N2.fill(100.f);
   // Ensure that the gain is no longer forced to zero.
   for (int k = 0; k <= kNumBlocksPerSecond / 5 + 1; ++k) {
-    aec_state.Update(
-        subtractor.FilterFrequencyResponse(),
-        subtractor.FilterImpulseResponse(), subtractor.ConvergedFilter(),
-        *render_delay_buffer->GetRenderBuffer(), E2, Y2, x[0], s, false);
+    aec_state.Update(subtractor.FilterFrequencyResponse(),
+                     subtractor.FilterImpulseResponse(),
+                     subtractor.ConvergedFilter(),
+                     *render_delay_buffer->GetRenderBuffer(), E2, Y2, s, false);
   }
 
   for (int k = 0; k < 100; ++k) {
-    aec_state.Update(
-        subtractor.FilterFrequencyResponse(),
-        subtractor.FilterImpulseResponse(), subtractor.ConvergedFilter(),
-        *render_delay_buffer->GetRenderBuffer(), E2, Y2, x[0], s, false);
+    aec_state.Update(subtractor.FilterFrequencyResponse(),
+                     subtractor.FilterImpulseResponse(),
+                     subtractor.ConvergedFilter(),
+                     *render_delay_buffer->GetRenderBuffer(), E2, Y2, s, false);
     suppression_gain.GetGain(E2, R2, N2, analyzer, aec_state, x,
                              &high_bands_gain, &g);
   }
@@ -108,10 +108,10 @@
   R2.fill(0.1f);
   N2.fill(0.f);
   for (int k = 0; k < 100; ++k) {
-    aec_state.Update(
-        subtractor.FilterFrequencyResponse(),
-        subtractor.FilterImpulseResponse(), subtractor.ConvergedFilter(),
-        *render_delay_buffer->GetRenderBuffer(), E2, Y2, x[0], s, false);
+    aec_state.Update(subtractor.FilterFrequencyResponse(),
+                     subtractor.FilterImpulseResponse(),
+                     subtractor.ConvergedFilter(),
+                     *render_delay_buffer->GetRenderBuffer(), E2, Y2, s, false);
     suppression_gain.GetGain(E2, R2, N2, analyzer, aec_state, x,
                              &high_bands_gain, &g);
   }