Correct the FFT windowing when computing the AEC NLP gain

This CL adds a nonwindowed spectrum of the linear filter error
for use in the NLP computation.

Bug: webrtc:8661
Change-Id: I45bc9bb3eb8eeac0c5d6adb414638eb12b635a27
Reviewed-on: https://webrtc-review.googlesource.com/38701
Reviewed-by: Gustaf Ullberg <gustaf@webrtc.org>
Commit-Queue: Per Åhgren <peah@webrtc.org>
Cr-Commit-Position: refs/heads/master@{#21583}
diff --git a/modules/audio_processing/aec3/adaptive_fir_filter_unittest.cc b/modules/audio_processing/aec3/adaptive_fir_filter_unittest.cc
index 1f53f1e..eecb244 100644
--- a/modules/audio_processing/aec3/adaptive_fir_filter_unittest.cc
+++ b/modules/audio_processing/aec3/adaptive_fir_filter_unittest.cc
@@ -376,7 +376,7 @@
                      [&](float a, float b) { return a - b * kScale; });
       std::for_each(e.begin(), e.end(),
                     [](float& a) { a = rtc::SafeClamp(a, -32768.f, 32767.f); });
-      fft.ZeroPaddedFft(e, &E);
+      fft.ZeroPaddedFft(e, Aec3Fft::Window::kRectangular, &E);
       for (size_t k = 0; k < kBlockSize; ++k) {
         s[k] = kScale * s_scratch[k + kFftLengthBy2];
       }
diff --git a/modules/audio_processing/aec3/aec3_fft.cc b/modules/audio_processing/aec3/aec3_fft.cc
index c8120cb..d669036 100644
--- a/modules/audio_processing/aec3/aec3_fft.cc
+++ b/modules/audio_processing/aec3/aec3_fft.cc
@@ -16,13 +16,46 @@
 
 namespace webrtc {
 
+namespace {
+
+const float kHanning64[kFftLengthBy2] = {
+    0.f,         0.00248461f, 0.00991376f, 0.0222136f,  0.03926189f,
+    0.06088921f, 0.08688061f, 0.11697778f, 0.15088159f, 0.1882551f,
+    0.22872687f, 0.27189467f, 0.31732949f, 0.36457977f, 0.41317591f,
+    0.46263495f, 0.51246535f, 0.56217185f, 0.61126047f, 0.65924333f,
+    0.70564355f, 0.75f,       0.79187184f, 0.83084292f, 0.86652594f,
+    0.89856625f, 0.92664544f, 0.95048443f, 0.96984631f, 0.98453864f,
+    0.99441541f, 0.99937846f, 0.99937846f, 0.99441541f, 0.98453864f,
+    0.96984631f, 0.95048443f, 0.92664544f, 0.89856625f, 0.86652594f,
+    0.83084292f, 0.79187184f, 0.75f,       0.70564355f, 0.65924333f,
+    0.61126047f, 0.56217185f, 0.51246535f, 0.46263495f, 0.41317591f,
+    0.36457977f, 0.31732949f, 0.27189467f, 0.22872687f, 0.1882551f,
+    0.15088159f, 0.11697778f, 0.08688061f, 0.06088921f, 0.03926189f,
+    0.0222136f,  0.00991376f, 0.00248461f, 0.f};
+
+}  // namespace
+
 // TODO(peah): Change x to be std::array once the rest of the code allows this.
-void Aec3Fft::ZeroPaddedFft(rtc::ArrayView<const float> x, FftData* X) const {
+void Aec3Fft::ZeroPaddedFft(rtc::ArrayView<const float> x,
+                            Window window,
+                            FftData* X) const {
   RTC_DCHECK(X);
   RTC_DCHECK_EQ(kFftLengthBy2, x.size());
   std::array<float, kFftLength> fft;
   std::fill(fft.begin(), fft.begin() + kFftLengthBy2, 0.f);
-  std::copy(x.begin(), x.end(), fft.begin() + kFftLengthBy2);
+  switch (window) {
+    case Window::kRectangular:
+      std::copy(x.begin(), x.end(), fft.begin() + kFftLengthBy2);
+      break;
+    case Window::kHanning:
+      std::transform(x.begin(), x.end(), std::begin(kHanning64),
+                     fft.begin() + kFftLengthBy2,
+                     [](float a, float b) { return a * b; });
+      break;
+    default:
+      RTC_NOTREACHED();
+  }
+
   Fft(&fft, X);
 }
 
diff --git a/modules/audio_processing/aec3/aec3_fft.h b/modules/audio_processing/aec3/aec3_fft.h
index 2a5dfef..f3dddb3 100644
--- a/modules/audio_processing/aec3/aec3_fft.h
+++ b/modules/audio_processing/aec3/aec3_fft.h
@@ -25,6 +25,8 @@
 // FftData type.
 class Aec3Fft {
  public:
+  enum class Window { kRectangular, kHanning };
+
   Aec3Fft() = default;
   // Computes the FFT. Note that both the input and output are modified.
   void Fft(std::array<float, kFftLength>* x, FftData* X) const {
@@ -40,8 +42,11 @@
     ooura_fft_.InverseFft(x->data());
   }
 
-  // Pads the input with kFftLengthBy2 initial zeros before computing the Fft.
-  void ZeroPaddedFft(rtc::ArrayView<const float> x, FftData* X) const;
+  // Applies the selected window (rectangular or Hanning) to the input, and
+  // then adds kFftLengthBy2 initial zeros of padding before computing the Fft.
+  void ZeroPaddedFft(rtc::ArrayView<const float> x,
+                     Window window,
+                     FftData* X) const;
 
   // Concatenates the kFftLengthBy2 values long x and x_old before computing the
   // Fft. After that, x is copied to x_old.
diff --git a/modules/audio_processing/aec3/aec3_fft_unittest.cc b/modules/audio_processing/aec3/aec3_fft_unittest.cc
index 46831e0..87fe7a8 100644
--- a/modules/audio_processing/aec3/aec3_fft_unittest.cc
+++ b/modules/audio_processing/aec3/aec3_fft_unittest.cc
@@ -44,7 +44,8 @@
 TEST(Aec3Fft, NullZeroPaddedFftOutput) {
   Aec3Fft fft;
   std::array<float, kFftLengthBy2> x;
-  EXPECT_DEATH(fft.ZeroPaddedFft(x, nullptr), "");
+  EXPECT_DEATH(fft.ZeroPaddedFft(x, Aec3Fft::Window::kRectangular, nullptr),
+               "");
 }
 
 // Verifies that the check for input length in ZeroPaddedFft works.
@@ -52,7 +53,7 @@
   Aec3Fft fft;
   FftData X;
   std::array<float, kFftLengthBy2 - 1> x;
-  EXPECT_DEATH(fft.ZeroPaddedFft(x, &X), "");
+  EXPECT_DEATH(fft.ZeroPaddedFft(x, Aec3Fft::Window::kRectangular, &X), "");
 }
 
 // Verifies that the check for non-null output in PaddedFft works.
@@ -167,7 +168,7 @@
       x_in[j] = v++;
       x_ref[j + kFftLengthBy2] = x_in[j] * 64.f;
     }
-    fft.ZeroPaddedFft(x_in, &X);
+    fft.ZeroPaddedFft(x_in, Aec3Fft::Window::kRectangular, &X);
     fft.Ifft(X, &x_out);
     for (size_t j = 0; j < x_out.size(); ++j) {
       EXPECT_NEAR(x_ref[j], x_out[j], 0.1f);
diff --git a/modules/audio_processing/aec3/echo_remover.cc b/modules/audio_processing/aec3/echo_remover.cc
index 4720503..fc0e680 100644
--- a/modules/audio_processing/aec3/echo_remover.cc
+++ b/modules/audio_processing/aec3/echo_remover.cc
@@ -157,8 +157,8 @@
   FftData comfort_noise;
   FftData high_band_comfort_noise;
   SubtractorOutput subtractor_output;
-  FftData& E_main = subtractor_output.E_main;
-  auto& E2_main = subtractor_output.E2_main;
+  FftData& E_main_nonwindowed = subtractor_output.E_main_nonwindowed;
+  auto& E2_main = subtractor_output.E2_main_nonwindowed;
   auto& E2_shadow = subtractor_output.E2_shadow;
   auto& e_main = subtractor_output.e_main;
 
@@ -170,8 +170,9 @@
                       &subtractor_output);
 
   // Compute spectra.
-  fft_.ZeroPaddedFft(y0, &Y);
-  LinearEchoPower(E_main, Y, &S2_linear);
+  // fft_.ZeroPaddedFft(y0, Aec3Fft::Window::kHanning, &Y);
+  fft_.ZeroPaddedFft(y0, Aec3Fft::Window::kRectangular, &Y);
+  LinearEchoPower(E_main_nonwindowed, Y, &S2_linear);
   Y.Spectrum(optimization_, Y2);
 
   // Update the AEC state information.
diff --git a/modules/audio_processing/aec3/echo_remover_metrics_unittest.cc b/modules/audio_processing/aec3/echo_remover_metrics_unittest.cc
index 2b30a74..fbd30d1 100644
--- a/modules/audio_processing/aec3/echo_remover_metrics_unittest.cc
+++ b/modules/audio_processing/aec3/echo_remover_metrics_unittest.cc
@@ -64,7 +64,7 @@
   std::array<float, kFftLengthBy2Plus1> X2;
   Aec3Fft fft;
   x.fill(1000.f);
-  fft.ZeroPaddedFft(x, &X);
+  fft.ZeroPaddedFft(x, Aec3Fft::Window::kRectangular, &X);
   X.Spectrum(Aec3Optimization::kNone, X2);
 
   float offset = -10.f * log10(32768.f * 32768.f);
diff --git a/modules/audio_processing/aec3/main_filter_update_gain_unittest.cc b/modules/audio_processing/aec3/main_filter_update_gain_unittest.cc
index 1339a39..7120cc2 100644
--- a/modules/audio_processing/aec3/main_filter_update_gain_unittest.cc
+++ b/modules/audio_processing/aec3/main_filter_update_gain_unittest.cc
@@ -119,7 +119,7 @@
                    [&](float a, float b) { return a - b * kScale; });
     std::for_each(e_main.begin(), e_main.end(),
                   [](float& a) { a = rtc::SafeClamp(a, -32768.f, 32767.f); });
-    fft.ZeroPaddedFft(e_main, &E_main);
+    fft.ZeroPaddedFft(e_main, Aec3Fft::Window::kRectangular, &E_main);
     for (size_t k = 0; k < kBlockSize; ++k) {
       s[k] = kScale * s_scratch[k + kFftLengthBy2];
     }
@@ -132,7 +132,7 @@
                    [&](float a, float b) { return a - b * kScale; });
     std::for_each(e_shadow.begin(), e_shadow.end(),
                   [](float& a) { a = rtc::SafeClamp(a, -32768.f, 32767.f); });
-    fft.ZeroPaddedFft(e_shadow, &E_shadow);
+    fft.ZeroPaddedFft(e_shadow, Aec3Fft::Window::kRectangular, &E_shadow);
 
     // Compute spectra for future use.
     E_main.Spectrum(Aec3Optimization::kNone, output.E2_main);
diff --git a/modules/audio_processing/aec3/shadow_filter_update_gain_unittest.cc b/modules/audio_processing/aec3/shadow_filter_update_gain_unittest.cc
index 7a7c3ab..34d4a7b 100644
--- a/modules/audio_processing/aec3/shadow_filter_update_gain_unittest.cc
+++ b/modules/audio_processing/aec3/shadow_filter_update_gain_unittest.cc
@@ -94,7 +94,7 @@
                    [&](float a, float b) { return a - b * kScale; });
     std::for_each(e_shadow.begin(), e_shadow.end(),
                   [](float& a) { a = rtc::SafeClamp(a, -32768.f, 32767.f); });
-    fft.ZeroPaddedFft(e_shadow, &E_shadow);
+    fft.ZeroPaddedFft(e_shadow, Aec3Fft::Window::kRectangular, &E_shadow);
 
     std::array<float, kFftLengthBy2Plus1> render_power;
     render_delay_buffer->GetRenderBuffer()->SpectralSum(
diff --git a/modules/audio_processing/aec3/subtractor.cc b/modules/audio_processing/aec3/subtractor.cc
index 870c8a4..81aba7f 100644
--- a/modules/audio_processing/aec3/subtractor.cc
+++ b/modules/audio_processing/aec3/subtractor.cc
@@ -22,26 +22,10 @@
 
 namespace {
 
-const float kHanning64[64] = {
-    0.f,         0.00248461f, 0.00991376f, 0.0222136f,  0.03926189f,
-    0.06088921f, 0.08688061f, 0.11697778f, 0.15088159f, 0.1882551f,
-    0.22872687f, 0.27189467f, 0.31732949f, 0.36457977f, 0.41317591f,
-    0.46263495f, 0.51246535f, 0.56217185f, 0.61126047f, 0.65924333f,
-    0.70564355f, 0.75f,       0.79187184f, 0.83084292f, 0.86652594f,
-    0.89856625f, 0.92664544f, 0.95048443f, 0.96984631f, 0.98453864f,
-    0.99441541f, 0.99937846f, 0.99937846f, 0.99441541f, 0.98453864f,
-    0.96984631f, 0.95048443f, 0.92664544f, 0.89856625f, 0.86652594f,
-    0.83084292f, 0.79187184f, 0.75f,       0.70564355f, 0.65924333f,
-    0.61126047f, 0.56217185f, 0.51246535f, 0.46263495f, 0.41317591f,
-    0.36457977f, 0.31732949f, 0.27189467f, 0.22872687f, 0.1882551f,
-    0.15088159f, 0.11697778f, 0.08688061f, 0.06088921f, 0.03926189f,
-    0.0222136f,  0.00991376f, 0.00248461f, 0.f};
-
 void PredictionError(const Aec3Fft& fft,
                      const FftData& S,
                      rtc::ArrayView<const float> y,
                      std::array<float, kBlockSize>* e,
-                     FftData* E,
                      std::array<float, kBlockSize>* s) {
   std::array<float, kFftLength> tmp;
   fft.Ifft(S, &tmp);
@@ -57,13 +41,6 @@
 
   std::for_each(e->begin(), e->end(),
                 [](float& a) { a = rtc::SafeClamp(a, -32768.f, 32767.f); });
-
-  RTC_DCHECK_EQ(64, e->size());
-  RTC_DCHECK_LE(64, tmp.size());
-  std::transform(e->begin(), e->end(), std::begin(kHanning64), tmp.begin(),
-                 [](float a, float b) { return a * b; });
-
-  fft.ZeroPaddedFft(rtc::ArrayView<const float>(tmp.data(), 64), E);
 }
 
 }  // namespace
@@ -119,6 +96,7 @@
   RTC_DCHECK_EQ(kBlockSize, capture.size());
   rtc::ArrayView<const float> y = capture;
   FftData& E_main = output->E_main;
+  FftData& E_main_nonwindowed = output->E_main_nonwindowed;
   FftData E_shadow;
   std::array<float, kBlockSize>& e_main = output->e_main;
   std::array<float, kBlockSize>& e_shadow = output->e_shadow;
@@ -128,11 +106,15 @@
 
   // Form the output of the main filter.
   main_filter_.Filter(render_buffer, &S);
-  PredictionError(fft_, S, y, &e_main, &E_main, &output->s_main);
+  PredictionError(fft_, S, y, &e_main, &output->s_main);
+  fft_.ZeroPaddedFft(e_main, Aec3Fft::Window::kHanning, &E_main);
+  fft_.ZeroPaddedFft(e_main, Aec3Fft::Window::kRectangular,
+                     &E_main_nonwindowed);
 
   // Form the output of the shadow filter.
   shadow_filter_.Filter(render_buffer, &S);
-  PredictionError(fft_, S, y, &e_shadow, &E_shadow, nullptr);
+  PredictionError(fft_, S, y, &e_shadow, nullptr);
+  fft_.ZeroPaddedFft(e_shadow, Aec3Fft::Window::kHanning, &E_shadow);
 
   if (!converged_filter_) {
     const auto sum_of_squares = [](float a, float b) { return a + b * b; };
@@ -149,6 +131,7 @@
 
   // Compute spectra for future use.
   E_main.Spectrum(optimization_, output->E2_main);
+  E_main_nonwindowed.Spectrum(optimization_, output->E2_main_nonwindowed);
   E_shadow.Spectrum(optimization_, output->E2_shadow);
 
   // Update the main filter.
diff --git a/modules/audio_processing/aec3/subtractor_output.h b/modules/audio_processing/aec3/subtractor_output.h
index 8655665..83f6cf5 100644
--- a/modules/audio_processing/aec3/subtractor_output.h
+++ b/modules/audio_processing/aec3/subtractor_output.h
@@ -24,7 +24,9 @@
   std::array<float, kBlockSize> e_main;
   std::array<float, kBlockSize> e_shadow;
   FftData E_main;
+  FftData E_main_nonwindowed;
   std::array<float, kFftLengthBy2Plus1> E2_main;
+  std::array<float, kFftLengthBy2Plus1> E2_main_nonwindowed;
   std::array<float, kFftLengthBy2Plus1> E2_shadow;
 
   void Reset() {