Correct the FFT windowing when computing the AEC NLP gain
This CL adds a nonwindowed spectrum of the linear filter error
to use in the NLP computation.
Bug: webrtc:8661
Change-Id: I45bc9bb3eb8eeac0c5d6adb414638eb12b635a27
Reviewed-on: https://webrtc-review.googlesource.com/38701
Reviewed-by: Gustaf Ullberg <gustaf@webrtc.org>
Commit-Queue: Per Åhgren <peah@webrtc.org>
Cr-Commit-Position: refs/heads/master@{#21583}
diff --git a/modules/audio_processing/aec3/adaptive_fir_filter_unittest.cc b/modules/audio_processing/aec3/adaptive_fir_filter_unittest.cc
index 1f53f1e..eecb244 100644
--- a/modules/audio_processing/aec3/adaptive_fir_filter_unittest.cc
+++ b/modules/audio_processing/aec3/adaptive_fir_filter_unittest.cc
@@ -376,7 +376,7 @@
[&](float a, float b) { return a - b * kScale; });
std::for_each(e.begin(), e.end(),
[](float& a) { a = rtc::SafeClamp(a, -32768.f, 32767.f); });
- fft.ZeroPaddedFft(e, &E);
+ fft.ZeroPaddedFft(e, Aec3Fft::Window::kRectangular, &E);
for (size_t k = 0; k < kBlockSize; ++k) {
s[k] = kScale * s_scratch[k + kFftLengthBy2];
}
diff --git a/modules/audio_processing/aec3/aec3_fft.cc b/modules/audio_processing/aec3/aec3_fft.cc
index c8120cb..d669036 100644
--- a/modules/audio_processing/aec3/aec3_fft.cc
+++ b/modules/audio_processing/aec3/aec3_fft.cc
@@ -16,13 +16,46 @@
namespace webrtc {
+namespace {
+
+const float kHanning64[kFftLengthBy2] = {
+ 0.f, 0.00248461f, 0.00991376f, 0.0222136f, 0.03926189f,
+ 0.06088921f, 0.08688061f, 0.11697778f, 0.15088159f, 0.1882551f,
+ 0.22872687f, 0.27189467f, 0.31732949f, 0.36457977f, 0.41317591f,
+ 0.46263495f, 0.51246535f, 0.56217185f, 0.61126047f, 0.65924333f,
+ 0.70564355f, 0.75f, 0.79187184f, 0.83084292f, 0.86652594f,
+ 0.89856625f, 0.92664544f, 0.95048443f, 0.96984631f, 0.98453864f,
+ 0.99441541f, 0.99937846f, 0.99937846f, 0.99441541f, 0.98453864f,
+ 0.96984631f, 0.95048443f, 0.92664544f, 0.89856625f, 0.86652594f,
+ 0.83084292f, 0.79187184f, 0.75f, 0.70564355f, 0.65924333f,
+ 0.61126047f, 0.56217185f, 0.51246535f, 0.46263495f, 0.41317591f,
+ 0.36457977f, 0.31732949f, 0.27189467f, 0.22872687f, 0.1882551f,
+ 0.15088159f, 0.11697778f, 0.08688061f, 0.06088921f, 0.03926189f,
+ 0.0222136f, 0.00991376f, 0.00248461f, 0.f};
+
+} // namespace
+
// TODO(peah): Change x to be std::array once the rest of the code allows this.
-void Aec3Fft::ZeroPaddedFft(rtc::ArrayView<const float> x, FftData* X) const {
+void Aec3Fft::ZeroPaddedFft(rtc::ArrayView<const float> x,
+ Window window,
+ FftData* X) const {
RTC_DCHECK(X);
RTC_DCHECK_EQ(kFftLengthBy2, x.size());
std::array<float, kFftLength> fft;
std::fill(fft.begin(), fft.begin() + kFftLengthBy2, 0.f);
- std::copy(x.begin(), x.end(), fft.begin() + kFftLengthBy2);
+ switch (window) {
+ case Window::kRectangular:
+ std::copy(x.begin(), x.end(), fft.begin() + kFftLengthBy2);
+ break;
+ case Window::kHanning:
+ std::transform(x.begin(), x.end(), std::begin(kHanning64),
+ fft.begin() + kFftLengthBy2,
+ [](float a, float b) { return a * b; });
+ break;
+ default:
+ RTC_NOTREACHED();
+ }
+
Fft(&fft, X);
}
diff --git a/modules/audio_processing/aec3/aec3_fft.h b/modules/audio_processing/aec3/aec3_fft.h
index 2a5dfef..f3dddb3 100644
--- a/modules/audio_processing/aec3/aec3_fft.h
+++ b/modules/audio_processing/aec3/aec3_fft.h
@@ -25,6 +25,8 @@
// FftData type.
class Aec3Fft {
public:
+ enum class Window { kRectangular, kHanning };
+
Aec3Fft() = default;
// Computes the FFT. Note that both the input and output are modified.
void Fft(std::array<float, kFftLength>* x, FftData* X) const {
@@ -40,8 +42,11 @@
ooura_fft_.InverseFft(x->data());
}
- // Pads the input with kFftLengthBy2 initial zeros before computing the Fft.
- void ZeroPaddedFft(rtc::ArrayView<const float> x, FftData* X) const;
+ // Applies the window selected by |window| to the input, and then adds
+ // padding of kFftLengthBy2 initial zeros before computing the Fft.
+ void ZeroPaddedFft(rtc::ArrayView<const float> x,
+ Window window,
+ FftData* X) const;
// Concatenates the kFftLengthBy2 values long x and x_old before computing the
// Fft. After that, x is copied to x_old.
diff --git a/modules/audio_processing/aec3/aec3_fft_unittest.cc b/modules/audio_processing/aec3/aec3_fft_unittest.cc
index 46831e0..87fe7a8 100644
--- a/modules/audio_processing/aec3/aec3_fft_unittest.cc
+++ b/modules/audio_processing/aec3/aec3_fft_unittest.cc
@@ -44,7 +44,8 @@
TEST(Aec3Fft, NullZeroPaddedFftOutput) {
Aec3Fft fft;
std::array<float, kFftLengthBy2> x;
- EXPECT_DEATH(fft.ZeroPaddedFft(x, nullptr), "");
+ EXPECT_DEATH(fft.ZeroPaddedFft(x, Aec3Fft::Window::kRectangular, nullptr),
+ "");
}
// Verifies that the check for input length in ZeroPaddedFft works.
@@ -52,7 +53,7 @@
Aec3Fft fft;
FftData X;
std::array<float, kFftLengthBy2 - 1> x;
- EXPECT_DEATH(fft.ZeroPaddedFft(x, &X), "");
+ EXPECT_DEATH(fft.ZeroPaddedFft(x, Aec3Fft::Window::kRectangular, &X), "");
}
// Verifies that the check for non-null output in PaddedFft works.
@@ -167,7 +168,7 @@
x_in[j] = v++;
x_ref[j + kFftLengthBy2] = x_in[j] * 64.f;
}
- fft.ZeroPaddedFft(x_in, &X);
+ fft.ZeroPaddedFft(x_in, Aec3Fft::Window::kRectangular, &X);
fft.Ifft(X, &x_out);
for (size_t j = 0; j < x_out.size(); ++j) {
EXPECT_NEAR(x_ref[j], x_out[j], 0.1f);
diff --git a/modules/audio_processing/aec3/echo_remover.cc b/modules/audio_processing/aec3/echo_remover.cc
index 4720503..fc0e680 100644
--- a/modules/audio_processing/aec3/echo_remover.cc
+++ b/modules/audio_processing/aec3/echo_remover.cc
@@ -157,8 +157,8 @@
FftData comfort_noise;
FftData high_band_comfort_noise;
SubtractorOutput subtractor_output;
- FftData& E_main = subtractor_output.E_main;
- auto& E2_main = subtractor_output.E2_main;
+ FftData& E_main_nonwindowed = subtractor_output.E_main_nonwindowed;
+ auto& E2_main = subtractor_output.E2_main_nonwindowed;
auto& E2_shadow = subtractor_output.E2_shadow;
auto& e_main = subtractor_output.e_main;
@@ -170,8 +170,9 @@
&subtractor_output);
// Compute spectra.
- fft_.ZeroPaddedFft(y0, &Y);
- LinearEchoPower(E_main, Y, &S2_linear);
+ // fft_.ZeroPaddedFft(y0, Aec3Fft::Window::kHanning, &Y);
+ fft_.ZeroPaddedFft(y0, Aec3Fft::Window::kRectangular, &Y);
+ LinearEchoPower(E_main_nonwindowed, Y, &S2_linear);
Y.Spectrum(optimization_, Y2);
// Update the AEC state information.
diff --git a/modules/audio_processing/aec3/echo_remover_metrics_unittest.cc b/modules/audio_processing/aec3/echo_remover_metrics_unittest.cc
index 2b30a74..fbd30d1 100644
--- a/modules/audio_processing/aec3/echo_remover_metrics_unittest.cc
+++ b/modules/audio_processing/aec3/echo_remover_metrics_unittest.cc
@@ -64,7 +64,7 @@
std::array<float, kFftLengthBy2Plus1> X2;
Aec3Fft fft;
x.fill(1000.f);
- fft.ZeroPaddedFft(x, &X);
+ fft.ZeroPaddedFft(x, Aec3Fft::Window::kRectangular, &X);
X.Spectrum(Aec3Optimization::kNone, X2);
float offset = -10.f * log10(32768.f * 32768.f);
diff --git a/modules/audio_processing/aec3/main_filter_update_gain_unittest.cc b/modules/audio_processing/aec3/main_filter_update_gain_unittest.cc
index 1339a39..7120cc2 100644
--- a/modules/audio_processing/aec3/main_filter_update_gain_unittest.cc
+++ b/modules/audio_processing/aec3/main_filter_update_gain_unittest.cc
@@ -119,7 +119,7 @@
[&](float a, float b) { return a - b * kScale; });
std::for_each(e_main.begin(), e_main.end(),
[](float& a) { a = rtc::SafeClamp(a, -32768.f, 32767.f); });
- fft.ZeroPaddedFft(e_main, &E_main);
+ fft.ZeroPaddedFft(e_main, Aec3Fft::Window::kRectangular, &E_main);
for (size_t k = 0; k < kBlockSize; ++k) {
s[k] = kScale * s_scratch[k + kFftLengthBy2];
}
@@ -132,7 +132,7 @@
[&](float a, float b) { return a - b * kScale; });
std::for_each(e_shadow.begin(), e_shadow.end(),
[](float& a) { a = rtc::SafeClamp(a, -32768.f, 32767.f); });
- fft.ZeroPaddedFft(e_shadow, &E_shadow);
+ fft.ZeroPaddedFft(e_shadow, Aec3Fft::Window::kRectangular, &E_shadow);
// Compute spectra for future use.
E_main.Spectrum(Aec3Optimization::kNone, output.E2_main);
diff --git a/modules/audio_processing/aec3/shadow_filter_update_gain_unittest.cc b/modules/audio_processing/aec3/shadow_filter_update_gain_unittest.cc
index 7a7c3ab..34d4a7b 100644
--- a/modules/audio_processing/aec3/shadow_filter_update_gain_unittest.cc
+++ b/modules/audio_processing/aec3/shadow_filter_update_gain_unittest.cc
@@ -94,7 +94,7 @@
[&](float a, float b) { return a - b * kScale; });
std::for_each(e_shadow.begin(), e_shadow.end(),
[](float& a) { a = rtc::SafeClamp(a, -32768.f, 32767.f); });
- fft.ZeroPaddedFft(e_shadow, &E_shadow);
+ fft.ZeroPaddedFft(e_shadow, Aec3Fft::Window::kRectangular, &E_shadow);
std::array<float, kFftLengthBy2Plus1> render_power;
render_delay_buffer->GetRenderBuffer()->SpectralSum(
diff --git a/modules/audio_processing/aec3/subtractor.cc b/modules/audio_processing/aec3/subtractor.cc
index 870c8a4..81aba7f 100644
--- a/modules/audio_processing/aec3/subtractor.cc
+++ b/modules/audio_processing/aec3/subtractor.cc
@@ -22,26 +22,10 @@
namespace {
-const float kHanning64[64] = {
- 0.f, 0.00248461f, 0.00991376f, 0.0222136f, 0.03926189f,
- 0.06088921f, 0.08688061f, 0.11697778f, 0.15088159f, 0.1882551f,
- 0.22872687f, 0.27189467f, 0.31732949f, 0.36457977f, 0.41317591f,
- 0.46263495f, 0.51246535f, 0.56217185f, 0.61126047f, 0.65924333f,
- 0.70564355f, 0.75f, 0.79187184f, 0.83084292f, 0.86652594f,
- 0.89856625f, 0.92664544f, 0.95048443f, 0.96984631f, 0.98453864f,
- 0.99441541f, 0.99937846f, 0.99937846f, 0.99441541f, 0.98453864f,
- 0.96984631f, 0.95048443f, 0.92664544f, 0.89856625f, 0.86652594f,
- 0.83084292f, 0.79187184f, 0.75f, 0.70564355f, 0.65924333f,
- 0.61126047f, 0.56217185f, 0.51246535f, 0.46263495f, 0.41317591f,
- 0.36457977f, 0.31732949f, 0.27189467f, 0.22872687f, 0.1882551f,
- 0.15088159f, 0.11697778f, 0.08688061f, 0.06088921f, 0.03926189f,
- 0.0222136f, 0.00991376f, 0.00248461f, 0.f};
-
void PredictionError(const Aec3Fft& fft,
const FftData& S,
rtc::ArrayView<const float> y,
std::array<float, kBlockSize>* e,
- FftData* E,
std::array<float, kBlockSize>* s) {
std::array<float, kFftLength> tmp;
fft.Ifft(S, &tmp);
@@ -57,13 +41,6 @@
std::for_each(e->begin(), e->end(),
[](float& a) { a = rtc::SafeClamp(a, -32768.f, 32767.f); });
-
- RTC_DCHECK_EQ(64, e->size());
- RTC_DCHECK_LE(64, tmp.size());
- std::transform(e->begin(), e->end(), std::begin(kHanning64), tmp.begin(),
- [](float a, float b) { return a * b; });
-
- fft.ZeroPaddedFft(rtc::ArrayView<const float>(tmp.data(), 64), E);
}
} // namespace
@@ -119,6 +96,7 @@
RTC_DCHECK_EQ(kBlockSize, capture.size());
rtc::ArrayView<const float> y = capture;
FftData& E_main = output->E_main;
+ FftData& E_main_nonwindowed = output->E_main_nonwindowed;
FftData E_shadow;
std::array<float, kBlockSize>& e_main = output->e_main;
std::array<float, kBlockSize>& e_shadow = output->e_shadow;
@@ -128,11 +106,15 @@
// Form the output of the main filter.
main_filter_.Filter(render_buffer, &S);
- PredictionError(fft_, S, y, &e_main, &E_main, &output->s_main);
+ PredictionError(fft_, S, y, &e_main, &output->s_main);
+ fft_.ZeroPaddedFft(e_main, Aec3Fft::Window::kHanning, &E_main);
+ fft_.ZeroPaddedFft(e_main, Aec3Fft::Window::kRectangular,
+ &E_main_nonwindowed);
// Form the output of the shadow filter.
shadow_filter_.Filter(render_buffer, &S);
- PredictionError(fft_, S, y, &e_shadow, &E_shadow, nullptr);
+ PredictionError(fft_, S, y, &e_shadow, nullptr);
+ fft_.ZeroPaddedFft(e_shadow, Aec3Fft::Window::kHanning, &E_shadow);
if (!converged_filter_) {
const auto sum_of_squares = [](float a, float b) { return a + b * b; };
@@ -149,6 +131,7 @@
// Compute spectra for future use.
E_main.Spectrum(optimization_, output->E2_main);
+ E_main_nonwindowed.Spectrum(optimization_, output->E2_main_nonwindowed);
E_shadow.Spectrum(optimization_, output->E2_shadow);
// Update the main filter.
diff --git a/modules/audio_processing/aec3/subtractor_output.h b/modules/audio_processing/aec3/subtractor_output.h
index 8655665..83f6cf5 100644
--- a/modules/audio_processing/aec3/subtractor_output.h
+++ b/modules/audio_processing/aec3/subtractor_output.h
@@ -24,7 +24,9 @@
std::array<float, kBlockSize> e_main;
std::array<float, kBlockSize> e_shadow;
FftData E_main;
+ FftData E_main_nonwindowed;
std::array<float, kFftLengthBy2Plus1> E2_main;
+ std::array<float, kFftLengthBy2Plus1> E2_main_nonwindowed;
std::array<float, kFftLengthBy2Plus1> E2_shadow;
void Reset() {