Use the filter delay to select the proper render block in the AEC3 AecState
This CL corrects the way that the estimated filter delay is used in
AEC3. In particular:
- It uses the filter delay to choose the correct render block in AecState.
- It changes the code to reflect that the filter delay is always computed.
- It removes part of the code that formerly relied on the filter delay
  being an Optional.
Bug: webrtc:8671
Change-Id: I58135a5c174b404707e19a41c3617c09831e871d
Reviewed-on: https://webrtc-review.googlesource.com/35221
Reviewed-by: Gustaf Ullberg <gustaf@webrtc.org>
Commit-Queue: Per Åhgren <peah@webrtc.org>
Cr-Commit-Position: refs/heads/master@{#21557}
diff --git a/modules/audio_processing/aec3/adaptive_fir_filter_unittest.cc b/modules/audio_processing/aec3/adaptive_fir_filter_unittest.cc
index a58ff85..1f53f1e 100644
--- a/modules/audio_processing/aec3/adaptive_fir_filter_unittest.cc
+++ b/modules/audio_processing/aec3/adaptive_fir_filter_unittest.cc
@@ -390,13 +390,13 @@
false, EchoPathVariability::DelayAdjustment::kNone, false));
aec_state.Update(filter.FilterFrequencyResponse(),
filter.FilterImpulseResponse(), true, *render_buffer,
- E2_main, Y2, x[0], s, false);
+ E2_main, Y2, s, false);
}
// Verify that the filter is able to perform well.
EXPECT_LT(1000 * std::inner_product(e.begin(), e.end(), e.begin(), 0.f),
std::inner_product(y.begin(), y.end(), y.begin(), 0.f));
- ASSERT_TRUE(aec_state.FilterDelay());
- EXPECT_EQ(delay_samples / kBlockSize, *aec_state.FilterDelay());
+ EXPECT_EQ(delay_samples / kBlockSize,
+ static_cast<size_t>(aec_state.FilterDelay()));
}
}
} // namespace aec3
diff --git a/modules/audio_processing/aec3/aec3_common.h b/modules/audio_processing/aec3/aec3_common.h
index d6cea8c..47f0784 100644
--- a/modules/audio_processing/aec3/aec3_common.h
+++ b/modules/audio_processing/aec3/aec3_common.h
@@ -39,7 +39,6 @@
constexpr size_t kFftLength = 2 * kFftLengthBy2;
constexpr int kMaxAdaptiveFilterLength = 50;
-constexpr int kUnknownDelayRenderWindowSize = 30;
constexpr int kRenderTransferQueueSizeFrames = 100;
constexpr size_t kMaxNumBands = 3;
diff --git a/modules/audio_processing/aec3/aec_state.cc b/modules/audio_processing/aec3/aec_state.cc
index 9d30713..25bb5be 100644
--- a/modules/audio_processing/aec3/aec_state.cc
+++ b/modules/audio_processing/aec3/aec_state.cc
@@ -112,12 +112,15 @@
const RenderBuffer& render_buffer,
const std::array<float, kFftLengthBy2Plus1>& E2_main,
const std::array<float, kFftLengthBy2Plus1>& Y2,
- rtc::ArrayView<const float> x,
const std::array<float, kBlockSize>& s,
bool echo_leakage_detected) {
// Store input parameters.
echo_leakage_detected_ = echo_leakage_detected;
+ // Estimate the filter delay.
+ filter_delay_ = EstimateFilterDelay(adaptive_filter_frequency_response);
+ const std::vector<float>& x = render_buffer.Block(-filter_delay_)[0];
+
// Update counters.
++capture_block_counter_;
const bool active_render_block = DetectActiveRender(x);
@@ -130,12 +133,10 @@
// burst.
force_zero_gain_ = ++force_zero_gain_counter_ < kNumBlocksPerSecond / 5;
- // Estimate delays.
- filter_delay_ = EstimateFilterDelay(adaptive_filter_frequency_response);
// Update the ERL and ERLE measures.
if (converged_filter && capture_block_counter_ >= 2 * kNumBlocksPerSecond) {
- const auto& X2 = render_buffer.Spectrum(*filter_delay_);
+ const auto& X2 = render_buffer.Spectrum(filter_delay_);
erle_estimator_.Update(X2, Y2, E2_main);
erl_estimator_.Update(X2, Y2);
}
@@ -174,7 +175,7 @@
void AecState::UpdateReverb(const std::vector<float>& impulse_response) {
if ((!(filter_delay_ && usable_linear_estimate_)) ||
- (*filter_delay_ > config_.filter.length_blocks - 4)) {
+ (filter_delay_ > static_cast<int>(config_.filter.length_blocks) - 4)) {
return;
}
diff --git a/modules/audio_processing/aec3/aec_state.h b/modules/audio_processing/aec3/aec_state.h
index e39257d..98a78dd 100644
--- a/modules/audio_processing/aec3/aec_state.h
+++ b/modules/audio_processing/aec3/aec_state.h
@@ -62,7 +62,7 @@
float ErlTimeDomain() const { return erl_estimator_.ErlTimeDomain(); }
// Returns the delay estimate based on the linear filter.
- rtc::Optional<size_t> FilterDelay() const { return filter_delay_; }
+ int FilterDelay() const { return filter_delay_; }
// Returns whether the capture signal is saturated.
bool SaturatedCapture() const { return capture_signal_saturation_; }
@@ -111,7 +111,6 @@
const RenderBuffer& render_buffer,
const std::array<float, kFftLengthBy2Plus1>& E2_main,
const std::array<float, kFftLengthBy2Plus1>& Y2,
- rtc::ArrayView<const float> x,
const std::array<float, kBlockSize>& s_main,
bool echo_leakage_detected);
@@ -151,7 +150,7 @@
bool force_zero_gain_ = false;
bool render_received_ = false;
size_t force_zero_gain_counter_ = 0;
- rtc::Optional<size_t> filter_delay_;
+ int filter_delay_ = 0;
size_t blocks_since_last_saturation_ = 1000;
float reverb_decay_to_test_ = 0.9f;
float reverb_decay_candidate_ = 0.f;
diff --git a/modules/audio_processing/aec3/aec_state_unittest.cc b/modules/audio_processing/aec3/aec_state_unittest.cc
index 06fac20..9222a91 100644
--- a/modules/audio_processing/aec3/aec_state_unittest.cc
+++ b/modules/audio_processing/aec3/aec_state_unittest.cc
@@ -48,15 +48,15 @@
// Verify that linear AEC usability is false when the filter is diverged.
state.Update(diverged_filter_frequency_response, impulse_response, true,
- *render_delay_buffer->GetRenderBuffer(), E2_main, Y2, x[0], s,
- false);
+ *render_delay_buffer->GetRenderBuffer(), E2_main, Y2, s, false);
EXPECT_FALSE(state.UsableLinearEstimate());
// Verify that linear AEC usability is true when the filter is converged
std::fill(x[0].begin(), x[0].end(), 101.f);
for (int k = 0; k < 3000; ++k) {
+ render_delay_buffer->Insert(x);
state.Update(converged_filter_frequency_response, impulse_response, true,
- *render_delay_buffer->GetRenderBuffer(), E2_main, Y2, x[0], s,
+ *render_delay_buffer->GetRenderBuffer(), E2_main, Y2, s,
false);
}
EXPECT_TRUE(state.UsableLinearEstimate());
@@ -66,35 +66,33 @@
state.HandleEchoPathChange(EchoPathVariability(
true, EchoPathVariability::DelayAdjustment::kNone, false));
state.Update(converged_filter_frequency_response, impulse_response, true,
- *render_delay_buffer->GetRenderBuffer(), E2_main, Y2, x[0], s,
- false);
+ *render_delay_buffer->GetRenderBuffer(), E2_main, Y2, s, false);
EXPECT_FALSE(state.UsableLinearEstimate());
// Verify that the active render detection works as intended.
std::fill(x[0].begin(), x[0].end(), 101.f);
+ render_delay_buffer->Insert(x);
state.HandleEchoPathChange(EchoPathVariability(
true, EchoPathVariability::DelayAdjustment::kNewDetectedDelay, false));
state.Update(converged_filter_frequency_response, impulse_response, true,
- *render_delay_buffer->GetRenderBuffer(), E2_main, Y2, x[0], s,
- false);
+ *render_delay_buffer->GetRenderBuffer(), E2_main, Y2, s, false);
EXPECT_FALSE(state.ActiveRender());
for (int k = 0; k < 1000; ++k) {
+ render_delay_buffer->Insert(x);
state.Update(converged_filter_frequency_response, impulse_response, true,
- *render_delay_buffer->GetRenderBuffer(), E2_main, Y2, x[0], s,
+ *render_delay_buffer->GetRenderBuffer(), E2_main, Y2, s,
false);
}
EXPECT_TRUE(state.ActiveRender());
// Verify that echo leakage is properly reported.
state.Update(converged_filter_frequency_response, impulse_response, true,
- *render_delay_buffer->GetRenderBuffer(), E2_main, Y2, x[0], s,
- false);
+ *render_delay_buffer->GetRenderBuffer(), E2_main, Y2, s, false);
EXPECT_FALSE(state.EchoLeakageDetected());
state.Update(converged_filter_frequency_response, impulse_response, true,
- *render_delay_buffer->GetRenderBuffer(), E2_main, Y2, x[0], s,
- true);
+ *render_delay_buffer->GetRenderBuffer(), E2_main, Y2, s, true);
EXPECT_TRUE(state.EchoLeakageDetected());
// Verify that the ERL is properly estimated
@@ -115,7 +113,7 @@
Y2.fill(10.f * 10000.f * 10000.f);
for (size_t k = 0; k < 1000; ++k) {
state.Update(converged_filter_frequency_response, impulse_response, true,
- *render_delay_buffer->GetRenderBuffer(), E2_main, Y2, x[0], s,
+ *render_delay_buffer->GetRenderBuffer(), E2_main, Y2, s,
false);
}
@@ -132,7 +130,7 @@
Y2.fill(10.f * E2_main[0]);
for (size_t k = 0; k < 1000; ++k) {
state.Update(converged_filter_frequency_response, impulse_response, true,
- *render_delay_buffer->GetRenderBuffer(), E2_main, Y2, x[0], s,
+ *render_delay_buffer->GetRenderBuffer(), E2_main, Y2, s,
false);
}
ASSERT_TRUE(state.UsableLinearEstimate());
@@ -153,7 +151,7 @@
Y2.fill(5.f * E2_main[0]);
for (size_t k = 0; k < 1000; ++k) {
state.Update(converged_filter_frequency_response, impulse_response, true,
- *render_delay_buffer->GetRenderBuffer(), E2_main, Y2, x[0], s,
+ *render_delay_buffer->GetRenderBuffer(), E2_main, Y2, s,
false);
}
@@ -203,9 +201,8 @@
frequency_response[k][0] = 0.f;
state.HandleEchoPathChange(echo_path_variability);
state.Update(frequency_response, impulse_response, true,
- *render_delay_buffer->GetRenderBuffer(), E2_main, Y2, x, s,
+ *render_delay_buffer->GetRenderBuffer(), E2_main, Y2, s,
false);
- EXPECT_TRUE(k == (kFilterLength - 1) || state.FilterDelay());
if (k != (kFilterLength - 1)) {
EXPECT_EQ(k, state.FilterDelay());
}
diff --git a/modules/audio_processing/aec3/echo_remover.cc b/modules/audio_processing/aec3/echo_remover.cc
index 7aa5189..4720503 100644
--- a/modules/audio_processing/aec3/echo_remover.cc
+++ b/modules/audio_processing/aec3/echo_remover.cc
@@ -178,7 +178,7 @@
aec_state_.Update(subtractor_.FilterFrequencyResponse(),
subtractor_.FilterImpulseResponse(),
subtractor_.ConvergedFilter(), *render_buffer, E2_main, Y2,
- x0, subtractor_output.s_main, echo_leakage_detected_);
+ subtractor_output.s_main, echo_leakage_detected_);
// Choose the linear output.
output_selector_.FormLinearOutput(!aec_state_.TransparentMode(), e_main, y0);
@@ -232,9 +232,7 @@
data_dumper_->DumpRaw("aec3_erl", aec_state_.Erl());
data_dumper_->DumpRaw("aec3_usable_linear_estimate",
aec_state_.UsableLinearEstimate());
- data_dumper_->DumpRaw(
- "aec3_filter_delay",
- aec_state_.FilterDelay() ? *aec_state_.FilterDelay() : -1);
+ data_dumper_->DumpRaw("aec3_filter_delay", aec_state_.FilterDelay());
data_dumper_->DumpRaw("aec3_capture_saturation",
aec_state_.SaturatedCapture() ? 1 : 0);
}
diff --git a/modules/audio_processing/aec3/echo_remover_metrics.cc b/modules/audio_processing/aec3/echo_remover_metrics.cc
index 0544a5c..bc815eb 100644
--- a/modules/audio_processing/aec3/echo_remover_metrics.cc
+++ b/modules/audio_processing/aec3/echo_remover_metrics.cc
@@ -236,10 +236,8 @@
"WebRTC.Audio.EchoCanceller.ActiveRender",
static_cast<int>(
active_render_count_ > kMetricsCollectionBlocksBy2 ? 1 : 0));
- RTC_HISTOGRAM_COUNTS_LINEAR(
- "WebRTC.Audio.EchoCanceller.FilterDelay",
- aec_state.FilterDelay() ? *aec_state.FilterDelay() + 1 : 0, 0, 30,
- 31);
+ RTC_HISTOGRAM_COUNTS_LINEAR("WebRTC.Audio.EchoCanceller.FilterDelay",
+ aec_state.FilterDelay(), 0, 30, 31);
RTC_HISTOGRAM_BOOLEAN("WebRTC.Audio.EchoCanceller.CaptureSaturation",
static_cast<int>(saturated_capture_ ? 1 : 0));
break;
diff --git a/modules/audio_processing/aec3/main_filter_update_gain_unittest.cc b/modules/audio_processing/aec3/main_filter_update_gain_unittest.cc
index 5ca7e5a..1339a39 100644
--- a/modules/audio_processing/aec3/main_filter_update_gain_unittest.cc
+++ b/modules/audio_processing/aec3/main_filter_update_gain_unittest.cc
@@ -158,8 +158,8 @@
false, EchoPathVariability::DelayAdjustment::kNone, false));
aec_state.Update(main_filter.FilterFrequencyResponse(),
main_filter.FilterImpulseResponse(), true,
- *render_delay_buffer->GetRenderBuffer(), E2_main, Y2, x[0],
- s, false);
+ *render_delay_buffer->GetRenderBuffer(), E2_main, Y2, s,
+ false);
}
std::copy(e_main.begin(), e_main.end(), e_last_block->begin());
diff --git a/modules/audio_processing/aec3/residual_echo_estimator.cc b/modules/audio_processing/aec3/residual_echo_estimator.cc
index 3155770..e34d7ab 100644
--- a/modules/audio_processing/aec3/residual_echo_estimator.cc
+++ b/modules/audio_processing/aec3/residual_echo_estimator.cc
@@ -96,10 +96,8 @@
// Estimate the residual echo power.
if (aec_state.UsableLinearEstimate()) {
- RTC_DCHECK(aec_state.FilterDelay());
- const int filter_delay = *aec_state.FilterDelay();
- LinearEstimate(S2_linear, aec_state.Erle(), filter_delay, R2);
- AddEchoReverb(S2_linear, aec_state.SaturatedEcho(), filter_delay,
+ LinearEstimate(S2_linear, aec_state.Erle(), aec_state.FilterDelay(), R2);
+ AddEchoReverb(S2_linear, aec_state.SaturatedEcho(), aec_state.FilterDelay(),
aec_state.ReverbDecay(), R2);
// If the echo is saturated, estimate the echo power as the maximum echo
@@ -110,24 +108,14 @@
} else {
// Estimate the echo generating signal power.
std::array<float, kFftLengthBy2Plus1> X2;
- if (aec_state.FilterDelay()) {
- const int delay_use = static_cast<int>(*aec_state.FilterDelay());
- // Computes the spectral power over the blocks surrounding the delay.
- constexpr int kKnownDelayRenderWindowSize = 5;
- // TODO(peah): Add lookahead since that was what was there initially.
- static_assert(
- kUnknownDelayRenderWindowSize >= kKnownDelayRenderWindowSize,
- "Requirement to ensure that the render buffer is overrun");
- EchoGeneratingPower(
- render_buffer, std::max(0, delay_use - 1),
- std::min(kKnownDelayRenderWindowSize - 1, delay_use + 1), &X2);
- } else {
- // Computes the spectral power over the latest blocks.
- // TODO(peah): Add lookahead since that was what was there initially.
- EchoGeneratingPower(render_buffer, 0, kUnknownDelayRenderWindowSize - 1,
- &X2);
- }
+ // Computes the spectral power over the blocks surrounding the delay.
+ constexpr int kKnownDelayRenderWindowSize = 5;
+ // TODO(peah): Add lookahead since that was what was there initially.
+ EchoGeneratingPower(
+ render_buffer, std::max(0, aec_state.FilterDelay() - 1),
+ std::min(kKnownDelayRenderWindowSize - 1, aec_state.FilterDelay() + 1),
+ &X2);
// Subtract the stationary noise power to avoid stationary noise causing
// excessive echo suppression.
@@ -140,7 +128,8 @@
config_.ep_strength.bounded_erl,
aec_state.TransparentMode(), X2, Y2, R2);
- if (aec_state.FilterDelay() && aec_state.SaturatedEcho()) {
+ if (aec_state.SaturatedEcho()) {
+ // TODO(peah): Modify to make sense theoretically.
AddEchoReverb(*R2, aec_state.SaturatedEcho(),
config_.filter.length_blocks, aec_state.ReverbDecay(), R2);
}
diff --git a/modules/audio_processing/aec3/residual_echo_estimator_unittest.cc b/modules/audio_processing/aec3/residual_echo_estimator_unittest.cc
index 7b39d0a..80b6cd6 100644
--- a/modules/audio_processing/aec3/residual_echo_estimator_unittest.cc
+++ b/modules/audio_processing/aec3/residual_echo_estimator_unittest.cc
@@ -92,7 +92,7 @@
aec_state.HandleEchoPathChange(echo_path_variability);
aec_state.Update(H2, h, true, *render_delay_buffer->GetRenderBuffer(),
- E2_main, Y2, x[0], s, false);
+ E2_main, Y2, s, false);
estimator.Estimate(aec_state, *render_delay_buffer->GetRenderBuffer(),
S2_linear, Y2, &R2);
diff --git a/modules/audio_processing/aec3/subtractor_unittest.cc b/modules/audio_processing/aec3/subtractor_unittest.cc
index 62dc80b..11cb2e4 100644
--- a/modules/audio_processing/aec3/subtractor_unittest.cc
+++ b/modules/audio_processing/aec3/subtractor_unittest.cc
@@ -84,7 +84,7 @@
aec_state.Update(subtractor.FilterFrequencyResponse(),
subtractor.FilterImpulseResponse(),
subtractor.ConvergedFilter(),
- *render_delay_buffer->GetRenderBuffer(), E2_main, Y2, x[0],
+ *render_delay_buffer->GetRenderBuffer(), E2_main, Y2,
output.s_main, false);
}
diff --git a/modules/audio_processing/aec3/suppression_gain_unittest.cc b/modules/audio_processing/aec3/suppression_gain_unittest.cc
index 6347645..bcdcd23 100644
--- a/modules/audio_processing/aec3/suppression_gain_unittest.cc
+++ b/modules/audio_processing/aec3/suppression_gain_unittest.cc
@@ -69,10 +69,10 @@
R2.fill(10000000000000.f);
N2.fill(0.f);
s.fill(10.f);
- aec_state.Update(
- subtractor.FilterFrequencyResponse(), subtractor.FilterImpulseResponse(),
- subtractor.ConvergedFilter(), *render_delay_buffer->GetRenderBuffer(), E2,
- Y2, x[0], s, false);
+ aec_state.Update(subtractor.FilterFrequencyResponse(),
+ subtractor.FilterImpulseResponse(),
+ subtractor.ConvergedFilter(),
+ *render_delay_buffer->GetRenderBuffer(), E2, Y2, s, false);
suppression_gain.GetGain(E2, R2, N2, analyzer, aec_state, x, &high_bands_gain,
&g);
std::for_each(g.begin(), g.end(), [](float a) { EXPECT_FLOAT_EQ(0.f, a); });
@@ -85,17 +85,17 @@
N2.fill(100.f);
// Ensure that the gain is no longer forced to zero.
for (int k = 0; k <= kNumBlocksPerSecond / 5 + 1; ++k) {
- aec_state.Update(
- subtractor.FilterFrequencyResponse(),
- subtractor.FilterImpulseResponse(), subtractor.ConvergedFilter(),
- *render_delay_buffer->GetRenderBuffer(), E2, Y2, x[0], s, false);
+ aec_state.Update(subtractor.FilterFrequencyResponse(),
+ subtractor.FilterImpulseResponse(),
+ subtractor.ConvergedFilter(),
+ *render_delay_buffer->GetRenderBuffer(), E2, Y2, s, false);
}
for (int k = 0; k < 100; ++k) {
- aec_state.Update(
- subtractor.FilterFrequencyResponse(),
- subtractor.FilterImpulseResponse(), subtractor.ConvergedFilter(),
- *render_delay_buffer->GetRenderBuffer(), E2, Y2, x[0], s, false);
+ aec_state.Update(subtractor.FilterFrequencyResponse(),
+ subtractor.FilterImpulseResponse(),
+ subtractor.ConvergedFilter(),
+ *render_delay_buffer->GetRenderBuffer(), E2, Y2, s, false);
suppression_gain.GetGain(E2, R2, N2, analyzer, aec_state, x,
&high_bands_gain, &g);
}
@@ -108,10 +108,10 @@
R2.fill(0.1f);
N2.fill(0.f);
for (int k = 0; k < 100; ++k) {
- aec_state.Update(
- subtractor.FilterFrequencyResponse(),
- subtractor.FilterImpulseResponse(), subtractor.ConvergedFilter(),
- *render_delay_buffer->GetRenderBuffer(), E2, Y2, x[0], s, false);
+ aec_state.Update(subtractor.FilterFrequencyResponse(),
+ subtractor.FilterImpulseResponse(),
+ subtractor.ConvergedFilter(),
+ *render_delay_buffer->GetRenderBuffer(), E2, Y2, s, false);
suppression_gain.GetGain(E2, R2, N2, analyzer, aec_state, x,
&high_bands_gain, &g);
}