Add AudioProcessingCaptureStats and a level estimator replacement

This adds an interface for accessing stats on the capture stream, and
adds a level estimator to report one of the stats.

Bug: webrtc:9947
Change-Id: Id472534fa2e04d46c9ab700671f620584a246afb
Reviewed-on: https://webrtc-review.googlesource.com/c/109587
Commit-Queue: Sam Zackrisson <saza@webrtc.org>
Reviewed-by: Per Åhgren <peah@webrtc.org>
Cr-Commit-Position: refs/heads/master@{#25786}
diff --git a/modules/audio_processing/audio_processing_impl.cc b/modules/audio_processing/audio_processing_impl.cc
index 3764647..2937c06 100644
--- a/modules/audio_processing/audio_processing_impl.cc
+++ b/modules/audio_processing/audio_processing_impl.cc
@@ -259,6 +259,7 @@
   std::unique_ptr<CustomProcessing> render_pre_processor;
   std::unique_ptr<GainApplier> pre_amplifier;
   std::unique_ptr<CustomAudioAnalyzer> capture_analyzer;
+  std::unique_ptr<LevelEstimatorImpl> output_level_estimator;
 };
 
 AudioProcessingBuilder::AudioProcessingBuilder() = default;
@@ -673,6 +674,13 @@
                    << config_.gain_controller2.enabled;
   RTC_LOG(LS_INFO) << "Pre-amplifier activated: "
                    << config_.pre_amplifier.enabled;
+
+  if (config_.level_estimation.enabled &&
+      !private_submodules_->output_level_estimator) {
+    private_submodules_->output_level_estimator.reset(
+        new LevelEstimatorImpl(&crit_capture_));
+    private_submodules_->output_level_estimator->Enable(true);
+  }
 }
 
 void AudioProcessingImpl::SetExtraOptions(const webrtc::Config& config) {
@@ -1336,6 +1344,13 @@
 
   // The level estimator operates on the recombined data.
   public_submodules_->level_estimator->ProcessStream(capture_buffer);
+  if (config_.level_estimation.enabled) {
+    private_submodules_->output_level_estimator->ProcessStream(capture_buffer);
+    capture_.stats.output_rms_dbfs =
+        private_submodules_->output_level_estimator->RMS();
+  } else {
+    capture_.stats.output_rms_dbfs = absl::nullopt;
+  }
 
   capture_output_rms_.Analyze(rtc::ArrayView<const int16_t>(
       capture_buffer->channels_const()[0],
@@ -1587,49 +1602,50 @@
 
 AudioProcessingStats AudioProcessingImpl::GetStatistics(
     bool has_remote_tracks) const {
-  AudioProcessingStats stats;
-  if (has_remote_tracks) {
-    EchoCancellationImpl::Metrics metrics;
-    rtc::CritScope cs_capture(&crit_capture_);
-    if (private_submodules_->echo_controller) {
-      auto ec_metrics = private_submodules_->echo_controller->GetMetrics();
-      stats.echo_return_loss = ec_metrics.echo_return_loss;
+  rtc::CritScope cs_capture(&crit_capture_);
+  if (!has_remote_tracks) {
+    return capture_.stats;
+  }
+  AudioProcessingStats stats = capture_.stats;
+  EchoCancellationImpl::Metrics metrics;
+  if (private_submodules_->echo_controller) {
+    auto ec_metrics = private_submodules_->echo_controller->GetMetrics();
+    stats.echo_return_loss = ec_metrics.echo_return_loss;
+    stats.echo_return_loss_enhancement =
+        ec_metrics.echo_return_loss_enhancement;
+    stats.delay_ms = ec_metrics.delay_ms;
+  } else if (private_submodules_->echo_cancellation->GetMetrics(&metrics) ==
+             Error::kNoError) {
+    if (metrics.divergent_filter_fraction != -1.0f) {
+      stats.divergent_filter_fraction =
+          absl::optional<double>(metrics.divergent_filter_fraction);
+    }
+    if (metrics.echo_return_loss.instant != -100) {
+      stats.echo_return_loss =
+          absl::optional<double>(metrics.echo_return_loss.instant);
+    }
+    if (metrics.echo_return_loss_enhancement.instant != -100) {
       stats.echo_return_loss_enhancement =
-          ec_metrics.echo_return_loss_enhancement;
-      stats.delay_ms = ec_metrics.delay_ms;
-    } else if (private_submodules_->echo_cancellation->GetMetrics(&metrics) ==
-               Error::kNoError) {
-      if (metrics.divergent_filter_fraction != -1.0f) {
-        stats.divergent_filter_fraction =
-            absl::optional<double>(metrics.divergent_filter_fraction);
-      }
-      if (metrics.echo_return_loss.instant != -100) {
-        stats.echo_return_loss =
-            absl::optional<double>(metrics.echo_return_loss.instant);
-      }
-      if (metrics.echo_return_loss_enhancement.instant != -100) {
-        stats.echo_return_loss_enhancement = absl::optional<double>(
-            metrics.echo_return_loss_enhancement.instant);
-      }
+          absl::optional<double>(metrics.echo_return_loss_enhancement.instant);
     }
-    if (config_.residual_echo_detector.enabled) {
-      RTC_DCHECK(private_submodules_->echo_detector);
-      auto ed_metrics = private_submodules_->echo_detector->GetMetrics();
-      stats.residual_echo_likelihood = ed_metrics.echo_likelihood;
-      stats.residual_echo_likelihood_recent_max =
-          ed_metrics.echo_likelihood_recent_max;
+  }
+  if (config_.residual_echo_detector.enabled) {
+    RTC_DCHECK(private_submodules_->echo_detector);
+    auto ed_metrics = private_submodules_->echo_detector->GetMetrics();
+    stats.residual_echo_likelihood = ed_metrics.echo_likelihood;
+    stats.residual_echo_likelihood_recent_max =
+        ed_metrics.echo_likelihood_recent_max;
+  }
+  int delay_median, delay_std;
+  float fraction_poor_delays;
+  if (private_submodules_->echo_cancellation->GetDelayMetrics(
+          &delay_median, &delay_std, &fraction_poor_delays) ==
+      Error::kNoError) {
+    if (delay_median >= 0) {
+      stats.delay_median_ms = absl::optional<int32_t>(delay_median);
     }
-    int delay_median, delay_std;
-    float fraction_poor_delays;
-    if (private_submodules_->echo_cancellation->GetDelayMetrics(
-            &delay_median, &delay_std, &fraction_poor_delays) ==
-        Error::kNoError) {
-      if (delay_median >= 0) {
-        stats.delay_median_ms = absl::optional<int32_t>(delay_median);
-      }
-      if (delay_std >= 0) {
-        stats.delay_standard_deviation_ms = absl::optional<int32_t>(delay_std);
-      }
+    if (delay_std >= 0) {
+      stats.delay_standard_deviation_ms = absl::optional<int32_t>(delay_std);
     }
   }
   return stats;
diff --git a/modules/audio_processing/audio_processing_impl.h b/modules/audio_processing/audio_processing_impl.h
index e376a74..2f946c5 100644
--- a/modules/audio_processing/audio_processing_impl.h
+++ b/modules/audio_processing/audio_processing_impl.h
@@ -18,6 +18,7 @@
 #include "modules/audio_processing/audio_buffer.h"
 #include "modules/audio_processing/include/aec_dump.h"
 #include "modules/audio_processing/include/audio_processing.h"
+#include "modules/audio_processing/include/audio_processing_statistics.h"
 #include "modules/audio_processing/render_queue_item_verifier.h"
 #include "modules/audio_processing/rms_level.h"
 #include "rtc_base/criticalsection.h"
@@ -390,6 +391,7 @@
     bool echo_path_gain_change;
     int prev_analog_mic_level;
     float prev_pre_amp_gain;
+    AudioProcessingStats stats;
   } capture_ RTC_GUARDED_BY(crit_capture_);
 
   struct ApmCaptureNonLockedState {
diff --git a/modules/audio_processing/audio_processing_unittest.cc b/modules/audio_processing/audio_processing_unittest.cc
index 18e669f..6809ab9 100644
--- a/modules/audio_processing/audio_processing_unittest.cc
+++ b/modules/audio_processing/audio_processing_unittest.cc
@@ -2801,4 +2801,42 @@
   EXPECT_FALSE(stats.delay_median_ms);
   EXPECT_FALSE(stats.delay_standard_deviation_ms);
 }
+
+TEST(ApmStatistics, ReportOutputRmsDbfs) {
+  ProcessingConfig processing_config = {
+      {{32000, 1}, {32000, 1}, {32000, 1}, {32000, 1}}};
+  AudioProcessing::Config config;
+
+  // Set up an audioframe.
+  AudioFrame frame;
+  frame.num_channels_ = 1;
+  SetFrameSampleRate(&frame, AudioProcessing::NativeRate::kSampleRate48kHz);
+
+  // Fill the audio frame with a sawtooth pattern.
+  int16_t* ptr = frame.mutable_data();
+  for (size_t i = 0; i < frame.kMaxDataSizeSamples; i++) {
+    ptr[i] = 10000 * ((i % 3) - 1);
+  }
+
+  std::unique_ptr<AudioProcessing> apm(AudioProcessingBuilder().Create());
+  apm->Initialize(processing_config);
+
+  // If not enabled, no metric should be reported.
+  EXPECT_EQ(apm->ProcessStream(&frame), 0);
+  EXPECT_FALSE(apm->GetStatistics(false).output_rms_dbfs);
+
+  // If enabled, metrics should be reported.
+  config.level_estimation.enabled = true;
+  apm->ApplyConfig(config);
+  EXPECT_EQ(apm->ProcessStream(&frame), 0);
+  auto stats = apm->GetStatistics(false);
+  EXPECT_TRUE(stats.output_rms_dbfs);
+  EXPECT_GE(*stats.output_rms_dbfs, 0);
+
+  // If re-disabled, the value is again not reported.
+  config.level_estimation.enabled = false;
+  apm->ApplyConfig(config);
+  EXPECT_EQ(apm->ProcessStream(&frame), 0);
+  EXPECT_FALSE(apm->GetStatistics(false).output_rms_dbfs);
+}
 }  // namespace webrtc
diff --git a/modules/audio_processing/include/audio_processing.h b/modules/audio_processing/include/audio_processing.h
index 9a1a03c..df51313 100644
--- a/modules/audio_processing/include/audio_processing.h
+++ b/modules/audio_processing/include/audio_processing.h
@@ -283,6 +283,11 @@
       } adaptive_digital;
     } gain_controller2;
 
+    // Enables reporting of |output_rms_dbfs| in webrtc::AudioProcessingStats.
+    struct LevelEstimation {
+      bool enabled = false;
+    } level_estimation;
+
     // Explicit copy assignment implementation to avoid issues with memory
     // sanitizer complaints in case of self-assignment.
     // TODO(peah): Add buildflag to ensure that this is only included for memory
diff --git a/modules/audio_processing/include/audio_processing_statistics.h b/modules/audio_processing/include/audio_processing_statistics.h
index 2ff2009..683db05 100644
--- a/modules/audio_processing/include/audio_processing_statistics.h
+++ b/modules/audio_processing/include/audio_processing_statistics.h
@@ -24,6 +24,14 @@
   AudioProcessingStats(const AudioProcessingStats& other);
   ~AudioProcessingStats();
 
+  // The root mean square (RMS) level in dBFS (decibels from digital
+  // full-scale) of the last capture frame, after processing. Stated range:
+  // [-127, 0]. NOTE(review): ReportOutputRmsDbfs expects >= 0; confirm sign.
+  // The computation follows: https://tools.ietf.org/html/rfc6465
+  // with the intent that it can provide the RTP audio level indication.
+  // Only reported if level estimation is enabled in AudioProcessing::Config.
+  absl::optional<int> output_rms_dbfs;
+
   // AEC Statistics.
   // ERL = 10log_10(P_far / P_echo)
   absl::optional<double> echo_return_loss;