Delete voice_detection() pointer to submodule

The new configuration path is via AudioProcessing::ApplyConfig and
AudioProcessing::GetStatistics.

ApmTest.Process passes with unchanged reference files if
audio_processing_impl is modified to initialize the VAD with
VoiceDetection::kLowLikelihood instead of the kVeryLowLikelihood
used in this CL. This was verified by testing this CL with that
modification applied.

Bug: webrtc:9878
Change-Id: I4d08df37a07e5c72feeec02a07d6b9435f917d72
Reviewed-on: https://webrtc-review.googlesource.com/c/src/+/155445
Commit-Queue: Sam Zackrisson <saza@webrtc.org>
Reviewed-by: Ivo Creusen <ivoc@webrtc.org>
Cr-Commit-Position: refs/heads/master@{#29395}
diff --git a/modules/audio_processing/audio_processing_impl.cc b/modules/audio_processing/audio_processing_impl.cc
index b1187fa..c661848 100644
--- a/modules/audio_processing/audio_processing_impl.cc
+++ b/modules/audio_processing/audio_processing_impl.cc
@@ -40,7 +40,7 @@
 #include "modules/audio_processing/noise_suppression_proxy.h"
 #include "modules/audio_processing/residual_echo_detector.h"
 #include "modules/audio_processing/transient/transient_suppressor.h"
-#include "modules/audio_processing/voice_detection_impl.h"
+#include "modules/audio_processing/voice_detection.h"
 #include "rtc_base/atomic_ops.h"
 #include "rtc_base/checks.h"
 #include "rtc_base/constructor_magic.h"
@@ -165,8 +165,7 @@
     bool gain_controller2_enabled,
     bool pre_amplifier_enabled,
     bool echo_controller_enabled,
-    bool voice_activity_detector_enabled,
-    bool private_voice_detector_enabled,
+    bool voice_detector_enabled,
     bool level_estimator_enabled,
     bool transient_suppressor_enabled) {
   bool changed = false;
@@ -183,10 +182,7 @@
   changed |= (pre_amplifier_enabled_ != pre_amplifier_enabled);
   changed |= (echo_controller_enabled != echo_controller_enabled_);
   changed |= (level_estimator_enabled != level_estimator_enabled_);
-  changed |=
-      (voice_activity_detector_enabled != voice_activity_detector_enabled_);
-  changed |=
-      (private_voice_detector_enabled != private_voice_detector_enabled_);
+  changed |= (voice_detector_enabled != voice_detector_enabled_);
   changed |= (transient_suppressor_enabled != transient_suppressor_enabled_);
   if (changed) {
     high_pass_filter_enabled_ = high_pass_filter_enabled;
@@ -199,8 +195,7 @@
     pre_amplifier_enabled_ = pre_amplifier_enabled;
     echo_controller_enabled_ = echo_controller_enabled;
     level_estimator_enabled_ = level_estimator_enabled;
-    voice_activity_detector_enabled_ = voice_activity_detector_enabled;
-    private_voice_detector_enabled_ = private_voice_detector_enabled;
+    voice_detector_enabled_ = voice_detector_enabled;
     transient_suppressor_enabled_ = transient_suppressor_enabled;
   }
 
@@ -211,8 +206,7 @@
 
 bool AudioProcessingImpl::ApmSubmoduleStates::CaptureMultiBandSubModulesActive()
     const {
-  return CaptureMultiBandProcessingActive() ||
-         voice_activity_detector_enabled_ || private_voice_detector_enabled_;
+  return CaptureMultiBandProcessingActive() || voice_detector_enabled_;
 }
 
 bool AudioProcessingImpl::ApmSubmoduleStates::CaptureMultiBandProcessingActive()
@@ -263,7 +257,6 @@
   std::unique_ptr<LevelEstimatorImpl> level_estimator;
   std::unique_ptr<NoiseSuppressionImpl> noise_suppression;
   std::unique_ptr<NoiseSuppressionProxy> noise_suppression_proxy;
-  std::unique_ptr<VoiceDetectionImpl> voice_detection;
   std::unique_ptr<GainControlImpl> gain_control;
   std::unique_ptr<GainControlForExperimentalAgc>
       gain_control_for_experimental_agc;
@@ -295,7 +288,7 @@
   std::unique_ptr<GainApplier> pre_amplifier;
   std::unique_ptr<CustomAudioAnalyzer> capture_analyzer;
   std::unique_ptr<LevelEstimatorImpl> output_level_estimator;
-  std::unique_ptr<VoiceDetectionImpl> voice_detector;
+  std::unique_ptr<VoiceDetection> voice_detector;
 };
 
 AudioProcessingBuilder::AudioProcessingBuilder() = default;
@@ -415,8 +408,6 @@
       new NoiseSuppressionImpl(&crit_capture_));
   public_submodules_->noise_suppression_proxy.reset(new NoiseSuppressionProxy(
       this, public_submodules_->noise_suppression.get()));
-  public_submodules_->voice_detection.reset(
-      new VoiceDetectionImpl(&crit_capture_));
   public_submodules_->gain_control_for_experimental_agc.reset(
       new GainControlForExperimentalAgc(
           public_submodules_->gain_control.get()));
@@ -556,11 +547,7 @@
   InitializeHighPassFilter();
   public_submodules_->noise_suppression->Initialize(num_proc_channels(),
                                                     proc_sample_rate_hz());
-  public_submodules_->voice_detection->Initialize(proc_split_sample_rate_hz());
-  if (private_submodules_->voice_detector) {
-    private_submodules_->voice_detector->Initialize(
-        proc_split_sample_rate_hz());
-  }
+  InitializeVoiceDetector();
   public_submodules_->level_estimator->Initialize();
   InitializeResidualEchoDetector();
   InitializeEchoController();
@@ -702,6 +689,9 @@
       config_.gain_controller1.analog_level_maximum !=
           config.gain_controller1.analog_level_maximum;
 
+  const bool voice_detection_config_changed =
+      config_.voice_detection.enabled != config.voice_detection.enabled;
+
   config_ = config;
 
   if (aec_config_changed) {
@@ -745,14 +735,8 @@
     private_submodules_->output_level_estimator->Enable(true);
   }
 
-  if (config_.voice_detection.enabled && !private_submodules_->voice_detector) {
-    private_submodules_->voice_detector.reset(
-        new VoiceDetectionImpl(&crit_capture_));
-    private_submodules_->voice_detector->Enable(true);
-    private_submodules_->voice_detector->set_likelihood(
-        VoiceDetection::kVeryLowLikelihood);
-    private_submodules_->voice_detector->Initialize(
-        proc_split_sample_rate_hz());
+  if (voice_detection_config_changed) {
+    InitializeVoiceDetector();
   }
 
   // Reinitialization must happen after all submodule configuration to avoid
@@ -1276,14 +1260,17 @@
     RecordUnprocessedCaptureStream(*frame);
   }
 
-  capture_.vad_activity = frame->vad_activity_;
   capture_.capture_audio->CopyFrom(frame);
   RETURN_ON_ERR(ProcessCaptureStreamLocked());
   if (submodule_states_.CaptureMultiBandProcessingActive() ||
       submodule_states_.CaptureFullBandProcessingActive()) {
     capture_.capture_audio->CopyTo(frame);
   }
-  frame->vad_activity_ = capture_.vad_activity;
+  if (capture_.stats.voice_detected) {
+    frame->vad_activity_ = *capture_.stats.voice_detected
+                               ? AudioFrame::kVadActive
+                               : AudioFrame::kVadPassive;
+  }
 
   if (aec_dump_) {
     RecordProcessedCaptureStream(*frame);
@@ -1432,19 +1419,10 @@
     public_submodules_->noise_suppression->ProcessCaptureAudio(capture_buffer);
   }
 
-  if (public_submodules_->voice_detection->is_enabled() &&
-      !public_submodules_->voice_detection->using_external_vad()) {
-    bool voice_active =
-        public_submodules_->voice_detection->ProcessCaptureAudio(
-            capture_buffer);
-    capture_.vad_activity =
-        voice_active ? AudioFrame::kVadActive : AudioFrame::kVadPassive;
-  }
-
   if (config_.voice_detection.enabled) {
-    private_submodules_->voice_detector->ProcessCaptureAudio(capture_buffer);
     capture_.stats.voice_detected =
-        private_submodules_->voice_detector->stream_has_voice();
+        private_submodules_->voice_detector->ProcessCaptureAudio(
+            capture_buffer);
   } else {
     capture_.stats.voice_detected = absl::nullopt;
   }
@@ -1817,10 +1795,6 @@
   return public_submodules_->noise_suppression_proxy.get();
 }
 
-VoiceDetection* AudioProcessingImpl::voice_detection() const {
-  return public_submodules_->voice_detection.get();
-}
-
 void AudioProcessingImpl::MutateConfig(
     rtc::FunctionView<void(AudioProcessing::Config*)> mutator) {
   rtc::CritScope cs_render(&crit_render_);
@@ -1845,7 +1819,6 @@
       public_submodules_->gain_control->is_enabled(),
       config_.gain_controller2.enabled, config_.pre_amplifier.enabled,
       capture_nonlocked_.echo_controller_enabled,
-      public_submodules_->voice_detection->is_enabled(),
       config_.voice_detection.enabled,
       public_submodules_->level_estimator->is_enabled(),
       capture_.transient_suppressor_enabled);
@@ -1871,6 +1844,14 @@
   }
 }
 
+void AudioProcessingImpl::InitializeVoiceDetector() {
+  if (config_.voice_detection.enabled) {
+    private_submodules_->voice_detector = std::make_unique<VoiceDetection>(
+        proc_split_sample_rate_hz(), VoiceDetection::kVeryLowLikelihood);
+  } else {
+    private_submodules_->voice_detector.reset();
+  }
+}
 void AudioProcessingImpl::InitializeEchoController() {
   bool use_echo_controller =
       echo_control_factory_ ||