APM: Replace most usages of AudioFrame with a stream interface

This CL creates a new stream interface and uses it to replace
most usages of AudioFrame in the non-test code.

The CL also changes some of the test code, since the other
changes required it.

The CL will be followed by two more related CLs.
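
For illustration, a caller of the new int16_t stream interface could
look roughly as follows. The APM setup, the 48 kHz mono configuration
and the buffer below are placeholders, and the exact shape of the
public ProcessStream() overload (in particular the vad_result
parameter) may still change in the follow-up CLs:

  #include "modules/audio_processing/include/audio_processing.h"

  rtc::scoped_refptr<webrtc::AudioProcessing> apm =
      webrtc::AudioProcessingBuilder().Create();
  webrtc::StreamConfig config(/*sample_rate_hz=*/48000, /*num_channels=*/1,
                              /*has_keyboard=*/false);
  int16_t audio[480] = {};  // One 10 ms frame of 48 kHz mono audio.
  // Render (far-end) and capture (near-end) audio is now passed as raw
  // int16_t buffers plus StreamConfigs instead of as AudioFrames.
  apm->ProcessReverseStream(audio, config, config, audio);
  apm->ProcessStream(audio, config, config, audio,
                     /*vad_result=*/nullptr);  // VAD result not requested.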

Bug: webrtc:5298
Change-Id: I5cfbe6079f30fc3fbf35b35fd077b6fb49c7def0
Reviewed-on: https://webrtc-review.googlesource.com/c/src/+/170040
Commit-Queue: Per Åhgren <peah@webrtc.org>
Reviewed-by: Sam Zackrisson <saza@webrtc.org>
Cr-Commit-Position: refs/heads/master@{#30799}
diff --git a/modules/audio_processing/audio_processing_impl.cc b/modules/audio_processing/audio_processing_impl.cc
index f4c242b..dfa5437 100644
--- a/modules/audio_processing/audio_processing_impl.cc
+++ b/modules/audio_processing/audio_processing_impl.cc
@@ -19,6 +19,7 @@
 
 #include "absl/types/optional.h"
 #include "api/array_view.h"
+#include "api/audio/audio_frame.h"
 #include "common_audio/audio_converter.h"
 #include "common_audio/include/audio_util.h"
 #include "modules/audio_processing/agc2/gain_applier.h"
@@ -1064,35 +1065,60 @@
   StreamConfig output_config(frame->sample_rate_hz_, frame->num_channels_,
                              /*has_keyboard=*/false);
   RTC_DCHECK_EQ(frame->samples_per_channel(), input_config.num_frames());
+
+  VoiceDetectionResult vad_result = VoiceDetectionResult::kNotAvailable;
+
+  int result = ProcessStream(frame->data(), input_config, output_config,
+                             frame->mutable_data(), &vad_result);
+
+  if (vad_result != VoiceDetectionResult::kNotAvailable) {
+    frame->vad_activity_ = vad_result == VoiceDetectionResult::kDetected
+                               ? AudioFrame::VADActivity::kVadActive
+                               : AudioFrame::VADActivity::kVadPassive;
+  }
+
+  return result;
+}
+
+int AudioProcessingImpl::ProcessStream(const int16_t* const src,
+                                       const StreamConfig& input_config,
+                                       const StreamConfig& output_config,
+                                       int16_t* const dest,
+                                       VoiceDetectionResult* vad_result) {
   RETURN_ON_ERR(MaybeInitializeCapture(input_config, output_config));
 
   rtc::CritScope cs_capture(&crit_capture_);
 
   if (aec_dump_) {
-    RecordUnprocessedCaptureStream(*frame);
+    RecordUnprocessedCaptureStream(src, input_config);
   }
 
-  capture_.capture_audio->CopyFrom(frame);
+  capture_.capture_audio->CopyFrom(src, input_config);
   if (capture_.capture_fullband_audio) {
-    capture_.capture_fullband_audio->CopyFrom(frame);
+    capture_.capture_fullband_audio->CopyFrom(src, input_config);
   }
   RETURN_ON_ERR(ProcessCaptureStreamLocked());
   if (submodule_states_.CaptureMultiBandProcessingPresent() ||
       submodule_states_.CaptureFullBandProcessingActive()) {
     if (capture_.capture_fullband_audio) {
-      capture_.capture_fullband_audio->CopyTo(frame);
+      capture_.capture_fullband_audio->CopyTo(output_config, dest);
     } else {
-      capture_.capture_audio->CopyTo(frame);
+      capture_.capture_audio->CopyTo(output_config, dest);
     }
   }
-  if (capture_.stats.voice_detected) {
-    frame->vad_activity_ = *capture_.stats.voice_detected
-                               ? AudioFrame::kVadActive
-                               : AudioFrame::kVadPassive;
+
+  if (vad_result) {
+    if (capture_.stats.voice_detected) {
+      *vad_result = *capture_.stats.voice_detected
+                        ? VoiceDetectionResult::kDetected
+                        : VoiceDetectionResult::kNotDetected;
+    } else {
+      *vad_result = VoiceDetectionResult::kNotAvailable;
+    }
   }
 
   if (aec_dump_) {
-    RecordProcessedCaptureStream(*frame);
+    RecordProcessedCaptureStream(dest, output_config);
   }
 
   return kNoError;
@@ -1430,7 +1456,6 @@
 
 int AudioProcessingImpl::ProcessReverseStream(AudioFrame* frame) {
   TRACE_EVENT0("webrtc", "AudioProcessing::ProcessReverseStream_AudioFrame");
-  rtc::CritScope cs(&crit_render_);
   if (frame == nullptr) {
     return kNullPointerError;
   }
@@ -1446,31 +1471,47 @@
     return kBadNumberChannelsError;
   }
 
+  StreamConfig input_config(frame->sample_rate_hz_, frame->num_channels_,
+                            /*has_keyboard=*/false);
+  StreamConfig output_config(frame->sample_rate_hz_, frame->num_channels_,
+                             /*has_keyboard=*/false);
+
+  int result = ProcessReverseStream(frame->data(), input_config, output_config,
+                                    frame->mutable_data());
+  return result;
+}
+
+int AudioProcessingImpl::ProcessReverseStream(const int16_t* const src,
+                                              const StreamConfig& input_config,
+                                              const StreamConfig& output_config,
+                                              int16_t* const dest) {
+  rtc::CritScope cs(&crit_render_);
   ProcessingConfig processing_config = formats_.api_format;
   processing_config.reverse_input_stream().set_sample_rate_hz(
-      frame->sample_rate_hz_);
+      input_config.sample_rate_hz());
   processing_config.reverse_input_stream().set_num_channels(
-      frame->num_channels_);
+      input_config.num_channels());
   processing_config.reverse_output_stream().set_sample_rate_hz(
-      frame->sample_rate_hz_);
+      output_config.sample_rate_hz());
   processing_config.reverse_output_stream().set_num_channels(
-      frame->num_channels_);
+      output_config.num_channels());
 
   RETURN_ON_ERR(MaybeInitializeRender(processing_config));
-  if (frame->samples_per_channel_ !=
+  if (input_config.num_frames() !=
       formats_.api_format.reverse_input_stream().num_frames()) {
     return kBadDataLengthError;
   }
 
   if (aec_dump_) {
-    aec_dump_->WriteRenderStreamMessage(*frame);
+    aec_dump_->WriteRenderStreamMessage(src, input_config.num_frames(),
+                                        input_config.num_channels());
   }
 
-  render_.render_audio->CopyFrom(frame);
+  render_.render_audio->CopyFrom(src, input_config);
   RETURN_ON_ERR(ProcessRenderStreamLocked());
   if (submodule_states_.RenderMultiBandProcessingActive() ||
       submodule_states_.RenderFullBandProcessingActive()) {
-    render_.render_audio->CopyTo(frame);
+    render_.render_audio->CopyTo(output_config, dest);
   }
   return kNoError;
 }
@@ -2007,11 +2048,13 @@
 }
 
 void AudioProcessingImpl::RecordUnprocessedCaptureStream(
-    const AudioFrame& capture_frame) {
+    const int16_t* const data,
+    const StreamConfig& config) {
   RTC_DCHECK(aec_dump_);
   WriteAecDumpConfigMessage(false);
 
-  aec_dump_->AddCaptureStreamInput(capture_frame);
+  aec_dump_->AddCaptureStreamInput(data, config.num_channels(),
+                                   config.num_frames());
   RecordAudioProcessingState();
 }
 
@@ -2028,10 +2071,12 @@
 }
 
 void AudioProcessingImpl::RecordProcessedCaptureStream(
-    const AudioFrame& processed_capture_frame) {
+    const int16_t* const data,
+    const StreamConfig& config) {
   RTC_DCHECK(aec_dump_);
 
-  aec_dump_->AddCaptureStreamOutput(processed_capture_frame);
+  aec_dump_->AddCaptureStreamOutput(data, config.num_channels(),
+                                    config.num_frames());
   aec_dump_->WriteCaptureStreamMessage();
 }