Reland "Simplification and refactoring of the AudioBuffer code"
This is a reland of 81c0cf287c8514cb1cd6f3baca484d668c6eb128
Original change's description:
> Simplification and refactoring of the AudioBuffer code
>
> This CL performs a major refactoring and simplification
> of the AudioBuffer code that.
> -Removes 7 of the 9 internal buffers of the AudioBuffer.
> -Avoids the implicit copying required to keep the
> internal buffers in sync.
> -Removes all code relating to handling of fixed-point
> sample data in the AudioBuffer.
> -Changes the naming of the class methods to reflect
> that only floating point is handled.
> -Corrects some bugs in the code.
> -Extends the handling of internal downmixing to be
> more generic.
>
> Bug: webrtc:10882
> Change-Id: I12c8af156fbe366b154744a0a1b3d926bf7be572
> Reviewed-on: https://webrtc-review.googlesource.com/c/src/+/149828
> Commit-Queue: Per Åhgren <peah@webrtc.org>
> Reviewed-by: Gustaf Ullberg <gustaf@webrtc.org>
> Cr-Commit-Position: refs/heads/master@{#28928}
Bug: webrtc:10882
Change-Id: I2ddf327e80a03468c41662ae63c619ff34f2363a
Reviewed-on: https://webrtc-review.googlesource.com/c/src/+/150101
Commit-Queue: Per Åhgren <peah@webrtc.org>
Reviewed-by: Gustaf Ullberg <gustaf@webrtc.org>
Cr-Commit-Position: refs/heads/master@{#28938}
diff --git a/modules/audio_processing/audio_processing_impl.cc b/modules/audio_processing/audio_processing_impl.cc
index beabd9d..464c61b 100644
--- a/modules/audio_processing/audio_processing_impl.cc
+++ b/modules/audio_processing/audio_processing_impl.cc
@@ -84,19 +84,22 @@
sample_rate_hz == AudioProcessing::kSampleRate48kHz;
}
-// Identify the native processing rate that best handles a sample rate.
-int SuitableProcessRate(int minimum_rate, bool band_splitting_required) {
+int FindNativeProcessRateToUse(int minimum_rate, bool band_splitting_required) {
#ifdef WEBRTC_ARCH_ARM_FAMILY
- constexpr int kMaxSplittingRate = 32000;
+ constexpr int kMaxSplittingNativeProcessRate =
+ AudioProcessing::kSampleRate32kHz;
#else
- constexpr int kMaxSplittingRate = 48000;
+ constexpr int kMaxSplittingNativeProcessRate =
+ AudioProcessing::kSampleRate48kHz;
#endif
- static_assert(kMaxSplittingRate <= 48000, "");
+ static_assert(
+ kMaxSplittingNativeProcessRate <= AudioProcessing::kMaxNativeSampleRateHz,
+ "");
+ const int uppermost_native_rate = band_splitting_required
+ ? kMaxSplittingNativeProcessRate
+ : AudioProcessing::kSampleRate48kHz;
- const int uppermost_native_rate =
- band_splitting_required ? kMaxSplittingRate : 48000;
-
- for (auto rate : {16000, 32000, 48000}) {
+ for (auto rate : AudioProcessing::kNativeSampleRatesHz) {
if (rate >= uppermost_native_rate) {
return uppermost_native_rate;
}
@@ -495,17 +498,18 @@
int AudioProcessingImpl::InitializeLocked() {
UpdateActiveSubmoduleStates();
- const int render_audiobuffer_num_output_frames =
+ const int render_audiobuffer_sample_rate_hz =
formats_.api_format.reverse_output_stream().num_frames() == 0
- ? formats_.render_processing_format.num_frames()
- : formats_.api_format.reverse_output_stream().num_frames();
+ ? formats_.render_processing_format.sample_rate_hz()
+ : formats_.api_format.reverse_output_stream().sample_rate_hz();
if (formats_.api_format.reverse_input_stream().num_channels() > 0) {
render_.render_audio.reset(new AudioBuffer(
- formats_.api_format.reverse_input_stream().num_frames(),
+ formats_.api_format.reverse_input_stream().sample_rate_hz(),
formats_.api_format.reverse_input_stream().num_channels(),
- formats_.render_processing_format.num_frames(),
+ formats_.render_processing_format.sample_rate_hz(),
formats_.render_processing_format.num_channels(),
- render_audiobuffer_num_output_frames));
+ render_audiobuffer_sample_rate_hz,
+ formats_.render_processing_format.num_channels()));
if (formats_.api_format.reverse_input_stream() !=
formats_.api_format.reverse_output_stream()) {
render_.render_converter = AudioConverter::Create(
@@ -521,12 +525,13 @@
render_.render_converter.reset(nullptr);
}
- capture_.capture_audio.reset(
- new AudioBuffer(formats_.api_format.input_stream().num_frames(),
- formats_.api_format.input_stream().num_channels(),
- capture_nonlocked_.capture_processing_format.num_frames(),
- formats_.api_format.output_stream().num_channels(),
- formats_.api_format.output_stream().num_frames()));
+ capture_.capture_audio.reset(new AudioBuffer(
+ formats_.api_format.input_stream().sample_rate_hz(),
+ formats_.api_format.input_stream().num_channels(),
+ capture_nonlocked_.capture_processing_format.sample_rate_hz(),
+ formats_.api_format.output_stream().num_channels(),
+ formats_.api_format.output_stream().sample_rate_hz(),
+ formats_.api_format.output_stream().num_channels()));
AllocateRenderQueue();
@@ -590,19 +595,18 @@
formats_.api_format = config;
- int capture_processing_rate = SuitableProcessRate(
+ int capture_processing_rate = FindNativeProcessRateToUse(
std::min(formats_.api_format.input_stream().sample_rate_hz(),
formats_.api_format.output_stream().sample_rate_hz()),
submodule_states_.CaptureMultiBandSubModulesActive() ||
submodule_states_.RenderMultiBandSubModulesActive());
- RTC_DCHECK_NE(8000, capture_processing_rate);
capture_nonlocked_.capture_processing_format =
StreamConfig(capture_processing_rate);
int render_processing_rate;
if (!capture_nonlocked_.echo_controller_enabled) {
- render_processing_rate = SuitableProcessRate(
+ render_processing_rate = FindNativeProcessRateToUse(
std::min(formats_.api_format.reverse_input_stream().sample_rate_hz(),
formats_.api_format.reverse_output_stream().sample_rate_hz()),
submodule_states_.CaptureMultiBandSubModulesActive() ||
@@ -629,7 +633,6 @@
render_processing_rate =
std::max(render_processing_rate, static_cast<int>(kSampleRate16kHz));
}
- RTC_DCHECK_NE(8000, render_processing_rate);
// Always downmix the render stream to mono for analysis. This has been
// demonstrated to work well for AEC in most practical scenarios.
@@ -1244,11 +1247,11 @@
}
capture_.vad_activity = frame->vad_activity_;
- capture_.capture_audio->DeinterleaveFrom(frame);
+ capture_.capture_audio->CopyFrom(frame);
RETURN_ON_ERR(ProcessCaptureStreamLocked());
if (submodule_states_.CaptureMultiBandProcessingActive() ||
submodule_states_.CaptureFullBandProcessingActive()) {
- capture_.capture_audio->InterleaveTo(frame);
+ capture_.capture_audio->CopyTo(frame);
}
frame->vad_activity_ = capture_.vad_activity;
@@ -1274,12 +1277,12 @@
if (private_submodules_->pre_amplifier) {
private_submodules_->pre_amplifier->ApplyGain(AudioFrameView<float>(
- capture_buffer->channels_f(), capture_buffer->num_channels(),
+ capture_buffer->channels(), capture_buffer->num_channels(),
capture_buffer->num_frames()));
}
capture_input_rms_.Analyze(rtc::ArrayView<const float>(
- capture_buffer->channels_const_f()[0],
+ capture_buffer->channels_const()[0],
capture_nonlocked_.capture_processing_format.num_frames()));
const bool log_rms = ++capture_rms_interval_counter_ >= 1000;
if (log_rms) {
@@ -1327,7 +1330,7 @@
if (constants_.use_experimental_agc_process_before_aec) {
private_submodules_->agc_manager->Process(
- capture_buffer->channels_const_f()[0],
+ capture_buffer->channels_const()[0],
capture_nonlocked_.capture_processing_format.num_frames(),
capture_nonlocked_.capture_processing_format.sample_rate_hz());
}
@@ -1436,7 +1439,7 @@
if (config_.residual_echo_detector.enabled) {
RTC_DCHECK(private_submodules_->echo_detector);
private_submodules_->echo_detector->AnalyzeCaptureAudio(
- rtc::ArrayView<const float>(capture_buffer->channels_f()[0],
+ rtc::ArrayView<const float>(capture_buffer->channels()[0],
capture_buffer->num_frames()));
}
@@ -1449,9 +1452,9 @@
: 1.f;
public_submodules_->transient_suppressor->Suppress(
- capture_buffer->channels_f()[0], capture_buffer->num_frames(),
+ capture_buffer->channels()[0], capture_buffer->num_frames(),
capture_buffer->num_channels(),
- capture_buffer->split_bands_const_f(0)[kBand0To8kHz],
+ capture_buffer->split_bands_const(0)[kBand0To8kHz],
capture_buffer->num_frames_per_band(),
capture_.keyboard_info.keyboard_data,
capture_.keyboard_info.num_keyboard_frames, voice_probability,
@@ -1474,9 +1477,9 @@
}
// The level estimator operates on the recombined data.
- public_submodules_->level_estimator->ProcessStream(capture_buffer);
+ public_submodules_->level_estimator->ProcessStream(*capture_buffer);
if (config_.level_estimation.enabled) {
- private_submodules_->output_level_estimator->ProcessStream(capture_buffer);
+ private_submodules_->output_level_estimator->ProcessStream(*capture_buffer);
capture_.stats.output_rms_dbfs =
private_submodules_->output_level_estimator->RMS();
} else {
@@ -1484,7 +1487,7 @@
}
capture_output_rms_.Analyze(rtc::ArrayView<const float>(
- capture_buffer->channels_const_f()[0],
+ capture_buffer->channels_const()[0],
capture_nonlocked_.capture_processing_format.num_frames()));
if (log_rms) {
RmsLevel::Levels levels = capture_output_rms_.AverageAndPeak();
@@ -1609,11 +1612,11 @@
aec_dump_->WriteRenderStreamMessage(*frame);
}
- render_.render_audio->DeinterleaveFrom(frame);
+ render_.render_audio->CopyFrom(frame);
RETURN_ON_ERR(ProcessRenderStreamLocked());
if (submodule_states_.RenderMultiBandProcessingActive() ||
submodule_states_.RenderFullBandProcessingActive()) {
- render_.render_audio->InterleaveTo(frame);
+ render_.render_audio->CopyTo(frame);
}
return kNoError;
}