Receive-side ready for multiple channels.
Made path from NetEq to AudioTransport ready for many-channel audio.
If there is one stream, we can handle anything that fits in an
AudioFrame. For many streams, the current limit is 6.
Some multi-channel combinations are not supported: e.g. if we get
stereo audio and attempt to play out 6 channels.
Changes:
* AudioFrameOperations - replaced the MonoTo* and *ToMono methods by
UpmixChannels & DownmixChannels.
* AudioMixer: removed DCHECKs for <= 2 channels and tweaked the mixing
algorithm to handle many channels.
Bug: webrtc:8649
Change-Id: Ib83e16d463694e35658caa09c27849e853d508fb
Reviewed-on: https://webrtc-review.googlesource.com/c/106040
Reviewed-by: Oskar Sundbom <ossu@webrtc.org>
Commit-Queue: Alex Loiko <aleloi@webrtc.org>
Cr-Commit-Position: refs/heads/master@{#26446}
diff --git a/modules/audio_mixer/BUILD.gn b/modules/audio_mixer/BUILD.gn
index 3979136..ee07704 100644
--- a/modules/audio_mixer/BUILD.gn
+++ b/modules/audio_mixer/BUILD.gn
@@ -52,6 +52,7 @@
"../audio_processing:apm_logging",
"../audio_processing:audio_frame_view",
"../audio_processing/agc2:fixed_digital",
+ "//third_party/abseil-cpp/absl/memory",
]
}
@@ -99,6 +100,7 @@
"../../rtc_base:rtc_base_approved",
"../../rtc_base:rtc_task_queue_for_test",
"../../test:test_support",
+ "//third_party/abseil-cpp/absl/memory",
]
}
diff --git a/modules/audio_mixer/audio_frame_manipulator.cc b/modules/audio_mixer/audio_frame_manipulator.cc
index e8a5f75..78c11b1 100644
--- a/modules/audio_mixer/audio_frame_manipulator.cc
+++ b/modules/audio_mixer/audio_frame_manipulator.cc
@@ -9,6 +9,7 @@
*/
#include "modules/audio_mixer/audio_frame_manipulator.h"
+
#include "audio/utility/audio_frame_operations.h"
#include "rtc_base/checks.h"
@@ -55,11 +56,16 @@
void RemixFrame(size_t target_number_of_channels, AudioFrame* frame) {
RTC_DCHECK_GE(target_number_of_channels, 1);
- RTC_DCHECK_LE(target_number_of_channels, 2);
- if (frame->num_channels_ == 1 && target_number_of_channels == 2) {
- AudioFrameOperations::MonoToStereo(frame);
- } else if (frame->num_channels_ == 2 && target_number_of_channels == 1) {
- AudioFrameOperations::StereoToMono(frame);
+ if (frame->num_channels_ == target_number_of_channels) {
+ return;
}
+ if (frame->num_channels_ > target_number_of_channels) {
+ AudioFrameOperations::DownmixChannels(target_number_of_channels, frame);
+ } else if (frame->num_channels_ < target_number_of_channels) {
+ AudioFrameOperations::UpmixChannels(target_number_of_channels, frame);
+ }
+ RTC_DCHECK_EQ(frame->num_channels_, target_number_of_channels)
+ << "Wrong number of channels, " << frame->num_channels_ << " vs "
+ << target_number_of_channels;
}
} // namespace webrtc
diff --git a/modules/audio_mixer/audio_mixer_impl.cc b/modules/audio_mixer/audio_mixer_impl.cc
index bca2a57..11f5abf 100644
--- a/modules/audio_mixer/audio_mixer_impl.cc
+++ b/modules/audio_mixer/audio_mixer_impl.cc
@@ -119,7 +119,7 @@
void AudioMixerImpl::Mix(size_t number_of_channels,
AudioFrame* audio_frame_for_mixing) {
- RTC_DCHECK(number_of_channels == 1 || number_of_channels == 2);
+ RTC_DCHECK(number_of_channels >= 1);
RTC_DCHECK_RUNS_SERIALIZED(&race_checker_);
CalculateOutputFrequency();
diff --git a/modules/audio_mixer/audio_mixer_impl.h b/modules/audio_mixer/audio_mixer_impl.h
index c38aff2..bab4a3d 100644
--- a/modules/audio_mixer/audio_mixer_impl.h
+++ b/modules/audio_mixer/audio_mixer_impl.h
@@ -84,13 +84,6 @@
// kMaximumAmountOfMixedAudioSources audio sources.
AudioFrameList GetAudioFromSources() RTC_EXCLUSIVE_LOCKS_REQUIRED(crit_);
- // Add/remove the MixerAudioSource to the specified
- // MixerAudioSource list.
- bool AddAudioSourceToList(Source* audio_source,
- SourceStatusList* audio_source_list) const;
- bool RemoveAudioSourceFromList(Source* remove_audio_source,
- SourceStatusList* audio_source_list) const;
-
// The critical section lock guards audio source insertion and
// removal, which can be done from any thread. The race checker
// checks that mixing is done sequentially.
diff --git a/modules/audio_mixer/audio_mixer_impl_unittest.cc b/modules/audio_mixer/audio_mixer_impl_unittest.cc
index 29632ca..397bbf5 100644
--- a/modules/audio_mixer/audio_mixer_impl_unittest.cc
+++ b/modules/audio_mixer/audio_mixer_impl_unittest.cc
@@ -15,6 +15,7 @@
#include <string>
#include <utility>
+#include "absl/memory/memory.h"
#include "api/audio/audio_mixer.h"
#include "modules/audio_mixer/audio_mixer_impl.h"
#include "modules/audio_mixer/default_output_rate_calculator.h"
@@ -23,6 +24,7 @@
#include "rtc_base/strings/string_builder.h"
#include "rtc_base/task_queue_for_test.h"
#include "test/gmock.h"
+#include "test/gtest.h"
using testing::_;
using testing::Exactly;
@@ -520,6 +522,7 @@
std::vector<MockMixerAudioSource> sources(number_of_sources);
for (auto& source : sources) {
+ ResetFrame(source.fake_frame());
mixer->AddSource(&source);
}
@@ -530,4 +533,110 @@
}
}
}
+
+TEST(AudioMixer, MultipleChannelsOneParticipant) {
+ // Set up a participant with a 6-channel frame, and make sure a 6-channel
+ // frame with the right sample values comes out from the mixer. There are 2
+ // Mix calls because of ramp-up.
+ constexpr size_t kNumberOfChannels = 6;
+ MockMixerAudioSource source;
+ ResetFrame(source.fake_frame());
+ const auto mixer = AudioMixerImpl::Create();
+ mixer->AddSource(&source);
+ mixer->Mix(1, &frame_for_mixing);
+ auto* frame = source.fake_frame();
+ frame->num_channels_ = kNumberOfChannels;
+ std::fill(frame->mutable_data(),
+ frame->mutable_data() + AudioFrame::kMaxDataSizeSamples, 0);
+ for (size_t i = 0; i < kNumberOfChannels; ++i) {
+ frame->mutable_data()[100 * frame->num_channels_ + i] = 1000 * i;
+ }
+
+ mixer->Mix(kNumberOfChannels, &frame_for_mixing);
+
+ EXPECT_EQ(frame_for_mixing.num_channels_, kNumberOfChannels);
+ for (size_t i = 0; i < kNumberOfChannels; ++i) {
+ EXPECT_EQ(frame_for_mixing.data()[100 * frame_for_mixing.num_channels_ + i],
+ static_cast<int16_t>(1000 * i));
+ }
+}
+
+TEST(AudioMixer, MultipleChannelsManyParticipants) {
+ // Sets up 2 participants. One has a 6-channel frame. Make sure a 6-channel
+ // frame with the right sample values comes out from the mixer. There are 2
+ // Mix calls because of ramp-up.
+ constexpr size_t kNumberOfChannels = 6;
+ MockMixerAudioSource source;
+ const auto mixer = AudioMixerImpl::Create();
+ mixer->AddSource(&source);
+ ResetFrame(source.fake_frame());
+ mixer->Mix(1, &frame_for_mixing);
+ auto* frame = source.fake_frame();
+ frame->num_channels_ = kNumberOfChannels;
+ std::fill(frame->mutable_data(),
+ frame->mutable_data() + AudioFrame::kMaxDataSizeSamples, 0);
+ for (size_t i = 0; i < kNumberOfChannels; ++i) {
+ frame->mutable_data()[100 * frame->num_channels_ + i] = 1000 * i;
+ }
+ MockMixerAudioSource other_source;
+ ResetFrame(other_source.fake_frame());
+ mixer->AddSource(&other_source);
+
+ mixer->Mix(kNumberOfChannels, &frame_for_mixing);
+
+ EXPECT_EQ(frame_for_mixing.num_channels_, kNumberOfChannels);
+ for (size_t i = 0; i < kNumberOfChannels; ++i) {
+ EXPECT_EQ(frame_for_mixing.data()[100 * frame_for_mixing.num_channels_ + i],
+ static_cast<int16_t>(1000 * i));
+ }
+}
+
+class HighOutputRateCalculator : public OutputRateCalculator {
+ public:
+ static const int kDefaultFrequency = 76000;
+ int CalculateOutputRate(
+ const std::vector<int>& preferred_sample_rates) override {
+ return kDefaultFrequency;
+ }
+ ~HighOutputRateCalculator() override {}
+};
+const int HighOutputRateCalculator::kDefaultFrequency;
+
+TEST(AudioMixer, MultipleChannelsAndHighRate) {
+ constexpr size_t kSamplesPerChannel =
+ HighOutputRateCalculator::kDefaultFrequency / 100;
+ // As many channels as an AudioFrame can fit:
+ constexpr size_t kNumberOfChannels =
+ AudioFrame::kMaxDataSizeSamples / kSamplesPerChannel;
+ MockMixerAudioSource source;
+ const auto mixer = AudioMixerImpl::Create(
+ absl::make_unique<HighOutputRateCalculator>(), true);
+ mixer->AddSource(&source);
+ ResetFrame(source.fake_frame());
+ mixer->Mix(1, &frame_for_mixing);
+ auto* frame = source.fake_frame();
+ frame->num_channels_ = kNumberOfChannels;
+ frame->sample_rate_hz_ = HighOutputRateCalculator::kDefaultFrequency;
+ frame->samples_per_channel_ = kSamplesPerChannel;
+
+ std::fill(frame->mutable_data(),
+ frame->mutable_data() + AudioFrame::kMaxDataSizeSamples, 0);
+ MockMixerAudioSource other_source;
+ ResetFrame(other_source.fake_frame());
+ auto* other_frame = other_source.fake_frame();
+ other_frame->num_channels_ = kNumberOfChannels;
+ other_frame->sample_rate_hz_ = HighOutputRateCalculator::kDefaultFrequency;
+ other_frame->samples_per_channel_ = kSamplesPerChannel;
+ mixer->AddSource(&other_source);
+
+#if RTC_DCHECK_IS_ON && GTEST_HAS_DEATH_TEST && !defined(WEBRTC_ANDROID)
+ EXPECT_DEATH(mixer->Mix(kNumberOfChannels, &frame_for_mixing), "");
+#elif !RTC_DCHECK_IS_ON
+ mixer->Mix(kNumberOfChannels, &frame_for_mixing);
+ EXPECT_EQ(frame_for_mixing.num_channels_, kNumberOfChannels);
+ EXPECT_EQ(frame_for_mixing.sample_rate_hz_,
+ HighOutputRateCalculator::kDefaultFrequency);
+#endif
+}
+
} // namespace webrtc
diff --git a/modules/audio_mixer/frame_combiner.cc b/modules/audio_mixer/frame_combiner.cc
index dd6f729..4aa86f7 100644
--- a/modules/audio_mixer/frame_combiner.cc
+++ b/modules/audio_mixer/frame_combiner.cc
@@ -16,6 +16,7 @@
#include <iterator>
#include <string>
+#include "absl/memory/memory.h"
#include "api/array_view.h"
#include "common_audio/include/audio_util.h"
#include "modules/audio_mixer/audio_frame_manipulator.h"
@@ -30,11 +31,9 @@
namespace webrtc {
namespace {
-// Stereo, 48 kHz, 10 ms.
-constexpr int kMaximumAmountOfChannels = 2;
-constexpr int kMaximumChannelSize = 48 * AudioMixerImpl::kFrameDurationInMs;
-
-using OneChannelBuffer = std::array<float, kMaximumChannelSize>;
+using MixingBuffer =
+ std::array<std::array<float, FrameCombiner::kMaximumChannelSize>,
+ FrameCombiner::kMaximumNumberOfChannels>;
void SetAudioFrameFields(const std::vector<AudioFrame*>& mix_list,
size_t number_of_channels,
@@ -74,23 +73,30 @@
audio_frame_for_mixing->mutable_data());
}
-std::array<OneChannelBuffer, kMaximumAmountOfChannels> MixToFloatFrame(
- const std::vector<AudioFrame*>& mix_list,
- size_t samples_per_channel,
- size_t number_of_channels) {
- // Convert to FloatS16 and mix.
- using OneChannelBuffer = std::array<float, kMaximumChannelSize>;
- std::array<OneChannelBuffer, kMaximumAmountOfChannels> mixing_buffer{};
+void MixToFloatFrame(const std::vector<AudioFrame*>& mix_list,
+ size_t samples_per_channel,
+ size_t number_of_channels,
+ MixingBuffer* mixing_buffer) {
+ RTC_DCHECK_LE(samples_per_channel, FrameCombiner::kMaximumChannelSize);
+ RTC_DCHECK_LE(number_of_channels, FrameCombiner::kMaximumNumberOfChannels);
+ // Clear the mixing buffer.
+ for (auto& one_channel_buffer : *mixing_buffer) {
+ std::fill(one_channel_buffer.begin(), one_channel_buffer.end(), 0.f);
+ }
+ // Convert to FloatS16 and mix.
for (size_t i = 0; i < mix_list.size(); ++i) {
const AudioFrame* const frame = mix_list[i];
- for (size_t j = 0; j < number_of_channels; ++j) {
- for (size_t k = 0; k < samples_per_channel; ++k) {
- mixing_buffer[j][k] += frame->data()[number_of_channels * k + j];
+ for (size_t j = 0; j < std::min(number_of_channels,
+ FrameCombiner::kMaximumNumberOfChannels);
+ ++j) {
+ for (size_t k = 0; k < std::min(samples_per_channel,
+ FrameCombiner::kMaximumChannelSize);
+ ++k) {
+ (*mixing_buffer)[j][k] += frame->data()[number_of_channels * k + j];
}
}
}
- return mixing_buffer;
}
void RunLimiter(AudioFrameView<float> mixing_buffer_view, Limiter* limiter) {
@@ -116,10 +122,20 @@
}
} // namespace
+constexpr size_t FrameCombiner::kMaximumNumberOfChannels;
+constexpr size_t FrameCombiner::kMaximumChannelSize;
+
FrameCombiner::FrameCombiner(bool use_limiter)
: data_dumper_(new ApmDataDumper(0)),
+ mixing_buffer_(
+ absl::make_unique<std::array<std::array<float, kMaximumChannelSize>,
+ kMaximumNumberOfChannels>>()),
limiter_(static_cast<size_t>(48000), data_dumper_.get(), "AudioMixer"),
- use_limiter_(use_limiter) {}
+ use_limiter_(use_limiter) {
+ static_assert(kMaximumChannelSize * kMaximumNumberOfChannels <=
+ AudioFrame::kMaxDataSizeSamples,
+ "");
+}
FrameCombiner::~FrameCombiner() = default;
@@ -154,16 +170,22 @@
return;
}
- std::array<OneChannelBuffer, kMaximumAmountOfChannels> mixing_buffer =
- MixToFloatFrame(mix_list, samples_per_channel, number_of_channels);
+ MixToFloatFrame(mix_list, samples_per_channel, number_of_channels,
+ mixing_buffer_.get());
+
+ const size_t output_number_of_channels =
+ std::min(number_of_channels, kMaximumNumberOfChannels);
+ const size_t output_samples_per_channel =
+ std::min(samples_per_channel, kMaximumChannelSize);
// Put float data in an AudioFrameView.
- std::array<float*, kMaximumAmountOfChannels> channel_pointers{};
- for (size_t i = 0; i < number_of_channels; ++i) {
- channel_pointers[i] = &mixing_buffer[i][0];
+ std::array<float*, kMaximumNumberOfChannels> channel_pointers{};
+ for (size_t i = 0; i < output_number_of_channels; ++i) {
+ channel_pointers[i] = &(*mixing_buffer_.get())[i][0];
}
- AudioFrameView<float> mixing_buffer_view(
- &channel_pointers[0], number_of_channels, samples_per_channel);
+ AudioFrameView<float> mixing_buffer_view(&channel_pointers[0],
+ output_number_of_channels,
+ output_samples_per_channel);
if (use_limiter_) {
RunLimiter(mixing_buffer_view, &limiter_);
diff --git a/modules/audio_mixer/frame_combiner.h b/modules/audio_mixer/frame_combiner.h
index 1c1cd53..d989d02 100644
--- a/modules/audio_mixer/frame_combiner.h
+++ b/modules/audio_mixer/frame_combiner.h
@@ -38,12 +38,20 @@
size_t number_of_streams,
AudioFrame* audio_frame_for_mixing);
+ // Stereo, 48 kHz, 10 ms.
+ static constexpr size_t kMaximumNumberOfChannels = 8;
+ static constexpr size_t kMaximumChannelSize = 48 * 10;
+
+ using MixingBuffer = std::array<std::array<float, kMaximumChannelSize>,
+ kMaximumNumberOfChannels>;
+
private:
void LogMixingStats(const std::vector<AudioFrame*>& mix_list,
int sample_rate,
size_t number_of_streams) const;
std::unique_ptr<ApmDataDumper> data_dumper_;
+ std::unique_ptr<MixingBuffer> mixing_buffer_;
Limiter limiter_;
const bool use_limiter_;
mutable int uma_logging_counter_ = 0;
diff --git a/modules/audio_mixer/frame_combiner_unittest.cc b/modules/audio_mixer/frame_combiner_unittest.cc
index 21f6383..5f024a4 100644
--- a/modules/audio_mixer/frame_combiner_unittest.cc
+++ b/modules/audio_mixer/frame_combiner_unittest.cc
@@ -22,6 +22,7 @@
#include "modules/audio_mixer/sine_wave_generator.h"
#include "rtc_base/checks.h"
#include "rtc_base/strings/string_builder.h"
+#include "test/gmock.h"
#include "test/gtest.h"
namespace webrtc {
@@ -71,7 +72,7 @@
TEST(FrameCombiner, BasicApiCallsLimiter) {
FrameCombiner combiner(true);
for (const int rate : {8000, 18000, 34000, 48000}) {
- for (const int number_of_channels : {1, 2}) {
+ for (const int number_of_channels : {1, 2, 4, 8}) {
const std::vector<AudioFrame*> all_frames = {&frame1, &frame2};
SetUpFrames(rate, number_of_channels);
@@ -87,12 +88,71 @@
}
}
+// There are DCHECKs in place to check for invalid parameters.
+TEST(FrameCombiner, DebugBuildCrashesWithManyChannels) {
+ FrameCombiner combiner(true);
+ for (const int rate : {8000, 18000, 34000, 48000}) {
+ for (const int number_of_channels : {10, 20, 21}) {
+ if (static_cast<size_t>(rate / 100 * number_of_channels) >
+ AudioFrame::kMaxDataSizeSamples) {
+ continue;
+ }
+ const std::vector<AudioFrame*> all_frames = {&frame1, &frame2};
+ SetUpFrames(rate, number_of_channels);
+
+ const int number_of_frames = 2;
+ SCOPED_TRACE(
+ ProduceDebugText(rate, number_of_channels, number_of_frames));
+ const std::vector<AudioFrame*> frames_to_combine(
+ all_frames.begin(), all_frames.begin() + number_of_frames);
+#if RTC_DCHECK_IS_ON && GTEST_HAS_DEATH_TEST && !defined(WEBRTC_ANDROID)
+ EXPECT_DEATH(
+ combiner.Combine(frames_to_combine, number_of_channels, rate,
+ frames_to_combine.size(), &audio_frame_for_mixing),
+ "");
+#elif !RTC_DCHECK_IS_ON
+ combiner.Combine(frames_to_combine, number_of_channels, rate,
+ frames_to_combine.size(), &audio_frame_for_mixing);
+#endif
+ }
+ }
+}
+
+TEST(FrameCombiner, DebugBuildCrashesWithHighRate) {
+ FrameCombiner combiner(true);
+ for (const int rate : {50000, 96000, 128000, 196000}) {
+ for (const int number_of_channels : {1, 2, 3}) {
+ if (static_cast<size_t>(rate / 100 * number_of_channels) >
+ AudioFrame::kMaxDataSizeSamples) {
+ continue;
+ }
+ const std::vector<AudioFrame*> all_frames = {&frame1, &frame2};
+ SetUpFrames(rate, number_of_channels);
+
+ const int number_of_frames = 2;
+ SCOPED_TRACE(
+ ProduceDebugText(rate, number_of_channels, number_of_frames));
+ const std::vector<AudioFrame*> frames_to_combine(
+ all_frames.begin(), all_frames.begin() + number_of_frames);
+#if RTC_DCHECK_IS_ON && GTEST_HAS_DEATH_TEST && !defined(WEBRTC_ANDROID)
+ EXPECT_DEATH(
+ combiner.Combine(frames_to_combine, number_of_channels, rate,
+ frames_to_combine.size(), &audio_frame_for_mixing),
+ "");
+#elif !RTC_DCHECK_IS_ON
+ combiner.Combine(frames_to_combine, number_of_channels, rate,
+ frames_to_combine.size(), &audio_frame_for_mixing);
+#endif
+ }
+ }
+}
+
// With no limiter, the rate has to be divisible by 100 since we use
// 10 ms frames.
TEST(FrameCombiner, BasicApiCallsNoLimiter) {
FrameCombiner combiner(false);
for (const int rate : {8000, 10000, 11000, 32000, 44100}) {
- for (const int number_of_channels : {1, 2}) {
+ for (const int number_of_channels : {1, 2, 4, 8}) {
const std::vector<AudioFrame*> all_frames = {&frame1, &frame2};
SetUpFrames(rate, number_of_channels);
@@ -133,7 +193,7 @@
TEST(FrameCombiner, CombiningOneFrameShouldNotChangeFrame) {
FrameCombiner combiner(false);
for (const int rate : {8000, 10000, 11000, 32000, 44100}) {
- for (const int number_of_channels : {1, 2}) {
+ for (const int number_of_channels : {1, 2, 4, 8, 10}) {
SCOPED_TRACE(ProduceDebugText(rate, number_of_channels, 1));
SetUpFrames(rate, number_of_channels);
@@ -165,7 +225,7 @@
std::vector<FrameCombinerConfig> configs = {
{false, 30100, 2, 50.f}, {false, 16500, 1, 3200.f},
{true, 8000, 1, 3200.f}, {true, 16000, 1, 50.f},
- {true, 18000, 2, 3200.f}, {true, 10000, 2, 50.f},
+ {true, 18000, 8, 3200.f}, {true, 10000, 2, 50.f},
};
for (const auto& config : configs) {