Receive-side ready for multiple channels. Made path from NetEq to AudioTransport ready for many-channel audio. If there is one stream, we can handle anything that fits in an AudioFrame. For many streams, the current limit is 6. Some multi-channel combinations are not supported: e.g. if we get stereo audio and attempt to play out 6 channels. Changes: * AudioFrameOperations - replaced the MonoTo* and *ToMono methods by UpmixChannels & DownmixChannels. * AudioMixer: removed DCHECKs for <= 2 channels and tweaked the mixing algorithm to handle many channels. Bug: webrtc:8649 Change-Id: Ib83e16d463694e35658caa09c27849e853d508fb Reviewed-on: https://webrtc-review.googlesource.com/c/106040 Reviewed-by: Oskar Sundbom <ossu@webrtc.org> Commit-Queue: Alex Loiko <aleloi@webrtc.org> Cr-Commit-Position: refs/heads/master@{#26446}

commit: b4977de306bd80dbf8c20930943c318f01e037ac [log] [tgz]
author: Alex Loiko <aleloi@webrtc.org> Mon Jan 28 16:38:38 2019 +0100
committer: Commit Bot <commit-bot@chromium.org> Tue Jan 29 12:43:23 2019 +0000
tree: 4d0f19db2c12bb74e22cdef568ab2d24d07a3256
parent: 7a3e43a5d7920438b4a3d2b8d983db5ff429cb88 [diff]
diff --git a/audio/audio_transport_impl.cc b/audio/audio_transport_impl.cc
index 71b809c..cdbdacd 100644
--- a/audio/audio_transport_impl.cc
+++ b/audio/audio_transport_impl.cc

@@ -214,7 +214,6 @@
                                         int64_t* ntp_time_ms) {
   RTC_DCHECK_EQ(bits_per_sample, 16);
   RTC_DCHECK_GE(number_of_channels, 1);
-  RTC_DCHECK_LE(number_of_channels, 2);
   RTC_DCHECK_GE(sample_rate, AudioProcessing::NativeRate::kSampleRate8kHz);
 
   // 100 = 1 second / data duration (10 ms).

diff --git a/audio/remix_resample.cc b/audio/remix_resample.cc
index cc59e2a..e77c386 100644
--- a/audio/remix_resample.cc
+++ b/audio/remix_resample.cc

@@ -80,7 +80,7 @@
     // The audio in dst_frame really is mono at this point; MonoToStereo will
     // set this back to stereo.
     dst_frame->num_channels_ = 1;
-    AudioFrameOperations::MonoToStereo(dst_frame);
+    AudioFrameOperations::UpmixChannels(2, dst_frame);
   }
 }
 

diff --git a/audio/utility/BUILD.gn b/audio/utility/BUILD.gn
index 11a65bd..50c4475 100644
--- a/audio/utility/BUILD.gn
+++ b/audio/utility/BUILD.gn

@@ -22,7 +22,9 @@
 
   deps = [
     "../../api/audio:audio_frame_api",
+    "../../common_audio",
     "../../rtc_base:checks",
+    "../../rtc_base:deprecation",
     "../../rtc_base:rtc_base_approved",
   ]
 }

diff --git a/audio/utility/audio_frame_operations.cc b/audio/utility/audio_frame_operations.cc
index 1a8232b..d3180a7 100644
--- a/audio/utility/audio_frame_operations.cc
+++ b/audio/utility/audio_frame_operations.cc

@@ -13,7 +13,9 @@
 #include <string.h>
 #include <algorithm>
 #include <cstdint>
+#include <utility>
 
+#include "common_audio/include/audio_util.h"
 #include "rtc_base/checks.h"
 #include "rtc_base/numerics/safe_conversions.h"
 
@@ -69,60 +71,20 @@
   }
 }
 
-void AudioFrameOperations::MonoToStereo(const int16_t* src_audio,
-                                        size_t samples_per_channel,
-                                        int16_t* dst_audio) {
-  for (size_t i = 0; i < samples_per_channel; i++) {
-    dst_audio[2 * i] = src_audio[i];
-    dst_audio[2 * i + 1] = src_audio[i];
-  }
-}
-
 int AudioFrameOperations::MonoToStereo(AudioFrame* frame) {
   if (frame->num_channels_ != 1) {
     return -1;
   }
-  if ((frame->samples_per_channel_ * 2) >= AudioFrame::kMaxDataSizeSamples) {
-    // Not enough memory to expand from mono to stereo.
-    return -1;
-  }
-
-  if (!frame->muted()) {
-    // TODO(yujo): this operation can be done in place.
-    int16_t data_copy[AudioFrame::kMaxDataSizeSamples];
-    memcpy(data_copy, frame->data(),
-           sizeof(int16_t) * frame->samples_per_channel_);
-    MonoToStereo(data_copy, frame->samples_per_channel_, frame->mutable_data());
-  }
-  frame->num_channels_ = 2;
-
+  UpmixChannels(2, frame);
   return 0;
 }
 
-void AudioFrameOperations::StereoToMono(const int16_t* src_audio,
-                                        size_t samples_per_channel,
-                                        int16_t* dst_audio) {
-  for (size_t i = 0; i < samples_per_channel; i++) {
-    dst_audio[i] =
-        (static_cast<int32_t>(src_audio[2 * i]) + src_audio[2 * i + 1]) >> 1;
-  }
-}
-
 int AudioFrameOperations::StereoToMono(AudioFrame* frame) {
   if (frame->num_channels_ != 2) {
     return -1;
   }
-
-  RTC_DCHECK_LE(frame->samples_per_channel_ * 2,
-                AudioFrame::kMaxDataSizeSamples);
-
-  if (!frame->muted()) {
-    StereoToMono(frame->data(), frame->samples_per_channel_,
-                 frame->mutable_data());
-  }
-  frame->num_channels_ = 1;
-
-  return 0;
+  DownmixChannels(1, frame);
+  return frame->num_channels_ == 1 ? 0 : -1;
 }
 
 void AudioFrameOperations::QuadToStereo(const int16_t* src_audio,
@@ -154,65 +116,66 @@
   return 0;
 }
 
-void AudioFrameOperations::QuadToMono(const int16_t* src_audio,
-                                      size_t samples_per_channel,
-                                      int16_t* dst_audio) {
-  for (size_t i = 0; i < samples_per_channel; i++) {
-    dst_audio[i] =
-        (static_cast<int32_t>(src_audio[4 * i]) + src_audio[4 * i + 1] +
-         src_audio[4 * i + 2] + src_audio[4 * i + 3]) >>
-        2;
-  }
-}
-
-int AudioFrameOperations::QuadToMono(AudioFrame* frame) {
-  if (frame->num_channels_ != 4) {
-    return -1;
-  }
-
-  RTC_DCHECK_LE(frame->samples_per_channel_ * 4,
-                AudioFrame::kMaxDataSizeSamples);
-
-  if (!frame->muted()) {
-    QuadToMono(frame->data(), frame->samples_per_channel_,
-               frame->mutable_data());
-  }
-  frame->num_channels_ = 1;
-
-  return 0;
-}
-
 void AudioFrameOperations::DownmixChannels(const int16_t* src_audio,
                                            size_t src_channels,
                                            size_t samples_per_channel,
                                            size_t dst_channels,
                                            int16_t* dst_audio) {
-  if (src_channels == 2 && dst_channels == 1) {
-    StereoToMono(src_audio, samples_per_channel, dst_audio);
+  if (src_channels > 1 && dst_channels == 1) {
+    DownmixInterleavedToMono(src_audio, samples_per_channel, src_channels,
+                             dst_audio);
     return;
   } else if (src_channels == 4 && dst_channels == 2) {
     QuadToStereo(src_audio, samples_per_channel, dst_audio);
     return;
-  } else if (src_channels == 4 && dst_channels == 1) {
-    QuadToMono(src_audio, samples_per_channel, dst_audio);
-    return;
   }
 
   RTC_NOTREACHED() << "src_channels: " << src_channels
                    << ", dst_channels: " << dst_channels;
 }
 
-int AudioFrameOperations::DownmixChannels(size_t dst_channels,
-                                          AudioFrame* frame) {
-  if (frame->num_channels_ == 2 && dst_channels == 1) {
-    return StereoToMono(frame);
+void AudioFrameOperations::DownmixChannels(size_t dst_channels,
+                                           AudioFrame* frame) {
+  RTC_DCHECK_LE(frame->samples_per_channel_ * frame->num_channels_,
+                AudioFrame::kMaxDataSizeSamples);
+  if (frame->num_channels_ > 1 && dst_channels == 1) {
+    if (!frame->muted()) {
+      DownmixInterleavedToMono(frame->data(), frame->samples_per_channel_,
+                               frame->num_channels_, frame->mutable_data());
+    }
+    frame->num_channels_ = 1;
   } else if (frame->num_channels_ == 4 && dst_channels == 2) {
-    return QuadToStereo(frame);
-  } else if (frame->num_channels_ == 4 && dst_channels == 1) {
-    return QuadToMono(frame);
+    int err = QuadToStereo(frame);
+    RTC_DCHECK_EQ(err, 0);
+  } else {
+    RTC_NOTREACHED() << "src_channels: " << frame->num_channels_
+                     << ", dst_channels: " << dst_channels;
+  }
+}
+
+void AudioFrameOperations::UpmixChannels(size_t target_number_of_channels,
+                                         AudioFrame* frame) {
+  RTC_DCHECK_EQ(frame->num_channels_, 1);
+  RTC_DCHECK_LE(frame->samples_per_channel_ * target_number_of_channels,
+                AudioFrame::kMaxDataSizeSamples);
+
+  if (frame->num_channels_ != 1 ||
+      frame->samples_per_channel_ * target_number_of_channels >
+          AudioFrame::kMaxDataSizeSamples) {
+    return;
   }
 
-  return -1;
+  if (!frame->muted()) {
+    // Up-mixing is done in place. Going backwards through the frame ensures
+    // that nothing is irrevocably overwritten.
+    for (int i = frame->samples_per_channel_ - 1; i >= 0; i--) {
+      for (size_t j = 0; j < target_number_of_channels; ++j) {
+        frame->mutable_data()[target_number_of_channels * i + j] =
+            frame->data()[i];
+      }
+    }
+  }
+  frame->num_channels_ = target_number_of_channels;
 }
 
 void AudioFrameOperations::SwapStereoChannels(AudioFrame* frame) {
@@ -223,9 +186,7 @@
 
   int16_t* frame_data = frame->mutable_data();
   for (size_t i = 0; i < frame->samples_per_channel_ * 2; i += 2) {
-    int16_t temp_data = frame_data[i];
-    frame_data[i] = frame_data[i + 1];
-    frame_data[i + 1] = temp_data;
+    std::swap(frame_data[i], frame_data[i + 1]);
   }
 }
 

diff --git a/audio/utility/audio_frame_operations.h b/audio/utility/audio_frame_operations.h
index c1445b6..65c310c 100644
--- a/audio/utility/audio_frame_operations.h
+++ b/audio/utility/audio_frame_operations.h

@@ -15,6 +15,7 @@
 #include <stdint.h>
 
 #include "api/audio/audio_frame.h"
+#include "rtc_base/deprecation.h"
 
 namespace webrtc {
 
@@ -32,28 +33,15 @@
   // |result_frame| is empty.
   static void Add(const AudioFrame& frame_to_add, AudioFrame* result_frame);
 
-  // Upmixes mono |src_audio| to stereo |dst_audio|. This is an out-of-place
-  // operation, meaning src_audio and dst_audio must point to different
-  // buffers. It is the caller's responsibility to ensure that |dst_audio| is
-  // sufficiently large.
-  static void MonoToStereo(const int16_t* src_audio,
-                           size_t samples_per_channel,
-                           int16_t* dst_audio);
-
   // |frame.num_channels_| will be updated. This version checks for sufficient
-  // buffer size and that |num_channels_| is mono.
-  static int MonoToStereo(AudioFrame* frame);
-
-  // Downmixes stereo |src_audio| to mono |dst_audio|. This is an in-place
-  // operation, meaning |src_audio| and |dst_audio| may point to the same
-  // buffer.
-  static void StereoToMono(const int16_t* src_audio,
-                           size_t samples_per_channel,
-                           int16_t* dst_audio);
+  // buffer size and that |num_channels_| is mono. Use UpmixChannels
+  // instead. TODO(bugs.webrtc.org/8649): remove.
+  RTC_DEPRECATED static int MonoToStereo(AudioFrame* frame);
 
   // |frame.num_channels_| will be updated. This version checks that
-  // |num_channels_| is stereo.
-  static int StereoToMono(AudioFrame* frame);
+  // |num_channels_| is stereo. Use DownmixChannels
+  // instead. TODO(bugs.webrtc.org/8649): remove.
+  RTC_DEPRECATED static int StereoToMono(AudioFrame* frame);
 
   // Downmixes 4 channels |src_audio| to stereo |dst_audio|. This is an in-place
   // operation, meaning |src_audio| and |dst_audio| may point to the same
@@ -66,17 +54,6 @@
   // |num_channels_| is 4 channels.
   static int QuadToStereo(AudioFrame* frame);
 
-  // Downmixes 4 channels |src_audio| to mono |dst_audio|. This is an in-place
-  // operation, meaning |src_audio| and |dst_audio| may point to the same
-  // buffer.
-  static void QuadToMono(const int16_t* src_audio,
-                         size_t samples_per_channel,
-                         int16_t* dst_audio);
-
-  // |frame.num_channels_| will be updated. This version checks that
-  // |num_channels_| is 4 channels.
-  static int QuadToMono(AudioFrame* frame);
-
   // Downmixes |src_channels| |src_audio| to |dst_channels| |dst_audio|.
   // This is an in-place operation, meaning |src_audio| and |dst_audio|
   // may point to the same buffer. Supported channel combinations are
@@ -88,10 +65,16 @@
                               int16_t* dst_audio);
 
   // |frame.num_channels_| will be updated. This version checks that
+  // |num_channels_| and |dst_channels| are valid and performs relevant downmix.
+  // Supported channel combinations are N channels to Mono, and Quad to Stereo.
+  static void DownmixChannels(size_t dst_channels, AudioFrame* frame);
+
+  // |frame.num_channels_| will be updated. This version checks that
   // |num_channels_| and |dst_channels| are valid and performs relevant
-  // downmix.  Supported channel combinations are Stereo to Mono, Quad to Mono,
-  // and Quad to Stereo.
-  static int DownmixChannels(size_t dst_channels, AudioFrame* frame);
+  // upmix. Supported channel combinations are Mono to N
+  // channels. The single channel is replicated.
+  static void UpmixChannels(size_t target_number_of_channels,
+                            AudioFrame* frame);
 
   // Swap the left and right channels of |frame|. Fails silently if |frame| is
   // not stereo.

diff --git a/audio/utility/audio_frame_operations_unittest.cc b/audio/utility/audio_frame_operations_unittest.cc
index 76f1dcd..dd41d1a 100644
--- a/audio/utility/audio_frame_operations_unittest.cc
+++ b/audio/utility/audio_frame_operations_unittest.cc

@@ -103,19 +103,21 @@
   }
 }
 
+#if RTC_DCHECK_IS_ON && GTEST_HAS_DEATH_TEST && !defined(WEBRTC_ANDROID)
 TEST_F(AudioFrameOperationsTest, MonoToStereoFailsWithBadParameters) {
-  EXPECT_EQ(-1, AudioFrameOperations::MonoToStereo(&frame_));
-
+  EXPECT_DEATH(AudioFrameOperations::UpmixChannels(2, &frame_), "");
   frame_.samples_per_channel_ = AudioFrame::kMaxDataSizeSamples;
   frame_.num_channels_ = 1;
-  EXPECT_EQ(-1, AudioFrameOperations::MonoToStereo(&frame_));
+  EXPECT_DEATH(AudioFrameOperations::UpmixChannels(2, &frame_), "");
 }
+#endif
 
 TEST_F(AudioFrameOperationsTest, MonoToStereoSucceeds) {
   frame_.num_channels_ = 1;
   SetFrameData(1, &frame_);
 
-  EXPECT_EQ(0, AudioFrameOperations::MonoToStereo(&frame_));
+  AudioFrameOperations::UpmixChannels(2, &frame_);
+  EXPECT_EQ(2u, frame_.num_channels_);
 
   AudioFrame stereo_frame;
   stereo_frame.samples_per_channel_ = 320;
@@ -127,36 +129,22 @@
 TEST_F(AudioFrameOperationsTest, MonoToStereoMuted) {
   frame_.num_channels_ = 1;
   ASSERT_TRUE(frame_.muted());
-  EXPECT_EQ(0, AudioFrameOperations::MonoToStereo(&frame_));
+  AudioFrameOperations::UpmixChannels(2, &frame_);
+  EXPECT_EQ(2u, frame_.num_channels_);
   EXPECT_TRUE(frame_.muted());
 }
 
-TEST_F(AudioFrameOperationsTest, MonoToStereoBufferSucceeds) {
-  AudioFrame target_frame;
-  frame_.num_channels_ = 1;
-  SetFrameData(4, &frame_);
-
-  target_frame.num_channels_ = 2;
-  target_frame.samples_per_channel_ = frame_.samples_per_channel_;
-
-  AudioFrameOperations::MonoToStereo(frame_.data(), frame_.samples_per_channel_,
-                                     target_frame.mutable_data());
-
-  AudioFrame stereo_frame;
-  stereo_frame.samples_per_channel_ = 320;
-  stereo_frame.num_channels_ = 2;
-  SetFrameData(4, 4, &stereo_frame);
-  VerifyFramesAreEqual(stereo_frame, target_frame);
-}
-
+#if RTC_DCHECK_IS_ON && GTEST_HAS_DEATH_TEST && !defined(WEBRTC_ANDROID)
 TEST_F(AudioFrameOperationsTest, StereoToMonoFailsWithBadParameters) {
   frame_.num_channels_ = 1;
-  EXPECT_EQ(-1, AudioFrameOperations::StereoToMono(&frame_));
+  EXPECT_DEATH(AudioFrameOperations::DownmixChannels(1, &frame_), "");
 }
+#endif
 
 TEST_F(AudioFrameOperationsTest, StereoToMonoSucceeds) {
   SetFrameData(4, 2, &frame_);
-  EXPECT_EQ(0, AudioFrameOperations::StereoToMono(&frame_));
+  AudioFrameOperations::DownmixChannels(1, &frame_);
+  EXPECT_EQ(1u, frame_.num_channels_);
 
   AudioFrame mono_frame;
   mono_frame.samples_per_channel_ = 320;
@@ -167,7 +155,8 @@
 
 TEST_F(AudioFrameOperationsTest, StereoToMonoMuted) {
   ASSERT_TRUE(frame_.muted());
-  EXPECT_EQ(0, AudioFrameOperations::StereoToMono(&frame_));
+  AudioFrameOperations::DownmixChannels(1, &frame_);
+  EXPECT_EQ(1u, frame_.num_channels_);
   EXPECT_TRUE(frame_.muted());
 }
 
@@ -178,8 +167,9 @@
   target_frame.num_channels_ = 1;
   target_frame.samples_per_channel_ = frame_.samples_per_channel_;
 
-  AudioFrameOperations::StereoToMono(frame_.data(), frame_.samples_per_channel_,
-                                     target_frame.mutable_data());
+  AudioFrameOperations::DownmixChannels(frame_.data(), 2,
+                                        frame_.samples_per_channel_, 1,
+                                        target_frame.mutable_data());
 
   AudioFrame mono_frame;
   mono_frame.samples_per_channel_ = 320;
@@ -190,8 +180,8 @@
 
 TEST_F(AudioFrameOperationsTest, StereoToMonoDoesNotWrapAround) {
   SetFrameData(-32768, -32768, &frame_);
-  EXPECT_EQ(0, AudioFrameOperations::StereoToMono(&frame_));
-
+  AudioFrameOperations::DownmixChannels(1, &frame_);
+  EXPECT_EQ(1u, frame_.num_channels_);
   AudioFrame mono_frame;
   mono_frame.samples_per_channel_ = 320;
   mono_frame.num_channels_ = 1;
@@ -199,18 +189,12 @@
   VerifyFramesAreEqual(mono_frame, frame_);
 }
 
-TEST_F(AudioFrameOperationsTest, QuadToMonoFailsWithBadParameters) {
-  frame_.num_channels_ = 1;
-  EXPECT_EQ(-1, AudioFrameOperations::QuadToMono(&frame_));
-  frame_.num_channels_ = 2;
-  EXPECT_EQ(-1, AudioFrameOperations::QuadToMono(&frame_));
-}
-
 TEST_F(AudioFrameOperationsTest, QuadToMonoSucceeds) {
   frame_.num_channels_ = 4;
   SetFrameData(4, 2, 6, 8, &frame_);
 
-  EXPECT_EQ(0, AudioFrameOperations::QuadToMono(&frame_));
+  AudioFrameOperations::DownmixChannels(1, &frame_);
+  EXPECT_EQ(1u, frame_.num_channels_);
 
   AudioFrame mono_frame;
   mono_frame.samples_per_channel_ = 320;
@@ -222,7 +206,8 @@
 TEST_F(AudioFrameOperationsTest, QuadToMonoMuted) {
   frame_.num_channels_ = 4;
   ASSERT_TRUE(frame_.muted());
-  EXPECT_EQ(0, AudioFrameOperations::QuadToMono(&frame_));
+  AudioFrameOperations::DownmixChannels(1, &frame_);
+  EXPECT_EQ(1u, frame_.num_channels_);
   EXPECT_TRUE(frame_.muted());
 }
 
@@ -234,8 +219,9 @@
   target_frame.num_channels_ = 1;
   target_frame.samples_per_channel_ = frame_.samples_per_channel_;
 
-  AudioFrameOperations::QuadToMono(frame_.data(), frame_.samples_per_channel_,
-                                   target_frame.mutable_data());
+  AudioFrameOperations::DownmixChannels(frame_.data(), 4,
+                                        frame_.samples_per_channel_, 1,
+                                        target_frame.mutable_data());
   AudioFrame mono_frame;
   mono_frame.samples_per_channel_ = 320;
   mono_frame.num_channels_ = 1;
@@ -246,7 +232,8 @@
 TEST_F(AudioFrameOperationsTest, QuadToMonoDoesNotWrapAround) {
   frame_.num_channels_ = 4;
   SetFrameData(-32768, -32768, -32768, -32768, &frame_);
-  EXPECT_EQ(0, AudioFrameOperations::QuadToMono(&frame_));
+  AudioFrameOperations::DownmixChannels(1, &frame_);
+  EXPECT_EQ(1u, frame_.num_channels_);
 
   AudioFrame mono_frame;
   mono_frame.samples_per_channel_ = 320;

diff --git a/modules/audio_mixer/BUILD.gn b/modules/audio_mixer/BUILD.gn
index 3979136..ee07704 100644
--- a/modules/audio_mixer/BUILD.gn
+++ b/modules/audio_mixer/BUILD.gn

@@ -52,6 +52,7 @@
     "../audio_processing:apm_logging",
     "../audio_processing:audio_frame_view",
     "../audio_processing/agc2:fixed_digital",
+    "//third_party/abseil-cpp/absl/memory",
   ]
 }
 
@@ -99,6 +100,7 @@
       "../../rtc_base:rtc_base_approved",
       "../../rtc_base:rtc_task_queue_for_test",
       "../../test:test_support",
+      "//third_party/abseil-cpp/absl/memory",
     ]
   }
 

diff --git a/modules/audio_mixer/audio_frame_manipulator.cc b/modules/audio_mixer/audio_frame_manipulator.cc
index e8a5f75..78c11b1 100644
--- a/modules/audio_mixer/audio_frame_manipulator.cc
+++ b/modules/audio_mixer/audio_frame_manipulator.cc

@@ -9,6 +9,7 @@
  */
 
 #include "modules/audio_mixer/audio_frame_manipulator.h"
+
 #include "audio/utility/audio_frame_operations.h"
 #include "rtc_base/checks.h"
 
@@ -55,11 +56,16 @@
 
 void RemixFrame(size_t target_number_of_channels, AudioFrame* frame) {
   RTC_DCHECK_GE(target_number_of_channels, 1);
-  RTC_DCHECK_LE(target_number_of_channels, 2);
-  if (frame->num_channels_ == 1 && target_number_of_channels == 2) {
-    AudioFrameOperations::MonoToStereo(frame);
-  } else if (frame->num_channels_ == 2 && target_number_of_channels == 1) {
-    AudioFrameOperations::StereoToMono(frame);
+  if (frame->num_channels_ == target_number_of_channels) {
+    return;
   }
+  if (frame->num_channels_ > target_number_of_channels) {
+    AudioFrameOperations::DownmixChannels(target_number_of_channels, frame);
+  } else if (frame->num_channels_ < target_number_of_channels) {
+    AudioFrameOperations::UpmixChannels(target_number_of_channels, frame);
+  }
+  RTC_DCHECK_EQ(frame->num_channels_, target_number_of_channels)
+      << "Wrong number of channels, " << frame->num_channels_ << " vs "
+      << target_number_of_channels;
 }
 }  // namespace webrtc

diff --git a/modules/audio_mixer/audio_mixer_impl.cc b/modules/audio_mixer/audio_mixer_impl.cc
index bca2a57..11f5abf 100644
--- a/modules/audio_mixer/audio_mixer_impl.cc
+++ b/modules/audio_mixer/audio_mixer_impl.cc

@@ -119,7 +119,7 @@
 
 void AudioMixerImpl::Mix(size_t number_of_channels,
                          AudioFrame* audio_frame_for_mixing) {
-  RTC_DCHECK(number_of_channels == 1 || number_of_channels == 2);
+  RTC_DCHECK(number_of_channels >= 1);
   RTC_DCHECK_RUNS_SERIALIZED(&race_checker_);
 
   CalculateOutputFrequency();

diff --git a/modules/audio_mixer/audio_mixer_impl.h b/modules/audio_mixer/audio_mixer_impl.h
index c38aff2..bab4a3d 100644
--- a/modules/audio_mixer/audio_mixer_impl.h
+++ b/modules/audio_mixer/audio_mixer_impl.h

@@ -84,13 +84,6 @@
   // kMaximumAmountOfMixedAudioSources audio sources.
   AudioFrameList GetAudioFromSources() RTC_EXCLUSIVE_LOCKS_REQUIRED(crit_);
 
-  // Add/remove the MixerAudioSource to the specified
-  // MixerAudioSource list.
-  bool AddAudioSourceToList(Source* audio_source,
-                            SourceStatusList* audio_source_list) const;
-  bool RemoveAudioSourceFromList(Source* remove_audio_source,
-                                 SourceStatusList* audio_source_list) const;
-
   // The critical section lock guards audio source insertion and
   // removal, which can be done from any thread. The race checker
   // checks that mixing is done sequentially.

diff --git a/modules/audio_mixer/audio_mixer_impl_unittest.cc b/modules/audio_mixer/audio_mixer_impl_unittest.cc
index 29632ca..397bbf5 100644
--- a/modules/audio_mixer/audio_mixer_impl_unittest.cc
+++ b/modules/audio_mixer/audio_mixer_impl_unittest.cc

@@ -15,6 +15,7 @@
 #include <string>
 #include <utility>
 
+#include "absl/memory/memory.h"
 #include "api/audio/audio_mixer.h"
 #include "modules/audio_mixer/audio_mixer_impl.h"
 #include "modules/audio_mixer/default_output_rate_calculator.h"
@@ -23,6 +24,7 @@
 #include "rtc_base/strings/string_builder.h"
 #include "rtc_base/task_queue_for_test.h"
 #include "test/gmock.h"
+#include "test/gtest.h"
 
 using testing::_;
 using testing::Exactly;
@@ -520,6 +522,7 @@
 
         std::vector<MockMixerAudioSource> sources(number_of_sources);
         for (auto& source : sources) {
+          ResetFrame(source.fake_frame());
           mixer->AddSource(&source);
         }
 
@@ -530,4 +533,110 @@
     }
   }
 }
+
+TEST(AudioMixer, MultipleChannelsOneParticipant) {
+  // Set up a participant with a 6-channel frame, and make sure a 6-channel
+  // frame with the right sample values comes out from the mixer. There are 2
+  // Mix calls because of ramp-up.
+  constexpr size_t kNumberOfChannels = 6;
+  MockMixerAudioSource source;
+  ResetFrame(source.fake_frame());
+  const auto mixer = AudioMixerImpl::Create();
+  mixer->AddSource(&source);
+  mixer->Mix(1, &frame_for_mixing);
+  auto* frame = source.fake_frame();
+  frame->num_channels_ = kNumberOfChannels;
+  std::fill(frame->mutable_data(),
+            frame->mutable_data() + AudioFrame::kMaxDataSizeSamples, 0);
+  for (size_t i = 0; i < kNumberOfChannels; ++i) {
+    frame->mutable_data()[100 * frame->num_channels_ + i] = 1000 * i;
+  }
+
+  mixer->Mix(kNumberOfChannels, &frame_for_mixing);
+
+  EXPECT_EQ(frame_for_mixing.num_channels_, kNumberOfChannels);
+  for (size_t i = 0; i < kNumberOfChannels; ++i) {
+    EXPECT_EQ(frame_for_mixing.data()[100 * frame_for_mixing.num_channels_ + i],
+              static_cast<int16_t>(1000 * i));
+  }
+}
+
+TEST(AudioMixer, MultipleChannelsManyParticipants) {
+  // Sets up 2 participants. One has a 6-channel frame. Make sure a 6-channel
+  // frame with the right sample values comes out from the mixer. There are 2
+  // Mix calls because of ramp-up.
+  constexpr size_t kNumberOfChannels = 6;
+  MockMixerAudioSource source;
+  const auto mixer = AudioMixerImpl::Create();
+  mixer->AddSource(&source);
+  ResetFrame(source.fake_frame());
+  mixer->Mix(1, &frame_for_mixing);
+  auto* frame = source.fake_frame();
+  frame->num_channels_ = kNumberOfChannels;
+  std::fill(frame->mutable_data(),
+            frame->mutable_data() + AudioFrame::kMaxDataSizeSamples, 0);
+  for (size_t i = 0; i < kNumberOfChannels; ++i) {
+    frame->mutable_data()[100 * frame->num_channels_ + i] = 1000 * i;
+  }
+  MockMixerAudioSource other_source;
+  ResetFrame(other_source.fake_frame());
+  mixer->AddSource(&other_source);
+
+  mixer->Mix(kNumberOfChannels, &frame_for_mixing);
+
+  EXPECT_EQ(frame_for_mixing.num_channels_, kNumberOfChannels);
+  for (size_t i = 0; i < kNumberOfChannels; ++i) {
+    EXPECT_EQ(frame_for_mixing.data()[100 * frame_for_mixing.num_channels_ + i],
+              static_cast<int16_t>(1000 * i));
+  }
+}
+
+class HighOutputRateCalculator : public OutputRateCalculator {
+ public:
+  static const int kDefaultFrequency = 76000;
+  int CalculateOutputRate(
+      const std::vector<int>& preferred_sample_rates) override {
+    return kDefaultFrequency;
+  }
+  ~HighOutputRateCalculator() override {}
+};
+const int HighOutputRateCalculator::kDefaultFrequency;
+
+TEST(AudioMixer, MultipleChannelsAndHighRate) {
+  constexpr size_t kSamplesPerChannel =
+      HighOutputRateCalculator::kDefaultFrequency / 100;
+  // As many channels as an AudioFrame can fit:
+  constexpr size_t kNumberOfChannels =
+      AudioFrame::kMaxDataSizeSamples / kSamplesPerChannel;
+  MockMixerAudioSource source;
+  const auto mixer = AudioMixerImpl::Create(
+      absl::make_unique<HighOutputRateCalculator>(), true);
+  mixer->AddSource(&source);
+  ResetFrame(source.fake_frame());
+  mixer->Mix(1, &frame_for_mixing);
+  auto* frame = source.fake_frame();
+  frame->num_channels_ = kNumberOfChannels;
+  frame->sample_rate_hz_ = HighOutputRateCalculator::kDefaultFrequency;
+  frame->samples_per_channel_ = kSamplesPerChannel;
+
+  std::fill(frame->mutable_data(),
+            frame->mutable_data() + AudioFrame::kMaxDataSizeSamples, 0);
+  MockMixerAudioSource other_source;
+  ResetFrame(other_source.fake_frame());
+  auto* other_frame = other_source.fake_frame();
+  other_frame->num_channels_ = kNumberOfChannels;
+  other_frame->sample_rate_hz_ = HighOutputRateCalculator::kDefaultFrequency;
+  other_frame->samples_per_channel_ = kSamplesPerChannel;
+  mixer->AddSource(&other_source);
+
+#if RTC_DCHECK_IS_ON && GTEST_HAS_DEATH_TEST && !defined(WEBRTC_ANDROID)
+  EXPECT_DEATH(mixer->Mix(kNumberOfChannels, &frame_for_mixing), "");
+#elif !RTC_DCHECK_IS_ON
+  mixer->Mix(kNumberOfChannels, &frame_for_mixing);
+  EXPECT_EQ(frame_for_mixing.num_channels_, kNumberOfChannels);
+  EXPECT_EQ(frame_for_mixing.sample_rate_hz_,
+            HighOutputRateCalculator::kDefaultFrequency);
+#endif
+}
+
 }  // namespace webrtc

diff --git a/modules/audio_mixer/frame_combiner.cc b/modules/audio_mixer/frame_combiner.cc
index dd6f729..4aa86f7 100644
--- a/modules/audio_mixer/frame_combiner.cc
+++ b/modules/audio_mixer/frame_combiner.cc

@@ -16,6 +16,7 @@
 #include <iterator>
 #include <string>
 
+#include "absl/memory/memory.h"
 #include "api/array_view.h"
 #include "common_audio/include/audio_util.h"
 #include "modules/audio_mixer/audio_frame_manipulator.h"
@@ -30,11 +31,9 @@
 namespace webrtc {
 namespace {
 
-// Stereo, 48 kHz, 10 ms.
-constexpr int kMaximumAmountOfChannels = 2;
-constexpr int kMaximumChannelSize = 48 * AudioMixerImpl::kFrameDurationInMs;
-
-using OneChannelBuffer = std::array<float, kMaximumChannelSize>;
+using MixingBuffer =
+    std::array<std::array<float, FrameCombiner::kMaximumChannelSize>,
+               FrameCombiner::kMaximumNumberOfChannels>;
 
 void SetAudioFrameFields(const std::vector<AudioFrame*>& mix_list,
                          size_t number_of_channels,
@@ -74,23 +73,30 @@
             audio_frame_for_mixing->mutable_data());
 }
 
-std::array<OneChannelBuffer, kMaximumAmountOfChannels> MixToFloatFrame(
-    const std::vector<AudioFrame*>& mix_list,
-    size_t samples_per_channel,
-    size_t number_of_channels) {
-  // Convert to FloatS16 and mix.
-  using OneChannelBuffer = std::array<float, kMaximumChannelSize>;
-  std::array<OneChannelBuffer, kMaximumAmountOfChannels> mixing_buffer{};
+void MixToFloatFrame(const std::vector<AudioFrame*>& mix_list,
+                     size_t samples_per_channel,
+                     size_t number_of_channels,
+                     MixingBuffer* mixing_buffer) {
+  RTC_DCHECK_LE(samples_per_channel, FrameCombiner::kMaximumChannelSize);
+  RTC_DCHECK_LE(number_of_channels, FrameCombiner::kMaximumNumberOfChannels);
+  // Clear the mixing buffer.
+  for (auto& one_channel_buffer : *mixing_buffer) {
+    std::fill(one_channel_buffer.begin(), one_channel_buffer.end(), 0.f);
+  }
 
+  // Convert to FloatS16 and mix.
   for (size_t i = 0; i < mix_list.size(); ++i) {
     const AudioFrame* const frame = mix_list[i];
-    for (size_t j = 0; j < number_of_channels; ++j) {
-      for (size_t k = 0; k < samples_per_channel; ++k) {
-        mixing_buffer[j][k] += frame->data()[number_of_channels * k + j];
+    for (size_t j = 0; j < std::min(number_of_channels,
+                                    FrameCombiner::kMaximumNumberOfChannels);
+         ++j) {
+      for (size_t k = 0; k < std::min(samples_per_channel,
+                                      FrameCombiner::kMaximumChannelSize);
+           ++k) {
+        (*mixing_buffer)[j][k] += frame->data()[number_of_channels * k + j];
       }
     }
   }
-  return mixing_buffer;
 }
 
 void RunLimiter(AudioFrameView<float> mixing_buffer_view, Limiter* limiter) {
@@ -116,10 +122,20 @@
 }
 }  // namespace
 
+constexpr size_t FrameCombiner::kMaximumNumberOfChannels;
+constexpr size_t FrameCombiner::kMaximumChannelSize;
+
 FrameCombiner::FrameCombiner(bool use_limiter)
     : data_dumper_(new ApmDataDumper(0)),
+      mixing_buffer_(
+          absl::make_unique<std::array<std::array<float, kMaximumChannelSize>,
+                                       kMaximumNumberOfChannels>>()),
       limiter_(static_cast<size_t>(48000), data_dumper_.get(), "AudioMixer"),
-      use_limiter_(use_limiter) {}
+      use_limiter_(use_limiter) {
+  static_assert(kMaximumChannelSize * kMaximumNumberOfChannels <=
+                    AudioFrame::kMaxDataSizeSamples,
+                "");
+}
 
 FrameCombiner::~FrameCombiner() = default;
 
@@ -154,16 +170,22 @@
     return;
   }
 
-  std::array<OneChannelBuffer, kMaximumAmountOfChannels> mixing_buffer =
-      MixToFloatFrame(mix_list, samples_per_channel, number_of_channels);
+  MixToFloatFrame(mix_list, samples_per_channel, number_of_channels,
+                  mixing_buffer_.get());
+
+  const size_t output_number_of_channels =
+      std::min(number_of_channels, kMaximumNumberOfChannels);
+  const size_t output_samples_per_channel =
+      std::min(samples_per_channel, kMaximumChannelSize);
 
   // Put float data in an AudioFrameView.
-  std::array<float*, kMaximumAmountOfChannels> channel_pointers{};
-  for (size_t i = 0; i < number_of_channels; ++i) {
-    channel_pointers[i] = &mixing_buffer[i][0];
+  std::array<float*, kMaximumNumberOfChannels> channel_pointers{};
+  for (size_t i = 0; i < output_number_of_channels; ++i) {
+    channel_pointers[i] = &(*mixing_buffer_.get())[i][0];
   }
-  AudioFrameView<float> mixing_buffer_view(
-      &channel_pointers[0], number_of_channels, samples_per_channel);
+  AudioFrameView<float> mixing_buffer_view(&channel_pointers[0],
+                                           output_number_of_channels,
+                                           output_samples_per_channel);
 
   if (use_limiter_) {
     RunLimiter(mixing_buffer_view, &limiter_);

diff --git a/modules/audio_mixer/frame_combiner.h b/modules/audio_mixer/frame_combiner.h
index 1c1cd53..d989d02 100644
--- a/modules/audio_mixer/frame_combiner.h
+++ b/modules/audio_mixer/frame_combiner.h

@@ -38,12 +38,20 @@
                size_t number_of_streams,
                AudioFrame* audio_frame_for_mixing);
 
+  // Limits of the mixing buffer: up to 8 channels of 10 ms audio at 48 kHz.
+  static constexpr size_t kMaximumNumberOfChannels = 8;
+  static constexpr size_t kMaximumChannelSize = 48 * 10;
+
+  using MixingBuffer = std::array<std::array<float, kMaximumChannelSize>,
+                                  kMaximumNumberOfChannels>;
+
  private:
   void LogMixingStats(const std::vector<AudioFrame*>& mix_list,
                       int sample_rate,
                       size_t number_of_streams) const;
 
   std::unique_ptr<ApmDataDumper> data_dumper_;
+  std::unique_ptr<MixingBuffer> mixing_buffer_;
   Limiter limiter_;
   const bool use_limiter_;
   mutable int uma_logging_counter_ = 0;

diff --git a/modules/audio_mixer/frame_combiner_unittest.cc b/modules/audio_mixer/frame_combiner_unittest.cc
index 21f6383..5f024a4 100644
--- a/modules/audio_mixer/frame_combiner_unittest.cc
+++ b/modules/audio_mixer/frame_combiner_unittest.cc

@@ -22,6 +22,7 @@
 #include "modules/audio_mixer/sine_wave_generator.h"
 #include "rtc_base/checks.h"
 #include "rtc_base/strings/string_builder.h"
+#include "test/gmock.h"
 #include "test/gtest.h"
 
 namespace webrtc {
@@ -71,7 +72,7 @@
 TEST(FrameCombiner, BasicApiCallsLimiter) {
   FrameCombiner combiner(true);
   for (const int rate : {8000, 18000, 34000, 48000}) {
-    for (const int number_of_channels : {1, 2}) {
+    for (const int number_of_channels : {1, 2, 4, 8}) {
       const std::vector<AudioFrame*> all_frames = {&frame1, &frame2};
       SetUpFrames(rate, number_of_channels);
 
@@ -87,12 +88,71 @@
   }
 }
 
+// There are DCHECKs in place to check for invalid parameters.
+TEST(FrameCombiner, DebugBuildCrashesWithManyChannels) {
+  FrameCombiner combiner(true);
+  for (const int rate : {8000, 18000, 34000, 48000}) {
+    for (const int number_of_channels : {10, 20, 21}) {
+      if (static_cast<size_t>(rate / 100 * number_of_channels) >
+          AudioFrame::kMaxDataSizeSamples) {
+        continue;
+      }
+      const std::vector<AudioFrame*> all_frames = {&frame1, &frame2};
+      SetUpFrames(rate, number_of_channels);
+
+      const int number_of_frames = 2;
+      SCOPED_TRACE(
+          ProduceDebugText(rate, number_of_channels, number_of_frames));
+      const std::vector<AudioFrame*> frames_to_combine(
+          all_frames.begin(), all_frames.begin() + number_of_frames);
+#if RTC_DCHECK_IS_ON && GTEST_HAS_DEATH_TEST && !defined(WEBRTC_ANDROID)
+      EXPECT_DEATH(
+          combiner.Combine(frames_to_combine, number_of_channels, rate,
+                           frames_to_combine.size(), &audio_frame_for_mixing),
+          "");
+#elif !RTC_DCHECK_IS_ON
+      combiner.Combine(frames_to_combine, number_of_channels, rate,
+                       frames_to_combine.size(), &audio_frame_for_mixing);
+#endif
+    }
+  }
+}
+
+TEST(FrameCombiner, DebugBuildCrashesWithHighRate) {
+  FrameCombiner combiner(true);
+  for (const int rate : {50000, 96000, 128000, 196000}) {
+    for (const int number_of_channels : {1, 2, 3}) {
+      if (static_cast<size_t>(rate / 100 * number_of_channels) >
+          AudioFrame::kMaxDataSizeSamples) {
+        continue;
+      }
+      const std::vector<AudioFrame*> all_frames = {&frame1, &frame2};
+      SetUpFrames(rate, number_of_channels);
+
+      const int number_of_frames = 2;
+      SCOPED_TRACE(
+          ProduceDebugText(rate, number_of_channels, number_of_frames));
+      const std::vector<AudioFrame*> frames_to_combine(
+          all_frames.begin(), all_frames.begin() + number_of_frames);
+#if RTC_DCHECK_IS_ON && GTEST_HAS_DEATH_TEST && !defined(WEBRTC_ANDROID)
+      EXPECT_DEATH(
+          combiner.Combine(frames_to_combine, number_of_channels, rate,
+                           frames_to_combine.size(), &audio_frame_for_mixing),
+          "");
+#elif !RTC_DCHECK_IS_ON
+      combiner.Combine(frames_to_combine, number_of_channels, rate,
+                       frames_to_combine.size(), &audio_frame_for_mixing);
+#endif
+    }
+  }
+}
+
 // With no limiter, the rate has to be divisible by 100 since we use
 // 10 ms frames.
 TEST(FrameCombiner, BasicApiCallsNoLimiter) {
   FrameCombiner combiner(false);
   for (const int rate : {8000, 10000, 11000, 32000, 44100}) {
-    for (const int number_of_channels : {1, 2}) {
+    for (const int number_of_channels : {1, 2, 4, 8}) {
       const std::vector<AudioFrame*> all_frames = {&frame1, &frame2};
       SetUpFrames(rate, number_of_channels);
 
@@ -133,7 +193,7 @@
 TEST(FrameCombiner, CombiningOneFrameShouldNotChangeFrame) {
   FrameCombiner combiner(false);
   for (const int rate : {8000, 10000, 11000, 32000, 44100}) {
-    for (const int number_of_channels : {1, 2}) {
+    for (const int number_of_channels : {1, 2, 4, 8, 10}) {
       SCOPED_TRACE(ProduceDebugText(rate, number_of_channels, 1));
 
       SetUpFrames(rate, number_of_channels);
@@ -165,7 +225,7 @@
   std::vector<FrameCombinerConfig> configs = {
       {false, 30100, 2, 50.f},  {false, 16500, 1, 3200.f},
       {true, 8000, 1, 3200.f},  {true, 16000, 1, 50.f},
-      {true, 18000, 2, 3200.f}, {true, 10000, 2, 50.f},
+      {true, 18000, 8, 3200.f}, {true, 10000, 2, 50.f},
   };
 
   for (const auto& config : configs) {
commit	b4977de306bd80dbf8c20930943c318f01e037ac	[log] [tgz]
author	Alex Loiko <aleloi@webrtc.org>	Mon Jan 28 16:38:38 2019 +0100
committer	Commit Bot <commit-bot@chromium.org>	Tue Jan 29 12:43:23 2019 +0000
tree	4d0f19db2c12bb74e22cdef568ab2d24d07a3256
parent	7a3e43a5d7920438b4a3d2b8d983db5ff429cb88 [diff]