blob: a61ea731028db572afdc5f9efd819cf829c6815c [file] [log] [blame]
/*
 *  Copyright (c) 2016 The WebRTC project authors. All Rights Reserved.
 *
 *  Use of this source code is governed by a BSD-style license
 *  that can be found in the LICENSE file in the root of the source
 *  tree. An additional intellectual property rights grant can be found
 *  in the file PATENTS.  All contributing project authors may
 *  be found in the AUTHORS file in the root of the source tree.
 */
10
11#include "audio/audio_transport_impl.h"
12
13#include <algorithm>
14#include <memory>
15#include <utility>
16
Fredrik Solenberga8b7c7f2018-01-17 11:18:31 +010017#include "audio/remix_resample.h"
Fredrik Solenberg2a877972017-12-15 16:42:15 +010018#include "audio/utility/audio_frame_operations.h"
Tim Nab8c775a2020-01-10 10:33:05 -080019#include "call/audio_sender.h"
Per Åhgren71652f42020-03-17 13:23:58 +010020#include "modules/audio_processing/include/audio_frame_proxies.h"
Yves Gerey988cc082018-10-23 12:03:01 +020021#include "rtc_base/checks.h"
Fredrik Solenberg2a877972017-12-15 16:42:15 +010022
23namespace webrtc {
24
25namespace {
26
27// We want to process at the lowest sample rate and channel count possible
28// without losing information. Choose the lowest native rate at least equal to
29// the minimum of input and codec rates, choose lowest channel count, and
30// configure the audio frame.
31void InitializeCaptureFrame(int input_sample_rate,
32 int send_sample_rate_hz,
33 size_t input_num_channels,
34 size_t send_num_channels,
35 AudioFrame* audio_frame) {
36 RTC_DCHECK(audio_frame);
37 int min_processing_rate_hz = std::min(input_sample_rate, send_sample_rate_hz);
38 for (int native_rate_hz : AudioProcessing::kNativeSampleRatesHz) {
39 audio_frame->sample_rate_hz_ = native_rate_hz;
40 if (audio_frame->sample_rate_hz_ >= min_processing_rate_hz) {
41 break;
42 }
43 }
44 audio_frame->num_channels_ = std::min(input_num_channels, send_num_channels);
45}
46
henrika649a3852017-12-22 13:58:29 +010047void ProcessCaptureFrame(uint32_t delay_ms,
Fredrik Solenberg2a877972017-12-15 16:42:15 +010048 bool key_pressed,
49 bool swap_stereo_channels,
50 AudioProcessing* audio_processing,
51 AudioFrame* audio_frame) {
52 RTC_DCHECK(audio_processing);
53 RTC_DCHECK(audio_frame);
Fredrik Solenberg2a877972017-12-15 16:42:15 +010054 audio_processing->set_stream_delay_ms(delay_ms);
55 audio_processing->set_stream_key_pressed(key_pressed);
Per Åhgren71652f42020-03-17 13:23:58 +010056 int error = ProcessAudioFrame(audio_processing, audio_frame);
57
Fredrik Solenberg2a877972017-12-15 16:42:15 +010058 RTC_DCHECK_EQ(0, error) << "ProcessStream() error: " << error;
59 if (swap_stereo_channels) {
60 AudioFrameOperations::SwapStereoChannels(audio_frame);
61 }
62}
63
64// Resample audio in |frame| to given sample rate preserving the
65// channel count and place the result in |destination|.
66int Resample(const AudioFrame& frame,
67 const int destination_sample_rate,
68 PushResampler<int16_t>* resampler,
69 int16_t* destination) {
70 const int number_of_channels = static_cast<int>(frame.num_channels_);
71 const int target_number_of_samples_per_channel =
72 destination_sample_rate / 100;
73 resampler->InitializeIfNeeded(frame.sample_rate_hz_, destination_sample_rate,
74 number_of_channels);
75
76 // TODO(yujo): make resampler take an AudioFrame, and add special case
77 // handling of muted frames.
78 return resampler->Resample(
79 frame.data(), frame.samples_per_channel_ * number_of_channels,
80 destination, number_of_channels * target_number_of_samples_per_channel);
81}
82} // namespace
83
84AudioTransportImpl::AudioTransportImpl(AudioMixer* mixer,
henrika649a3852017-12-22 13:58:29 +010085 AudioProcessing* audio_processing)
Yves Gerey665174f2018-06-19 15:03:05 +020086 : audio_processing_(audio_processing), mixer_(mixer) {
Fredrik Solenberg2a877972017-12-15 16:42:15 +010087 RTC_DCHECK(mixer);
88 RTC_DCHECK(audio_processing);
Fredrik Solenberg2a877972017-12-15 16:42:15 +010089}
90
91AudioTransportImpl::~AudioTransportImpl() {}
92
93// Not used in Chromium. Process captured audio and distribute to all sending
94// streams, and try to do this at the lowest possible sample rate.
95int32_t AudioTransportImpl::RecordedDataIsAvailable(
96 const void* audio_data,
97 const size_t number_of_frames,
98 const size_t bytes_per_sample,
99 const size_t number_of_channels,
100 const uint32_t sample_rate,
101 const uint32_t audio_delay_milliseconds,
102 const int32_t /*clock_drift*/,
henrika649a3852017-12-22 13:58:29 +0100103 const uint32_t /*volume*/,
Fredrik Solenberg2a877972017-12-15 16:42:15 +0100104 const bool key_pressed,
105 uint32_t& /*new_mic_volume*/) { // NOLINT: to avoid changing APIs
106 RTC_DCHECK(audio_data);
107 RTC_DCHECK_GE(number_of_channels, 1);
108 RTC_DCHECK_LE(number_of_channels, 2);
109 RTC_DCHECK_EQ(2 * number_of_channels, bytes_per_sample);
110 RTC_DCHECK_GE(sample_rate, AudioProcessing::NativeRate::kSampleRate8kHz);
111 // 100 = 1 second / data duration (10 ms).
112 RTC_DCHECK_EQ(number_of_frames * 100, sample_rate);
113 RTC_DCHECK_LE(bytes_per_sample * number_of_frames * number_of_channels,
114 AudioFrame::kMaxDataSizeBytes);
115
Fredrik Solenberg2a877972017-12-15 16:42:15 +0100116 int send_sample_rate_hz = 0;
117 size_t send_num_channels = 0;
118 bool swap_stereo_channels = false;
119 {
120 rtc::CritScope lock(&capture_lock_);
121 send_sample_rate_hz = send_sample_rate_hz_;
122 send_num_channels = send_num_channels_;
123 swap_stereo_channels = swap_stereo_channels_;
124 }
125
126 std::unique_ptr<AudioFrame> audio_frame(new AudioFrame());
Yves Gerey665174f2018-06-19 15:03:05 +0200127 InitializeCaptureFrame(sample_rate, send_sample_rate_hz, number_of_channels,
128 send_num_channels, audio_frame.get());
Fredrik Solenberg2a877972017-12-15 16:42:15 +0100129 voe::RemixAndResample(static_cast<const int16_t*>(audio_data),
130 number_of_frames, number_of_channels, sample_rate,
131 &capture_resampler_, audio_frame.get());
henrika649a3852017-12-22 13:58:29 +0100132 ProcessCaptureFrame(audio_delay_milliseconds, key_pressed,
Fredrik Solenberg2a877972017-12-15 16:42:15 +0100133 swap_stereo_channels, audio_processing_,
134 audio_frame.get());
135
136 // Typing detection (utilizes the APM/VAD decision). We let the VAD determine
137 // if we're using this feature or not.
Sam Zackrissonba502232019-01-04 10:36:48 +0100138 // TODO(solenberg): GetConfig() takes a lock. Work around that.
Fredrik Solenberg2a877972017-12-15 16:42:15 +0100139 bool typing_detected = false;
Sam Zackrissonba502232019-01-04 10:36:48 +0100140 if (audio_processing_->GetConfig().voice_detection.enabled) {
Fredrik Solenberg2a877972017-12-15 16:42:15 +0100141 if (audio_frame->vad_activity_ != AudioFrame::kVadUnknown) {
142 bool vad_active = audio_frame->vad_activity_ == AudioFrame::kVadActive;
143 typing_detected = typing_detection_.Process(key_pressed, vad_active);
144 }
145 }
146
Fredrik Solenberg2a877972017-12-15 16:42:15 +0100147 // Copy frame and push to each sending stream. The copy is required since an
148 // encoding task will be posted internally to each stream.
149 {
150 rtc::CritScope lock(&capture_lock_);
151 typing_noise_detected_ = typing_detected;
152
153 RTC_DCHECK_GT(audio_frame->samples_per_channel_, 0);
Tim Nab8c775a2020-01-10 10:33:05 -0800154 if (!audio_senders_.empty()) {
155 auto it = audio_senders_.begin();
156 while (++it != audio_senders_.end()) {
Fredrik Solenberg2a877972017-12-15 16:42:15 +0100157 std::unique_ptr<AudioFrame> audio_frame_copy(new AudioFrame());
Benjamin Wright17b050f2019-03-13 17:35:46 -0700158 audio_frame_copy->CopyFrom(*audio_frame);
Fredrik Solenberg2a877972017-12-15 16:42:15 +0100159 (*it)->SendAudioData(std::move(audio_frame_copy));
160 }
161 // Send the original frame to the first stream w/o copying.
Tim Nab8c775a2020-01-10 10:33:05 -0800162 (*audio_senders_.begin())->SendAudioData(std::move(audio_frame));
Fredrik Solenberg2a877972017-12-15 16:42:15 +0100163 }
164 }
165
166 return 0;
167}
168
169// Mix all received streams, feed the result to the AudioProcessing module, then
170// resample the result to the requested output rate.
171int32_t AudioTransportImpl::NeedMorePlayData(const size_t nSamples,
Yves Gerey665174f2018-06-19 15:03:05 +0200172 const size_t nBytesPerSample,
173 const size_t nChannels,
174 const uint32_t samplesPerSec,
175 void* audioSamples,
176 size_t& nSamplesOut,
177 int64_t* elapsed_time_ms,
178 int64_t* ntp_time_ms) {
Fredrik Solenberg2a877972017-12-15 16:42:15 +0100179 RTC_DCHECK_EQ(sizeof(int16_t) * nChannels, nBytesPerSample);
180 RTC_DCHECK_GE(nChannels, 1);
181 RTC_DCHECK_LE(nChannels, 2);
182 RTC_DCHECK_GE(
183 samplesPerSec,
184 static_cast<uint32_t>(AudioProcessing::NativeRate::kSampleRate8kHz));
185
186 // 100 = 1 second / data duration (10 ms).
187 RTC_DCHECK_EQ(nSamples * 100, samplesPerSec);
188 RTC_DCHECK_LE(nBytesPerSample * nSamples * nChannels,
189 AudioFrame::kMaxDataSizeBytes);
190
191 mixer_->Mix(nChannels, &mixed_frame_);
192 *elapsed_time_ms = mixed_frame_.elapsed_time_ms_;
193 *ntp_time_ms = mixed_frame_.ntp_time_ms_;
194
Per Åhgren71652f42020-03-17 13:23:58 +0100195 const auto error = ProcessReverseAudioFrame(audio_processing_, &mixed_frame_);
Fredrik Solenberg2a877972017-12-15 16:42:15 +0100196 RTC_DCHECK_EQ(error, AudioProcessing::kNoError);
197
198 nSamplesOut = Resample(mixed_frame_, samplesPerSec, &render_resampler_,
199 static_cast<int16_t*>(audioSamples));
200 RTC_DCHECK_EQ(nSamplesOut, nChannels * nSamples);
201 return 0;
202}
203
204// Used by Chromium - same as NeedMorePlayData() but because Chrome has its
205// own APM instance, does not call audio_processing_->ProcessReverseStream().
206void AudioTransportImpl::PullRenderData(int bits_per_sample,
Yves Gerey665174f2018-06-19 15:03:05 +0200207 int sample_rate,
208 size_t number_of_channels,
209 size_t number_of_frames,
210 void* audio_data,
211 int64_t* elapsed_time_ms,
212 int64_t* ntp_time_ms) {
Fredrik Solenberg2a877972017-12-15 16:42:15 +0100213 RTC_DCHECK_EQ(bits_per_sample, 16);
214 RTC_DCHECK_GE(number_of_channels, 1);
Fredrik Solenberg2a877972017-12-15 16:42:15 +0100215 RTC_DCHECK_GE(sample_rate, AudioProcessing::NativeRate::kSampleRate8kHz);
216
217 // 100 = 1 second / data duration (10 ms).
218 RTC_DCHECK_EQ(number_of_frames * 100, sample_rate);
219
220 // 8 = bits per byte.
221 RTC_DCHECK_LE(bits_per_sample / 8 * number_of_frames * number_of_channels,
222 AudioFrame::kMaxDataSizeBytes);
223 mixer_->Mix(number_of_channels, &mixed_frame_);
224 *elapsed_time_ms = mixed_frame_.elapsed_time_ms_;
225 *ntp_time_ms = mixed_frame_.ntp_time_ms_;
226
227 auto output_samples = Resample(mixed_frame_, sample_rate, &render_resampler_,
228 static_cast<int16_t*>(audio_data));
229 RTC_DCHECK_EQ(output_samples, number_of_channels * number_of_frames);
230}
231
Tim Nab8c775a2020-01-10 10:33:05 -0800232void AudioTransportImpl::UpdateAudioSenders(std::vector<AudioSender*> senders,
233 int send_sample_rate_hz,
234 size_t send_num_channels) {
Fredrik Solenberg2a877972017-12-15 16:42:15 +0100235 rtc::CritScope lock(&capture_lock_);
Tim Nab8c775a2020-01-10 10:33:05 -0800236 audio_senders_ = std::move(senders);
Fredrik Solenberg2a877972017-12-15 16:42:15 +0100237 send_sample_rate_hz_ = send_sample_rate_hz;
238 send_num_channels_ = send_num_channels;
239}
240
241void AudioTransportImpl::SetStereoChannelSwapping(bool enable) {
242 rtc::CritScope lock(&capture_lock_);
243 swap_stereo_channels_ = enable;
244}
245
246bool AudioTransportImpl::typing_noise_detected() const {
247 rtc::CritScope lock(&capture_lock_);
248 return typing_noise_detected_;
249}
250} // namespace webrtc