Fredrik Solenberg | 2a87797 | 2017-12-15 16:42:15 +0100 | [diff] [blame] | 1 | /* |
| 2 | * Copyright (c) 2016 The WebRTC project authors. All Rights Reserved. |
| 3 | * |
| 4 | * Use of this source code is governed by a BSD-style license |
| 5 | * that can be found in the LICENSE file in the root of the source |
| 6 | * tree. An additional intellectual property rights grant can be found |
| 7 | * in the file PATENTS. All contributing project authors may |
| 8 | * be found in the AUTHORS file in the root of the source tree. |
| 9 | */ |
| 10 | |
| 11 | #include "audio/audio_transport_impl.h" |
| 12 | |
| 13 | #include <algorithm> |
| 14 | #include <memory> |
| 15 | #include <utility> |
| 16 | |
Fredrik Solenberg | a8b7c7f | 2018-01-17 11:18:31 +0100 | [diff] [blame] | 17 | #include "audio/remix_resample.h" |
Fredrik Solenberg | 2a87797 | 2017-12-15 16:42:15 +0100 | [diff] [blame] | 18 | #include "audio/utility/audio_frame_operations.h" |
| 19 | #include "call/audio_send_stream.h" |
| 20 | #include "rtc_base/logging.h" |
Fredrik Solenberg | 2a87797 | 2017-12-15 16:42:15 +0100 | [diff] [blame] | 21 | |
| 22 | namespace webrtc { |
| 23 | |
| 24 | namespace { |
| 25 | |
| 26 | // We want to process at the lowest sample rate and channel count possible |
| 27 | // without losing information. Choose the lowest native rate at least equal to |
| 28 | // the minimum of input and codec rates, choose lowest channel count, and |
| 29 | // configure the audio frame. |
| 30 | void InitializeCaptureFrame(int input_sample_rate, |
| 31 | int send_sample_rate_hz, |
| 32 | size_t input_num_channels, |
| 33 | size_t send_num_channels, |
| 34 | AudioFrame* audio_frame) { |
| 35 | RTC_DCHECK(audio_frame); |
| 36 | int min_processing_rate_hz = std::min(input_sample_rate, send_sample_rate_hz); |
| 37 | for (int native_rate_hz : AudioProcessing::kNativeSampleRatesHz) { |
| 38 | audio_frame->sample_rate_hz_ = native_rate_hz; |
| 39 | if (audio_frame->sample_rate_hz_ >= min_processing_rate_hz) { |
| 40 | break; |
| 41 | } |
| 42 | } |
| 43 | audio_frame->num_channels_ = std::min(input_num_channels, send_num_channels); |
| 44 | } |
| 45 | |
henrika | 649a385 | 2017-12-22 13:58:29 +0100 | [diff] [blame] | 46 | void ProcessCaptureFrame(uint32_t delay_ms, |
Fredrik Solenberg | 2a87797 | 2017-12-15 16:42:15 +0100 | [diff] [blame] | 47 | bool key_pressed, |
| 48 | bool swap_stereo_channels, |
| 49 | AudioProcessing* audio_processing, |
| 50 | AudioFrame* audio_frame) { |
| 51 | RTC_DCHECK(audio_processing); |
| 52 | RTC_DCHECK(audio_frame); |
| 53 | RTC_DCHECK( |
| 54 | !audio_processing->echo_cancellation()->is_drift_compensation_enabled()); |
Fredrik Solenberg | 2a87797 | 2017-12-15 16:42:15 +0100 | [diff] [blame] | 55 | audio_processing->set_stream_delay_ms(delay_ms); |
| 56 | audio_processing->set_stream_key_pressed(key_pressed); |
henrika | 649a385 | 2017-12-22 13:58:29 +0100 | [diff] [blame] | 57 | int error = audio_processing->ProcessStream(audio_frame); |
Fredrik Solenberg | 2a87797 | 2017-12-15 16:42:15 +0100 | [diff] [blame] | 58 | RTC_DCHECK_EQ(0, error) << "ProcessStream() error: " << error; |
| 59 | if (swap_stereo_channels) { |
| 60 | AudioFrameOperations::SwapStereoChannels(audio_frame); |
| 61 | } |
| 62 | } |
| 63 | |
| 64 | // Resample audio in |frame| to given sample rate preserving the |
| 65 | // channel count and place the result in |destination|. |
| 66 | int Resample(const AudioFrame& frame, |
| 67 | const int destination_sample_rate, |
| 68 | PushResampler<int16_t>* resampler, |
| 69 | int16_t* destination) { |
| 70 | const int number_of_channels = static_cast<int>(frame.num_channels_); |
| 71 | const int target_number_of_samples_per_channel = |
| 72 | destination_sample_rate / 100; |
| 73 | resampler->InitializeIfNeeded(frame.sample_rate_hz_, destination_sample_rate, |
| 74 | number_of_channels); |
| 75 | |
| 76 | // TODO(yujo): make resampler take an AudioFrame, and add special case |
| 77 | // handling of muted frames. |
| 78 | return resampler->Resample( |
| 79 | frame.data(), frame.samples_per_channel_ * number_of_channels, |
| 80 | destination, number_of_channels * target_number_of_samples_per_channel); |
| 81 | } |
| 82 | } // namespace |
| 83 | |
| 84 | AudioTransportImpl::AudioTransportImpl(AudioMixer* mixer, |
henrika | 649a385 | 2017-12-22 13:58:29 +0100 | [diff] [blame] | 85 | AudioProcessing* audio_processing) |
Fredrik Solenberg | 2a87797 | 2017-12-15 16:42:15 +0100 | [diff] [blame] | 86 | : audio_processing_(audio_processing), |
Fredrik Solenberg | 2a87797 | 2017-12-15 16:42:15 +0100 | [diff] [blame] | 87 | mixer_(mixer) { |
| 88 | RTC_DCHECK(mixer); |
| 89 | RTC_DCHECK(audio_processing); |
Fredrik Solenberg | 2a87797 | 2017-12-15 16:42:15 +0100 | [diff] [blame] | 90 | } |
| 91 | |
| 92 | AudioTransportImpl::~AudioTransportImpl() {} |
| 93 | |
| 94 | // Not used in Chromium. Process captured audio and distribute to all sending |
| 95 | // streams, and try to do this at the lowest possible sample rate. |
| 96 | int32_t AudioTransportImpl::RecordedDataIsAvailable( |
| 97 | const void* audio_data, |
| 98 | const size_t number_of_frames, |
| 99 | const size_t bytes_per_sample, |
| 100 | const size_t number_of_channels, |
| 101 | const uint32_t sample_rate, |
| 102 | const uint32_t audio_delay_milliseconds, |
| 103 | const int32_t /*clock_drift*/, |
henrika | 649a385 | 2017-12-22 13:58:29 +0100 | [diff] [blame] | 104 | const uint32_t /*volume*/, |
Fredrik Solenberg | 2a87797 | 2017-12-15 16:42:15 +0100 | [diff] [blame] | 105 | const bool key_pressed, |
| 106 | uint32_t& /*new_mic_volume*/) { // NOLINT: to avoid changing APIs |
| 107 | RTC_DCHECK(audio_data); |
| 108 | RTC_DCHECK_GE(number_of_channels, 1); |
| 109 | RTC_DCHECK_LE(number_of_channels, 2); |
| 110 | RTC_DCHECK_EQ(2 * number_of_channels, bytes_per_sample); |
| 111 | RTC_DCHECK_GE(sample_rate, AudioProcessing::NativeRate::kSampleRate8kHz); |
| 112 | // 100 = 1 second / data duration (10 ms). |
| 113 | RTC_DCHECK_EQ(number_of_frames * 100, sample_rate); |
| 114 | RTC_DCHECK_LE(bytes_per_sample * number_of_frames * number_of_channels, |
| 115 | AudioFrame::kMaxDataSizeBytes); |
| 116 | |
Fredrik Solenberg | 2a87797 | 2017-12-15 16:42:15 +0100 | [diff] [blame] | 117 | int send_sample_rate_hz = 0; |
| 118 | size_t send_num_channels = 0; |
| 119 | bool swap_stereo_channels = false; |
| 120 | { |
| 121 | rtc::CritScope lock(&capture_lock_); |
| 122 | send_sample_rate_hz = send_sample_rate_hz_; |
| 123 | send_num_channels = send_num_channels_; |
| 124 | swap_stereo_channels = swap_stereo_channels_; |
| 125 | } |
| 126 | |
| 127 | std::unique_ptr<AudioFrame> audio_frame(new AudioFrame()); |
| 128 | InitializeCaptureFrame(sample_rate, send_sample_rate_hz, |
| 129 | number_of_channels, send_num_channels, |
| 130 | audio_frame.get()); |
| 131 | voe::RemixAndResample(static_cast<const int16_t*>(audio_data), |
| 132 | number_of_frames, number_of_channels, sample_rate, |
| 133 | &capture_resampler_, audio_frame.get()); |
henrika | 649a385 | 2017-12-22 13:58:29 +0100 | [diff] [blame] | 134 | ProcessCaptureFrame(audio_delay_milliseconds, key_pressed, |
Fredrik Solenberg | 2a87797 | 2017-12-15 16:42:15 +0100 | [diff] [blame] | 135 | swap_stereo_channels, audio_processing_, |
| 136 | audio_frame.get()); |
| 137 | |
| 138 | // Typing detection (utilizes the APM/VAD decision). We let the VAD determine |
| 139 | // if we're using this feature or not. |
| 140 | // TODO(solenberg): is_enabled() takes a lock. Work around that. |
| 141 | bool typing_detected = false; |
| 142 | if (audio_processing_->voice_detection()->is_enabled()) { |
| 143 | if (audio_frame->vad_activity_ != AudioFrame::kVadUnknown) { |
| 144 | bool vad_active = audio_frame->vad_activity_ == AudioFrame::kVadActive; |
| 145 | typing_detected = typing_detection_.Process(key_pressed, vad_active); |
| 146 | } |
| 147 | } |
| 148 | |
| 149 | // Measure audio level of speech after all processing. |
| 150 | double sample_duration = static_cast<double>(number_of_frames) / sample_rate; |
| 151 | audio_level_.ComputeLevel(*audio_frame.get(), sample_duration); |
| 152 | |
| 153 | // Copy frame and push to each sending stream. The copy is required since an |
| 154 | // encoding task will be posted internally to each stream. |
| 155 | { |
| 156 | rtc::CritScope lock(&capture_lock_); |
| 157 | typing_noise_detected_ = typing_detected; |
| 158 | |
| 159 | RTC_DCHECK_GT(audio_frame->samples_per_channel_, 0); |
| 160 | if (!sending_streams_.empty()) { |
| 161 | auto it = sending_streams_.begin(); |
| 162 | while (++it != sending_streams_.end()) { |
| 163 | std::unique_ptr<AudioFrame> audio_frame_copy(new AudioFrame()); |
| 164 | audio_frame_copy->CopyFrom(*audio_frame.get()); |
| 165 | (*it)->SendAudioData(std::move(audio_frame_copy)); |
| 166 | } |
| 167 | // Send the original frame to the first stream w/o copying. |
| 168 | (*sending_streams_.begin())->SendAudioData(std::move(audio_frame)); |
| 169 | } |
| 170 | } |
| 171 | |
| 172 | return 0; |
| 173 | } |
| 174 | |
| 175 | // Mix all received streams, feed the result to the AudioProcessing module, then |
| 176 | // resample the result to the requested output rate. |
| 177 | int32_t AudioTransportImpl::NeedMorePlayData(const size_t nSamples, |
| 178 | const size_t nBytesPerSample, |
| 179 | const size_t nChannels, |
| 180 | const uint32_t samplesPerSec, |
| 181 | void* audioSamples, |
| 182 | size_t& nSamplesOut, |
| 183 | int64_t* elapsed_time_ms, |
| 184 | int64_t* ntp_time_ms) { |
| 185 | RTC_DCHECK_EQ(sizeof(int16_t) * nChannels, nBytesPerSample); |
| 186 | RTC_DCHECK_GE(nChannels, 1); |
| 187 | RTC_DCHECK_LE(nChannels, 2); |
| 188 | RTC_DCHECK_GE( |
| 189 | samplesPerSec, |
| 190 | static_cast<uint32_t>(AudioProcessing::NativeRate::kSampleRate8kHz)); |
| 191 | |
| 192 | // 100 = 1 second / data duration (10 ms). |
| 193 | RTC_DCHECK_EQ(nSamples * 100, samplesPerSec); |
| 194 | RTC_DCHECK_LE(nBytesPerSample * nSamples * nChannels, |
| 195 | AudioFrame::kMaxDataSizeBytes); |
| 196 | |
| 197 | mixer_->Mix(nChannels, &mixed_frame_); |
| 198 | *elapsed_time_ms = mixed_frame_.elapsed_time_ms_; |
| 199 | *ntp_time_ms = mixed_frame_.ntp_time_ms_; |
| 200 | |
| 201 | const auto error = audio_processing_->ProcessReverseStream(&mixed_frame_); |
| 202 | RTC_DCHECK_EQ(error, AudioProcessing::kNoError); |
| 203 | |
| 204 | nSamplesOut = Resample(mixed_frame_, samplesPerSec, &render_resampler_, |
| 205 | static_cast<int16_t*>(audioSamples)); |
| 206 | RTC_DCHECK_EQ(nSamplesOut, nChannels * nSamples); |
| 207 | return 0; |
| 208 | } |
| 209 | |
| 210 | // Used by Chromium - same as NeedMorePlayData() but because Chrome has its |
| 211 | // own APM instance, does not call audio_processing_->ProcessReverseStream(). |
| 212 | void AudioTransportImpl::PullRenderData(int bits_per_sample, |
| 213 | int sample_rate, |
| 214 | size_t number_of_channels, |
| 215 | size_t number_of_frames, |
| 216 | void* audio_data, |
| 217 | int64_t* elapsed_time_ms, |
| 218 | int64_t* ntp_time_ms) { |
| 219 | RTC_DCHECK_EQ(bits_per_sample, 16); |
| 220 | RTC_DCHECK_GE(number_of_channels, 1); |
| 221 | RTC_DCHECK_LE(number_of_channels, 2); |
| 222 | RTC_DCHECK_GE(sample_rate, AudioProcessing::NativeRate::kSampleRate8kHz); |
| 223 | |
| 224 | // 100 = 1 second / data duration (10 ms). |
| 225 | RTC_DCHECK_EQ(number_of_frames * 100, sample_rate); |
| 226 | |
| 227 | // 8 = bits per byte. |
| 228 | RTC_DCHECK_LE(bits_per_sample / 8 * number_of_frames * number_of_channels, |
| 229 | AudioFrame::kMaxDataSizeBytes); |
| 230 | mixer_->Mix(number_of_channels, &mixed_frame_); |
| 231 | *elapsed_time_ms = mixed_frame_.elapsed_time_ms_; |
| 232 | *ntp_time_ms = mixed_frame_.ntp_time_ms_; |
| 233 | |
| 234 | auto output_samples = Resample(mixed_frame_, sample_rate, &render_resampler_, |
| 235 | static_cast<int16_t*>(audio_data)); |
| 236 | RTC_DCHECK_EQ(output_samples, number_of_channels * number_of_frames); |
| 237 | } |
| 238 | |
| 239 | void AudioTransportImpl::UpdateSendingStreams( |
| 240 | std::vector<AudioSendStream*> streams, int send_sample_rate_hz, |
| 241 | size_t send_num_channels) { |
| 242 | rtc::CritScope lock(&capture_lock_); |
| 243 | sending_streams_ = std::move(streams); |
| 244 | send_sample_rate_hz_ = send_sample_rate_hz; |
| 245 | send_num_channels_ = send_num_channels; |
| 246 | } |
| 247 | |
| 248 | void AudioTransportImpl::SetStereoChannelSwapping(bool enable) { |
| 249 | rtc::CritScope lock(&capture_lock_); |
| 250 | swap_stereo_channels_ = enable; |
| 251 | } |
| 252 | |
| 253 | bool AudioTransportImpl::typing_noise_detected() const { |
| 254 | rtc::CritScope lock(&capture_lock_); |
| 255 | return typing_noise_detected_; |
| 256 | } |
| 257 | } // namespace webrtc |