// Copyright 2013 The Chromium Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.

#ifndef CONTENT_RENDERER_MEDIA_WEBRTC_AUDIO_DEVICE_IMPL_H_
#define CONTENT_RENDERER_MEDIA_WEBRTC_AUDIO_DEVICE_IMPL_H_

#include <list>
#include <string>
#include <vector>

#include "base/basictypes.h"
#include "base/compiler_specific.h"
#include "base/logging.h"
#include "base/memory/ref_counted.h"
#include "base/memory/scoped_ptr.h"
#include "base/threading/thread_checker.h"
#include "content/common/content_export.h"
#include "content/renderer/media/webrtc_audio_capturer.h"
#include "content/renderer/media/webrtc_audio_device_not_impl.h"
#include "content/renderer/media/webrtc_audio_renderer.h"
#include "media/base/audio_capturer_source.h"
#include "media/base/audio_renderer_sink.h"

// A WebRtcAudioDeviceImpl instance implements the abstract interface
// webrtc::AudioDeviceModule which makes it possible for a user (e.g. webrtc::
// VoiceEngine) to register this class as an external AudioDeviceModule (ADM).
// Then WebRtcAudioDeviceImpl::SetSessionId() needs to be called to set the
// session id that tells which device to use. The user can then call
// WebRtcAudioDeviceImpl::StartPlayout() and
// WebRtcAudioDeviceImpl::StartRecording() from the render process to initiate
// and start audio rendering and capturing in the browser process. IPC is
// utilized to set up the media streams.
// NOTE(review): SetSessionId() is not declared by WebRtcAudioDeviceImpl in
// this header; this paragraph and the usage example may be stale. Confirm
// whether it is inherited or was replaced (e.g. by AddAudioCapturer()).
//
// Usage example:
//
//   using namespace webrtc;
//
//   {
//     scoped_refptr<WebRtcAudioDeviceImpl> external_adm;
//     external_adm = new WebRtcAudioDeviceImpl();
//     external_adm->SetSessionId(session_id);
//     VoiceEngine* voe = VoiceEngine::Create();
//     VoEBase* base = VoEBase::GetInterface(voe);
//     base->Init(external_adm);
//     int ch = base->CreateChannel();
//     ...
//     base->StartReceive(ch);
//     base->StartPlayout(ch);
//     base->StartSending(ch);
//     ...
//     <== full-duplex audio session with AGC enabled ==>
//     ...
//     base->DeleteChannel(ch);
//     base->Terminate();
//     base->Release();
//     VoiceEngine::Delete(voe);
//   }
//
// webrtc::VoiceEngine::Init() calls these ADM methods (in this order):
//
//  RegisterAudioCallback(this)
//    webrtc::VoiceEngine is a webrtc::AudioTransport implementation and
//    implements the RecordedDataIsAvailable() and NeedMorePlayData() callbacks.
//
//  Init()
//    Creates and initializes the AudioOutputDevice and AudioInputDevice
//    objects.
//
//  SetAGC(true)
//    Enables the adaptive analog mode of the AGC which ensures that a
//    suitable microphone volume level will be set. This scheme will affect
//    the actual microphone control slider.
//
// AGC overview:
//
// It aims to maintain a constant speech loudness level from the microphone.
// This is done by both controlling the analog microphone gain and applying
// digital gain. The microphone gain on the sound card is slowly
// increased/decreased during speech only. By observing the microphone control
// slider you can see it move when you speak. If you scream, the slider moves
// downwards and then upwards again when you return to normal. It is not
// uncommon that the slider hits the maximum. This means that the maximum
// analog gain is not large enough to give the desired loudness. Nevertheless,
// we can in general still attain the desired loudness. If the microphone
// control slider is moved manually, the gain adaptation restarts and returns
// to roughly the same position as before the change if the circumstances are
// still the same. When the input microphone signal causes saturation, the
// level is decreased dramatically and has to re-adapt towards the old level.
// The adaptation is a slowly varying process and at the beginning of capture
// this is noticed by a slow increase in volume. Smaller changes in microphone
// input level are leveled out by the built-in digital control. For larger
// differences we need to rely on the slow adaptation.
// See http://en.wikipedia.org/wiki/Automatic_gain_control for more details.
//
// AGC implementation details:
//
// The adaptive analog mode of the AGC is always enabled for desktop platforms
// in WebRTC.
//
// Before recording starts, the ADM enables AGC on the AudioInputDevice.
//
// A capture session with AGC is started up as follows (simplified):
//
// [renderer]
// |
// ADM::StartRecording()
// AudioInputDevice::InitializeOnIOThread()
// AudioInputHostMsg_CreateStream(..., agc=true) [IPC]
// |
// [IPC to the browser]
// |
// AudioInputRendererHost::OnCreateStream()
// AudioInputController::CreateLowLatency()
// AudioInputController::DoSetAutomaticGainControl(true)
// AudioInputStream::SetAutomaticGainControl(true)
// |
// AGC is now enabled in the media layer and streaming starts (details omitted).
// The figure below illustrates the AGC scheme which is active in combination
// with the default media flow explained earlier.
// |
// [browser]
// |
// AudioInputStream::(Capture thread loop)
// AgcAudioStream<AudioInputStream>::GetAgcVolume() => get latest mic volume
// AudioInputData::OnData(..., volume)
// AudioInputController::OnData(..., volume)
// AudioInputSyncWriter::Write(..., volume)
// |
// [volume | size | data] is sent to the renderer [shared memory]
// |
// [renderer]
// |
// AudioInputDevice::AudioThreadCallback::Process()
// WebRtcAudioDeviceImpl::Capture(..., volume)
// AudioTransport::RecordedDataIsAvailable(..., volume, new_volume)
// |
// The AGC now uses the current volume input and computes a suitable new
// level given by the |new_volume| output. This value is only non-zero if the
// AGC has taken a decision that the microphone level should change.
// |
// if (new_volume != 0)
// AudioInputDevice::SetVolume(new_volume)
// AudioInputHostMsg_SetVolume(new_volume) [IPC]
// |
// [IPC to the browser]
// |
// AudioInputRendererHost::OnSetVolume()
// AudioInputController::SetVolume()
// AudioInputStream::SetVolume(scaled_volume)
// |
// Here we set the new microphone level in the media layer and at the same time
// read the new setting (we might not get exactly what is set).
// |
// AudioInputData::OnData(..., updated_volume)
// AudioInputController::OnData(..., updated_volume)
// |
// |
// This process repeats until we stop capturing data. Note that, a common
// steady state is that the volume control reaches its max and the new_volume
// value from the AGC is zero. A loud voice input is required to break this
// state and start lowering the level again.
//
// Implementation notes:
//
//  - This class must be created and destroyed on the main render thread and
//    most methods are called on the same thread. However, some methods are
//    also called on a Libjingle worker thread. RenderData is called on the
//    AudioOutputDevice thread and CaptureData on the AudioInputDevice thread.
//    To summarize: this class lives on four different threads.
//  - The webrtc::AudioDeviceModule is reference counted.
//  - AGC is only supported in combination with the WASAPI-based audio layer
//    on Windows, i.e., it is not supported on Windows XP.
//  - All volume levels required for the AGC scheme are transferred in a
//    normalized range [0.0, 1.0]. Scaling takes place in both endpoints
//    (WebRTC client and the media layer). This approach ensures that we can
//    avoid transferring maximum levels between the renderer and the browser.
//

179namespace content {
180
Torne (Richard Coles)2a99a7e2013-03-28 15:31:22 +0000181class WebRtcAudioCapturer;
Torne (Richard Coles)58218062012-11-14 11:43:16 +0000182class WebRtcAudioRenderer;
183
Torne (Richard Coles)2a99a7e2013-03-28 15:31:22 +0000184// TODO(xians): Move the following two interfaces to webrtc so that
185// libjingle can own references to the renderer and capturer.
Torne (Richard Coles)58218062012-11-14 11:43:16 +0000186class WebRtcAudioRendererSource {
187 public:
188 // Callback to get the rendered interleaved data.
Torne (Richard Coles)2a99a7e2013-03-28 15:31:22 +0000189 // TODO(xians): Change uint8* to int16*.
Torne (Richard Coles)58218062012-11-14 11:43:16 +0000190 virtual void RenderData(uint8* audio_data,
191 int number_of_channels,
192 int number_of_frames,
193 int audio_delay_milliseconds) = 0;
194
195 // Set the format for the capture audio parameters.
196 virtual void SetRenderFormat(const media::AudioParameters& params) = 0;
197
198 // Callback to notify the client that the renderer is going away.
Torne (Richard Coles)2a99a7e2013-03-28 15:31:22 +0000199 virtual void RemoveAudioRenderer(WebRtcAudioRenderer* renderer) = 0;
Torne (Richard Coles)58218062012-11-14 11:43:16 +0000200
201 protected:
202 virtual ~WebRtcAudioRendererSource() {}
203};
204
Torne (Richard Coles)2a99a7e2013-03-28 15:31:22 +0000205class WebRtcAudioCapturerSink {
206 public:
207 // Callback to deliver the captured interleaved data.
Ben Murdochbb1529c2013-08-08 10:24:53 +0100208 // |channels| contains a vector of WebRtc VoE channels.
209 // |audio_data| is the pointer to the audio data.
210 // |sample_rate| is the sample frequency of audio data.
211 // |number_of_channels| is the number of channels reflecting the order of
212 // surround sound channels.
213 // |audio_delay_milliseconds| is recording delay value.
214 // |current_volume| is current microphone volume, in range of |0, 255].
215 // |need_audio_processing| indicates if the audio needs WebRtc AEC/NS/AGC
216 // audio processing.
217 // The return value is the new microphone volume, in the range of |0, 255].
218 // When the volume does not need to be updated, it returns 0.
219 virtual int CaptureData(const std::vector<int>& channels,
220 const int16* audio_data,
221 int sample_rate,
222 int number_of_channels,
223 int number_of_frames,
224 int audio_delay_milliseconds,
225 int current_volume,
226 bool need_audio_processing) = 0;
Torne (Richard Coles)2a99a7e2013-03-28 15:31:22 +0000227
228 // Set the format for the capture audio parameters.
229 virtual void SetCaptureFormat(const media::AudioParameters& params) = 0;
230
231 protected:
232 virtual ~WebRtcAudioCapturerSink() {}
233};
234
235// Note that this class inherits from webrtc::AudioDeviceModule but due to
236// the high number of non-implemented methods, we move the cruft over to the
237// WebRtcAudioDeviceNotImpl.
Torne (Richard Coles)58218062012-11-14 11:43:16 +0000238class CONTENT_EXPORT WebRtcAudioDeviceImpl
Torne (Richard Coles)2a99a7e2013-03-28 15:31:22 +0000239 : NON_EXPORTED_BASE(public WebRtcAudioDeviceNotImpl),
240 NON_EXPORTED_BASE(public WebRtcAudioCapturerSink),
Torne (Richard Coles)58218062012-11-14 11:43:16 +0000241 NON_EXPORTED_BASE(public WebRtcAudioRendererSource) {
242 public:
Ben Murdochbb1529c2013-08-08 10:24:53 +0100243 // The maximum volume value WebRtc uses.
244 static const int kMaxVolumeLevel = 255;
245
Torne (Richard Coles)2a99a7e2013-03-28 15:31:22 +0000246 // Instances of this object are created on the main render thread.
Torne (Richard Coles)58218062012-11-14 11:43:16 +0000247 WebRtcAudioDeviceImpl();
248
249 // webrtc::RefCountedModule implementation.
250 // The creator must call AddRef() after construction and use Release()
251 // to release the reference and delete this object.
Torne (Richard Coles)2a99a7e2013-03-28 15:31:22 +0000252 // Called on the main render thread.
Torne (Richard Coles)58218062012-11-14 11:43:16 +0000253 virtual int32_t AddRef() OVERRIDE;
254 virtual int32_t Release() OVERRIDE;
255
Torne (Richard Coles)58218062012-11-14 11:43:16 +0000256 // webrtc::AudioDeviceModule implementation.
Torne (Richard Coles)2a99a7e2013-03-28 15:31:22 +0000257 // All implemented methods are called on the main render thread unless
258 // anything else is stated.
Torne (Richard Coles)58218062012-11-14 11:43:16 +0000259
Torne (Richard Coles)58218062012-11-14 11:43:16 +0000260 virtual int32_t RegisterAudioCallback(webrtc::AudioTransport* audio_callback)
261 OVERRIDE;
262
263 virtual int32_t Init() OVERRIDE;
264 virtual int32_t Terminate() OVERRIDE;
265 virtual bool Initialized() const OVERRIDE;
266
Torne (Richard Coles)58218062012-11-14 11:43:16 +0000267 virtual int32_t PlayoutIsAvailable(bool* available) OVERRIDE;
Torne (Richard Coles)58218062012-11-14 11:43:16 +0000268 virtual bool PlayoutIsInitialized() const OVERRIDE;
269 virtual int32_t RecordingIsAvailable(bool* available) OVERRIDE;
Torne (Richard Coles)58218062012-11-14 11:43:16 +0000270 virtual bool RecordingIsInitialized() const OVERRIDE;
271
Torne (Richard Coles)2a99a7e2013-03-28 15:31:22 +0000272 // All Start/Stop methods are called on a libJingle worker thread.
Torne (Richard Coles)58218062012-11-14 11:43:16 +0000273 virtual int32_t StartPlayout() OVERRIDE;
274 virtual int32_t StopPlayout() OVERRIDE;
275 virtual bool Playing() const OVERRIDE;
276 virtual int32_t StartRecording() OVERRIDE;
277 virtual int32_t StopRecording() OVERRIDE;
278 virtual bool Recording() const OVERRIDE;
279
Torne (Richard Coles)2a99a7e2013-03-28 15:31:22 +0000280 // Called on the main render thread and libJingle worker thread.
Torne (Richard Coles)58218062012-11-14 11:43:16 +0000281 virtual int32_t SetAGC(bool enable) OVERRIDE;
Torne (Richard Coles)2a99a7e2013-03-28 15:31:22 +0000282
Torne (Richard Coles)58218062012-11-14 11:43:16 +0000283 virtual bool AGC() const OVERRIDE;
284
Torne (Richard Coles)2a99a7e2013-03-28 15:31:22 +0000285 // Called on the AudioInputDevice worker thread.
Torne (Richard Coles)58218062012-11-14 11:43:16 +0000286 virtual int32_t SetMicrophoneVolume(uint32_t volume) OVERRIDE;
Torne (Richard Coles)2a99a7e2013-03-28 15:31:22 +0000287
288 // TODO(henrika): sort out calling thread once we start using this API.
Torne (Richard Coles)58218062012-11-14 11:43:16 +0000289 virtual int32_t MicrophoneVolume(uint32_t* volume) const OVERRIDE;
Torne (Richard Coles)2a99a7e2013-03-28 15:31:22 +0000290
Torne (Richard Coles)58218062012-11-14 11:43:16 +0000291 virtual int32_t MaxMicrophoneVolume(uint32_t* max_volume) const OVERRIDE;
292 virtual int32_t MinMicrophoneVolume(uint32_t* min_volume) const OVERRIDE;
Torne (Richard Coles)58218062012-11-14 11:43:16 +0000293 virtual int32_t StereoPlayoutIsAvailable(bool* available) const OVERRIDE;
Torne (Richard Coles)58218062012-11-14 11:43:16 +0000294 virtual int32_t StereoRecordingIsAvailable(bool* available) const OVERRIDE;
Torne (Richard Coles)58218062012-11-14 11:43:16 +0000295 virtual int32_t PlayoutDelay(uint16_t* delay_ms) const OVERRIDE;
296 virtual int32_t RecordingDelay(uint16_t* delay_ms) const OVERRIDE;
Torne (Richard Coles)58218062012-11-14 11:43:16 +0000297 virtual int32_t RecordingSampleRate(uint32_t* samples_per_sec) const OVERRIDE;
Torne (Richard Coles)58218062012-11-14 11:43:16 +0000298 virtual int32_t PlayoutSampleRate(uint32_t* samples_per_sec) const OVERRIDE;
299
Torne (Richard Coles)2a99a7e2013-03-28 15:31:22 +0000300 // Sets the |renderer_|, returns false if |renderer_| already exists.
301 // Called on the main renderer thread.
302 bool SetAudioRenderer(WebRtcAudioRenderer* renderer);
Torne (Richard Coles)58218062012-11-14 11:43:16 +0000303
Ben Murdochbb1529c2013-08-08 10:24:53 +0100304 // Adds the capturer to the ADM.
305 void AddAudioCapturer(const scoped_refptr<WebRtcAudioCapturer>& capturer);
306
307 // Gets the default capturer, which is the capturer in the list with
308 // a valid |device_id|. Microphones are represented by capturers with a valid
309 // |device_id|, since only one microphone is supported today, only one
310 // capturer in the |capturers_| can have a valid |device_id|.
311 scoped_refptr<WebRtcAudioCapturer> GetDefaultCapturer() const;
312
Torne (Richard Coles)2a99a7e2013-03-28 15:31:22 +0000313 const scoped_refptr<WebRtcAudioRenderer>& renderer() const {
314 return renderer_;
315 }
Torne (Richard Coles)58218062012-11-14 11:43:16 +0000316 int output_buffer_size() const {
317 return output_audio_parameters_.frames_per_buffer();
318 }
Torne (Richard Coles)58218062012-11-14 11:43:16 +0000319 int output_channels() const {
320 return output_audio_parameters_.channels();
321 }
Torne (Richard Coles)58218062012-11-14 11:43:16 +0000322 int output_sample_rate() const {
323 return output_audio_parameters_.sample_rate();
324 }
Torne (Richard Coles)58218062012-11-14 11:43:16 +0000325
326 private:
Ben Murdochbb1529c2013-08-08 10:24:53 +0100327 typedef std::list<scoped_refptr<WebRtcAudioCapturer> > CapturerList;
328
Torne (Richard Coles)58218062012-11-14 11:43:16 +0000329 // Make destructor private to ensure that we can only be deleted by Release().
330 virtual ~WebRtcAudioDeviceImpl();
331
Torne (Richard Coles)c2e0dbd2013-05-09 18:35:53 +0100332 // WebRtcAudioCapturerSink implementation.
333
334 // Called on the AudioInputDevice worker thread.
Ben Murdochbb1529c2013-08-08 10:24:53 +0100335 virtual int CaptureData(const std::vector<int>& channels,
336 const int16* audio_data,
337 int sample_rate,
338 int number_of_channels,
339 int number_of_frames,
340 int audio_delay_milliseconds,
341 int current_volume,
342 bool need_audio_processing) OVERRIDE;
Torne (Richard Coles)c2e0dbd2013-05-09 18:35:53 +0100343
344 // Called on the main render thread.
345 virtual void SetCaptureFormat(const media::AudioParameters& params) OVERRIDE;
346
347 // WebRtcAudioRendererSource implementation.
348
349 // Called on the AudioInputDevice worker thread.
350 virtual void RenderData(uint8* audio_data,
351 int number_of_channels,
352 int number_of_frames,
353 int audio_delay_milliseconds) OVERRIDE;
354
355 // Called on the main render thread.
356 virtual void SetRenderFormat(const media::AudioParameters& params) OVERRIDE;
357 virtual void RemoveAudioRenderer(WebRtcAudioRenderer* renderer) OVERRIDE;
358
Torne (Richard Coles)2a99a7e2013-03-28 15:31:22 +0000359 // Used to DCHECK that we are called on the correct thread.
360 base::ThreadChecker thread_checker_;
Torne (Richard Coles)58218062012-11-14 11:43:16 +0000361
362 int ref_count_;
363
Ben Murdochbb1529c2013-08-08 10:24:53 +0100364 // List of captures which provides access to the native audio input layer
365 // in the browser process.
366 CapturerList capturers_;
Torne (Richard Coles)58218062012-11-14 11:43:16 +0000367
368 // Provides access to the audio renderer in the browser process.
369 scoped_refptr<WebRtcAudioRenderer> renderer_;
370
371 // Weak reference to the audio callback.
372 // The webrtc client defines |audio_transport_callback_| by calling
373 // RegisterAudioCallback().
374 webrtc::AudioTransport* audio_transport_callback_;
375
Ben Murdochbb1529c2013-08-08 10:24:53 +0100376 // Cached values of used output audio parameters. Platform dependent.
Torne (Richard Coles)58218062012-11-14 11:43:16 +0000377 media::AudioParameters output_audio_parameters_;
378
379 // Cached value of the current audio delay on the input/capture side.
380 int input_delay_ms_;
381
382 // Cached value of the current audio delay on the output/renderer side.
383 int output_delay_ms_;
384
Torne (Richard Coles)2a99a7e2013-03-28 15:31:22 +0000385 // Protects |recording_|, |output_delay_ms_|, |input_delay_ms_|, |renderer_|
386 // |recording_| and |microphone_volume_|.
Torne (Richard Coles)58218062012-11-14 11:43:16 +0000387 mutable base::Lock lock_;
388
Torne (Richard Coles)58218062012-11-14 11:43:16 +0000389 bool initialized_;
390 bool playing_;
391 bool recording_;
392
393 // Local copy of the current Automatic Gain Control state.
394 bool agc_is_enabled_;
395
396 // Used for histograms of total recording and playout times.
397 base::Time start_capture_time_;
398 base::Time start_render_time_;
399
Torne (Richard Coles)2a99a7e2013-03-28 15:31:22 +0000400 // Stores latest microphone volume received in a CaptureData() callback.
401 // Range is [0, 255].
402 uint32_t microphone_volume_;
403
Torne (Richard Coles)58218062012-11-14 11:43:16 +0000404 DISALLOW_COPY_AND_ASSIGN(WebRtcAudioDeviceImpl);
405};
406
407} // namespace content
408
409#endif // CONTENT_RENDERER_MEDIA_WEBRTC_AUDIO_DEVICE_IMPL_H_