/*
 *  Copyright 2011 The WebRTC project authors. All Rights Reserved.
 *
 *  Use of this source code is governed by a BSD-style license
 *  that can be found in the LICENSE file in the root of the source
 *  tree. An additional intellectual property rights grant can be found
 *  in the file PATENTS. All contributing project authors may
 *  be found in the AUTHORS file in the root of the source tree.
 */

#include "pc/currentspeakermonitor.h"

#include <vector>

#include "media/base/streamparams.h"
#include "pc/audiomonitor.h"
#include "rtc_base/logging.h"

namespace cricket {

namespace {
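// Audio levels reported through AudioInfo are assumed to fall in the range
// [0, kMaxAudioLevel], i.e. 9 is treated as the loudest possible level.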
const int kMaxAudioLevel = 9;
// To avoid overswitching, we disable switching for a period of time after a
// switch is done.
const int kDefaultMinTimeBetweenSwitches = 1000;
}

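// Usage sketch (illustrative only, assuming the usual sigslot listener
// pattern and the SignalUpdate signature declared in the header):
//
//   class SpeakerObserver : public sigslot::has_slots<> {
//    public:
//     explicit SpeakerObserver(cricket::AudioSourceContext* context)
//         : monitor_(context) {
//       monitor_.SignalUpdate.connect(this,
//                                     &SpeakerObserver::OnSpeakerChanged);
//       monitor_.Start();
//     }
//
//    private:
//     void OnSpeakerChanged(cricket::CurrentSpeakerMonitor* monitor,
//                           uint32_t ssrc) {
//       // React to the new loudest speaker, identified by its SSRC.
//     }
//
//     cricket::CurrentSpeakerMonitor monitor_;
//   };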
CurrentSpeakerMonitor::CurrentSpeakerMonitor(
    AudioSourceContext* audio_source_context)
    : started_(false),
      audio_source_context_(audio_source_context),
      current_speaker_ssrc_(0),
      earliest_permitted_switch_time_(0),
      min_time_between_switches_(kDefaultMinTimeBetweenSwitches) {}

CurrentSpeakerMonitor::~CurrentSpeakerMonitor() {
  Stop();
}

void CurrentSpeakerMonitor::Start() {
  if (!started_) {
    audio_source_context_->SignalAudioMonitor.connect(
        this, &CurrentSpeakerMonitor::OnAudioMonitor);
    audio_source_context_->SignalMediaStreamsUpdate.connect(
        this, &CurrentSpeakerMonitor::OnMediaStreamsUpdate);
    audio_source_context_->SignalMediaStreamsReset.connect(
        this, &CurrentSpeakerMonitor::OnMediaStreamsReset);

    started_ = true;
  }
}

void CurrentSpeakerMonitor::Stop() {
  if (started_) {
    audio_source_context_->SignalAudioMonitor.disconnect(this);
    audio_source_context_->SignalMediaStreamsUpdate.disconnect(this);

    started_ = false;
    ssrc_to_speaking_state_map_.clear();
    current_speaker_ssrc_ = 0;
    earliest_permitted_switch_time_ = 0;
  }
}

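// Note: |min_time_between_switches| is in milliseconds; it is added to
// rtc::TimeMillis() below to compute |earliest_permitted_switch_time_|.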
void CurrentSpeakerMonitor::set_min_time_between_switches(
    int min_time_between_switches) {
  min_time_between_switches_ = min_time_between_switches;
}

void CurrentSpeakerMonitor::OnAudioMonitor(
    AudioSourceContext* audio_source_context, const AudioInfo& info) {
  std::map<uint32_t, int> active_ssrc_to_level_map;
  cricket::AudioInfo::StreamList::const_iterator stream_list_it;
  for (stream_list_it = info.active_streams.begin();
       stream_list_it != info.active_streams.end(); ++stream_list_it) {
    uint32_t ssrc = stream_list_it->first;
    active_ssrc_to_level_map[ssrc] = stream_list_it->second;

    // It's possible we haven't yet added this source to our map. If so,
    // add it now with a "not speaking" state.
    if (ssrc_to_speaking_state_map_.find(ssrc) ==
        ssrc_to_speaking_state_map_.end()) {
      ssrc_to_speaking_state_map_[ssrc] = SS_NOT_SPEAKING;
    }
  }

  int max_level = 0;
  uint32_t loudest_speaker_ssrc = 0;

  // Update the speaking states of all participants based on the new audio
  // level information. Also retain loudest speaker.
  std::map<uint32_t, SpeakingState>::iterator state_it;
  for (state_it = ssrc_to_speaking_state_map_.begin();
       state_it != ssrc_to_speaking_state_map_.end(); ++state_it) {
    bool is_previous_speaker = current_speaker_ssrc_ == state_it->first;

    // This uses a state machine in order to gradually identify
    // members as having started or stopped speaking. Matches the
    // algorithm used by the hangouts js code.
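    // The transitions are:
    //   SS_NOT_SPEAKING -> SS_MIGHT_BE_SPEAKING -> SS_SPEAKING
    //   SS_SPEAKING -> SS_WAS_SPEAKING_RECENTLY1 -> SS_WAS_SPEAKING_RECENTLY2
    //     -> SS_NOT_SPEAKING
    // i.e. two consecutive callbacks with a non-zero level are needed before
    // a member counts as speaking, and three consecutive callbacks with a
    // zero level before they count as having stopped.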

    std::map<uint32_t, int>::const_iterator level_it =
        active_ssrc_to_level_map.find(state_it->first);
    // Note that the stream map only contains streams with non-zero audio
    // levels.
    int level = (level_it != active_ssrc_to_level_map.end()) ?
        level_it->second : 0;
    switch (state_it->second) {
      case SS_NOT_SPEAKING:
        if (level > 0) {
          // Reset level because we don't think they're really speaking.
          level = 0;
          state_it->second = SS_MIGHT_BE_SPEAKING;
        } else {
          // State unchanged.
        }
        break;
      case SS_MIGHT_BE_SPEAKING:
        if (level > 0) {
          state_it->second = SS_SPEAKING;
        } else {
          state_it->second = SS_NOT_SPEAKING;
        }
        break;
      case SS_SPEAKING:
        if (level > 0) {
          // State unchanged.
        } else {
          state_it->second = SS_WAS_SPEAKING_RECENTLY1;
          if (is_previous_speaker) {
            // Assume this is an inter-word silence and assign him the highest
            // volume.
            level = kMaxAudioLevel;
          }
        }
        break;
      case SS_WAS_SPEAKING_RECENTLY1:
        if (level > 0) {
          state_it->second = SS_SPEAKING;
        } else {
          state_it->second = SS_WAS_SPEAKING_RECENTLY2;
          if (is_previous_speaker) {
            // Assume this is an inter-word silence and assign him the highest
            // volume.
            level = kMaxAudioLevel;
          }
        }
        break;
      case SS_WAS_SPEAKING_RECENTLY2:
        if (level > 0) {
          state_it->second = SS_SPEAKING;
        } else {
          state_it->second = SS_NOT_SPEAKING;
        }
        break;
    }

    if (level > max_level) {
      loudest_speaker_ssrc = state_it->first;
      max_level = level;
    } else if (level > 0 && level == max_level && is_previous_speaker) {
      // Favor continuity of loudest speakers if audio levels are equal.
      loudest_speaker_ssrc = state_it->first;
    }
  }

  // We avoid over-switching by disabling switching for a period of time after
  // a switch is done.
  int64_t now = rtc::TimeMillis();
  if (earliest_permitted_switch_time_ <= now &&
      current_speaker_ssrc_ != loudest_speaker_ssrc) {
    current_speaker_ssrc_ = loudest_speaker_ssrc;
    RTC_LOG(LS_INFO) << "Current speaker changed to " << current_speaker_ssrc_;
    earliest_permitted_switch_time_ = now + min_time_between_switches_;
    SignalUpdate(this, current_speaker_ssrc_);
  }
}

void CurrentSpeakerMonitor::OnMediaStreamsUpdate(
    AudioSourceContext* audio_source_context,
    const MediaStreams& added,
    const MediaStreams& removed) {
  if (audio_source_context == audio_source_context_) {
    // Update the speaking state map based on added and removed streams.
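    // Note that |current_speaker_ssrc_| is not reset here; if the current
    // speaker's stream is removed, a new speaker is only picked on a later
    // OnAudioMonitor() callback.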
    for (std::vector<cricket::StreamParams>::const_iterator
             it = removed.audio().begin(); it != removed.audio().end(); ++it) {
      ssrc_to_speaking_state_map_.erase(it->first_ssrc());
    }

    for (std::vector<cricket::StreamParams>::const_iterator
             it = added.audio().begin(); it != added.audio().end(); ++it) {
      ssrc_to_speaking_state_map_[it->first_ssrc()] = SS_NOT_SPEAKING;
    }
  }
}

void CurrentSpeakerMonitor::OnMediaStreamsReset(
    AudioSourceContext* audio_source_context) {
  if (audio_source_context == audio_source_context_) {
    ssrc_to_speaking_state_map_.clear();
  }
}

}  // namespace cricket