Merge audio_processing changes.
R=aluebs@webrtc.org, bjornv@webrtc.org
BUG=
Review URL: https://webrtc-codereview.appspot.com/32769004
git-svn-id: http://webrtc.googlecode.com/svn/trunk@7893 4adac7df-926f-26a2-2b94-8c16560cd09d
diff --git a/.gitignore b/.gitignore
index 581c3bf..1082352 100644
--- a/.gitignore
+++ b/.gitignore
@@ -48,8 +48,7 @@
/links.db
/net
/out
-/resources/*.*
-/resources/*/*.*
+/resources
/talk/examples/android/bin
/talk/examples/android/gen
/talk/examples/android/libs
diff --git a/resources/audio_processing/agc/agc_audio.pcm.sha1 b/resources/audio_processing/agc/agc_audio.pcm.sha1
new file mode 100644
index 0000000..583d38f
--- /dev/null
+++ b/resources/audio_processing/agc/agc_audio.pcm.sha1
@@ -0,0 +1 @@
+10a52dc6d6f15242a1aa549205657f2834353673
\ No newline at end of file
diff --git a/resources/audio_processing/agc/agc_no_circular_buffer.dat.sha1 b/resources/audio_processing/agc/agc_no_circular_buffer.dat.sha1
new file mode 100644
index 0000000..c413bb0
--- /dev/null
+++ b/resources/audio_processing/agc/agc_no_circular_buffer.dat.sha1
@@ -0,0 +1 @@
+61219028e15606a3adbbc61d393575ab36b4078b
\ No newline at end of file
diff --git a/resources/audio_processing/agc/agc_pitch_gain.dat.sha1 b/resources/audio_processing/agc/agc_pitch_gain.dat.sha1
new file mode 100644
index 0000000..734005a
--- /dev/null
+++ b/resources/audio_processing/agc/agc_pitch_gain.dat.sha1
@@ -0,0 +1 @@
+ba0c6e93a5e6d351d95385699fb9a719b6a6d0cc
\ No newline at end of file
diff --git a/resources/audio_processing/agc/agc_pitch_lag.dat.sha1 b/resources/audio_processing/agc/agc_pitch_lag.dat.sha1
new file mode 100644
index 0000000..781e7a8
--- /dev/null
+++ b/resources/audio_processing/agc/agc_pitch_lag.dat.sha1
@@ -0,0 +1 @@
+590c6fe033665d11fa70dbbbd3e7d8f0b8a616ce
\ No newline at end of file
diff --git a/resources/audio_processing/agc/agc_spectral_peak.dat.sha1 b/resources/audio_processing/agc/agc_spectral_peak.dat.sha1
new file mode 100644
index 0000000..473becc
--- /dev/null
+++ b/resources/audio_processing/agc/agc_spectral_peak.dat.sha1
@@ -0,0 +1 @@
+3a5a28763e3ad5cd0f2833a90b685f4da97c2002
\ No newline at end of file
diff --git a/resources/audio_processing/agc/agc_vad.dat.sha1 b/resources/audio_processing/agc/agc_vad.dat.sha1
new file mode 100644
index 0000000..fd704a0
--- /dev/null
+++ b/resources/audio_processing/agc/agc_vad.dat.sha1
@@ -0,0 +1 @@
+7cae05c6902812609fa23ac04037485503b0924d
\ No newline at end of file
diff --git a/resources/audio_processing/agc/agc_voicing_prob.dat.sha1 b/resources/audio_processing/agc/agc_voicing_prob.dat.sha1
new file mode 100644
index 0000000..f0d1d10
--- /dev/null
+++ b/resources/audio_processing/agc/agc_voicing_prob.dat.sha1
@@ -0,0 +1 @@
+b1ea860f0bfad3e86fedc43cd8752821e0d75a46
\ No newline at end of file
diff --git a/resources/audio_processing/agc/agc_with_circular_buffer.dat.sha1 b/resources/audio_processing/agc/agc_with_circular_buffer.dat.sha1
new file mode 100644
index 0000000..996a4ce
--- /dev/null
+++ b/resources/audio_processing/agc/agc_with_circular_buffer.dat.sha1
@@ -0,0 +1 @@
+49402cfaa36be32320167a65c8e96f70548f5257
\ No newline at end of file
diff --git a/resources/audio_processing/transient/ajm-macbook-1-spke.gai.sha1 b/resources/audio_processing/transient/ajm-macbook-1-spke.gai.sha1
new file mode 100644
index 0000000..e929ad4
--- /dev/null
+++ b/resources/audio_processing/transient/ajm-macbook-1-spke.gai.sha1
@@ -0,0 +1 @@
+7c80af623675b2284f4081cfd2df9a0227bbc2a0
\ No newline at end of file
diff --git a/resources/audio_processing/transient/ajm-macbook-1-spke16m.pcm.sha1 b/resources/audio_processing/transient/ajm-macbook-1-spke16m.pcm.sha1
new file mode 100644
index 0000000..654fad5
--- /dev/null
+++ b/resources/audio_processing/transient/ajm-macbook-1-spke16m.pcm.sha1
@@ -0,0 +1 @@
+04155a7e186deb7524e3013476de3eaabd59a1f8
\ No newline at end of file
diff --git a/resources/audio_processing/transient/ajm-macbook-1-spke16m_chunk_10_transient_30_rational.dat.sha1 b/resources/audio_processing/transient/ajm-macbook-1-spke16m_chunk_10_transient_30_rational.dat.sha1
new file mode 100644
index 0000000..762ae4c
--- /dev/null
+++ b/resources/audio_processing/transient/ajm-macbook-1-spke16m_chunk_10_transient_30_rational.dat.sha1
@@ -0,0 +1 @@
+6c33b25be2eb9b441429aabf203d5b4a9e734c63
\ No newline at end of file
diff --git a/resources/audio_processing/transient/audio16kHz.pcm.sha1 b/resources/audio_processing/transient/audio16kHz.pcm.sha1
new file mode 100644
index 0000000..b35750c
--- /dev/null
+++ b/resources/audio_processing/transient/audio16kHz.pcm.sha1
@@ -0,0 +1 @@
+81cb7e547fad2894b5702fa571f9eb55ed6e1096
\ No newline at end of file
diff --git a/resources/audio_processing/transient/audio32kHz.pcm.sha1 b/resources/audio_processing/transient/audio32kHz.pcm.sha1
new file mode 100644
index 0000000..f6728e7
--- /dev/null
+++ b/resources/audio_processing/transient/audio32kHz.pcm.sha1
@@ -0,0 +1 @@
+81cfcff6b0d70938fe74060ba0303504c31c6d7e
\ No newline at end of file
diff --git a/resources/audio_processing/transient/audio48kHz.pcm.sha1 b/resources/audio_processing/transient/audio48kHz.pcm.sha1
new file mode 100644
index 0000000..126ff85
--- /dev/null
+++ b/resources/audio_processing/transient/audio48kHz.pcm.sha1
@@ -0,0 +1 @@
+01278951e13675a3467782e1d2f18273c05eef50
\ No newline at end of file
diff --git a/resources/audio_processing/transient/audio8kHz.pcm.sha1 b/resources/audio_processing/transient/audio8kHz.pcm.sha1
new file mode 100644
index 0000000..7f44983
--- /dev/null
+++ b/resources/audio_processing/transient/audio8kHz.pcm.sha1
@@ -0,0 +1 @@
+5fcb4621ea0f50c3fc9a63e4720ff52631258437
\ No newline at end of file
diff --git a/resources/audio_processing/transient/detect16kHz.dat.sha1 b/resources/audio_processing/transient/detect16kHz.dat.sha1
new file mode 100644
index 0000000..333e7c5
--- /dev/null
+++ b/resources/audio_processing/transient/detect16kHz.dat.sha1
@@ -0,0 +1 @@
+35639dd1b73b678360897975a91a7c8af0be3644
\ No newline at end of file
diff --git a/resources/audio_processing/transient/detect32kHz.dat.sha1 b/resources/audio_processing/transient/detect32kHz.dat.sha1
new file mode 100644
index 0000000..ff9485c
--- /dev/null
+++ b/resources/audio_processing/transient/detect32kHz.dat.sha1
@@ -0,0 +1 @@
+c9d3d0b81262ffaba7d358ad534e6fcb27c00076
\ No newline at end of file
diff --git a/resources/audio_processing/transient/detect48kHz.dat.sha1 b/resources/audio_processing/transient/detect48kHz.dat.sha1
new file mode 100644
index 0000000..0410b9a
--- /dev/null
+++ b/resources/audio_processing/transient/detect48kHz.dat.sha1
@@ -0,0 +1 @@
+f46a3380c9285324e583965ef547fcaa1650f8b8
\ No newline at end of file
diff --git a/resources/audio_processing/transient/detect8kHz.dat.sha1 b/resources/audio_processing/transient/detect8kHz.dat.sha1
new file mode 100644
index 0000000..30c19bd
--- /dev/null
+++ b/resources/audio_processing/transient/detect8kHz.dat.sha1
@@ -0,0 +1 @@
+f625c14d134d69ad38b67295459406fc9947a705
\ No newline at end of file
diff --git a/resources/audio_processing/transient/double-utils.dat.sha1 b/resources/audio_processing/transient/double-utils.dat.sha1
new file mode 100644
index 0000000..3895184
--- /dev/null
+++ b/resources/audio_processing/transient/double-utils.dat.sha1
@@ -0,0 +1 @@
+c26083880cd227178917b4df230520dbfb9b9bb1
\ No newline at end of file
diff --git a/resources/audio_processing/transient/float-utils.dat.sha1 b/resources/audio_processing/transient/float-utils.dat.sha1
new file mode 100644
index 0000000..1817c60
--- /dev/null
+++ b/resources/audio_processing/transient/float-utils.dat.sha1
@@ -0,0 +1 @@
+0eaaf21344b4b030d6c0fb6dcc419e7d3959a148
\ No newline at end of file
diff --git a/resources/audio_processing/transient/suppressed16kHz.pcm.sha1 b/resources/audio_processing/transient/suppressed16kHz.pcm.sha1
new file mode 100644
index 0000000..7ea55c3
--- /dev/null
+++ b/resources/audio_processing/transient/suppressed16kHz.pcm.sha1
@@ -0,0 +1 @@
+9781792dc39d7aada6418370246eef9f544ca47b
\ No newline at end of file
diff --git a/resources/audio_processing/transient/suppressed32kHz.pcm.sha1 b/resources/audio_processing/transient/suppressed32kHz.pcm.sha1
new file mode 100644
index 0000000..5f49bc0
--- /dev/null
+++ b/resources/audio_processing/transient/suppressed32kHz.pcm.sha1
@@ -0,0 +1 @@
+8b2bd11b591521178232aae598e6df0d001051c4
\ No newline at end of file
diff --git a/resources/audio_processing/transient/suppressed8kHz.pcm.sha1 b/resources/audio_processing/transient/suppressed8kHz.pcm.sha1
new file mode 100644
index 0000000..b0086da
--- /dev/null
+++ b/resources/audio_processing/transient/suppressed8kHz.pcm.sha1
@@ -0,0 +1 @@
+8a6c7ed696f9791f8cb5c5b061f07eb019affd49
\ No newline at end of file
diff --git a/resources/audio_processing/transient/wpd0.dat.sha1 b/resources/audio_processing/transient/wpd0.dat.sha1
new file mode 100644
index 0000000..9d9edd3
--- /dev/null
+++ b/resources/audio_processing/transient/wpd0.dat.sha1
@@ -0,0 +1 @@
+7c01839f888fe6e10276e1819bd5207668345dcf
\ No newline at end of file
diff --git a/resources/audio_processing/transient/wpd1.dat.sha1 b/resources/audio_processing/transient/wpd1.dat.sha1
new file mode 100644
index 0000000..59ff085
--- /dev/null
+++ b/resources/audio_processing/transient/wpd1.dat.sha1
@@ -0,0 +1 @@
+f7553df9abca91401715185d97d1d9c20a2ecb9b
\ No newline at end of file
diff --git a/resources/audio_processing/transient/wpd2.dat.sha1 b/resources/audio_processing/transient/wpd2.dat.sha1
new file mode 100644
index 0000000..3161de8
--- /dev/null
+++ b/resources/audio_processing/transient/wpd2.dat.sha1
@@ -0,0 +1 @@
+0455d7042c64075e793285753a98f02268e6238b
\ No newline at end of file
diff --git a/resources/audio_processing/transient/wpd3.dat.sha1 b/resources/audio_processing/transient/wpd3.dat.sha1
new file mode 100644
index 0000000..1a3b6f6
--- /dev/null
+++ b/resources/audio_processing/transient/wpd3.dat.sha1
@@ -0,0 +1 @@
+941cc5d0bfccfd1d6bd68a1d882975202f22b6de
\ No newline at end of file
diff --git a/resources/audio_processing/transient/wpd4.dat.sha1 b/resources/audio_processing/transient/wpd4.dat.sha1
new file mode 100644
index 0000000..3e05085
--- /dev/null
+++ b/resources/audio_processing/transient/wpd4.dat.sha1
@@ -0,0 +1 @@
+a16139b3750a13b62327e2a78ea008493a2b508b
\ No newline at end of file
diff --git a/resources/audio_processing/transient/wpd5.dat.sha1 b/resources/audio_processing/transient/wpd5.dat.sha1
new file mode 100644
index 0000000..aef4367
--- /dev/null
+++ b/resources/audio_processing/transient/wpd5.dat.sha1
@@ -0,0 +1 @@
+6bf9272123656bc0561550a40734245709bbac10
\ No newline at end of file
diff --git a/resources/audio_processing/transient/wpd6.dat.sha1 b/resources/audio_processing/transient/wpd6.dat.sha1
new file mode 100644
index 0000000..355c31e
--- /dev/null
+++ b/resources/audio_processing/transient/wpd6.dat.sha1
@@ -0,0 +1 @@
+6a2667c6c4b3794776af1dabacc3575791023168
\ No newline at end of file
diff --git a/resources/audio_processing/transient/wpd7.dat.sha1 b/resources/audio_processing/transient/wpd7.dat.sha1
new file mode 100644
index 0000000..daf85ed
--- /dev/null
+++ b/resources/audio_processing/transient/wpd7.dat.sha1
@@ -0,0 +1 @@
+620cf1f732c99003ff0e5d6ae3350c0a2ea2a9d7
\ No newline at end of file
diff --git a/webrtc/modules/audio_processing/BUILD.gn b/webrtc/modules/audio_processing/BUILD.gn
index fbc1e7a..d3b1012 100644
--- a/webrtc/modules/audio_processing/BUILD.gn
+++ b/webrtc/modules/audio_processing/BUILD.gn
@@ -22,25 +22,52 @@
source_set("audio_processing") {
sources = [
- "aec/include/echo_cancellation.h",
+ "aec/aec_core.c",
+ "aec/aec_core.h",
+ "aec/aec_core_internal.h",
+ "aec/aec_rdft.c",
+ "aec/aec_rdft.h",
+ "aec/aec_resampler.c",
+ "aec/aec_resampler.h",
"aec/echo_cancellation.c",
"aec/echo_cancellation_internal.h",
- "aec/aec_core.h",
- "aec/aec_core.c",
- "aec/aec_core_internal.h",
- "aec/aec_rdft.h",
- "aec/aec_rdft.c",
- "aec/aec_resampler.h",
- "aec/aec_resampler.c",
- "aecm/include/echo_control_mobile.h",
- "aecm/echo_control_mobile.c",
+ "aec/include/echo_cancellation.h",
"aecm/aecm_core.c",
"aecm/aecm_core.h",
- "agc/include/gain_control.h",
+ "aecm/echo_control_mobile.c",
+ "aecm/include/echo_control_mobile.h",
+ "agc/agc.cc",
+ "agc/agc.h",
+ "agc/agc_audio_proc.cc",
+ "agc/agc_audio_proc.h",
+ "agc/agc_audio_proc_internal.h",
+ "agc/agc_manager_direct.cc",
+ "agc/agc_manager_direct.h",
"agc/analog_agc.c",
"agc/analog_agc.h",
+ "agc/circular_buffer.cc",
+ "agc/circular_buffer.h",
+ "agc/common.h",
"agc/digital_agc.c",
"agc/digital_agc.h",
+ "agc/gain_map_internal.h",
+ "agc/gmm.cc",
+ "agc/gmm.h",
+ "agc/histogram.cc",
+ "agc/histogram.h",
+ "agc/include/gain_control.h",
+ "agc/noise_gmm_tables.h",
+ "agc/pitch_based_vad.cc",
+ "agc/pitch_based_vad.h",
+ "agc/pitch_internal.cc",
+ "agc/pitch_internal.h",
+ "agc/pole_zero_filter.cc",
+ "agc/pole_zero_filter.h",
+ "agc/standalone_vad.cc",
+ "agc/standalone_vad.h",
+ "agc/utility.cc",
+ "agc/utility.h",
+ "agc/voice_gmm_tables.h",
"audio_buffer.cc",
"audio_buffer.h",
"audio_processing_impl.cc",
@@ -67,6 +94,19 @@
"rms_level.h",
"splitting_filter.cc",
"splitting_filter.h",
+ "transient/common.h",
+ "transient/daubechies_8_wavelet_coeffs.h",
+ "transient/dyadic_decimator.h",
+ "transient/moving_moments.cc",
+ "transient/moving_moments.h",
+ "transient/transient_detector.cc",
+ "transient/transient_detector.h",
+ "transient/transient_suppressor.cc",
+ "transient/transient_suppressor.h",
+ "transient/wpd_node.cc",
+ "transient/wpd_node.h",
+ "transient/wpd_tree.cc",
+ "transient/wpd_tree.h",
"typing_detection.cc",
"typing_detection.h",
"utility/delay_estimator.c",
diff --git a/webrtc/modules/audio_processing/agc/agc.cc b/webrtc/modules/audio_processing/agc/agc.cc
new file mode 100644
index 0000000..298cfd9
--- /dev/null
+++ b/webrtc/modules/audio_processing/agc/agc.cc
@@ -0,0 +1,161 @@
+/*
+ * Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include "webrtc/modules/audio_processing/agc/agc.h"
+
+#include <cmath>
+#include <cstdlib>
+
+#include <algorithm>
+
+#include "webrtc/common_audio/resampler/include/resampler.h"
+#include "webrtc/modules/audio_processing/agc/agc_audio_proc.h"
+#include "webrtc/modules/audio_processing/agc/common.h"
+#include "webrtc/modules/audio_processing/agc/histogram.h"
+#include "webrtc/modules/audio_processing/agc/pitch_based_vad.h"
+#include "webrtc/modules/audio_processing/agc/standalone_vad.h"
+#include "webrtc/modules/audio_processing/agc/utility.h"
+#include "webrtc/modules/interface/module_common_types.h"
+#include "webrtc/system_wrappers/interface/compile_assert.h"
+
+namespace webrtc {
+namespace {
+
+const int kDefaultLevelDbfs = -18;
+const double kDefaultVoiceValue = 1.0;
+const int kNumAnalysisFrames = 100;
+const double kActivityThreshold = 0.3;
+
+} // namespace
+
+Agc::Agc()
+ : target_level_loudness_(Dbfs2Loudness(kDefaultLevelDbfs)),
+ last_voice_probability_(kDefaultVoiceValue),
+ target_level_dbfs_(kDefaultLevelDbfs),
+ standalone_vad_enabled_(true),
+ histogram_(Histogram::Create(kNumAnalysisFrames)),
+ inactive_histogram_(Histogram::Create()),
+ audio_processing_(new AgcAudioProc()),
+ pitch_based_vad_(new PitchBasedVad()),
+ standalone_vad_(StandaloneVad::Create()),
+ // Initialize to the most common resampling situation.
+ resampler_(new Resampler(32000, kSampleRateHz, kResamplerSynchronous)) {
+ }
+
+Agc::~Agc() {}
+
+float Agc::AnalyzePreproc(const int16_t* audio, int length) {
+ assert(length > 0);
+ int num_clipped = 0;
+ for (int i = 0; i < length; ++i) {
+ if (audio[i] == 32767 || audio[i] == -32768)
+ ++num_clipped;
+ }
+ return 1.0f * num_clipped / length;
+}
+
+int Agc::Process(const int16_t* audio, int length, int sample_rate_hz) {
+ assert(length == sample_rate_hz / 100);
+ if (sample_rate_hz > 32000) {
+ return -1;
+ }
+ // Resample to the required rate.
+ int16_t resampled[kLength10Ms];
+ const int16_t* resampled_ptr = audio;
+ if (sample_rate_hz != kSampleRateHz) {
+ if (resampler_->ResetIfNeeded(sample_rate_hz,
+ kSampleRateHz,
+ kResamplerSynchronous) != 0) {
+ return -1;
+ }
+ resampler_->Push(audio, length, resampled, kLength10Ms, length);
+ resampled_ptr = resampled;
+ }
+ assert(length == kLength10Ms);
+
+ if (standalone_vad_enabled_) {
+ if (standalone_vad_->AddAudio(resampled_ptr, length) != 0)
+ return -1;
+ }
+
+ AudioFeatures features;
+ audio_processing_->ExtractFeatures(resampled_ptr, length, &features);
+ if (features.num_frames > 0) {
+ if (features.silence) {
+ // The other features are invalid, so update the histogram with an
+ // arbitrary low value.
+ for (int n = 0; n < features.num_frames; ++n)
+ histogram_->Update(features.rms[n], 0.01);
+ return 0;
+ }
+
+ // Initialize to 0.5 which is a neutral value for combining probabilities,
+ // in case the standalone-VAD is not enabled.
+ double p_combined[] = {0.5, 0.5, 0.5, 0.5};
+ COMPILE_ASSERT(sizeof(p_combined) / sizeof(p_combined[0]) == kMaxNumFrames,
+ combined_probability_incorrect_size);
+ if (standalone_vad_enabled_) {
+ if (standalone_vad_->GetActivity(p_combined, kMaxNumFrames) < 0)
+ return -1;
+ }
+ // If any other VAD is enabled it must be combined before calling the
+ // pitch-based VAD.
+ if (pitch_based_vad_->VoicingProbability(features, p_combined) < 0)
+ return -1;
+ for (int n = 0; n < features.num_frames; n++) {
+ histogram_->Update(features.rms[n], p_combined[n]);
+ last_voice_probability_ = p_combined[n];
+ }
+ }
+ return 0;
+}
+
+bool Agc::GetRmsErrorDb(int* error) {
+ if (!error) {
+ assert(false);
+ return false;
+ }
+
+ if (histogram_->num_updates() < kNumAnalysisFrames) {
+ // We haven't yet received enough frames.
+ return false;
+ }
+
+ if (histogram_->AudioContent() < kNumAnalysisFrames * kActivityThreshold) {
+ // We are likely in an inactive segment.
+ return false;
+ }
+
+ double loudness = Linear2Loudness(histogram_->CurrentRms());
+ *error = std::floor(Loudness2Db(target_level_loudness_ - loudness) + 0.5);
+ histogram_->Reset();
+ return true;
+}
+
+void Agc::Reset() {
+ histogram_->Reset();
+}
+
+int Agc::set_target_level_dbfs(int level) {
+ // TODO(turajs): just some arbitrary sanity check. We can come up with better
+ // limits. The upper limit should be chosen such that the risk of clipping is
+ // low. The lower limit should not result in a too quiet signal.
+ if (level >= 0 || level <= -100)
+ return -1;
+ target_level_dbfs_ = level;
+ target_level_loudness_ = Dbfs2Loudness(level);
+ return 0;
+}
+
+void Agc::EnableStandaloneVad(bool enable) {
+ standalone_vad_enabled_ = enable;
+}
+
+} // namespace webrtc
diff --git a/webrtc/modules/audio_processing/agc/agc.h b/webrtc/modules/audio_processing/agc/agc.h
new file mode 100644
index 0000000..3c535d3
--- /dev/null
+++ b/webrtc/modules/audio_processing/agc/agc.h
@@ -0,0 +1,69 @@
+/*
+ * Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#ifndef WEBRTC_MODULES_AUDIO_PROCESSING_AGC_AGC_H_
+#define WEBRTC_MODULES_AUDIO_PROCESSING_AGC_AGC_H_
+
+#include "webrtc/system_wrappers/interface/scoped_ptr.h"
+#include "webrtc/typedefs.h"
+
+namespace webrtc {
+
+class AudioFrame;
+class AgcAudioProc;
+class Histogram;
+class PitchBasedVad;
+class Resampler;
+class StandaloneVad;
+
+class Agc {
+ public:
+ Agc();
+ virtual ~Agc();
+
+ // Returns the proportion of samples in the buffer which are at full-scale
+ // (and presumably clipped).
+ virtual float AnalyzePreproc(const int16_t* audio, int length);
+ // |audio| must be mono; in a multi-channel stream, provide the first (usually
+ // left) channel.
+ virtual int Process(const int16_t* audio, int length, int sample_rate_hz);
+
+ // Retrieves the difference between the target RMS level and the current
+ // signal RMS level in dB. Returns true if an update is available and false
+ // otherwise, in which case |error| should be ignored and no action taken.
+ virtual bool GetRmsErrorDb(int* error);
+ virtual void Reset();
+
+ virtual int set_target_level_dbfs(int level);
+ virtual int target_level_dbfs() const { return target_level_dbfs_; }
+
+ virtual void EnableStandaloneVad(bool enable);
+ virtual bool standalone_vad_enabled() const {
+ return standalone_vad_enabled_;
+ }
+
+ virtual double voice_probability() const { return last_voice_probability_; }
+
+ private:
+ double target_level_loudness_;
+ double last_voice_probability_;
+ int target_level_dbfs_;
+ bool standalone_vad_enabled_;
+ scoped_ptr<Histogram> histogram_;
+ scoped_ptr<Histogram> inactive_histogram_;
+ scoped_ptr<AgcAudioProc> audio_processing_;
+ scoped_ptr<PitchBasedVad> pitch_based_vad_;
+ scoped_ptr<StandaloneVad> standalone_vad_;
+ scoped_ptr<Resampler> resampler_;
+};
+
+} // namespace webrtc
+
+#endif // WEBRTC_MODULES_AUDIO_PROCESSING_AGC_AGC_H_
diff --git a/webrtc/modules/audio_processing/agc/agc_audio_proc.cc b/webrtc/modules/audio_processing/agc/agc_audio_proc.cc
new file mode 100644
index 0000000..002b201
--- /dev/null
+++ b/webrtc/modules/audio_processing/agc/agc_audio_proc.cc
@@ -0,0 +1,270 @@
+/*
+ * Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include "webrtc/modules/audio_processing/agc/agc_audio_proc.h"
+
+#include <math.h>
+#include <stdio.h>
+
+#include "webrtc/modules/audio_processing/agc/agc_audio_proc_internal.h"
+#include "webrtc/modules/audio_processing/agc/pitch_internal.h"
+#include "webrtc/modules/audio_processing/agc/pole_zero_filter.h"
+extern "C" {
+#include "webrtc/modules/audio_coding/codecs/isac/main/source/codec.h"
+#include "webrtc/modules/audio_coding/codecs/isac/main/source/lpc_analysis.h"
+#include "webrtc/modules/audio_coding/codecs/isac/main/source/pitch_estimator.h"
+#include "webrtc/modules/audio_coding/codecs/isac/main/source/structs.h"
+#include "webrtc/modules/audio_processing/utility/fft4g.h"
+}
+#include "webrtc/modules/interface/module_common_types.h"
+#include "webrtc/system_wrappers/interface/compile_assert.h"
+
+namespace webrtc {
+
+// The following structures are declared anonymous in iSAC's structs.h. To
+// forward declare them, we use this derived class trick.
+struct AgcAudioProc::PitchAnalysisStruct : public ::PitchAnalysisStruct {};
+struct AgcAudioProc::PreFiltBankstr : public ::PreFiltBankstr {};
+
+static const float kFrequencyResolution = kSampleRateHz /
+ static_cast<float>(AgcAudioProc::kDftSize);
+static const int kSilenceRms = 5;
+
+// TODO(turajs): Make a Create or Init for AgcAudioProc.
+AgcAudioProc::AgcAudioProc()
+ : audio_buffer_(),
+ num_buffer_samples_(kNumPastSignalSamples),
+ log_old_gain_(-2),
+ old_lag_(50), // Arbitrary but valid as pitch-lag (in samples).
+ pitch_analysis_handle_(new PitchAnalysisStruct),
+ pre_filter_handle_(new PreFiltBankstr),
+ high_pass_filter_(PoleZeroFilter::Create(
+ kCoeffNumerator, kFilterOrder, kCoeffDenominator, kFilterOrder)) {
+ COMPILE_ASSERT(kNumPastSignalSamples + kNumSubframeSamples ==
+ sizeof(kLpcAnalWin) / sizeof(kLpcAnalWin[0]),
+ lpc_analysis_window_incorrect_size);
+ COMPILE_ASSERT(kLpcOrder + 1 == sizeof(kCorrWeight) / sizeof(kCorrWeight[0]),
+ correlation_weight_incorrect_size);
+
+ // TODO(turajs): Are we doing too much in the constructor?
+ float data[kDftSize];
+ // Make FFT to initialize.
+ ip_[0] = 0;
+ WebRtc_rdft(kDftSize, 1, data, ip_, w_fft_);
+ // TODO(turajs): Need to initialize high-pass filter.
+
+ // Initialize iSAC components.
+ WebRtcIsac_InitPreFilterbank(pre_filter_handle_.get());
+ WebRtcIsac_InitPitchAnalysis(pitch_analysis_handle_.get());
+}
+
+AgcAudioProc::~AgcAudioProc() {}
+
+void AgcAudioProc::ResetBuffer() {
+ memcpy(audio_buffer_, &audio_buffer_[kNumSamplesToProcess],
+ sizeof(audio_buffer_[0]) * kNumPastSignalSamples);
+ num_buffer_samples_ = kNumPastSignalSamples;
+}
+
+int AgcAudioProc::ExtractFeatures(const int16_t* frame,
+ int length,
+ AudioFeatures* features) {
+ features->num_frames = 0;
+ if (length != kNumSubframeSamples) {
+ return -1;
+ }
+
+ // High-pass filter to remove the DC component and very low frequency content.
+ // We have experienced that this high-pass filtering improves voice/non-voiced
+ // classification.
+ if (high_pass_filter_->Filter(frame, kNumSubframeSamples,
+ &audio_buffer_[num_buffer_samples_]) != 0) {
+ return -1;
+ }
+
+ num_buffer_samples_ += kNumSubframeSamples;
+ if (num_buffer_samples_ < kBufferLength) {
+ return 0;
+ }
+ assert(num_buffer_samples_ == kBufferLength);
+ features->num_frames = kNum10msSubframes;
+ features->silence = false;
+
+ Rms(features->rms, kMaxNumFrames);
+ for (int i = 0; i < kNum10msSubframes; ++i) {
+ if (features->rms[i] < kSilenceRms) {
+ // PitchAnalysis can cause NaNs in the pitch gain if it's fed silence.
+ // Bail out here instead.
+ features->silence = true;
+ ResetBuffer();
+ return 0;
+ }
+ }
+
+ PitchAnalysis(features->log_pitch_gain, features->pitch_lag_hz,
+ kMaxNumFrames);
+ FindFirstSpectralPeaks(features->spectral_peak, kMaxNumFrames);
+ ResetBuffer();
+ return 0;
+}
+
+// Computes |kLpcOrder + 1| correlation coefficients.
+void AgcAudioProc::SubframeCorrelation(double* corr, int length_corr,
+ int subframe_index) {
+ assert(length_corr >= kLpcOrder + 1);
+ double windowed_audio[kNumSubframeSamples + kNumPastSignalSamples];
+ int buffer_index = subframe_index * kNumSubframeSamples;
+
+ for (int n = 0; n < kNumSubframeSamples + kNumPastSignalSamples; n++)
+ windowed_audio[n] = audio_buffer_[buffer_index++] * kLpcAnalWin[n];
+
+ WebRtcIsac_AutoCorr(corr, windowed_audio, kNumSubframeSamples +
+ kNumPastSignalSamples, kLpcOrder);
+}
+
+// Compute |kNum10msSubframes| sets of LPC coefficients, one per 10 ms input.
+// The analysis window is 15 ms long and it is centered on the first half of
+// each 10ms sub-frame. This is equivalent to computing LPC coefficients for the
+// first half of each 10 ms subframe.
+void AgcAudioProc::GetLpcPolynomials(double* lpc, int length_lpc) {
+ assert(length_lpc >= kNum10msSubframes * (kLpcOrder + 1));
+ double corr[kLpcOrder + 1];
+ double reflec_coeff[kLpcOrder];
+ for (int i = 0, offset_lpc = 0; i < kNum10msSubframes;
+ i++, offset_lpc += kLpcOrder + 1) {
+ SubframeCorrelation(corr, kLpcOrder + 1, i);
+ corr[0] *= 1.0001;
+ // This makes Lev-Durb a bit more stable.
+ for (int k = 0; k < kLpcOrder + 1; k++) {
+ corr[k] *= kCorrWeight[k];
+ }
+ WebRtcIsac_LevDurb(&lpc[offset_lpc], reflec_coeff, corr, kLpcOrder);
+ }
+}
+
+// Fit a second order curve to these 3 points and find the location of the
+// extremum. The points are inverted before curve fitting.
+static float QuadraticInterpolation(float prev_val, float curr_val,
+ float next_val) {
+ // Doing the interpolation in |1 / A(z)|^2.
+ float fractional_index = 0;
+ next_val = 1.0f / next_val;
+ prev_val = 1.0f / prev_val;
+ curr_val = 1.0f / curr_val;
+
+ fractional_index = -(next_val - prev_val) * 0.5f / (next_val + prev_val -
+ 2.f * curr_val);
+ assert(fabs(fractional_index) < 1);
+ return fractional_index;
+}
+
+// 1 / A(z), where A(z) is defined by |lpc| is a model of the spectral envelope
+// of the input signal. The local maximum of the spectral envelope corresponds
+// with the local minimum of A(z). It saves complexity, as we save one
+// inversion. Furthermore, we find the first local maximum of magnitude squared,
+// to save on one square root.
+void AgcAudioProc::FindFirstSpectralPeaks(double* f_peak, int length_f_peak) {
+ assert(length_f_peak >= kNum10msSubframes);
+ double lpc[kNum10msSubframes * (kLpcOrder + 1)];
+ // For all sub-frames.
+ GetLpcPolynomials(lpc, kNum10msSubframes * (kLpcOrder + 1));
+
+ const int kNumDftCoefficients = kDftSize / 2 + 1;
+ float data[kDftSize];
+
+ for (int i = 0; i < kNum10msSubframes; i++) {
+ // Convert to float with zero pad.
+ memset(data, 0, sizeof(data));
+ for (int n = 0; n < kLpcOrder + 1; n++) {
+ data[n] = static_cast<float>(lpc[i * (kLpcOrder + 1) + n]);
+ }
+ // Transform to frequency domain.
+ WebRtc_rdft(kDftSize, 1, data, ip_, w_fft_);
+
+ int index_peak = 0;
+ float prev_magn_sqr = data[0] * data[0];
+ float curr_magn_sqr = data[2] * data[2] + data[3] * data[3];
+ float next_magn_sqr;
+ bool found_peak = false;
+ for (int n = 2; n < kNumDftCoefficients - 1; n++) {
+ next_magn_sqr = data[2 * n] * data[2 * n] +
+ data[2 * n + 1] * data[2 * n + 1];
+ if (curr_magn_sqr < prev_magn_sqr && curr_magn_sqr < next_magn_sqr) {
+ found_peak = true;
+ index_peak = n - 1;
+ break;
+ }
+ prev_magn_sqr = curr_magn_sqr;
+ curr_magn_sqr = next_magn_sqr;
+ }
+ float fractional_index = 0;
+ if (!found_peak) {
+ // Checking if |kNumDftCoefficients - 1| is the local minimum.
+ next_magn_sqr = data[1] * data[1];
+ if (curr_magn_sqr < prev_magn_sqr && curr_magn_sqr < next_magn_sqr) {
+ index_peak = kNumDftCoefficients - 1;
+ }
+ } else {
+ // A peak is found, do a simple quadratic interpolation to get a more
+ // accurate estimate of the peak location.
+ fractional_index = QuadraticInterpolation(prev_magn_sqr, curr_magn_sqr,
+ next_magn_sqr);
+ }
+ f_peak[i] = (index_peak + fractional_index) * kFrequencyResolution;
+ }
+}
+
+// Using iSAC functions to estimate pitch gains & lags.
+void AgcAudioProc::PitchAnalysis(double* log_pitch_gains, double* pitch_lags_hz,
+ int length) {
+ // TODO(turajs): This can be "imported" from iSAC & and the next two
+ // constants.
+ assert(length >= kNum10msSubframes);
+ const int kNumPitchSubframes = 4;
+ double gains[kNumPitchSubframes];
+ double lags[kNumPitchSubframes];
+
+ const int kNumSubbandFrameSamples = 240;
+ const int kNumLookaheadSamples = 24;
+
+ float lower[kNumSubbandFrameSamples];
+ float upper[kNumSubbandFrameSamples];
+ double lower_lookahead[kNumSubbandFrameSamples];
+ double upper_lookahead[kNumSubbandFrameSamples];
+ double lower_lookahead_pre_filter[kNumSubbandFrameSamples +
+ kNumLookaheadSamples];
+
+ // Split signal to lower and upper bands
+ WebRtcIsac_SplitAndFilterFloat(&audio_buffer_[kNumPastSignalSamples],
+ lower, upper, lower_lookahead, upper_lookahead,
+ pre_filter_handle_.get());
+ WebRtcIsac_PitchAnalysis(lower_lookahead, lower_lookahead_pre_filter,
+ pitch_analysis_handle_.get(), lags, gains);
+
+ // Lags are computed on lower-band signal with sampling rate half of the
+ // input signal.
+ GetSubframesPitchParameters(kSampleRateHz / 2, gains, lags,
+ kNumPitchSubframes, kNum10msSubframes,
+ &log_old_gain_, &old_lag_,
+ log_pitch_gains, pitch_lags_hz);
+}
+
+void AgcAudioProc::Rms(double* rms, int length_rms) {
+ assert(length_rms >= kNum10msSubframes);
+ int offset = kNumPastSignalSamples;
+ for (int i = 0; i < kNum10msSubframes; i++) {
+ rms[i] = 0;
+ for (int n = 0; n < kNumSubframeSamples; n++, offset++)
+ rms[i] += audio_buffer_[offset] * audio_buffer_[offset];
+ rms[i] = sqrt(rms[i] / kNumSubframeSamples);
+ }
+}
+
+} // namespace webrtc
diff --git a/webrtc/modules/audio_processing/agc/agc_audio_proc.h b/webrtc/modules/audio_processing/agc/agc_audio_proc.h
new file mode 100644
index 0000000..aedc20b
--- /dev/null
+++ b/webrtc/modules/audio_processing/agc/agc_audio_proc.h
@@ -0,0 +1,83 @@
+/*
+ * Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#ifndef WEBRTC_MODULES_AUDIO_PROCESSING_AGC_AGC_AUDIO_PROC_H_
+#define WEBRTC_MODULES_AUDIO_PROCESSING_AGC_AGC_AUDIO_PROC_H_
+
+#include "webrtc/modules/audio_processing/agc/common.h"
+#include "webrtc/system_wrappers/interface/scoped_ptr.h"
+#include "webrtc/typedefs.h"
+
+namespace webrtc {
+
+class AudioFrame;
+class PoleZeroFilter;
+
+class AgcAudioProc {
+ public:
+ // Forward declare iSAC structs.
+ struct PitchAnalysisStruct;
+ struct PreFiltBankstr;
+
+ AgcAudioProc();
+ ~AgcAudioProc();
+
+ int ExtractFeatures(const int16_t* audio_frame,
+ int length,
+ AudioFeatures* audio_features);
+
+ static const int kDftSize = 512;
+
+ private:
+ void PitchAnalysis(double* pitch_gains, double* pitch_lags_hz, int length);
+  void SubframeCorrelation(double* corr, int length_corr, int subframe_index);
+ void GetLpcPolynomials(double* lpc, int length_lpc);
+ void FindFirstSpectralPeaks(double* f_peak, int length_f_peak);
+ void Rms(double* rms, int length_rms);
+ void ResetBuffer();
+
+  // To compute the spectral peak we perform LPC analysis to get the spectral
+  // envelope. For every 30 ms we compute 3 spectral peaks, therefore 3 LPC
+  // analyses. LPC is computed over 15 ms of windowed audio. For every 10 ms
+  // sub-frame we need 5 ms of past signal to create the input of LPC analysis.
+ static const int kNumPastSignalSamples = kSampleRateHz / 200;
+
+ // TODO(turajs): maybe defining this at a higher level (maybe enum) so that
+  // all the code recognizes it as "no-error."
+ static const int kNoError = 0;
+
+ static const int kNum10msSubframes = 3;
+ static const int kNumSubframeSamples = kSampleRateHz / 100;
+ static const int kNumSamplesToProcess = kNum10msSubframes *
+ kNumSubframeSamples; // Samples in 30 ms @ given sampling rate.
+ static const int kBufferLength = kNumPastSignalSamples + kNumSamplesToProcess;
+ static const int kIpLength = kDftSize >> 1;
+ static const int kWLength = kDftSize >> 1;
+
+ static const int kLpcOrder = 16;
+
+ int ip_[kIpLength];
+ float w_fft_[kWLength];
+
+  // A buffer of 5 ms (past audio) + 30 ms (one iSAC frame).
+ float audio_buffer_[kBufferLength];
+ int num_buffer_samples_;
+
+ double log_old_gain_;
+ double old_lag_;
+
+ scoped_ptr<PitchAnalysisStruct> pitch_analysis_handle_;
+ scoped_ptr<PreFiltBankstr> pre_filter_handle_;
+ scoped_ptr<PoleZeroFilter> high_pass_filter_;
+};
+
+} // namespace webrtc
+
+#endif // WEBRTC_MODULES_AUDIO_PROCESSING_AGC_AGC_AUDIO_PROC_H_
diff --git a/webrtc/modules/audio_processing/agc/agc_audio_proc_internal.h b/webrtc/modules/audio_processing/agc/agc_audio_proc_internal.h
new file mode 100644
index 0000000..dc125ef
--- /dev/null
+++ b/webrtc/modules/audio_processing/agc/agc_audio_proc_internal.h
@@ -0,0 +1,81 @@
+/*
+ * Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#ifndef WEBRTC_MODULES_AUDIO_PROCESSING_AGC_AGC_AUDIO_PROC_INTERNAL_H_
+#define WEBRTC_MODULES_AUDIO_PROCESSING_AGC_AGC_AUDIO_PROC_INTERNAL_H_
+
+#include "webrtc/system_wrappers/interface/compile_assert.h"
+
+namespace webrtc {
+
+// These values should match MATLAB counterparts for unit-tests to pass.
+static const double kCorrWeight[] = {
+ 1.000000, 0.985000, 0.970225, 0.955672, 0.941337, 0.927217, 0.913308,
+ 0.899609, 0.886115, 0.872823, 0.859730, 0.846834, 0.834132, 0.821620,
+ 0.809296, 0.797156, 0.785199
+};
+
+static const double kLpcAnalWin[] = {
+ 0.00000000, 0.01314436, 0.02628645, 0.03942400, 0.05255473, 0.06567639,
+ 0.07878670, 0.09188339, 0.10496421, 0.11802689, 0.13106918, 0.14408883,
+ 0.15708358, 0.17005118, 0.18298941, 0.19589602, 0.20876878, 0.22160547,
+ 0.23440387, 0.24716177, 0.25987696, 0.27254725, 0.28517045, 0.29774438,
+ 0.31026687, 0.32273574, 0.33514885, 0.34750406, 0.35979922, 0.37203222,
+ 0.38420093, 0.39630327, 0.40833713, 0.42030043, 0.43219112, 0.44400713,
+ 0.45574642, 0.46740697, 0.47898676, 0.49048379, 0.50189608, 0.51322164,
+ 0.52445853, 0.53560481, 0.54665854, 0.55761782, 0.56848075, 0.57924546,
+ 0.58991008, 0.60047278, 0.61093173, 0.62128512, 0.63153117, 0.64166810,
+ 0.65169416, 0.66160761, 0.67140676, 0.68108990, 0.69065536, 0.70010148,
+ 0.70942664, 0.71862923, 0.72770765, 0.73666033, 0.74548573, 0.75418233,
+ 0.76274862, 0.77118312, 0.77948437, 0.78765094, 0.79568142, 0.80357442,
+ 0.81132858, 0.81894256, 0.82641504, 0.83374472, 0.84093036, 0.84797069,
+ 0.85486451, 0.86161063, 0.86820787, 0.87465511, 0.88095122, 0.88709512,
+ 0.89308574, 0.89892206, 0.90460306, 0.91012776, 0.91549520, 0.92070447,
+ 0.92575465, 0.93064488, 0.93537432, 0.93994213, 0.94434755, 0.94858979,
+ 0.95266814, 0.95658189, 0.96033035, 0.96391289, 0.96732888, 0.97057773,
+ 0.97365889, 0.97657181, 0.97931600, 0.98189099, 0.98429632, 0.98653158,
+ 0.98859639, 0.99049038, 0.99221324, 0.99376466, 0.99514438, 0.99635215,
+ 0.99738778, 0.99825107, 0.99894188, 0.99946010, 0.99980562, 0.99997840,
+ 0.99997840, 0.99980562, 0.99946010, 0.99894188, 0.99825107, 0.99738778,
+ 0.99635215, 0.99514438, 0.99376466, 0.99221324, 0.99049038, 0.98859639,
+ 0.98653158, 0.98429632, 0.98189099, 0.97931600, 0.97657181, 0.97365889,
+ 0.97057773, 0.96732888, 0.96391289, 0.96033035, 0.95658189, 0.95266814,
+ 0.94858979, 0.94434755, 0.93994213, 0.93537432, 0.93064488, 0.92575465,
+ 0.92070447, 0.91549520, 0.91012776, 0.90460306, 0.89892206, 0.89308574,
+ 0.88709512, 0.88095122, 0.87465511, 0.86820787, 0.86161063, 0.85486451,
+ 0.84797069, 0.84093036, 0.83374472, 0.82641504, 0.81894256, 0.81132858,
+ 0.80357442, 0.79568142, 0.78765094, 0.77948437, 0.77118312, 0.76274862,
+ 0.75418233, 0.74548573, 0.73666033, 0.72770765, 0.71862923, 0.70942664,
+ 0.70010148, 0.69065536, 0.68108990, 0.67140676, 0.66160761, 0.65169416,
+ 0.64166810, 0.63153117, 0.62128512, 0.61093173, 0.60047278, 0.58991008,
+ 0.57924546, 0.56848075, 0.55761782, 0.54665854, 0.53560481, 0.52445853,
+ 0.51322164, 0.50189608, 0.49048379, 0.47898676, 0.46740697, 0.45574642,
+ 0.44400713, 0.43219112, 0.42030043, 0.40833713, 0.39630327, 0.38420093,
+ 0.37203222, 0.35979922, 0.34750406, 0.33514885, 0.32273574, 0.31026687,
+ 0.29774438, 0.28517045, 0.27254725, 0.25987696, 0.24716177, 0.23440387,
+ 0.22160547, 0.20876878, 0.19589602, 0.18298941, 0.17005118, 0.15708358,
+ 0.14408883, 0.13106918, 0.11802689, 0.10496421, 0.09188339, 0.07878670,
+ 0.06567639, 0.05255473, 0.03942400, 0.02628645, 0.01314436, 0.00000000
+};
+
+static const int kFilterOrder = 2;
+static const float kCoeffNumerator[kFilterOrder + 1] = {0.974827f, -1.949650f,
+ 0.974827f};
+static const float kCoeffDenominator[kFilterOrder + 1] = {1.0f, -1.971999f,
+ 0.972457f};
+
+COMPILE_ASSERT(kFilterOrder + 1 == sizeof(kCoeffNumerator) /
+ sizeof(kCoeffNumerator[0]), numerator_coefficients_incorrect_size);
+COMPILE_ASSERT(kFilterOrder + 1 == sizeof(kCoeffDenominator) /
+ sizeof(kCoeffDenominator[0]), denominator_coefficients_incorrect_size);
+
+} // namespace webrtc
+
+#endif  // WEBRTC_MODULES_AUDIO_PROCESSING_AGC_AGC_AUDIO_PROC_INTERNAL_H_
diff --git a/webrtc/modules/audio_processing/agc/agc_audio_proc_unittest.cc b/webrtc/modules/audio_processing/agc/agc_audio_proc_unittest.cc
new file mode 100644
index 0000000..9534aec
--- /dev/null
+++ b/webrtc/modules/audio_processing/agc/agc_audio_proc_unittest.cc
@@ -0,0 +1,61 @@
+/*
+ * Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+// We don't test the value of pitch gain and lags as they are created by iSAC
+// routines. However, interpolation of pitch-gain and lags is in a separate
+// class and has its own unit-test.
+
+#include "webrtc/modules/audio_processing/agc/agc_audio_proc.h"
+
+#include <math.h>
+#include <stdio.h>
+
+#include "gtest/gtest.h"
+#include "webrtc/modules/audio_processing/agc/common.h"
+#include "webrtc/modules/interface/module_common_types.h"
+#include "webrtc/test/testsupport/fileutils.h"
+
+namespace webrtc {
+
+TEST(AudioProcessingTest, DISABLED_ComputingFirstSpectralPeak) {
+ AgcAudioProc audioproc;
+
+ std::string peak_file_name =
+ test::ResourcePath("audio_processing/agc/agc_spectral_peak", "dat");
+ FILE* peak_file = fopen(peak_file_name.c_str(), "rb");
+ ASSERT_TRUE(peak_file != NULL);
+
+ std::string pcm_file_name =
+ test::ResourcePath("audio_processing/agc/agc_audio", "pcm");
+ FILE* pcm_file = fopen(pcm_file_name.c_str(), "rb");
+ ASSERT_TRUE(pcm_file != NULL);
+
+ // Read 10 ms audio in each iteration.
+ const size_t kDataLength = kLength10Ms;
+ int16_t data[kDataLength] = { 0 };
+ AudioFeatures features;
+ double sp[kMaxNumFrames];
+ while (fread(data, sizeof(int16_t), kDataLength, pcm_file) == kDataLength) {
+ audioproc.ExtractFeatures(data, kDataLength, &features);
+ if (features.num_frames > 0) {
+ ASSERT_LT(features.num_frames, kMaxNumFrames);
+ // Read reference values.
+ const size_t num_frames = features.num_frames;
+ ASSERT_EQ(num_frames, fread(sp, sizeof(sp[0]), num_frames, peak_file));
+ for (int n = 0; n < features.num_frames; n++)
+ EXPECT_NEAR(features.spectral_peak[n], sp[n], 3);
+ }
+ }
+
+ fclose(peak_file);
+ fclose(pcm_file);
+}
+
+} // namespace webrtc
diff --git a/webrtc/modules/audio_processing/agc/agc_manager_direct.cc b/webrtc/modules/audio_processing/agc/agc_manager_direct.cc
new file mode 100644
index 0000000..37248c1
--- /dev/null
+++ b/webrtc/modules/audio_processing/agc/agc_manager_direct.cc
@@ -0,0 +1,436 @@
+/*
+ * Copyright (c) 2013 The WebRTC project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include "webrtc/modules/audio_processing/agc/agc_manager_direct.h"
+
+#include <cassert>
+#include <cmath>
+
+#ifdef WEBRTC_AGC_DEBUG_DUMP
+#include <cstdio>
+#endif
+
+#include "webrtc/modules/audio_processing/agc/gain_map_internal.h"
+#include "webrtc/modules/audio_processing/gain_control_impl.h"
+#include "webrtc/modules/interface/module_common_types.h"
+#include "webrtc/system_wrappers/interface/compile_assert.h"
+#include "webrtc/system_wrappers/interface/logging.h"
+
+namespace webrtc {
+
+namespace {
+
+// Lowest level to which the microphone level can be lowered due to clipping.
+const int kClippedLevelMin = 170;
+// Amount the microphone level is lowered with every clipping event.
+const int kClippedLevelStep = 15;
+// Proportion of clipped samples required to declare a clipping event.
+const float kClippedRatioThreshold = 0.1f;
+// Time in frames to wait after a clipping event before checking again.
+const int kClippedWaitFrames = 300;
+
+// Amount of error we tolerate in the microphone level (presumably due to OS
+// quantization) before we assume the user has manually adjusted the microphone.
+const int kLevelQuantizationSlack = 25;
+
+const int kDefaultCompressionGain = 7;
+const int kMaxCompressionGain = 12;
+const int kMinCompressionGain = 2;
+// Controls the rate of compression changes towards the target.
+const float kCompressionGainStep = 0.05f;
+
+const int kMaxMicLevel = 255;
+COMPILE_ASSERT(kGainMapSize > kMaxMicLevel, gain_map_too_small);
+const int kMinMicLevel = 12;
+const int kMinInitMicLevel = 85;
+
+// Prevent very large microphone level changes.
+const int kMaxResidualGainChange = 15;
+
+// Maximum additional gain allowed to compensate for microphone level
+// restrictions from clipping events.
+const int kSurplusCompressionGain = 6;
+
+int LevelFromGainError(int gain_error, int level) {
+ assert(level >= 0 && level <= kMaxMicLevel);
+ if (gain_error == 0) {
+ return level;
+ }
+ // TODO(ajm): Could be made more efficient with a binary search.
+ int new_level = level;
+ if (gain_error > 0) {
+ while (kGainMap[new_level] - kGainMap[level] < gain_error &&
+ new_level < kMaxMicLevel) {
+ ++new_level;
+ }
+ } else {
+ while (kGainMap[new_level] - kGainMap[level] > gain_error &&
+ new_level > kMinMicLevel) {
+ --new_level;
+ }
+ }
+ return new_level;
+}
+
+} // namespace
+
+// Facility for dumping debug audio files. All methods are no-ops in the
+// default case where WEBRTC_AGC_DEBUG_DUMP is undefined.
+class DebugFile {
+#ifdef WEBRTC_AGC_DEBUG_DUMP
+ public:
+ explicit DebugFile(const char* filename)
+ : file_(fopen(filename, "wb")) {
+ assert(file_);
+ }
+ ~DebugFile() {
+ fclose(file_);
+ }
+ void Write(const int16_t* data, int length_samples) {
+ fwrite(data, 1, length_samples * sizeof(int16_t), file_);
+ }
+ private:
+ FILE* file_;
+#else
+ public:
+ explicit DebugFile(const char* filename) {
+ }
+ ~DebugFile() {
+ }
+ void Write(const int16_t* data, int length_samples) {
+ }
+#endif // WEBRTC_AGC_DEBUG_DUMP
+};
+
+AgcManagerDirect::AgcManagerDirect(GainControl* gctrl,
+ VolumeCallbacks* volume_callbacks)
+ : agc_(new Agc()),
+ gctrl_(gctrl),
+ volume_callbacks_(volume_callbacks),
+ frames_since_clipped_(kClippedWaitFrames),
+ level_(0),
+ max_level_(kMaxMicLevel),
+ max_compression_gain_(kMaxCompressionGain),
+ target_compression_(kDefaultCompressionGain),
+ compression_(target_compression_),
+ compression_accumulator_(compression_),
+ capture_muted_(false),
+ check_volume_on_next_process_(true), // Check at startup.
+ startup_(true),
+ file_preproc_(new DebugFile("agc_preproc.pcm")),
+ file_postproc_(new DebugFile("agc_postproc.pcm")) {
+}
+
+AgcManagerDirect::AgcManagerDirect(Agc* agc,
+ GainControl* gctrl,
+ VolumeCallbacks* volume_callbacks)
+ : agc_(agc),
+ gctrl_(gctrl),
+ volume_callbacks_(volume_callbacks),
+ frames_since_clipped_(kClippedWaitFrames),
+ level_(0),
+ max_level_(kMaxMicLevel),
+ max_compression_gain_(kMaxCompressionGain),
+ target_compression_(kDefaultCompressionGain),
+ compression_(target_compression_),
+ compression_accumulator_(compression_),
+ capture_muted_(false),
+ check_volume_on_next_process_(true), // Check at startup.
+ startup_(true),
+ file_preproc_(new DebugFile("agc_preproc.pcm")),
+ file_postproc_(new DebugFile("agc_postproc.pcm")) {
+}
+
+AgcManagerDirect::~AgcManagerDirect() {}
+
+int AgcManagerDirect::Initialize() {
+ max_level_ = kMaxMicLevel;
+ max_compression_gain_ = kMaxCompressionGain;
+ target_compression_ = kDefaultCompressionGain;
+ compression_ = target_compression_;
+ compression_accumulator_ = compression_;
+ capture_muted_ = false;
+ check_volume_on_next_process_ = true;
+ // TODO(bjornv): Investigate if we need to reset |startup_| as well. For
+ // example, what happens when we change devices.
+
+ if (gctrl_->set_mode(GainControl::kFixedDigital) != 0) {
+ LOG_FERR1(LS_ERROR, set_mode, GainControl::kFixedDigital);
+ return -1;
+ }
+ if (gctrl_->set_target_level_dbfs(2) != 0) {
+ LOG_FERR1(LS_ERROR, set_target_level_dbfs, 2);
+ return -1;
+ }
+ if (gctrl_->set_compression_gain_db(kDefaultCompressionGain) != 0) {
+ LOG_FERR1(LS_ERROR, set_compression_gain_db, kDefaultCompressionGain);
+ return -1;
+ }
+ if (gctrl_->enable_limiter(true) != 0) {
+ LOG_FERR1(LS_ERROR, enable_limiter, true);
+ return -1;
+ }
+ return 0;
+}
+
+void AgcManagerDirect::AnalyzePreProcess(int16_t* audio,
+ int num_channels,
+ int samples_per_channel) {
+ int length = num_channels * samples_per_channel;
+ if (capture_muted_) {
+ return;
+ }
+
+ file_preproc_->Write(audio, length);
+
+ if (frames_since_clipped_ < kClippedWaitFrames) {
+ ++frames_since_clipped_;
+ return;
+ }
+
+ // Check for clipped samples, as the AGC has difficulty detecting pitch
+ // under clipping distortion. We do this in the preprocessing phase in order
+ // to catch clipped echo as well.
+ //
+ // If we find a sufficiently clipped frame, drop the current microphone level
+ // and enforce a new maximum level, dropped the same amount from the current
+ // maximum. This harsh treatment is an effort to avoid repeated clipped echo
+ // events. As compensation for this restriction, the maximum compression
+ // gain is increased, through SetMaxLevel().
+ float clipped_ratio = agc_->AnalyzePreproc(audio, length);
+ if (clipped_ratio > kClippedRatioThreshold) {
+ LOG(LS_INFO) << "[agc] Clipping detected. clipped_ratio="
+ << clipped_ratio;
+ // Always decrease the maximum level, even if the current level is below
+ // threshold.
+ SetMaxLevel(std::max(kClippedLevelMin, max_level_ - kClippedLevelStep));
+ if (level_ > kClippedLevelMin) {
+ // Don't try to adjust the level if we're already below the limit. As
+ // a consequence, if the user has brought the level above the limit, we
+ // will still not react until the postproc updates the level.
+ SetLevel(std::max(kClippedLevelMin, level_ - kClippedLevelStep));
+ // Reset the AGC since the level has changed.
+ agc_->Reset();
+ }
+ frames_since_clipped_ = 0;
+ }
+}
+
+void AgcManagerDirect::Process(const int16_t* audio,
+ int length,
+ int sample_rate_hz) {
+ if (capture_muted_) {
+ return;
+ }
+
+ if (check_volume_on_next_process_) {
+ check_volume_on_next_process_ = false;
+ // We have to wait until the first process call to check the volume,
+ // because Chromium doesn't guarantee it to be valid any earlier.
+ CheckVolumeAndReset();
+ }
+
+ if (agc_->Process(audio, length, sample_rate_hz) != 0) {
+ LOG_FERR0(LS_ERROR, Agc::Process);
+ assert(false);
+ }
+
+ UpdateGain();
+ UpdateCompressor();
+
+ file_postproc_->Write(audio, length);
+}
+
+void AgcManagerDirect::SetLevel(int new_level) {
+ int voe_level = volume_callbacks_->GetMicVolume();
+ if (voe_level < 0) {
+ return;
+ }
+ if (voe_level == 0) {
+ LOG(LS_INFO) << "[agc] VolumeCallbacks returned level=0, taking no action.";
+ return;
+ }
+ if (voe_level > kMaxMicLevel) {
+ LOG(LS_ERROR) << "VolumeCallbacks returned an invalid level=" << voe_level;
+ return;
+ }
+
+ if (voe_level > level_ + kLevelQuantizationSlack ||
+ voe_level < level_ - kLevelQuantizationSlack) {
+ LOG(LS_INFO) << "[agc] Mic volume was manually adjusted. Updating "
+ << "stored level from " << level_ << " to " << voe_level;
+ level_ = voe_level;
+ // Always allow the user to increase the volume.
+ if (level_ > max_level_) {
+ SetMaxLevel(level_);
+ }
+ // Take no action in this case, since we can't be sure when the volume
+ // was manually adjusted. The compressor will still provide some of the
+ // desired gain change.
+ agc_->Reset();
+ return;
+ }
+
+ new_level = std::min(new_level, max_level_);
+ if (new_level == level_) {
+ return;
+ }
+
+ volume_callbacks_->SetMicVolume(new_level);
+ LOG(LS_INFO) << "[agc] voe_level=" << voe_level << ", "
+ << "level_=" << level_ << ", "
+ << "new_level=" << new_level;
+ level_ = new_level;
+}
+
+void AgcManagerDirect::SetMaxLevel(int level) {
+ assert(level >= kClippedLevelMin);
+ max_level_ = level;
+ // Scale the |kSurplusCompressionGain| linearly across the restricted
+ // level range.
+ max_compression_gain_ = kMaxCompressionGain + std::floor(
+ (1.f * kMaxMicLevel - max_level_) / (kMaxMicLevel - kClippedLevelMin) *
+ kSurplusCompressionGain + 0.5f);
+ LOG(LS_INFO) << "[agc] max_level_=" << max_level_
+ << ", max_compression_gain_=" << max_compression_gain_;
+}
+
+void AgcManagerDirect::SetCaptureMuted(bool muted) {
+ if (capture_muted_ == muted) {
+ return;
+ }
+ capture_muted_ = muted;
+
+ if (!muted) {
+ // When we unmute, we should reset things to be safe.
+ check_volume_on_next_process_ = true;
+ }
+}
+
+float AgcManagerDirect::voice_probability() {
+ return static_cast<float>(agc_->voice_probability());
+}
+
+int AgcManagerDirect::CheckVolumeAndReset() {
+ int level = volume_callbacks_->GetMicVolume();
+ if (level < 0) {
+ return -1;
+ }
+ // Reasons for taking action at startup:
+ // 1) A person starting a call is expected to be heard.
+ // 2) Independent of interpretation of |level| == 0 we should raise it so the
+ // AGC can do its job properly.
+ if (level == 0 && !startup_) {
+ LOG(LS_INFO) << "[agc] VolumeCallbacks returned level=0, taking no action.";
+ return 0;
+ }
+ if (level > kMaxMicLevel) {
+ LOG(LS_ERROR) << "VolumeCallbacks returned an invalid level=" << level;
+ return -1;
+ }
+ LOG(LS_INFO) << "[agc] Initial GetMicVolume()=" << level;
+
+ int minLevel = startup_ ? kMinInitMicLevel : kMinMicLevel;
+ if (level < minLevel) {
+ level = minLevel;
+ LOG(LS_INFO) << "[agc] Initial volume too low, raising to " << level;
+ volume_callbacks_->SetMicVolume(level);
+ }
+ agc_->Reset();
+ level_ = level;
+ startup_ = false;
+ return 0;
+}
+
+// Requests the RMS error from AGC and distributes the required gain change
+// between the digital compression stage and volume slider. We use the
+// compressor first, providing a slack region around the current slider
+// position to reduce movement.
+//
+// If the slider needs to be moved, we check first if the user has adjusted
+// it, in which case we take no action and cache the updated level.
+void AgcManagerDirect::UpdateGain() {
+ int rms_error = 0;
+ if (!agc_->GetRmsErrorDb(&rms_error)) {
+ // No error update ready.
+ return;
+ }
+ // The compressor will always add at least kMinCompressionGain. In effect,
+ // this adjusts our target gain upward by the same amount and rms_error
+ // needs to reflect that.
+ rms_error += kMinCompressionGain;
+
+ // Handle as much error as possible with the compressor first.
+ int raw_compression = std::max(std::min(rms_error, max_compression_gain_),
+ kMinCompressionGain);
+ // Deemphasize the compression gain error. Move halfway between the current
+ // target and the newly received target. This serves to soften perceptible
+ // intra-talkspurt adjustments, at the cost of some adaptation speed.
+ if ((raw_compression == max_compression_gain_ &&
+ target_compression_ == max_compression_gain_ - 1) ||
+ (raw_compression == kMinCompressionGain &&
+ target_compression_ == kMinCompressionGain + 1)) {
+ // Special case to allow the target to reach the endpoints of the
+ // compression range. The deemphasis would otherwise halt it at 1 dB shy.
+ target_compression_ = raw_compression;
+ } else {
+ target_compression_ = (raw_compression - target_compression_) / 2
+ + target_compression_;
+ }
+
+ // Residual error will be handled by adjusting the volume slider. Use the
+ // raw rather than deemphasized compression here as we would otherwise
+ // shrink the amount of slack the compressor provides.
+ int residual_gain = rms_error - raw_compression;
+ residual_gain = std::min(std::max(residual_gain, -kMaxResidualGainChange),
+ kMaxResidualGainChange);
+ LOG(LS_INFO) << "[agc] rms_error=" << rms_error << ", "
+ << "target_compression=" << target_compression_ << ", "
+ << "residual_gain=" << residual_gain;
+ if (residual_gain == 0)
+ return;
+
+ SetLevel(LevelFromGainError(residual_gain, level_));
+}
+
+void AgcManagerDirect::UpdateCompressor() {
+ if (compression_ == target_compression_) {
+ return;
+ }
+
+ // Adapt the compression gain slowly towards the target, in order to avoid
+ // highly perceptible changes.
+ if (target_compression_ > compression_) {
+ compression_accumulator_ += kCompressionGainStep;
+ } else {
+ compression_accumulator_ -= kCompressionGainStep;
+ }
+
+ // The compressor accepts integer gains in dB. Adjust the gain when
+ // we've come within half a stepsize of the nearest integer. (We don't
+ // check for equality due to potential floating point imprecision).
+ int new_compression = compression_;
+ int nearest_neighbor = std::floor(compression_accumulator_ + 0.5);
+ if (std::fabs(compression_accumulator_ - nearest_neighbor) <
+ kCompressionGainStep / 2) {
+ new_compression = nearest_neighbor;
+ }
+
+ // Set the new compression gain.
+ if (new_compression != compression_) {
+ compression_ = new_compression;
+ compression_accumulator_ = new_compression;
+ if (gctrl_->set_compression_gain_db(compression_) != 0) {
+ LOG_FERR1(LS_ERROR, set_compression_gain_db, compression_);
+ }
+ }
+}
+
+} // namespace webrtc
diff --git a/webrtc/modules/audio_processing/agc/agc_manager_direct.h b/webrtc/modules/audio_processing/agc/agc_manager_direct.h
new file mode 100644
index 0000000..fac5f02
--- /dev/null
+++ b/webrtc/modules/audio_processing/agc/agc_manager_direct.h
@@ -0,0 +1,98 @@
+/*
+ * Copyright (c) 2013 The WebRTC project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#ifndef WEBRTC_MODULES_AUDIO_PROCESSING_AGC_AGC_MANAGER_DIRECT_H_
+#define WEBRTC_MODULES_AUDIO_PROCESSING_AGC_AGC_MANAGER_DIRECT_H_
+
+#include "webrtc/modules/audio_processing/agc/agc.h"
+#include "webrtc/system_wrappers/interface/scoped_ptr.h"
+
+namespace webrtc {
+
+class AudioFrame;
+class DebugFile;
+class GainControl;
+
+// Callbacks that need to be injected into AgcManagerDirect to read and control
+// the volume values. They have different behavior if they are called from
+// AgcManager or AudioProcessing. This is done to remove the VoiceEngine
+// dependency in AgcManagerDirect.
+class VolumeCallbacks {
+ public:
+ virtual ~VolumeCallbacks() {}
+ virtual void SetMicVolume(int volume) = 0;
+ virtual int GetMicVolume() = 0;
+};
+
+// Direct interface to use AGC to set volume and compression values.
+// AudioProcessing uses this interface directly to integrate the callback-less
+// AGC. AgcManager delegates most of its calls here. See agc_manager.h for
+// undocumented methods.
+//
+// This class is not thread-safe.
+class AgcManagerDirect {
+ public:
+ // AgcManagerDirect will configure GainControl internally. The user is
+ // responsible for processing the audio using it after the call to Process.
+ AgcManagerDirect(GainControl* gctrl, VolumeCallbacks* volume_callbacks);
+ // Dependency injection for testing. Don't delete |agc| as the memory is owned
+ // by the manager.
+ AgcManagerDirect(Agc* agc,
+ GainControl* gctrl,
+ VolumeCallbacks* volume_callbacks);
+ ~AgcManagerDirect();
+
+ int Initialize();
+ void AnalyzePreProcess(int16_t* audio,
+ int num_channels,
+ int samples_per_channel);
+ void Process(const int16_t* audio, int length, int sample_rate_hz);
+
+ // Sets a new microphone level, after first checking that it hasn't been
+ // updated by the user, in which case no action is taken.
+ void SetLevel(int new_level);
+
+ // Set the maximum level the AGC is allowed to apply. Also updates the
+ // maximum compression gain to compensate. The level must be at least
+ // |kClippedLevelMin|.
+ void SetMaxLevel(int level);
+
+ void SetCaptureMuted(bool muted);
+ bool capture_muted() { return capture_muted_; }
+
+ float voice_probability();
+
+ private:
+ int CheckVolumeAndReset();
+ void UpdateGain();
+ void UpdateCompressor();
+
+ scoped_ptr<Agc> agc_;
+ GainControl* gctrl_;
+ VolumeCallbacks* volume_callbacks_;
+
+ int frames_since_clipped_;
+ int level_;
+ int max_level_;
+ int max_compression_gain_;
+ int target_compression_;
+ int compression_;
+ float compression_accumulator_;
+ bool capture_muted_;
+ bool check_volume_on_next_process_;
+ bool startup_;
+
+ scoped_ptr<DebugFile> file_preproc_;
+ scoped_ptr<DebugFile> file_postproc_;
+};
+
+} // namespace webrtc
+
+#endif // WEBRTC_MODULES_AUDIO_PROCESSING_AGC_AGC_MANAGER_DIRECT_H_
diff --git a/webrtc/modules/audio_processing/agc/agc_unittest.cc b/webrtc/modules/audio_processing/agc/agc_unittest.cc
new file mode 100644
index 0000000..3d5e2d7
--- /dev/null
+++ b/webrtc/modules/audio_processing/agc/agc_unittest.cc
@@ -0,0 +1,162 @@
+/*
+ * Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include "webrtc/modules/audio_processing/agc/agc.h"
+
+#include "gmock/gmock.h"
+#include "gtest/gtest.h"
+
+#include "webrtc/modules/audio_processing/agc/test/test_utils.h"
+#include "webrtc/modules/interface/module_common_types.h"
+#include "webrtc/test/testsupport/fileutils.h"
+
+using ::testing::_;
+using ::testing::AllOf;
+using ::testing::AtLeast;
+using ::testing::Eq;
+using ::testing::Gt;
+using ::testing::InSequence;
+using ::testing::Lt;
+using ::testing::Mock;
+using ::testing::SaveArg;
+
+namespace webrtc {
+namespace {
+
+// The tested values depend on this assumed gain.
+const int kMaxGain = 80;
+
+MATCHER_P(GtPointee, p, "") { return arg > *p; }
+MATCHER_P(LtPointee, p, "") { return arg < *p; }
+
+class AgcChecker {
+ public:
+ MOCK_METHOD2(LevelChanged, void(int iterations, int level));
+};
+
+class AgcTest : public ::testing::Test {
+ protected:
+ AgcTest()
+ : agc_(),
+ checker_(),
+ mic_level_(128) {
+ }
+
+ // A gain of <= -100 will zero out the signal.
+ void RunAgc(int iterations, float gain_db) {
+ FILE* input_file = fopen(
+ test::ResourcePath("voice_engine/audio_long16", "pcm").c_str(), "rb");
+ ASSERT_TRUE(input_file != NULL);
+
+ AudioFrame frame;
+ frame.sample_rate_hz_ = 16000;
+ frame.num_channels_ = 1;
+ frame.samples_per_channel_ = frame.sample_rate_hz_ / 100;
+ const size_t length = frame.samples_per_channel_ * frame.num_channels_;
+
+ float gain = Db2Linear(gain_db);
+ if (gain_db <= -100) {
+ gain = 0;
+ }
+
+ for (int i = 0; i < iterations; ++i) {
+ ASSERT_EQ(length, fread(frame.data_, sizeof(int16_t), length,
+ input_file));
+ SimulateMic(kMaxGain, mic_level_, &frame);
+ ApplyGainLinear(gain, &frame);
+ ASSERT_GE(agc_.Process(frame), 0);
+
+ int mic_level = agc_.MicLevel();
+ if (mic_level != mic_level_) {
+ printf("mic_level=%d\n", mic_level);
+ checker_.LevelChanged(i, mic_level);
+ }
+ mic_level_ = mic_level;
+ }
+ fclose(input_file);
+ }
+
+ Agc agc_;
+ AgcChecker checker_;
+ // Stores mic level between multiple runs of RunAgc in one test.
+ int mic_level_;
+};
+
+TEST_F(AgcTest, UpwardsChangeIsLimited) {
+ {
+ InSequence seq;
+ EXPECT_CALL(checker_, LevelChanged(Lt(500), Eq(179))).Times(1);
+ EXPECT_CALL(checker_, LevelChanged(_, Gt(179))).Times(AtLeast(1));
+ }
+ RunAgc(1000, -40);
+}
+
+TEST_F(AgcTest, DownwardsChangeIsLimited) {
+ {
+ InSequence seq;
+ EXPECT_CALL(checker_, LevelChanged(Lt(500), Eq(77))).Times(1);
+ EXPECT_CALL(checker_, LevelChanged(_, Lt(77))).Times(AtLeast(1));
+ }
+ RunAgc(1000, 40);
+}
+
+TEST_F(AgcTest, MovesUpToMaxAndDownToMin) {
+ int last_level = 128;
+ EXPECT_CALL(checker_, LevelChanged(_, GtPointee(&last_level)))
+ .Times(AtLeast(2))
+ .WillRepeatedly(SaveArg<1>(&last_level));
+ RunAgc(1000, -30);
+ EXPECT_EQ(255, last_level);
+ Mock::VerifyAndClearExpectations(&checker_);
+
+ EXPECT_CALL(checker_, LevelChanged(_, LtPointee(&last_level)))
+ .Times(AtLeast(2))
+ .WillRepeatedly(SaveArg<1>(&last_level));
+ RunAgc(1000, 50);
+ EXPECT_EQ(1, last_level);
+}
+
+TEST_F(AgcTest, HandlesZeroSignal) {
+ int last_level = 128;
+ // Doesn't respond to a zero signal.
+ EXPECT_CALL(checker_, LevelChanged(_, _)).Times(0);
+ RunAgc(1000, -100);
+ Mock::VerifyAndClearExpectations(&checker_);
+
+ // Reacts as usual afterwards.
+ EXPECT_CALL(checker_, LevelChanged(_, GtPointee(&last_level)))
+ .Times(AtLeast(2))
+ .WillRepeatedly(SaveArg<1>(&last_level));
+ RunAgc(500, -20);
+}
+
+TEST_F(AgcTest, ReachesSteadyState) {
+ int last_level = 128;
+ EXPECT_CALL(checker_, LevelChanged(_, _))
+ .Times(AtLeast(2))
+ .WillRepeatedly(SaveArg<1>(&last_level));
+ RunAgc(1000, -20);
+ Mock::VerifyAndClearExpectations(&checker_);
+
+ // If the level changes, it should be in a narrow band around the previous
+ // adaptation.
+ EXPECT_CALL(checker_, LevelChanged(_,
+ AllOf(Gt(last_level * 0.95), Lt(last_level * 1.05))))
+ .Times(AtLeast(0));
+ RunAgc(1000, -20);
+}
+
+// TODO(ajm): Add this test; requires measuring the signal RMS.
+TEST_F(AgcTest, AdaptsToCorrectRMS) {
+}
+
+} // namespace
+} // namespace webrtc
+
diff --git a/webrtc/modules/audio_processing/agc/circular_buffer.cc b/webrtc/modules/audio_processing/agc/circular_buffer.cc
new file mode 100644
index 0000000..8ecb760
--- /dev/null
+++ b/webrtc/modules/audio_processing/agc/circular_buffer.cc
@@ -0,0 +1,136 @@
+/*
+ * Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include "webrtc/modules/audio_processing/agc/circular_buffer.h"
+
+#include <assert.h>
+#include <stdlib.h>
+
+namespace webrtc {
+
// Allocates storage for |buffer_size| doubles and starts with an empty
// buffer (no wrap-around yet, running sum zero). Callers go through
// Create(), which rejects non-positive sizes.
AgcCircularBuffer::AgcCircularBuffer(int buffer_size)
    : buffer_(new double[buffer_size]),
      is_full_(false),
      index_(0),
      buffer_size_(buffer_size),
      sum_(0) {}

AgcCircularBuffer::~AgcCircularBuffer() {}
+
+void AgcCircularBuffer::Reset() {
+ is_full_ = false;
+ index_ = 0;
+ sum_ = 0;
+}
+
+AgcCircularBuffer* AgcCircularBuffer::Create(int buffer_size) {
+ if (buffer_size <= 0)
+ return NULL;
+ return new AgcCircularBuffer(buffer_size);
+}
+
+double AgcCircularBuffer::Oldest() const {
+ if (!is_full_)
+ return buffer_[0];
+ else
+ return buffer_[index_];
+}
+
+double AgcCircularBuffer::Mean() {
+ double m;
+ if (is_full_) {
+ m = sum_ / buffer_size_;
+ } else {
+ if (index_ > 0)
+ m = sum_ / index_;
+ else
+ m = 0;
+ }
+ return m;
+}
+
+void AgcCircularBuffer::Insert(double value) {
+ if (is_full_) {
+ sum_ -= buffer_[index_];
+ }
+ sum_ += value;
+ buffer_[index_] = value;
+ index_++;
+ if (index_ >= buffer_size_) {
+ is_full_ = true;
+ index_ = 0;
+ }
+}
+int AgcCircularBuffer::BufferLevel() {
+ if (is_full_)
+ return buffer_size_;
+ return index_;
+}
+
+int AgcCircularBuffer::Get(int index, double* value) const {
+ int err = ConvertToLinearIndex(&index);
+ if (err < 0)
+ return -1;
+ *value = buffer_[index];
+ return 0;
+}
+
+int AgcCircularBuffer::Set(int index, double value) {
+ int err = ConvertToLinearIndex(&index);
+ if (err < 0)
+ return -1;
+
+ sum_ -= buffer_[index];
+ buffer_[index] = value;
+ sum_ += value;
+ return 0;
+}
+
// Maps an age-based index (0 == most recent insertion) to the underlying
// array position, writing the result back through |index|. Returns -1 when
// the requested entry is out of range or has not been written yet.
int AgcCircularBuffer::ConvertToLinearIndex(int* index) const {
  if (*index < 0 || *index >= buffer_size_)
    return -1;

  // Before wrap-around only |index_| entries exist.
  if (!is_full_ && *index >= index_)
    return -1;

  // |index_ - 1| is the most recent write; step backwards and wrap.
  *index = index_ - 1 - *index;
  if (*index < 0)
    *index += buffer_size_;
  return 0;
}
+
// Zeroes a short spike ("transient"): a run of values >= |val_threshold| no
// wider than |width_threshold|, ending just before the most recent insertion.
// Returns 0 on success (including "nothing to remove"), -1 on index errors.
int AgcCircularBuffer::RemoveTransient(int width_threshold,
                                       double val_threshold) {
  // Too few samples to contain a bracketed transient; nothing to do.
  if (!is_full_ && index_ < width_threshold + 2)
    return 0;

  int index_1 = 0;                    // Most recent insertion.
  int index_2 = width_threshold + 1;  // Oldest position a transient can span.
  double v = 0;
  if (Get(index_1, &v) < 0)
    return -1;
  // Only scan if the newest value is below threshold, i.e. the spike (if
  // any) has just ended.
  if (v < val_threshold) {
    Set(index_1, 0);
    int index;
    // Walk backwards to the first sub-threshold sample preceding the spike.
    for (index = index_2; index > index_1; index--) {
      if (Get(index, &v) < 0)
        return -1;
      if (v < val_threshold)
        break;
    }
    // Zero every sample of the spike.
    for (; index > index_1; index--) {
      if (Set(index, 0.0) < 0)
        return -1;
    }
  }
  return 0;
}
+
+} // namespace webrtc
diff --git a/webrtc/modules/audio_processing/agc/circular_buffer.h b/webrtc/modules/audio_processing/agc/circular_buffer.h
new file mode 100644
index 0000000..98baa13
--- /dev/null
+++ b/webrtc/modules/audio_processing/agc/circular_buffer.h
@@ -0,0 +1,69 @@
+/*
+ * Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#ifndef WEBRTC_MODULES_AUDIO_PROCESSING_AGC_CIRCULAR_BUFFER_H_
+#define WEBRTC_MODULES_AUDIO_PROCESSING_AGC_CIRCULAR_BUFFER_H_
+
+#include "webrtc/system_wrappers/interface/scoped_ptr.h"
+
+namespace webrtc {
+
+// A circular buffer tailored to the need of this project. It stores last
+// K samples of the input, and keeps track of the mean of the last samples.
+//
+// It is used in class "PitchBasedActivity" to keep track of posterior
+// probabilities in the past few seconds. The posterior probabilities are used
+// to recursively update prior probabilities.
// Fixed-capacity circular buffer of doubles that additionally maintains a
// running sum, so the mean of the stored window is available in O(1).
class AgcCircularBuffer {
 public:
  // Factory method; returns NULL if |buffer_size| is not positive.
  static AgcCircularBuffer* Create(int buffer_size);
  ~AgcCircularBuffer();

  // True once the buffer has wrapped around at least once.
  bool is_full() const { return is_full_; }
  // Get the oldest entry in the buffer.
  double Oldest() const;
  // Insert new value into the buffer, evicting the oldest once full.
  void Insert(double value);
  // Reset buffer, forget the past, start fresh.
  void Reset();

  // The mean value of the elements in the buffer. The return value is zero if
  // buffer is empty, i.e. no value is inserted.
  double Mean();
  // Remove transients. If the values exceed |val_threshold| for a period
  // shorter than or equal to |width_threshold|, then that period is considered
  // transient and set to zero.
  int RemoveTransient(int width_threshold, double val_threshold);

 private:
  explicit AgcCircularBuffer(int buffer_size);
  // Get previous values. |index = 0| corresponds to the most recent
  // insertion. |index = 1| is the one before the most recent insertion, and
  // so on.
  int Get(int index, double* value) const;
  // Set a given position to |value|. |index| is interpreted as above.
  int Set(int index, double value);
  // Return the number of valid elements in the buffer.
  int BufferLevel();

  // Convert an index with the interpretation as Get() method to the
  // corresponding linear index.
  int ConvertToLinearIndex(int* index) const;

  scoped_ptr<double[]> buffer_;  // Backing storage of |buffer_size_| doubles.
  bool is_full_;                 // Set once the write index has wrapped.
  int index_;                    // Next write position.
  int buffer_size_;              // Capacity.
  double sum_;                   // Running sum of the valid elements.
};
+
+} // namespace webrtc
+#endif // WEBRTC_MODULES_AUDIO_PROCESSING_AGC_CIRCULAR_BUFFER_H_
diff --git a/webrtc/modules/audio_processing/agc/circular_buffer_unittest.cc b/webrtc/modules/audio_processing/agc/circular_buffer_unittest.cc
new file mode 100644
index 0000000..6bab2e5
--- /dev/null
+++ b/webrtc/modules/audio_processing/agc/circular_buffer_unittest.cc
@@ -0,0 +1,132 @@
+/*
+ * Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include "webrtc/modules/audio_processing/agc/circular_buffer.h"
+
+#include <stdio.h>
+
+#include "testing/gtest/include/gtest/gtest.h"
+#include "webrtc/system_wrappers/interface/scoped_ptr.h"
+
+namespace webrtc {
+
+static const int kWidthThreshold = 7;
+static const double kValThreshold = 1.0;
+static const int kLongBuffSize = 100;
+static const int kShortBuffSize = 10;
+
+static void InsertSequentially(int k, AgcCircularBuffer* circular_buffer) {
+ double mean_val;
+ for (int n = 1; n <= k; n++) {
+ EXPECT_TRUE(!circular_buffer->is_full());
+ circular_buffer->Insert(n);
+ mean_val = circular_buffer->Mean();
+ EXPECT_EQ((n + 1.0) / 2., mean_val);
+ }
+}
+
+static void Insert(double value, int num_insertion,
+ AgcCircularBuffer* circular_buffer) {
+ for (int n = 0; n < num_insertion; n++)
+ circular_buffer->Insert(value);
+}
+
// Convenience wrapper: inserts |num_zeros| zeros into |circular_buffer|.
static void InsertZeros(int num_zeros, AgcCircularBuffer* circular_buffer) {
  Insert(0.0, num_zeros, circular_buffer);
}
+
// Covers the basic lifecycle: empty mean, sequential fill, correct running
// mean after wrap-around, and Reset() restoring the initial state.
TEST(AgcCircularBufferTest, GeneralTest) {
  scoped_ptr<AgcCircularBuffer> circular_buffer(
      AgcCircularBuffer::Create(kShortBuffSize));
  double mean_val;

  // Mean should return zero if nothing is inserted.
  mean_val = circular_buffer->Mean();
  EXPECT_DOUBLE_EQ(0.0, mean_val);
  InsertSequentially(kShortBuffSize, circular_buffer.get());

  // Should be full.
  EXPECT_TRUE(circular_buffer->is_full());
  // Correct update after being full. Inserting n evicts the old n, so the
  // mean of {1..kShortBuffSize} is unchanged.
  for (int n = 1; n < kShortBuffSize; n++) {
    circular_buffer->Insert(n);
    mean_val = circular_buffer->Mean();
    EXPECT_DOUBLE_EQ((kShortBuffSize + 1.) / 2., mean_val);
    EXPECT_TRUE(circular_buffer->is_full());
  }

  // Check reset. This should be like starting fresh.
  circular_buffer->Reset();
  mean_val = circular_buffer->Mean();
  EXPECT_DOUBLE_EQ(0, mean_val);
  InsertSequentially(kShortBuffSize, circular_buffer.get());
  EXPECT_TRUE(circular_buffer->is_full());
}
+
// Spikes of every width from 1 to kWidthThreshold, each followed by a zero,
// must be classified as transients and removed (mean back to zero).
TEST(AgcCircularBufferTest, TransientsRemoval) {
  scoped_ptr<AgcCircularBuffer> circular_buffer(
      AgcCircularBuffer::Create(kLongBuffSize));
  // Let the first transient be in wrap-around.
  InsertZeros(kLongBuffSize - kWidthThreshold / 2, circular_buffer.get());

  double push_val = kValThreshold;
  double mean_val;
  for (int k = kWidthThreshold; k >= 1; k--) {
    Insert(push_val, k, circular_buffer.get());
    // Terminate the spike with a zero so it is bracketed on both sides.
    circular_buffer->Insert(0);
    mean_val = circular_buffer->Mean();
    EXPECT_DOUBLE_EQ(k * push_val / kLongBuffSize, mean_val);
    circular_buffer->RemoveTransient(kWidthThreshold, kValThreshold);
    mean_val = circular_buffer->Mean();
    EXPECT_DOUBLE_EQ(0, mean_val);
  }
}
+
// Runs wider than kWidthThreshold, or runs whose newest sample is still above
// threshold, must NOT be removed; only a genuine short bracketed spike is.
TEST(AgcCircularBufferTest, TransientDetection) {
  scoped_ptr<AgcCircularBuffer> circular_buffer(
      AgcCircularBuffer::Create(kLongBuffSize));
  // Let the first transient be in wrap-around.
  int num_insertion = kLongBuffSize - kWidthThreshold / 2;
  InsertZeros(num_insertion, circular_buffer.get());

  double push_val = 2;
  // This is longer than a transient and shouldn't be removed.
  int num_non_zero_elements = kWidthThreshold + 1;
  Insert(push_val, num_non_zero_elements, circular_buffer.get());

  double mean_val = circular_buffer->Mean();
  EXPECT_DOUBLE_EQ(num_non_zero_elements * push_val / kLongBuffSize, mean_val);
  circular_buffer->Insert(0);
  EXPECT_EQ(0, circular_buffer->RemoveTransient(kWidthThreshold,
                                                kValThreshold));
  mean_val = circular_buffer->Mean();
  EXPECT_DOUBLE_EQ(num_non_zero_elements * push_val / kLongBuffSize, mean_val);

  // A transient right after a non-transient, should be removed and mean is
  // not changed.
  num_insertion = 3;
  Insert(push_val, num_insertion, circular_buffer.get());
  circular_buffer->Insert(0);
  EXPECT_EQ(0, circular_buffer->RemoveTransient(kWidthThreshold,
                                                kValThreshold));
  mean_val = circular_buffer->Mean();
  EXPECT_DOUBLE_EQ(num_non_zero_elements * push_val / kLongBuffSize, mean_val);

  // Last input is larger than threshold, although the sequence is short but
  // it shouldn't be considered transient.
  Insert(push_val, num_insertion, circular_buffer.get());
  num_non_zero_elements += num_insertion;
  EXPECT_EQ(0, circular_buffer->RemoveTransient(kWidthThreshold,
                                                kValThreshold));
  mean_val = circular_buffer->Mean();
  EXPECT_DOUBLE_EQ(num_non_zero_elements * push_val / kLongBuffSize, mean_val);
}
+
+} // namespace webrtc
diff --git a/webrtc/modules/audio_processing/agc/common.h b/webrtc/modules/audio_processing/agc/common.h
new file mode 100644
index 0000000..e9ed1ed
--- /dev/null
+++ b/webrtc/modules/audio_processing/agc/common.h
@@ -0,0 +1,27 @@
+/*
+ * Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#ifndef WEBRTC_MODULES_AUDIO_PROCESSING_AGC_COMMON_H_
+#define WEBRTC_MODULES_AUDIO_PROCESSING_AGC_COMMON_H_
+
// Processing sample rate of the AGC sub-module and the number of samples in
// a 10 ms frame at that rate.
static const int kSampleRateHz = 16000;
static const int kLength10Ms = kSampleRateHz / 100;
static const int kMaxNumFrames = 4;

// Per-frame audio features, up to |kMaxNumFrames| frames per chunk. Only the
// first |num_frames| entries of each array are valid.
struct AudioFeatures {
  double log_pitch_gain[kMaxNumFrames];
  double pitch_lag_hz[kMaxNumFrames];
  double spectral_peak[kMaxNumFrames];
  double rms[kMaxNumFrames];
  int num_frames;
  // NOTE(review): presumably true when the chunk was judged silent by the
  // producer — confirm at the code that fills this struct.
  bool silence;
};
+
+#endif // WEBRTC_MODULES_AUDIO_PROCESSING_AGC_COMMON_H_
diff --git a/webrtc/modules/audio_processing/agc/gain_map_internal.h b/webrtc/modules/audio_processing/agc/gain_map_internal.h
new file mode 100644
index 0000000..53c71c1
--- /dev/null
+++ b/webrtc/modules/audio_processing/agc/gain_map_internal.h
@@ -0,0 +1,275 @@
+/*
+ * Copyright (c) 2013 The WebRTC project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#ifndef WEBRTC_MODULES_AUDIO_PROCESSING_AGC_GAIN_MAP_INTERNAL_H_
+#define WEBRTC_MODULES_AUDIO_PROCESSING_AGC_GAIN_MAP_INTERNAL_H_
+
+static const int kGainMapSize = 256;
+// Uses parameters: si = 2, sf = 0.25, D = 8/256
+static const int kGainMap[kGainMapSize] = {
+ -56,
+ -54,
+ -52,
+ -50,
+ -48,
+ -47,
+ -45,
+ -43,
+ -42,
+ -40,
+ -38,
+ -37,
+ -35,
+ -34,
+ -33,
+ -31,
+ -30,
+ -29,
+ -27,
+ -26,
+ -25,
+ -24,
+ -23,
+ -22,
+ -20,
+ -19,
+ -18,
+ -17,
+ -16,
+ -15,
+ -14,
+ -14,
+ -13,
+ -12,
+ -11,
+ -10,
+ -9,
+ -8,
+ -8,
+ -7,
+ -6,
+ -5,
+ -5,
+ -4,
+ -3,
+ -2,
+ -2,
+ -1,
+ 0,
+ 0,
+ 1,
+ 1,
+ 2,
+ 3,
+ 3,
+ 4,
+ 4,
+ 5,
+ 5,
+ 6,
+ 6,
+ 7,
+ 7,
+ 8,
+ 8,
+ 9,
+ 9,
+ 10,
+ 10,
+ 11,
+ 11,
+ 12,
+ 12,
+ 13,
+ 13,
+ 13,
+ 14,
+ 14,
+ 15,
+ 15,
+ 15,
+ 16,
+ 16,
+ 17,
+ 17,
+ 17,
+ 18,
+ 18,
+ 18,
+ 19,
+ 19,
+ 19,
+ 20,
+ 20,
+ 21,
+ 21,
+ 21,
+ 22,
+ 22,
+ 22,
+ 23,
+ 23,
+ 23,
+ 24,
+ 24,
+ 24,
+ 24,
+ 25,
+ 25,
+ 25,
+ 26,
+ 26,
+ 26,
+ 27,
+ 27,
+ 27,
+ 28,
+ 28,
+ 28,
+ 28,
+ 29,
+ 29,
+ 29,
+ 30,
+ 30,
+ 30,
+ 30,
+ 31,
+ 31,
+ 31,
+ 32,
+ 32,
+ 32,
+ 32,
+ 33,
+ 33,
+ 33,
+ 33,
+ 34,
+ 34,
+ 34,
+ 35,
+ 35,
+ 35,
+ 35,
+ 36,
+ 36,
+ 36,
+ 36,
+ 37,
+ 37,
+ 37,
+ 38,
+ 38,
+ 38,
+ 38,
+ 39,
+ 39,
+ 39,
+ 39,
+ 40,
+ 40,
+ 40,
+ 40,
+ 41,
+ 41,
+ 41,
+ 41,
+ 42,
+ 42,
+ 42,
+ 42,
+ 43,
+ 43,
+ 43,
+ 44,
+ 44,
+ 44,
+ 44,
+ 45,
+ 45,
+ 45,
+ 45,
+ 46,
+ 46,
+ 46,
+ 46,
+ 47,
+ 47,
+ 47,
+ 47,
+ 48,
+ 48,
+ 48,
+ 48,
+ 49,
+ 49,
+ 49,
+ 49,
+ 50,
+ 50,
+ 50,
+ 50,
+ 51,
+ 51,
+ 51,
+ 51,
+ 52,
+ 52,
+ 52,
+ 52,
+ 53,
+ 53,
+ 53,
+ 53,
+ 54,
+ 54,
+ 54,
+ 54,
+ 55,
+ 55,
+ 55,
+ 55,
+ 56,
+ 56,
+ 56,
+ 56,
+ 57,
+ 57,
+ 57,
+ 57,
+ 58,
+ 58,
+ 58,
+ 58,
+ 59,
+ 59,
+ 59,
+ 59,
+ 60,
+ 60,
+ 60,
+ 60,
+ 61,
+ 61,
+ 61,
+ 61,
+ 62,
+ 62,
+ 62,
+ 62,
+ 63,
+ 63,
+ 63,
+ 63,
+ 64
+};
+
+#endif // WEBRTC_MODULES_AUDIO_PROCESSING_AGC_GAIN_MAP_INTERNAL_H_
diff --git a/webrtc/modules/audio_processing/agc/gmm.cc b/webrtc/modules/audio_processing/agc/gmm.cc
new file mode 100644
index 0000000..9ad8ef9
--- /dev/null
+++ b/webrtc/modules/audio_processing/agc/gmm.cc
@@ -0,0 +1,61 @@
+/*
+ * Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include "webrtc/modules/audio_processing/agc/gmm.h"
+
+#include <math.h>
+#include <stdlib.h>
+
+#include "webrtc/typedefs.h"
+
+namespace webrtc {
+
+static const int kMaxDimension = 10;
+
// Element-wise subtraction: out[i] = in[i] - mean_vec[i] for i in
// [0, dimension).
static void RemoveMean(const double* in, const double* mean_vec,
                       int dimension, double* out) {
  for (int i = 0; i < dimension; ++i)
    out[i] = in[i] - mean_vec[i];
}
+
// Evaluates the Gaussian exponent -0.5 * in' * covar_inv * in, where
// |covar_inv| is a row-major |dimension| x |dimension| matrix.
static double ComputeExponent(const double* in, const double* covar_inv,
                              int dimension) {
  double quad_form = 0;
  for (int i = 0; i < dimension; ++i) {
    double row_dot = 0;
    for (int j = 0; j < dimension; ++j)
      row_dot += covar_inv[i * dimension + j] * in[j];
    quad_form += row_dot * in[i];
  }
  return -0.5 * quad_form;
}
+
// Evaluates the GMM density at point |x|: sum over mixtures of
// exp(-0.5 * (x - mean)' * covar_inv * (x - mean) + weight), where |weight|
// already folds in the log mixture weight and normalization constant (see
// GmmParameters). Returns -1 if the dimension exceeds kMaxDimension, which
// is distinguishable from any valid (non-negative) density.
double EvaluateGmm(const double* x, const GmmParameters& gmm_parameters) {
  if (gmm_parameters.dimension > kMaxDimension) {
    return -1;  // This is invalid pdf so the caller can check this.
  }
  double f = 0;
  double v[kMaxDimension];  // Scratch for the mean-removed vector.
  const double* mean_vec = gmm_parameters.mean;
  const double* covar_inv = gmm_parameters.covar_inverse;

  for (int n = 0; n < gmm_parameters.num_mixtures; n++) {
    RemoveMean(x, mean_vec, gmm_parameters.dimension, v);
    double q = ComputeExponent(v, covar_inv, gmm_parameters.dimension) +
        gmm_parameters.weight[n];
    f += exp(q);
    // Advance to the next mixture's mean row and inverse-covariance matrix.
    mean_vec += gmm_parameters.dimension;
    covar_inv += gmm_parameters.dimension * gmm_parameters.dimension;
  }
  return f;
}
+
+} // namespace webrtc
diff --git a/webrtc/modules/audio_processing/agc/gmm.h b/webrtc/modules/audio_processing/agc/gmm.h
new file mode 100644
index 0000000..90ce95d
--- /dev/null
+++ b/webrtc/modules/audio_processing/agc/gmm.h
@@ -0,0 +1,45 @@
+/*
+ * Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#ifndef WEBRTC_MODULES_AUDIO_PROCESSING_AGC_GMM_H_
+#define WEBRTC_MODULES_AUDIO_PROCESSING_AGC_GMM_H_
+
+namespace webrtc {
+
+// A structure that specifies a GMM.
+// A GMM is formulated as
+// f(x) = w[0] * mixture[0] + w[1] * mixture[1] + ... +
+// w[num_mixtures - 1] * mixture[num_mixtures - 1];
+// Where a 'mixture' is a Gaussian density.
+
// Parameters describing one GMM. All pointers reference caller-owned tables
// (typically the static arrays in *_gmm_tables.h) and must stay valid for as
// long as the struct is in use.
struct GmmParameters {
  // weight[n] = log(w[n]) - |dimension|/2 * log(2*pi) - 1/2 * log(det(cov[n]));
  // where cov[n] is the covariance matrix of mixture n.
  const double* weight;
  // Pointer to the first element of a |num_mixtures| x |dimension| matrix
  // where the kth row is the mean of the kth mixture.
  const double* mean;
  // Pointer to the first element of a |num_mixtures|x|dimension|x|dimension|
  // 3D-matrix, where the kth 2D-matrix is the inverse of the covariance
  // matrix of the kth mixture.
  const double* covar_inverse;
  // Dimensionality of the mixtures.
  int dimension;
  // Number of the mixtures.
  int num_mixtures;
};
+
+// Evaluate the given GMM, according to |gmm_parameters|, at the given point
+// |x|. If the dimensionality of the given GMM is larger that the maximum
+// acceptable dimension by the following function -1 is returned.
+double EvaluateGmm(const double* x, const GmmParameters& gmm_parameters);
+
+} // namespace webrtc
+#endif // WEBRTC_MODULES_AUDIO_PROCESSING_AGC_GMM_H_
diff --git a/webrtc/modules/audio_processing/agc/gmm_unittest.cc b/webrtc/modules/audio_processing/agc/gmm_unittest.cc
new file mode 100644
index 0000000..4ca658d
--- /dev/null
+++ b/webrtc/modules/audio_processing/agc/gmm_unittest.cc
@@ -0,0 +1,65 @@
+/*
+ * Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include "webrtc/modules/audio_processing/agc/gmm.h"
+
+#include <math.h>
+
+#include "testing/gtest/include/gtest/gtest.h"
+#include "webrtc/modules/audio_processing/agc/noise_gmm_tables.h"
+#include "webrtc/modules/audio_processing/agc/voice_gmm_tables.h"
+
+namespace webrtc {
+
// Evaluates both the voice and noise GMMs at the centroid of their mixture
// means and checks the densities against MATLAB reference values.
TEST(GmmTest, EvaluateGmm) {
  GmmParameters noise_gmm;
  GmmParameters voice_gmm;

  // Setup noise GMM.
  noise_gmm.dimension = kNoiseGmmDim;
  noise_gmm.num_mixtures = kNoiseGmmNumMixtures;
  noise_gmm.weight = kNoiseGmmWeights;
  noise_gmm.mean = &kNoiseGmmMean[0][0];
  noise_gmm.covar_inverse = &kNoiseGmmCovarInverse[0][0][0];

  // Setup voice GMM.
  voice_gmm.dimension = kVoiceGmmDim;
  voice_gmm.num_mixtures = kVoiceGmmNumMixtures;
  voice_gmm.weight = kVoiceGmmWeights;
  voice_gmm.mean = &kVoiceGmmMean[0][0];
  voice_gmm.covar_inverse = &kVoiceGmmCovarInverse[0][0][0];

  // Test vectors. These are the mean of the GMM means.
  const double kXVoice[kVoiceGmmDim] = {
      -1.35893162459863, 602.862491970368, 178.022069191324};
  const double kXNoise[kNoiseGmmDim] = {
      -2.33443722724409, 2827.97828765184, 141.114178166812};

  // Expected pdf values. These values are computed in MATLAB using EvalGmm.m
  const double kPdfNoise = 1.88904409403101e-07;
  const double kPdfVoice = 1.30453996982266e-06;

  // Relative error should be smaller than the following value.
  const double kAcceptedRelativeErr = 1e-10;

  // Test Voice.
  double pdf = EvaluateGmm(kXVoice, voice_gmm);
  EXPECT_GT(pdf, 0);
  double relative_error = fabs(pdf - kPdfVoice) / kPdfVoice;
  EXPECT_LE(relative_error, kAcceptedRelativeErr);

  // Test Noise.
  pdf = EvaluateGmm(kXNoise, noise_gmm);
  EXPECT_GT(pdf, 0);
  relative_error = fabs(pdf - kPdfNoise) / kPdfNoise;
  EXPECT_LE(relative_error, kAcceptedRelativeErr);
}
+
+} // namespace webrtc
diff --git a/webrtc/modules/audio_processing/agc/histogram.cc b/webrtc/modules/audio_processing/agc/histogram.cc
new file mode 100644
index 0000000..ab18c65
--- /dev/null
+++ b/webrtc/modules/audio_processing/agc/histogram.cc
@@ -0,0 +1,228 @@
+/*
+ * Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include "webrtc/modules/audio_processing/agc/histogram.h"
+
+#include <cmath>
+#include <cstring>
+
+#include "webrtc/modules/interface/module_common_types.h"
+#include "webrtc/system_wrappers/interface/compile_assert.h"
+
+namespace webrtc {
+
+static const double kHistBinCenters[] = {
+ 7.59621091765857e-02, 9.02036021061016e-02, 1.07115112009343e-01,
+ 1.27197217770508e-01, 1.51044347572047e-01, 1.79362373905283e-01,
+ 2.12989507320644e-01, 2.52921107370304e-01, 3.00339145144454e-01,
+ 3.56647189489147e-01, 4.23511952494003e-01, 5.02912623991786e-01,
+ 5.97199455365749e-01, 7.09163326739184e-01, 8.42118356728544e-01,
+ 1.00000000000000e+00, 1.18748153630660e+00, 1.41011239906908e+00,
+ 1.67448243801153e+00, 1.98841697800836e+00, 2.36120844786349e+00,
+ 2.80389143520905e+00, 3.32956930911896e+00, 3.95380207843188e+00,
+ 4.69506696634852e+00, 5.57530533426190e+00, 6.62057214370769e+00,
+ 7.86180718043869e+00, 9.33575086877358e+00, 1.10860317842269e+01,
+ 1.31644580546776e+01, 1.56325508754123e+01, 1.85633655299256e+01,
+ 2.20436538184971e+01, 2.61764319021997e+01, 3.10840295702492e+01,
+ 3.69117111886792e+01, 4.38319755100383e+01, 5.20496616180135e+01,
+ 6.18080121423973e+01, 7.33958732149108e+01, 8.71562442838066e+01,
+ 1.03496430860848e+02, 1.22900100720889e+02, 1.45941600416277e+02,
+ 1.73302955873365e+02, 2.05794060286978e+02, 2.44376646872353e+02,
+ 2.90192756065437e+02, 3.44598539797631e+02, 4.09204403447902e+02,
+ 4.85922673669740e+02, 5.77024203055553e+02, 6.85205587130498e+02,
+ 8.13668983291589e+02, 9.66216894324125e+02, 1.14736472207740e+03,
+ 1.36247442287647e+03, 1.61791322085579e+03, 1.92124207711260e+03,
+ 2.28143949334655e+03, 2.70916727454970e+03, 3.21708611729384e+03,
+ 3.82023036499473e+03, 4.53645302286906e+03, 5.38695420497926e+03,
+ 6.39690865534207e+03, 7.59621091765857e+03, 9.02036021061016e+03,
+ 1.07115112009343e+04, 1.27197217770508e+04, 1.51044347572047e+04,
+ 1.79362373905283e+04, 2.12989507320644e+04, 2.52921107370304e+04,
+ 3.00339145144454e+04, 3.56647189489147e+04};
+
+static const double kProbQDomain = 1024.0;
+// Loudness of -15 dB (smallest expected loudness) in log domain,
+// loudness_db = 13.5 * log10(rms);
+static const double kLogDomainMinBinCenter = -2.57752062648587;
+// Loudness step of 1 dB in log domain
+static const double kLogDomainStepSizeInverse = 5.81954605750359;
+
+static const int kTransientWidthThreshold = 7;
+static const double kLowProbabilityThreshold = 0.2;
+
+static const int kLowProbThresholdQ10 = static_cast<int>(
+ kLowProbabilityThreshold * kProbQDomain);
+
// Non-sliding histogram: no circular buffer is allocated
// (|len_circular_buffer_| == 0 disables the sliding-window path in Update()).
Histogram::Histogram()
    : num_updates_(0),
      audio_content_q10_(0),
      bin_count_q10_(),
      activity_probability_(),
      hist_bin_index_(),
      buffer_index_(0),
      buffer_is_full_(false),
      len_circular_buffer_(0),
      len_high_activity_(0) {
  // Guard against the bin-center table drifting out of sync with kHistSize.
  COMPILE_ASSERT(kHistSize == sizeof(kHistBinCenters) /
      sizeof(kHistBinCenters[0]), histogram_bin_centers_incorrect_size);
}
+
// Sliding histogram over the last |window_size| updates; allocates parallel
// circular buffers for the probabilities and their bin indices.
Histogram::Histogram(int window_size)
    : num_updates_(0),
      audio_content_q10_(0),
      bin_count_q10_(),
      activity_probability_(new int[window_size]),
      hist_bin_index_(new int[window_size]),
      buffer_index_(0),
      buffer_is_full_(false),
      len_circular_buffer_(window_size),
      len_high_activity_(0) {}

Histogram::~Histogram() {}
+
+void Histogram::Update(double rms, double activity_probaility) {
+ // If circular histogram is activated then remove the oldest entry.
+ if (len_circular_buffer_ > 0)
+ RemoveOldestEntryAndUpdate();
+
+ // Find the corresponding bin.
+ int hist_index = GetBinIndex(rms);
+ // To Q10 domain.
+ int prob_q10 = static_cast<int16_t>(floor(activity_probaility *
+ kProbQDomain));
+ InsertNewestEntryAndUpdate(prob_q10, hist_index);
+}
+
// Retires the oldest windowed observation by subtracting its Q10 probability
// from its bin. Does nothing until the circular buffer has filled once
// (|buffer_index_| points at the oldest entry only after wrap-around).
void Histogram::RemoveOldestEntryAndUpdate() {
  assert(len_circular_buffer_ > 0);
  // Do nothing if circular buffer is not full.
  if (!buffer_is_full_)
    return;

  int oldest_prob = activity_probability_[buffer_index_];
  int oldest_hist_index = hist_bin_index_[buffer_index_];
  UpdateHist(-oldest_prob, oldest_hist_index);
}
+
// Undoes the last |len_high_activity_| high-activity entries (a short spike):
// their contribution is subtracted from the histogram and their stored
// probabilities zeroed. Walks the circular buffer backwards from the most
// recent write.
void Histogram::RemoveTransient() {
  // Don't expect to be here if high-activity region is longer than
  // |kTransientWidthThreshold| or there has not been any transient.
  assert(len_high_activity_ <= kTransientWidthThreshold);
  int index = (buffer_index_ > 0) ? (buffer_index_ - 1) :
      len_circular_buffer_ - 1;
  while (len_high_activity_ > 0) {
    UpdateHist(-activity_probability_[index], hist_bin_index_[index]);
    activity_probability_[index] = 0;
    // Step backwards with wrap-around.
    index = (index > 0) ? (index - 1) : (len_circular_buffer_ - 1);
    len_high_activity_--;
  }
}
+
// Records a new observation in the circular buffer (when enabled), applying
// transient suppression: a low-probability sample ends any ongoing
// high-activity run, and if that run was short enough it is retroactively
// removed as a spike. Finally the histogram itself is updated.
void Histogram::InsertNewestEntryAndUpdate(int activity_prob_q10,
                                           int hist_index) {
  // Update the circular buffer if it is enabled.
  if (len_circular_buffer_ > 0) {
    // Removing transient.
    if (activity_prob_q10 <= kLowProbThresholdQ10) {
      // Lower than threshold probability, set it to zero.
      activity_prob_q10 = 0;
      // Check if this has been a transient.
      if (len_high_activity_ <= kTransientWidthThreshold)
        RemoveTransient();  // Remove this transient.
      len_high_activity_ = 0;
    } else if (len_high_activity_ <= kTransientWidthThreshold) {
      len_high_activity_++;
    }
    // Updating the circular buffer.
    activity_probability_[buffer_index_] = activity_prob_q10;
    hist_bin_index_[buffer_index_] = hist_index;
    // Increment the buffer index and check for wrap-around.
    buffer_index_++;
    if (buffer_index_ >= len_circular_buffer_) {
      buffer_index_ = 0;
      buffer_is_full_ = true;
    }
  }

  num_updates_++;
  // Saturate the counter: if the increment wrapped negative, step back.
  // NOTE(review): signed int overflow is undefined behavior in C++, so this
  // guard is not reliable — confirm whether saturation is required here.
  if (num_updates_ < 0)
    num_updates_--;

  UpdateHist(activity_prob_q10, hist_index);
}
+
// Applies a (possibly negative) Q10 probability delta to one bin and to the
// total audio content, keeping the invariant
// audio_content_q10_ == sum(bin_count_q10_).
void Histogram::UpdateHist(int activity_prob_q10, int hist_index) {
  bin_count_q10_[hist_index] += activity_prob_q10;
  audio_content_q10_ += activity_prob_q10;
}
+
// Total histogram mass (sum of all bins), converted from Q10 back to a
// floating-point probability count.
double Histogram::AudioContent() const {
  return audio_content_q10_ / kProbQDomain;
}
+
// Creates a non-sliding histogram (accumulates over its whole lifetime).
Histogram* Histogram::Create() {
  return new Histogram;
}

// Creates a sliding histogram over the last |window_size| updates; returns
// NULL for a negative window (0 degenerates to the non-sliding behavior).
Histogram* Histogram::Create(int window_size) {
  if (window_size < 0)
    return NULL;
  return new Histogram(window_size);
}
+
// Forgets all history: clears bins, counters, and the sliding-window state.
// (The circular buffers' contents need not be cleared: they are unreachable
// once |buffer_index_| is 0 and |buffer_is_full_| is false.)
void Histogram::Reset() {
  // Reset the histogram, audio-content and number of updates.
  memset(bin_count_q10_, 0, sizeof(bin_count_q10_));
  audio_content_q10_ = 0;
  num_updates_ = 0;
  // Empty the circular buffer.
  buffer_index_ = 0;
  buffer_is_full_ = false;
  len_high_activity_ = 0;
}
+
// Maps |rms| to the index of the nearest bin center in kHistBinCenters.
// Values beyond the table's range clamp to the first/last bin.
int Histogram::GetBinIndex(double rms) {
  // First exclude overload cases.
  if (rms <= kHistBinCenters[0]) {
    return 0;
  } else if (rms >= kHistBinCenters[kHistSize - 1]) {
    return kHistSize - 1;
  } else {
    // The quantizer is uniform in log domain. Alternatively we could do binary
    // search in linear domain.
    double rms_log = log(rms);

    int index = static_cast<int>(floor((rms_log - kLogDomainMinBinCenter) *
                                       kLogDomainStepSizeInverse));
    // The final decision is in linear domain: pick whichever of the two
    // neighboring bin centers is closer to the midpoint boundary |b|.
    double b = 0.5 * (kHistBinCenters[index] + kHistBinCenters[index + 1]);
    if (rms > b) {
      return index + 1;
    }
    return index;
  }
}
+
// Probability-weighted mean of the bin centers, i.e. the expected RMS under
// the current histogram. Falls back to the smallest bin center when the
// histogram is empty.
double Histogram::CurrentRms() const {
  double p;
  double mean_val = 0;
  if (audio_content_q10_ > 0) {
    // Normalize bin counts by the total mass to get probabilities.
    double p_total_inverse = 1. / static_cast<double>(audio_content_q10_);
    for (int n = 0; n < kHistSize; n++) {
      p = static_cast<double>(bin_count_q10_[n]) * p_total_inverse;
      mean_val += p * kHistBinCenters[n];
    }
  } else {
    mean_val = kHistBinCenters[0];
  }
  return mean_val;
}
+
+} // namespace webrtc
diff --git a/webrtc/modules/audio_processing/agc/histogram.h b/webrtc/modules/audio_processing/agc/histogram.h
new file mode 100644
index 0000000..8f5c518
--- /dev/null
+++ b/webrtc/modules/audio_processing/agc/histogram.h
@@ -0,0 +1,91 @@
+/*
+ * Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#ifndef WEBRTC_MODULES_AUDIO_PROCESSING_AGC_HISTOGRAM_H_
+#define WEBRTC_MODULES_AUDIO_PROCESSING_AGC_HISTOGRAM_H_
+
+#include <string.h>
+
+#include "webrtc/system_wrappers/interface/scoped_ptr.h"
+#include "webrtc/typedefs.h"
+
+namespace webrtc {
+
+// This class implements the histogram of loudness with circular buffers so that
+// the histogram tracks the last T seconds of the loudness.
+class Histogram {
+ public:
+ // Create a non-sliding Histogram.
+ static Histogram* Create();
+
+ // Create a sliding Histogram, i.e. the histogram represents the last
+ // |window_size| samples.
+ static Histogram* Create(int window_size);
+ ~Histogram();
+
+ // Insert RMS and the corresponding activity probability.
+ void Update(double rms, double activity_probability);
+
+ // Reset the histogram, forget the past.
+ void Reset();
+
+ // Current loudness, which is actually the mean of histogram in loudness
+ // domain.
+ double CurrentRms() const;
+
+ // Sum of the histogram content.
+ double AudioContent() const;
+
+ // Number of times the histogram has been updated.
+ int num_updates() const { return num_updates_; }
+
+ private:
+ Histogram();
+ explicit Histogram(int window);
+
+ // Find the histogram bin associated with the given |rms|.
+ int GetBinIndex(double rms);
+
+ void RemoveOldestEntryAndUpdate();
+ void InsertNewestEntryAndUpdate(int activity_prob_q10, int hist_index);
+ void UpdateHist(int activity_prob_q10, int hist_index);
+ void RemoveTransient();
+
+ // Number of histogram bins.
+ static const int kHistSize = 77;
+
+ // Number of times the histogram is updated
+ int num_updates_;
+ // Audio content, this should be equal to the sum of the components of
+ // |bin_count_q10_|.
+ int64_t audio_content_q10_;
+
+ // Histogram of input RMS in Q10 with |kHistSize_| bins. In each 'Update(),'
+ // we increment the associated histogram-bin with the given probability. The
+ // increment is implemented in Q10 to avoid rounding errors.
+ int64_t bin_count_q10_[kHistSize];
+
+ // Circular buffer for probabilities
+ scoped_ptr<int[]> activity_probability_;
+ // Circular buffer for histogram-indices of probabilities.
+ scoped_ptr<int[]> hist_bin_index_;
+ // Current index of circular buffer, where the newest data will be written to,
+ // therefore, pointing to the oldest data if buffer is full.
+ int buffer_index_;
+ // Indicating if buffer is full and we had a wrap around.
+ int buffer_is_full_;
+ // Size of circular buffer.
+ int len_circular_buffer_;
+ int len_high_activity_;
+};
+
+} // namespace webrtc
+
+#endif // WEBRTC_MODULES_AUDIO_PROCESSING_AGC_HISTOGRAM_H_
diff --git a/webrtc/modules/audio_processing/agc/histogram_unittest.cc b/webrtc/modules/audio_processing/agc/histogram_unittest.cc
new file mode 100644
index 0000000..0ae7591
--- /dev/null
+++ b/webrtc/modules/audio_processing/agc/histogram_unittest.cc
@@ -0,0 +1,104 @@
+/*
+ * Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+// Use CreateHistUnittestFile.m to generate the input file.
+
+#include "webrtc/modules/audio_processing/agc/histogram.h"
+
+#include <stdio.h>
+#include <cmath>
+
+#include "gtest/gtest.h"
+#include "webrtc/test/testsupport/fileutils.h"
+#include "webrtc/modules/audio_processing/agc/utility.h"
+
+namespace webrtc {
+
+struct InputOutput {
+ double rms;
+ double activity_probability;
+ double audio_content;
+ double loudness;
+};
+
+const double kRelativeErrTol = 1e-10;
+
+class HistogramTest : public ::testing::Test {
+ protected:
+ void RunTest(bool enable_circular_buff,
+ const char* filename);
+
+ private:
+ void TestClean();
+ scoped_ptr<Histogram> hist_;
+};
+
+void HistogramTest::TestClean() {
+ EXPECT_EQ(hist_->CurrentRms(), 7.59621091765857e-02);
+ EXPECT_EQ(hist_->AudioContent(), 0);
+ EXPECT_EQ(hist_->num_updates(), 0);
+}
+
+void HistogramTest::RunTest(bool enable_circular_buff, const char* filename) {
+ FILE* in_file = fopen(filename, "rb");
+ ASSERT_TRUE(in_file != NULL);
+ if (enable_circular_buff) {
+ int buffer_size;
+ EXPECT_EQ(fread(&buffer_size, sizeof(buffer_size), 1, in_file), 1u);
+ hist_.reset(Histogram::Create(buffer_size));
+ } else {
+ hist_.reset(Histogram::Create());
+ }
+ TestClean();
+
+ InputOutput io;
+ int num_updates = 0;
+ int num_reset = 0;
+ while (fread(&io, sizeof(InputOutput), 1, in_file) == 1) {
+ if (io.rms < 0) {
+ // We have to reset.
+ hist_->Reset();
+ TestClean();
+ num_updates = 0;
+ num_reset++;
+ // Read the next chunk of input.
+ if (fread(&io, sizeof(InputOutput), 1, in_file) != 1)
+ break;
+ }
+ hist_->Update(io.rms, io.activity_probability);
+ num_updates++;
+ EXPECT_EQ(hist_->num_updates(), num_updates);
+ double audio_content = hist_->AudioContent();
+
+ double abs_err = std::min(audio_content, io.audio_content) *
+ kRelativeErrTol;
+
+ ASSERT_NEAR(audio_content, io.audio_content, abs_err);
+ double current_loudness = Linear2Loudness(hist_->CurrentRms());
+ abs_err = std::min(fabs(current_loudness), fabs(io.loudness)) *
+ kRelativeErrTol;
+ ASSERT_NEAR(current_loudness, io.loudness, abs_err);
+ }
+ fclose(in_file);
+}
+
+TEST_F(HistogramTest, ActiveCircularBuffer) {
+ RunTest(true,
+ test::ResourcePath("audio_processing/agc/agc_with_circular_buffer",
+ "dat").c_str());
+}
+
+TEST_F(HistogramTest, InactiveCircularBuffer) {
+ RunTest(false,
+ test::ResourcePath("audio_processing/agc/agc_no_circular_buffer",
+ "dat").c_str());
+}
+
+} // namespace webrtc
diff --git a/webrtc/modules/audio_processing/agc/mock_agc.h b/webrtc/modules/audio_processing/agc/mock_agc.h
new file mode 100644
index 0000000..1c36a05
--- /dev/null
+++ b/webrtc/modules/audio_processing/agc/mock_agc.h
@@ -0,0 +1,36 @@
+/*
+ * Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#ifndef WEBRTC_MODULES_AUDIO_PROCESSING_AGC_MOCK_AGC_H_
+#define WEBRTC_MODULES_AUDIO_PROCESSING_AGC_MOCK_AGC_H_
+
+#include "webrtc/modules/audio_processing/agc/agc.h"
+
+#include "gmock/gmock.h"
+#include "webrtc/modules/interface/module_common_types.h"
+
+namespace webrtc {
+
+class MockAgc : public Agc {
+ public:
+ MOCK_METHOD2(AnalyzePreproc, float(const int16_t* audio, int length));
+ MOCK_METHOD3(Process, int(const int16_t* audio, int length,
+ int sample_rate_hz));
+ MOCK_METHOD1(GetRmsErrorDb, bool(int* error));
+ MOCK_METHOD0(Reset, void());
+ MOCK_METHOD1(set_target_level_dbfs, int(int level));
+ MOCK_CONST_METHOD0(target_level_dbfs, int());
+ MOCK_METHOD1(EnableStandaloneVad, void(bool enable));
+ MOCK_CONST_METHOD0(standalone_vad_enabled, bool());
+};
+
+} // namespace webrtc
+
+#endif // WEBRTC_MODULES_AUDIO_PROCESSING_AGC_MOCK_AGC_H_
diff --git a/webrtc/modules/audio_processing/agc/noise_gmm_tables.h b/webrtc/modules/audio_processing/agc/noise_gmm_tables.h
new file mode 100644
index 0000000..779fd8c
--- /dev/null
+++ b/webrtc/modules/audio_processing/agc/noise_gmm_tables.h
@@ -0,0 +1,77 @@
+/*
+ * Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+// GMM tables for inactive segments. Generated by MakeGmmTables.m.
+
+#ifndef WEBRTC_MODULES_AUDIO_PROCESSING_AGC_NOISE_GMM_TABLES_H_
+#define WEBRTC_MODULES_AUDIO_PROCESSING_AGC_NOISE_GMM_TABLES_H_
+
+static const int kNoiseGmmNumMixtures = 12;
+static const int kNoiseGmmDim = 3;
+
+static const double kNoiseGmmCovarInverse[kNoiseGmmNumMixtures]
+ [kNoiseGmmDim][kNoiseGmmDim] = {
+ {{ 7.36219567592941e+00, 4.83060785179861e-03, 1.23335151497610e-02},
+ { 4.83060785179861e-03, 1.65289507047817e-04, -2.41490588169997e-04},
+ { 1.23335151497610e-02, -2.41490588169997e-04, 6.59472060689382e-03}},
+ {{ 8.70265239309140e+00, -5.30636201431086e-04, 5.44014966585347e-03},
+ {-5.30636201431086e-04, 3.11095453521008e-04, -1.86287206836035e-04},
+ { 5.44014966585347e-03, -1.86287206836035e-04, 6.29493388790744e-04}},
+ {{ 4.53467851955055e+00, -3.92977536695197e-03, -2.46521420693317e-03},
+ {-3.92977536695197e-03, 4.94650752632750e-05, -1.08587438501826e-05},
+ {-2.46521420693317e-03, -1.08587438501826e-05, 9.28793975422261e-05}},
+ {{ 9.26817997114275e-01, -4.03976069276753e-04, -3.56441427392165e-03},
+ {-4.03976069276753e-04, 2.51976251631430e-06, 1.46914206734572e-07},
+ {-3.56441427392165e-03, 1.46914206734572e-07, 8.19914567685373e-05}},
+ {{ 7.61715986787441e+00, -1.54889041216888e-04, 2.41756280071656e-02},
+ {-1.54889041216888e-04, 3.50282550461672e-07, -6.27251196972490e-06},
+ { 2.41756280071656e-02, -6.27251196972490e-06, 1.45061847649872e-02}},
+ {{ 8.31193642663158e+00, -3.84070508164323e-04, -3.09750630821876e-02},
+ {-3.84070508164323e-04, 3.80433432277336e-07, -1.14321142836636e-06},
+ {-3.09750630821876e-02, -1.14321142836636e-06, 8.35091486289997e-04}},
+ {{ 9.67283151270894e-01, 5.82465812445039e-05, -3.18350798617053e-03},
+ { 5.82465812445039e-05, 2.23762672000318e-07, -7.74196587408623e-07},
+ {-3.18350798617053e-03, -7.74196587408623e-07, 3.85120938338325e-04}},
+ {{ 8.28066236985388e+00, 5.87634508319763e-05, 6.99303090891743e-03},
+ { 5.87634508319763e-05, 2.93746018618058e-07, 3.40843332882272e-07},
+ { 6.99303090891743e-03, 3.40843332882272e-07, 1.99379171190344e-04}},
+ {{ 6.07488998675646e+00, -1.11494526618473e-02, 5.10013111123381e-03},
+ {-1.11494526618473e-02, 6.99238879921751e-04, 5.36718550370870e-05},
+ { 5.10013111123381e-03, 5.36718550370870e-05, 5.26909853276753e-04}},
+ {{ 6.90492021419175e+00, 4.20639355257863e-04, -2.38612752336481e-03},
+ { 4.20639355257863e-04, 3.31246767338153e-06, -2.42052288150859e-08},
+ {-2.38612752336481e-03, -2.42052288150859e-08, 4.46608368363412e-04}},
+ {{ 1.31069150869715e+01, -1.73718583865670e-04, -1.97591814508578e-02},
+ {-1.73718583865670e-04, 2.80451716300124e-07, 9.96570755379865e-07},
+ {-1.97591814508578e-02, 9.96570755379865e-07, 2.41361900868847e-03}},
+ {{ 4.69566344239814e+00, -2.61077567563690e-04, 5.26359000761433e-03},
+ {-2.61077567563690e-04, 1.82420859823767e-06, -7.83645887541601e-07},
+ { 5.26359000761433e-03, -7.83645887541601e-07, 1.33586288288802e-02}}};
+
+static const double kNoiseGmmMean[kNoiseGmmNumMixtures][kNoiseGmmDim] = {
+ {-2.01386094766163e+00, 1.69702162045397e+02, 7.41715804872181e+01},
+ {-1.94684591777290e+00, 1.42398396732668e+02, 1.64186321157831e+02},
+ {-2.29319297562437e+00, 3.86415425589868e+02, 2.13452215267125e+02},
+ {-3.25487177070268e+00, 1.08668712553616e+03, 2.33119949467419e+02},
+ {-2.13159632447467e+00, 4.83821702557717e+03, 6.86786166673740e+01},
+ {-2.26171410780526e+00, 4.79420193982422e+03, 1.53222513286450e+02},
+ {-3.32166740703185e+00, 4.35161135834358e+03, 1.33206448431316e+02},
+ {-2.19290322814343e+00, 3.98325506609408e+03, 2.13249167359934e+02},
+ {-2.02898459255404e+00, 7.37039893155007e+03, 1.12518527491926e+02},
+ {-2.26150236399500e+00, 1.54896745196145e+03, 1.49717357868579e+02},
+ {-2.00417668301790e+00, 3.82434760310304e+03, 1.07438913004312e+02},
+ {-2.30193040814533e+00, 1.43953696546439e+03, 7.04085275122649e+01}};
+
+static const double kNoiseGmmWeights[kNoiseGmmNumMixtures] = {
+ -1.09422832086193e+01, -1.10847897513425e+01, -1.36767587732187e+01,
+ -1.79789356118641e+01, -1.42830169160894e+01, -1.56500228061379e+01,
+ -1.83124990950113e+01, -1.69979436177477e+01, -1.12329424387828e+01,
+ -1.41311785780639e+01, -1.47171861448585e+01, -1.35963362781839e+01};
+#endif // WEBRTC_MODULES_AUDIO_PROCESSING_AGC_NOISE_GMM_TABLES_H_
diff --git a/webrtc/modules/audio_processing/agc/pitch_based_vad.cc b/webrtc/modules/audio_processing/agc/pitch_based_vad.cc
new file mode 100644
index 0000000..675a1c8
--- /dev/null
+++ b/webrtc/modules/audio_processing/agc/pitch_based_vad.cc
@@ -0,0 +1,123 @@
+/*
+ * Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include "webrtc/modules/audio_processing/agc/pitch_based_vad.h"
+
+#include <assert.h>
+#include <math.h>
+#include <string.h>
+
+#include "webrtc/modules/audio_processing/agc/circular_buffer.h"
+#include "webrtc/modules/audio_processing/agc/common.h"
+#include "webrtc/modules/audio_processing/agc/noise_gmm_tables.h"
+#include "webrtc/modules/audio_processing/agc/voice_gmm_tables.h"
+#include "webrtc/modules/interface/module_common_types.h"
+#include "webrtc/system_wrappers/interface/compile_assert.h"
+
+namespace webrtc {
+
+COMPILE_ASSERT(kNoiseGmmDim == kVoiceGmmDim,
+ noise_and_voice_gmm_dimension_not_equal);
+
+// These values should match MATLAB counterparts for unit-tests to pass.
+static const int kPosteriorHistorySize = 500; // 5 sec of 10 ms frames.
+static const double kInitialPriorProbability = 0.3;
+static const int kTransientWidthThreshold = 7;
+static const double kLowProbabilityThreshold = 0.2;
+
+static double LimitProbability(double p) {
+ const double kLimHigh = 0.99;
+ const double kLimLow = 0.01;
+
+ if (p > kLimHigh)
+ p = kLimHigh;
+ else if (p < kLimLow)
+ p = kLimLow;
+ return p;
+}
+
+PitchBasedVad::PitchBasedVad()
+ : p_prior_(kInitialPriorProbability),
+ circular_buffer_(AgcCircularBuffer::Create(kPosteriorHistorySize)) {
+ // Setup noise GMM.
+ noise_gmm_.dimension = kNoiseGmmDim;
+ noise_gmm_.num_mixtures = kNoiseGmmNumMixtures;
+ noise_gmm_.weight = kNoiseGmmWeights;
+ noise_gmm_.mean = &kNoiseGmmMean[0][0];
+ noise_gmm_.covar_inverse = &kNoiseGmmCovarInverse[0][0][0];
+
+ // Setup voice GMM.
+ voice_gmm_.dimension = kVoiceGmmDim;
+ voice_gmm_.num_mixtures = kVoiceGmmNumMixtures;
+ voice_gmm_.weight = kVoiceGmmWeights;
+ voice_gmm_.mean = &kVoiceGmmMean[0][0];
+ voice_gmm_.covar_inverse = &kVoiceGmmCovarInverse[0][0][0];
+}
+
+PitchBasedVad::~PitchBasedVad() {}
+
+int PitchBasedVad::VoicingProbability(const AudioFeatures& features,
+ double* p_combined) {
+ double p;
+ double gmm_features[3];
+ double pdf_features_given_voice;
+ double pdf_features_given_noise;
+ // These limits are the same in matlab implementation 'VoicingProbGMM().'
+ const double kLimLowLogPitchGain = -2.0;
+ const double kLimHighLogPitchGain = -0.9;
+ const double kLimLowSpectralPeak = 200;
+ const double kLimHighSpectralPeak = 2000;
+ const double kEps = 1e-12;
+ for (int n = 0; n < features.num_frames; n++) {
+ gmm_features[0] = features.log_pitch_gain[n];
+ gmm_features[1] = features.spectral_peak[n];
+ gmm_features[2] = features.pitch_lag_hz[n];
+
+ pdf_features_given_voice = EvaluateGmm(gmm_features, voice_gmm_);
+ pdf_features_given_noise = EvaluateGmm(gmm_features, noise_gmm_);
+
+ if (features.spectral_peak[n] < kLimLowSpectralPeak ||
+ features.spectral_peak[n] > kLimHighSpectralPeak ||
+ features.log_pitch_gain[n] < kLimLowLogPitchGain) {
+ pdf_features_given_voice = kEps * pdf_features_given_noise;
+ } else if (features.log_pitch_gain[n] > kLimHighLogPitchGain) {
+ pdf_features_given_noise = kEps * pdf_features_given_voice;
+ }
+
+ p = p_prior_ * pdf_features_given_voice / (pdf_features_given_voice *
+ p_prior_ + pdf_features_given_noise * (1 - p_prior_));
+
+ p = LimitProbability(p);
+
+ // Combine pitch-based probability with standalone probability, before
+ // updating prior probabilities.
+ double prod_active = p * p_combined[n];
+ double prod_inactive = (1 - p) * (1 - p_combined[n]);
+ p_combined[n] = prod_active / (prod_active + prod_inactive);
+
+ if (UpdatePrior(p_combined[n]) < 0)
+ return -1;
+ // Limit prior probability. With a zero prior probability the posterior
+ // probability is always zero.
+ p_prior_ = LimitProbability(p_prior_);
+ }
+ return 0;
+}
+
+int PitchBasedVad::UpdatePrior(double p) {
+ circular_buffer_->Insert(p);
+ if (circular_buffer_->RemoveTransient(kTransientWidthThreshold,
+ kLowProbabilityThreshold) < 0)
+ return -1;
+ p_prior_ = circular_buffer_->Mean();
+ return 0;
+}
+
+} // namespace webrtc
diff --git a/webrtc/modules/audio_processing/agc/pitch_based_vad.h b/webrtc/modules/audio_processing/agc/pitch_based_vad.h
new file mode 100644
index 0000000..41183a5
--- /dev/null
+++ b/webrtc/modules/audio_processing/agc/pitch_based_vad.h
@@ -0,0 +1,56 @@
+/*
+ * Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#ifndef WEBRTC_MODULES_AUDIO_PROCESSING_AGC_PITCH_BASED_VAD_H_
+#define WEBRTC_MODULES_AUDIO_PROCESSING_AGC_PITCH_BASED_VAD_H_
+
+#include "webrtc/modules/audio_processing/agc/common.h"
+#include "webrtc/modules/audio_processing/agc/gmm.h"
+#include "webrtc/system_wrappers/interface/scoped_ptr.h"
+#include "webrtc/typedefs.h"
+
+namespace webrtc {
+
+class AudioFrame;
+class AgcCircularBuffer;
+
+// Computes the probability of the input audio frame to be active given
+// the corresponding pitch-gain and lag of the frame.
+class PitchBasedVad {
+ public:
+ PitchBasedVad();
+ ~PitchBasedVad();
+
+ // Compute pitch-based voicing probability, given the features.
+ // features: a structure containing features required for computing voicing
+ // probabilities.
+ //
+ // p_combined: an array which contains the combined activity probabilities
+ // computed prior to the call of this function. The method,
+ // then, computes the voicing probabilities and combine them
+ // with the given values. The result are returned in |p|.
+ int VoicingProbability(const AudioFeatures& features, double* p_combined);
+ private:
+ int UpdatePrior(double p);
+
+ // TODO(turajs): maybe defining this at a higher level (maybe enum) so that
+ // all the code recognize it as "no-error."
+ static const int kNoError = 0;
+
+ GmmParameters noise_gmm_;
+ GmmParameters voice_gmm_;
+
+ double p_prior_;
+
+ scoped_ptr<AgcCircularBuffer> circular_buffer_;
+};
+
+} // namespace webrtc
+#endif // WEBRTC_MODULES_AUDIO_PROCESSING_AGC_PITCH_BASED_VAD_H_
diff --git a/webrtc/modules/audio_processing/agc/pitch_based_vad_unittest.cc b/webrtc/modules/audio_processing/agc/pitch_based_vad_unittest.cc
new file mode 100644
index 0000000..3ec0baa
--- /dev/null
+++ b/webrtc/modules/audio_processing/agc/pitch_based_vad_unittest.cc
@@ -0,0 +1,71 @@
+/*
+ * Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include "webrtc/modules/audio_processing/agc/pitch_based_vad.h"
+
+#include <math.h>
+#include <stdio.h>
+#include <string.h>
+
+#include "gtest/gtest.h"
+#include "webrtc/test/testsupport/fileutils.h"
+
+namespace webrtc {
+
+TEST(PitchBasedVadTest, VoicingProbabilityTest) {
+ std::string spectral_peak_file_name = test::ResourcePath(
+ "audio_processing/agc/agc_spectral_peak", "dat");
+ FILE* spectral_peak_file = fopen(spectral_peak_file_name.c_str(), "rb");
+ ASSERT_TRUE(spectral_peak_file != NULL);
+
+ std::string pitch_gain_file_name =
+ test::ResourcePath("audio_processing/agc/agc_pitch_gain", "dat");
+ FILE* pitch_gain_file = fopen(pitch_gain_file_name.c_str(), "rb");
+ ASSERT_TRUE(pitch_gain_file != NULL);
+
+ std::string pitch_lag_file_name =
+ test::ResourcePath("audio_processing/agc/agc_pitch_lag", "dat");
+ FILE* pitch_lag_file = fopen(pitch_lag_file_name.c_str(), "rb");
+ ASSERT_TRUE(pitch_lag_file != NULL);
+
+ std::string voicing_prob_file_name =
+ test::ResourcePath("audio_processing/agc/agc_voicing_prob", "dat");
+ FILE* voicing_prob_file = fopen(voicing_prob_file_name.c_str(), "rb");
+ ASSERT_TRUE(voicing_prob_file != NULL);
+
+ PitchBasedVad vad_;
+
+ double reference_activity_probability;
+
+ AudioFeatures audio_features;
+ memset(&audio_features, 0, sizeof(audio_features));
+ audio_features.num_frames = 1;
+ while (fread(audio_features.spectral_peak,
+ sizeof(audio_features.spectral_peak[0]), 1,
+ spectral_peak_file) == 1u) {
+ double p;
+ ASSERT_EQ(1u, fread(audio_features.log_pitch_gain, sizeof(
+ audio_features.log_pitch_gain[0]), 1, pitch_gain_file));
+ ASSERT_EQ(1u, fread(audio_features.pitch_lag_hz, sizeof(
+ audio_features.pitch_lag_hz[0]), 1, pitch_lag_file));
+ ASSERT_EQ(1u, fread(&reference_activity_probability, sizeof(
+ reference_activity_probability), 1, voicing_prob_file));
+
+ p = 0.5; // Initialize to the neutral value for combining probabilities.
+ EXPECT_EQ(0, vad_.VoicingProbability(audio_features, &p));
+ EXPECT_NEAR(p, reference_activity_probability, 0.01);
+ }
+
+ fclose(spectral_peak_file);
+ fclose(pitch_gain_file);
+ fclose(pitch_lag_file);
+}
+
+} // namespace webrtc
diff --git a/webrtc/modules/audio_processing/agc/pitch_internal.cc b/webrtc/modules/audio_processing/agc/pitch_internal.cc
new file mode 100644
index 0000000..b394074
--- /dev/null
+++ b/webrtc/modules/audio_processing/agc/pitch_internal.cc
@@ -0,0 +1,52 @@
+/*
+ * Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include "webrtc/modules/audio_processing/agc/pitch_internal.h"
+
+#include <cmath>
+
+// A 4-to-3 linear interpolation.
+// The interpolation constants are derived as following:
+// Input pitch parameters are updated every 7.5 ms. Within a 30-ms interval
+// we are interested in pitch parameters of 0-5 ms, 10-15ms and 20-25ms. This is
+// like interpolating 4-to-6 and keep the odd samples.
+// The reason behind this is that LPC coefficients are computed for the first
+// half of each 10ms interval.
+static void PitchInterpolation(double old_val, const double* in, double* out) {
+ out[0] = 1. / 6. * old_val + 5. / 6. * in[0];
+ out[1] = 5. / 6. * in[1] + 1. / 6. * in[2];
+ out[2] = 0.5 * in[2] + 0.5 * in[3];
+}
+
+
+void GetSubframesPitchParameters(int sampling_rate_hz,
+ double* gains,
+ double* lags,
+ int num_in_frames,
+ int num_out_frames,
+ double* log_old_gain,
+ double* old_lag,
+ double* log_pitch_gain,
+ double* pitch_lag_hz) {
+ // Gain interpolation is in log-domain, also returned in log-domain.
+ for (int n = 0; n < num_in_frames; n++)
+ gains[n] = log(gains[n] + 1e-12);
+
+ // Interpolate lags and gains.
+ PitchInterpolation(*log_old_gain, gains, log_pitch_gain);
+ *log_old_gain = gains[num_in_frames - 1];
+ PitchInterpolation(*old_lag, lags, pitch_lag_hz);
+ *old_lag = lags[num_in_frames - 1];
+
+ // Convert pitch-lags to Hertz.
+ for (int n = 0; n < num_out_frames; n++) {
+ pitch_lag_hz[n] = (sampling_rate_hz) / (pitch_lag_hz[n]);
+ }
+}
diff --git a/webrtc/modules/audio_processing/agc/pitch_internal.h b/webrtc/modules/audio_processing/agc/pitch_internal.h
new file mode 100644
index 0000000..ed73760
--- /dev/null
+++ b/webrtc/modules/audio_processing/agc/pitch_internal.h
@@ -0,0 +1,26 @@
+/*
+ * Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#ifndef WEBRTC_MODULES_AUDIO_PROCESSING_AGC_PITCH_INTERNAL_H_
+#define WEBRTC_MODULES_AUDIO_PROCESSING_AGC_PITCH_INTERNAL_H_
+
+// TODO(turajs): Write a description of this function. Also be consistent with
+// usage of |sampling_rate_hz| vs |kSamplingFreqHz|.
+void GetSubframesPitchParameters(int sampling_rate_hz,
+ double* gains,
+ double* lags,
+ int num_in_frames,
+ int num_out_frames,
+ double* log_old_gain,
+ double* old_lag,
+ double* log_pitch_gain,
+ double* pitch_lag_hz);
+
+#endif // WEBRTC_MODULES_AUDIO_PROCESSING_AGC_PITCH_INTERNAL_H_
diff --git a/webrtc/modules/audio_processing/agc/pitch_internal_unittest.cc b/webrtc/modules/audio_processing/agc/pitch_internal_unittest.cc
new file mode 100644
index 0000000..8998f90
--- /dev/null
+++ b/webrtc/modules/audio_processing/agc/pitch_internal_unittest.cc
@@ -0,0 +1,50 @@
+/*
+ * Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include "webrtc/modules/audio_processing/agc/pitch_internal.h"
+
+#include <math.h>
+
+#include "gtest/gtest.h"
+
+TEST(PitchInternalTest, test) {
+ const int kSamplingRateHz = 8000;
+ const int kNumInputParameters = 4;
+ const int kNumOutputParameters = 3;
+ // Inputs
+ double log_old_gain = log(0.5);
+ double gains[] = {0.6, 0.2, 0.5, 0.4};
+
+ double old_lag = 70;
+ double lags[] = {90, 111, 122, 50};
+
+ // Expected outputs
+ double expected_log_pitch_gain[] = {-0.541212549898316, -1.45672279045507,
+ -0.80471895621705};
+ double expected_log_old_gain = log(gains[kNumInputParameters - 1]);
+
+ double expected_pitch_lag_hz[] = {92.3076923076923, 70.9010339734121,
+ 93.0232558139535};
+ double expected_old_lag = lags[kNumInputParameters - 1];
+
+ double log_pitch_gain[kNumOutputParameters];
+ double pitch_lag_hz[kNumInputParameters];
+
+ GetSubframesPitchParameters(kSamplingRateHz, gains, lags, kNumInputParameters,
+ kNumOutputParameters, &log_old_gain, &old_lag,
+ log_pitch_gain, pitch_lag_hz);
+
+ for (int n = 0; n < 3; n++) {
+ EXPECT_NEAR(pitch_lag_hz[n], expected_pitch_lag_hz[n], 1e-6);
+ EXPECT_NEAR(log_pitch_gain[n], expected_log_pitch_gain[n], 1e-8);
+ }
+ EXPECT_NEAR(old_lag, expected_old_lag, 1e-6);
+ EXPECT_NEAR(log_old_gain, expected_log_old_gain, 1e-8);
+}
diff --git a/webrtc/modules/audio_processing/agc/pole_zero_filter.cc b/webrtc/modules/audio_processing/agc/pole_zero_filter.cc
new file mode 100644
index 0000000..3c41e33
--- /dev/null
+++ b/webrtc/modules/audio_processing/agc/pole_zero_filter.cc
@@ -0,0 +1,111 @@
+/*
+ * Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include "webrtc/modules/audio_processing/agc/pole_zero_filter.h"
+
+#include <stdlib.h>
+#include <string.h>
+#include <algorithm>
+
+namespace webrtc {
+
+PoleZeroFilter* PoleZeroFilter::Create(const float* numerator_coefficients,
+ int order_numerator,
+ const float* denominator_coefficients,
+ int order_denominator) {
+ if (order_numerator < 0 ||
+ order_denominator < 0 ||
+ order_numerator > kMaxFilterOrder ||
+ order_denominator > kMaxFilterOrder ||
+ denominator_coefficients[0] == 0 ||
+ numerator_coefficients == NULL ||
+ denominator_coefficients == NULL)
+ return NULL;
+ return new PoleZeroFilter(numerator_coefficients, order_numerator,
+ denominator_coefficients, order_denominator);
+}
+
+PoleZeroFilter::PoleZeroFilter(const float* numerator_coefficients,
+ int order_numerator,
+ const float* denominator_coefficients,
+ int order_denominator)
+ : past_input_(),
+ past_output_(),
+ numerator_coefficients_(),
+ denominator_coefficients_(),
+ order_numerator_(order_numerator),
+ order_denominator_(order_denominator),
+ highest_order_(std::max(order_denominator, order_numerator)) {
+ memcpy(numerator_coefficients_, numerator_coefficients,
+ sizeof(numerator_coefficients_[0]) * (order_numerator_ + 1));
+ memcpy(denominator_coefficients_, denominator_coefficients,
+ sizeof(denominator_coefficients_[0]) * (order_denominator_ + 1));
+
+ if (denominator_coefficients_[0] != 1) {
+ for (int n = 0; n <= order_numerator_; n++)
+ numerator_coefficients_[n] /= denominator_coefficients_[0];
+ for (int n = 0; n <= order_denominator_; n++)
+ denominator_coefficients_[n] /= denominator_coefficients_[0];
+ }
+}
+
+template <typename T>
+static float FilterArPast(const T* past, int order,
+ const float* coefficients) {
+ float sum = 0.0f;
+ int past_index = order - 1;
+ for (int k = 1; k <= order; k++, past_index--)
+ sum += coefficients[k] * past[past_index];
+ return sum;
+}
+
+int PoleZeroFilter::Filter(const int16_t* in,
+ int num_input_samples,
+ float* output) {
+ if (in == NULL || num_input_samples < 0 || output == NULL)
+ return -1;
+ // This is the typical case, just a memcpy.
+ const int k = std::min(num_input_samples, highest_order_);
+ int n;
+ for (n = 0; n < k; n++) {
+ output[n] = in[n] * numerator_coefficients_[0];
+ output[n] += FilterArPast(&past_input_[n], order_numerator_,
+ numerator_coefficients_);
+ output[n] -= FilterArPast(&past_output_[n], order_denominator_,
+ denominator_coefficients_);
+
+ past_input_[n + order_numerator_] = in[n];
+ past_output_[n + order_denominator_] = output[n];
+ }
+ if (highest_order_ < num_input_samples) {
+ for (int m = 0; n < num_input_samples; n++, m++) {
+ output[n] = in[n] * numerator_coefficients_[0];
+ output[n] += FilterArPast(&in[m], order_numerator_,
+ numerator_coefficients_);
+ output[n] -= FilterArPast(&output[m], order_denominator_,
+ denominator_coefficients_);
+ }
+ // Record into the past signal.
+ memcpy(past_input_, &in[num_input_samples - order_numerator_],
+ sizeof(in[0]) * order_numerator_);
+ memcpy(past_output_, &output[num_input_samples - order_denominator_],
+ sizeof(output[0]) * order_denominator_);
+ } else {
+ // Odd case that the length of the input is shorter that filter order.
+ memmove(past_input_, &past_input_[num_input_samples], order_numerator_ *
+ sizeof(past_input_[0]));
+ memmove(past_output_, &past_output_[num_input_samples], order_denominator_ *
+ sizeof(past_output_[0]));
+ }
+ return 0;
+}
+
+} // namespace webrtc
+
diff --git a/webrtc/modules/audio_processing/agc/pole_zero_filter.h b/webrtc/modules/audio_processing/agc/pole_zero_filter.h
new file mode 100644
index 0000000..c9d96fd
--- /dev/null
+++ b/webrtc/modules/audio_processing/agc/pole_zero_filter.h
@@ -0,0 +1,50 @@
+/*
+ * Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#ifndef WEBRTC_MODULES_AUDIO_PROCESSING_AGC_POLE_ZERO_FILTER_H_
+#define WEBRTC_MODULES_AUDIO_PROCESSING_AGC_POLE_ZERO_FILTER_H_
+
+#include "webrtc/typedefs.h"
+
+namespace webrtc {
+
+class PoleZeroFilter {
+ public:
+ ~PoleZeroFilter() {}
+
+ static PoleZeroFilter* Create(const float* numerator_coefficients,
+ int order_numerator,
+ const float* denominator_coefficients,
+ int order_denominator);
+
+ int Filter(const int16_t* in, int num_input_samples, float* output);
+
+ private:
+ PoleZeroFilter(const float* numerator_coefficients,
+ int order_numerator,
+ const float* denominator_coefficients,
+ int order_denominator);
+
+ static const int kMaxFilterOrder = 24;
+
+ int16_t past_input_[kMaxFilterOrder * 2];
+ float past_output_[kMaxFilterOrder * 2];
+
+ float numerator_coefficients_[kMaxFilterOrder + 1];
+ float denominator_coefficients_[kMaxFilterOrder + 1];
+
+ int order_numerator_;
+ int order_denominator_;
+ int highest_order_;
+};
+
+} // namespace webrtc
+
+#endif // WEBRTC_MODULES_AUDIO_PROCESSING_AGC_POLE_ZERO_FILTER_H_
diff --git a/webrtc/modules/audio_processing/agc/pole_zero_filter_unittest.cc b/webrtc/modules/audio_processing/agc/pole_zero_filter_unittest.cc
new file mode 100644
index 0000000..e487858
--- /dev/null
+++ b/webrtc/modules/audio_processing/agc/pole_zero_filter_unittest.cc
@@ -0,0 +1,98 @@
+/*
+ * Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include "webrtc/modules/audio_processing/agc/pole_zero_filter.h"
+
+#include <math.h>
+#include <stdio.h>
+
+#include "gtest/gtest.h"
+#include "webrtc/modules/audio_processing/agc/agc_audio_proc_internal.h"
+#include "webrtc/system_wrappers/interface/compile_assert.h"
+#include "webrtc/system_wrappers/interface/scoped_ptr.h"
+#include "webrtc/test/testsupport/fileutils.h"
+
+namespace webrtc {
+
+static const int kInputSamples = 50;
+
+static const int16_t kInput[kInputSamples] = {-2136, -7116, 10715, 2464, 3164,
+ 8139, 11393, 24013, -32117, -5544, -27740, 10181, 14190, -24055, -15912,
+ 17393, 6359, -9950, -13894, 32432, -23944, 3437, -8381, 19768, 3087, -19795,
+ -5920, 13310, 1407, 3876, 4059, 3524, -23130, 19121, -27900, -24840, 4089,
+ 21422, -3625, 3015, -11236, 28856, 13424, 6571, -19761, -6361, 15821, -9469,
+ 29727, 32229};
+
+static const float kReferenceOutput[kInputSamples] = {-2082.230472f,
+ -6878.572941f, 10697.090871f, 2358.373952f, 2973.936512f, 7738.580650f,
+ 10690.803213f, 22687.091576f, -32676.684717f, -5879.621684f, -27359.297432f,
+ 10368.735888f, 13994.584604f, -23676.126249f, -15078.250390f, 17818.253338f,
+ 6577.743123f, -9498.369315f, -13073.651079f, 32460.026588f, -23391.849347f,
+ 3953.805667f, -7667.761363f, 19995.153447f, 3185.575477f, -19207.365160f,
+ -5143.103201f, 13756.317237f, 1779.654794f, 4142.269755f, 4209.475034f,
+ 3572.991789f, -22509.089546f, 19307.878964f, -27060.439759f, -23319.042810f,
+ 5547.685267f, 22312.718676f, -2707.309027f, 3852.358490f, -10135.510093f,
+ 29241.509970f, 13394.397233f, 6340.721417f, -19510.207905f, -5908.442086f,
+ 15882.301634f, -9211.335255f, 29253.056735f, 30874.443046f};
+
+class PoleZeroFilterTest : public ::testing::Test {
+ protected:
+  PoleZeroFilterTest()
+      : my_filter_(PoleZeroFilter::Create(
+          kCoeffNumerator, kFilterOrder, kCoeffDenominator, kFilterOrder)) {}
+
+  ~PoleZeroFilterTest() {}
+
+  // Filters kInput in |num_subframes| equal chunks and compares the
+  // concatenated result against kReferenceOutput.
+  void FilterSubframes(int num_subframes);
+
+ private:
+  void TestClean();  // NOTE(review): declared but never defined or used.
+  scoped_ptr<PoleZeroFilter> my_filter_;
+};
+
+void PoleZeroFilterTest::FilterSubframes(int num_subframes) {
+  float output[kInputSamples];
+  const int num_subframe_samples = kInputSamples / num_subframes;
+  // |num_subframes| must evenly divide the 50-sample input.
+  EXPECT_EQ(num_subframe_samples * num_subframes, kInputSamples);
+
+  for (int n = 0; n < num_subframes; n++) {
+    my_filter_->Filter(&kInput[n * num_subframe_samples], num_subframe_samples,
+                       &output[n * num_subframe_samples]);
+  }
+  for (int n = 0; n < kInputSamples; n++) {
+    // Chunked filtering must match the one-shot reference to within 1.
+    EXPECT_NEAR(output[n], kReferenceOutput[n], 1);
+  }
+}
+
+// The same output must be produced regardless of how the input is chopped
+// into subframes, since the filter carries state across Filter() calls.
+TEST_F(PoleZeroFilterTest, OneSubframe) {
+  FilterSubframes(1);
+}
+
+TEST_F(PoleZeroFilterTest, TwoSubframes) {
+  FilterSubframes(2);
+}
+
+TEST_F(PoleZeroFilterTest, FiveSubframes) {
+  FilterSubframes(5);
+}
+
+TEST_F(PoleZeroFilterTest, TenSubframes) {
+  FilterSubframes(10);
+}
+
+TEST_F(PoleZeroFilterTest, TwentyFiveSubframes) {
+  FilterSubframes(25);
+}
+
+TEST_F(PoleZeroFilterTest, FiftySubframes) {
+  FilterSubframes(50);
+}
+
+} // namespace webrtc
diff --git a/webrtc/modules/audio_processing/agc/standalone_vad.cc b/webrtc/modules/audio_processing/agc/standalone_vad.cc
new file mode 100644
index 0000000..afd9d7b
--- /dev/null
+++ b/webrtc/modules/audio_processing/agc/standalone_vad.cc
@@ -0,0 +1,96 @@
+/*
+ * Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include "webrtc/modules/audio_processing/agc/standalone_vad.h"
+
+#include <assert.h>
+
+#include "webrtc/modules/interface/module_common_types.h"
+#include "webrtc/modules/utility/interface/audio_frame_operations.h"
+#include "webrtc/typedefs.h"
+
+namespace webrtc {
+
+// Mode 3 is the most aggressive VAD setting (see set_mode()).
+static const int kDefaultStandaloneVadMode = 3;
+
+StandaloneVad::StandaloneVad(VadInst* vad)
+    : vad_(vad),
+      buffer_(),
+      index_(0),
+      mode_(kDefaultStandaloneVadMode) {}
+
+StandaloneVad::~StandaloneVad() {
+  WebRtcVad_Free(vad_);
+}
+
+// Factory method; returns NULL if the underlying VAD cannot be created or
+// initialized. The returned instance owns the VAD and frees it on
+// destruction.
+StandaloneVad* StandaloneVad::Create() {
+  VadInst* vad = NULL;
+  if (WebRtcVad_Create(&vad) < 0)
+    return NULL;
+
+  int err = WebRtcVad_Init(vad);
+  err |= WebRtcVad_set_mode(vad, kDefaultStandaloneVadMode);
+  if (err != 0) {
+    WebRtcVad_Free(vad);
+    return NULL;
+  }
+  return new StandaloneVad(vad);
+}
+
+// Buffers one 10 ms frame of 16 kHz audio. Returns -1 if |length| is not
+// exactly kLength10Ms, 0 on success.
+int StandaloneVad::AddAudio(const int16_t* data, int length) {
+  if (length != kLength10Ms)
+    return -1;
+
+  // Reset the buffer if it's full.
+  // TODO(ajm): Instead, consider just processing every 10 ms frame. Then we
+  // can forgo the buffering.
+  if (index_ + length > kLength10Ms * kMaxNum10msFrames) {
+    index_ = 0;
+  }
+
+  memcpy(&buffer_[index_], data, sizeof(int16_t) * length);
+  index_ += length;
+  return 0;
+}
+
+// Runs the VAD over all buffered audio, writing one probability per buffered
+// 10 ms frame into |p|. Returns the VAD decision (0 or 1) on success, or -1
+// if nothing is buffered, |p| is too short, or the VAD fails. The buffer is
+// emptied on success.
+int StandaloneVad::GetActivity(double* p, int length_p) {
+  if (index_ == 0)
+    return -1;
+
+  const int num_frames = index_ / kLength10Ms;
+  if (num_frames > length_p)
+    return -1;
+  assert(WebRtcVad_ValidRateAndFrameLength(kSampleRateHz, index_) == 0);
+
+  int activity = WebRtcVad_Process(vad_, kSampleRateHz, buffer_, index_);
+  if (activity < 0)
+    return -1;
+  else if (activity == 0)
+    p[0] = 0.01;  // Arbitrary but small and non-zero.
+  else
+    p[0] = 0.5;  // 0.5 is a neutral value when combined with other
+                 // probabilities.
+  for (int n = 1; n < num_frames; n++)
+    p[n] = p[0];
+  // Reset the buffer to start from the beginning.
+  index_ = 0;
+  return activity;
+}
+
+// Sets the aggressiveness of the internal VAD (0 = least, 3 = most
+// aggressive). Returns -1 if |mode| is out of range or rejected by the
+// underlying VAD, 0 on success.
+int StandaloneVad::set_mode(int mode) {
+  if (mode < 0 || mode > 3)
+    return -1;
+  if (WebRtcVad_set_mode(vad_, mode) != 0)
+    return -1;
+
+  mode_ = mode;
+  return 0;
+}
+
+} // namespace webrtc
+
diff --git a/webrtc/modules/audio_processing/agc/standalone_vad.h b/webrtc/modules/audio_processing/agc/standalone_vad.h
new file mode 100644
index 0000000..6f26838
--- /dev/null
+++ b/webrtc/modules/audio_processing/agc/standalone_vad.h
@@ -0,0 +1,70 @@
+/*
+ * Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#ifndef WEBRTC_MODULES_AUDIO_PROCESSING_AGC_STANDALONE_VAD_H_
+#define WEBRTC_MODULES_AUDIO_PROCESSING_AGC_STANDALONE_VAD_H_
+
+#include "webrtc/common_audio/vad/include/webrtc_vad.h"
+#include "webrtc/modules/audio_processing/agc/common.h"
+#include "webrtc/system_wrappers/interface/scoped_ptr.h"
+#include "webrtc/typedefs.h"
+
+namespace webrtc {
+
+class AudioFrame;
+
+class StandaloneVad {
+ public:
+  static StandaloneVad* Create();
+  ~StandaloneVad();
+
+  // Outputs
+  // p: a buffer where probabilities are written to.
+  // length_p: number of elements of |p|.
+  //
+  // return value:
+  // -1: if no audio is stored or the VAD returns an error.
+  // Otherwise: the VAD decision for the buffered audio (0 or 1).
+  // In case of error the content of |p| is unchanged.
+  //
+  // Note that due to a high false-positive (VAD decision is active while the
+  // processed audio is just background noise) rate, stand-alone VAD is used as
+  // a one-sided indicator. The activity probability is 0.5 if the frame is
+  // classified as active, and the probability is 0.01 if the audio is
+  // classified as passive. In this way, when probabilities are combined, the
+  // effect of the stand-alone VAD is neutral if the input is classified as
+  // active.
+  int GetActivity(double* p, int length_p);
+
+  // Expecting 10 ms of 16 kHz audio to be pushed in. Returns -1 if |length|
+  // differs from kLength10Ms, 0 on success.
+  int AddAudio(const int16_t* data, int length);
+
+  // Set aggressiveness of VAD, 0 is the least aggressive and 3 is the most
+  // aggressive mode. Returns -1 if the input is less than 0 or larger than 3,
+  // otherwise 0 is returned.
+  int set_mode(int mode);
+  // Get the aggressiveness of the current VAD.
+  int mode() const { return mode_; }
+
+ private:
+  explicit StandaloneVad(VadInst* vad);
+
+  // At most 3 x 10 ms frames can be buffered between GetActivity() calls.
+  static const int kMaxNum10msFrames = 3;
+
+  // TODO(turajs): Is there a way to use scoped-pointer here?
+  VadInst* vad_;  // Owned; freed in the destructor.
+  int16_t buffer_[kMaxNum10msFrames * kLength10Ms];
+  int index_;  // Number of samples currently buffered.
+  int mode_;  // Current VAD aggressiveness (0-3).
+};
+
+} // namespace webrtc
+
+#endif // WEBRTC_MODULES_AUDIO_PROCESSING_AGC_STANDALONE_VAD_H_
diff --git a/webrtc/modules/audio_processing/agc/standalone_vad_unittest.cc b/webrtc/modules/audio_processing/agc/standalone_vad_unittest.cc
new file mode 100644
index 0000000..43d09a7
--- /dev/null
+++ b/webrtc/modules/audio_processing/agc/standalone_vad_unittest.cc
@@ -0,0 +1,103 @@
+/*
+ * Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include "webrtc/modules/audio_processing/agc/standalone_vad.h"
+
+#include <string.h>
+
+#include "gtest/gtest.h"
+#include "webrtc/modules/interface/module_common_types.h"
+#include "webrtc/system_wrappers/interface/scoped_ptr.h"
+#include "webrtc/test/testsupport/fileutils.h"
+
+namespace webrtc {
+
+TEST(StandaloneVadTest, Api) {
+  scoped_ptr<StandaloneVad> vad(StandaloneVad::Create());
+  int16_t data[kLength10Ms] = { 0 };
+
+  // Valid frame length (for 32 kHz rate), but not what the VAD is expecting.
+  EXPECT_EQ(-1, vad->AddAudio(data, 320));
+
+  const int kMaxNumFrames = 3;
+  double p[kMaxNumFrames];
+  for (int n = 0; n < kMaxNumFrames; n++)
+    EXPECT_EQ(0, vad->AddAudio(data, kLength10Ms));
+
+  // Pretend |p| is shorter than it should be.
+  EXPECT_EQ(-1, vad->GetActivity(p, kMaxNumFrames - 1));
+
+  EXPECT_EQ(0, vad->GetActivity(p, kMaxNumFrames));
+
+  // Ask for activity when buffer is empty.
+  EXPECT_EQ(-1, vad->GetActivity(p, kMaxNumFrames));
+
+  // Should reset and result in one buffer.
+  for (int n = 0; n < kMaxNumFrames + 1; n++)
+    EXPECT_EQ(0, vad->AddAudio(data, kLength10Ms));
+  EXPECT_EQ(0, vad->GetActivity(p, 1));
+
+  // Wrong modes
+  EXPECT_EQ(-1, vad->set_mode(-1));
+  EXPECT_EQ(-1, vad->set_mode(4));
+
+  // Valid mode.
+  const int kMode = 2;
+  EXPECT_EQ(0, vad->set_mode(kMode));
+  EXPECT_EQ(kMode, vad->mode());
+}
+
+// Feeds recorded audio through the VAD in 30 ms batches and compares each
+// decision and its per-frame probabilities against a pre-computed reference.
+TEST(StandaloneVadTest, ActivityDetection) {
+  scoped_ptr<StandaloneVad> vad(StandaloneVad::Create());
+  const size_t kDataLength = kLength10Ms;
+  int16_t data[kDataLength] = { 0 };
+
+  FILE* pcm_file =
+      fopen(test::ResourcePath("audio_processing/agc/agc_audio", "pcm").c_str(),
+            "rb");
+  ASSERT_TRUE(pcm_file != NULL);
+
+  FILE* reference_file = fopen(
+      test::ResourcePath("audio_processing/agc/agc_vad", "dat").c_str(), "rb");
+  ASSERT_TRUE(reference_file != NULL);
+
+  // Reference activities are prepared with 0 aggressiveness.
+  ASSERT_EQ(0, vad->set_mode(0));
+
+  // Stand-alone VAD can operate on 1, 2 or 3 frames of length 10 ms. The
+  // reference file is created for 30 ms frame.
+  const int kNumVadFramesToProcess = 3;
+  int num_frames = 0;
+  while (fread(data, sizeof(int16_t), kDataLength, pcm_file) == kDataLength) {
+    vad->AddAudio(data, kDataLength);
+    num_frames++;
+    if (num_frames == kNumVadFramesToProcess) {
+      num_frames = 0;
+      int reference_activity;
+      double p[kNumVadFramesToProcess];
+      EXPECT_EQ(1u, fread(&reference_activity, sizeof(reference_activity), 1,
+                          reference_file));
+      int activity = vad->GetActivity(p, kNumVadFramesToProcess);
+      EXPECT_EQ(reference_activity, activity);
+      if (activity != 0) {
+        // When active, probabilities are set to 0.5.
+        for (int n = 0; n < kNumVadFramesToProcess; n++)
+          EXPECT_EQ(0.5, p[n]);
+      } else {
+        // When inactive, probabilities are set to 0.01.
+        for (int n = 0; n < kNumVadFramesToProcess; n++)
+          EXPECT_EQ(0.01, p[n]);
+      }
+    }
+  }
+  fclose(reference_file);
+  fclose(pcm_file);
+}
+}  // namespace webrtc
diff --git a/webrtc/modules/audio_processing/agc/test/fake_agc.h b/webrtc/modules/audio_processing/agc/test/fake_agc.h
new file mode 100644
index 0000000..e2aabd8
--- /dev/null
+++ b/webrtc/modules/audio_processing/agc/test/fake_agc.h
@@ -0,0 +1,46 @@
+/*
+ * Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#ifndef WEBRTC_MODULES_AUDIO_PROCESSING_AGC_TEST_FAKE_AGC_H_
+#define WEBRTC_MODULES_AUDIO_PROCESSING_AGC_TEST_FAKE_AGC_H_
+
+#include "webrtc/modules/audio_processing/agc/agc.h"
+
+namespace webrtc {
+
+// Agc test double: ignores the audio and reports a deterministic,
+// slowly-cycling mic volume.
+class FakeAgc : public Agc {
+ public:
+  FakeAgc()
+      : counter_(0),
+        volume_(kMaxVolume / 2) {
+  }
+
+  virtual int Process(const AudioFrame& audio_frame) {
+    const int kUpdateIntervalFrames = 10;
+    // Bump the reported volume once every |kUpdateIntervalFrames| calls,
+    // wrapping around below kMaxVolume.
+    if (counter_ % kUpdateIntervalFrames == 0) {
+      volume_ = (volume_ + 1) % kMaxVolume;
+    }
+    counter_++;
+    return 0;
+  }
+
+  virtual int MicVolume() {
+    return volume_;
+  }
+
+ private:
+  static const int kMaxVolume = 255;
+  int counter_;
+  int volume_;
+};
+
+} // namespace webrtc
+
+#endif // WEBRTC_MODULES_AUDIO_PROCESSING_AGC_TEST_FAKE_AGC_H_
diff --git a/webrtc/modules/audio_processing/agc/test/test_utils.cc b/webrtc/modules/audio_processing/agc/test/test_utils.cc
new file mode 100644
index 0000000..e7c884b
--- /dev/null
+++ b/webrtc/modules/audio_processing/agc/test/test_utils.cc
@@ -0,0 +1,63 @@
+/*
+ * Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include "webrtc/modules/audio_processing/agc/test/test_utils.h"
+
+#include <cmath>
+
+#include <algorithm>
+
+#include "webrtc/modules/interface/module_common_types.h"
+
+namespace webrtc {
+
+// Maps a mic level in [0, 255] to a gain in dB: level 127 maps to 0 dB and
+// the full level range spans |gain_range_db| dB.
+float MicLevel2Gain(int gain_range_db, int level) {
+  return (level - 127.0f) / 128.0f * gain_range_db / 2;
+}
+
+// Converts a dB value to a linear amplitude scale factor.
+float Db2Linear(float db) {
+  return powf(10.0f, db / 20.0f);
+}
+
+// Applies a gain that ramps linearly from |last_gain| to |gain| across
+// |frame|, clamping each scaled sample to the int16 range.
+void ApplyGainLinear(float gain, float last_gain, AudioFrame* frame) {
+  const int frame_length = frame->samples_per_channel_ * frame->num_channels_;
+  // Smooth the transition between gain levels across the frame. Guard the
+  // step computation against single-sample frames (division by zero).
+  float smoothed_gain = last_gain;
+  float gain_step =
+      frame_length > 1 ? (gain - last_gain) / (frame_length - 1) : 0.0f;
+  for (int i = 0; i < frame_length; ++i) {
+    smoothed_gain += gain_step;
+    // Round to nearest and clamp to the int16 range before narrowing.
+    float sample = std::floor(frame->data_[i] * smoothed_gain + 0.5);
+    sample = std::max(std::min(32767.0f, sample), -32768.0f);
+    frame->data_[i] = static_cast<int16_t>(sample);
+  }
+}
+
+// Applies a dB-domain gain ramp (see ApplyGainLinear) to |frame|.
+void ApplyGain(float gain_db, float last_gain_db, AudioFrame* frame) {
+  ApplyGainLinear(Db2Linear(gain_db), Db2Linear(last_gain_db), frame);
+}
+
+// Simulates an analog mic level change from |last_mic_level| to |mic_level|
+// over the course of |frame|, using the linear level-to-gain mapping.
+void SimulateMic(int gain_range_db, int mic_level, int last_mic_level,
+                 AudioFrame* frame) {
+  assert(mic_level >= 0 && mic_level <= 255);
+  assert(last_mic_level >= 0 && last_mic_level <= 255);
+  ApplyGain(MicLevel2Gain(gain_range_db, mic_level),
+            MicLevel2Gain(gain_range_db, last_mic_level),
+            frame);
+}
+
+// Simulates a mic level change using an explicit level-to-dB lookup table.
+// |gain_map| needs one entry per possible mic level, i.e. 256 entries: the
+// asserts below allow levels up to 255, so a 255-entry array would be read
+// out of bounds. (The array size decays to a pointer, so this is
+// documentation for callers rather than an enforced bound.)
+void SimulateMic(int gain_map[256], int mic_level, int last_mic_level,
+                 AudioFrame* frame) {
+  assert(mic_level >= 0 && mic_level <= 255);
+  assert(last_mic_level >= 0 && last_mic_level <= 255);
+  ApplyGain(gain_map[mic_level], gain_map[last_mic_level], frame);
+}
+
+} // namespace webrtc
+
diff --git a/webrtc/modules/audio_processing/agc/test/test_utils.h b/webrtc/modules/audio_processing/agc/test/test_utils.h
new file mode 100644
index 0000000..25dc496
--- /dev/null
+++ b/webrtc/modules/audio_processing/agc/test/test_utils.h
@@ -0,0 +1,28 @@
+/*
+ * Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+// Helpers for AGC tests: gain application and analog-mic simulation.
+#ifndef WEBRTC_MODULES_AUDIO_PROCESSING_AGC_TEST_TEST_UTILS_H_
+#define WEBRTC_MODULES_AUDIO_PROCESSING_AGC_TEST_TEST_UTILS_H_
+
+namespace webrtc {
+
+class AudioFrame;
+
+// Maps a mic level in [0, 255] to a gain in dB (level 127 -> 0 dB).
+float MicLevel2Gain(int gain_range_db, int level);
+// Converts a dB value to a linear amplitude scale factor.
+float Db2Linear(float db);
+// Applies a linear gain ramp from |last_gain| to |gain| across |frame|.
+void ApplyGainLinear(float gain, float last_gain, AudioFrame* frame);
+// Applies a dB-domain gain ramp across |frame|.
+void ApplyGain(float gain_db, float last_gain_db, AudioFrame* frame);
+// Simulates a mic level change using the linear level-to-gain mapping.
+void SimulateMic(int gain_range_db, int mic_level, int last_mic_level,
+                 AudioFrame* frame);
+// Simulates a mic level change using a 256-entry level-to-dB lookup table.
+void SimulateMic(int gain_map[256], int mic_level, int last_mic_level,
+                 AudioFrame* frame);
+
+}  // namespace webrtc
+
+#endif  // WEBRTC_MODULES_AUDIO_PROCESSING_AGC_TEST_TEST_UTILS_H_
diff --git a/webrtc/modules/audio_processing/agc/utility.cc b/webrtc/modules/audio_processing/agc/utility.cc
new file mode 100644
index 0000000..48458ad
--- /dev/null
+++ b/webrtc/modules/audio_processing/agc/utility.cc
@@ -0,0 +1,35 @@
+/*
+ * Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include "webrtc/modules/audio_processing/agc/utility.h"
+
+#include <math.h>
+
+// ln(10); used to build the dB and loudness scale factors below.
+static const double kLog10 = 2.30258509299;
+static const double kLinear2DbScale = 20.0 / kLog10;
+static const double kLinear2LoudnessScale = 13.4 / kLog10;
+
+// Converts a loudness value to dB.
+double Loudness2Db(double loudness) {
+  return loudness * kLinear2DbScale / kLinear2LoudnessScale;
+}
+
+// Converts a linear RMS value to loudness. Zero RMS is floored at -15 so
+// that silence never reaches log(0).
+double Linear2Loudness(double rms) {
+  return (rms == 0) ? -15 : kLinear2LoudnessScale * log(rms);
+}
+
+// Converts dB to loudness (inverse of Loudness2Db).
+double Db2Loudness(double db) {
+  return db * kLinear2LoudnessScale / kLinear2DbScale;
+}
+
+// Converts dBFS to loudness; full scale (0 dBFS) corresponds to 90 dB.
+double Dbfs2Loudness(double dbfs) {
+  return Db2Loudness(90 + dbfs);
+}
diff --git a/webrtc/modules/audio_processing/agc/utility.h b/webrtc/modules/audio_processing/agc/utility.h
new file mode 100644
index 0000000..df85c2e
--- /dev/null
+++ b/webrtc/modules/audio_processing/agc/utility.h
@@ -0,0 +1,23 @@
+/*
+ * Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#ifndef WEBRTC_MODULES_AUDIO_PROCESSING_AGC_UTILITY_H_
+#define WEBRTC_MODULES_AUDIO_PROCESSING_AGC_UTILITY_H_
+
+// Conversions between the linear, dB, dBFS and loudness domains used by the
+// AGC. See utility.cc for the scale factors.
+
+// Converts a loudness value to dB.
+double Loudness2Db(double loudness);
+
+// Converts a linear RMS value to loudness (floored when rms == 0).
+double Linear2Loudness(double rms);
+
+// Converts dB to loudness (inverse of Loudness2Db).
+double Db2Loudness(double db);
+
+// Converts dBFS to loudness; 0 dBFS corresponds to 90 dB.
+double Dbfs2Loudness(double dbfs);
+
+#endif  // WEBRTC_MODULES_AUDIO_PROCESSING_AGC_UTILITY_H_
diff --git a/webrtc/modules/audio_processing/agc/voice_gmm_tables.h b/webrtc/modules/audio_processing/agc/voice_gmm_tables.h
new file mode 100644
index 0000000..9a490a4
--- /dev/null
+++ b/webrtc/modules/audio_processing/agc/voice_gmm_tables.h
@@ -0,0 +1,77 @@
+/*
+ * Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+// GMM tables for active segments. Generated by MakeGmmTables.m.
+
+#ifndef WEBRTC_MODULES_AUDIO_PROCESSING_AGC_VOICE_GMM_TABLES_H_
+#define WEBRTC_MODULES_AUDIO_PROCESSING_AGC_VOICE_GMM_TABLES_H_
+
+// Number of mixture components in the voice GMM.
+static const int kVoiceGmmNumMixtures = 12;
+// Dimensionality of each mixture (size of the mean/covariance blocks below).
+static const int kVoiceGmmDim = 3;
+
+static const double kVoiceGmmCovarInverse[kVoiceGmmNumMixtures]
+ [kVoiceGmmDim][kVoiceGmmDim] = {
+ {{ 1.83673825579513e+00, -8.09791637570095e-04, 4.60106414365986e-03},
+ {-8.09791637570095e-04, 8.89351738394608e-04, -9.80188953277734e-04},
+ { 4.60106414365986e-03, -9.80188953277734e-04, 1.38706060206582e-03}},
+ {{ 6.76228912850703e+01, -1.98893120119660e-02, -3.53548357253551e-03},
+ {-1.98893120119660e-02, 3.96216858500530e-05, -4.08492938394097e-05},
+ {-3.53548357253551e-03, -4.08492938394097e-05, 9.31864352856416e-04}},
+ {{ 9.98612435944558e+00, -5.27880954316893e-03, -6.30342541619017e-03},
+ {-5.27880954316893e-03, 4.54359480225226e-05, 6.30804591626044e-05},
+ {-6.30342541619017e-03, 6.30804591626044e-05, 5.36466441382942e-04}},
+ {{ 3.39917474216349e+01, -1.56213579433191e-03, -4.01459014990225e-02},
+ {-1.56213579433191e-03, 6.40415424897724e-05, 6.20076342427833e-05},
+ {-4.01459014990225e-02, 6.20076342427833e-05, 3.51199070103063e-03}},
+ {{ 1.34545062271428e+01, -7.94513610147144e-03, -5.34401019341728e-02},
+ {-7.94513610147144e-03, 1.16511820098649e-04, 4.66063702069293e-05},
+ {-5.34401019341728e-02, 4.66063702069293e-05, 2.72354323774163e-03}},
+ {{ 1.08557844314806e+02, -1.54885805673668e-02, -1.88029692674851e-02},
+ {-1.54885805673668e-02, 1.16404042786406e-04, 6.45579292702802e-06},
+ {-1.88029692674851e-02, 6.45579292702802e-06, 4.32330478391416e-04}},
+ {{ 8.22940066541450e+01, -1.15903110231303e-02, -4.92166764865343e-02},
+ {-1.15903110231303e-02, 7.42510742165261e-05, 3.73007314191290e-06},
+ {-4.92166764865343e-02, 3.73007314191290e-06, 3.64005221593244e-03}},
+ {{ 2.31133605685660e+00, -7.83261568950254e-04, 7.45744012346313e-04},
+ {-7.83261568950254e-04, 1.29460648214142e-05, -2.22774455093730e-06},
+ { 7.45744012346313e-04, -2.22774455093730e-06, 1.05117294093010e-04}},
+ {{ 3.78767849189611e+02, 1.57759761011568e-03, -2.08551217988774e-02},
+ { 1.57759761011568e-03, 4.76066236886865e-05, -2.33977412299324e-05},
+ {-2.08551217988774e-02, -2.33977412299324e-05, 5.24261005371196e-04}},
+ {{ 6.98580096506135e-01, -5.13850255217378e-04, -4.01124551717056e-04},
+ {-5.13850255217378e-04, 1.40501021984840e-06, -2.09496928716569e-06},
+ {-4.01124551717056e-04, -2.09496928716569e-06, 2.82879357740037e-04}},
+ {{ 2.62770945162399e+00, -2.31825753241430e-03, -5.30447217466318e-03},
+ {-2.31825753241430e-03, 4.59108572227649e-05, 7.67631886355405e-05},
+ {-5.30447217466318e-03, 7.67631886355405e-05, 2.28521601674098e-03}},
+ {{ 1.89940391362152e+02, -4.23280856852379e-03, -2.70608873541399e-02},
+ {-4.23280856852379e-03, 6.77547582742563e-05, 2.69154203800467e-05},
+ {-2.70608873541399e-02, 2.69154203800467e-05, 3.88574543373470e-03}}};
+
+static const double kVoiceGmmMean[kVoiceGmmNumMixtures][kVoiceGmmDim] = {
+ {-2.15020241646536e+00, 4.97079062999877e+02, 4.77078119504505e+02},
+ {-8.92097680029190e-01, 5.92064964199921e+02, 1.81045145941059e+02},
+ {-1.29435784144398e+00, 4.98450293410611e+02, 1.71991263804064e+02},
+ {-1.03925228397884e+00, 4.99511274321571e+02, 1.05838336539105e+02},
+ {-1.29229047206129e+00, 4.15026762566707e+02, 1.12861119017125e+02},
+ {-7.88748114599810e-01, 4.48739336688113e+02, 1.89784216956337e+02},
+ {-8.77777402332642e-01, 4.86620285054533e+02, 1.13477708016491e+02},
+ {-2.06465957063057e+00, 6.33385049870607e+02, 2.32758546796149e+02},
+ {-6.98893789231685e-01, 5.93622051503385e+02, 1.92536982473203e+02},
+ {-2.55901217508894e+00, 1.55914919756205e+03, 1.39769980835570e+02},
+ {-1.92070024165837e+00, 4.87983940444185e+02, 1.02745468128289e+02},
+ {-7.29187507662854e-01, 5.22717685022855e+02, 1.16377942283991e+02}};
+
+static const double kVoiceGmmWeights[kVoiceGmmNumMixtures] = {
+ -1.39789694361035e+01, -1.19527720202104e+01, -1.32396317929055e+01,
+ -1.09436815209238e+01, -1.13440027478149e+01, -1.12200721834504e+01,
+ -1.02537324043693e+01, -1.60789861938302e+01, -1.03394494048344e+01,
+ -1.83207938586818e+01, -1.31186044948288e+01, -9.52479998673554e+00};
+#endif // WEBRTC_MODULES_AUDIO_PROCESSING_AGC_VOICE_GMM_TABLES_H_
diff --git a/webrtc/modules/audio_processing/audio_processing.gypi b/webrtc/modules/audio_processing/audio_processing.gypi
index d36fe70..74398f2 100644
--- a/webrtc/modules/audio_processing/audio_processing.gypi
+++ b/webrtc/modules/audio_processing/audio_processing.gypi
@@ -9,6 +9,7 @@
{
'variables': {
'audio_processing_dependencies': [
+ '<(DEPTH)/webrtc/modules/modules.gyp:iSAC',
'<(webrtc_root)/base/base.gyp:rtc_base_approved',
'<(webrtc_root)/common_audio/common_audio.gyp:common_audio',
'<(webrtc_root)/system_wrappers/source/system_wrappers.gyp:system_wrappers',
@@ -33,25 +34,52 @@
'<@(audio_processing_dependencies)',
],
'sources': [
- 'aec/include/echo_cancellation.h',
+ 'aec/aec_core.c',
+ 'aec/aec_core.h',
+ 'aec/aec_core_internal.h',
+ 'aec/aec_rdft.c',
+ 'aec/aec_rdft.h',
+ 'aec/aec_resampler.c',
+ 'aec/aec_resampler.h',
'aec/echo_cancellation.c',
'aec/echo_cancellation_internal.h',
- 'aec/aec_core.h',
- 'aec/aec_core.c',
- 'aec/aec_core_internal.h',
- 'aec/aec_rdft.h',
- 'aec/aec_rdft.c',
- 'aec/aec_resampler.h',
- 'aec/aec_resampler.c',
- 'aecm/include/echo_control_mobile.h',
- 'aecm/echo_control_mobile.c',
+ 'aec/include/echo_cancellation.h',
'aecm/aecm_core.c',
'aecm/aecm_core.h',
- 'agc/include/gain_control.h',
+ 'aecm/echo_control_mobile.c',
+ 'aecm/include/echo_control_mobile.h',
+ 'agc/agc.cc',
+ 'agc/agc.h',
+ 'agc/agc_audio_proc.cc',
+ 'agc/agc_audio_proc.h',
+ 'agc/agc_audio_proc_internal.h',
+ 'agc/agc_manager_direct.cc',
+ 'agc/agc_manager_direct.h',
'agc/analog_agc.c',
'agc/analog_agc.h',
+ 'agc/circular_buffer.cc',
+ 'agc/circular_buffer.h',
+ 'agc/common.h',
'agc/digital_agc.c',
'agc/digital_agc.h',
+ 'agc/gain_map_internal.h',
+ 'agc/gmm.cc',
+ 'agc/gmm.h',
+ 'agc/histogram.cc',
+ 'agc/histogram.h',
+ 'agc/include/gain_control.h',
+ 'agc/noise_gmm_tables.h',
+ 'agc/pitch_based_vad.cc',
+ 'agc/pitch_based_vad.h',
+ 'agc/pitch_internal.cc',
+ 'agc/pitch_internal.h',
+ 'agc/pole_zero_filter.cc',
+ 'agc/pole_zero_filter.h',
+ 'agc/standalone_vad.cc',
+ 'agc/standalone_vad.h',
+ 'agc/utility.cc',
+ 'agc/utility.h',
+ 'agc/voice_gmm_tables.h',
'audio_buffer.cc',
'audio_buffer.h',
'audio_processing_impl.cc',
@@ -74,10 +102,23 @@
'noise_suppression_impl.h',
'processing_component.cc',
'processing_component.h',
- 'splitting_filter.cc',
- 'splitting_filter.h',
'rms_level.cc',
'rms_level.h',
+ 'splitting_filter.cc',
+ 'splitting_filter.h',
+ 'transient/common.h',
+ 'transient/daubechies_8_wavelet_coeffs.h',
+ 'transient/dyadic_decimator.h',
+ 'transient/moving_moments.cc',
+ 'transient/moving_moments.h',
+ 'transient/transient_detector.cc',
+ 'transient/transient_detector.h',
+ 'transient/transient_suppressor.cc',
+ 'transient/transient_suppressor.h',
+ 'transient/wpd_node.cc',
+ 'transient/wpd_node.h',
+ 'transient/wpd_tree.cc',
+ 'transient/wpd_tree.h',
'typing_detection.cc',
'typing_detection.h',
'utility/delay_estimator.c',
diff --git a/webrtc/modules/audio_processing/audio_processing_impl.cc b/webrtc/modules/audio_processing/audio_processing_impl.cc
index ba22f33..3ce84fb 100644
--- a/webrtc/modules/audio_processing/audio_processing_impl.cc
+++ b/webrtc/modules/audio_processing/audio_processing_impl.cc
@@ -15,6 +15,8 @@
#include "webrtc/base/platform_file.h"
#include "webrtc/common_audio/include/audio_util.h"
#include "webrtc/common_audio/signal_processing/include/signal_processing_library.h"
+#include "webrtc/modules/audio_processing/agc/agc_manager_direct.h"
+#include "webrtc/modules/audio_processing/transient/transient_suppressor.h"
#include "webrtc/modules/audio_processing/audio_buffer.h"
#include "webrtc/modules/audio_processing/channel_buffer.h"
#include "webrtc/modules/audio_processing/common.h"
@@ -54,6 +56,85 @@
// Throughout webrtc, it's assumed that success is represented by zero.
COMPILE_ASSERT(AudioProcessing::kNoError == 0, no_error_must_be_zero);
+// This class has two main functionalities:
+//
+// 1) It is returned instead of the real GainControl after the new AGC has been
+// enabled in order to prevent an outside user from overriding compression
+// settings. It doesn't do anything in its implementation, except for
+// delegating the const methods and Enable calls to the real GainControl, so
+// AGC can still be disabled.
+//
+// 2) It is injected into AgcManagerDirect and implements volume callbacks for
+// getting and setting the volume level. It just caches this value to be used
+// in VoiceEngine later.
+class GainControlForNewAgc : public GainControl, public VolumeCallbacks {
+ public:
+  explicit GainControlForNewAgc(GainControlImpl* gain_control)
+      : real_gain_control_(gain_control),
+        volume_(0) {
+  }
+
+  // GainControl implementation.
+  virtual int Enable(bool enable) OVERRIDE {
+    return real_gain_control_->Enable(enable);
+  }
+  virtual bool is_enabled() const OVERRIDE {
+    return real_gain_control_->is_enabled();
+  }
+  virtual int set_stream_analog_level(int level) OVERRIDE {
+    volume_ = level;
+    return AudioProcessing::kNoError;
+  }
+  virtual int stream_analog_level() OVERRIDE {
+    return volume_;
+  }
+  // The setters below are deliberate no-ops (see class comment); the
+  // corresponding getters delegate to the real GainControl.
+  virtual int set_mode(Mode mode) OVERRIDE { return AudioProcessing::kNoError; }
+  virtual Mode mode() const OVERRIDE { return GainControl::kAdaptiveAnalog; }
+  virtual int set_target_level_dbfs(int level) OVERRIDE {
+    return AudioProcessing::kNoError;
+  }
+  virtual int target_level_dbfs() const OVERRIDE {
+    return real_gain_control_->target_level_dbfs();
+  }
+  virtual int set_compression_gain_db(int gain) OVERRIDE {
+    return AudioProcessing::kNoError;
+  }
+  virtual int compression_gain_db() const OVERRIDE {
+    return real_gain_control_->compression_gain_db();
+  }
+  virtual int enable_limiter(bool enable) OVERRIDE {
+    return AudioProcessing::kNoError;
+  }
+  virtual bool is_limiter_enabled() const OVERRIDE {
+    return real_gain_control_->is_limiter_enabled();
+  }
+  virtual int set_analog_level_limits(int minimum,
+                                      int maximum) OVERRIDE {
+    return AudioProcessing::kNoError;
+  }
+  virtual int analog_level_minimum() const OVERRIDE {
+    return real_gain_control_->analog_level_minimum();
+  }
+  virtual int analog_level_maximum() const OVERRIDE {
+    return real_gain_control_->analog_level_maximum();
+  }
+  virtual bool stream_is_saturated() const OVERRIDE {
+    return real_gain_control_->stream_is_saturated();
+  }
+
+  // VolumeCallbacks implementation.
+  virtual void SetMicVolume(int volume) OVERRIDE {
+    volume_ = volume;
+  }
+  virtual int GetMicVolume() OVERRIDE {
+    return volume_;
+  }
+
+ private:
+  GainControl* real_gain_control_;  // Not owned.
+  int volume_;  // Cached mic volume level; see class comment.
+};
+
AudioProcessing* AudioProcessing::Create(int id) {
return Create();
}
@@ -96,7 +177,13 @@
delay_offset_ms_(0),
was_stream_delay_set_(false),
output_will_be_muted_(false),
- key_pressed_(false) {
+ key_pressed_(false),
+#if defined(WEBRTC_ANDROID) || defined(WEBRTC_IOS)
+ use_new_agc_(false),
+#else
+ use_new_agc_(config.Get<ExperimentalAgc>().enabled),
+#endif
+ transient_suppressor_enabled_(config.Get<ExperimentalNs>().enabled) {
echo_cancellation_ = new EchoCancellationImpl(this, crit_);
component_list_.push_back(echo_cancellation_);
@@ -118,12 +205,18 @@
voice_detection_ = new VoiceDetectionImpl(this, crit_);
component_list_.push_back(voice_detection_);
+ gain_control_for_new_agc_.reset(new GainControlForNewAgc(gain_control_));
+
SetExtraOptions(config);
}
AudioProcessingImpl::~AudioProcessingImpl() {
{
CriticalSectionScoped crit_scoped(crit_);
+ // Depends on gain_control_ and gain_control_for_new_agc_.
+ agc_manager_.reset();
+ // Depends on gain_control_.
+ gain_control_for_new_agc_.reset();
while (!component_list_.empty()) {
ProcessingComponent* component = component_list_.front();
component->Destroy();
@@ -192,6 +285,16 @@
}
}
+ int err = InitializeExperimentalAgc();
+ if (err != kNoError) {
+ return err;
+ }
+
+ err = InitializeTransient();
+ if (err != kNoError) {
+ return err;
+ }
+
#ifdef WEBRTC_AUDIOPROC_DEBUG_DUMP
if (debug_file_->Open()) {
int err = WriteInitMessage();
@@ -303,6 +406,11 @@
std::list<ProcessingComponent*>::iterator it;
for (it = component_list_.begin(); it != component_list_.end(); ++it)
(*it)->SetExtraOptions(config);
+
+ if (transient_suppressor_enabled_ != config.Get<ExperimentalNs>().enabled) {
+ transient_suppressor_enabled_ = config.Get<ExperimentalNs>().enabled;
+ InitializeTransient();
+ }
}
int AudioProcessingImpl::input_sample_rate_hz() const {
@@ -337,6 +445,10 @@
void AudioProcessingImpl::set_output_will_be_muted(bool muted) {
output_will_be_muted_ = muted;
+ CriticalSectionScoped lock(crit_);
+ if (agc_manager_.get()) {
+ agc_manager_->SetCaptureMuted(output_will_be_muted_);
+ }
}
bool AudioProcessingImpl::output_will_be_muted() const {
@@ -470,6 +582,12 @@
#endif
AudioBuffer* ca = capture_audio_.get(); // For brevity.
+ if (use_new_agc_ && gain_control_->is_enabled()) {
+ agc_manager_->AnalyzePreProcess(ca->data(0),
+ ca->num_channels(),
+ fwd_proc_format_.samples_per_channel());
+ }
+
bool data_processed = is_data_processed();
if (analysis_needed(data_processed)) {
ca->SplitIntoFrequencyBands();
@@ -486,12 +604,35 @@
RETURN_ON_ERR(noise_suppression_->ProcessCaptureAudio(ca));
RETURN_ON_ERR(echo_control_mobile_->ProcessCaptureAudio(ca));
RETURN_ON_ERR(voice_detection_->ProcessCaptureAudio(ca));
+
+ if (use_new_agc_ && gain_control_->is_enabled()) {
+ agc_manager_->Process(ca->split_bands_const(0)[kBand0To8kHz],
+ ca->samples_per_split_channel(),
+ split_rate_);
+ }
RETURN_ON_ERR(gain_control_->ProcessCaptureAudio(ca));
if (synthesis_needed(data_processed)) {
ca->MergeFrequencyBands();
}
+ // TODO(aluebs): Investigate if the transient suppression placement should be
+ // before or after the AGC.
+ if (transient_suppressor_enabled_) {
+ float voice_probability =
+ agc_manager_.get() ? agc_manager_->voice_probability() : 1.f;
+
+ transient_suppressor_->Suppress(ca->data_f(0),
+ ca->samples_per_channel(),
+ ca->num_channels(),
+ ca->split_bands_const_f(0)[kBand0To8kHz],
+ ca->samples_per_split_channel(),
+ ca->keyboard_data(),
+ ca->samples_per_keyboard_channel(),
+ voice_probability,
+ key_pressed_);
+ }
+
// The level estimator operates on the recombined data.
RETURN_ON_ERR(level_estimator_->ProcessStream(ca));
@@ -586,7 +727,9 @@
RETURN_ON_ERR(echo_cancellation_->ProcessRenderAudio(ra));
RETURN_ON_ERR(echo_control_mobile_->ProcessRenderAudio(ra));
- RETURN_ON_ERR(gain_control_->ProcessRenderAudio(ra));
+ if (!use_new_agc_) {
+ RETURN_ON_ERR(gain_control_->ProcessRenderAudio(ra));
+ }
return kNoError;
}
@@ -728,6 +871,9 @@
}
GainControl* AudioProcessingImpl::gain_control() const {
+ if (use_new_agc_) {
+ return gain_control_for_new_agc_.get();
+ }
return gain_control_;
}
@@ -775,7 +921,7 @@
bool AudioProcessingImpl::output_copy_needed(bool is_data_processed) const {
// Check if we've upmixed or downmixed the audio.
return ((fwd_out_format_.num_channels() != fwd_in_format_.num_channels()) ||
- is_data_processed);
+ is_data_processed || transient_suppressor_enabled_);
}
bool AudioProcessingImpl::synthesis_needed(bool is_data_processed) const {
@@ -784,7 +930,8 @@
}
bool AudioProcessingImpl::analysis_needed(bool is_data_processed) const {
- if (!is_data_processed && !voice_detection_->is_enabled()) {
+ if (!is_data_processed && !voice_detection_->is_enabled() &&
+ !transient_suppressor_enabled_) {
// Only level_estimator_ is enabled.
return false;
} else if (fwd_proc_format_.rate() == kSampleRate32kHz ||
@@ -795,6 +942,30 @@
return false;
}
+int AudioProcessingImpl::InitializeExperimentalAgc() {
+ if (use_new_agc_) {
+ if (!agc_manager_.get()) {
+ agc_manager_.reset(
+ new AgcManagerDirect(gain_control_, gain_control_for_new_agc_.get()));
+ }
+ agc_manager_->Initialize();
+ agc_manager_->SetCaptureMuted(output_will_be_muted_);
+ }
+ return kNoError;
+}
+
+int AudioProcessingImpl::InitializeTransient() {
+ if (transient_suppressor_enabled_) {
+ if (!transient_suppressor_.get()) {
+ transient_suppressor_.reset(new TransientSuppressor());
+ }
+ transient_suppressor_->Initialize(fwd_proc_format_.rate(),
+ split_rate_,
+ fwd_out_format_.num_channels());
+ }
+ return kNoError;
+}
+
#ifdef WEBRTC_AUDIOPROC_DEBUG_DUMP
int AudioProcessingImpl::WriteMessageToDebugFile() {
int32_t size = event_msg_->ByteSize();
diff --git a/webrtc/modules/audio_processing/audio_processing_impl.h b/webrtc/modules/audio_processing/audio_processing_impl.h
index caab379..be70273 100644
--- a/webrtc/modules/audio_processing/audio_processing_impl.h
+++ b/webrtc/modules/audio_processing/audio_processing_impl.h
@@ -8,28 +8,32 @@
* be found in the AUTHORS file in the root of the source tree.
*/
-#ifndef WEBRTC_MODULES_AUDIO_PROCESSING_MAIN_SOURCE_AUDIO_PROCESSING_IMPL_H_
-#define WEBRTC_MODULES_AUDIO_PROCESSING_MAIN_SOURCE_AUDIO_PROCESSING_IMPL_H_
+#ifndef WEBRTC_MODULES_AUDIO_PROCESSING_AUDIO_PROCESSING_IMPL_H_
+#define WEBRTC_MODULES_AUDIO_PROCESSING_AUDIO_PROCESSING_IMPL_H_
#include "webrtc/modules/audio_processing/include/audio_processing.h"
#include <list>
#include <string>
+#include "webrtc/base/thread_annotations.h"
#include "webrtc/system_wrappers/interface/scoped_ptr.h"
namespace webrtc {
+class AgcManagerDirect;
class AudioBuffer;
class CriticalSectionWrapper;
class EchoCancellationImpl;
class EchoControlMobileImpl;
class FileWrapper;
class GainControlImpl;
+class GainControlForNewAgc;
class HighPassFilterImpl;
class LevelEstimatorImpl;
class NoiseSuppressionImpl;
class ProcessingComponent;
+class TransientSuppressor;
class VoiceDetectionImpl;
#ifdef WEBRTC_AUDIOPROC_DEBUG_DUMP
@@ -138,7 +142,7 @@
protected:
// Overridden in a mock.
- virtual int InitializeLocked();
+ virtual int InitializeLocked() EXCLUSIVE_LOCKS_REQUIRED(crit_);
private:
int InitializeLocked(int input_sample_rate_hz,
@@ -146,20 +150,24 @@
int reverse_sample_rate_hz,
int num_input_channels,
int num_output_channels,
- int num_reverse_channels);
+ int num_reverse_channels)
+ EXCLUSIVE_LOCKS_REQUIRED(crit_);
int MaybeInitializeLocked(int input_sample_rate_hz,
int output_sample_rate_hz,
int reverse_sample_rate_hz,
int num_input_channels,
int num_output_channels,
- int num_reverse_channels);
- int ProcessStreamLocked();
- int AnalyzeReverseStreamLocked();
+ int num_reverse_channels)
+ EXCLUSIVE_LOCKS_REQUIRED(crit_);
+ int ProcessStreamLocked() EXCLUSIVE_LOCKS_REQUIRED(crit_);
+ int AnalyzeReverseStreamLocked() EXCLUSIVE_LOCKS_REQUIRED(crit_);
bool is_data_processed() const;
bool output_copy_needed(bool is_data_processed) const;
bool synthesis_needed(bool is_data_processed) const;
bool analysis_needed(bool is_data_processed) const;
+ int InitializeExperimentalAgc() EXCLUSIVE_LOCKS_REQUIRED(crit_);
+ int InitializeTransient() EXCLUSIVE_LOCKS_REQUIRED(crit_);
EchoCancellationImpl* echo_cancellation_;
EchoControlMobileImpl* echo_control_mobile_;
@@ -168,6 +176,7 @@
LevelEstimatorImpl* level_estimator_;
NoiseSuppressionImpl* noise_suppression_;
VoiceDetectionImpl* voice_detection_;
+ scoped_ptr<GainControlForNewAgc> gain_control_for_new_agc_;
std::list<ProcessingComponent*> component_list_;
CriticalSectionWrapper* crit_;
@@ -199,8 +208,15 @@
bool output_will_be_muted_;
bool key_pressed_;
+
+ // Only set through the constructor's Config parameter.
+ const bool use_new_agc_;
+ scoped_ptr<AgcManagerDirect> agc_manager_ GUARDED_BY(crit_);
+
+ bool transient_suppressor_enabled_;
+ scoped_ptr<TransientSuppressor> transient_suppressor_;
};
} // namespace webrtc
-#endif // WEBRTC_MODULES_AUDIO_PROCESSING_MAIN_SOURCE_AUDIO_PROCESSING_IMPL_H_
+#endif // WEBRTC_MODULES_AUDIO_PROCESSING_AUDIO_PROCESSING_IMPL_H_
diff --git a/webrtc/modules/audio_processing/audio_processing_impl_unittest.cc b/webrtc/modules/audio_processing/audio_processing_impl_unittest.cc
index 0957617..f4c36d0 100644
--- a/webrtc/modules/audio_processing/audio_processing_impl_unittest.cc
+++ b/webrtc/modules/audio_processing/audio_processing_impl_unittest.cc
@@ -27,7 +27,9 @@
}
MOCK_METHOD0(InitializeLocked, int());
- int RealInitializeLocked() { return AudioProcessingImpl::InitializeLocked(); }
+ int RealInitializeLocked() NO_THREAD_SAFETY_ANALYSIS {
+ return AudioProcessingImpl::InitializeLocked();
+ }
};
TEST(AudioProcessingImplTest, AudioParameterChangeTriggersInit) {
diff --git a/webrtc/modules/audio_processing/audio_processing_tests.gypi b/webrtc/modules/audio_processing/audio_processing_tests.gypi
index 99b80f2..627e669 100644
--- a/webrtc/modules/audio_processing/audio_processing_tests.gypi
+++ b/webrtc/modules/audio_processing/audio_processing_tests.gypi
@@ -46,6 +46,33 @@
],
'sources': [ 'test/unpack.cc', ],
},
+ {
+ 'target_name': 'transient_suppression_test',
+ 'type': 'executable',
+ 'dependencies': [
+ '<(DEPTH)/testing/gtest.gyp:gtest',
+ '<(DEPTH)/third_party/gflags/gflags.gyp:gflags',
+ '<(webrtc_root)/test/test.gyp:test_support',
+ '<(webrtc_root)/modules/modules.gyp:audio_processing',
+ ],
+ 'sources': [
+ 'transient/transient_suppression_test.cc',
+ 'transient/file_utils.cc',
+ 'transient/file_utils.h',
+ ],
+ }, # transient_suppression_test
+ {
+ 'target_name': 'click_annotate',
+ 'type': 'executable',
+ 'dependencies': [
+ '<(webrtc_root)/modules/modules.gyp:audio_processing',
+ ],
+ 'sources': [
+ 'transient/click_annotate.cc',
+ 'transient/file_utils.cc',
+ 'transient/file_utils.h',
+ ],
+ }, # click_annotate
],
}],
],
diff --git a/webrtc/modules/audio_processing/transient/click_annotate.cc b/webrtc/modules/audio_processing/transient/click_annotate.cc
new file mode 100644
index 0000000..f525366
--- /dev/null
+++ b/webrtc/modules/audio_processing/transient/click_annotate.cc
@@ -0,0 +1,114 @@
+/*
+ * Copyright (c) 2013 The WebRTC project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include <cfloat>
+#include <cstdio>
+#include <cstdlib>
+#include <vector>
+
+#include "webrtc/modules/audio_processing/transient/transient_detector.h"
+#include "webrtc/modules/audio_processing/transient/file_utils.h"
+#include "webrtc/system_wrappers/interface/file_wrapper.h"
+#include "webrtc/system_wrappers/interface/scoped_ptr.h"
+
+using webrtc::FileWrapper;
+using webrtc::TransientDetector;
+using webrtc::scoped_ptr;
+
+// Application to generate an RTP timing file.
+// Opens the PCM file and divides the signal into frames.
+// Creates a send times array, one for each step.
+// Each block that contains a transient has an infinite send time.
+// The resultant array is written to a DAT file.
+// Returns -1 on error or |lost_packets| otherwise.
+int main(int argc, char* argv[]) {
+ if (argc != 5) {
+ printf("\n%s - Application to generate a RTP timing file.\n\n", argv[0]);
+ printf("%s PCMfile DATfile chunkSize sampleRate\n\n", argv[0]);
+ printf("Opens the PCMfile with sampleRate in Hertz.\n");
+ printf("Creates a send times array, one for each chunkSize ");
+ printf("milliseconds step.\n");
+ printf("Each block that contains a transient, has an infinite send time. ");
+ printf("The resultant array is written to a DATfile.\n\n");
+ return 0;
+ }
+
+ scoped_ptr<FileWrapper> pcm_file(FileWrapper::Create());
+ pcm_file->OpenFile(argv[1], true, false, false);
+ if (!pcm_file->Open()) {
+ printf("\nThe %s could not be opened.\n\n", argv[1]);
+ return -1;
+ }
+
+ scoped_ptr<FileWrapper> dat_file(FileWrapper::Create());
+ dat_file->OpenFile(argv[2], false, false, false);
+ if (!dat_file->Open()) {
+ printf("\nThe %s could not be opened.\n\n", argv[2]);
+ return -1;
+ }
+
+ int chunk_size_ms = atoi(argv[3]);
+ if (chunk_size_ms <= 0) {
+ printf("\nThe chunkSize must be a positive integer\n\n");
+ return -1;
+ }
+
+ int sample_rate_hz = atoi(argv[4]);
+ if (sample_rate_hz <= 0) {
+ printf("\nThe sampleRate must be a positive integer\n\n");
+ return -1;
+ }
+
+ TransientDetector detector(sample_rate_hz);
+ int lost_packets = 0;
+ size_t audio_buffer_length = chunk_size_ms * sample_rate_hz / 1000;
+ scoped_ptr<float[]> audio_buffer(new float[audio_buffer_length]);
+ std::vector<float> send_times;
+
+ // Read first buffer from the PCM test file.
+ size_t file_samples_read = ReadInt16FromFileToFloatBuffer(
+ pcm_file.get(),
+ audio_buffer_length,
+ audio_buffer.get());
+ for (int time = 0; file_samples_read > 0; time += chunk_size_ms) {
+ // Pad the rest of the buffer with zeros.
+ for (size_t i = file_samples_read; i < audio_buffer_length; ++i) {
+ audio_buffer[i] = 0.0;
+ }
+ float value =
+ detector.Detect(audio_buffer.get(), audio_buffer_length, NULL, 0);
+ if (value < 0.5f) {
+ value = time;
+ } else {
+ value = FLT_MAX;
+ ++lost_packets;
+ }
+ send_times.push_back(value);
+
+ // Read next buffer from the PCM test file.
+ file_samples_read = ReadInt16FromFileToFloatBuffer(pcm_file.get(),
+ audio_buffer_length,
+ audio_buffer.get());
+ }
+
+ size_t floats_written = WriteFloatBufferToFile(dat_file.get(),
+ send_times.size(),
+ &send_times[0]);
+
+ if (floats_written == 0) {
+ printf("\nThe send times could not be written to DAT file\n\n");
+ return -1;
+ }
+
+ pcm_file->CloseFile();
+ dat_file->CloseFile();
+
+ return lost_packets;
+}
diff --git a/webrtc/modules/audio_processing/transient/common.h b/webrtc/modules/audio_processing/transient/common.h
new file mode 100644
index 0000000..92194e9
--- /dev/null
+++ b/webrtc/modules/audio_processing/transient/common.h
@@ -0,0 +1,27 @@
+/*
+ * Copyright (c) 2014 The WebRTC project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#ifndef WEBRTC_MODULES_AUDIO_PROCESSING_TRANSIENT_COMMON_H_
+#define WEBRTC_MODULES_AUDIO_PROCESSING_TRANSIENT_COMMON_H_
+namespace webrtc {
+namespace ts {
+
+static const float kPi = 3.14159265358979323846f;
+static const int kChunkSizeMs = 10;
+enum {
+ kSampleRate8kHz = 8000,
+ kSampleRate16kHz = 16000,
+ kSampleRate32kHz = 32000,
+ kSampleRate48kHz = 48000
+};
+
+} // namespace ts
+} // namespace webrtc
+#endif // WEBRTC_MODULES_AUDIO_PROCESSING_TRANSIENT_COMMON_H_
diff --git a/webrtc/modules/audio_processing/transient/daubechies_8_wavelet_coeffs.h b/webrtc/modules/audio_processing/transient/daubechies_8_wavelet_coeffs.h
new file mode 100644
index 0000000..b1236ac
--- /dev/null
+++ b/webrtc/modules/audio_processing/transient/daubechies_8_wavelet_coeffs.h
@@ -0,0 +1,63 @@
+/*
+ * Copyright (c) 2013 The WebRTC project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+// This header file defines the coefficients of the Daubechies 8 wavelet
+// decomposition and reconstruction filters.
+#ifndef WEBRTC_MODULES_AUDIO_PROCESSING_TRANSIENT_DAUBECHIES_8_WAVELET_COEFFS_H_
+#define WEBRTC_MODULES_AUDIO_PROCESSING_TRANSIENT_DAUBECHIES_8_WAVELET_COEFFS_H_
+
+// Decomposition coefficients Daubechies 8.
+
+namespace webrtc {
+
+const int kDaubechies8CoefficientsLength = 16;
+
+const float kDaubechies8HighPassCoefficients[kDaubechies8CoefficientsLength]
+ = {
+ -5.44158422430816093862e-02f,
+ 3.12871590914465924627e-01f,
+ -6.75630736298012846142e-01f,
+ 5.85354683654869090148e-01f,
+ 1.58291052560238926228e-02f,
+ -2.84015542962428091389e-01f,
+ -4.72484573997972536787e-04f,
+ 1.28747426620186011803e-01f,
+ 1.73693010020221083600e-02f,
+ -4.40882539310647192377e-02f,
+ -1.39810279170155156436e-02f,
+ 8.74609404701565465445e-03f,
+ 4.87035299301066034600e-03f,
+ -3.91740372995977108837e-04f,
+ -6.75449405998556772109e-04f,
+ -1.17476784002281916305e-04f
+};
+
+const float kDaubechies8LowPassCoefficients[kDaubechies8CoefficientsLength] = {
+ -1.17476784002281916305e-04f,
+ 6.75449405998556772109e-04f,
+ -3.91740372995977108837e-04f,
+ -4.87035299301066034600e-03f,
+ 8.74609404701565465445e-03f,
+ 1.39810279170155156436e-02f,
+ -4.40882539310647192377e-02f,
+ -1.73693010020221083600e-02f,
+ 1.28747426620186011803e-01f,
+ 4.72484573997972536787e-04f,
+ -2.84015542962428091389e-01f,
+ -1.58291052560238926228e-02f,
+ 5.85354683654869090148e-01f,
+ 6.75630736298012846142e-01f,
+ 3.12871590914465924627e-01f,
+ 5.44158422430816093862e-02f
+};
+
+} // namespace webrtc
+
+#endif // WEBRTC_MODULES_AUDIO_PROCESSING_TRANSIENT_DAUBECHIES_8_WAVELET_COEFFS_H_
diff --git a/webrtc/modules/audio_processing/transient/dyadic_decimator.h b/webrtc/modules/audio_processing/transient/dyadic_decimator.h
new file mode 100644
index 0000000..c1046f2
--- /dev/null
+++ b/webrtc/modules/audio_processing/transient/dyadic_decimator.h
@@ -0,0 +1,70 @@
+/*
+ * Copyright (c) 2013 The WebRTC project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#ifndef WEBRTC_MODULES_AUDIO_PROCESSING_TRANSIENT_DYADIC_DECIMATOR_H_
+#define WEBRTC_MODULES_AUDIO_PROCESSING_TRANSIENT_DYADIC_DECIMATOR_H_
+
+#include <cstdlib>
+
+#include "webrtc/typedefs.h"
+
+// Provides a set of static methods to perform dyadic decimations.
+
+namespace webrtc {
+
+// Returns the proper length of the output buffer that you should use for the
+// given |in_length| and decimation |odd_sequence|.
+// Never fails; the returned length is always valid.
+inline size_t GetOutLengthToDyadicDecimate(size_t in_length,
+ bool odd_sequence) {
+ size_t out_length = in_length / 2;
+
+ if (in_length % 2 == 1 && !odd_sequence) {
+ ++out_length;
+ }
+
+ return out_length;
+}
+
+// Performs a dyadic decimation: removes every odd/even member of a sequence
+// halving its overall length.
+// Arguments:
+// in: array of |in_length|.
+// odd_sequence: If false, the odd members will be removed (1, 3, 5, ...);
+// if true, the even members will be removed (0, 2, 4, ...).
+// out: array of |out_length|. |out_length| must be large enough to
+// hold the decimated output. The necessary length can be provided by
+// GetOutLengthToDyadicDecimate().
+// Must be previously allocated.
+// Returns the number of output samples, 0 on error.
+template<typename T>
+static size_t DyadicDecimate(const T* in,
+ size_t in_length,
+ bool odd_sequence,
+ T* out,
+ size_t out_length) {
+ size_t half_length = GetOutLengthToDyadicDecimate(in_length, odd_sequence);
+
+ if (!in || !out || in_length <= 0 || out_length < half_length) {
+ return 0;
+ }
+
+ size_t output_samples = 0;
+ size_t index_adjustment = odd_sequence ? 1 : 0;
+ for (output_samples = 0; output_samples < half_length; ++output_samples) {
+ out[output_samples] = in[output_samples * 2 + index_adjustment];
+ }
+
+ return output_samples;
+}
+
+} // namespace webrtc
+
+#endif // WEBRTC_MODULES_AUDIO_PROCESSING_TRANSIENT_DYADIC_DECIMATOR_H_
diff --git a/webrtc/modules/audio_processing/transient/dyadic_decimator_unittest.cc b/webrtc/modules/audio_processing/transient/dyadic_decimator_unittest.cc
new file mode 100644
index 0000000..f5c9f49
--- /dev/null
+++ b/webrtc/modules/audio_processing/transient/dyadic_decimator_unittest.cc
@@ -0,0 +1,126 @@
+/*
+ * Copyright (c) 2013 The WebRTC project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include "webrtc/modules/audio_processing/transient/dyadic_decimator.h"
+
+#include "testing/gtest/include/gtest/gtest.h"
+
+namespace webrtc {
+
+static const size_t kEvenBufferLength = 6;
+static const size_t kOddBufferLength = 5;
+static const size_t kOutBufferLength = 3;
+
+int16_t const test_buffer_even_len[] = {0, 1, 2, 3, 4, 5};
+int16_t const test_buffer_odd_len[] = {0, 1, 2, 3, 4};
+int16_t test_buffer_out[kOutBufferLength];
+
+TEST(DyadicDecimatorTest, GetOutLengthToDyadicDecimate) {
+ EXPECT_EQ(3u, GetOutLengthToDyadicDecimate(6, false));
+ EXPECT_EQ(3u, GetOutLengthToDyadicDecimate(6, true));
+ EXPECT_EQ(3u, GetOutLengthToDyadicDecimate(5, false));
+ EXPECT_EQ(2u, GetOutLengthToDyadicDecimate(5, true));
+}
+
+
+TEST(DyadicDecimatorTest, DyadicDecimateErrorValues) {
+ size_t out_samples = 0;
+
+ out_samples = DyadicDecimate(static_cast<int16_t*>(NULL),
+ kEvenBufferLength,
+ false, // Even sequence.
+ test_buffer_out,
+ kOutBufferLength);
+ EXPECT_EQ(0u, out_samples);
+
+ out_samples = DyadicDecimate(test_buffer_even_len,
+ kEvenBufferLength,
+ false, // Even sequence.
+ static_cast<int16_t*>(NULL),
+ kOutBufferLength);
+ EXPECT_EQ(0u, out_samples);
+
+ // Less than required |out_length|.
+ out_samples = DyadicDecimate(test_buffer_even_len,
+ kEvenBufferLength,
+ false, // Even sequence.
+ test_buffer_out,
+ 2);
+ EXPECT_EQ(0u, out_samples);
+}
+
+TEST(DyadicDecimatorTest, DyadicDecimateEvenLengthEvenSequence) {
+ size_t expected_out_samples =
+ GetOutLengthToDyadicDecimate(kEvenBufferLength, false);
+
+ size_t out_samples = DyadicDecimate(test_buffer_even_len,
+ kEvenBufferLength,
+ false, // Even sequence.
+ test_buffer_out,
+ kOutBufferLength);
+
+ EXPECT_EQ(expected_out_samples, out_samples);
+
+ EXPECT_EQ(0, test_buffer_out[0]);
+ EXPECT_EQ(2, test_buffer_out[1]);
+ EXPECT_EQ(4, test_buffer_out[2]);
+}
+
+TEST(DyadicDecimatorTest, DyadicDecimateEvenLengthOddSequence) {
+ size_t expected_out_samples =
+ GetOutLengthToDyadicDecimate(kEvenBufferLength, true);
+
+ size_t out_samples = DyadicDecimate(test_buffer_even_len,
+ kEvenBufferLength,
+ true, // Odd sequence.
+ test_buffer_out,
+ kOutBufferLength);
+
+ EXPECT_EQ(expected_out_samples, out_samples);
+
+ EXPECT_EQ(1, test_buffer_out[0]);
+ EXPECT_EQ(3, test_buffer_out[1]);
+ EXPECT_EQ(5, test_buffer_out[2]);
+}
+
+TEST(DyadicDecimatorTest, DyadicDecimateOddLengthEvenSequence) {
+ size_t expected_out_samples =
+ GetOutLengthToDyadicDecimate(kOddBufferLength, false);
+
+ size_t out_samples = DyadicDecimate(test_buffer_odd_len,
+ kOddBufferLength,
+ false, // Even sequence.
+ test_buffer_out,
+ kOutBufferLength);
+
+ EXPECT_EQ(expected_out_samples, out_samples);
+
+ EXPECT_EQ(0, test_buffer_out[0]);
+ EXPECT_EQ(2, test_buffer_out[1]);
+ EXPECT_EQ(4, test_buffer_out[2]);
+}
+
+TEST(DyadicDecimatorTest, DyadicDecimateOddLengthOddSequence) {
+ size_t expected_out_samples =
+ GetOutLengthToDyadicDecimate(kOddBufferLength, true);
+
+ size_t out_samples = DyadicDecimate(test_buffer_odd_len,
+ kOddBufferLength,
+ true, // Odd sequence.
+ test_buffer_out,
+ kOutBufferLength);
+
+ EXPECT_EQ(expected_out_samples, out_samples);
+
+ EXPECT_EQ(1, test_buffer_out[0]);
+ EXPECT_EQ(3, test_buffer_out[1]);
+}
+
+} // namespace webrtc
diff --git a/webrtc/modules/audio_processing/transient/file_utils.cc b/webrtc/modules/audio_processing/transient/file_utils.cc
new file mode 100644
index 0000000..c7415bd
--- /dev/null
+++ b/webrtc/modules/audio_processing/transient/file_utils.cc
@@ -0,0 +1,257 @@
+/*
+ * Copyright (c) 2013 The WebRTC project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include "webrtc/modules/audio_processing/transient/file_utils.h"
+
+#include "webrtc/system_wrappers/interface/file_wrapper.h"
+#include "webrtc/system_wrappers/interface/scoped_ptr.h"
+#include "webrtc/typedefs.h"
+
+namespace webrtc {
+
+int ConvertByteArrayToFloat(const uint8_t bytes[4], float* out) {
+ if (!bytes || !out) {
+ return -1;
+ }
+
+ uint32_t binary_value = 0;
+ for (int i = 3; i >= 0; --i) {
+ binary_value <<= 8;
+ binary_value += bytes[i];
+ }
+
+ *out = bit_cast<float>(binary_value);
+
+ return 0;
+}
+
+int ConvertByteArrayToDouble(const uint8_t bytes[8], double* out) {
+ if (!bytes || !out) {
+ return -1;
+ }
+
+ uint64_t binary_value = 0;
+ for (int i = 7; i >= 0; --i) {
+ binary_value <<= 8;
+ binary_value += bytes[i];
+ }
+
+ *out = bit_cast<double>(binary_value);
+
+ return 0;
+}
+
+int ConvertFloatToByteArray(float value, uint8_t out_bytes[4]) {
+ if (!out_bytes) {
+ return -1;
+ }
+
+ uint32_t binary_value = bit_cast<uint32_t>(value);
+ for (size_t i = 0; i < 4; ++i) {
+ out_bytes[i] = binary_value;
+ binary_value >>= 8;
+ }
+
+ return 0;
+}
+
+int ConvertDoubleToByteArray(double value, uint8_t out_bytes[8]) {
+ if (!out_bytes) {
+ return -1;
+ }
+
+ uint64_t binary_value = bit_cast<uint64_t>(value);
+ for (size_t i = 0; i < 8; ++i) {
+ out_bytes[i] = binary_value;
+ binary_value >>= 8;
+ }
+
+ return 0;
+}
+
+size_t ReadInt16BufferFromFile(FileWrapper* file,
+ size_t length,
+ int16_t* buffer) {
+ if (!file || !file->Open() || !buffer || length <= 0) {
+ return 0;
+ }
+
+ scoped_ptr<uint8_t[]> byte_array(new uint8_t[2]);
+
+ size_t int16s_read = 0;
+
+ while (int16s_read < length) {
+ size_t bytes_read = file->Read(byte_array.get(), 2);
+ if (bytes_read < 2) {
+ break;
+ }
+ int16_t value = byte_array[1];
+ value <<= 8;
+ value += byte_array[0];
+ buffer[int16s_read] = value;
+ ++int16s_read;
+ }
+
+ return int16s_read;
+}
+
+size_t ReadInt16FromFileToFloatBuffer(FileWrapper* file,
+ size_t length,
+ float* buffer) {
+ if (!file || !file->Open() || !buffer || length <= 0) {
+ return 0;
+ }
+
+ scoped_ptr<int16_t[]> buffer16(new int16_t[length]);
+
+ size_t int16s_read = ReadInt16BufferFromFile(file, length, buffer16.get());
+
+ for (size_t i = 0; i < int16s_read; ++i) {
+ buffer[i] = buffer16[i];
+ }
+
+ return int16s_read;
+}
+
+size_t ReadInt16FromFileToDoubleBuffer(FileWrapper* file,
+ size_t length,
+ double* buffer) {
+ if (!file || !file->Open() || !buffer || length <= 0) {
+ return 0;
+ }
+
+ scoped_ptr<int16_t[]> buffer16(new int16_t[length]);
+
+ size_t int16s_read = ReadInt16BufferFromFile(file, length, buffer16.get());
+
+ for (size_t i = 0; i < int16s_read; ++i) {
+ buffer[i] = buffer16[i];
+ }
+
+ return int16s_read;
+}
+
+size_t ReadFloatBufferFromFile(FileWrapper* file,
+ size_t length,
+ float* buffer) {
+ if (!file || !file->Open() || !buffer || length <= 0) {
+ return 0;
+ }
+
+ scoped_ptr<uint8_t[]> byte_array(new uint8_t[4]);
+
+ size_t floats_read = 0;
+
+ while (floats_read < length) {
+ size_t bytes_read = file->Read(byte_array.get(), 4);
+ if (bytes_read < 4) {
+ break;
+ }
+ ConvertByteArrayToFloat(byte_array.get(), &buffer[floats_read]);
+ ++floats_read;
+ }
+
+ return floats_read;
+}
+
+size_t ReadDoubleBufferFromFile(FileWrapper* file,
+ size_t length,
+ double* buffer) {
+ if (!file || !file->Open() || !buffer || length <= 0) {
+ return 0;
+ }
+
+ scoped_ptr<uint8_t[]> byte_array(new uint8_t[8]);
+
+ size_t doubles_read = 0;
+
+ while (doubles_read < length) {
+ size_t bytes_read = file->Read(byte_array.get(), 8);
+ if (bytes_read < 8) {
+ break;
+ }
+ ConvertByteArrayToDouble(byte_array.get(), &buffer[doubles_read]);
+ ++doubles_read;
+ }
+
+ return doubles_read;
+}
+
+size_t WriteInt16BufferToFile(FileWrapper* file,
+ size_t length,
+ const int16_t* buffer) {
+ if (!file || !file->Open() || !buffer || length <= 0) {
+ return 0;
+ }
+
+ scoped_ptr<uint8_t[]> byte_array(new uint8_t[2]);
+
+ size_t int16s_written = 0;
+
+ for (int16s_written = 0; int16s_written < length; ++int16s_written) {
+ // Get byte representation.
+ byte_array[0] = buffer[int16s_written] & 0xFF;
+ byte_array[1] = (buffer[int16s_written] >> 8) & 0xFF;
+
+ file->Write(byte_array.get(), 2);
+ }
+
+ file->Flush();
+
+ return int16s_written;
+}
+
+size_t WriteFloatBufferToFile(FileWrapper* file,
+ size_t length,
+ const float* buffer) {
+ if (!file || !file->Open() || !buffer || length <= 0) {
+ return 0;
+ }
+
+ scoped_ptr<uint8_t[]> byte_array(new uint8_t[4]);
+
+ size_t floats_written = 0;
+
+ for (floats_written = 0; floats_written < length; ++floats_written) {
+ // Get byte representation.
+ ConvertFloatToByteArray(buffer[floats_written], byte_array.get());
+
+ file->Write(byte_array.get(), 4);
+ }
+
+ file->Flush();
+
+ return floats_written;
+}
+
+size_t WriteDoubleBufferToFile(FileWrapper* file,
+ size_t length,
+ const double* buffer) {
+ if (!file || !file->Open() || !buffer || length <= 0) {
+ return 0;
+ }
+
+ scoped_ptr<uint8_t[]> byte_array(new uint8_t[8]);
+
+ size_t doubles_written = 0;
+
+ for (doubles_written = 0; doubles_written < length; ++doubles_written) {
+ // Get byte representation.
+ ConvertDoubleToByteArray(buffer[doubles_written], byte_array.get());
+
+ file->Write(byte_array.get(), 8);
+ }
+
+ file->Flush();
+
+ return doubles_written;
+}
+
+} // namespace webrtc
diff --git a/webrtc/modules/audio_processing/transient/file_utils.h b/webrtc/modules/audio_processing/transient/file_utils.h
new file mode 100644
index 0000000..8dc477d
--- /dev/null
+++ b/webrtc/modules/audio_processing/transient/file_utils.h
@@ -0,0 +1,119 @@
+/*
+ * Copyright (c) 2013 The WebRTC project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#ifndef WEBRTC_MODULES_AUDIO_PROCESSING_TRANSIENT_FILE_UTILS_H_
+#define WEBRTC_MODULES_AUDIO_PROCESSING_TRANSIENT_FILE_UTILS_H_
+
+#include <string.h>
+
+#include "webrtc/base/compile_assert.h"
+#include "webrtc/system_wrappers/interface/file_wrapper.h"
+#include "webrtc/typedefs.h"
+
+namespace webrtc {
+
+// This is a copy of the cast included in the Chromium codebase here:
+// http://cs.chromium.org/src/third_party/cld/base/casts.h
+template <class Dest, class Source>
+inline Dest bit_cast(const Source& source) {
+ // A compile error here means your Dest and Source have different sizes.
+ COMPILE_ASSERT(sizeof(Dest) == sizeof(Source),
+ dest_and_source_have_different_sizes);
+
+ Dest dest;
+ memcpy(&dest, &source, sizeof(dest));
+ return dest;
+}
+
+// Converts the byte array with binary float representation to float.
+// Bytes must be in little-endian order.
+// Returns 0 if correct, -1 on error.
+int ConvertByteArrayToFloat(const uint8_t bytes[4], float* out);
+
+// Converts the byte array with binary double representation to double.
+// Bytes must be in little-endian order.
+// Returns 0 if correct, -1 on error.
+int ConvertByteArrayToDouble(const uint8_t bytes[8], double* out);
+
+// Converts a float to a byte array with binary float representation.
+// Bytes will be in little-endian order.
+// Returns 0 if correct, -1 on error.
+int ConvertFloatToByteArray(float value, uint8_t out_bytes[4]);
+
+// Converts a double to a byte array with binary double representation.
+// Bytes will be in little-endian order.
+// Returns 0 if correct, -1 on error.
+int ConvertDoubleToByteArray(double value, uint8_t out_bytes[8]);
+
+// Reads |length| 16-bit integers from |file| to |buffer|.
+// |file| must be previously opened.
+// Returns the number of 16-bit integers read, or 0 on error.
+size_t ReadInt16BufferFromFile(FileWrapper* file,
+ size_t length,
+ int16_t* buffer);
+
+// Reads |length| 16-bit integers from |file| and stores those values
+// (converting them) in |buffer|.
+// |file| must be previously opened.
+// Returns the number of 16-bit integers read, or 0 on error.
+size_t ReadInt16FromFileToFloatBuffer(FileWrapper* file,
+ size_t length,
+ float* buffer);
+
+// Reads |length| 16-bit integers from |file| and stores those values
+// (converting them) in |buffer|.
+// |file| must be previously opened.
+// Returns the number of 16-bit integers read, or 0 on error.
+size_t ReadInt16FromFileToDoubleBuffer(FileWrapper* file,
+ size_t length,
+ double* buffer);
+
+// Reads |length| floats in binary representation (4 bytes) from |file| to
+// |buffer|.
+// |file| must be previously opened.
+// Returns the number of floats read, or 0 on error.
+size_t ReadFloatBufferFromFile(FileWrapper* file, size_t length, float* buffer);
+
+// Reads |length| doubles in binary representation (8 bytes) from |file| to
+// |buffer|.
+// |file| must be previously opened.
+// Returns the number of doubles read, or 0 on error.
+size_t ReadDoubleBufferFromFile(FileWrapper* file,
+ size_t length,
+ double* buffer);
+
+// Writes |length| 16-bit integers from |buffer| in binary representation (2
+// bytes) to |file|. It flushes |file|, so after this call there are no
+// writings pending.
+// |file| must be previously opened.
+// Returns the number of 16-bit integers written, or 0 on error.
+size_t WriteInt16BufferToFile(FileWrapper* file,
+ size_t length,
+ const int16_t* buffer);
+
+// Writes |length| floats from |buffer| in binary representation (4 bytes) to
+// |file|. It flushes |file|, so after this call there are no writings pending.
+// |file| must be previously opened.
+// Returns the number of floats written, or 0 on error.
+size_t WriteFloatBufferToFile(FileWrapper* file,
+ size_t length,
+ const float* buffer);
+
+// Writes |length| doubles from |buffer| in binary representation (8 bytes) to
+// |file|. It flushes |file|, so after this call there are no writings pending.
+// |file| must be previously opened.
+// Returns the number of doubles written, or 0 on error.
+size_t WriteDoubleBufferToFile(FileWrapper* file,
+ size_t length,
+ const double* buffer);
+
+} // namespace webrtc
+
+#endif // WEBRTC_MODULES_AUDIO_PROCESSING_TRANSIENT_FILE_UTILS_H_
diff --git a/webrtc/modules/audio_processing/transient/file_utils_unittest.cc b/webrtc/modules/audio_processing/transient/file_utils_unittest.cc
new file mode 100644
index 0000000..af2f9b3
--- /dev/null
+++ b/webrtc/modules/audio_processing/transient/file_utils_unittest.cc
@@ -0,0 +1,484 @@
+/*
+ * Copyright (c) 2013 The WebRTC project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include "webrtc/modules/audio_processing/transient/file_utils.h"
+
+#include <string.h>
+#include <string>
+
+#include "testing/gtest/include/gtest/gtest.h"
+#include "webrtc/system_wrappers/interface/file_wrapper.h"
+#include "webrtc/system_wrappers/interface/scoped_ptr.h"
+#include "webrtc/test/testsupport/fileutils.h"
+#include "webrtc/typedefs.h"
+
+namespace webrtc {
+
+static const uint8_t kPiBytesf[4] = {0xDB, 0x0F, 0x49, 0x40};
+static const uint8_t kEBytesf[4] = {0x54, 0xF8, 0x2D, 0x40};
+static const uint8_t kAvogadroBytesf[4] = {0x2F, 0x0C, 0xFF, 0x66};
+
+static const uint8_t kPiBytes[8] =
+ {0x18, 0x2D, 0x44, 0x54, 0xFB, 0x21, 0x09, 0x40};
+static const uint8_t kEBytes[8] =
+ {0x69, 0x57, 0x14, 0x8B, 0x0A, 0xBF, 0x05, 0x40};
+static const uint8_t kAvogadroBytes[8] =
+ {0xF4, 0xBC, 0xA8, 0xDF, 0x85, 0xE1, 0xDF, 0x44};
+
+static const double kPi = 3.14159265358979323846;
+static const double kE = 2.71828182845904523536;
+static const double kAvogadro = 602214100000000000000000.0;
+
+class TransientFileUtilsTest: public ::testing::Test {
+ protected:
+ TransientFileUtilsTest()
+ : kTestFileName(
+ test::ResourcePath("audio_processing/transient/double-utils",
+ "dat")),
+ kTestFileNamef(
+ test::ResourcePath("audio_processing/transient/float-utils",
+ "dat")) {}
+ // This file (used in some tests) contains binary data. The data correspond to
+ // the double representation of the constants: Pi, E, and the Avogadro's
+  // Number,
+ // appended in that order.
+ const std::string kTestFileName;
+
+ // This file (used in some tests) contains binary data. The data correspond to
+ // the float representation of the constants: Pi, E, and the Avogadro's
+  // Number,
+ // appended in that order.
+ const std::string kTestFileNamef;
+};
+
+TEST_F(TransientFileUtilsTest, ConvertByteArrayToFloat) {
+ float value = 0.0;
+
+ EXPECT_EQ(0, ConvertByteArrayToFloat(kPiBytesf, &value));
+ EXPECT_FLOAT_EQ(kPi, value);
+
+ EXPECT_EQ(0, ConvertByteArrayToFloat(kEBytesf, &value));
+ EXPECT_FLOAT_EQ(kE, value);
+
+ EXPECT_EQ(0, ConvertByteArrayToFloat(kAvogadroBytesf, &value));
+ EXPECT_FLOAT_EQ(kAvogadro, value);
+}
+
+TEST_F(TransientFileUtilsTest, ConvertByteArrayToDouble) {
+ double value = 0.0;
+
+ EXPECT_EQ(0, ConvertByteArrayToDouble(kPiBytes, &value));
+ EXPECT_DOUBLE_EQ(kPi, value);
+
+ EXPECT_EQ(0, ConvertByteArrayToDouble(kEBytes, &value));
+ EXPECT_DOUBLE_EQ(kE, value);
+
+ EXPECT_EQ(0, ConvertByteArrayToDouble(kAvogadroBytes, &value));
+ EXPECT_DOUBLE_EQ(kAvogadro, value);
+}
+
+TEST_F(TransientFileUtilsTest, ConvertFloatToByteArray) {
+ scoped_ptr<uint8_t[]> bytes(new uint8_t[4]);
+
+ EXPECT_EQ(0, ConvertFloatToByteArray(kPi, bytes.get()));
+ EXPECT_EQ(0, memcmp(bytes.get(), kPiBytesf, 4));
+
+ EXPECT_EQ(0, ConvertFloatToByteArray(kE, bytes.get()));
+ EXPECT_EQ(0, memcmp(bytes.get(), kEBytesf, 4));
+
+ EXPECT_EQ(0, ConvertFloatToByteArray(kAvogadro, bytes.get()));
+ EXPECT_EQ(0, memcmp(bytes.get(), kAvogadroBytesf, 4));
+}
+
+TEST_F(TransientFileUtilsTest, ConvertDoubleToByteArray) {
+ scoped_ptr<uint8_t[]> bytes(new uint8_t[8]);
+
+ EXPECT_EQ(0, ConvertDoubleToByteArray(kPi, bytes.get()));
+ EXPECT_EQ(0, memcmp(bytes.get(), kPiBytes, 8));
+
+ EXPECT_EQ(0, ConvertDoubleToByteArray(kE, bytes.get()));
+ EXPECT_EQ(0, memcmp(bytes.get(), kEBytes, 8));
+
+ EXPECT_EQ(0, ConvertDoubleToByteArray(kAvogadro, bytes.get()));
+ EXPECT_EQ(0, memcmp(bytes.get(), kAvogadroBytes, 8));
+}
+
+TEST_F(TransientFileUtilsTest, ReadInt16BufferFromFile) {
+ std::string test_filename = kTestFileName;
+
+ scoped_ptr<FileWrapper> file(FileWrapper::Create());
+
+ file->OpenFile(test_filename.c_str(),
+ true, // Read only.
+ true, // Loop.
+ false); // No text.
+ ASSERT_TRUE(file->Open()) << "File could not be opened:\n"
+ << kTestFileName.c_str();
+
+ const size_t kBufferLength = 12;
+ scoped_ptr<int16_t[]> buffer(new int16_t[kBufferLength]);
+
+ EXPECT_EQ(kBufferLength, ReadInt16BufferFromFile(file.get(),
+ kBufferLength,
+ buffer.get()));
+ EXPECT_EQ(22377, buffer[4]);
+ EXPECT_EQ(16389, buffer[7]);
+ EXPECT_EQ(17631, buffer[kBufferLength - 1]);
+
+ file->Rewind();
+
+  // The next test is for checking the case where there is not as much data as
+ // needed in the file, but reads to the end, and it returns the number of
+ // int16s read.
+ const size_t kBufferLenghtLargerThanFile = kBufferLength * 2;
+ buffer.reset(new int16_t[kBufferLenghtLargerThanFile]);
+ EXPECT_EQ(kBufferLength, ReadInt16BufferFromFile(file.get(),
+ kBufferLenghtLargerThanFile,
+ buffer.get()));
+ EXPECT_EQ(11544, buffer[0]);
+ EXPECT_EQ(22377, buffer[4]);
+ EXPECT_EQ(16389, buffer[7]);
+ EXPECT_EQ(17631, buffer[kBufferLength - 1]);
+}
+
+TEST_F(TransientFileUtilsTest, ReadInt16FromFileToFloatBuffer) {
+ std::string test_filename = kTestFileName;
+
+ scoped_ptr<FileWrapper> file(FileWrapper::Create());
+
+ file->OpenFile(test_filename.c_str(),
+ true, // Read only.
+ true, // Loop.
+ false); // No text.
+ ASSERT_TRUE(file->Open()) << "File could not be opened:\n"
+ << kTestFileName.c_str();
+
+ const size_t kBufferLength = 12;
+ scoped_ptr<float[]> buffer(new float[kBufferLength]);
+
+ EXPECT_EQ(kBufferLength, ReadInt16FromFileToFloatBuffer(file.get(),
+ kBufferLength,
+ buffer.get()));
+
+ EXPECT_DOUBLE_EQ(11544, buffer[0]);
+ EXPECT_DOUBLE_EQ(22377, buffer[4]);
+ EXPECT_DOUBLE_EQ(16389, buffer[7]);
+ EXPECT_DOUBLE_EQ(17631, buffer[kBufferLength - 1]);
+
+ file->Rewind();
+
+  // The next test is for checking the case where there is not as much data as
+ // needed in the file, but reads to the end, and it returns the number of
+ // int16s read.
+ const size_t kBufferLenghtLargerThanFile = kBufferLength * 2;
+ buffer.reset(new float[kBufferLenghtLargerThanFile]);
+ EXPECT_EQ(kBufferLength,
+ ReadInt16FromFileToFloatBuffer(file.get(),
+ kBufferLenghtLargerThanFile,
+ buffer.get()));
+ EXPECT_DOUBLE_EQ(11544, buffer[0]);
+ EXPECT_DOUBLE_EQ(22377, buffer[4]);
+ EXPECT_DOUBLE_EQ(16389, buffer[7]);
+ EXPECT_DOUBLE_EQ(17631, buffer[kBufferLength - 1]);
+}
+
+TEST_F(TransientFileUtilsTest, ReadInt16FromFileToDoubleBuffer) {
+ std::string test_filename = kTestFileName;
+
+ scoped_ptr<FileWrapper> file(FileWrapper::Create());
+
+ file->OpenFile(test_filename.c_str(),
+ true, // Read only.
+ true, // Loop.
+ false); // No text.
+ ASSERT_TRUE(file->Open()) << "File could not be opened:\n"
+ << kTestFileName.c_str();
+
+ const size_t kBufferLength = 12;
+ scoped_ptr<double[]> buffer(new double[kBufferLength]);
+
+ EXPECT_EQ(kBufferLength, ReadInt16FromFileToDoubleBuffer(file.get(),
+ kBufferLength,
+ buffer.get()));
+ EXPECT_DOUBLE_EQ(11544, buffer[0]);
+ EXPECT_DOUBLE_EQ(22377, buffer[4]);
+ EXPECT_DOUBLE_EQ(16389, buffer[7]);
+ EXPECT_DOUBLE_EQ(17631, buffer[kBufferLength - 1]);
+
+ file->Rewind();
+
+  // The next test is for checking the case where there is not as much data as
+ // needed in the file, but reads to the end, and it returns the number of
+ // int16s read.
+ const size_t kBufferLenghtLargerThanFile = kBufferLength * 2;
+ buffer.reset(new double[kBufferLenghtLargerThanFile]);
+ EXPECT_EQ(kBufferLength,
+ ReadInt16FromFileToDoubleBuffer(file.get(),
+ kBufferLenghtLargerThanFile,
+ buffer.get()));
+ EXPECT_DOUBLE_EQ(11544, buffer[0]);
+ EXPECT_DOUBLE_EQ(22377, buffer[4]);
+ EXPECT_DOUBLE_EQ(16389, buffer[7]);
+ EXPECT_DOUBLE_EQ(17631, buffer[kBufferLength - 1]);
+}
+
+TEST_F(TransientFileUtilsTest, ReadFloatBufferFromFile) {
+ std::string test_filename = kTestFileNamef;
+
+ scoped_ptr<FileWrapper> file(FileWrapper::Create());
+
+ file->OpenFile(test_filename.c_str(),
+ true, // Read only.
+ true, // Loop.
+ false); // No text.
+ ASSERT_TRUE(file->Open()) << "File could not be opened:\n"
+ << kTestFileNamef.c_str();
+
+ const size_t kBufferLength = 3;
+ scoped_ptr<float[]> buffer(new float[kBufferLength]);
+
+
+ EXPECT_EQ(kBufferLength, ReadFloatBufferFromFile(file.get(),
+ kBufferLength,
+ buffer.get()));
+ EXPECT_FLOAT_EQ(kPi, buffer[0]);
+ EXPECT_FLOAT_EQ(kE, buffer[1]);
+ EXPECT_FLOAT_EQ(kAvogadro, buffer[2]);
+
+ file->Rewind();
+
+  // The next test is for checking the case where there is not as much data as
+ // needed in the file, but reads to the end, and it returns the number of
+  // floats read.
+ const size_t kBufferLenghtLargerThanFile = kBufferLength * 2;
+ buffer.reset(new float[kBufferLenghtLargerThanFile]);
+ EXPECT_EQ(kBufferLength, ReadFloatBufferFromFile(file.get(),
+ kBufferLenghtLargerThanFile,
+ buffer.get()));
+ EXPECT_FLOAT_EQ(kPi, buffer[0]);
+ EXPECT_FLOAT_EQ(kE, buffer[1]);
+ EXPECT_FLOAT_EQ(kAvogadro, buffer[2]);
+}
+
+TEST_F(TransientFileUtilsTest, ReadDoubleBufferFromFile) {
+ std::string test_filename = kTestFileName;
+
+ scoped_ptr<FileWrapper> file(FileWrapper::Create());
+
+ file->OpenFile(test_filename.c_str(),
+ true, // Read only.
+ true, // Loop.
+ false); // No text.
+ ASSERT_TRUE(file->Open()) << "File could not be opened:\n"
+ << kTestFileName.c_str();
+
+ const size_t kBufferLength = 3;
+ scoped_ptr<double[]> buffer(new double[kBufferLength]);
+
+
+ EXPECT_EQ(kBufferLength, ReadDoubleBufferFromFile(file.get(),
+ kBufferLength,
+ buffer.get()));
+ EXPECT_DOUBLE_EQ(kPi, buffer[0]);
+ EXPECT_DOUBLE_EQ(kE, buffer[1]);
+ EXPECT_DOUBLE_EQ(kAvogadro, buffer[2]);
+
+ file->Rewind();
+
+  // The next test is for checking the case where there is not as much data as
+ // needed in the file, but reads to the end, and it returns the number of
+ // doubles read.
+ const size_t kBufferLenghtLargerThanFile = kBufferLength * 2;
+ buffer.reset(new double[kBufferLenghtLargerThanFile]);
+ EXPECT_EQ(kBufferLength, ReadDoubleBufferFromFile(file.get(),
+ kBufferLenghtLargerThanFile,
+ buffer.get()));
+ EXPECT_DOUBLE_EQ(kPi, buffer[0]);
+ EXPECT_DOUBLE_EQ(kE, buffer[1]);
+ EXPECT_DOUBLE_EQ(kAvogadro, buffer[2]);
+}
+
+TEST_F(TransientFileUtilsTest, WriteInt16BufferToFile) {
+ scoped_ptr<FileWrapper> file(FileWrapper::Create());
+
+ std::string kOutFileName = test::OutputPath() + "utils_test.out";
+
+ file->OpenFile(kOutFileName.c_str(),
+ false, // Write mode.
+ false, // No loop.
+ false); // No text.
+ ASSERT_TRUE(file->Open()) << "File could not be opened:\n"
+ << kOutFileName.c_str();
+
+ const size_t kBufferLength = 3;
+ scoped_ptr<int16_t[]> written_buffer(new int16_t[kBufferLength]);
+ scoped_ptr<int16_t[]> read_buffer(new int16_t[kBufferLength]);
+
+ written_buffer[0] = 1;
+ written_buffer[1] = 2;
+ written_buffer[2] = 3;
+
+ EXPECT_EQ(kBufferLength, WriteInt16BufferToFile(file.get(),
+ kBufferLength,
+ written_buffer.get()));
+
+ file->CloseFile();
+
+ file->OpenFile(kOutFileName.c_str(),
+ true, // Read only.
+ false, // No loop.
+ false); // No text.
+ ASSERT_TRUE(file->Open()) << "File could not be opened:\n"
+ << kOutFileName.c_str();
+
+ EXPECT_EQ(kBufferLength, ReadInt16BufferFromFile(file.get(),
+ kBufferLength,
+ read_buffer.get()));
+ EXPECT_EQ(0, memcmp(written_buffer.get(),
+ read_buffer.get(),
+ kBufferLength * sizeof(written_buffer[0])));
+}
+
+TEST_F(TransientFileUtilsTest, WriteFloatBufferToFile) {
+ scoped_ptr<FileWrapper> file(FileWrapper::Create());
+
+ std::string kOutFileName = test::OutputPath() + "utils_test.out";
+
+ file->OpenFile(kOutFileName.c_str(),
+ false, // Write mode.
+ false, // No loop.
+ false); // No text.
+ ASSERT_TRUE(file->Open()) << "File could not be opened:\n"
+ << kOutFileName.c_str();
+
+ const size_t kBufferLength = 3;
+ scoped_ptr<float[]> written_buffer(new float[kBufferLength]);
+ scoped_ptr<float[]> read_buffer(new float[kBufferLength]);
+
+ written_buffer[0] = kPi;
+ written_buffer[1] = kE;
+ written_buffer[2] = kAvogadro;
+
+ EXPECT_EQ(kBufferLength, WriteFloatBufferToFile(file.get(),
+ kBufferLength,
+ written_buffer.get()));
+
+ file->CloseFile();
+
+ file->OpenFile(kOutFileName.c_str(),
+ true, // Read only.
+ false, // No loop.
+ false); // No text.
+ ASSERT_TRUE(file->Open()) << "File could not be opened:\n"
+ << kOutFileName.c_str();
+
+ EXPECT_EQ(kBufferLength, ReadFloatBufferFromFile(file.get(),
+ kBufferLength,
+ read_buffer.get()));
+ EXPECT_EQ(0, memcmp(written_buffer.get(),
+ read_buffer.get(),
+ kBufferLength * sizeof(written_buffer[0])));
+}
+
+TEST_F(TransientFileUtilsTest, WriteDoubleBufferToFile) {
+ scoped_ptr<FileWrapper> file(FileWrapper::Create());
+
+ std::string kOutFileName = test::OutputPath() + "utils_test.out";
+
+ file->OpenFile(kOutFileName.c_str(),
+ false, // Write mode.
+ false, // No loop.
+ false); // No text.
+ ASSERT_TRUE(file->Open()) << "File could not be opened:\n"
+ << kOutFileName.c_str();
+
+ const size_t kBufferLength = 3;
+ scoped_ptr<double[]> written_buffer(new double[kBufferLength]);
+ scoped_ptr<double[]> read_buffer(new double[kBufferLength]);
+
+ written_buffer[0] = kPi;
+ written_buffer[1] = kE;
+ written_buffer[2] = kAvogadro;
+
+ EXPECT_EQ(kBufferLength, WriteDoubleBufferToFile(file.get(),
+ kBufferLength,
+ written_buffer.get()));
+
+ file->CloseFile();
+
+ file->OpenFile(kOutFileName.c_str(),
+ true, // Read only.
+ false, // No loop.
+ false); // No text.
+ ASSERT_TRUE(file->Open()) << "File could not be opened:\n"
+ << kOutFileName.c_str();
+
+ EXPECT_EQ(kBufferLength, ReadDoubleBufferFromFile(file.get(),
+ kBufferLength,
+ read_buffer.get()));
+ EXPECT_EQ(0, memcmp(written_buffer.get(),
+ read_buffer.get(),
+ kBufferLength * sizeof(written_buffer[0])));
+}
+
+TEST_F(TransientFileUtilsTest, ExpectedErrorReturnValues) {
+ std::string test_filename = kTestFileName;
+
+ double value;
+ scoped_ptr<int16_t[]> int16_buffer(new int16_t[1]);
+ scoped_ptr<double[]> double_buffer(new double[1]);
+ scoped_ptr<FileWrapper> file(FileWrapper::Create());
+
+ EXPECT_EQ(-1, ConvertByteArrayToDouble(NULL, &value));
+ EXPECT_EQ(-1, ConvertByteArrayToDouble(kPiBytes, NULL));
+
+ EXPECT_EQ(-1, ConvertDoubleToByteArray(kPi, NULL));
+
+ // Tests with file not opened.
+ EXPECT_EQ(0u, ReadInt16BufferFromFile(file.get(), 1, int16_buffer.get()));
+ EXPECT_EQ(0u, ReadInt16FromFileToDoubleBuffer(file.get(),
+ 1,
+ double_buffer.get()));
+ EXPECT_EQ(0u, ReadDoubleBufferFromFile(file.get(), 1, double_buffer.get()));
+ EXPECT_EQ(0u, WriteInt16BufferToFile(file.get(), 1, int16_buffer.get()));
+ EXPECT_EQ(0u, WriteDoubleBufferToFile(file.get(), 1, double_buffer.get()));
+
+ file->OpenFile(test_filename.c_str(),
+ true, // Read only.
+ true, // Loop.
+ false); // No text.
+ ASSERT_TRUE(file->Open()) << "File could not be opened:\n"
+ << kTestFileName.c_str();
+
+ EXPECT_EQ(0u, ReadInt16BufferFromFile(NULL, 1, int16_buffer.get()));
+ EXPECT_EQ(0u, ReadInt16BufferFromFile(file.get(), 1, NULL));
+ EXPECT_EQ(0u, ReadInt16BufferFromFile(file.get(), 0, int16_buffer.get()));
+
+ EXPECT_EQ(0u, ReadInt16FromFileToDoubleBuffer(NULL, 1, double_buffer.get()));
+ EXPECT_EQ(0u, ReadInt16FromFileToDoubleBuffer(file.get(), 1, NULL));
+ EXPECT_EQ(0u, ReadInt16FromFileToDoubleBuffer(file.get(),
+ 0,
+ double_buffer.get()));
+
+ EXPECT_EQ(0u, ReadDoubleBufferFromFile(NULL, 1, double_buffer.get()));
+ EXPECT_EQ(0u, ReadDoubleBufferFromFile(file.get(), 1, NULL));
+ EXPECT_EQ(0u, ReadDoubleBufferFromFile(file.get(), 0, double_buffer.get()));
+
+ EXPECT_EQ(0u, WriteInt16BufferToFile(NULL, 1, int16_buffer.get()));
+ EXPECT_EQ(0u, WriteInt16BufferToFile(file.get(), 1, NULL));
+ EXPECT_EQ(0u, WriteInt16BufferToFile(file.get(), 0, int16_buffer.get()));
+
+ EXPECT_EQ(0u, WriteDoubleBufferToFile(NULL, 1, double_buffer.get()));
+ EXPECT_EQ(0u, WriteDoubleBufferToFile(file.get(), 1, NULL));
+ EXPECT_EQ(0u, WriteDoubleBufferToFile(file.get(), 0, double_buffer.get()));
+}
+
+} // namespace webrtc
+
diff --git a/webrtc/modules/audio_processing/transient/moving_moments.cc b/webrtc/modules/audio_processing/transient/moving_moments.cc
new file mode 100644
index 0000000..e116832
--- /dev/null
+++ b/webrtc/modules/audio_processing/transient/moving_moments.cc
@@ -0,0 +1,49 @@
+/*
+ * Copyright (c) 2013 The WebRTC project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include "webrtc/modules/audio_processing/transient/moving_moments.h"
+
+#include <math.h>
+#include <string.h>
+
+#include "webrtc/system_wrappers/interface/scoped_ptr.h"
+
+namespace webrtc {
+
+MovingMoments::MovingMoments(size_t length)
+ : length_(length),
+ queue_(),
+ sum_(0.0),
+ sum_of_squares_(0.0) {
+ assert(length > 0);
+ for (size_t i = 0; i < length; ++i) {
+ queue_.push(0.0);
+ }
+}
+
+MovingMoments::~MovingMoments() {}
+
+void MovingMoments::CalculateMoments(const float* in, size_t in_length,
+ float* first, float* second) {
+ assert(in && in_length > 0 && first && second);
+
+ for (size_t i = 0; i < in_length; ++i) {
+ const float old_value = queue_.front();
+ queue_.pop();
+ queue_.push(in[i]);
+
+ sum_ += in[i] - old_value;
+ sum_of_squares_ += in[i] * in[i] - old_value * old_value;
+ first[i] = sum_ / length_;
+ second[i] = sum_of_squares_ / length_;
+ }
+}
+
+} // namespace webrtc
diff --git a/webrtc/modules/audio_processing/transient/moving_moments.h b/webrtc/modules/audio_processing/transient/moving_moments.h
new file mode 100644
index 0000000..f063e7c
--- /dev/null
+++ b/webrtc/modules/audio_processing/transient/moving_moments.h
@@ -0,0 +1,52 @@
+/*
+ * Copyright (c) 2013 The WebRTC project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#ifndef WEBRTC_MODULES_AUDIO_PROCESSING_TRANSIENT_MOVING_MOMENTS_H_
+#define WEBRTC_MODULES_AUDIO_PROCESSING_TRANSIENT_MOVING_MOMENTS_H_
+
+#include <queue>
+
+#include "webrtc/system_wrappers/interface/scoped_ptr.h"
+
+namespace webrtc {
+
+// Calculates the first and second moments for each value of a buffer taking
+// into account a given number of previous values.
+// It preserves its state, so it can be multiple-called.
+// TODO(chadan): Implement a function that takes a buffer of first moments and a
+// buffer of second moments; and calculates the variances. When needed.
+// TODO(chadan): Add functionality to update with a buffer but only output
+// the last values of the moments, when needed.
+class MovingMoments {
+ public:
+ // Creates a Moving Moments object, that uses the last |length| values
+ // (including the new value introduced in every new calculation).
+ explicit MovingMoments(size_t length);
+ ~MovingMoments();
+
+ // Calculates the new values using |in|. Results will be in the out buffers.
+ // |first| and |second| must be allocated with at least |in_length|.
+ void CalculateMoments(const float* in, size_t in_length,
+ float* first, float* second);
+
+ private:
+ size_t length_;
+ // A queue holding the |length_| latest input values.
+ std::queue<float> queue_;
+ // Sum of the values of the queue.
+ float sum_;
+ // Sum of the squares of the values of the queue.
+ float sum_of_squares_;
+};
+
+} // namespace webrtc
+
+
+#endif // WEBRTC_MODULES_AUDIO_PROCESSING_TRANSIENT_MOVING_MOMENTS_H_
diff --git a/webrtc/modules/audio_processing/transient/moving_moments_unittest.cc b/webrtc/modules/audio_processing/transient/moving_moments_unittest.cc
new file mode 100644
index 0000000..14cc5a2
--- /dev/null
+++ b/webrtc/modules/audio_processing/transient/moving_moments_unittest.cc
@@ -0,0 +1,206 @@
+/*
+ * Copyright (c) 2013 The WebRTC project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include "webrtc/modules/audio_processing/transient/moving_moments.h"
+
+#include "testing/gtest/include/gtest/gtest.h"
+#include "webrtc/system_wrappers/interface/scoped_ptr.h"
+
+namespace webrtc {
+
+static const float kTolerance = 0.0001f;
+
+class MovingMomentsTest : public ::testing::Test {
+ protected:
+ static const size_t kMovingMomentsBufferLength = 5;
+  static const size_t kMaxOutputLength = 20;  // Valid for these tests only.
+
+ virtual void SetUp();
+ // Calls CalculateMoments and verifies that it produces the expected
+ // outputs.
+ void CalculateMomentsAndVerify(const float* input, size_t input_length,
+ const float* expected_mean,
+ const float* expected_mean_squares);
+
+ scoped_ptr<MovingMoments> moving_moments_;
+ float output_mean_[kMaxOutputLength];
+ float output_mean_squares_[kMaxOutputLength];
+};
+
+const size_t MovingMomentsTest::kMaxOutputLength;
+
+void MovingMomentsTest::SetUp() {
+ moving_moments_.reset(new MovingMoments(kMovingMomentsBufferLength));
+}
+
+void MovingMomentsTest::CalculateMomentsAndVerify(
+ const float* input, size_t input_length,
+ const float* expected_mean,
+ const float* expected_mean_squares) {
+ ASSERT_LE(input_length, kMaxOutputLength);
+
+ moving_moments_->CalculateMoments(input,
+ input_length,
+ output_mean_,
+ output_mean_squares_);
+
+ for (size_t i = 1; i < input_length; ++i) {
+ EXPECT_NEAR(expected_mean[i], output_mean_[i], kTolerance);
+ EXPECT_NEAR(expected_mean_squares[i], output_mean_squares_[i], kTolerance);
+ }
+}
+
+TEST_F(MovingMomentsTest, CorrectMomentsOfAnAllZerosBuffer) {
+ const float kInput[] = {0.f, 0.f, 0.f, 0.f, 0.f};
+ const size_t kInputLength = sizeof(kInput) / sizeof(kInput[0]);
+
+ const float expected_mean[kInputLength] = {0.f, 0.f, 0.f, 0.f, 0.f};
+ const float expected_mean_squares[kInputLength] = {0.f, 0.f, 0.f, 0.f, 0.f};
+
+ CalculateMomentsAndVerify(kInput, kInputLength, expected_mean,
+ expected_mean_squares);
+}
+
+TEST_F(MovingMomentsTest, CorrectMomentsOfAConstantBuffer) {
+ const float kInput[] = {5.f, 5.f, 5.f, 5.f, 5.f, 5.f, 5.f, 5.f, 5.f, 5.f};
+ const size_t kInputLength = sizeof(kInput) / sizeof(kInput[0]);
+
+ const float expected_mean[kInputLength] =
+ {1.f, 2.f, 3.f, 4.f, 5.f, 5.f, 5.f, 5.f, 5.f, 5.f};
+ const float expected_mean_squares[kInputLength] =
+ {5.f, 10.f, 15.f, 20.f, 25.f, 25.f, 25.f, 25.f, 25.f, 25.f};
+
+ CalculateMomentsAndVerify(kInput, kInputLength, expected_mean,
+ expected_mean_squares);
+}
+
+TEST_F(MovingMomentsTest, CorrectMomentsOfAnIncreasingBuffer) {
+ const float kInput[] = {1.f, 2.f, 3.f, 4.f, 5.f, 6.f, 7.f, 8.f, 9.f};
+ const size_t kInputLength = sizeof(kInput) / sizeof(kInput[0]);
+
+ const float expected_mean[kInputLength] =
+ {0.2f, 0.6f, 1.2f, 2.f, 3.f, 4.f, 5.f, 6.f, 7.f};
+ const float expected_mean_squares[kInputLength] =
+ {0.2f, 1.f, 2.8f, 6.f, 11.f, 18.f, 27.f, 38.f, 51.f};
+
+ CalculateMomentsAndVerify(kInput, kInputLength, expected_mean,
+ expected_mean_squares);
+}
+
+TEST_F(MovingMomentsTest, CorrectMomentsOfADecreasingBuffer) {
+ const float kInput[] =
+ {-1.f, -2.f, -3.f, -4.f, -5.f, -6.f, -7.f, -8.f, -9.f};
+ const size_t kInputLength = sizeof(kInput) / sizeof(kInput[0]);
+
+ const float expected_mean[kInputLength] =
+ {-0.2f, -0.6f, -1.2f, -2.f, -3.f, -4.f, -5.f, -6.f, -7.f};
+ const float expected_mean_squares[kInputLength] =
+ {0.2f, 1.f, 2.8f, 6.f, 11.f, 18.f, 27.f, 38.f, 51.f};
+
+ CalculateMomentsAndVerify(kInput, kInputLength, expected_mean,
+ expected_mean_squares);
+}
+
+TEST_F(MovingMomentsTest, CorrectMomentsOfAZeroMeanSequence) {
+ const size_t kMovingMomentsBufferLength = 4;
+ moving_moments_.reset(new MovingMoments(kMovingMomentsBufferLength));
+ const float kInput[] =
+ {1.f, -1.f, 1.f, -1.f, 1.f, -1.f, 1.f, -1.f, 1.f, -1.f};
+ const size_t kInputLength = sizeof(kInput) / sizeof(kInput[0]);
+
+ const float expected_mean[kInputLength] =
+ {0.25f, 0.f, 0.25f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f};
+ const float expected_mean_squares[kInputLength] =
+ {0.25f, 0.5f, 0.75f, 1.f, 1.f, 1.f, 1.f, 1.f, 1.f, 1.f};
+
+ CalculateMomentsAndVerify(kInput, kInputLength, expected_mean,
+ expected_mean_squares);
+}
+
+TEST_F(MovingMomentsTest, CorrectMomentsOfAnArbitraryBuffer) {
+ const float kInput[] =
+ {0.2f, 0.3f, 0.5f, 0.7f, 0.11f, 0.13f, 0.17f, 0.19f, 0.23f};
+ const size_t kInputLength = sizeof(kInput) / sizeof(kInput[0]);
+
+ const float expected_mean[kInputLength] =
+ {0.04f, 0.1f, 0.2f, 0.34f, 0.362f, 0.348f, 0.322f, 0.26f, 0.166f};
+ const float expected_mean_squares[kInputLength] =
+ {0.008f, 0.026f, 0.076f, 0.174f, 0.1764f, 0.1718f, 0.1596f, 0.1168f,
+ 0.0294f};
+
+ CalculateMomentsAndVerify(kInput, kInputLength, expected_mean,
+ expected_mean_squares);
+}
+
+TEST_F(MovingMomentsTest, MutipleCalculateMomentsCalls) {
+ const float kInputFirstCall[] =
+ {0.2f, 0.3f, 0.5f, 0.7f, 0.11f, 0.13f, 0.17f, 0.19f, 0.23f};
+ const size_t kInputFirstCallLength = sizeof(kInputFirstCall) /
+ sizeof(kInputFirstCall[0]);
+ const float kInputSecondCall[] = {0.29f, 0.31f};
+ const size_t kInputSecondCallLength = sizeof(kInputSecondCall) /
+ sizeof(kInputSecondCall[0]);
+ const float kInputThirdCall[] = {0.37f, 0.41f, 0.43f, 0.47f};
+ const size_t kInputThirdCallLength = sizeof(kInputThirdCall) /
+ sizeof(kInputThirdCall[0]);
+
+ const float expected_mean_first_call[kInputFirstCallLength] =
+ {0.04f, 0.1f, 0.2f, 0.34f, 0.362f, 0.348f, 0.322f, 0.26f, 0.166f};
+ const float expected_mean_squares_first_call[kInputFirstCallLength] =
+ {0.008f, 0.026f, 0.076f, 0.174f, 0.1764f, 0.1718f, 0.1596f, 0.1168f,
+ 0.0294f};
+
+ const float expected_mean_second_call[kInputSecondCallLength] =
+ {0.202f, 0.238f};
+ const float expected_mean_squares_second_call[kInputSecondCallLength] =
+ {0.0438f, 0.0596f};
+
+ const float expected_mean_third_call[kInputThirdCallLength] =
+ {0.278f, 0.322f, 0.362f, 0.398f};
+ const float expected_mean_squares_third_call[kInputThirdCallLength] =
+ {0.0812f, 0.1076f, 0.134f, 0.1614f};
+
+ CalculateMomentsAndVerify(kInputFirstCall, kInputFirstCallLength,
+ expected_mean_first_call, expected_mean_squares_first_call);
+
+ CalculateMomentsAndVerify(kInputSecondCall, kInputSecondCallLength,
+ expected_mean_second_call, expected_mean_squares_second_call);
+
+ CalculateMomentsAndVerify(kInputThirdCall, kInputThirdCallLength,
+ expected_mean_third_call, expected_mean_squares_third_call);
+}
+
+TEST_F(MovingMomentsTest,
+ VerifySampleBasedVsBlockBasedCalculation) {
+ const float kInput[] =
+ {0.2f, 0.3f, 0.5f, 0.7f, 0.11f, 0.13f, 0.17f, 0.19f, 0.23f};
+ const size_t kInputLength = sizeof(kInput) / sizeof(kInput[0]);
+
+ float output_mean_block_based[kInputLength];
+ float output_mean_squares_block_based[kInputLength];
+
+ float output_mean_sample_based;
+ float output_mean_squares_sample_based;
+
+ moving_moments_->CalculateMoments(
+ kInput, kInputLength, output_mean_block_based,
+ output_mean_squares_block_based);
+ moving_moments_.reset(new MovingMoments(kMovingMomentsBufferLength));
+ for (size_t i = 0; i < kInputLength; ++i) {
+ moving_moments_->CalculateMoments(
+ &kInput[i], 1, &output_mean_sample_based,
+ &output_mean_squares_sample_based);
+ EXPECT_FLOAT_EQ(output_mean_block_based[i], output_mean_sample_based);
+ EXPECT_FLOAT_EQ(output_mean_squares_block_based[i],
+ output_mean_squares_sample_based);
+ }
+}
+
+} // namespace webrtc
diff --git a/webrtc/modules/audio_processing/transient/test/plotDetection.m b/webrtc/modules/audio_processing/transient/test/plotDetection.m
new file mode 100644
index 0000000..f81ad50
--- /dev/null
+++ b/webrtc/modules/audio_processing/transient/test/plotDetection.m
@@ -0,0 +1,12 @@
function [] = plotDetection(PCMfile, DATfile, fs, chunkSize)
%[] = plotDetection(PCMfile, DATfile, fs, chunkSize)
%
%Plots the signal alongside the detection values.
%
%PCMfile: The file of the input signal in PCM format.
%DATfile: The file containing the detection values in binary float format.
%fs: The sample rate of the signal in Hertz.
%chunkSize: The chunk size used to compute the detection values in seconds.

% Load the normalized audio signal and its per-sample time vector.
[x, tx] = readPCM(PCMfile, fs);
% Load the detection values, expanded to one value per sample.
[d, td] = readDetection(DATfile, fs, chunkSize);
% Overlay both signals on a common time axis.
plot(tx, x, td, d);
diff --git a/webrtc/modules/audio_processing/transient/test/readDetection.m b/webrtc/modules/audio_processing/transient/test/readDetection.m
new file mode 100644
index 0000000..1a9e8a7
--- /dev/null
+++ b/webrtc/modules/audio_processing/transient/test/readDetection.m
@@ -0,0 +1,16 @@
function [d, t] = readDetection(file, fs, chunkSize)
%[d, t] = readDetection(file, fs, chunkSize)
%
%Reads a detection signal from a DAT file.
%
%d: The detection signal.
%t: The respective time vector.
%
%file: The DAT file where the detection signal is stored in float format.
%fs: The signal sample rate in Hertz.
%chunkSize: The chunk size used for the detection in seconds.

% Read the whole file as 32-bit floats, one value per chunk.
fid = fopen(file);
d = fread(fid, inf, 'float');
fclose(fid);
% Per-sample time vector spanning all chunks.
t = 0:(1 / fs):(length(d) * chunkSize - 1 / fs);
% Expand each chunk value to cover all of its samples.
% NOTE(review): floor(t / chunkSize) + 1 can reach length(d) + 1 for the last
% sample if floating-point rounding pushes t(end) / chunkSize up — verify.
d = d(floor(t / chunkSize) + 1);
diff --git a/webrtc/modules/audio_processing/transient/test/readPCM.m b/webrtc/modules/audio_processing/transient/test/readPCM.m
new file mode 100644
index 0000000..47ccac3
--- /dev/null
+++ b/webrtc/modules/audio_processing/transient/test/readPCM.m
@@ -0,0 +1,16 @@
function [x, t] = readPCM(file, fs)
%[x, t] = readPCM(file, fs)
%
%Reads a signal from a PCM file.
%
%x: The read signal after normalization.
%t: The respective time vector.
%
%file: The PCM file where the signal is stored in int16 format.
%fs: The signal sample rate in Hertz.

% Read the whole file as raw int16 samples.
fid = fopen(file);
x = fread(fid, inf, 'int16');
fclose(fid);
% Remove DC, then scale to peak amplitude 1.
% NOTE(review): an all-constant (e.g. silent) file makes max(abs(x)) zero and
% the division yields NaNs — confirm inputs are never silent.
x = x - mean(x);
x = x / max(abs(x));
% One time stamp per sample, starting at 0.
t = 0:(1 / fs):((length(x) - 1) / fs);
diff --git a/webrtc/modules/audio_processing/transient/transient_detector.cc b/webrtc/modules/audio_processing/transient/transient_detector.cc
new file mode 100644
index 0000000..7f021ac
--- /dev/null
+++ b/webrtc/modules/audio_processing/transient/transient_detector.cc
@@ -0,0 +1,173 @@
+/*
+ * Copyright (c) 2013 The WebRTC project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
#include "webrtc/modules/audio_processing/transient/transient_detector.h"

#include <assert.h>
#include <float.h>
#include <math.h>
#include <string.h>

#include <algorithm>

#include "webrtc/modules/audio_processing/transient/common.h"
#include "webrtc/modules/audio_processing/transient/daubechies_8_wavelet_coeffs.h"
#include "webrtc/modules/audio_processing/transient/moving_moments.h"
#include "webrtc/modules/audio_processing/transient/wpd_tree.h"
+
+namespace webrtc {
+
// Assumed duration of a transient; also determines how many chunk results are
// kept in |previous_results_| (see the constructor).
static const int kTransientLengthMs = 30;
// Number of leading chunks whose results are forced to zero at startup.
static const int kChunksAtStartupLeftToDelete =
    kTransientLengthMs / ts::kChunkSizeMs;
// Statistic value at (and above) which a detection saturates to 1.
static const float kDetectThreshold = 16.f;
+
TransientDetector::TransientDetector(int sample_rate_hz)
    : samples_per_chunk_(sample_rate_hz * ts::kChunkSizeMs / 1000),
      last_first_moment_(),  // Zero-initialize the per-leaf moment history.
      last_second_moment_(),
      chunks_at_startup_left_to_delete_(kChunksAtStartupLeftToDelete),
      reference_energy_(1.f),
      using_reference_(false) {
  // Only the four sample rates used by the audio processing module are
  // supported.
  assert(sample_rate_hz == ts::kSampleRate8kHz ||
         sample_rate_hz == ts::kSampleRate16kHz ||
         sample_rate_hz == ts::kSampleRate32kHz ||
         sample_rate_hz == ts::kSampleRate48kHz);
  int samples_per_transient = sample_rate_hz * kTransientLengthMs / 1000;
  // Adjustment to avoid data loss while downsampling, making
  // |samples_per_chunk_| and |samples_per_transient| always divisible by
  // |kLeaves|.
  samples_per_chunk_ -= samples_per_chunk_ % kLeaves;
  samples_per_transient -= samples_per_transient % kLeaves;

  // Each WPD leaf carries 1/|kLeaves| of the chunk's samples.
  tree_leaves_data_length_ = samples_per_chunk_ / kLeaves;
  wpd_tree_.reset(new WPDTree(samples_per_chunk_,
                              kDaubechies8HighPassCoefficients,
                              kDaubechies8LowPassCoefficients,
                              kDaubechies8CoefficientsLength,
                              kLevels));
  // One moving-moments tracker per leaf, each spanning one transient's worth
  // of samples at the leaf's (downsampled) rate.
  for (size_t i = 0; i < kLeaves; ++i) {
    moving_moments_[i].reset(
        new MovingMoments(samples_per_transient / kLeaves));
  }

  // Scratch output buffers reused by every CalculateMoments() call.
  first_moments_.reset(new float[tree_leaves_data_length_]);
  second_moments_.reset(new float[tree_leaves_data_length_]);

  // Pre-fill the result history so Detect() can always take the max over a
  // full transient's worth of chunks.
  for (int i = 0; i < kChunksAtStartupLeftToDelete; ++i) {
    previous_results_.push_back(0.f);
  }
}

TransientDetector::~TransientDetector() {}
+
// Computes the per-chunk transient likelihood: feeds the chunk through the
// WPD tree, normalizes each leaf sample by its trailing moments, applies the
// optional reference weighting, and maps the statistic into [0, 1] with a
// squared raised-cosine curve.
float TransientDetector::Detect(const float* data,
                                size_t data_length,
                                const float* reference_data,
                                size_t reference_length) {
  assert(data && data_length == samples_per_chunk_);

  // TODO(aluebs): Check if these errors can logically happen and if not assert
  // on them.
  if (wpd_tree_->Update(data, samples_per_chunk_) != 0) {
    return -1.f;
  }

  float result = 0.f;

  for (size_t i = 0; i < kLeaves; ++i) {
    WPDNode* leaf = wpd_tree_->NodeAt(kLevels, i);

    moving_moments_[i]->CalculateMoments(leaf->data(),
                                         tree_leaves_data_length_,
                                         first_moments_.get(),
                                         second_moments_.get());

    // Add value delayed (Use the last moments from the last call to Detect).
    // FLT_MIN guards against division by a zero second moment.
    float unbiased_data = leaf->data()[0] - last_first_moment_[i];
    result +=
        unbiased_data * unbiased_data / (last_second_moment_[i] + FLT_MIN);

    // Add new values.
    for (size_t j = 1; j < tree_leaves_data_length_; ++j) {
      unbiased_data = leaf->data()[j] - first_moments_[j - 1];
      result +=
          unbiased_data * unbiased_data / (second_moments_[j - 1] + FLT_MIN);
    }

    // Remember the newest moments; they normalize sample 0 of the next chunk.
    last_first_moment_[i] = first_moments_[tree_leaves_data_length_ - 1];
    last_second_moment_[i] = second_moments_[tree_leaves_data_length_ - 1];
  }

  result /= tree_leaves_data_length_;

  // Scale by the reference weighting (1.f when no reference is supplied).
  result *= ReferenceDetectionValue(reference_data, reference_length);

  // Suppress output during startup, while the moment trackers are still
  // filling up.
  if (chunks_at_startup_left_to_delete_ > 0) {
    chunks_at_startup_left_to_delete_--;
    result = 0.f;
  }

  if (result >= kDetectThreshold) {
    result = 1.f;
  } else {
    // Get proportional value.
    // Proportion achieved with a squared raised cosine function with domain
    // [0, kDetectThreshold) and image [0, 1), it's always increasing.
    const float horizontal_scaling = ts::kPi / kDetectThreshold;
    const float kHorizontalShift = ts::kPi;
    const float kVerticalScaling = 0.5f;
    const float kVerticalShift = 1.f;

    result = (cos(result * horizontal_scaling + kHorizontalShift)
        + kVerticalShift) * kVerticalScaling;
    result *= result;
  }

  previous_results_.pop_front();
  previous_results_.push_back(result);

  // In the current implementation we return the max of the current result and
  // the previous results, so the high results have a width equal to
  // |transient_length|.
  return *std::max_element(previous_results_.begin(), previous_results_.end());
}
+
// Looks for the highest slope and compares it with the previous ones.
// An exponential transformation takes this to the [0, 1] range. This value is
// multiplied by the detection result to avoid false positives.
float TransientDetector::ReferenceDetectionValue(const float* data,
                                                 size_t length) {
  if (data == NULL) {
    // No reference signal provided; do not alter the detection result.
    using_reference_ = false;
    return 1.f;
  }
  static const float kEnergyRatioThreshold = 0.2f;
  static const float kReferenceNonLinearity = 20.f;
  static const float kMemory = 0.99f;  // IIR coefficient for the energy mean.
  float reference_energy = 0.f;
  // NOTE(review): the accumulation starts at index 1, so data[0] never
  // contributes to |reference_energy| — confirm this is intentional.
  for (size_t i = 1; i < length; ++i) {
    reference_energy += data[i] * data[i];
  }
  if (reference_energy == 0.f) {
    // A silent reference carries no information; leave the result unchanged.
    using_reference_ = false;
    return 1.f;
  }
  assert(reference_energy_ != 0);
  // Sigmoid on the ratio of current to historical reference energy: close to
  // 1 when the reference is much louder than usual, close to 0 otherwise.
  float result = 1.f / (1.f + exp(kReferenceNonLinearity *
                                  (kEnergyRatioThreshold -
                                   reference_energy / reference_energy_)));
  // Update the running (exponentially weighted) reference energy.
  reference_energy_ =
      kMemory * reference_energy_ + (1.f - kMemory) * reference_energy;

  using_reference_ = true;

  return result;
}
+
+} // namespace webrtc
diff --git a/webrtc/modules/audio_processing/transient/transient_detector.h b/webrtc/modules/audio_processing/transient/transient_detector.h
new file mode 100644
index 0000000..04691d5
--- /dev/null
+++ b/webrtc/modules/audio_processing/transient/transient_detector.h
@@ -0,0 +1,87 @@
+/*
+ * Copyright (c) 2013 The WebRTC project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#ifndef WEBRTC_MODULES_AUDIO_PROCESSING_TRANSIENT_TRANSIENT_DETECTOR_H_
+#define WEBRTC_MODULES_AUDIO_PROCESSING_TRANSIENT_TRANSIENT_DETECTOR_H_
+
+#include <deque>
+
+#include "webrtc/modules/audio_processing/transient/moving_moments.h"
+#include "webrtc/modules/audio_processing/transient/wpd_tree.h"
+#include "webrtc/system_wrappers/interface/scoped_ptr.h"
+
+namespace webrtc {
+
+// This is an implementation of the transient detector described in "Causal
+// Wavelet based transient detector".
+// Calculates the log-likelihood of a transient to happen on a signal at any
+// given time based on the previous samples; it uses a WPD tree to analyze the
+// signal. It preserves its state, so it can be multiple-called.
+class TransientDetector {
+ public:
+ // TODO(chadan): The only supported wavelet is Daubechies 8 using a WPD tree
+ // of 3 levels. Make an overloaded constructor to allow different wavelets and
+ // depths of the tree. When needed.
+
+ // Creates a wavelet based transient detector.
+ TransientDetector(int sample_rate_hz);
+
+ ~TransientDetector();
+
+ // Calculates the log-likelihood of the existence of a transient in |data|.
+ // |data_length| has to be equal to |samples_per_chunk_|.
+ // Returns a value between 0 and 1, as a non linear representation of this
+ // likelihood.
+ // Returns a negative value on error.
+ float Detect(const float* data,
+ size_t data_length,
+ const float* reference_data,
+ size_t reference_length);
+
+ bool using_reference() { return using_reference_; }
+
+ private:
+ float ReferenceDetectionValue(const float* data, size_t length);
+
+ static const size_t kLevels = 3;
+ static const size_t kLeaves = 1 << kLevels;
+
+ size_t samples_per_chunk_;
+
+ scoped_ptr<WPDTree> wpd_tree_;
+ size_t tree_leaves_data_length_;
+
+ // A MovingMoments object is needed for each leaf in the WPD tree.
+ scoped_ptr<MovingMoments> moving_moments_[kLeaves];
+
+ scoped_ptr<float[]> first_moments_;
+ scoped_ptr<float[]> second_moments_;
+
+ // Stores the last calculated moments from the previous detection.
+ float last_first_moment_[kLeaves];
+ float last_second_moment_[kLeaves];
+
+ // We keep track of the previous results from the previous chunks, so it can
+ // be used to effectively give results according to the |transient_length|.
+ std::deque<float> previous_results_;
+
+ // Number of chunks that are going to return only zeros at the beginning of
+ // the detection. It helps to avoid infs and nans due to the lack of
+ // information.
+ int chunks_at_startup_left_to_delete_;
+
+ float reference_energy_;
+
+ bool using_reference_;
+};
+
+} // namespace webrtc
+
+#endif // WEBRTC_MODULES_AUDIO_PROCESSING_TRANSIENT_TRANSIENT_DETECTOR_H_
diff --git a/webrtc/modules/audio_processing/transient/transient_detector_unittest.cc b/webrtc/modules/audio_processing/transient/transient_detector_unittest.cc
new file mode 100644
index 0000000..ee8619f
--- /dev/null
+++ b/webrtc/modules/audio_processing/transient/transient_detector_unittest.cc
@@ -0,0 +1,104 @@
+/*
+ * Copyright (c) 2013 The WebRTC project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include "webrtc/modules/audio_processing/transient/transient_detector.h"
+
+#include <sstream>
+#include <string>
+
+#include "testing/gtest/include/gtest/gtest.h"
+#include "webrtc/modules/audio_processing/transient/common.h"
+#include "webrtc/modules/audio_processing/transient/file_utils.h"
+#include "webrtc/system_wrappers/interface/file_wrapper.h"
+#include "webrtc/system_wrappers/interface/scoped_ptr.h"
+#include "webrtc/test/testsupport/fileutils.h"
+#include "webrtc/typedefs.h"
+
+namespace webrtc {
+
+static const int kSampleRatesHz[] = {ts::kSampleRate8kHz,
+ ts::kSampleRate16kHz,
+ ts::kSampleRate32kHz,
+ ts::kSampleRate48kHz};
+static const size_t kNumberOfSampleRates =
+ sizeof(kSampleRatesHz) / sizeof(*kSampleRatesHz);
+
+// This test is for the correctness of the transient detector.
+// Checks the results comparing them with the ones stored in the detect files in
+// the directory: resources/audio_processing/transient/
+// The files contain all the results in double precision (Little endian).
+// The audio files used with different sample rates are stored in the same
+// directory.
+TEST(TransientDetectorTest, CorrectnessBasedOnFiles) {
+ for (size_t i = 0; i < kNumberOfSampleRates; ++i) {
+ int sample_rate_hz = kSampleRatesHz[i];
+
+ // Prepare detect file.
+ std::stringstream detect_file_name;
+ detect_file_name << "audio_processing/transient/detect"
+ << (sample_rate_hz / 1000) << "kHz";
+
+ scoped_ptr<FileWrapper> detect_file(FileWrapper::Create());
+
+ detect_file->OpenFile(
+ test::ResourcePath(detect_file_name.str(), "dat").c_str(),
+ true, // Read only.
+ false, // No loop.
+ false); // No text.
+
+ bool file_opened = detect_file->Open();
+ ASSERT_TRUE(file_opened) << "File could not be opened.\n"
+ << detect_file_name.str().c_str();
+
+ // Prepare audio file.
+ std::stringstream audio_file_name;
+ audio_file_name << "audio_processing/transient/audio"
+ << (sample_rate_hz / 1000) << "kHz";
+
+ scoped_ptr<FileWrapper> audio_file(FileWrapper::Create());
+
+ audio_file->OpenFile(
+ test::ResourcePath(audio_file_name.str(), "pcm").c_str(),
+ true, // Read only.
+ false, // No loop.
+ false); // No text.
+
+ // Create detector.
+ TransientDetector detector(sample_rate_hz);
+
+ const size_t buffer_length = sample_rate_hz * ts::kChunkSizeMs / 1000;
+ scoped_ptr<float[]> buffer(new float[buffer_length]);
+
+ const float kTolerance = 0.01f;
+
+ size_t frames_read = 0;
+
+ while (ReadInt16FromFileToFloatBuffer(audio_file.get(),
+ buffer_length,
+ buffer.get()) == buffer_length) {
+ ++frames_read;
+
+ float detector_value =
+ detector.Detect(buffer.get(), buffer_length, NULL, 0);
+ double file_value;
+ ASSERT_EQ(1u, ReadDoubleBufferFromFile(detect_file.get(), 1, &file_value))
+ << "Detect test file is malformed.\n";
+
+ // Compare results with data from the matlab test file.
+ EXPECT_NEAR(file_value, detector_value, kTolerance) << "Frame: "
+ << frames_read;
+ }
+
+ detect_file->CloseFile();
+ audio_file->CloseFile();
+ }
+}
+
+} // namespace webrtc
diff --git a/webrtc/modules/audio_processing/transient/transient_suppression_test.cc b/webrtc/modules/audio_processing/transient/transient_suppression_test.cc
new file mode 100644
index 0000000..a4c2ef1
--- /dev/null
+++ b/webrtc/modules/audio_processing/transient/transient_suppression_test.cc
@@ -0,0 +1,250 @@
+/*
+ * Copyright (c) 2013 The WebRTC project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include "webrtc/modules/audio_processing/transient/transient_suppressor.h"
+
+#include <stdlib.h>
+#include <stdio.h>
+#include <string>
+
+#include "gflags/gflags.h"
+#include "testing/gtest/include/gtest/gtest.h"
+#include "webrtc/common_audio/include/audio_util.h"
+#include "webrtc/modules/audio_processing/agc/agc.h"
+#include "webrtc/modules/interface/module_common_types.h"
+#include "webrtc/system_wrappers/interface/scoped_ptr.h"
+#include "webrtc/test/testsupport/fileutils.h"
+#include "webrtc/typedefs.h"
+
+DEFINE_string(in_file_name, "", "PCM file that contains the signal.");
+DEFINE_string(detection_file_name,
+ "",
+ "PCM file that contains the detection signal.");
+DEFINE_string(reference_file_name,
+ "",
+ "PCM file that contains the reference signal.");
+
// gflags validator: accepts only strictly positive values and prints an
// error message for anything else.
static bool ValidatePositiveInt(const char* flagname, int32_t value) {
  if (value > 0) {
    return true;
  }
  printf("%s must be a positive integer.\n", flagname);
  return false;
}
+DEFINE_int32(chunk_size_ms,
+ 10,
+ "Time between each chunk of samples in milliseconds.");
+static const bool chunk_size_ms_dummy =
+ google::RegisterFlagValidator(&FLAGS_chunk_size_ms, &ValidatePositiveInt);
+
+DEFINE_int32(sample_rate_hz,
+ 16000,
+ "Sampling frequency of the signal in Hertz.");
+static const bool sample_rate_hz_dummy =
+ google::RegisterFlagValidator(&FLAGS_sample_rate_hz, &ValidatePositiveInt);
+DEFINE_int32(detection_rate_hz,
+ 0,
+ "Sampling frequency of the detection signal in Hertz.");
+
+DEFINE_int32(num_channels, 1, "Number of channels.");
+static const bool num_channels_dummy =
+ google::RegisterFlagValidator(&FLAGS_num_channels, &ValidatePositiveInt);
+
+namespace webrtc {
+
+const char kUsage[] =
+ "\nDetects and suppresses transients from file.\n\n"
+ "This application loads the signal from the in_file_name with a specific\n"
+ "num_channels and sample_rate_hz, the detection signal from the\n"
+ "detection_file_name with a specific detection_rate_hz, and the reference\n"
+ "signal from the reference_file_name with sample_rate_hz, divides them\n"
+ "into chunk_size_ms blocks, computes its voice value and depending on the\n"
+ "voice_threshold does the respective restoration. You can always get the\n"
+ "all-voiced or all-unvoiced cases by setting the voice_threshold to 0 or\n"
+ "1 respectively.\n\n";
+
// Read next buffers from the test files (signed 16-bit host-endian PCM
// format). audio_buffer has int16 samples, detection_buffer has float samples
// with range [-32768,32767], and reference_buffer has float samples with
// range [-1,1]. Return true iff all the buffers were filled completely.
bool ReadBuffers(FILE* in_file,
                 size_t audio_buffer_size,
                 int num_channels,
                 int16_t* audio_buffer,
                 FILE* detection_file,
                 size_t detection_buffer_size,
                 float* detection_buffer,
                 FILE* reference_file,
                 float* reference_buffer) {
  scoped_ptr<int16_t[]> tmpbuf;
  int16_t* read_ptr = audio_buffer;
  // Multi-channel input is interleaved on disk; read into a scratch buffer
  // first so it can be de-interleaved into |audio_buffer|.
  if (num_channels > 1) {
    tmpbuf.reset(new int16_t[num_channels * audio_buffer_size]);
    read_ptr = tmpbuf.get();
  }
  if (fread(read_ptr,
            sizeof(*read_ptr),
            num_channels * audio_buffer_size,
            in_file) != num_channels * audio_buffer_size) {
    return false;
  }
  // De-interleave.
  if (num_channels > 1) {
    for (int i = 0; i < num_channels; ++i) {
      for (size_t j = 0; j < audio_buffer_size; ++j) {
        audio_buffer[i * audio_buffer_size + j] =
            read_ptr[i + j * num_channels];
      }
    }
  }
  if (detection_file) {
    // Detection samples keep the int16 value range, just widened to float.
    scoped_ptr<int16_t[]> ibuf(new int16_t[detection_buffer_size]);
    if (fread(ibuf.get(), sizeof(ibuf[0]), detection_buffer_size,
        detection_file) != detection_buffer_size)
      return false;
    for (size_t i = 0; i < detection_buffer_size; ++i)
      detection_buffer[i] = ibuf[i];
  }
  if (reference_file) {
    // The reference is normalized to [-1, 1]; a single channel is read here.
    scoped_ptr<int16_t[]> ibuf(new int16_t[audio_buffer_size]);
    if (fread(ibuf.get(), sizeof(ibuf[0]), audio_buffer_size, reference_file)
        != audio_buffer_size)
      return false;
    S16ToFloat(ibuf.get(), audio_buffer_size, reference_buffer);
  }
  return true;
}
+
+// Write a number of samples to an open signed 16-bit host-endian PCM file.
+static void WritePCM(FILE* f,
+ size_t num_samples,
+ int num_channels,
+ const float* buffer) {
+ scoped_ptr<int16_t[]> ibuf(new int16_t[num_channels * num_samples]);
+ // Interleave.
+ for (int i = 0; i < num_channels; ++i) {
+ for (size_t j = 0; j < num_samples; ++j) {
+ ibuf[i + j * num_channels] = FloatS16ToS16(buffer[i * num_samples + j]);
+ }
+ }
+ fwrite(ibuf.get(), sizeof(ibuf[0]), num_channels * num_samples, f);
+}
+
// This application tests the transient suppression by providing a processed
// PCM file, which has to be listened to in order to evaluate the
// performance.
// It gets an audio file and its voice gain information, and the suppressor
// processes it, producing the output file "suppressed_keystrokes.pcm".
void void_main() {
  // TODO(aluebs): Remove all FileWrappers.
  // Prepare the input file.
  FILE* in_file = fopen(FLAGS_in_file_name.c_str(), "rb");
  ASSERT_TRUE(in_file != NULL);

  // Prepare the detection file.
  // NOTE(review): a failed fopen here is silently treated as "no detection
  // file"; consider reporting the error — verify this is intended.
  FILE* detection_file = NULL;
  if (FLAGS_detection_file_name != "") {
    detection_file = fopen(FLAGS_detection_file_name.c_str(), "rb");
  }

  // Prepare the reference file. Same silent-failure caveat as above.
  FILE* reference_file = NULL;
  if (FLAGS_reference_file_name != "") {
    reference_file = fopen(FLAGS_reference_file_name.c_str(), "rb");
  }

  // Prepare the output file.
  std::string out_file_name = test::OutputPath() + "suppressed_keystrokes.pcm";
  FILE* out_file = fopen(out_file_name.c_str(), "wb");
  ASSERT_TRUE(out_file != NULL);

  // A detection rate of 0 means "use the audio sample rate".
  int detection_rate_hz = FLAGS_detection_rate_hz;
  if (detection_rate_hz == 0) {
    detection_rate_hz = FLAGS_sample_rate_hz;
  }

  Agc agc;

  TransientSuppressor suppressor;
  suppressor.Initialize(
      FLAGS_sample_rate_hz, detection_rate_hz, FLAGS_num_channels);

  const size_t audio_buffer_size =
      FLAGS_chunk_size_ms * FLAGS_sample_rate_hz / 1000;
  const size_t detection_buffer_size =
      FLAGS_chunk_size_ms * detection_rate_hz / 1000;

  // int16 and float variants of the same data.
  scoped_ptr<int16_t[]> audio_buffer_i(
      new int16_t[FLAGS_num_channels * audio_buffer_size]);
  scoped_ptr<float[]> audio_buffer_f(
      new float[FLAGS_num_channels * audio_buffer_size]);

  scoped_ptr<float[]> detection_buffer, reference_buffer;

  if (detection_file)
    detection_buffer.reset(new float[detection_buffer_size]);
  if (reference_file)
    reference_buffer.reset(new float[audio_buffer_size]);

  // Process chunk by chunk until any of the inputs runs out.
  while (ReadBuffers(in_file,
                     audio_buffer_size,
                     FLAGS_num_channels,
                     audio_buffer_i.get(),
                     detection_file,
                     detection_buffer_size,
                     detection_buffer.get(),
                     reference_file,
                     reference_buffer.get())) {
    // The AGC supplies the voice probability consumed by the suppressor.
    ASSERT_EQ(0,
              agc.Process(audio_buffer_i.get(),
                          static_cast<int>(audio_buffer_size),
                          FLAGS_sample_rate_hz))
        << "The AGC could not process the frame";

    // Widen the int16 samples to float for the suppressor.
    for (size_t i = 0; i < FLAGS_num_channels * audio_buffer_size; ++i) {
      audio_buffer_f[i] = audio_buffer_i[i];
    }

    ASSERT_EQ(0,
              suppressor.Suppress(audio_buffer_f.get(),
                                  audio_buffer_size,
                                  FLAGS_num_channels,
                                  detection_buffer.get(),
                                  detection_buffer_size,
                                  reference_buffer.get(),
                                  audio_buffer_size,
                                  agc.voice_probability(),
                                  true))  // key_pressed for every chunk.
        << "The transient suppressor could not suppress the frame";

    // Write result to out file.
    WritePCM(
        out_file, audio_buffer_size, FLAGS_num_channels, audio_buffer_f.get());
  }

  fclose(in_file);
  if (detection_file) {
    fclose(detection_file);
  }
  if (reference_file) {
    fclose(reference_file);
  }
  fclose(out_file);
}
+
+} // namespace webrtc
+
// Parses the command-line flags and runs the suppression pipeline.
int main(int argc, char* argv[]) {
  google::SetUsageMessage(webrtc::kUsage);
  google::ParseCommandLineFlags(&argc, &argv, true);
  webrtc::void_main();
  return 0;
}
diff --git a/webrtc/modules/audio_processing/transient/transient_suppressor.cc b/webrtc/modules/audio_processing/transient/transient_suppressor.cc
new file mode 100644
index 0000000..7eb302b
--- /dev/null
+++ b/webrtc/modules/audio_processing/transient/transient_suppressor.cc
@@ -0,0 +1,424 @@
+/*
+ * Copyright (c) 2013 The WebRTC project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
#include "webrtc/modules/audio_processing/transient/transient_suppressor.h"

#include <math.h>
#include <string.h>

#include <algorithm>
#include <cmath>
#include <complex>
#include <deque>
#include <set>

#include "webrtc/common_audio/include/audio_util.h"
#include "webrtc/common_audio/signal_processing/include/signal_processing_library.h"
#include "webrtc/modules/audio_processing/ns/windows_private.h"
#include "webrtc/modules/audio_processing/transient/common.h"
#include "webrtc/modules/audio_processing/transient/transient_detector.h"
extern "C" {
#include "webrtc/modules/audio_processing/utility/fft4g.h"
}
#include "webrtc/system_wrappers/interface/logging.h"
#include "webrtc/system_wrappers/interface/scoped_ptr.h"
#include "webrtc/typedefs.h"
+
+namespace webrtc {
+
// One-pole IIR coefficient for the running spectral mean (see Suppress()).
static const float kMeanIIRCoefficient = 0.5f;
static const float kVoiceThreshold = 0.02f;

// TODO(aluebs): Check if these values work also for 48kHz.
// FFT-bin range treated as voice when shaping |mean_factor_| in Initialize().
static const size_t kMinVoiceBin = 3;
static const size_t kMaxVoiceBin = 60;
+
namespace {
// Cheap L1 approximation of a complex magnitude: |a| + |b| rather than
// sqrt(a^2 + b^2). Overestimates the true magnitude by up to a factor of
// sqrt(2).
float ComplexMagnitude(float a, float b) {
  return std::fabs(a) + std::fabs(b);
}
}  // namespace
+
// All real setup happens in Initialize(); the constructor only zeroes the
// scalar state so the object is safe to destroy if never initialized.
TransientSuppressor::TransientSuppressor()
    : data_length_(0),
      detection_length_(0),
      analysis_length_(0),
      buffer_delay_(0),
      complex_analysis_length_(0),
      num_channels_(0),
      window_(NULL),
      detector_smoothed_(0.f),
      keypress_counter_(0),
      chunks_since_keypress_(0),
      detection_enabled_(false),
      suppression_enabled_(false),
      use_hard_restoration_(false),
      chunks_since_voice_change_(0),
      seed_(182),  // Kept in sync with the reset in Initialize().
      using_reference_(false) {
}

TransientSuppressor::~TransientSuppressor() {}
+
// Configures the suppressor for the given rates and channel count. Returns 0
// on success, -1 on an unsupported rate or non-positive channel count. May be
// called again to reconfigure; all buffers and state are reset.
int TransientSuppressor::Initialize(int sample_rate_hz,
                                    int detection_rate_hz,
                                    int num_channels) {
  // The analysis window length and window table depend on the sample rate.
  switch (sample_rate_hz) {
    case ts::kSampleRate8kHz:
      analysis_length_ = 128u;
      window_ = kBlocks80w128;
      break;
    case ts::kSampleRate16kHz:
      analysis_length_ = 256u;
      window_ = kBlocks160w256;
      break;
    case ts::kSampleRate32kHz:
      analysis_length_ = 512u;
      window_ = kBlocks320w512;
      break;
    case ts::kSampleRate48kHz:
      analysis_length_ = 1024u;
      window_ = kBlocks480w1024;
      break;
    default:
      return -1;
  }
  // The detection signal may run at a different (but still supported) rate.
  if (detection_rate_hz != ts::kSampleRate8kHz &&
      detection_rate_hz != ts::kSampleRate16kHz &&
      detection_rate_hz != ts::kSampleRate32kHz &&
      detection_rate_hz != ts::kSampleRate48kHz) {
    return -1;
  }
  if (num_channels <= 0) {
    return -1;
  }

  detector_.reset(new TransientDetector(detection_rate_hz));
  data_length_ = sample_rate_hz * ts::kChunkSizeMs / 1000;
  // A chunk must fit inside one analysis window.
  if (data_length_ > analysis_length_) {
    assert(false);
    return -1;
  }
  // Number of overlap samples between consecutive analysis windows; also the
  // latency the suppressor introduces.
  buffer_delay_ = analysis_length_ - data_length_;

  // Number of complex bins of a real FFT of |analysis_length_| samples.
  complex_analysis_length_ = analysis_length_ / 2 + 1;
  assert(complex_analysis_length_ >= kMaxVoiceBin);
  num_channels_ = num_channels;
  in_buffer_.reset(new float[analysis_length_ * num_channels_]);
  memset(in_buffer_.get(),
         0,
         analysis_length_ * num_channels_ * sizeof(in_buffer_[0]));
  detection_length_ = detection_rate_hz * ts::kChunkSizeMs / 1000;
  detection_buffer_.reset(new float[detection_length_]);
  memset(detection_buffer_.get(),
         0,
         detection_length_ * sizeof(detection_buffer_[0]));
  out_buffer_.reset(new float[analysis_length_ * num_channels_]);
  memset(out_buffer_.get(),
         0,
         analysis_length_ * num_channels_ * sizeof(out_buffer_[0]));
  // ip[0] must be zero to trigger initialization using rdft().
  // NOTE(review): fft4g only needs 2 + sqrt(n/2) ints; using sqrt(n) here
  // over-allocates, which is safe.
  size_t ip_length = 2 + sqrtf(analysis_length_);
  // new int[...]() already value-initializes; the following memset is
  // redundant but harmless.
  ip_.reset(new int[ip_length]());
  memset(ip_.get(), 0, ip_length * sizeof(ip_[0]));
  wfft_.reset(new float[complex_analysis_length_ - 1]);
  memset(wfft_.get(), 0, (complex_analysis_length_ - 1) * sizeof(wfft_[0]));
  spectral_mean_.reset(new float[complex_analysis_length_ * num_channels_]);
  memset(spectral_mean_.get(),
         0,
         complex_analysis_length_ * num_channels_ * sizeof(spectral_mean_[0]));
  // Two extra slots hold the relocated R[n/2] bin (see Suppress()).
  fft_buffer_.reset(new float[analysis_length_ + 2]);
  memset(fft_buffer_.get(), 0, (analysis_length_ + 2) * sizeof(fft_buffer_[0]));
  magnitudes_.reset(new float[complex_analysis_length_]);
  memset(magnitudes_.get(),
         0,
         complex_analysis_length_ * sizeof(magnitudes_[0]));
  mean_factor_.reset(new float[complex_analysis_length_]);

  // Per-bin weighting built from two sigmoids: large outside the voice band
  // [kMinVoiceBin, kMaxVoiceBin], small inside it.
  static const float kFactorHeight = 10.f;
  static const float kLowSlope = 1.f;
  static const float kHighSlope = 0.3f;
  for (size_t i = 0; i < complex_analysis_length_; ++i) {
    mean_factor_[i] =
        kFactorHeight /
            (1.f + exp(kLowSlope * static_cast<int>(i - kMinVoiceBin))) +
        kFactorHeight /
            (1.f + exp(kHighSlope * static_cast<int>(kMaxVoiceBin - i)));
  }
  // Reset all per-stream state to the same values the constructor uses.
  detector_smoothed_ = 0.f;
  keypress_counter_ = 0;
  chunks_since_keypress_ = 0;
  detection_enabled_ = false;
  suppression_enabled_ = false;
  use_hard_restoration_ = false;
  chunks_since_voice_change_ = 0;
  seed_ = 182;
  using_reference_ = false;
  return 0;
}
+
// Processes one chunk of (deinterleaved) audio in place. Returns 0 on
// success, -1 on invalid arguments or a detector error. The returned signal
// is delayed relative to the input (see the comment on the final copy loop).
int TransientSuppressor::Suppress(float* data,
                                  size_t data_length,
                                  int num_channels,
                                  const float* detection_data,
                                  size_t detection_length,
                                  const float* reference_data,
                                  size_t reference_length,
                                  float voice_probability,
                                  bool key_pressed) {
  // Validate the call against the configuration set in Initialize().
  if (!data || data_length != data_length_ || num_channels != num_channels_ ||
      detection_length != detection_length_ || voice_probability < 0 ||
      voice_probability > 1) {
    return -1;
  }

  UpdateKeypress(key_pressed);
  UpdateBuffers(data);

  int result = 0;
  if (detection_enabled_) {
    UpdateRestoration(voice_probability);

    if (!detection_data) {
      // Use the input data of the first channel if special detection data is
      // not supplied.
      detection_data = &in_buffer_[buffer_delay_];
    }

    float detector_result = detector_->Detect(
        detection_data, detection_length, reference_data, reference_length);
    if (detector_result < 0) {
      return -1;
    }

    using_reference_ = detector_->using_reference();

    // |detector_smoothed_| follows the |detector_result| when this last one is
    // increasing, but has an exponential decaying tail to be able to suppress
    // the ringing of keyclicks.
    float smooth_factor = using_reference_ ? 0.6 : 0.1;
    detector_smoothed_ = detector_result >= detector_smoothed_
        ? detector_result
        : smooth_factor * detector_smoothed_ +
          (1 - smooth_factor) * detector_result;

    // Each channel is processed against its own spectral mean.
    for (int i = 0; i < num_channels_; ++i) {
      Suppress(&in_buffer_[i * analysis_length_],
               &spectral_mean_[i * complex_analysis_length_],
               &out_buffer_[i * analysis_length_]);
    }
  }

  // If the suppression isn't enabled, we use the in buffer to delay the signal
  // appropriately. This also gives time for the out buffer to be refreshed
  // with new data between detection and suppression getting enabled.
  for (int i = 0; i < num_channels_; ++i) {
    memcpy(&data[i * data_length_],
           suppression_enabled_ ? &out_buffer_[i * analysis_length_]
                                : &in_buffer_[i * analysis_length_],
           data_length_ * sizeof(*data));
  }
  return result;
}
+
// This should only be called when detection is enabled. UpdateBuffers() must
// have been called. At return, |out_buffer_| will be filled with the
// processed output.
void TransientSuppressor::Suppress(float* in_ptr,
                                   float* spectral_mean,
                                   float* out_ptr) {
  // Go to frequency domain.
  for (size_t i = 0; i < analysis_length_; ++i) {
    // TODO(aluebs): Rename windows
    fft_buffer_[i] = in_ptr[i] * window_[i];
  }

  WebRtc_rdft(analysis_length_, 1, fft_buffer_.get(), ip_.get(), wfft_.get());

  // Since WebRtc_rdft puts R[n/2] in fft_buffer_[1], we move it to the end
  // for convenience.
  fft_buffer_[analysis_length_] = fft_buffer_[1];
  fft_buffer_[analysis_length_ + 1] = 0.f;
  fft_buffer_[1] = 0.f;

  // L1 magnitude approximation per complex bin (see ComplexMagnitude()).
  for (size_t i = 0; i < complex_analysis_length_; ++i) {
    magnitudes_[i] = ComplexMagnitude(fft_buffer_[i * 2],
                                      fft_buffer_[i * 2 + 1]);
  }
  // Restore audio if necessary.
  if (suppression_enabled_) {
    if (use_hard_restoration_) {
      HardRestoration(spectral_mean);
    } else {
      SoftRestoration(spectral_mean);
    }
  }

  // Update the spectral mean (one-pole IIR with kMeanIIRCoefficient).
  for (size_t i = 0; i < complex_analysis_length_; ++i) {
    spectral_mean[i] = (1 - kMeanIIRCoefficient) * spectral_mean[i] +
                       kMeanIIRCoefficient * magnitudes_[i];
  }

  // Back to time domain.
  // Put R[n/2] back in fft_buffer_[1].
  fft_buffer_[1] = fft_buffer_[analysis_length_];

  WebRtc_rdft(analysis_length_,
              -1,
              fft_buffer_.get(),
              ip_.get(),
              wfft_.get());
  // The inverse rdft is unnormalized; 2/N recovers the original scale.
  const float fft_scaling = 2.f / analysis_length_;

  // Accumulate the windowed, rescaled result into |out_ptr| (note the +=).
  for (size_t i = 0; i < analysis_length_; ++i) {
    out_ptr[i] += fft_buffer_[i] * window_[i] * fft_scaling;
  }
}
+
+// Updates the typing-detection state machine from the |key_pressed| flag of
+// the current chunk. All constants below are expressed in chunks of
+// ts::kChunkSizeMs milliseconds each.
+void TransientSuppressor::UpdateKeypress(bool key_pressed) {
+  // Each key press adds a large penalty; each chunk without one decays the
+  // counter by 1.
+  const int kKeypressPenalty = 1000 / ts::kChunkSizeMs;
+  const int kIsTypingThreshold = 1000 / ts::kChunkSizeMs;
+  const int kChunksUntilNotTyping = 4000 / ts::kChunkSizeMs; // 4 seconds.
+
+  // Any key press immediately enables detection.
+  if (key_pressed) {
+    keypress_counter_ += kKeypressPenalty;
+    chunks_since_keypress_ = 0;
+    detection_enabled_ = true;
+  }
+  keypress_counter_ = std::max(0, keypress_counter_ - 1);
+
+  // A second key press within roughly a second of the first pushes the
+  // counter above the threshold and turns suppression on.
+  if (keypress_counter_ > kIsTypingThreshold) {
+    if (!suppression_enabled_) {
+      LOG(LS_INFO) << "[ts] Transient suppression is now enabled.";
+    }
+    suppression_enabled_ = true;
+    keypress_counter_ = 0;
+  }
+
+  // After kChunksUntilNotTyping chunks without a key press, both detection
+  // and suppression are turned off.
+  if (detection_enabled_ &&
+      ++chunks_since_keypress_ > kChunksUntilNotTyping) {
+    if (suppression_enabled_) {
+      LOG(LS_INFO) << "[ts] Transient suppression is now disabled.";
+    }
+    detection_enabled_ = false;
+    suppression_enabled_ = false;
+    keypress_counter_ = 0;
+  }
+}
+
+// Selects between hard restoration (unvoiced audio) and soft restoration
+// (voiced audio) based on |voice_probability|, with hysteresis: switching
+// into hard restoration requires only kHardRestorationOffsetDelay consecutive
+// unvoiced chunks, while switching back out requires
+// kHardRestorationOnsetDelay consecutive voiced chunks.
+void TransientSuppressor::UpdateRestoration(float voice_probability) {
+  const int kHardRestorationOffsetDelay = 3;
+  const int kHardRestorationOnsetDelay = 80;
+
+  bool not_voiced = voice_probability < kVoiceThreshold;
+
+  if (not_voiced == use_hard_restoration_) {
+    // Current mode matches the voicing decision; reset the change counter.
+    chunks_since_voice_change_ = 0;
+  } else {
+    ++chunks_since_voice_change_;
+
+    if ((use_hard_restoration_ &&
+         chunks_since_voice_change_ > kHardRestorationOffsetDelay) ||
+        (!use_hard_restoration_ &&
+         chunks_since_voice_change_ > kHardRestorationOnsetDelay)) {
+      use_hard_restoration_ = not_voiced;
+      chunks_since_voice_change_ = 0;
+    }
+  }
+}
+
+// Shift buffers to make way for new data. Must be called after
+// |detection_enabled_| is updated by UpdateKeypress().
+void TransientSuppressor::UpdateBuffers(float* data) {
+  // TODO(aluebs): Change to ring buffer.
+  // Drop the oldest |data_length_| samples of every channel.
+  memmove(in_buffer_.get(),
+          &in_buffer_[data_length_],
+          (buffer_delay_ + (num_channels_ - 1) * analysis_length_) *
+          sizeof(in_buffer_[0]));
+  // Copy new chunk to buffer. Each channel occupies a contiguous
+  // |analysis_length_| slice; the new samples land after |buffer_delay_|.
+  for (int i = 0; i < num_channels_; ++i) {
+    memcpy(&in_buffer_[buffer_delay_ + i * analysis_length_],
+           &data[i * data_length_],
+           data_length_ * sizeof(*data));
+  }
+  if (detection_enabled_) {
+    // Shift previous chunk in out buffer.
+    memmove(out_buffer_.get(),
+            &out_buffer_[data_length_],
+            (buffer_delay_ + (num_channels_ - 1) * analysis_length_) *
+            sizeof(out_buffer_[0]));
+    // Initialize new chunk in out buffer. It is zeroed because Suppress()
+    // accumulates into it via overlap-add.
+    for (int i = 0; i < num_channels_; ++i) {
+      memset(&out_buffer_[buffer_delay_ + i * analysis_length_],
+             0,
+             data_length_ * sizeof(out_buffer_[0]));
+    }
+  }
+}
+
+// Restores the unvoiced signal if a click is present.
+// Attenuates by a certain factor every peak in the |fft_buffer_| that exceeds
+// the spectral mean. The attenuation depends on |detector_smoothed_|.
+// If a restoration takes place, the |magnitudes_| are updated to the new value.
+void TransientSuppressor::HardRestoration(float* spectral_mean) {
+  // The large exponent (200 with a reference signal, 50 without) makes
+  // |detector_result| saturate toward 1 even for moderate detector values.
+  const float detector_result =
+      1.f - pow(1.f - detector_smoothed_, using_reference_ ? 200.f : 50.f);
+  // To restore, we get the peaks in the spectrum. If higher than the previous
+  // spectral mean we adjust them.
+  for (size_t i = 0; i < complex_analysis_length_; ++i) {
+    if (magnitudes_[i] > spectral_mean[i] && magnitudes_[i] > 0) {
+      // RandU() generates values on [0, int16::max()]
+      const float phase = 2 * ts::kPi * WebRtcSpl_RandU(&seed_) /
+                          std::numeric_limits<int16_t>::max();
+      const float scaled_mean = detector_result * spectral_mean[i];
+
+      // Cross-fade the bin toward a spectral-mean-sized component with a
+      // random phase, weighted by |detector_result|.
+      fft_buffer_[i * 2] = (1 - detector_result) * fft_buffer_[i * 2] +
+                           scaled_mean * cosf(phase);
+      fft_buffer_[i * 2 + 1] = (1 - detector_result) * fft_buffer_[i * 2 + 1] +
+                               scaled_mean * sinf(phase);
+      // Keep |magnitudes_| consistent with the modified bin so the spectral
+      // mean update uses the restored value.
+      magnitudes_[i] = magnitudes_[i] -
+                       detector_result * (magnitudes_[i] - spectral_mean[i]);
+    }
+  }
+}
+
+// Restores the voiced signal if a click is present.
+// Attenuates by a certain factor every peak in the |fft_buffer_| that exceeds
+// the spectral mean and that is lower than some function of the current block
+// frequency mean. The attenuation depends on |detector_smoothed_|.
+// If a restoration takes place, the |magnitudes_| are updated to the new value.
+void TransientSuppressor::SoftRestoration(float* spectral_mean) {
+  // Get the spectral magnitude mean of the current block over the voice
+  // frequency bins only.
+  float block_frequency_mean = 0;
+  for (size_t i = kMinVoiceBin; i < kMaxVoiceBin; ++i) {
+    block_frequency_mean += magnitudes_[i];
+  }
+  block_frequency_mean /= (kMaxVoiceBin - kMinVoiceBin);
+
+  // To restore, we get the peaks in the spectrum. If higher than the
+  // previous spectral mean and lower than a factor of the block mean
+  // we adjust them. The factor is a double sigmoid that has a minimum in the
+  // voice frequency range (300Hz - 3kHz). With a reference signal the block
+  // mean condition is skipped.
+  for (size_t i = 0; i < complex_analysis_length_; ++i) {
+    if (magnitudes_[i] > spectral_mean[i] && magnitudes_[i] > 0 &&
+        (using_reference_ ||
+         magnitudes_[i] < block_frequency_mean * mean_factor_[i])) {
+      // Shrink the magnitude toward the spectral mean, proportionally to the
+      // smoothed detector output, preserving the bin's phase.
+      const float new_magnitude =
+          magnitudes_[i] -
+          detector_smoothed_ * (magnitudes_[i] - spectral_mean[i]);
+      const float magnitude_ratio = new_magnitude / magnitudes_[i];
+
+      fft_buffer_[i * 2] *= magnitude_ratio;
+      fft_buffer_[i * 2 + 1] *= magnitude_ratio;
+      magnitudes_[i] = new_magnitude;
+    }
+  }
+}
+
+} // namespace webrtc
diff --git a/webrtc/modules/audio_processing/transient/transient_suppressor.h b/webrtc/modules/audio_processing/transient/transient_suppressor.h
new file mode 100644
index 0000000..3d7dba8
--- /dev/null
+++ b/webrtc/modules/audio_processing/transient/transient_suppressor.h
@@ -0,0 +1,120 @@
+/*
+ * Copyright (c) 2013 The WebRTC project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#ifndef WEBRTC_MODULES_AUDIO_PROCESSING_TRANSIENT_TRANSIENT_SUPPRESSOR_H_
+#define WEBRTC_MODULES_AUDIO_PROCESSING_TRANSIENT_TRANSIENT_SUPPRESSOR_H_
+
+#include <deque>
+#include <set>
+
+#include "webrtc/system_wrappers/interface/scoped_ptr.h"
+#include "webrtc/test/testsupport/gtest_prod_util.h"
+#include "webrtc/typedefs.h"
+
+namespace webrtc {
+
+class TransientDetector;
+
+// Detects transients in an audio stream and suppresses them using a simple
+// restoration algorithm that attenuates unexpected spikes in the spectrum.
+class TransientSuppressor {
+ public:
+  TransientSuppressor();
+  ~TransientSuppressor();
+
+  int Initialize(int sample_rate_hz, int detector_rate_hz, int num_channels);
+
+  // Processes a |data| chunk, and returns it with keystrokes suppressed from
+  // it. The float format is assumed to be int16 ranged. If there are more than
+  // one channel, the chunks are concatenated one after the other in |data|.
+  // |data_length| must be equal to |data_length_|.
+  // |num_channels| must be equal to |num_channels_|.
+  // A sub-band, ideally the higher, can be used as |detection_data|. If it is
+  // NULL, |data| is used for the detection too. The |detection_data| is always
+  // assumed mono.
+  // If a reference signal (e.g. keyboard microphone) is available, it can be
+  // passed in as |reference_data|. It is assumed mono and must have the same
+  // length as |data|. NULL is accepted if unavailable.
+  // This suppressor performs better if voice information is available.
+  // |voice_probability| is the probability of voice being present in this chunk
+  // of audio. If voice information is not available, |voice_probability| must
+  // always be set to 1.
+  // |key_pressed| determines if a key was pressed on this audio chunk.
+  // Returns 0 on success and -1 otherwise.
+  int Suppress(float* data,
+               size_t data_length,
+               int num_channels,
+               const float* detection_data,
+               size_t detection_length,
+               const float* reference_data,
+               size_t reference_length,
+               float voice_probability,
+               bool key_pressed);
+
+ private:
+  FRIEND_TEST_ALL_PREFIXES(TransientSuppressorTest,
+                           TypingDetectionLogicWorksAsExpectedForMono);
+  // Processes one channel: windowed FFT, restoration, IFFT and overlap-add
+  // into |out_ptr|.
+  void Suppress(float* in_ptr, float* spectral_mean, float* out_ptr);
+
+  // Updates |detection_enabled_| / |suppression_enabled_| from key presses.
+  void UpdateKeypress(bool key_pressed);
+  // Chooses hard vs. soft restoration based on |voice_probability|.
+  void UpdateRestoration(float voice_probability);
+
+  // Shifts |in_buffer_| / |out_buffer_| and appends the new chunk.
+  void UpdateBuffers(float* data);
+
+  // Restoration for unvoiced audio (see .cc for details).
+  void HardRestoration(float* spectral_mean);
+  // Restoration for voiced audio (see .cc for details).
+  void SoftRestoration(float* spectral_mean);
+
+  scoped_ptr<TransientDetector> detector_;
+
+  size_t data_length_;  // Samples per channel in each processed chunk.
+  size_t detection_length_;  // Length of the detection signal.
+  size_t analysis_length_;  // Per-channel analysis window length in samples.
+  size_t buffer_delay_;  // Delay in samples between input and output.
+  size_t complex_analysis_length_;  // Number of complex FFT bins.
+  int num_channels_;
+  // Input buffer where the original samples are stored.
+  scoped_ptr<float[]> in_buffer_;
+  // Buffer for the optional mono detection signal.
+  scoped_ptr<float[]> detection_buffer_;
+  // Output buffer where the restored samples are stored.
+  scoped_ptr<float[]> out_buffer_;
+
+  // Arrays for fft: work area and coefficient table for WebRtc_rdft.
+  scoped_ptr<int[]> ip_;
+  scoped_ptr<float[]> wfft_;
+
+  // Per-channel IIR-smoothed magnitude spectrum.
+  scoped_ptr<float[]> spectral_mean_;
+
+  // Stores the data for the fft.
+  scoped_ptr<float[]> fft_buffer_;
+
+  // Magnitudes of the current analysis block.
+  scoped_ptr<float[]> magnitudes_;
+
+  // Analysis/synthesis window applied before the FFT and after the IFFT.
+  const float* window_;
+
+  // Per-bin factor used by SoftRestoration's block-mean condition.
+  scoped_ptr<float[]> mean_factor_;
+
+  // Detector output smoothed with an exponentially decaying tail.
+  float detector_smoothed_;
+
+  // Typing-detection state (see UpdateKeypress()).
+  int keypress_counter_;
+  int chunks_since_keypress_;
+  bool detection_enabled_;
+  bool suppression_enabled_;
+
+  // Restoration-mode state (see UpdateRestoration()).
+  bool use_hard_restoration_;
+  int chunks_since_voice_change_;
+
+  // PRNG seed for the random phase used in HardRestoration().
+  uint32_t seed_;
+
+  // Whether the detector is currently using the reference signal.
+  bool using_reference_;
+};
+
+} // namespace webrtc
+
+#endif // WEBRTC_MODULES_AUDIO_PROCESSING_TRANSIENT_TRANSIENT_SUPPRESSOR_H_
diff --git a/webrtc/modules/audio_processing/transient/transient_suppressor_unittest.cc b/webrtc/modules/audio_processing/transient/transient_suppressor_unittest.cc
new file mode 100644
index 0000000..0c1010e
--- /dev/null
+++ b/webrtc/modules/audio_processing/transient/transient_suppressor_unittest.cc
@@ -0,0 +1,85 @@
+/*
+ * Copyright (c) 2014 The WebRTC project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include "webrtc/modules/audio_processing/transient/transient_suppressor.h"
+
+#include "testing/gtest/include/gtest/gtest.h"
+#include "webrtc/modules/audio_processing/transient/common.h"
+
+namespace webrtc {
+
+// Exercises UpdateKeypress() directly (via FRIEND_TEST access to the private
+// |detection_enabled_| / |suppression_enabled_| flags) to verify the typing
+// detection state machine.
+TEST(TransientSuppressorTest, TypingDetectionLogicWorksAsExpectedForMono) {
+  static const int kNumChannels = 1;
+
+  TransientSuppressor ts;
+  ts.Initialize(ts::kSampleRate16kHz, ts::kSampleRate16kHz, kNumChannels);
+
+  // Each key-press enables detection.
+  EXPECT_FALSE(ts.detection_enabled_);
+  ts.UpdateKeypress(true);
+  EXPECT_TRUE(ts.detection_enabled_);
+
+  // It takes four seconds without any key-press to disable the detection.
+  for (int time_ms = 0; time_ms < 3990; time_ms += ts::kChunkSizeMs) {
+    ts.UpdateKeypress(false);
+    EXPECT_TRUE(ts.detection_enabled_);
+  }
+  ts.UpdateKeypress(false);
+  EXPECT_FALSE(ts.detection_enabled_);
+
+  // Key-presses that are more than a second apart from each other don't enable
+  // suppression.
+  for (int i = 0; i < 100; ++i) {
+    EXPECT_FALSE(ts.suppression_enabled_);
+    ts.UpdateKeypress(true);
+    EXPECT_TRUE(ts.detection_enabled_);
+    EXPECT_FALSE(ts.suppression_enabled_);
+    for (int time_ms = 0; time_ms < 990; time_ms += ts::kChunkSizeMs) {
+      ts.UpdateKeypress(false);
+      EXPECT_TRUE(ts.detection_enabled_);
+      EXPECT_FALSE(ts.suppression_enabled_);
+    }
+    ts.UpdateKeypress(false);
+  }
+
+  // Two consecutive key-presses are enough to enable the suppression.
+  ts.UpdateKeypress(true);
+  EXPECT_FALSE(ts.suppression_enabled_);
+  ts.UpdateKeypress(true);
+  EXPECT_TRUE(ts.suppression_enabled_);
+
+  // Key-presses that are less than a second apart from each other don't disable
+  // detection nor suppression.
+  for (int i = 0; i < 100; ++i) {
+    for (int time_ms = 0; time_ms < 1000; time_ms += ts::kChunkSizeMs) {
+      ts.UpdateKeypress(false);
+      EXPECT_TRUE(ts.detection_enabled_);
+      EXPECT_TRUE(ts.suppression_enabled_);
+    }
+    ts.UpdateKeypress(true);
+    EXPECT_TRUE(ts.detection_enabled_);
+    EXPECT_TRUE(ts.suppression_enabled_);
+  }
+
+  // It takes four seconds without any key-press to disable the detection and
+  // suppression.
+  for (int time_ms = 0; time_ms < 3990; time_ms += ts::kChunkSizeMs) {
+    ts.UpdateKeypress(false);
+    EXPECT_TRUE(ts.detection_enabled_);
+    EXPECT_TRUE(ts.suppression_enabled_);
+  }
+  for (int time_ms = 0; time_ms < 1000; time_ms += ts::kChunkSizeMs) {
+    ts.UpdateKeypress(false);
+    EXPECT_FALSE(ts.detection_enabled_);
+    EXPECT_FALSE(ts.suppression_enabled_);
+  }
+}
+
+} // namespace webrtc
diff --git a/webrtc/modules/audio_processing/transient/wpd_node.cc b/webrtc/modules/audio_processing/transient/wpd_node.cc
new file mode 100644
index 0000000..8854516
--- /dev/null
+++ b/webrtc/modules/audio_processing/transient/wpd_node.cc
@@ -0,0 +1,71 @@
+/*
+ * Copyright (c) 2013 The WebRTC project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include "webrtc/modules/audio_processing/transient/wpd_node.h"
+
+#include <assert.h>
+#include <math.h>
+#include <string.h>
+
+#include "webrtc/common_audio/fir_filter.h"
+#include "webrtc/modules/audio_processing/transient/dyadic_decimator.h"
+#include "webrtc/system_wrappers/interface/scoped_ptr.h"
+
+namespace webrtc {
+
+// Constructs a node holding |length| output samples, zero-initialized, with a
+// FIR filter built from the given coefficients.
+WPDNode::WPDNode(size_t length,
+                 const float* coefficients,
+                 size_t coefficients_length)
+    : // The data buffer has parent data length to be able to contain and filter
+      // it.
+      data_(new float[2 * length + 1]),
+      length_(length),
+      filter_(FIRFilter::Create(coefficients,
+                                coefficients_length,
+                                2 * length + 1)) {
+  assert(length > 0 && coefficients && coefficients_length > 0);
+  // NOTE(review): memset's value parameter is an int; the 0.f literal is
+  // implicitly converted to 0 here, which zero-fills correctly but a plain 0
+  // would be clearer.
+  memset(data_.get(), 0.f, (2 * length + 1) * sizeof(data_[0]));
+}
+
+WPDNode::~WPDNode() {}
+
+// Filters the parent node's data, decimates it by 2 and stores the absolute
+// values in |data_|. Returns 0 on success, -1 on invalid input or an
+// unexpected decimated length.
+int WPDNode::Update(const float* parent_data, size_t parent_data_length) {
+  if (!parent_data || (parent_data_length / 2) != length_) {
+    return -1;
+  }
+
+  // Filter data.
+  filter_->Filter(parent_data, parent_data_length, data_.get());
+
+  // Decimate data in place, keeping the odd-indexed samples.
+  const bool kOddSequence = true;
+  size_t output_samples = DyadicDecimate(
+      data_.get(), parent_data_length, kOddSequence, data_.get(), length_);
+  if (output_samples != length_) {
+    return -1;
+  }
+
+  // Get abs to all values.
+  for (size_t i = 0; i < length_; ++i) {
+    data_[i] = fabs(data_[i]);
+  }
+
+  return 0;
+}
+
+// Overwrites the node data with |new_data|. |length| must equal |length_|.
+// Returns 0 on success and -1 otherwise.
+int WPDNode::set_data(const float* new_data, size_t length) {
+  if (!new_data || length != length_) {
+    return -1;
+  }
+  memcpy(data_.get(), new_data, length * sizeof(data_[0]));
+  return 0;
+}
+
+} // namespace webrtc
diff --git a/webrtc/modules/audio_processing/transient/wpd_node.h b/webrtc/modules/audio_processing/transient/wpd_node.h
new file mode 100644
index 0000000..d7c2463
--- /dev/null
+++ b/webrtc/modules/audio_processing/transient/wpd_node.h
@@ -0,0 +1,46 @@
+/*
+ * Copyright (c) 2013 The WebRTC project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#ifndef WEBRTC_MODULES_AUDIO_PROCESSING_TRANSIENT_WPD_NODE_H_
+#define WEBRTC_MODULES_AUDIO_PROCESSING_TRANSIENT_WPD_NODE_H_
+
+#include "webrtc/system_wrappers/interface/scoped_ptr.h"
+#include "webrtc/typedefs.h"
+
+namespace webrtc {
+
+class FIRFilter;
+
+// A single node of a Wavelet Packet Decomposition (WPD) tree.
+class WPDNode {
+ public:
+  // Creates a WPDNode. The data vector will contain zeros. The filter will have
+  // the coefficients provided.
+  WPDNode(size_t length, const float* coefficients, size_t coefficients_length);
+  ~WPDNode();
+
+  // Updates the node data. |parent_data_length| / 2 must be equal to
+  // |length_|.
+  // Returns 0 if correct, and -1 otherwise.
+  int Update(const float* parent_data, size_t parent_data_length);
+
+  const float* data() const { return data_.get(); }
+  // Overwrites the node data. |length| must equal |length_|.
+  // Returns 0 if correct, and -1 otherwise.
+  int set_data(const float* new_data, size_t length);
+  size_t length() const { return length_; }
+
+ private:
+  scoped_ptr<float[]> data_;   // Node output; sized 2 * |length_| + 1 so it
+                               // can also hold the parent data while filtering.
+  size_t length_;              // Number of valid output samples.
+  scoped_ptr<FIRFilter> filter_;
+};
+
+} // namespace webrtc
+
+#endif // WEBRTC_MODULES_AUDIO_PROCESSING_TRANSIENT_WPD_NODE_H_
diff --git a/webrtc/modules/audio_processing/transient/wpd_node_unittest.cc b/webrtc/modules/audio_processing/transient/wpd_node_unittest.cc
new file mode 100644
index 0000000..631a6db
--- /dev/null
+++ b/webrtc/modules/audio_processing/transient/wpd_node_unittest.cc
@@ -0,0 +1,66 @@
+/*
+ * Copyright (c) 2013 The WebRTC project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include "webrtc/modules/audio_processing/transient/wpd_node.h"
+
+#include <string.h>
+
+#include "testing/gtest/include/gtest/gtest.h"
+#include "webrtc/system_wrappers/interface/scoped_ptr.h"
+
+namespace webrtc {
+
+// Shared fixtures for the WPDNode tests below.
+static const size_t kDataLength = 5;
+static const float kTolerance = 0.0001f;
+
+static const size_t kParentDataLength = kDataLength * 2;
+static const float kParentData[kParentDataLength] =
+    {1.f, 2.f, 3.f, 4.f, 5.f, 6.f, 7.f, 8.f, 9.f, 10.f};
+
+static const float kCoefficients[] = {0.2f, -0.3f, 0.5f, -0.7f, 0.11f};
+static const size_t kCoefficientsLength = sizeof(kCoefficients) /
+                                          sizeof(kCoefficients[0]);
+
+// set_data() followed by data() round-trips the input bytes.
+TEST(WPDNodeTest, Accessors) {
+  WPDNode node(kDataLength, kCoefficients, kCoefficientsLength);
+  EXPECT_EQ(0, node.set_data(kParentData, kDataLength));
+  EXPECT_EQ(0, memcmp(node.data(),
+                      kParentData,
+                      kDataLength * sizeof(node.data()[0])));
+}
+
+// With a single identity coefficient the filter is a no-op, so Update() only
+// decimates: the node keeps the odd-indexed parent samples.
+TEST(WPDNodeTest, UpdateThatOnlyDecimates) {
+  // (sic: "Indenty" is a typo for "Identity" preserved from the original.)
+  const float kIndentyCoefficient = 1.f;
+  WPDNode node(kDataLength, &kIndentyCoefficient, 1);
+  EXPECT_EQ(0, node.Update(kParentData, kParentDataLength));
+  for (size_t i = 0; i < kDataLength; ++i) {
+    EXPECT_FLOAT_EQ(kParentData[i * 2 + 1], node.data()[i]);
+  }
+}
+
+// Spot-checks Update() output values for an arbitrary filter.
+TEST(WPDNodeTest, UpdateWithArbitraryDataAndArbitraryFilter) {
+  WPDNode node(kDataLength, kCoefficients, kCoefficientsLength);
+  EXPECT_EQ(0, node.Update(kParentData, kParentDataLength));
+  EXPECT_NEAR(0.1f, node.data()[0], kTolerance);
+  EXPECT_NEAR(0.2f, node.data()[1], kTolerance);
+  EXPECT_NEAR(0.18f, node.data()[2], kTolerance);
+  EXPECT_NEAR(0.56f, node.data()[3], kTolerance);
+  EXPECT_NEAR(0.94f, node.data()[4], kTolerance);
+}
+
+// Update() and set_data() reject NULL input and mismatched lengths.
+TEST(WPDNodeTest, ExpectedErrorReturnValue) {
+  WPDNode node(kDataLength, kCoefficients, kCoefficientsLength);
+  EXPECT_EQ(-1, node.Update(kParentData, kParentDataLength - 1));
+  EXPECT_EQ(-1, node.Update(NULL, kParentDataLength));
+  EXPECT_EQ(-1, node.set_data(kParentData, kDataLength - 1));
+  EXPECT_EQ(-1, node.set_data(NULL, kDataLength));
+}
+
+} // namespace webrtc
diff --git a/webrtc/modules/audio_processing/transient/wpd_tree.cc b/webrtc/modules/audio_processing/transient/wpd_tree.cc
new file mode 100644
index 0000000..a3c3ec0
--- /dev/null
+++ b/webrtc/modules/audio_processing/transient/wpd_tree.cc
@@ -0,0 +1,119 @@
+/*
+ * Copyright (c) 2013 The WebRTC project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include "webrtc/modules/audio_processing/transient/wpd_tree.h"
+
+#include <assert.h>
+#include <math.h>
+#include <string.h>
+
+#include "webrtc/modules/audio_processing/transient/dyadic_decimator.h"
+#include "webrtc/modules/audio_processing/transient/wpd_node.h"
+#include "webrtc/system_wrappers/interface/scoped_ptr.h"
+
+namespace webrtc {
+
+// Builds the complete binary tree: the root (index 1) uses an identity
+// filter; each node's left child filters with the low-pass coefficients and
+// each right child with the high-pass coefficients, at half the parent
+// length.
+WPDTree::WPDTree(size_t data_length, const float* high_pass_coefficients,
+                 const float* low_pass_coefficients, size_t coefficients_length,
+                 int levels)
+    : data_length_(data_length),
+      levels_(levels),
+      num_nodes_((1 << (levels + 1)) - 1) {
+  assert(data_length > (static_cast<size_t>(1) << levels) &&
+         high_pass_coefficients &&
+         low_pass_coefficients &&
+         levels > 0);
+  // Size is 1 more, so we can use the array as 1-based. nodes_[0] is never
+  // allocated.
+  nodes_.reset(new scoped_ptr<WPDNode>[num_nodes_ + 1]);
+
+  // Create the first node.
+  const float kRootCoefficient = 1.f;  // Identity Coefficient.
+  nodes_[1].reset(new WPDNode(data_length, &kRootCoefficient, 1));
+  // Variables used to create the rest of the nodes.
+  size_t index = 1;
+  size_t index_left_child = 0;
+  size_t index_right_child = 0;
+
+  int num_nodes_at_curr_level = 0;
+
+  // Branching each node in each level to create its children. The last level is
+  // not branched (all the nodes of that level are leaves).
+  for (int current_level = 0; current_level < levels; ++current_level) {
+    num_nodes_at_curr_level = 1 << current_level;
+    for (int i = 0; i < num_nodes_at_curr_level; ++i) {
+      index = (1 << current_level) + i;
+      // Obtain the index of the current node children.
+      index_left_child = index * 2;
+      index_right_child = index_left_child + 1;
+      nodes_[index_left_child].reset(new WPDNode(nodes_[index]->length() / 2,
+                                                 low_pass_coefficients,
+                                                 coefficients_length));
+      nodes_[index_right_child].reset(new WPDNode(nodes_[index]->length() / 2,
+                                                  high_pass_coefficients,
+                                                  coefficients_length));
+    }
+  }
+}
+
+WPDTree::~WPDTree() {}
+
+// Returns the node at (|level|, |index|) using the 1-based complete-binary-
+// tree layout (flat index = 2^level + index), or NULL when either argument is
+// out of bounds.
+WPDNode* WPDTree::NodeAt(int level, int index) {
+  const int kNumNodesAtLevel = 1 << level;
+  if (level < 0 || level > levels_ || index < 0 || index >= kNumNodesAtLevel) {
+    return NULL;
+  }
+  return nodes_[(1 << level) + index].get();
+}
+
+// Feeds |data| to the root node, then propagates it level by level: every
+// child node filters and decimates its parent's data. Returns 0 on success
+// and -1 on the first failure.
+int WPDTree::Update(const float* data, size_t data_length) {
+  if (!data || data_length != data_length_) {
+    return -1;
+  }
+
+  // Update the root node.
+  int update_result = nodes_[1]->set_data(data, data_length);
+  if (update_result != 0) {
+    return -1;
+  }
+
+  // Variables used to update the rest of the nodes.
+  size_t index = 1;
+  size_t index_left_child = 0;
+  size_t index_right_child = 0;
+
+  int num_nodes_at_curr_level = 0;
+
+  // Top-down pass; the leaf level has no children and is only written to.
+  for (int current_level = 0; current_level < levels_; ++current_level) {
+    num_nodes_at_curr_level = 1 << current_level;
+    for (int i = 0; i < num_nodes_at_curr_level; ++i) {
+      index = (1 << current_level) + i;
+      // Obtain the index of the current node children.
+      index_left_child = index * 2;
+      index_right_child = index_left_child + 1;
+
+      update_result = nodes_[index_left_child]->Update(
+          nodes_[index]->data(), nodes_[index]->length());
+      if (update_result != 0) {
+        return -1;
+      }
+
+      update_result = nodes_[index_right_child]->Update(
+          nodes_[index]->data(), nodes_[index]->length());
+      if (update_result != 0) {
+        return -1;
+      }
+    }
+  }
+
+  return 0;
+}
+
+} // namespace webrtc
diff --git a/webrtc/modules/audio_processing/transient/wpd_tree.h b/webrtc/modules/audio_processing/transient/wpd_tree.h
new file mode 100644
index 0000000..e488c9d
--- /dev/null
+++ b/webrtc/modules/audio_processing/transient/wpd_tree.h
@@ -0,0 +1,91 @@
+/*
+ * Copyright (c) 2013 The WebRTC project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#ifndef WEBRTC_MODULES_AUDIO_PROCESSING_TRANSIENT_WPD_TREE_H_
+#define WEBRTC_MODULES_AUDIO_PROCESSING_TRANSIENT_WPD_TREE_H_
+
+#include "webrtc/modules/audio_processing/transient/wpd_node.h"
+#include "webrtc/system_wrappers/interface/scoped_ptr.h"
+
+namespace webrtc {
+
+// Tree of a Wavelet Packet Decomposition (WPD).
+//
+// The root node contains all the data provided; for each node in the tree, the
+// left child contains the approximation coefficients extracted from the node,
+// and the right child contains the detail coefficients.
+// It preserves its state, so it can be multiple-called.
+//
+// The number of nodes in the tree will be 2 ^ (levels + 1) - 1.
+//
+// Implementation details: Since the tree always will be a complete binary tree,
+// it is implemented using a single linear array instead of managing the
+// relationships in each node. For convenience it is better to use an array that
+// starts in 1 (instead of 0). Taking that into account, the following formulas
+// apply:
+// Root node index: 1.
+// Node(Level, Index in that level): 2 ^ Level + (Index in that level).
+// Left Child: Current node index * 2.
+// Right Child: Current node index * 2 + 1.
+// Parent: Current Node Index / 2 (Integer division).
+class WPDTree {
+ public:
+  // Creates a WPD tree using the data length and coefficients provided.
+  WPDTree(size_t data_length,
+          const float* high_pass_coefficients,
+          const float* low_pass_coefficients,
+          size_t coefficients_length,
+          int levels);
+  ~WPDTree();
+
+  // Returns the number of nodes at any given level.
+  static int NumberOfNodesAtLevel(int level) {
+    return 1 << level;
+  }
+
+  // Returns a pointer to the node at the given level and index(of that level).
+  // Level goes from 0 to levels().
+  // Index goes from 0 to the number of NumberOfNodesAtLevel(level) - 1.
+  //
+  // You can use the following formulas to get any node within the tree:
+  // Notation: (Level, Index of node in that level).
+  // Root node: (0/0).
+  // Left Child: (Current node level + 1, Current node index * 2).
+  // Right Child: (Current node level + 1, Current node index * 2 + 1).
+  // Parent: (Current node level - 1, Current node index / 2) (Integer division)
+  //
+  // If level or index are out of bounds the function will return NULL.
+  WPDNode* NodeAt(int level, int index);
+
+  // Updates all the nodes of the tree with the new data. |data_length| must be
+  // the same that was used for the creation of the tree.
+  // Returns 0 if correct, and -1 otherwise.
+  int Update(const float* data, size_t data_length);
+
+  // Returns the total number of levels below the root. Root is considered
+  // level 0.
+  int levels() const { return levels_; }
+
+  // Returns the total number of nodes.
+  int num_nodes() const { return num_nodes_; }
+
+  // Returns the total number of leaves.
+  int num_leaves() const { return 1 << levels_; }
+
+ private:
+  size_t data_length_;  // Length of the data fed to the root node.
+  int levels_;          // Number of levels below the root.
+  int num_nodes_;       // Total nodes: 2 ^ (levels_ + 1) - 1.
+  // 1-based array of nodes; nodes_[0] is never allocated.
+  scoped_ptr<scoped_ptr<WPDNode>[]> nodes_;
+};
+
+} // namespace webrtc
+
+#endif // WEBRTC_MODULES_AUDIO_PROCESSING_TRANSIENT_WPD_TREE_H_
diff --git a/webrtc/modules/audio_processing/transient/wpd_tree_unittest.cc b/webrtc/modules/audio_processing/transient/wpd_tree_unittest.cc
new file mode 100644
index 0000000..eecdd95
--- /dev/null
+++ b/webrtc/modules/audio_processing/transient/wpd_tree_unittest.cc
@@ -0,0 +1,198 @@
+/*
+ * Copyright (c) 2013 The WebRTC project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include "webrtc/modules/audio_processing/transient/wpd_tree.h"
+
+#include <sstream>
+#include <string>
+
+#include "testing/gtest/include/gtest/gtest.h"
+#include "webrtc/modules/audio_processing/transient/daubechies_8_wavelet_coeffs.h"
+#include "webrtc/modules/audio_processing/transient/file_utils.h"
+#include "webrtc/system_wrappers/interface/file_wrapper.h"
+#include "webrtc/system_wrappers/interface/scoped_ptr.h"
+#include "webrtc/test/testsupport/fileutils.h"
+
+namespace webrtc {
+
+TEST(WPDTreeTest, Construction) {
+ const size_t kTestBufferSize = 100;
+ const int kLevels = 5;
+ const int kExpectedNumberOfNodes = (1 << (kLevels + 1)) - 1;
+
+ float test_buffer[kTestBufferSize];
+ memset(test_buffer, 0.f, kTestBufferSize * sizeof(*test_buffer));
+ float test_coefficients[] = {1.f, 2.f, 3.f, 4.f, 5.f};
+ const size_t kTestCoefficientsLength = sizeof(test_coefficients) /
+ sizeof(test_coefficients[0]);
+ WPDTree tree(kTestBufferSize,
+ test_coefficients,
+ test_coefficients,
+ kTestCoefficientsLength,
+ kLevels);
+ ASSERT_EQ(kExpectedNumberOfNodes, tree.num_nodes());
+ // Checks for NodeAt(level, index).
+ int nodes_at_level = 0;
+ for (int level = 0; level <= kLevels; ++level) {
+ nodes_at_level = 1 << level;
+ for (int i = 0; i < nodes_at_level; ++i) {
+ ASSERT_TRUE(NULL != tree.NodeAt(level, i));
+ }
+ // Out of bounds.
+ EXPECT_EQ(NULL, tree.NodeAt(level, -1));
+ EXPECT_EQ(NULL, tree.NodeAt(level, -12));
+ EXPECT_EQ(NULL, tree.NodeAt(level, nodes_at_level));
+ EXPECT_EQ(NULL, tree.NodeAt(level, nodes_at_level + 5));
+ }
+ // Out of bounds.
+ EXPECT_EQ(NULL, tree.NodeAt(-1, 0));
+ EXPECT_EQ(NULL, tree.NodeAt(-12, 0));
+ EXPECT_EQ(NULL, tree.NodeAt(kLevels + 1, 0));
+ EXPECT_EQ(NULL, tree.NodeAt(kLevels + 5, 0));
+ // Checks for Update().
+ EXPECT_EQ(0, tree.Update(test_buffer, kTestBufferSize));
+ EXPECT_EQ(-1, tree.Update(NULL, kTestBufferSize));
+ EXPECT_EQ(-1, tree.Update(test_buffer, kTestBufferSize - 1));
+}
+
+// This test is for the correctness of the tree.
+// Checks the results from the Matlab equivalent, it is done comparing the
+// results that are stored in the output files from Matlab.
+// It also writes the results in its own set of files in the out directory.
+// Matlab and output files contain all the results in double precision (Little
+// endian) appended.
+TEST(WPDTreeTest, CorrectnessBasedOnMatlabFiles) {
+ // 10 ms at 16000 Hz.
+ const size_t kTestBufferSize = 160;
+ const int kLevels = 3;
+ const int kLeaves = 1 << kLevels;
+ const size_t kLeavesSamples = kTestBufferSize >> kLevels;
+ // Create tree with Daubechies 8 Wavelet coefficients.
+ WPDTree tree(kTestBufferSize,
+ kDaubechies8HighPassCoefficients,
+ kDaubechies8LowPassCoefficients,
+ kDaubechies8CoefficientsLength,
+ kLevels);
+ // Allocate and open all matlab and out files.
+ scoped_ptr<FileWrapper> matlab_files_data[kLeaves];
+ scoped_ptr<FileWrapper> out_files_data[kLeaves];
+
+ for (int i = 0; i < kLeaves; ++i) {
+ // Matlab files.
+ matlab_files_data[i].reset(FileWrapper::Create());
+
+ std::ostringstream matlab_stream;
+ matlab_stream << "audio_processing/transient/wpd" << i;
+ std::string matlab_string = test::ResourcePath(matlab_stream.str(), "dat");
+ matlab_files_data[i]->OpenFile(matlab_string.c_str(),
+ true, // Read only.
+ false, // No loop.
+ false); // No text.
+
+ bool file_opened = matlab_files_data[i]->Open();
+ ASSERT_TRUE(file_opened) << "File could not be opened.\n" << matlab_string;
+
+ // Out files.
+ out_files_data[i].reset(FileWrapper::Create());
+
+ std::ostringstream out_stream;
+ out_stream << test::OutputPath() << "wpd_" << i << ".out";
+ std::string out_string = out_stream.str();
+
+ out_files_data[i]->OpenFile(out_string.c_str(),
+ false, // Write mode.
+ false, // No loop.
+ false); // No text.
+
+ file_opened = out_files_data[i]->Open();
+ ASSERT_TRUE(file_opened) << "File could not be opened.\n" << out_string;
+ }
+
+ // Prepare the test file.
+ std::string test_file_name = test::ResourcePath(
+ "audio_processing/transient/ajm-macbook-1-spke16m", "pcm");
+
+ scoped_ptr<FileWrapper> test_file(FileWrapper::Create());
+
+ test_file->OpenFile(test_file_name.c_str(),
+ true, // Read only.
+ false, // No loop.
+ false); // No text.
+
+ bool file_opened = test_file->Open();
+ ASSERT_TRUE(file_opened) << "File could not be opened.\n" << test_file_name;
+
+ float test_buffer[kTestBufferSize];
+
+ // Only the first frames of the audio file are tested. The matlab files also
+ // only contain information about the first frames.
+ const size_t kMaxFramesToTest = 100;
+ const float kTolerance = 0.03f;
+
+ size_t frames_read = 0;
+
+ // Read first buffer from the PCM test file.
+ size_t file_samples_read = ReadInt16FromFileToFloatBuffer(test_file.get(),
+ kTestBufferSize,
+ test_buffer);
+ while (file_samples_read > 0 && frames_read < kMaxFramesToTest) {
+ ++frames_read;
+
+ if (file_samples_read < kTestBufferSize) {
+ // Pad the rest of the buffer with zeros.
+ for (size_t i = file_samples_read; i < kTestBufferSize; ++i) {
+ test_buffer[i] = 0.0;
+ }
+ }
+ tree.Update(test_buffer, kTestBufferSize);
+ double matlab_buffer[kTestBufferSize];
+
+ // Compare results with data from the matlab test files.
+ for (int i = 0; i < kLeaves; ++i) {
+ // Compare data values
+ size_t matlab_samples_read =
+ ReadDoubleBufferFromFile(matlab_files_data[i].get(),
+ kLeavesSamples,
+ matlab_buffer);
+
+ ASSERT_EQ(kLeavesSamples, matlab_samples_read)
+ << "Matlab test files are malformed.\n"
+ << "File: 3_" << i;
+ // Get output data from the corresponding node
+ const float* node_data = tree.NodeAt(kLevels, i)->data();
+ // Compare with matlab files.
+ for (size_t j = 0; j < kLeavesSamples; ++j) {
+ EXPECT_NEAR(matlab_buffer[j], node_data[j], kTolerance)
+ << "\nLeaf: " << i << "\nSample: " << j
+ << "\nFrame: " << frames_read - 1;
+ }
+
+ // Write results to out files.
+ WriteFloatBufferToFile(out_files_data[i].get(),
+ kLeavesSamples,
+ node_data);
+ }
+
+ // Read next buffer from the PCM test file.
+ file_samples_read = ReadInt16FromFileToFloatBuffer(test_file.get(),
+ kTestBufferSize,
+ test_buffer);
+ }
+
+ // Close all matlab and out files.
+ for (int i = 0; i < kLeaves; ++i) {
+ matlab_files_data[i]->CloseFile();
+ out_files_data[i]->CloseFile();
+ }
+
+ test_file->CloseFile();
+}
+
+} // namespace webrtc
diff --git a/webrtc/modules/modules.gyp b/webrtc/modules/modules.gyp
index f129d35..a7ae7f9 100644
--- a/webrtc/modules/modules.gyp
+++ b/webrtc/modules/modules.gyp
@@ -159,10 +159,31 @@
'audio_coding/neteq/mock/mock_payload_splitter.h',
'audio_coding/neteq/tools/input_audio_file_unittest.cc',
'audio_coding/neteq/tools/packet_unittest.cc',
- 'audio_processing/aec/system_delay_unittest.cc',
'audio_processing/aec/echo_cancellation_unittest.cc',
+ 'audio_processing/aec/system_delay_unittest.cc',
+ # TODO(ajm): Fix to match new interface.
+ # 'audio_processing/agc/agc_unittest.cc',
+ 'audio_processing/agc/agc_audio_proc_unittest.cc',
+ 'audio_processing/agc/circular_buffer_unittest.cc',
+ 'audio_processing/agc/gmm_unittest.cc',
+ 'audio_processing/agc/histogram_unittest.cc',
+ 'audio_processing/agc/include/mock_agc.h',
+ 'audio_processing/agc/pitch_based_vad_unittest.cc',
+ 'audio_processing/agc/pitch_internal_unittest.cc',
+ 'audio_processing/agc/pole_zero_filter_unittest.cc',
+ 'audio_processing/agc/standalone_vad_unittest.cc',
+ 'audio_processing/agc/test/test_utils.cc',
'audio_processing/echo_cancellation_impl_unittest.cc',
'audio_processing/splitting_filter_unittest.cc',
+ 'audio_processing/transient/dyadic_decimator_unittest.cc',
+ 'audio_processing/transient/file_utils.cc',
+ 'audio_processing/transient/file_utils.h',
+ 'audio_processing/transient/file_utils_unittest.cc',
+ 'audio_processing/transient/moving_moments_unittest.cc',
+ 'audio_processing/transient/transient_detector_unittest.cc',
+ 'audio_processing/transient/transient_suppressor_unittest.cc',
+ 'audio_processing/transient/wpd_node_unittest.cc',
+ 'audio_processing/transient/wpd_tree_unittest.cc',
'audio_processing/utility/delay_estimator_unittest.cc',
'audio_processing/utility/ring_buffer_unittest.cc',
'bitrate_controller/bitrate_controller_unittest.cc',
@@ -326,11 +347,6 @@
'target_name': 'modules_tests',
'type': '<(gtest_target_type)',
'dependencies': [
- 'audio_coding_module',
- 'rtp_rtcp',
- 'video_codecs_test_framework',
- 'webrtc_utility',
- 'webrtc_video_coding',
'<(DEPTH)/testing/gtest.gyp:gtest',
'<(webrtc_root)/common_video/common_video.gyp:common_video',
'<(webrtc_root)/modules/video_coding/codecs/vp8/vp8.gyp:webrtc_vp8',
@@ -339,6 +355,11 @@
'<(webrtc_root)/test/metrics.gyp:metrics',
'<(webrtc_root)/test/test.gyp:test_support',
'<(webrtc_root)/test/test.gyp:test_support_main',
+ 'audio_coding_module',
+ 'rtp_rtcp',
+ 'video_codecs_test_framework',
+ 'webrtc_utility',
+ 'webrtc_video_coding',
],
'defines': [
'<@(audio_coding_defines)',
@@ -346,23 +367,23 @@
'sources': [
'audio_coding/main/test/APITest.cc',
'audio_coding/main/test/Channel.cc',
- 'audio_coding/main/test/dual_stream_unittest.cc',
'audio_coding/main/test/EncodeDecodeTest.cc',
- 'audio_coding/main/test/iSACTest.cc',
- 'audio_coding/main/test/opus_test.cc',
- 'audio_coding/main/test/PacketLossTest.cc',
'audio_coding/main/test/PCMFile.cc',
+ 'audio_coding/main/test/PacketLossTest.cc',
'audio_coding/main/test/RTPFile.cc',
'audio_coding/main/test/SpatialAudio.cc',
'audio_coding/main/test/TestAllCodecs.cc',
- 'audio_coding/main/test/target_delay_unittest.cc',
- 'audio_coding/main/test/Tester.cc',
'audio_coding/main/test/TestRedFec.cc',
'audio_coding/main/test/TestStereo.cc',
'audio_coding/main/test/TestVADDTX.cc',
+ 'audio_coding/main/test/Tester.cc',
'audio_coding/main/test/TimedTrace.cc',
'audio_coding/main/test/TwoWayCommunication.cc',
+ 'audio_coding/main/test/dual_stream_unittest.cc',
+ 'audio_coding/main/test/iSACTest.cc',
'audio_coding/main/test/initial_delay_unittest.cc',
+ 'audio_coding/main/test/opus_test.cc',
+ 'audio_coding/main/test/target_delay_unittest.cc',
'audio_coding/main/test/utility.cc',
'rtp_rtcp/test/testFec/test_fec.cc',
'video_coding/codecs/test/videoprocessor_integrationtest.cc',
diff --git a/webrtc/modules/modules_unittests.isolate b/webrtc/modules/modules_unittests.isolate
index c5a0a28..700c506 100644
--- a/webrtc/modules/modules_unittests.isolate
+++ b/webrtc/modules/modules_unittests.isolate
@@ -34,6 +34,38 @@
'<(DEPTH)/resources/audio_coding/speech_mono_32_48kHz.pcm',
'<(DEPTH)/resources/audio_coding/testfile32kHz.pcm',
'<(DEPTH)/resources/audio_coding/teststereo32kHz.pcm',
+ '<(DEPTH)/resources/audio_processing/agc/agc_audio.pcm',
+ '<(DEPTH)/resources/audio_processing/agc/agc_no_circular_buffer.dat',
+ '<(DEPTH)/resources/audio_processing/agc/agc_pitch_gain.dat',
+ '<(DEPTH)/resources/audio_processing/agc/agc_pitch_lag.dat',
+ '<(DEPTH)/resources/audio_processing/agc/agc_spectral_peak.dat',
+ '<(DEPTH)/resources/audio_processing/agc/agc_vad.dat',
+ '<(DEPTH)/resources/audio_processing/agc/agc_voicing_prob.dat',
+ '<(DEPTH)/resources/audio_processing/agc/agc_with_circular_buffer.dat',
+ '<(DEPTH)/resources/audio_processing/transient/ajm-macbook-1-spke.gai',
+ '<(DEPTH)/resources/audio_processing/transient/ajm-macbook-1-spke16m.pcm',
+ '<(DEPTH)/resources/audio_processing/transient/ajm-macbook-1-spke16m_chunk_10_transient_30_rational.dat',
+ '<(DEPTH)/resources/audio_processing/transient/audio16kHz.pcm',
+ '<(DEPTH)/resources/audio_processing/transient/audio32kHz.pcm',
+ '<(DEPTH)/resources/audio_processing/transient/audio48kHz.pcm',
+ '<(DEPTH)/resources/audio_processing/transient/audio8kHz.pcm',
+ '<(DEPTH)/resources/audio_processing/transient/detect16kHz.dat',
+ '<(DEPTH)/resources/audio_processing/transient/detect32kHz.dat',
+ '<(DEPTH)/resources/audio_processing/transient/detect48kHz.dat',
+ '<(DEPTH)/resources/audio_processing/transient/detect8kHz.dat',
+ '<(DEPTH)/resources/audio_processing/transient/double-utils.dat',
+ '<(DEPTH)/resources/audio_processing/transient/float-utils.dat',
+ '<(DEPTH)/resources/audio_processing/transient/suppressed16kHz.pcm',
+ '<(DEPTH)/resources/audio_processing/transient/suppressed32kHz.pcm',
+ '<(DEPTH)/resources/audio_processing/transient/suppressed8kHz.pcm',
+ '<(DEPTH)/resources/audio_processing/transient/wpd0.dat',
+ '<(DEPTH)/resources/audio_processing/transient/wpd1.dat',
+ '<(DEPTH)/resources/audio_processing/transient/wpd2.dat',
+ '<(DEPTH)/resources/audio_processing/transient/wpd3.dat',
+ '<(DEPTH)/resources/audio_processing/transient/wpd4.dat',
+ '<(DEPTH)/resources/audio_processing/transient/wpd5.dat',
+ '<(DEPTH)/resources/audio_processing/transient/wpd6.dat',
+ '<(DEPTH)/resources/audio_processing/transient/wpd7.dat',
'<(DEPTH)/resources/deflicker_before_cif_short.yuv',
'<(DEPTH)/resources/far16_stereo.pcm',
'<(DEPTH)/resources/far32_stereo.pcm',