Added a run-time ARM-Neon detection feature to the SPL functions.
Review URL: https://webrtc-codereview.appspot.com/728010

git-svn-id: http://webrtc.googlecode.com/svn/trunk@2721 4adac7df-926f-26a2-2b94-8c16560cd09d
diff --git a/Android.mk b/Android.mk
index 5d567e8..b9d31f8 100644
--- a/Android.mk
+++ b/Android.mk
@@ -45,7 +45,8 @@
 ifeq ($(WEBRTC_BUILD_NEON_LIBS),true)
 LOCAL_WHOLE_STATIC_LIBRARIES += \
     libwebrtc_aecm_neon \
-    libwebrtc_ns_neon
+    libwebrtc_ns_neon \
+    libwebrtc_spl_neon
 endif
 
 LOCAL_STATIC_LIBRARIES := \
diff --git a/android-webrtc.mk b/android-webrtc.mk
index cd495b0..98b2821 100644
--- a/android-webrtc.mk
+++ b/android-webrtc.mk
@@ -29,13 +29,16 @@
 ifeq ($(ARCH_ARM_HAVE_NEON),true)
 MY_WEBRTC_COMMON_DEFS += \
     '-DWEBRTC_ARCH_ARM_NEON'
-MY_ARM_CFLAGS_NEON := \
-    -flax-vector-conversions
 endif
 
 ifneq (,$(filter '-DWEBRTC_DETECT_ARM_NEON' '-DWEBRTC_ARCH_ARM_NEON', \
     $(MY_WEBRTC_COMMON_DEFS)))
 WEBRTC_BUILD_NEON_LIBS := true
+# TODO(kma): Use MY_WEBRTC_COMMON_DEFS for Neon libraries in AECM, NS, and iSAC.
+MY_WEBRTC_COMMON_DEFS += \
+    -mfpu=neon \
+    -mfloat-abi=softfp \
+    -flax-vector-conversions
 endif
 
 ifeq ($(ARCH_ARM_HAVE_ARMV7A),true)
diff --git a/src/common_audio/signal_processing/Android.mk b/src/common_audio/signal_processing/Android.mk
index 0ae2b3c..653bd02 100644
--- a/src/common_audio/signal_processing/Android.mk
+++ b/src/common_audio/signal_processing/Android.mk
@@ -21,8 +21,10 @@
     auto_correlation.c \
     complex_fft.c \
     copy_set_operations.c \
+    cross_correlation.c \
     division_operations.c \
     dot_product_with_scale.c \
+    downsample_fast.c \
     energy.c \
     filter_ar.c \
     filter_ma_fast_q12.c \
@@ -39,6 +41,7 @@
     resample_by_2.c \
     resample_by_2_internal.c \
     resample_fractional.c \
+    spl_init.c \
     spl_sqrt.c \
     spl_version.c \
     splitting_filter.c \
@@ -53,20 +56,6 @@
     $(LOCAL_PATH)/include \
     $(LOCAL_PATH)/../.. 
 
-ifeq ($(ARCH_ARM_HAVE_NEON),true)
-LOCAL_SRC_FILES += \
-    cross_correlation_neon.s \
-    downsample_fast_neon.s \
-    min_max_operations_neon.s \
-    vector_scaling_operations_neon.s
-LOCAL_CFLAGS += \
-    $(MY_ARM_CFLAGS_NEON)
-else
-LOCAL_SRC_FILES += \
-    cross_correlation.c \
-    downsample_fast.c
-endif
-
 ifeq ($(ARCH_ARM_HAVE_ARMV7A),true)
 LOCAL_SRC_FILES += \
     filter_ar_fast_q12_armv7.s
@@ -99,3 +88,34 @@
 include external/stlport/libstlport.mk
 endif
 include $(BUILD_STATIC_LIBRARY)
+
+#########################
+# Build the neon library.
+ifeq ($(WEBRTC_BUILD_NEON_LIBS),true)
+
+include $(CLEAR_VARS)
+
+LOCAL_ARM_MODE := arm
+LOCAL_MODULE_CLASS := STATIC_LIBRARIES
+LOCAL_MODULE := libwebrtc_spl_neon
+LOCAL_MODULE_TAGS := optional
+LOCAL_SRC_FILES := \
+    cross_correlation_neon.s \
+    downsample_fast_neon.s \
+    min_max_operations_neon.s \
+    vector_scaling_operations_neon.s
+
+# Flags passed to both C and C++ files.
+LOCAL_CFLAGS := $(MY_WEBRTC_COMMON_DEFS)
+
+LOCAL_C_INCLUDES := \
+    $(LOCAL_PATH)/include \
+    $(LOCAL_PATH)/../.. 
+
+ifndef NDK_ROOT
+include external/stlport/libstlport.mk
+endif
+include $(BUILD_STATIC_LIBRARY)
+
+endif # ifeq ($(WEBRTC_BUILD_NEON_LIBS),true)
+
diff --git a/src/common_audio/signal_processing/cross_correlation.c b/src/common_audio/signal_processing/cross_correlation.c
index cf7705c..05506a7 100644
--- a/src/common_audio/signal_processing/cross_correlation.c
+++ b/src/common_audio/signal_processing/cross_correlation.c
@@ -10,13 +10,14 @@
 
 #include "signal_processing_library.h"
 
-void WebRtcSpl_CrossCorrelation(int32_t* cross_correlation,
-                                const int16_t* seq1,
-                                const int16_t* seq2,
-                                int16_t dim_seq,
-                                int16_t dim_cross_correlation,
-                                int16_t right_shifts,
-                                int16_t step_seq2) {
+/* C version of WebRtcSpl_CrossCorrelation() for generic platforms. */
+void WebRtcSpl_CrossCorrelationC(int32_t* cross_correlation,
+                                 const int16_t* seq1,
+                                 const int16_t* seq2,
+                                 int16_t dim_seq,
+                                 int16_t dim_cross_correlation,
+                                 int16_t right_shifts,
+                                 int16_t step_seq2) {
   int i = 0, j = 0;
 
   for (i = 0; i < dim_cross_correlation; i++) {
diff --git a/src/common_audio/signal_processing/cross_correlation_neon.s b/src/common_audio/signal_processing/cross_correlation_neon.s
index e9b1c69..a18f672 100644
--- a/src/common_audio/signal_processing/cross_correlation_neon.s
+++ b/src/common_audio/signal_processing/cross_correlation_neon.s
@@ -9,7 +9,7 @@
 @
 
 @ cross_correlation_neon.s
-@ This file contains the function WebRtcSpl_CrossCorrelation(),
+@ This file contains the function WebRtcSpl_CrossCorrelationNeon(),
 @ optimized for ARM Neon platform.
 @
 @ Reference Ccode at end of this file.
@@ -33,9 +33,9 @@
 .fpu neon
 
 .align  2
-.global WebRtcSpl_CrossCorrelation
+.global WebRtcSpl_CrossCorrelationNeon
 
-WebRtcSpl_CrossCorrelation:
+WebRtcSpl_CrossCorrelationNeon:
 
 .fnstart
 
@@ -109,13 +109,13 @@
 
 
 @ TODO(kma): Place this piece of reference code into a C code file.
-@ void WebRtcSpl_CrossCorrelation(WebRtc_Word32* cross_correlation,
-@                                 WebRtc_Word16* seq1,
-@                                 WebRtc_Word16* seq2,
-@                                 WebRtc_Word16 dim_seq,
-@                                 WebRtc_Word16 dim_cross_correlation,
-@                                 WebRtc_Word16 right_shifts,
-@                                 WebRtc_Word16 step_seq2) {
+@ void WebRtcSpl_CrossCorrelationNeon(WebRtc_Word32* cross_correlation,
+@                                     WebRtc_Word16* seq1,
+@                                     WebRtc_Word16* seq2,
+@                                     WebRtc_Word16 dim_seq,
+@                                     WebRtc_Word16 dim_cross_correlation,
+@                                     WebRtc_Word16 right_shifts,
+@                                     WebRtc_Word16 step_seq2) {
 @   int i = 0;
 @   int j = 0;
 @   int inner_loop_len1 = dim_seq >> 3;
diff --git a/src/common_audio/signal_processing/downsample_fast.c b/src/common_audio/signal_processing/downsample_fast.c
index 526cdca..4784aba 100644
--- a/src/common_audio/signal_processing/downsample_fast.c
+++ b/src/common_audio/signal_processing/downsample_fast.c
@@ -11,14 +11,15 @@
 #include "signal_processing_library.h"
 
 // TODO(Bjornv): Change the function parameter order to WebRTC code style.
-int WebRtcSpl_DownsampleFast(const int16_t* data_in,
-                             int data_in_length,
-                             int16_t* data_out,
-                             int data_out_length,
-                             const int16_t* __restrict coefficients,
-                             int coefficients_length,
-                             int factor,
-                             int delay) {
+// C version of WebRtcSpl_DownsampleFast() for generic platforms.
+int WebRtcSpl_DownsampleFastC(const int16_t* data_in,
+                              int data_in_length,
+                              int16_t* data_out,
+                              int data_out_length,
+                              const int16_t* __restrict coefficients,
+                              int coefficients_length,
+                              int factor,
+                              int delay) {
   int i = 0;
   int j = 0;
   int32_t out_s32 = 0;
diff --git a/src/common_audio/signal_processing/downsample_fast_neon.s b/src/common_audio/signal_processing/downsample_fast_neon.s
index 906b0a1..13a825d 100644
--- a/src/common_audio/signal_processing/downsample_fast_neon.s
+++ b/src/common_audio/signal_processing/downsample_fast_neon.s
@@ -8,7 +8,7 @@
 @ be found in the AUTHORS file in the root of the source tree.
 @
 
-@ This file contains the function WebRtcSpl_DownsampleFast(), optimized for
+@ This file contains the function WebRtcSpl_DownsampleFastNeon(), optimized for
 @ ARM Neon platform. The description header can be found in
 @ signal_processing_library.h
 @
@@ -18,9 +18,9 @@
 .fpu neon
 
 .align  2
-.global WebRtcSpl_DownsampleFast
+.global WebRtcSpl_DownsampleFastNeon
 
-WebRtcSpl_DownsampleFast:
+WebRtcSpl_DownsampleFastNeon:
 
 .fnstart
 
diff --git a/src/common_audio/signal_processing/include/signal_processing_library.h b/src/common_audio/signal_processing/include/signal_processing_library.h
index 5dcb111..1738e8e 100644
--- a/src/common_audio/signal_processing/include/signal_processing_library.h
+++ b/src/common_audio/signal_processing/include/signal_processing_library.h
@@ -162,6 +162,16 @@
 // inline functions:
 #include "spl_inl.h"
 
+// Initialize SPL. Currently it contains only function pointer initialization.
+// If the underlying platform is known to be ARM-Neon (WEBRTC_ARCH_ARM_NEON
+// defined), the pointers will be assigned to code optimized for Neon; otherwise
+// if run-time Neon detection (WEBRTC_DETECT_ARM_NEON) is enabled, the pointers
+// will be assigned to either Neon code or generic C code; otherwise, generic C
+// code will be assigned.
+// Note that this function MUST be called before any SPL function pointer is
+// used; the pointers are defined without initializers and start out NULL.
+void WebRtcSpl_Init(void);
+
 // Get SPL Version
 WebRtc_Word16 WebRtcSpl_get_version(char* version,
                                     WebRtc_Word16 length_in_bytes);
@@ -196,7 +206,8 @@
 // End: Copy and set operations.
 
 
-// Minimum and maximum operations. Implementation in min_max_operations.c.
+// Minimum and maximum operation functions and their pointers.
+// Implementation in min_max_operations.c.
 
 // Returns the largest absolute value in a signed 16-bit vector.
 //
@@ -206,7 +217,12 @@
 //
 // Return value  : Maximum absolute value in vector;
 //                 or -1, if (vector == NULL || length <= 0).
-int16_t WebRtcSpl_MaxAbsValueW16(const int16_t* vector, int length);
+typedef int16_t (*MaxAbsValueW16)(const int16_t* vector, int length);
+extern MaxAbsValueW16 WebRtcSpl_MaxAbsValueW16;
+int16_t WebRtcSpl_MaxAbsValueW16C(const int16_t* vector, int length);
+#if (defined WEBRTC_DETECT_ARM_NEON) || (defined WEBRTC_ARCH_ARM_NEON)
+int16_t WebRtcSpl_MaxAbsValueW16Neon(const int16_t* vector, int length);
+#endif
 
 // Returns the largest absolute value in a signed 32-bit vector.
 //
@@ -216,7 +232,12 @@
 //
 // Return value  : Maximum absolute value in vector;
 //                 or -1, if (vector == NULL || length <= 0).
-int32_t WebRtcSpl_MaxAbsValueW32(const int32_t* vector, int length);
+typedef int32_t (*MaxAbsValueW32)(const int32_t* vector, int length);
+extern MaxAbsValueW32 WebRtcSpl_MaxAbsValueW32;
+int32_t WebRtcSpl_MaxAbsValueW32C(const int32_t* vector, int length);
+#if (defined WEBRTC_DETECT_ARM_NEON) || (defined WEBRTC_ARCH_ARM_NEON)
+int32_t WebRtcSpl_MaxAbsValueW32Neon(const int32_t* vector, int length);
+#endif
 
 // Returns the maximum value of a 16-bit vector.
 //
@@ -228,7 +249,12 @@
 //                 If (vector == NULL || length <= 0) WEBRTC_SPL_WORD16_MIN
 //                 is returned. Note that WEBRTC_SPL_WORD16_MIN is a feasible
 //                 value and we can't catch errors purely based on it.
-int16_t WebRtcSpl_MaxValueW16(const int16_t* vector, int length);
+typedef int16_t (*MaxValueW16)(const int16_t* vector, int length);
+extern MaxValueW16 WebRtcSpl_MaxValueW16;
+int16_t WebRtcSpl_MaxValueW16C(const int16_t* vector, int length);
+#if (defined WEBRTC_DETECT_ARM_NEON) || (defined WEBRTC_ARCH_ARM_NEON)
+int16_t WebRtcSpl_MaxValueW16Neon(const int16_t* vector, int length);
+#endif
 
 // Returns the maximum value of a 32-bit vector.
 //
@@ -240,7 +266,12 @@
 //                 If (vector == NULL || length <= 0) WEBRTC_SPL_WORD32_MIN
 //                 is returned. Note that WEBRTC_SPL_WORD32_MIN is a feasible
 //                 value and we can't catch errors purely based on it.
-int32_t WebRtcSpl_MaxValueW32(const int32_t* vector, int length);
+typedef int32_t (*MaxValueW32)(const int32_t* vector, int length);
+extern MaxValueW32 WebRtcSpl_MaxValueW32;
+int32_t WebRtcSpl_MaxValueW32C(const int32_t* vector, int length);
+#if (defined WEBRTC_DETECT_ARM_NEON) || (defined WEBRTC_ARCH_ARM_NEON)
+int32_t WebRtcSpl_MaxValueW32Neon(const int32_t* vector, int length);
+#endif
 
 // Returns the minimum value of a 16-bit vector.
 //
@@ -252,7 +283,12 @@
 //                 If (vector == NULL || length <= 0) WEBRTC_SPL_WORD16_MAX
 //                 is returned. Note that WEBRTC_SPL_WORD16_MAX is a feasible
 //                 value and we can't catch errors purely based on it.
-int16_t WebRtcSpl_MinValueW16(const int16_t* vector, int length);
+typedef int16_t (*MinValueW16)(const int16_t* vector, int length);
+extern MinValueW16 WebRtcSpl_MinValueW16;
+int16_t WebRtcSpl_MinValueW16C(const int16_t* vector, int length);
+#if (defined WEBRTC_DETECT_ARM_NEON) || (defined WEBRTC_ARCH_ARM_NEON)
+int16_t WebRtcSpl_MinValueW16Neon(const int16_t* vector, int length);
+#endif
 
 // Returns the minimum value of a 32-bit vector.
 //
@@ -264,7 +300,12 @@
 //                 If (vector == NULL || length <= 0) WEBRTC_SPL_WORD32_MAX
 //                 is returned. Note that WEBRTC_SPL_WORD32_MAX is a feasible
 //                 value and we can't catch errors purely based on it.
-int32_t WebRtcSpl_MinValueW32(const int32_t* vector, int length);
+typedef int32_t (*MinValueW32)(const int32_t* vector, int length);
+extern MinValueW32 WebRtcSpl_MinValueW32;
+int32_t WebRtcSpl_MinValueW32C(const int32_t* vector, int length);
+#if (defined WEBRTC_DETECT_ARM_NEON) || (defined WEBRTC_ARCH_ARM_NEON)
+int32_t WebRtcSpl_MinValueW32Neon(const int32_t* vector, int length);
+#endif
 
 // Returns the vector index to the largest absolute value of a 16-bit vector.
 //
@@ -358,7 +399,7 @@
                                   WebRtc_Word16* out_vector,
                                   int vector_length);
 
-// Performs the vector operation:
+// The functions (with related pointer) perform the vector operation:
 //   out_vector[k] = ((scale1 * in_vector1[k]) + (scale2 * in_vector2[k])
 //        + round_value) >> right_shifts,
 //   where  round_value = (1 << right_shifts) >> 1.
@@ -376,14 +417,30 @@
 // Return value            : 0 if OK, -1 if (in_vector1 == NULL
 //                           || in_vector2 == NULL || out_vector == NULL
 //                           || length <= 0 || right_shift < 0).
-int WebRtcSpl_ScaleAndAddVectorsWithRound(const int16_t* in_vector1,
-                                          int16_t in_vector1_scale,
-                                          const int16_t* in_vector2,
-                                          int16_t in_vector2_scale,
-                                          int right_shifts,
-                                          int16_t* out_vector,
-                                          int length);
-
+typedef int (*ScaleAndAddVectorsWithRound)(const int16_t* in_vector1,
+                                           int16_t in_vector1_scale,
+                                           const int16_t* in_vector2,
+                                           int16_t in_vector2_scale,
+                                           int right_shifts,
+                                           int16_t* out_vector,
+                                           int length);
+extern ScaleAndAddVectorsWithRound WebRtcSpl_ScaleAndAddVectorsWithRound;
+int WebRtcSpl_ScaleAndAddVectorsWithRoundC(const int16_t* in_vector1,
+                                           int16_t in_vector1_scale,
+                                           const int16_t* in_vector2,
+                                           int16_t in_vector2_scale,
+                                           int right_shifts,
+                                           int16_t* out_vector,
+                                           int length);
+#if (defined WEBRTC_DETECT_ARM_NEON) || (defined WEBRTC_ARCH_ARM_NEON)
+int WebRtcSpl_ScaleAndAddVectorsWithRoundNeon(const int16_t* in_vector1,
+                                              int16_t in_vector1_scale,
+                                              const int16_t* in_vector2,
+                                              int16_t in_vector2_scale,
+                                              int right_shifts,
+                                              int16_t* out_vector,
+                                              int length);
+#endif
 // End: Vector scaling operations.
 
 // iLBC specific functions. Implementations in ilbc_specific_functions.c.
@@ -508,7 +565,8 @@
                                   int use_order,
                                   WebRtc_Word16* refl_coef);
 
-// Calculates the cross-correlation between two sequences |seq1| and |seq2|.
+// The functions (with related pointer) calculate the cross-correlation between
+// two sequences |seq1| and |seq2|.
 // |seq1| is fixed and |seq2| slides as the pointer is increased with the
 // amount |step_seq2|. Note the arguments should obey the relationship:
 // |dim_seq| - 1 + |step_seq2| * (|dim_cross_correlation| - 1) <
@@ -530,13 +588,30 @@
 //
 // Output:
 //      - cross_correlation : The cross-correlation in Q(-right_shifts)
-void WebRtcSpl_CrossCorrelation(int32_t* cross_correlation,
-                                const int16_t* seq1,
-                                const int16_t* seq2,
-                                int16_t dim_seq,
-                                int16_t dim_cross_correlation,
-                                int16_t right_shifts,
-                                int16_t step_seq2);
+typedef void (*CrossCorrelation)(int32_t* cross_correlation,
+                                 const int16_t* seq1,
+                                 const int16_t* seq2,
+                                 int16_t dim_seq,
+                                 int16_t dim_cross_correlation,
+                                 int16_t right_shifts,
+                                 int16_t step_seq2);
+extern CrossCorrelation WebRtcSpl_CrossCorrelation;
+void WebRtcSpl_CrossCorrelationC(int32_t* cross_correlation,
+                                 const int16_t* seq1,
+                                 const int16_t* seq2,
+                                 int16_t dim_seq,
+                                 int16_t dim_cross_correlation,
+                                 int16_t right_shifts,
+                                 int16_t step_seq2);
+#if (defined WEBRTC_DETECT_ARM_NEON) || (defined WEBRTC_ARCH_ARM_NEON)
+void WebRtcSpl_CrossCorrelationNeon(int32_t* cross_correlation,
+                                    const int16_t* seq1,
+                                    const int16_t* seq2,
+                                    int16_t dim_seq,
+                                    int16_t dim_cross_correlation,
+                                    int16_t right_shifts,
+                                    int16_t step_seq2);
+#endif
 
 // Creates (the first half of) a Hanning window. Size must be at least 1 and
 // at most 512.
@@ -636,7 +711,8 @@
                                int coefficients_length,
                                int data_length);
 
-// Performs a MA down sampling filter on a vector
+// The functions (with related pointer) perform a MA down sampling filter
+// on a vector.
 // Input:
 //      - data_in            : Input samples (state in positions
 //                               data_in[-order] .. data_in[-1])
@@ -651,14 +727,33 @@
 // Output:
 //      - data_out           : Filtered samples
 // Return value              : 0 if OK, -1 if |in_vector| is too short
-int WebRtcSpl_DownsampleFast(const int16_t* data_in,
-                             int data_in_length,
-                             int16_t* data_out,
-                             int data_out_length,
-                             const int16_t* __restrict coefficients,
-                             int coefficients_length,
-                             int factor,
-                             int delay);
+typedef int (*DownsampleFast)(const int16_t* data_in,
+                              int data_in_length,
+                              int16_t* data_out,
+                              int data_out_length,
+                              const int16_t* __restrict coefficients,
+                              int coefficients_length,
+                              int factor,
+                              int delay);
+extern DownsampleFast WebRtcSpl_DownsampleFast;
+int WebRtcSpl_DownsampleFastC(const int16_t* data_in,
+                              int data_in_length,
+                              int16_t* data_out,
+                              int data_out_length,
+                              const int16_t* __restrict coefficients,
+                              int coefficients_length,
+                              int factor,
+                              int delay);
+#if (defined WEBRTC_DETECT_ARM_NEON) || (defined WEBRTC_ARCH_ARM_NEON)
+int WebRtcSpl_DownsampleFastNeon(const int16_t* data_in,
+                                 int data_in_length,
+                                 int16_t* data_out,
+                                 int data_out_length,
+                                 const int16_t* __restrict coefficients,
+                                 int coefficients_length,
+                                 int factor,
+                                 int delay);
+#endif
 
 // End: Filter operations.
 
diff --git a/src/common_audio/signal_processing/min_max_operations.c b/src/common_audio/signal_processing/min_max_operations.c
index d3539d7..63a8a99 100644
--- a/src/common_audio/signal_processing/min_max_operations.c
+++ b/src/common_audio/signal_processing/min_max_operations.c
@@ -10,12 +10,12 @@
 
 /*
  * This file contains the implementation of functions
- * WebRtcSpl_MaxAbsValueW16()
- * WebRtcSpl_MaxAbsValueW32()
- * WebRtcSpl_MaxValueW16()
- * WebRtcSpl_MaxValueW32()
- * WebRtcSpl_MinValueW16()
- * WebRtcSpl_MinValueW32()
+ * WebRtcSpl_MaxAbsValueW16C()
+ * WebRtcSpl_MaxAbsValueW32C()
+ * WebRtcSpl_MaxValueW16C()
+ * WebRtcSpl_MaxValueW32C()
+ * WebRtcSpl_MinValueW16C()
+ * WebRtcSpl_MinValueW32C()
  * WebRtcSpl_MaxAbsIndexW16()
  * WebRtcSpl_MaxIndexW16()
  * WebRtcSpl_MaxIndexW32()
@@ -29,13 +29,11 @@
 #include <stdlib.h>
 
 // TODO(bjorn/kma): Consolidate function pairs (e.g. combine
-// WebRtcSpl_MaxAbsValueW16 and WebRtcSpl_MaxAbsIndexW16 into a single one.)
+//   WebRtcSpl_MaxAbsValueW16C and WebRtcSpl_MaxAbsIndexW16 into a single one.)
+// TODO(kma): Move the next six functions into min_max_operations_c.c.
 
-// TODO(kma): Move the code in the #ifndef block into min_max_operations_c.c.
-#ifndef WEBRTC_ARCH_ARM_NEON
-
-// Maximum absolute value of word16 vector.
-int16_t WebRtcSpl_MaxAbsValueW16(const int16_t* vector, int length) {
+// Maximum absolute value of word16 vector. C version for generic platforms.
+int16_t WebRtcSpl_MaxAbsValueW16C(const int16_t* vector, int length) {
   int i = 0, absolute = 0, maximum = 0;
 
   if (vector == NULL || length <= 0) {
@@ -58,8 +56,8 @@
   return (int16_t)maximum;
 }
 
-// Maximum absolute value of word32 vector.
-int32_t WebRtcSpl_MaxAbsValueW32(const int32_t* vector, int length) {
+// Maximum absolute value of word32 vector. C version for generic platforms.
+int32_t WebRtcSpl_MaxAbsValueW32C(const int32_t* vector, int length) {
   // Use uint32_t for the local variables, to accommodate the return value
   // of abs(0x80000000), which is 0x80000000.
 
@@ -82,8 +80,8 @@
   return (int32_t)maximum;
 }
 
-// Maximum value of word16 vector.
-int16_t WebRtcSpl_MaxValueW16(const int16_t* vector, int length) {
+// Maximum value of word16 vector. C version for generic platforms.
+int16_t WebRtcSpl_MaxValueW16C(const int16_t* vector, int length) {
   int16_t maximum = WEBRTC_SPL_WORD16_MIN;
   int i = 0;
 
@@ -98,8 +96,8 @@
   return maximum;
 }
 
-// Maximum value of word32 vector.
-int32_t WebRtcSpl_MaxValueW32(const int32_t* vector, int length) {
+// Maximum value of word32 vector. C version for generic platforms.
+int32_t WebRtcSpl_MaxValueW32C(const int32_t* vector, int length) {
   int32_t maximum = WEBRTC_SPL_WORD32_MIN;
   int i = 0;
 
@@ -114,8 +112,8 @@
   return maximum;
 }
 
-// Minimum value of word16 vector.
-int16_t WebRtcSpl_MinValueW16(const int16_t* vector, int length) {
+// Minimum value of word16 vector. C version for generic platforms.
+int16_t WebRtcSpl_MinValueW16C(const int16_t* vector, int length) {
   int16_t minimum = WEBRTC_SPL_WORD16_MAX;
   int i = 0;
 
@@ -130,8 +128,8 @@
   return minimum;
 }
 
-// Minimum value of word32 vector.
-int32_t WebRtcSpl_MinValueW32(const int32_t* vector, int length) {
+// Minimum value of word32 vector. C version for generic platforms.
+int32_t WebRtcSpl_MinValueW32C(const int32_t* vector, int length) {
   int32_t minimum = WEBRTC_SPL_WORD32_MAX;
   int i = 0;
 
@@ -145,8 +143,6 @@
   }
   return minimum;
 }
-#endif  // WEBRTC_ARCH_ARM_NEON
-
 
 // Index of maximum absolute value in a word16 vector.
 int WebRtcSpl_MaxAbsIndexW16(const int16_t* vector, int length) {
diff --git a/src/common_audio/signal_processing/min_max_operations_neon.s b/src/common_audio/signal_processing/min_max_operations_neon.s
index 01831ef..85dd2fb 100644
--- a/src/common_audio/signal_processing/min_max_operations_neon.s
+++ b/src/common_audio/signal_processing/min_max_operations_neon.s
@@ -8,7 +8,7 @@
 @ be found in the AUTHORS file in the root of the source tree.
 @
 
-@ This file contains the function WebRtcSpl_MaxAbsValueW16(), optimized for
+@ This file contains some minimum and maximum functions, optimized for
 @ ARM Neon platform. The description header can be found in
 @ signal_processing_library.h
 @
@@ -17,16 +17,16 @@
 
 .arch armv7-a
 .fpu neon
-.global WebRtcSpl_MaxAbsValueW16
-.global WebRtcSpl_MaxAbsValueW32
-.global WebRtcSpl_MaxValueW16
-.global WebRtcSpl_MaxValueW32
-.global WebRtcSpl_MinValueW16
-.global WebRtcSpl_MinValueW32
+.global WebRtcSpl_MaxAbsValueW16Neon
+.global WebRtcSpl_MaxAbsValueW32Neon
+.global WebRtcSpl_MaxValueW16Neon
+.global WebRtcSpl_MaxValueW32Neon
+.global WebRtcSpl_MinValueW16Neon
+.global WebRtcSpl_MinValueW32Neon
 .align  2
 
-@ int16_t WebRtcSpl_MaxAbsValueW16(const int16_t* vector, int length);
-WebRtcSpl_MaxAbsValueW16:
+@ int16_t WebRtcSpl_MaxAbsValueW16Neon(const int16_t* vector, int length);
+WebRtcSpl_MaxAbsValueW16Neon:
 .fnstart
 
   mov r2, #-1                 @ Initialize the return value.
@@ -73,8 +73,8 @@
 
 .fnend
 
-@ int32_t WebRtcSpl_MaxAbsValueW32(const int32_t* vector, int length);
-WebRtcSpl_MaxAbsValueW32:
+@ int32_t WebRtcSpl_MaxAbsValueW32Neon(const int32_t* vector, int length);
+WebRtcSpl_MaxAbsValueW32Neon:
 .fnstart
 
   cmp r0, #0
@@ -127,8 +127,8 @@
 
 .fnend
 
-@ int16_t WebRtcSpl_MaxValueW16(const int16_t* vector, int length);
-WebRtcSpl_MaxValueW16:
+@ int16_t WebRtcSpl_MaxValueW16Neon(const int16_t* vector, int length);
+WebRtcSpl_MaxValueW16Neon:
 .fnstart
 
   mov r2, #0x8000             @ Initialize the return value.
@@ -170,8 +170,8 @@
 
 .fnend
 
-@ int32_t WebRtcSpl_MaxValueW32(const int32_t* vector, int length);
-WebRtcSpl_MaxValueW32:
+@ int32_t WebRtcSpl_MaxValueW32Neon(const int32_t* vector, int length);
+WebRtcSpl_MaxValueW32Neon:
 .fnstart
 
   mov r2, #0x80000000         @ Initialize the return value.
@@ -215,8 +215,8 @@
 
 .fnend
 
-@ int16_t WebRtcSpl_MinValueW16(const int16_t* vector, int length);
-WebRtcSpl_MinValueW16:
+@ int16_t WebRtcSpl_MinValueW16Neon(const int16_t* vector, int length);
+WebRtcSpl_MinValueW16Neon:
 .fnstart
 
   movw r2, #0x7FFF            @ Initialize the return value.
@@ -259,8 +259,8 @@
 
 .fnend
 
-@ int32_t WebRtcSpl_MinValueW32(const int32_t* vector, int length);
-WebRtcSpl_MinValueW32:
+@ int32_t WebRtcSpl_MinValueW32Neon(const int32_t* vector, int length);
+WebRtcSpl_MinValueW32Neon:
 .fnstart
 
   mov r2, #0x7FFFFFFF         @ Initialize the return value.
diff --git a/src/common_audio/signal_processing/real_fft_unittest.cc b/src/common_audio/signal_processing/real_fft_unittest.cc
index dab0d15..d250797 100644
--- a/src/common_audio/signal_processing/real_fft_unittest.cc
+++ b/src/common_audio/signal_processing/real_fft_unittest.cc
@@ -25,6 +25,10 @@
 };
 
 class RealFFTTest : public ::testing::Test {
+ protected:
+   RealFFTTest() {
+     WebRtcSpl_Init();
+   }
 };
 
 TEST_F(RealFFTTest, CreateFailsOnBadInput) {
diff --git a/src/common_audio/signal_processing/signal_processing.gypi b/src/common_audio/signal_processing/signal_processing.gypi
index 9d6a977..b09c767 100644
--- a/src/common_audio/signal_processing/signal_processing.gypi
+++ b/src/common_audio/signal_processing/signal_processing.gypi
@@ -14,6 +14,9 @@
       'include_dirs': [
         'include',
       ],
+      'dependencies': [
+        '<(webrtc_root)/system_wrappers/source/system_wrappers.gyp:system_wrappers',
+      ],
       'direct_dependent_settings': {
         'include_dirs': [
           'include',
@@ -51,6 +54,7 @@
         'resample_by_2_internal.c',
         'resample_by_2_internal.h',
         'resample_fractional.c',
+        'spl_init.c',
         'spl_sqrt.c',
         'spl_sqrt_floor.c',
         'spl_version.c',
@@ -70,6 +74,7 @@
           ],
           'conditions': [
             ['armv7==1', {
+              'dependencies': ['signal_processing_neon',],
               'sources': [
                 'filter_ar_fast_q12_armv7.s',
               ],
@@ -77,18 +82,6 @@
                 'filter_ar_fast_q12.c',
               ],
             }],
-            ['arm_neon==1', {
-              'sources': [
-                'cross_correlation_neon.s',
-                'downsample_fast_neon.s',
-                'min_max_operations_neon.s',
-                'vector_scaling_operations_neon.s',
-              ],
-              'sources!': [
-                'cross_correlation.c',
-                'downsample_fast.c',
-              ],
-            }],
           ],
         }],
       ],
@@ -112,5 +105,20 @@
         }, # spl_unittests
       ], # targets
     }], # include_tests
+    ['target_arch=="arm" and armv7==1', {
+      'targets': [
+        {
+          'target_name': 'signal_processing_neon',
+          'type': '<(library)',
+          'includes': ['../../build/arm_neon.gypi',],
+          'sources': [
+            'cross_correlation_neon.s',
+            'downsample_fast_neon.s',
+            'min_max_operations_neon.s',
+            'vector_scaling_operations_neon.s',
+          ],
+        },
+      ],
+    }], # 'target_arch=="arm" and armv7==1'
   ], # conditions
 }
diff --git a/src/common_audio/signal_processing/signal_processing_unittest.cc b/src/common_audio/signal_processing/signal_processing_unittest.cc
index a7c69b2..4acef5b 100644
--- a/src/common_audio/signal_processing/signal_processing_unittest.cc
+++ b/src/common_audio/signal_processing/signal_processing_unittest.cc
@@ -17,12 +17,11 @@
 
 class SplTest : public testing::Test {
  protected:
-  virtual ~SplTest() {
-  }
-  void SetUp() {
-  }
-  void TearDown() {
-  }
+   SplTest() {
+     WebRtcSpl_Init();
+   }
+   virtual ~SplTest() {
+   }
 };
 
 TEST_F(SplTest, MacroTest) {
diff --git a/src/common_audio/signal_processing/spl_init.c b/src/common_audio/signal_processing/spl_init.c
new file mode 100644
index 0000000..b5c7709
--- /dev/null
+++ b/src/common_audio/signal_processing/spl_init.c
@@ -0,0 +1,118 @@
+/*
+ *  Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+/* The global function contained in this file initializes SPL function
+ * pointers, currently only for ARM platforms.
+ *
+ * Some code came from common/rtcd.c in the WebM project.
+ */
+
+/* TODO(kma): Add calls to WebRtcSpl_Init() in all related modules
+ * (AEC, NS, codecs etc.).
+ */
+
+#include "common_audio/signal_processing/include/signal_processing_library.h"
+#include "system_wrappers/interface/cpu_features_wrapper.h"
+
+/* Define the SPL function pointers; WebRtcSpl_Init() assigns them. */
+MaxAbsValueW16 WebRtcSpl_MaxAbsValueW16;
+MaxAbsValueW32 WebRtcSpl_MaxAbsValueW32;
+MaxValueW16 WebRtcSpl_MaxValueW16;
+MaxValueW32 WebRtcSpl_MaxValueW32;
+MinValueW16 WebRtcSpl_MinValueW16;
+MinValueW32 WebRtcSpl_MinValueW32;
+CrossCorrelation WebRtcSpl_CrossCorrelation;
+DownsampleFast WebRtcSpl_DownsampleFast;
+ScaleAndAddVectorsWithRound WebRtcSpl_ScaleAndAddVectorsWithRound;
+
+/* Point every SPL function pointer at its generic C implementation. */
+static void InitPointersToC(void) {
+  WebRtcSpl_MaxAbsValueW16 = WebRtcSpl_MaxAbsValueW16C;
+  WebRtcSpl_MaxAbsValueW32 = WebRtcSpl_MaxAbsValueW32C;
+  WebRtcSpl_MaxValueW16 = WebRtcSpl_MaxValueW16C;
+  WebRtcSpl_MaxValueW32 = WebRtcSpl_MaxValueW32C;
+  WebRtcSpl_MinValueW16 = WebRtcSpl_MinValueW16C;
+  WebRtcSpl_MinValueW32 = WebRtcSpl_MinValueW32C;
+  WebRtcSpl_CrossCorrelation = WebRtcSpl_CrossCorrelationC;
+  WebRtcSpl_DownsampleFast = WebRtcSpl_DownsampleFastC;
+  WebRtcSpl_ScaleAndAddVectorsWithRound =
+      WebRtcSpl_ScaleAndAddVectorsWithRoundC;
+}
+
+#if defined(WEBRTC_DETECT_ARM_NEON) || defined(WEBRTC_ARCH_ARM_NEON)
+/* Point every SPL function pointer at its ARM Neon implementation. */
+static void InitPointersToNeon(void) {
+  WebRtcSpl_MaxAbsValueW16 = WebRtcSpl_MaxAbsValueW16Neon;
+  WebRtcSpl_MaxAbsValueW32 = WebRtcSpl_MaxAbsValueW32Neon;
+  WebRtcSpl_MaxValueW16 = WebRtcSpl_MaxValueW16Neon;
+  WebRtcSpl_MaxValueW32 = WebRtcSpl_MaxValueW32Neon;
+  WebRtcSpl_MinValueW16 = WebRtcSpl_MinValueW16Neon;
+  WebRtcSpl_MinValueW32 = WebRtcSpl_MinValueW32Neon;
+  WebRtcSpl_CrossCorrelation = WebRtcSpl_CrossCorrelationNeon;
+  WebRtcSpl_DownsampleFast = WebRtcSpl_DownsampleFastNeon;
+  WebRtcSpl_ScaleAndAddVectorsWithRound =
+      WebRtcSpl_ScaleAndAddVectorsWithRoundNeon;
+}
+#endif  /* WEBRTC_DETECT_ARM_NEON || WEBRTC_ARCH_ARM_NEON */
+
+static void InitFunctionPointers(void) {
+#if defined(WEBRTC_DETECT_ARM_NEON)
+  if ((WebRtc_GetCPUFeaturesARM() & kCPUFeatureNEON) == 0) {
+    InitPointersToC();  /* kCPUFeatureNEON bit not reported at run time. */
+    return;
+  }
+  InitPointersToNeon();
+#elif defined(WEBRTC_ARCH_ARM_NEON)
+  InitPointersToNeon();  /* Neon selected at build time; no probing. */
+#else
+  InitPointersToC();
+#endif  /* WEBRTC_DETECT_ARM_NEON */
+}
+
+
+#if defined(WEBRTC_POSIX)
+#include <pthread.h>
+
+static void once(void (*func)(void)) {
+  static pthread_once_t once_control = PTHREAD_ONCE_INIT;
+  (void)pthread_once(&once_control, func);  /* Result is not checked. */
+}
+
+#elif defined(_WIN32)
+#include <windows.h>
+
+static void once(void (*func)(void)) {
+  /* Runs func() on the first call only. InitializeCriticalSection() isn't used
+   * since there's no race-free context in which to execute it.
+   * NOTE(review): static initializer relies on the struct layout -- confirm.
+   * TODO(kma): Change to different implementation (e.g.
+   * InterlockedCompareExchangePointer) to avoid issues similar to
+   * http://code.google.com/p/webm/issues/detail?id=467.
+   */
+  static CRITICAL_SECTION lock = {(void *)-1, -1, 0, 0, 0, 0};
+  static int done = 0;  /* Set to 1 after func() has returned. */
+
+  EnterCriticalSection(&lock);
+  if (!done) {
+    func();
+    done = 1;
+  }
+  LeaveCriticalSection(&lock);
+}
+
+/* There is deliberately no fallback #else block here, to ensure thread safety.
+ * If neither pthread (WEBRTC_POSIX) nor _WIN32 is available, once() is left
+ * undefined, so the problem is caught when the library is built.
+ */
+#endif  /* WEBRTC_POSIX */
+
+void WebRtcSpl_Init(void) {
+  once(InitFunctionPointers);  /* Pointer setup runs on the first call only. */
+}
diff --git a/src/common_audio/signal_processing/vector_scaling_operations.c b/src/common_audio/signal_processing/vector_scaling_operations.c
index 91d9671..242955c 100644
--- a/src/common_audio/signal_processing/vector_scaling_operations.c
+++ b/src/common_audio/signal_processing/vector_scaling_operations.c
@@ -17,7 +17,7 @@
  * WebRtcSpl_ScaleVector()
  * WebRtcSpl_ScaleVectorWithSat()
  * WebRtcSpl_ScaleAndAddVectors()
- * WebRtcSpl_ScaleAndAddVectorsWithRound()
+ * WebRtcSpl_ScaleAndAddVectorsWithRoundC()
  */
 
 #include "signal_processing_library.h"
@@ -148,14 +148,14 @@
     }
 }
 
-#ifndef WEBRTC_ARCH_ARM_NEON
-int WebRtcSpl_ScaleAndAddVectorsWithRound(const int16_t* in_vector1,
-                                          int16_t in_vector1_scale,
-                                          const int16_t* in_vector2,
-                                          int16_t in_vector2_scale,
-                                          int right_shifts,
-                                          int16_t* out_vector,
-                                          int length) {
+// C version of WebRtcSpl_ScaleAndAddVectorsWithRound() for generic platforms.
+int WebRtcSpl_ScaleAndAddVectorsWithRoundC(const int16_t* in_vector1,
+                                           int16_t in_vector1_scale,
+                                           const int16_t* in_vector2,
+                                           int16_t in_vector2_scale,
+                                           int right_shifts,
+                                           int16_t* out_vector,
+                                           int length) {
   int i = 0;
   int round_value = (1 << right_shifts) >> 1;
 
@@ -173,4 +173,3 @@
 
   return 0;
 }
-#endif  // WEBRTC_ARCH_ARM_NEON
diff --git a/src/common_audio/signal_processing/vector_scaling_operations_neon.s b/src/common_audio/signal_processing/vector_scaling_operations_neon.s
index 003943b..562425b 100644
--- a/src/common_audio/signal_processing/vector_scaling_operations_neon.s
+++ b/src/common_audio/signal_processing/vector_scaling_operations_neon.s
@@ -9,7 +9,7 @@
 @
 
 @ vector_scaling_operations_neon.s
-@ This file contains the function WebRtcSpl_ScaleAndAddVectorsWithRound(),
+@ This file contains the function WebRtcSpl_ScaleAndAddVectorsWithRoundNeon(),
 @ optimized for ARM Neon platform. Output is bit-exact with the reference
 @ C code in vector_scaling_operations.c.
 
@@ -17,9 +17,9 @@
 .fpu neon
 
 .align  2
-.global WebRtcSpl_ScaleAndAddVectorsWithRound
+.global WebRtcSpl_ScaleAndAddVectorsWithRoundNeon
 
-WebRtcSpl_ScaleAndAddVectorsWithRound:
+WebRtcSpl_ScaleAndAddVectorsWithRoundNeon:
 .fnstart
 
   push {r4-r9}
diff --git a/src/common_audio/vad/webrtc_vad.c b/src/common_audio/vad/webrtc_vad.c
index ab2e492..034f99b 100644
--- a/src/common_audio/vad/webrtc_vad.c
+++ b/src/common_audio/vad/webrtc_vad.c
@@ -13,6 +13,7 @@
 #include <stdlib.h>
 #include <string.h>
 
+#include "common_audio/signal_processing/include/signal_processing_library.h"
 #include "common_audio/vad/vad_core.h"
 #include "typedefs.h"
 
@@ -53,6 +54,7 @@
 
 // TODO(bjornv): Move WebRtcVad_InitCore() code here.
 int WebRtcVad_Init(VadInst* handle) {
+  WebRtcSpl_Init();  // Sets the SPL function pointers on the first call.
   // Initialize the core VAD component.
   return WebRtcVad_InitCore((VadInstT*) handle);
 }
diff --git a/src/modules/audio_coding/codecs/ilbc/init_decode.c b/src/modules/audio_coding/codecs/ilbc/init_decode.c
index b654f1e..7e51d0f 100644
--- a/src/modules/audio_coding/codecs/ilbc/init_decode.c
+++ b/src/modules/audio_coding/codecs/ilbc/init_decode.c
@@ -23,14 +23,14 @@
  *  Initiation of decoder instance.
  *---------------------------------------------------------------*/
 
-WebRtc_Word16 WebRtcIlbcfix_InitDecode(		/* (o) Number of decoded samples */
-    iLBC_Dec_Inst_t *iLBCdec_inst,	/* (i/o) Decoder instance */
-    WebRtc_Word16 mode,					/* (i) frame size mode */
-    int use_enhancer           /* (i) 1 to use enhancer
-                                  0 to run without enhancer */
-                                                ) {
+WebRtc_Word16 WebRtcIlbcfix_InitDecode(  /* (o) Number of decoded samples */
+    iLBC_Dec_Inst_t *iLBCdec_inst,  /* (i/o) Decoder instance */
+    WebRtc_Word16 mode,  /* (i) frame size mode */
+    int use_enhancer) {  /* (i) 1: use enhancer, 0: no enhancer */
   int i;
 
+  WebRtcSpl_Init();
+
   iLBCdec_inst->mode = mode;
 
   /* Set all the variables that are dependent on the frame size mode */
diff --git a/src/modules/audio_coding/codecs/ilbc/init_encode.c b/src/modules/audio_coding/codecs/ilbc/init_encode.c
index e034bb0..79b3231 100644
--- a/src/modules/audio_coding/codecs/ilbc/init_encode.c
+++ b/src/modules/audio_coding/codecs/ilbc/init_encode.c
@@ -23,10 +23,11 @@
  *  Initiation of encoder instance.
  *---------------------------------------------------------------*/
 
-WebRtc_Word16 WebRtcIlbcfix_InitEncode( /* (o) Number of bytes encoded */
-    iLBC_Enc_Inst_t *iLBCenc_inst,     /* (i/o) Encoder instance */
-    WebRtc_Word16 mode     /* (i) frame size mode */
-                                        ){
+WebRtc_Word16 WebRtcIlbcfix_InitEncode(  /* (o) Number of bytes encoded */
+    iLBC_Enc_Inst_t *iLBCenc_inst,  /* (i/o) Encoder instance */
+    WebRtc_Word16 mode) {  /* (i) frame size mode */
+  WebRtcSpl_Init();
+
   iLBCenc_inst->mode = mode;
 
   /* Set all the variables that are dependent on the frame size mode */
diff --git a/src/modules/audio_coding/codecs/isac/fix/source/isacfix.c b/src/modules/audio_coding/codecs/isac/fix/source/isacfix.c
index 8786b12..02e18e3 100644
--- a/src/modules/audio_coding/codecs/isac/fix/source/isacfix.c
+++ b/src/modules/audio_coding/codecs/isac/fix/source/isacfix.c
@@ -217,6 +217,8 @@
   /* flag encoder init */
   ISAC_inst->initflag |= 2;
 
+  WebRtcSpl_Init();
+
   if (CodingMode == 0)
     /* Adaptive mode */
     ISAC_inst->ISACenc_obj.new_framelength  = INITIAL_FRAMESAMPLES;
@@ -527,6 +529,7 @@
   /* flag decoder init */
   ISAC_inst->initflag |= 1;
 
+  WebRtcSpl_Init();
 
   WebRtcIsacfix_InitMaskingDec(&ISAC_inst->ISACdec_obj.maskfiltstr_obj);
   WebRtcIsacfix_InitPostFilterbank(&ISAC_inst->ISACdec_obj.postfiltbankstr_obj);
diff --git a/src/modules/audio_coding/neteq/webrtc_neteq.c b/src/modules/audio_coding/neteq/webrtc_neteq.c
index 5e99fd8..707c756 100644
--- a/src/modules/audio_coding/neteq/webrtc_neteq.c
+++ b/src/modules/audio_coding/neteq/webrtc_neteq.c
@@ -414,6 +414,8 @@
         return (-1);
     }
 
+    WebRtcSpl_Init();
+
 #ifdef NETEQ_VAD
     /* Start out with no PostDecode VAD instance */
     NetEqMainInst->DSPinst.VADInst.VADState = NULL;
diff --git a/src/modules/audio_device/main/source/audio_device_impl.cc b/src/modules/audio_device/main/source/audio_device_impl.cc
index 7d75640..455e234 100644
--- a/src/modules/audio_device/main/source/audio_device_impl.cc
+++ b/src/modules/audio_device/main/source/audio_device_impl.cc
@@ -10,6 +10,7 @@
 
 #include "audio_device_impl.h"
 #include "audio_device_config.h"
+#include "common_audio/signal_processing/include/signal_processing_library.h"
 #include "system_wrappers/interface/ref_count.h"
 
 #include <assert.h>
@@ -604,6 +605,8 @@
     if (!_ptrAudioDevice)
         return -1;
 
+    WebRtcSpl_Init();
+
     _ptrAudioDeviceUtility->Init();
 
     if (_ptrAudioDevice->Init() == -1)
diff --git a/src/modules/audio_processing/aecm/echo_control_mobile.c b/src/modules/audio_processing/aecm/echo_control_mobile.c
index 30800af..b238b2d 100644
--- a/src/modules/audio_processing/aecm/echo_control_mobile.c
+++ b/src/modules/audio_processing/aecm/echo_control_mobile.c
@@ -13,6 +13,7 @@
 
 #include "echo_control_mobile.h"
 #include "aecm_core.h"
+#include "common_audio/signal_processing/include/signal_processing_library.h"
 #include "ring_buffer.h"
 #ifdef AEC_DEBUG
 #include <stdio.h>
@@ -170,6 +171,8 @@
     }
     aecm->sampFreq = sampFreq;
 
+    WebRtcSpl_Init();
+
     // Initialize AECM core
     if (WebRtcAecm_InitCore(aecm->aecmCore, aecm->sampFreq) == -1)
     {
diff --git a/src/modules/audio_processing/ns/noise_suppression.c b/src/modules/audio_processing/ns/noise_suppression.c
index 6684b82..7ac7232 100644
--- a/src/modules/audio_processing/ns/noise_suppression.c
+++ b/src/modules/audio_processing/ns/noise_suppression.c
@@ -8,12 +8,14 @@
  *  be found in the AUTHORS file in the root of the source tree.
  */
 
+#include "noise_suppression.h"
+
 #include <stdlib.h>
 #include <string.h>
 
-#include "noise_suppression.h"
-#include "ns_core.h"
+#include "common_audio/signal_processing/include/signal_processing_library.h"
 #include "defines.h"
+#include "ns_core.h"
 
 int WebRtcNs_Create(NsHandle** NS_inst) {
   *NS_inst = (NsHandle*) malloc(sizeof(NSinst_t));
@@ -33,6 +35,7 @@
 
 
 int WebRtcNs_Init(NsHandle* NS_inst, WebRtc_UWord32 fs) {
+  WebRtcSpl_Init();  /* Sets the SPL function pointers on the first call. */
   return WebRtcNs_InitCore((NSinst_t*) NS_inst, fs);
 }
 
diff --git a/src/voice_engine/voe_base_impl.cc b/src/voice_engine/voe_base_impl.cc
index 2ab249d..7039c4e 100644
--- a/src/voice_engine/voe_base_impl.cc
+++ b/src/voice_engine/voe_base_impl.cc
@@ -336,6 +336,8 @@
         "Init(external_adm=0x%p)", external_adm);
     CriticalSectionScoped cs(_shared->crit_sec());
 
+    WebRtcSpl_Init();
+
     if (_shared->statistics().Initialized())
     {
         return 0;