The FFT functionality in aec_rdft* is based on legacy C
code which is not thread-safe, in the sense that the
rdft_init method can only be run in a single-threaded manner.

Currently, inside WebRTC multiple instances of the audio-
processing module are set up which means that the init
method may be run concurrently.

In order to avoid having to protect the init method with
a lock to ensure single-threaded behavior, this CL
places the FFT functionality inside a class so that there
is no global component of the FFT functionality.

Note that:
1) The nonstandard header for the ooura_fft.cc was copied
   from the aec_rdft.cc header, and augmented with a
   description of the changes introduced in this CL.
2) The clang warnings for the ooura_fft_sse2.cc,
   ooura_fft_neon.cc and ooura_fft_mips.cc were not
   addressed as this code was kept as it was before this CL.
3) Clang-format was run on all files apart from
   ooura_fft_mips.cc (as that would change the format of
   the inline assembly code).

Adding bypass of presubmit to avoid code style and header errors caused by the fact that files with legacy code are being renamed.

NOPRESUBMIT=true

BUG=chromium:638583

Review-Url: https://codereview.webrtc.org/2348213002
Cr-Commit-Position: refs/heads/master@{#14554}
diff --git a/webrtc/modules/audio_processing/BUILD.gn b/webrtc/modules/audio_processing/BUILD.gn
index 54c33e3..b4d8759 100644
--- a/webrtc/modules/audio_processing/BUILD.gn
+++ b/webrtc/modules/audio_processing/BUILD.gn
@@ -22,8 +22,6 @@
     "aec/aec_core.cc",
     "aec/aec_core.h",
     "aec/aec_core_optimized_methods.h",
-    "aec/aec_rdft.cc",
-    "aec/aec_rdft.h",
     "aec/aec_resampler.cc",
     "aec/aec_resampler.h",
     "aec/echo_cancellation.cc",
@@ -129,6 +127,9 @@
     "utility/delay_estimator_internal.h",
     "utility/delay_estimator_wrapper.cc",
     "utility/delay_estimator_wrapper.h",
+    "utility/ooura_fft.cc",
+    "utility/ooura_fft.h",
+    "utility/ooura_fft_tables_common.h",
     "vad/common.h",
     "vad/gmm.cc",
     "vad/gmm.h",
@@ -225,7 +226,7 @@
     if (mips_float_abi == "hard") {
       sources += [
         "aec/aec_core_mips.cc",
-        "aec/aec_rdft_mips.cc",
+        "utility/ooura_fft_mips.cc",
       ]
     }
   } else {
@@ -256,7 +257,8 @@
   rtc_static_library("audio_processing_sse2") {
     sources = [
       "aec/aec_core_sse2.cc",
-      "aec/aec_rdft_sse2.cc",
+      "utility/ooura_fft_sse2.cc",
+      "utility/ooura_fft_tables_neon_sse2.h",
     ]
 
     if (is_posix) {
@@ -275,9 +277,10 @@
   rtc_static_library("audio_processing_neon") {
     sources = [
       "aec/aec_core_neon.cc",
-      "aec/aec_rdft_neon.cc",
       "aecm/aecm_core_neon.cc",
       "ns/nsx_core_neon.c",
+      "utility/ooura_fft_neon.cc",
+      "utility/ooura_fft_tables_neon_sse2.h",
     ]
 
     if (current_cpu != "arm64") {
diff --git a/webrtc/modules/audio_processing/aec/aec_core.cc b/webrtc/modules/audio_processing/aec/aec_core.cc
index f249833..e3fd14c 100644
--- a/webrtc/modules/audio_processing/aec/aec_core.cc
+++ b/webrtc/modules/audio_processing/aec/aec_core.cc
@@ -28,7 +28,6 @@
 #include "webrtc/common_audio/signal_processing/include/signal_processing_library.h"
 #include "webrtc/modules/audio_processing/aec/aec_common.h"
 #include "webrtc/modules/audio_processing/aec/aec_core_optimized_methods.h"
-#include "webrtc/modules/audio_processing/aec/aec_rdft.h"
 #include "webrtc/modules/audio_processing/logging/apm_data_dumper.h"
 #include "webrtc/modules/audio_processing/utility/delay_estimator_wrapper.h"
 #include "webrtc/system_wrappers/include/cpu_features_wrapper.h"
@@ -337,6 +336,7 @@
 }
 
 static void FilterAdaptation(
+    const OouraFft& ooura_fft,
     int num_partitions,
     int x_fft_buf_block_pos,
     float x_fft_buf[2][kExtendedNumPartitions * PART_LEN1],
@@ -364,7 +364,7 @@
         MulRe(x_fft_buf[0][xPos + PART_LEN], -x_fft_buf[1][xPos + PART_LEN],
               e_fft[0][PART_LEN], e_fft[1][PART_LEN]);
 
-    aec_rdft_inverse_128(fft);
+    ooura_fft.InverseFft(fft);
     memset(fft + PART_LEN, 0, sizeof(float) * PART_LEN);
 
     // fft scaling
@@ -374,7 +374,7 @@
         fft[j] *= scale;
       }
     }
-    aec_rdft_forward_128(fft);
+    ooura_fft.Fft(fft);
 
     h_fft_buf[0][pos] += fft[0];
     h_fft_buf[0][pos + PART_LEN] += fft[1];
@@ -835,7 +835,8 @@
   return;
 }
 
-static void ScaledInverseFft(float freq_data[2][PART_LEN1],
+static void ScaledInverseFft(const OouraFft& ooura_fft,
+                             float freq_data[2][PART_LEN1],
                              float time_data[PART_LEN2],
                              float scale,
                              int conjugate) {
@@ -848,12 +849,14 @@
     time_data[2 * i] = freq_data[0][i] * normalization;
     time_data[2 * i + 1] = sign * freq_data[1][i] * normalization;
   }
-  aec_rdft_inverse_128(time_data);
+  ooura_fft.InverseFft(time_data);
 }
 
-static void Fft(float time_data[PART_LEN2], float freq_data[2][PART_LEN1]) {
+static void Fft(const OouraFft& ooura_fft,
+                float time_data[PART_LEN2],
+                float freq_data[2][PART_LEN1]) {
   int i;
-  aec_rdft_forward_128(time_data);
+  ooura_fft.Fft(time_data);
 
   // Reorder fft output data.
   freq_data[1][0] = 0;
@@ -970,7 +973,8 @@
   }
 }
 
-static void EchoSubtraction(int num_partitions,
+static void EchoSubtraction(const OouraFft& ooura_fft,
+                            int num_partitions,
                             int extended_filter_enabled,
                             int* extreme_filter_divergence,
                             float filter_step_size,
@@ -1019,7 +1023,7 @@
                       h_fft_buf, s_fft);
 
   // Compute the time-domain echo estimate s.
-  ScaledInverseFft(s_fft, s_extended, 2.0f, 0);
+  ScaledInverseFft(ooura_fft, s_fft, s_extended, 2.0f, 0);
   s = &s_extended[PART_LEN];
 
   // Compute the time-domain echo prediction error.
@@ -1030,12 +1034,12 @@
   // Compute the frequency domain echo prediction error.
   memset(e_extended, 0, sizeof(float) * PART_LEN);
   memcpy(e_extended + PART_LEN, e, sizeof(float) * PART_LEN);
-  Fft(e_extended, e_fft);
+  Fft(ooura_fft, e_extended, e_fft);
 
   // Scale error signal inversely with far power.
   WebRtcAec_ScaleErrorSignal(filter_step_size, error_threshold, x_pow, e_fft);
-  WebRtcAec_FilterAdaptation(num_partitions, *x_fft_buf_block_pos, x_fft_buf,
-                             e_fft, h_fft_buf);
+  WebRtcAec_FilterAdaptation(ooura_fft, num_partitions, *x_fft_buf_block_pos,
+                             x_fft_buf, e_fft, h_fft_buf);
   memcpy(echo_subtractor_output, e, sizeof(float) * PART_LEN);
 }
 
@@ -1152,7 +1156,8 @@
   WebRtcAec_Overdrive(aec->overdrive_scaling, hNlFb, hNl);
 }
 
-static void EchoSuppression(AecCore* aec,
+static void EchoSuppression(const OouraFft& ooura_fft,
+                            AecCore* aec,
                             float* nearend_extended_block_lowest_band,
                             float farend_extended_block[PART_LEN2],
                             float* echo_subtractor_output,
@@ -1182,19 +1187,19 @@
   // Analysis filter banks for the echo suppressor.
   // Windowed near-end ffts.
   WindowData(fft, nearend_extended_block_lowest_band);
-  aec_rdft_forward_128(fft);
+  ooura_fft.Fft(fft);
   StoreAsComplex(fft, dfw);
 
   // Windowed echo suppressor output ffts.
   WindowData(fft, aec->eBuf);
-  aec_rdft_forward_128(fft);
+  ooura_fft.Fft(fft);
   StoreAsComplex(fft, efw);
 
   // NLP
 
   // Convert far-end partition to the frequency domain with windowing.
   WindowData(fft, farend_extended_block);
-  Fft(fft, xfw);
+  Fft(ooura_fft, fft, xfw);
   xfw_ptr = &xfw[0][0];
 
   // Buffer far.
@@ -1236,7 +1241,7 @@
                aec->noisePow, hNl);
 
   // Inverse error fft.
-  ScaledInverseFft(efw, fft, 2.0f, 1);
+  ScaledInverseFft(ooura_fft, efw, fft, 2.0f, 1);
 
   // Overlap and add to obtain output.
   for (i = 0; i < PART_LEN; i++) {
@@ -1257,7 +1262,7 @@
     GetHighbandGain(hNl, &nlpGainHband);
 
     // Inverse comfort_noise
-    ScaledInverseFft(comfortNoiseHband, fft, 2.0f, 0);
+    ScaledInverseFft(ooura_fft, comfortNoiseHband, fft, 2.0f, 0);
 
     // compute gain factor
     for (j = 1; j < aec->num_bands; ++j) {
@@ -1330,7 +1335,7 @@
 
   // Convert far-end signal to the frequency domain.
   memcpy(fft, farend_extended_block_lowest_band, sizeof(float) * PART_LEN2);
-  Fft(fft, farend_fft);
+  Fft(aec->ooura_fft, fft, farend_fft);
 
   // Form extended nearend frame.
   memcpy(&nearend_extended_block_lowest_band[0],
@@ -1340,7 +1345,7 @@
 
   // Convert near-end signal to the frequency domain.
   memcpy(fft, nearend_extended_block_lowest_band, sizeof(float) * PART_LEN2);
-  Fft(fft, nearend_fft);
+  Fft(aec->ooura_fft, fft, nearend_fft);
 
   // Power smoothing.
   if (aec->refined_adaptive_filter_enabled) {
@@ -1419,11 +1424,11 @@
   }
 
   // Perform echo subtraction.
-  EchoSubtraction(aec->num_partitions, aec->extended_filter_enabled,
-                  &aec->extreme_filter_divergence, aec->filter_step_size,
-                  aec->error_threshold, &farend_fft[0][0], &aec->xfBufBlockPos,
-                  aec->xfBuf, &nearend_block[0][0], aec->xPow, aec->wfBuf,
-                  echo_subtractor_output);
+  EchoSubtraction(
+      aec->ooura_fft, aec->num_partitions, aec->extended_filter_enabled,
+      &aec->extreme_filter_divergence, aec->filter_step_size,
+      aec->error_threshold, &farend_fft[0][0], &aec->xfBufBlockPos, aec->xfBuf,
+      &nearend_block[0][0], aec->xPow, aec->wfBuf, echo_subtractor_output);
   aec->data_dumper->DumpRaw("aec_h_fft", PART_LEN1 * aec->num_partitions,
                             &aec->wfBuf[0][0]);
   aec->data_dumper->DumpRaw("aec_h_fft", PART_LEN1 * aec->num_partitions,
@@ -1438,7 +1443,7 @@
   }
 
   // Perform echo suppression.
-  EchoSuppression(aec, nearend_extended_block_lowest_band,
+  EchoSuppression(aec->ooura_fft, aec, nearend_extended_block_lowest_band,
                   farend_extended_block_lowest_band, echo_subtractor_output,
                   output_block);
 
@@ -1524,8 +1529,6 @@
   WebRtcAec_InitAec_neon();
 #endif
 
-  aec_rdft_init();
-
   return aec;
 }
 
diff --git a/webrtc/modules/audio_processing/aec/aec_core.h b/webrtc/modules/audio_processing/aec/aec_core.h
index e41842e..0c49198 100644
--- a/webrtc/modules/audio_processing/aec/aec_core.h
+++ b/webrtc/modules/audio_processing/aec/aec_core.h
@@ -26,6 +26,7 @@
 #include "webrtc/common_audio/wav_file.h"
 #include "webrtc/modules/audio_processing/aec/aec_common.h"
 #include "webrtc/modules/audio_processing/utility/block_mean_calculator.h"
+#include "webrtc/modules/audio_processing/utility/ooura_fft.h"
 #include "webrtc/typedefs.h"
 
 namespace webrtc {
@@ -134,6 +135,7 @@
   ~AecCore();
 
   std::unique_ptr<ApmDataDumper> data_dumper;
+  const OouraFft ooura_fft;
 
   CoherenceState coherence_state;
 
diff --git a/webrtc/modules/audio_processing/aec/aec_core_mips.cc b/webrtc/modules/audio_processing/aec/aec_core_mips.cc
index a9b5cd4..93f075b 100644
--- a/webrtc/modules/audio_processing/aec/aec_core_mips.cc
+++ b/webrtc/modules/audio_processing/aec/aec_core_mips.cc
@@ -20,7 +20,7 @@
 #include "webrtc/common_audio/signal_processing/include/signal_processing_library.h"
 }
 #include "webrtc/modules/audio_processing/aec/aec_core_optimized_methods.h"
-#include "webrtc/modules/audio_processing/aec/aec_rdft.h"
+#include "webrtc/modules/audio_processing/utility/ooura_fft.h"
 
 namespace webrtc {
 
@@ -144,6 +144,7 @@
 }
 
 void WebRtcAec_FilterAdaptation_mips(
+    const OouraFft& ooura_fft,
     int num_partitions,
     int x_fft_buf_block_pos,
     float x_fft_buf[2][kExtendedNumPartitions * PART_LEN1],
@@ -238,7 +239,7 @@
       : [fft] "r" (fft)
       : "memory");
 
-    aec_rdft_inverse_128(fft);
+    ooura_fft.InverseFft(fft);
     memset(fft + PART_LEN, 0, sizeof(float) * PART_LEN);
 
     // fft scaling
@@ -285,7 +286,7 @@
         : [scale] "f" (scale), [fft] "r" (fft)
         : "memory");
     }
-    aec_rdft_forward_128(fft);
+    ooura_fft.Fft(fft);
     aRe = h_fft_buf[0] + pos;
     aIm = h_fft_buf[1] + pos;
     __asm __volatile(
diff --git a/webrtc/modules/audio_processing/aec/aec_core_neon.cc b/webrtc/modules/audio_processing/aec/aec_core_neon.cc
index bc503ba..4995ebf 100644
--- a/webrtc/modules/audio_processing/aec/aec_core_neon.cc
+++ b/webrtc/modules/audio_processing/aec/aec_core_neon.cc
@@ -23,7 +23,7 @@
 }
 #include "webrtc/modules/audio_processing/aec/aec_common.h"
 #include "webrtc/modules/audio_processing/aec/aec_core_optimized_methods.h"
-#include "webrtc/modules/audio_processing/aec/aec_rdft.h"
+#include "webrtc/modules/audio_processing/utility/ooura_fft.h"
 
 namespace webrtc {
 
@@ -184,6 +184,7 @@
 }
 
 static void FilterAdaptationNEON(
+    const OouraFft& ooura_fft,
     int num_partitions,
     int x_fft_buf_block_pos,
     float x_fft_buf[2][kExtendedNumPartitions * PART_LEN1],
@@ -225,7 +226,7 @@
         MulRe(x_fft_buf[0][xPos + PART_LEN], -x_fft_buf[1][xPos + PART_LEN],
               e_fft[0][PART_LEN], e_fft[1][PART_LEN]);
 
-    aec_rdft_inverse_128(fft);
+    ooura_fft.InverseFft(fft);
     memset(fft + PART_LEN, 0, sizeof(float) * PART_LEN);
 
     // fft scaling
@@ -238,7 +239,7 @@
         vst1q_f32(&fft[j], fft_scale);
       }
     }
-    aec_rdft_forward_128(fft);
+    ooura_fft.Fft(fft);
 
     {
       const float wt1 = h_fft_buf[1][pos];
diff --git a/webrtc/modules/audio_processing/aec/aec_core_optimized_methods.h b/webrtc/modules/audio_processing/aec/aec_core_optimized_methods.h
index d1fb6e8..5e873c8 100644
--- a/webrtc/modules/audio_processing/aec/aec_core_optimized_methods.h
+++ b/webrtc/modules/audio_processing/aec/aec_core_optimized_methods.h
@@ -31,6 +31,7 @@
                                           float ef[2][PART_LEN1]);
 extern WebRtcAecScaleErrorSignal WebRtcAec_ScaleErrorSignal;
 typedef void (*WebRtcAecFilterAdaptation)(
+    const OouraFft& ooura_fft,
     int num_partitions,
     int x_fft_buf_block_pos,
     float x_fft_buf[2][kExtendedNumPartitions * PART_LEN1],
diff --git a/webrtc/modules/audio_processing/aec/aec_core_sse2.cc b/webrtc/modules/audio_processing/aec/aec_core_sse2.cc
index 47ba12f..ac93919 100644
--- a/webrtc/modules/audio_processing/aec/aec_core_sse2.cc
+++ b/webrtc/modules/audio_processing/aec/aec_core_sse2.cc
@@ -21,7 +21,7 @@
 }
 #include "webrtc/modules/audio_processing/aec/aec_common.h"
 #include "webrtc/modules/audio_processing/aec/aec_core_optimized_methods.h"
-#include "webrtc/modules/audio_processing/aec/aec_rdft.h"
+#include "webrtc/modules/audio_processing/utility/ooura_fft.h"
 
 namespace webrtc {
 
@@ -140,6 +140,7 @@
 }
 
 static void FilterAdaptationSSE2(
+    const OouraFft& ooura_fft,
     int num_partitions,
     int x_fft_buf_block_pos,
     float x_fft_buf[2][kExtendedNumPartitions * PART_LEN1],
@@ -183,7 +184,7 @@
         MulRe(x_fft_buf[0][xPos + PART_LEN], -x_fft_buf[1][xPos + PART_LEN],
               e_fft[0][PART_LEN], e_fft[1][PART_LEN]);
 
-    aec_rdft_inverse_128(fft);
+    ooura_fft.InverseFft(fft);
     memset(fft + PART_LEN, 0, sizeof(float) * PART_LEN);
 
     // fft scaling
@@ -196,7 +197,7 @@
         _mm_storeu_ps(&fft[j], fft_scale);
       }
     }
-    aec_rdft_forward_128(fft);
+    ooura_fft.Fft(fft);
 
     {
       float wt1 = h_fft_buf[1][pos];
diff --git a/webrtc/modules/audio_processing/aec/aec_rdft.h b/webrtc/modules/audio_processing/aec/aec_rdft.h
deleted file mode 100644
index d83eb27..0000000
--- a/webrtc/modules/audio_processing/aec/aec_rdft.h
+++ /dev/null
@@ -1,61 +0,0 @@
-/*
- *  Copyright (c) 2011 The WebRTC project authors. All Rights Reserved.
- *
- *  Use of this source code is governed by a BSD-style license
- *  that can be found in the LICENSE file in the root of the source
- *  tree. An additional intellectual property rights grant can be found
- *  in the file PATENTS.  All contributing project authors may
- *  be found in the AUTHORS file in the root of the source tree.
- */
-
-#ifndef WEBRTC_MODULES_AUDIO_PROCESSING_AEC_MAIN_SOURCE_AEC_RDFT_H_
-#define WEBRTC_MODULES_AUDIO_PROCESSING_AEC_MAIN_SOURCE_AEC_RDFT_H_
-
-#include "webrtc/modules/audio_processing/aec/aec_common.h"
-
-// These intrinsics were unavailable before VS 2008.
-// TODO(andrew): move to a common file.
-#if defined(_MSC_VER) && _MSC_VER < 1500
-#include <emmintrin.h>
-static __inline __m128 _mm_castsi128_ps(__m128i a) { return *(__m128*)&a; }
-static __inline __m128i _mm_castps_si128(__m128 a) { return *(__m128i*)&a; }
-#endif
-
-// Constants shared by all paths (C, SSE2, NEON).
-extern const float rdft_w[64];
-// Constants used by the C path.
-extern const float rdft_wk3ri_first[16];
-extern const float rdft_wk3ri_second[16];
-// Constants used by SSE2 and NEON but initialized in the C path.
-extern ALIGN16_BEG const float ALIGN16_END rdft_wk1r[32];
-extern ALIGN16_BEG const float ALIGN16_END rdft_wk2r[32];
-extern ALIGN16_BEG const float ALIGN16_END rdft_wk3r[32];
-extern ALIGN16_BEG const float ALIGN16_END rdft_wk1i[32];
-extern ALIGN16_BEG const float ALIGN16_END rdft_wk2i[32];
-extern ALIGN16_BEG const float ALIGN16_END rdft_wk3i[32];
-extern ALIGN16_BEG const float ALIGN16_END cftmdl_wk1r[4];
-
-// code path selection function pointers
-typedef void (*RftSub128)(float* a);
-extern RftSub128 rftfsub_128;
-extern RftSub128 rftbsub_128;
-extern RftSub128 cft1st_128;
-extern RftSub128 cftmdl_128;
-extern RftSub128 cftfsub_128;
-extern RftSub128 cftbsub_128;
-extern RftSub128 bitrv2_128;
-
-// entry points
-void aec_rdft_init(void);
-void aec_rdft_init_sse2(void);
-void aec_rdft_forward_128(float* a);
-void aec_rdft_inverse_128(float* a);
-
-#if defined(MIPS_FPU_LE)
-void aec_rdft_init_mips(void);
-#endif
-#if defined(WEBRTC_HAS_NEON)
-void aec_rdft_init_neon(void);
-#endif
-
-#endif  // WEBRTC_MODULES_AUDIO_PROCESSING_AEC_MAIN_SOURCE_AEC_RDFT_H_
diff --git a/webrtc/modules/audio_processing/audio_processing.gypi b/webrtc/modules/audio_processing/audio_processing.gypi
index 78f133a..9338789 100644
--- a/webrtc/modules/audio_processing/audio_processing.gypi
+++ b/webrtc/modules/audio_processing/audio_processing.gypi
@@ -34,8 +34,6 @@
         'aec/aec_core.cc',
         'aec/aec_core.h',
         'aec/aec_core_optimized_methods.h',
-        'aec/aec_rdft.cc',
-        'aec/aec_rdft.h',
         'aec/aec_resampler.cc',
         'aec/aec_resampler.h',
         'aec/echo_cancellation.cc',
@@ -141,6 +139,9 @@
         'utility/delay_estimator_internal.h',
         'utility/delay_estimator_wrapper.cc',
         'utility/delay_estimator_wrapper.h',
+        'utility/ooura_fft.cc',
+        'utility/ooura_fft.h',
+        'utility/ooura_fft_tables_common.h',
         'vad/common.h',
         'vad/gmm.cc',
         'vad/gmm.h',
@@ -236,7 +237,7 @@
             ['mips_float_abi=="hard"', {
               'sources': [
                 'aec/aec_core_mips.cc',
-                'aec/aec_rdft_mips.cc',
+                'utility/ooura_fft_mips.cc',
               ],
             }],
           ],
@@ -275,7 +276,8 @@
           'type': 'static_library',
           'sources': [
             'aec/aec_core_sse2.cc',
-            'aec/aec_rdft_sse2.cc',
+            'utility/ooura_fft_sse2.cc',
+            'utility/ooura_fft_tables_neon_sse2.h',
           ],
           'conditions': [
             ['apm_debug_dump==1', {
@@ -303,9 +305,10 @@
         ],
         'sources': [
           'aec/aec_core_neon.cc',
-          'aec/aec_rdft_neon.cc',
           'aecm/aecm_core_neon.cc',
           'ns/nsx_core_neon.c',
+          'utility/ooura_fft_neon.cc',
+          'utility/ooura_fft_tables_neon_sse2.h',
         ],
         'conditions': [
           ['apm_debug_dump==1', {
diff --git a/webrtc/modules/audio_processing/level_controller/signal_classifier.cc b/webrtc/modules/audio_processing/level_controller/signal_classifier.cc
index c9a07e7..dd67737 100644
--- a/webrtc/modules/audio_processing/level_controller/signal_classifier.cc
+++ b/webrtc/modules/audio_processing/level_controller/signal_classifier.cc
@@ -16,7 +16,6 @@
 
 #include "webrtc/base/array_view.h"
 #include "webrtc/base/constructormagic.h"
-#include "webrtc/modules/audio_processing/aec/aec_rdft.h"
 #include "webrtc/modules/audio_processing/audio_buffer.h"
 #include "webrtc/modules/audio_processing/level_controller/down_sampler.h"
 #include "webrtc/modules/audio_processing/level_controller/noise_spectrum_estimator.h"
@@ -35,13 +34,14 @@
   }
 }
 
-void PowerSpectrum(rtc::ArrayView<const float> x,
+void PowerSpectrum(const OouraFft* ooura_fft,
+                   rtc::ArrayView<const float> x,
                    rtc::ArrayView<float> spectrum) {
   RTC_DCHECK_EQ(65u, spectrum.size());
   RTC_DCHECK_EQ(128u, x.size());
   float X[128];
   std::copy(x.data(), x.data() + x.size(), X);
-  aec_rdft_forward_128(X);
+  ooura_fft->Fft(X);
 
   float* X_p = X;
   RTC_DCHECK_EQ(X_p, &X[0]);
@@ -118,7 +118,6 @@
 SignalClassifier::~SignalClassifier() {}
 
 void SignalClassifier::Initialize(int sample_rate_hz) {
-  aec_rdft_init();
   down_sampler_.Initialize(sample_rate_hz);
   noise_spectrum_estimator_.Initialize();
   frame_extender_.reset(new FrameExtender(80, 128));
@@ -141,7 +140,7 @@
   frame_extender_->ExtendFrame(downsampled_frame, extended_frame);
   RemoveDcLevel(extended_frame);
   float signal_spectrum[65];
-  PowerSpectrum(extended_frame, signal_spectrum);
+  PowerSpectrum(&ooura_fft_, extended_frame, signal_spectrum);
 
   // Classify the signal based on the estimate of the noise spectrum and the
   // signal spectrum estimate.
diff --git a/webrtc/modules/audio_processing/level_controller/signal_classifier.h b/webrtc/modules/audio_processing/level_controller/signal_classifier.h
index 2afa724..8c791fe 100644
--- a/webrtc/modules/audio_processing/level_controller/signal_classifier.h
+++ b/webrtc/modules/audio_processing/level_controller/signal_classifier.h
@@ -18,6 +18,7 @@
 #include "webrtc/base/constructormagic.h"
 #include "webrtc/modules/audio_processing/level_controller/down_sampler.h"
 #include "webrtc/modules/audio_processing/level_controller/noise_spectrum_estimator.h"
+#include "webrtc/modules/audio_processing/utility/ooura_fft.h"
 
 namespace webrtc {
 
@@ -57,6 +58,7 @@
   int initialization_frames_left_;
   int consistent_classification_counter_;
   SignalType last_signal_type_;
+  const OouraFft ooura_fft_;
   RTC_DISALLOW_IMPLICIT_CONSTRUCTORS(SignalClassifier);
 };
 
diff --git a/webrtc/modules/audio_processing/aec/aec_rdft.cc b/webrtc/modules/audio_processing/utility/ooura_fft.cc
similarity index 65%
rename from webrtc/modules/audio_processing/aec/aec_rdft.cc
rename to webrtc/modules/audio_processing/utility/ooura_fft.cc
index 690fe9f..4ba88d7 100644
--- a/webrtc/modules/audio_processing/aec/aec_rdft.cc
+++ b/webrtc/modules/audio_processing/utility/ooura_fft.cc
@@ -10,6 +10,8 @@
  *    - Trivial type modifications.
  *    - Minimal code subset to do rdft of length 128.
  *    - Optimizations because of known length.
+ *    - Removed the global variables by moving the code into a class in order
+ *      to make it thread-safe.
  *
  *  All changes are covered by the WebRTC license and IP grant:
  *  Use of this source code is governed by a BSD-style license
@@ -19,184 +21,19 @@
  *  be found in the AUTHORS file in the root of the source tree.
  */
 
-#include "webrtc/modules/audio_processing/aec/aec_rdft.h"
+#include "webrtc/modules/audio_processing//utility/ooura_fft.h"
 
 #include <math.h>
 
+#include "webrtc/modules/audio_processing/utility/ooura_fft_tables_common.h"
 #include "webrtc/system_wrappers/include/cpu_features_wrapper.h"
 #include "webrtc/typedefs.h"
 
-// These tables used to be computed at run-time. For example, refer to:
-// https://code.google.com/p/webrtc/source/browse/trunk/webrtc/modules/audio_processing/aec/aec_rdft.c?r=6564
-// to see the initialization code.
-const float rdft_w[64] = {
-    1.0000000000f, 0.0000000000f, 0.7071067691f, 0.7071067691f,
-    0.9238795638f, 0.3826834559f, 0.3826834559f, 0.9238795638f,
-    0.9807852507f, 0.1950903237f, 0.5555702448f, 0.8314695954f,
-    0.8314695954f, 0.5555702448f, 0.1950903237f, 0.9807852507f,
-    0.9951847196f, 0.0980171412f, 0.6343933344f, 0.7730104327f,
-    0.8819212914f, 0.4713967443f, 0.2902846634f, 0.9569403529f,
-    0.9569403529f, 0.2902846634f, 0.4713967443f, 0.8819212914f,
-    0.7730104327f, 0.6343933344f, 0.0980171412f, 0.9951847196f,
-    0.7071067691f, 0.4993977249f, 0.4975923598f, 0.4945882559f,
-    0.4903926253f, 0.4850156307f, 0.4784701765f, 0.4707720280f,
-    0.4619397819f, 0.4519946277f, 0.4409606457f, 0.4288643003f,
-    0.4157347977f, 0.4016037583f, 0.3865052164f, 0.3704755902f,
-    0.3535533845f, 0.3357794881f, 0.3171966672f, 0.2978496552f,
-    0.2777851224f, 0.2570513785f, 0.2356983721f, 0.2137775421f,
-    0.1913417280f, 0.1684449315f, 0.1451423317f, 0.1214900985f,
-    0.0975451618f, 0.0733652338f, 0.0490085706f, 0.0245338380f,
-};
-const float rdft_wk3ri_first[16] = {
-    1.000000000f, 0.000000000f, 0.382683456f, 0.923879564f,
-    0.831469536f, 0.555570245f, -0.195090353f, 0.980785251f,
-    0.956940353f, 0.290284693f, 0.098017156f, 0.995184720f,
-    0.634393334f, 0.773010492f, -0.471396863f, 0.881921172f,
-};
-const float rdft_wk3ri_second[16] = {
-    -0.707106769f, 0.707106769f, -0.923879564f, -0.382683456f,
-    -0.980785251f, 0.195090353f, -0.555570245f, -0.831469536f,
-    -0.881921172f, 0.471396863f, -0.773010492f, -0.634393334f,
-    -0.995184720f, -0.098017156f, -0.290284693f, -0.956940353f,
-};
-ALIGN16_BEG const float ALIGN16_END rdft_wk1r[32] = {
-    1.000000000f, 1.000000000f, 0.707106769f, 0.707106769f,
-    0.923879564f, 0.923879564f, 0.382683456f, 0.382683456f,
-    0.980785251f, 0.980785251f, 0.555570245f, 0.555570245f,
-    0.831469595f, 0.831469595f, 0.195090324f, 0.195090324f,
-    0.995184720f, 0.995184720f, 0.634393334f, 0.634393334f,
-    0.881921291f, 0.881921291f, 0.290284663f, 0.290284663f,
-    0.956940353f, 0.956940353f, 0.471396744f, 0.471396744f,
-    0.773010433f, 0.773010433f, 0.098017141f, 0.098017141f,
-};
-ALIGN16_BEG const float ALIGN16_END rdft_wk2r[32] = {
-    1.000000000f, 1.000000000f, -0.000000000f, -0.000000000f,
-    0.707106769f, 0.707106769f, -0.707106769f, -0.707106769f,
-    0.923879564f, 0.923879564f, -0.382683456f, -0.382683456f,
-    0.382683456f, 0.382683456f, -0.923879564f, -0.923879564f,
-    0.980785251f, 0.980785251f, -0.195090324f, -0.195090324f,
-    0.555570245f, 0.555570245f, -0.831469595f, -0.831469595f,
-    0.831469595f, 0.831469595f, -0.555570245f, -0.555570245f,
-    0.195090324f, 0.195090324f, -0.980785251f, -0.980785251f,
-};
-ALIGN16_BEG const float ALIGN16_END rdft_wk3r[32] = {
-    1.000000000f, 1.000000000f, -0.707106769f, -0.707106769f,
-    0.382683456f, 0.382683456f, -0.923879564f, -0.923879564f,
-    0.831469536f, 0.831469536f, -0.980785251f, -0.980785251f,
-    -0.195090353f, -0.195090353f, -0.555570245f, -0.555570245f,
-    0.956940353f, 0.956940353f, -0.881921172f, -0.881921172f,
-    0.098017156f, 0.098017156f, -0.773010492f, -0.773010492f,
-    0.634393334f, 0.634393334f, -0.995184720f, -0.995184720f,
-    -0.471396863f, -0.471396863f, -0.290284693f, -0.290284693f,
-};
-ALIGN16_BEG const float ALIGN16_END rdft_wk1i[32] = {
-    -0.000000000f, 0.000000000f, -0.707106769f, 0.707106769f,
-    -0.382683456f, 0.382683456f, -0.923879564f, 0.923879564f,
-    -0.195090324f, 0.195090324f, -0.831469595f, 0.831469595f,
-    -0.555570245f, 0.555570245f, -0.980785251f, 0.980785251f,
-    -0.098017141f, 0.098017141f, -0.773010433f, 0.773010433f,
-    -0.471396744f, 0.471396744f, -0.956940353f, 0.956940353f,
-    -0.290284663f, 0.290284663f, -0.881921291f, 0.881921291f,
-    -0.634393334f, 0.634393334f, -0.995184720f, 0.995184720f,
-};
-ALIGN16_BEG const float ALIGN16_END rdft_wk2i[32] = {
-    -0.000000000f, 0.000000000f, -1.000000000f, 1.000000000f,
-    -0.707106769f, 0.707106769f, -0.707106769f, 0.707106769f,
-    -0.382683456f, 0.382683456f, -0.923879564f, 0.923879564f,
-    -0.923879564f, 0.923879564f, -0.382683456f, 0.382683456f,
-    -0.195090324f, 0.195090324f, -0.980785251f, 0.980785251f,
-    -0.831469595f, 0.831469595f, -0.555570245f, 0.555570245f,
-    -0.555570245f, 0.555570245f, -0.831469595f, 0.831469595f,
-    -0.980785251f, 0.980785251f, -0.195090324f, 0.195090324f,
-};
-ALIGN16_BEG const float ALIGN16_END rdft_wk3i[32] = {
-    -0.000000000f, 0.000000000f, -0.707106769f, 0.707106769f,
-    -0.923879564f, 0.923879564f, 0.382683456f, -0.382683456f,
-    -0.555570245f, 0.555570245f, -0.195090353f, 0.195090353f,
-    -0.980785251f, 0.980785251f, 0.831469536f, -0.831469536f,
-    -0.290284693f, 0.290284693f, -0.471396863f, 0.471396863f,
-    -0.995184720f, 0.995184720f, 0.634393334f, -0.634393334f,
-    -0.773010492f, 0.773010492f, 0.098017156f, -0.098017156f,
-    -0.881921172f, 0.881921172f, 0.956940353f, -0.956940353f,
-};
-ALIGN16_BEG const float ALIGN16_END cftmdl_wk1r[4] = {
-    0.707106769f, 0.707106769f, 0.707106769f, -0.707106769f,
-};
+namespace webrtc {
 
-static void bitrv2_128_C(float* a) {
-  /*
-      Following things have been attempted but are no faster:
-      (a) Storing the swap indexes in a LUT (index calculations are done
-          for 'free' while waiting on memory/L1).
-      (b) Consolidate the load/store of two consecutive floats by a 64 bit
-          integer (execution is memory/L1 bound).
-      (c) Do a mix of floats and 64 bit integer to maximize register
-          utilization (execution is memory/L1 bound).
-      (d) Replacing ip[i] by ((k<<31)>>25) + ((k >> 1)<<5).
-      (e) Hard-coding of the offsets to completely eliminates index
-          calculations.
-  */
+namespace {
 
-  unsigned int j, j1, k, k1;
-  float xr, xi, yr, yi;
-
-  static const int ip[4] = {0, 64, 32, 96};
-  for (k = 0; k < 4; k++) {
-    for (j = 0; j < k; j++) {
-      j1 = 2 * j + ip[k];
-      k1 = 2 * k + ip[j];
-      xr = a[j1 + 0];
-      xi = a[j1 + 1];
-      yr = a[k1 + 0];
-      yi = a[k1 + 1];
-      a[j1 + 0] = yr;
-      a[j1 + 1] = yi;
-      a[k1 + 0] = xr;
-      a[k1 + 1] = xi;
-      j1 += 8;
-      k1 += 16;
-      xr = a[j1 + 0];
-      xi = a[j1 + 1];
-      yr = a[k1 + 0];
-      yi = a[k1 + 1];
-      a[j1 + 0] = yr;
-      a[j1 + 1] = yi;
-      a[k1 + 0] = xr;
-      a[k1 + 1] = xi;
-      j1 += 8;
-      k1 -= 8;
-      xr = a[j1 + 0];
-      xi = a[j1 + 1];
-      yr = a[k1 + 0];
-      yi = a[k1 + 1];
-      a[j1 + 0] = yr;
-      a[j1 + 1] = yi;
-      a[k1 + 0] = xr;
-      a[k1 + 1] = xi;
-      j1 += 8;
-      k1 += 16;
-      xr = a[j1 + 0];
-      xi = a[j1 + 1];
-      yr = a[k1 + 0];
-      yi = a[k1 + 1];
-      a[j1 + 0] = yr;
-      a[j1 + 1] = yi;
-      a[k1 + 0] = xr;
-      a[k1 + 1] = xi;
-    }
-    j1 = 2 * k + 8 + ip[k];
-    k1 = j1 + 8;
-    xr = a[j1 + 0];
-    xi = a[j1 + 1];
-    yr = a[k1 + 0];
-    yi = a[k1 + 1];
-    a[j1 + 0] = yr;
-    a[j1 + 1] = yi;
-    a[k1 + 0] = xr;
-    a[k1 + 1] = xi;
-  }
-}
-
+#if !(defined(MIPS_FPU_LE) || defined(WEBRTC_HAS_NEON))
 static void cft1st_128_C(float* a) {
   const int n = 128;
   int j, k1, k2;
@@ -431,67 +268,6 @@
   }
 }
 
-static void cftfsub_128_C(float* a) {
-  int j, j1, j2, j3, l;
-  float x0r, x0i, x1r, x1i, x2r, x2i, x3r, x3i;
-
-  cft1st_128(a);
-  cftmdl_128(a);
-  l = 32;
-  for (j = 0; j < l; j += 2) {
-    j1 = j + l;
-    j2 = j1 + l;
-    j3 = j2 + l;
-    x0r = a[j] + a[j1];
-    x0i = a[j + 1] + a[j1 + 1];
-    x1r = a[j] - a[j1];
-    x1i = a[j + 1] - a[j1 + 1];
-    x2r = a[j2] + a[j3];
-    x2i = a[j2 + 1] + a[j3 + 1];
-    x3r = a[j2] - a[j3];
-    x3i = a[j2 + 1] - a[j3 + 1];
-    a[j] = x0r + x2r;
-    a[j + 1] = x0i + x2i;
-    a[j2] = x0r - x2r;
-    a[j2 + 1] = x0i - x2i;
-    a[j1] = x1r - x3i;
-    a[j1 + 1] = x1i + x3r;
-    a[j3] = x1r + x3i;
-    a[j3 + 1] = x1i - x3r;
-  }
-}
-
-static void cftbsub_128_C(float* a) {
-  int j, j1, j2, j3, l;
-  float x0r, x0i, x1r, x1i, x2r, x2i, x3r, x3i;
-
-  cft1st_128(a);
-  cftmdl_128(a);
-  l = 32;
-
-  for (j = 0; j < l; j += 2) {
-    j1 = j + l;
-    j2 = j1 + l;
-    j3 = j2 + l;
-    x0r = a[j] + a[j1];
-    x0i = -a[j + 1] - a[j1 + 1];
-    x1r = a[j] - a[j1];
-    x1i = -a[j + 1] + a[j1 + 1];
-    x2r = a[j2] + a[j3];
-    x2i = a[j2 + 1] + a[j3 + 1];
-    x3r = a[j2] - a[j3];
-    x3i = a[j2 + 1] - a[j3 + 1];
-    a[j] = x0r + x2r;
-    a[j + 1] = x0i - x2i;
-    a[j2] = x0r - x2r;
-    a[j2 + 1] = x0i + x2i;
-    a[j1] = x1r - x3i;
-    a[j1 + 1] = x1i - x3r;
-    a[j3] = x1r + x3i;
-    a[j3 + 1] = x1i + x3r;
-  }
-}
-
 static void rftfsub_128_C(float* a) {
   const float* c = rdft_w + 32;
   int j1, j2, k1, k2;
@@ -535,8 +311,22 @@
   }
   a[65] = -a[65];
 }
+#endif
 
-void aec_rdft_forward_128(float* a) {
+
+}  // namespace
+
+// Caches the SSE2 decision; builds for other architectures keep it false.
+OouraFft::OouraFft() : use_sse2_(false) {
+#if defined(WEBRTC_ARCH_X86_FAMILY)
+  // Queried once here; the *_128 dispatchers only read the cached flag.
+  use_sse2_ = WebRtc_GetCPUInfo(kSSE2) != 0;
+#endif
+}
+
+OouraFft::~OouraFft() = default;
+
+void OouraFft::Fft(float* a) const {
   float xi;
   bitrv2_128(a);
   cftfsub_128(a);
@@ -545,8 +335,7 @@
   a[0] += a[1];
   a[1] = xi;
 }
-
-void aec_rdft_inverse_128(float* a) {
+void OouraFft::InverseFft(float* a) const {
   a[1] = 0.5f * (a[0] - a[1]);
   a[0] -= a[1];
   rftbsub_128(a);
@@ -554,32 +343,217 @@
   cftbsub_128(a);
 }
 
-// code path selection
-RftSub128 cft1st_128;
-RftSub128 cftmdl_128;
-RftSub128 rftfsub_128;
-RftSub128 rftbsub_128;
-RftSub128 cftfsub_128;
-RftSub128 cftbsub_128;
-RftSub128 bitrv2_128;
-
-void aec_rdft_init(void) {
-  cft1st_128 = cft1st_128_C;
-  cftmdl_128 = cftmdl_128_C;
-  rftfsub_128 = rftfsub_128_C;
-  rftbsub_128 = rftbsub_128_C;
-  cftfsub_128 = cftfsub_128_C;
-  cftbsub_128 = cftbsub_128_C;
-  bitrv2_128 = bitrv2_128_C;
-#if defined(WEBRTC_ARCH_X86_FAMILY)
-  if (WebRtc_GetCPUInfo(kSSE2)) {
-    aec_rdft_init_sse2();
+// Dispatches cft1st_128 to the MIPS, NEON, SSE2 (x86 only) or C kernel.
+void OouraFft::cft1st_128(float* a) const {
+#if defined(MIPS_FPU_LE)
+  cft1st_128_mips(a);
+#elif defined(WEBRTC_HAS_NEON)
+  cft1st_128_neon(a);
+#elif defined(WEBRTC_ARCH_X86_FAMILY)
+  if (use_sse2_) {
+    cft1st_128_SSE2(a);
+  } else {
+    cft1st_128_C(a);
   }
+#else
+  // The SSE2 kernels are only declared on x86; everything else uses C.
+  cft1st_128_C(a);
 #endif
+}
+
+// Dispatches cftmdl_128 to the MIPS, NEON, SSE2 (x86 only) or C kernel.
+void OouraFft::cftmdl_128(float* a) const {
 #if defined(MIPS_FPU_LE)
-  aec_rdft_init_mips();
-#endif
-#if defined(WEBRTC_HAS_NEON)
-  aec_rdft_init_neon();
+  cftmdl_128_mips(a);
+#elif defined(WEBRTC_HAS_NEON)
+  cftmdl_128_neon(a);
+#elif defined(WEBRTC_ARCH_X86_FAMILY)
+  if (use_sse2_) {
+    cftmdl_128_SSE2(a);
+  } else {
+    cftmdl_128_C(a);
+  }
+#else
+  // The SSE2 kernels are only declared on x86; everything else uses C.
+  cftmdl_128_C(a);
 #endif
 }
+
+// Dispatches rftfsub_128 to the MIPS, NEON, SSE2 (x86 only) or C kernel.
+void OouraFft::rftfsub_128(float* a) const {
+#if defined(MIPS_FPU_LE)
+  rftfsub_128_mips(a);
+#elif defined(WEBRTC_HAS_NEON)
+  rftfsub_128_neon(a);
+#elif defined(WEBRTC_ARCH_X86_FAMILY)
+  if (use_sse2_) {
+    rftfsub_128_SSE2(a);
+  } else {
+    rftfsub_128_C(a);
+  }
+#else
+  // The SSE2 kernels are only declared on x86; everything else uses C.
+  rftfsub_128_C(a);
+#endif
+}
+
+// Dispatches rftbsub_128 to the MIPS, NEON, SSE2 (x86 only) or C kernel.
+void OouraFft::rftbsub_128(float* a) const {
+#if defined(MIPS_FPU_LE)
+  rftbsub_128_mips(a);
+#elif defined(WEBRTC_HAS_NEON)
+  rftbsub_128_neon(a);
+#elif defined(WEBRTC_ARCH_X86_FAMILY)
+  if (use_sse2_) {
+    rftbsub_128_SSE2(a);
+  } else {
+    rftbsub_128_C(a);
+  }
+#else
+  // The SSE2 kernels are only declared on x86; everything else uses C.
+  rftbsub_128_C(a);
+#endif
+}
+
+// Used by InverseFft(): runs cft1st_128 and cftmdl_128, then a final in-place
+// pass over a[0..127] with the odd-index terms negated versus cftfsub_128.
+void OouraFft::cftbsub_128(float* a) const {
+  int j, j1, j2, j3, l;
+  float x0r, x0i, x1r, x1i, x2r, x2i, x3r, x3i;
+
+  cft1st_128(a);
+  cftmdl_128(a);
+  l = 32;
+
+  for (j = 0; j < l; j += 2) {
+    j1 = j + l;
+    j2 = j1 + l;
+    j3 = j2 + l;
+    x0r = a[j] + a[j1];
+    x0i = -a[j + 1] - a[j1 + 1];
+    x1r = a[j] - a[j1];
+    x1i = -a[j + 1] + a[j1 + 1];
+    x2r = a[j2] + a[j3];
+    x2i = a[j2 + 1] + a[j3 + 1];
+    x3r = a[j2] - a[j3];
+    x3i = a[j2 + 1] - a[j3 + 1];
+    a[j] = x0r + x2r;
+    a[j + 1] = x0i - x2i;
+    a[j2] = x0r - x2r;
+    a[j2 + 1] = x0i + x2i;
+    a[j1] = x1r - x3i;
+    a[j1 + 1] = x1i - x3r;
+    a[j3] = x1r + x3i;
+    a[j3 + 1] = x1i + x3r;
+  }
+}
+
+// Used by Fft(): runs cft1st_128 and cftmdl_128, then a final in-place pass
+// that combines a[j], a[j + 32], a[j + 64] and a[j + 96] for even j < 32.
+void OouraFft::cftfsub_128(float* a) const {
+  int j, j1, j2, j3, l;
+  float x0r, x0i, x1r, x1i, x2r, x2i, x3r, x3i;
+
+  cft1st_128(a);
+  cftmdl_128(a);
+  l = 32;
+  for (j = 0; j < l; j += 2) {
+    j1 = j + l;
+    j2 = j1 + l;
+    j3 = j2 + l;
+    x0r = a[j] + a[j1];
+    x0i = a[j + 1] + a[j1 + 1];
+    x1r = a[j] - a[j1];
+    x1i = a[j + 1] - a[j1 + 1];
+    x2r = a[j2] + a[j3];
+    x2i = a[j2 + 1] + a[j3 + 1];
+    x3r = a[j2] - a[j3];
+    x3i = a[j2 + 1] - a[j3 + 1];
+    a[j] = x0r + x2r;
+    a[j + 1] = x0i + x2i;
+    a[j2] = x0r - x2r;
+    a[j2 + 1] = x0i - x2i;
+    a[j1] = x1r - x3i;
+    a[j1 + 1] = x1i + x3r;
+    a[j3] = x1r + x3i;
+    a[j3 + 1] = x1i - x3r;
+  }
+}
+
+// First step of Fft(): swaps pairs of two-float (xr, xi) entries of a[] in
+// place, using the offsets in ip[].
+void OouraFft::bitrv2_128(float* a) const {
+  /*
+      Following things have been attempted but are no faster:
+      (a) Storing the swap indexes in a LUT (index calculations are done
+          for 'free' while waiting on memory/L1).
+      (b) Consolidate the load/store of two consecutive floats by a 64 bit
+          integer (execution is memory/L1 bound).
+      (c) Do a mix of floats and 64 bit integer to maximize register
+          utilization (execution is memory/L1 bound).
+      (d) Replacing ip[i] by ((k<<31)>>25) + ((k >> 1)<<5).
+      (e) Hard-coding of the offsets to completely eliminate index
+          calculations.
+  */
+
+  unsigned int j, j1, k, k1;
+  float xr, xi, yr, yi;
+
+  const int ip[4] = {0, 64, 32, 96};
+  for (k = 0; k < 4; k++) {
+    for (j = 0; j < k; j++) {
+      j1 = 2 * j + ip[k];
+      k1 = 2 * k + ip[j];
+      xr = a[j1 + 0];
+      xi = a[j1 + 1];
+      yr = a[k1 + 0];
+      yi = a[k1 + 1];
+      a[j1 + 0] = yr;
+      a[j1 + 1] = yi;
+      a[k1 + 0] = xr;
+      a[k1 + 1] = xi;
+      j1 += 8;
+      k1 += 16;
+      xr = a[j1 + 0];
+      xi = a[j1 + 1];
+      yr = a[k1 + 0];
+      yi = a[k1 + 1];
+      a[j1 + 0] = yr;
+      a[j1 + 1] = yi;
+      a[k1 + 0] = xr;
+      a[k1 + 1] = xi;
+      j1 += 8;
+      k1 -= 8;
+      xr = a[j1 + 0];
+      xi = a[j1 + 1];
+      yr = a[k1 + 0];
+      yi = a[k1 + 1];
+      a[j1 + 0] = yr;
+      a[j1 + 1] = yi;
+      a[k1 + 0] = xr;
+      a[k1 + 1] = xi;
+      j1 += 8;
+      k1 += 16;
+      xr = a[j1 + 0];
+      xi = a[j1 + 1];
+      yr = a[k1 + 0];
+      yi = a[k1 + 1];
+      a[j1 + 0] = yr;
+      a[j1 + 1] = yi;
+      a[k1 + 0] = xr;
+      a[k1 + 1] = xi;
+    }
+    j1 = 2 * k + 8 + ip[k];
+    k1 = j1 + 8;
+    xr = a[j1 + 0];
+    xi = a[j1 + 1];
+    yr = a[k1 + 0];
+    yi = a[k1 + 1];
+    a[j1 + 0] = yr;
+    a[j1 + 1] = yi;
+    a[k1 + 0] = xr;
+    a[k1 + 1] = xi;
+  }
+}
+
+}  // namespace webrtc
diff --git a/webrtc/modules/audio_processing/utility/ooura_fft.h b/webrtc/modules/audio_processing/utility/ooura_fft.h
new file mode 100644
index 0000000..a1b9f04
--- /dev/null
+++ b/webrtc/modules/audio_processing/utility/ooura_fft.h
@@ -0,0 +1,60 @@
+/*
+ *  Copyright (c) 2016 The WebRTC project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+#ifndef WEBRTC_MODULES_AUDIO_PROCESSING_UTILITY_OOURA_FFT_H_
+#define WEBRTC_MODULES_AUDIO_PROCESSING_UTILITY_OOURA_FFT_H_
+
+#include "webrtc/typedefs.h"
+
+namespace webrtc {
+
+#if defined(WEBRTC_ARCH_X86_FAMILY)
+void cft1st_128_SSE2(float* a);
+void cftmdl_128_SSE2(float* a);
+void rftfsub_128_SSE2(float* a);
+void rftbsub_128_SSE2(float* a);
+#endif
+
+#if defined(MIPS_FPU_LE)
+void cft1st_128_mips(float* a);
+void cftmdl_128_mips(float* a);
+void rftfsub_128_mips(float* a);
+void rftbsub_128_mips(float* a);
+#endif
+
+#if defined(WEBRTC_HAS_NEON)
+void cft1st_128_neon(float* a);
+void cftmdl_128_neon(float* a);
+void rftfsub_128_neon(float* a);
+void rftbsub_128_neon(float* a);
+#endif
+
+class OouraFft {  // In-place FFT over a 128-float buffer; only state is use_sse2_.
+ public:
+  OouraFft();  // Sets use_sse2_ (CPU query on x86 builds, false otherwise).
+  ~OouraFft();
+  void Fft(float* a) const;  // Forward transform of a[], in place.
+  void InverseFft(float* a) const;  // Inverse transform of a[], in place.
+
+ private:  // The next four methods select a MIPS, NEON, SSE2 or C kernel.
+  void cft1st_128(float* a) const;
+  void cftmdl_128(float* a) const;
+  void rftfsub_128(float* a) const;
+  void rftbsub_128(float* a) const;
+
+  void cftfsub_128(float* a) const;  // Called by Fft().
+  void cftbsub_128(float* a) const;  // Called by InverseFft().
+  void bitrv2_128(float* a) const;  // Called by Fft().
+  bool use_sse2_;  // When true, the dispatchers call the SSE2 kernels.
+};
+
+}  // namespace webrtc
+
+#endif  // WEBRTC_MODULES_AUDIO_PROCESSING_UTILITY_OOURA_FFT_H_
diff --git a/webrtc/modules/audio_processing/aec/aec_rdft_mips.cc b/webrtc/modules/audio_processing/utility/ooura_fft_mips.cc
similarity index 98%
rename from webrtc/modules/audio_processing/aec/aec_rdft_mips.cc
rename to webrtc/modules/audio_processing/utility/ooura_fft_mips.cc
index 7e64e65..3e9db6e 100644
--- a/webrtc/modules/audio_processing/aec/aec_rdft_mips.cc
+++ b/webrtc/modules/audio_processing/utility/ooura_fft_mips.cc
@@ -8,10 +8,15 @@
  *  be found in the AUTHORS file in the root of the source tree.
  */
 
-#include "webrtc/modules/audio_processing/aec/aec_rdft.h"
+#include "webrtc/modules/audio_processing/utility/ooura_fft.h"
+
+#include "webrtc/modules/audio_processing/utility/ooura_fft_tables_common.h"
 #include "webrtc/typedefs.h"
 
-static void bitrv2_128_mips(float* a) {
+namespace webrtc {
+
+#if defined(MIPS_FPU_LE)
+void bitrv2_128_mips(float* a) {
   // n is 128
   float xr, xi, yr, yi;
 
@@ -268,7 +273,7 @@
   a[119] = xi;
 }
 
-static void cft1st_128_mips(float* a) {
+void cft1st_128_mips(float* a) {
   float f0, f1, f2, f3, f4, f5, f6, f7, f8, f9, f10, f11, f12, f13, f14;
   int a_ptr, p1_rdft, p2_rdft, count;
   const float* first = rdft_wk3ri_first;
@@ -517,7 +522,7 @@
   );
 }
 
-static void cftmdl_128_mips(float* a) {
+void cftmdl_128_mips(float* a) {
   float f0, f1, f2, f3, f4, f5, f6, f7, f8, f9, f10, f11, f12, f13, f14;
   int tmp_a, count;
   __asm __volatile (
@@ -803,12 +808,12 @@
   );
 }
 
-static void cftfsub_128_mips(float* a) {
+void cftfsub_128_mips(float* a) {
   float f0, f1, f2, f3, f4, f5, f6, f7, f8;
   int tmp_a, count;
 
-  cft1st_128(a);
-  cftmdl_128(a);
+  cft1st_128_mips(a);
+  cftmdl_128_mips(a);
 
   __asm __volatile (
     ".set       push                                      \n\t"
@@ -861,12 +866,12 @@
   );
 }
 
-static void cftbsub_128_mips(float* a) {
+void cftbsub_128_mips(float* a) {
   float f0, f1, f2, f3, f4, f5, f6, f7, f8;
   int tmp_a, count;
 
-  cft1st_128(a);
-  cftmdl_128(a);
+  cft1st_128_mips(a);
+  cftmdl_128_mips(a);
 
   __asm __volatile (
     ".set       push                                        \n\t"
@@ -919,7 +924,7 @@
   );
 }
 
-static void rftfsub_128_mips(float* a) {
+void rftfsub_128_mips(float* a) {
   const float* c = rdft_w + 32;
   const float f0 = 0.5f;
   float* a1 = &a[2];
@@ -1046,7 +1051,7 @@
   );
 }
 
-static void rftbsub_128_mips(float* a) {
+void rftbsub_128_mips(float* a) {
   const float *c = rdft_w + 32;
   const float f0 = 0.5f;
   float* a1 = &a[2];
@@ -1175,13 +1180,6 @@
     : "memory"
   );
 }
+#endif
 
-void aec_rdft_init_mips(void) {
-  cft1st_128 = cft1st_128_mips;
-  cftmdl_128 = cftmdl_128_mips;
-  rftfsub_128 = rftfsub_128_mips;
-  rftbsub_128 = rftbsub_128_mips;
-  cftfsub_128 = cftfsub_128_mips;
-  cftbsub_128 = cftbsub_128_mips;
-  bitrv2_128 = bitrv2_128_mips;
-}
+}  // namespace webrtc
diff --git a/webrtc/modules/audio_processing/aec/aec_rdft_neon.cc b/webrtc/modules/audio_processing/utility/ooura_fft_neon.cc
similarity index 93%
rename from webrtc/modules/audio_processing/aec/aec_rdft_neon.cc
rename to webrtc/modules/audio_processing/utility/ooura_fft_neon.cc
index 43b6a68..4ed043d 100644
--- a/webrtc/modules/audio_processing/aec/aec_rdft_neon.cc
+++ b/webrtc/modules/audio_processing/utility/ooura_fft_neon.cc
@@ -14,15 +14,17 @@
  * Based on the sse2 version.
  */
 
-
-#include "webrtc/modules/audio_processing/aec/aec_rdft.h"
+#include "webrtc/modules/audio_processing/utility/ooura_fft.h"
 
 #include <arm_neon.h>
 
-static const ALIGN16_BEG float ALIGN16_END
-    k_swap_sign[4] = {-1.f, 1.f, -1.f, 1.f};
+#include "webrtc/modules/audio_processing/utility/ooura_fft_tables_common.h"
+#include "webrtc/modules/audio_processing/utility/ooura_fft_tables_neon_sse2.h"
 
-static void cft1st_128_neon(float* a) {
+namespace webrtc {
+
+#if defined(WEBRTC_HAS_NEON)
+void cft1st_128_neon(float* a) {
   const float32x4_t vec_swap_sign = vld1q_f32((float32_t*)k_swap_sign);
   int j, k2;
 
@@ -71,7 +73,7 @@
   }
 }
 
-static void cftmdl_128_neon(float* a) {
+void cftmdl_128_neon(float* a) {
   int j;
   const int l = 8;
   const float32x4_t vec_swap_sign = vld1q_f32((float32_t*)k_swap_sign);
@@ -185,7 +187,7 @@
   return vrev64q_f32(rev);
 }
 
-static void rftfsub_128_neon(float* a) {
+void rftfsub_128_neon(float* a) {
   const float* c = rdft_w + 32;
   int j1, j2;
   const float32x4_t mm_half = vdupq_n_f32(0.5f);
@@ -264,7 +266,7 @@
   }
 }
 
-static void rftbsub_128_neon(float* a) {
+void rftbsub_128_neon(float* a) {
   const float* c = rdft_w + 32;
   int j1, j2;
   const float32x4_t mm_half = vdupq_n_f32(0.5f);
@@ -274,11 +276,11 @@
   //    Note: commented number are indexes for the first iteration of the loop.
   for (j1 = 1, j2 = 2; j2 + 7 < 64; j1 += 4, j2 += 8) {
     // Load 'wk'.
-    const float32x4_t c_j1 = vld1q_f32(&c[j1]);         //  1,  2,  3,  4,
-    const float32x4_t c_k1 = vld1q_f32(&c[29 - j1]);    // 28, 29, 30, 31,
-    const float32x4_t wkrt = vsubq_f32(mm_half, c_k1);  // 28, 29, 30, 31,
-    const float32x4_t wkr_ = reverse_order_f32x4(wkrt); // 31, 30, 29, 28,
-    const float32x4_t wki_ = c_j1;                      //  1,  2,  3,  4,
+    const float32x4_t c_j1 = vld1q_f32(&c[j1]);          //  1,  2,  3,  4,
+    const float32x4_t c_k1 = vld1q_f32(&c[29 - j1]);     // 28, 29, 30, 31,
+    const float32x4_t wkrt = vsubq_f32(mm_half, c_k1);   // 28, 29, 30, 31,
+    const float32x4_t wkr_ = reverse_order_f32x4(wkrt);  // 31, 30, 29, 28,
+    const float32x4_t wki_ = c_j1;                       //  1,  2,  3,  4,
     // Load and shuffle 'a'.
     //   2,   4,   6,   8,   3,   5,   7,   9
     float32x4x2_t a_j2_p = vld2q_f32(&a[0 + j2]);
@@ -345,11 +347,6 @@
   }
   a[65] = -a[65];
 }
+#endif
 
-void aec_rdft_init_neon(void) {
-  cft1st_128 = cft1st_128_neon;
-  cftmdl_128 = cftmdl_128_neon;
-  rftfsub_128 = rftfsub_128_neon;
-  rftbsub_128 = rftbsub_128_neon;
-}
-
+}  // namespace webrtc
diff --git a/webrtc/modules/audio_processing/aec/aec_rdft_sse2.cc b/webrtc/modules/audio_processing/utility/ooura_fft_sse2.cc
similarity index 85%
rename from webrtc/modules/audio_processing/aec/aec_rdft_sse2.cc
rename to webrtc/modules/audio_processing/utility/ooura_fft_sse2.cc
index b4e453f..03f6b31 100644
--- a/webrtc/modules/audio_processing/aec/aec_rdft_sse2.cc
+++ b/webrtc/modules/audio_processing/utility/ooura_fft_sse2.cc
@@ -8,14 +8,32 @@
  *  be found in the AUTHORS file in the root of the source tree.
  */
 
-#include "webrtc/modules/audio_processing/aec/aec_rdft.h"
+#include "webrtc/modules/audio_processing/utility/ooura_fft.h"
 
 #include <emmintrin.h>
 
-static const ALIGN16_BEG float ALIGN16_END
-    k_swap_sign[4] = {-1.f, 1.f, -1.f, 1.f};
+#include "webrtc/modules/audio_processing/utility/ooura_fft_tables_common.h"
+#include "webrtc/modules/audio_processing/utility/ooura_fft_tables_neon_sse2.h"
 
-static void cft1st_128_SSE2(float* a) {
+namespace webrtc {
+
+#if defined(WEBRTC_ARCH_X86_FAMILY)
+
+namespace {
+// These intrinsics were unavailable before VS 2008.
+// TODO(andrew): move to a common file.
+#if defined(_MSC_VER) && _MSC_VER < 1500
+static __inline __m128 _mm_castsi128_ps(__m128i a) {
+  return *(__m128*)&a;  // Reinterprets the bits of |a|; no value conversion.
+}
+static __inline __m128i _mm_castps_si128(__m128 a) {
+  return *(__m128i*)&a;  // Reinterprets the bits of |a|; no value conversion.
+}
+#endif
+
+}  // namespace
+
+void cft1st_128_SSE2(float* a) {
   const __m128 mm_swap_sign = _mm_load_ps(k_swap_sign);
   int j, k2;
 
@@ -78,7 +96,7 @@
   }
 }
 
-static void cftmdl_128_SSE2(float* a) {
+void cftmdl_128_SSE2(float* a) {
   const int l = 8;
   const __m128 mm_swap_sign = _mm_load_ps(k_swap_sign);
   int j0;
@@ -89,12 +107,12 @@
     const __m128i a_08 = _mm_loadl_epi64((__m128i*)&a[j0 + 8]);
     const __m128i a_32 = _mm_loadl_epi64((__m128i*)&a[j0 + 32]);
     const __m128i a_40 = _mm_loadl_epi64((__m128i*)&a[j0 + 40]);
-    const __m128 a_00_32 = _mm_shuffle_ps(_mm_castsi128_ps(a_00),
-                                          _mm_castsi128_ps(a_32),
-                                          _MM_SHUFFLE(1, 0, 1, 0));
-    const __m128 a_08_40 = _mm_shuffle_ps(_mm_castsi128_ps(a_08),
-                                          _mm_castsi128_ps(a_40),
-                                          _MM_SHUFFLE(1, 0, 1, 0));
+    const __m128 a_00_32 =
+        _mm_shuffle_ps(_mm_castsi128_ps(a_00), _mm_castsi128_ps(a_32),
+                       _MM_SHUFFLE(1, 0, 1, 0));
+    const __m128 a_08_40 =
+        _mm_shuffle_ps(_mm_castsi128_ps(a_08), _mm_castsi128_ps(a_40),
+                       _MM_SHUFFLE(1, 0, 1, 0));
     __m128 x0r0_0i0_0r1_x0i1 = _mm_add_ps(a_00_32, a_08_40);
     const __m128 x1r0_1i0_1r1_x1i1 = _mm_sub_ps(a_00_32, a_08_40);
 
@@ -102,12 +120,12 @@
     const __m128i a_24 = _mm_loadl_epi64((__m128i*)&a[j0 + 24]);
     const __m128i a_48 = _mm_loadl_epi64((__m128i*)&a[j0 + 48]);
     const __m128i a_56 = _mm_loadl_epi64((__m128i*)&a[j0 + 56]);
-    const __m128 a_16_48 = _mm_shuffle_ps(_mm_castsi128_ps(a_16),
-                                          _mm_castsi128_ps(a_48),
-                                          _MM_SHUFFLE(1, 0, 1, 0));
-    const __m128 a_24_56 = _mm_shuffle_ps(_mm_castsi128_ps(a_24),
-                                          _mm_castsi128_ps(a_56),
-                                          _MM_SHUFFLE(1, 0, 1, 0));
+    const __m128 a_16_48 =
+        _mm_shuffle_ps(_mm_castsi128_ps(a_16), _mm_castsi128_ps(a_48),
+                       _MM_SHUFFLE(1, 0, 1, 0));
+    const __m128 a_24_56 =
+        _mm_shuffle_ps(_mm_castsi128_ps(a_24), _mm_castsi128_ps(a_56),
+                       _MM_SHUFFLE(1, 0, 1, 0));
     const __m128 x2r0_2i0_2r1_x2i1 = _mm_add_ps(a_16_48, a_24_56);
     const __m128 x3r0_3i0_3r1_x3i1 = _mm_sub_ps(a_16_48, a_24_56);
 
@@ -163,12 +181,12 @@
       const __m128i a_08 = _mm_loadl_epi64((__m128i*)&a[j0 + 8]);
       const __m128i a_32 = _mm_loadl_epi64((__m128i*)&a[j0 + 32]);
       const __m128i a_40 = _mm_loadl_epi64((__m128i*)&a[j0 + 40]);
-      const __m128 a_00_32 = _mm_shuffle_ps(_mm_castsi128_ps(a_00),
-                                            _mm_castsi128_ps(a_32),
-                                            _MM_SHUFFLE(1, 0, 1, 0));
-      const __m128 a_08_40 = _mm_shuffle_ps(_mm_castsi128_ps(a_08),
-                                            _mm_castsi128_ps(a_40),
-                                            _MM_SHUFFLE(1, 0, 1, 0));
+      const __m128 a_00_32 =
+          _mm_shuffle_ps(_mm_castsi128_ps(a_00), _mm_castsi128_ps(a_32),
+                         _MM_SHUFFLE(1, 0, 1, 0));
+      const __m128 a_08_40 =
+          _mm_shuffle_ps(_mm_castsi128_ps(a_08), _mm_castsi128_ps(a_40),
+                         _MM_SHUFFLE(1, 0, 1, 0));
       __m128 x0r0_0i0_0r1_x0i1 = _mm_add_ps(a_00_32, a_08_40);
       const __m128 x1r0_1i0_1r1_x1i1 = _mm_sub_ps(a_00_32, a_08_40);
 
@@ -176,22 +194,21 @@
       const __m128i a_24 = _mm_loadl_epi64((__m128i*)&a[j0 + 24]);
       const __m128i a_48 = _mm_loadl_epi64((__m128i*)&a[j0 + 48]);
       const __m128i a_56 = _mm_loadl_epi64((__m128i*)&a[j0 + 56]);
-      const __m128 a_16_48 = _mm_shuffle_ps(_mm_castsi128_ps(a_16),
-                                            _mm_castsi128_ps(a_48),
-                                            _MM_SHUFFLE(1, 0, 1, 0));
-      const __m128 a_24_56 = _mm_shuffle_ps(_mm_castsi128_ps(a_24),
-                                            _mm_castsi128_ps(a_56),
-                                            _MM_SHUFFLE(1, 0, 1, 0));
+      const __m128 a_16_48 =
+          _mm_shuffle_ps(_mm_castsi128_ps(a_16), _mm_castsi128_ps(a_48),
+                         _MM_SHUFFLE(1, 0, 1, 0));
+      const __m128 a_24_56 =
+          _mm_shuffle_ps(_mm_castsi128_ps(a_24), _mm_castsi128_ps(a_56),
+                         _MM_SHUFFLE(1, 0, 1, 0));
       const __m128 x2r0_2i0_2r1_x2i1 = _mm_add_ps(a_16_48, a_24_56);
       const __m128 x3r0_3i0_3r1_x3i1 = _mm_sub_ps(a_16_48, a_24_56);
 
       const __m128 xx = _mm_add_ps(x0r0_0i0_0r1_x0i1, x2r0_2i0_2r1_x2i1);
       const __m128 xx1 = _mm_sub_ps(x0r0_0i0_0r1_x0i1, x2r0_2i0_2r1_x2i1);
       const __m128 xx2 = _mm_mul_ps(xx1, wk2rv);
-      const __m128 xx3 =
-          _mm_mul_ps(wk2iv,
-                     _mm_castsi128_ps(_mm_shuffle_epi32(
-                         _mm_castps_si128(xx1), _MM_SHUFFLE(2, 3, 0, 1))));
+      const __m128 xx3 = _mm_mul_ps(
+          wk2iv, _mm_castsi128_ps(_mm_shuffle_epi32(_mm_castps_si128(xx1),
+                                                    _MM_SHUFFLE(2, 3, 0, 1))));
       const __m128 xx4 = _mm_add_ps(xx2, xx3);
 
       const __m128 x3i0_3r0_3i1_x3r1 = _mm_castsi128_ps(_mm_shuffle_epi32(
@@ -202,16 +219,14 @@
 
       const __m128 xx10 = _mm_mul_ps(x1_x3_add, wk1rv);
       const __m128 xx11 = _mm_mul_ps(
-          wk1iv,
-          _mm_castsi128_ps(_mm_shuffle_epi32(_mm_castps_si128(x1_x3_add),
-                                             _MM_SHUFFLE(2, 3, 0, 1))));
+          wk1iv, _mm_castsi128_ps(_mm_shuffle_epi32(_mm_castps_si128(x1_x3_add),
+                                                    _MM_SHUFFLE(2, 3, 0, 1))));
       const __m128 xx12 = _mm_add_ps(xx10, xx11);
 
       const __m128 xx20 = _mm_mul_ps(x1_x3_sub, wk3rv);
       const __m128 xx21 = _mm_mul_ps(
-          wk3iv,
-          _mm_castsi128_ps(_mm_shuffle_epi32(_mm_castps_si128(x1_x3_sub),
-                                             _MM_SHUFFLE(2, 3, 0, 1))));
+          wk3iv, _mm_castsi128_ps(_mm_shuffle_epi32(_mm_castps_si128(x1_x3_sub),
+                                                    _MM_SHUFFLE(2, 3, 0, 1))));
       const __m128 xx22 = _mm_add_ps(xx20, xx21);
 
       _mm_storel_epi64((__m128i*)&a[j0 + 0], _mm_castps_si128(xx));
@@ -237,13 +252,13 @@
   }
 }
 
-static void rftfsub_128_SSE2(float* a) {
+void rftfsub_128_SSE2(float* a) {
   const float* c = rdft_w + 32;
   int j1, j2, k1, k2;
   float wkr, wki, xr, xi, yr, yi;
 
-  static const ALIGN16_BEG float ALIGN16_END
-      k_half[4] = {0.5f, 0.5f, 0.5f, 0.5f};
+  static const ALIGN16_BEG float ALIGN16_END k_half[4] = {0.5f, 0.5f, 0.5f,
+                                                          0.5f};
   const __m128 mm_half = _mm_load_ps(k_half);
 
   // Vectorized code (four at once).
@@ -327,13 +342,13 @@
   }
 }
 
-static void rftbsub_128_SSE2(float* a) {
+void rftbsub_128_SSE2(float* a) {
   const float* c = rdft_w + 32;
   int j1, j2, k1, k2;
   float wkr, wki, xr, xi, yr, yi;
 
-  static const ALIGN16_BEG float ALIGN16_END
-      k_half[4] = {0.5f, 0.5f, 0.5f, 0.5f};
+  static const ALIGN16_BEG float ALIGN16_END k_half[4] = {0.5f, 0.5f, 0.5f,
+                                                          0.5f};
   const __m128 mm_half = _mm_load_ps(k_half);
 
   a[1] = -a[1];
@@ -418,10 +433,6 @@
   }
   a[65] = -a[65];
 }
+#endif
 
-void aec_rdft_init_sse2(void) {
-  cft1st_128 = cft1st_128_SSE2;
-  cftmdl_128 = cftmdl_128_SSE2;
-  rftfsub_128 = rftfsub_128_SSE2;
-  rftbsub_128 = rftbsub_128_SSE2;
-}
+}  // namespace webrtc
diff --git a/webrtc/modules/audio_processing/utility/ooura_fft_tables_common.h b/webrtc/modules/audio_processing/utility/ooura_fft_tables_common.h
new file mode 100644
index 0000000..548027c
--- /dev/null
+++ b/webrtc/modules/audio_processing/utility/ooura_fft_tables_common.h
@@ -0,0 +1,54 @@
+/*
+ *  Copyright (c) 2011 The WebRTC project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+#ifndef WEBRTC_MODULES_AUDIO_PROCESSING_UTILITY_OOURA_FFT_TABLES_COMMON_H_
+#define WEBRTC_MODULES_AUDIO_PROCESSING_UTILITY_OOURA_FFT_TABLES_COMMON_H_
+
+#include "webrtc/modules/audio_processing/utility/ooura_fft.h"
+
+namespace webrtc {
+
+// These tables used to be computed at run-time. For example, refer to:
+// https://code.google.com/p/webrtc/source/browse/trunk/webrtc/modules/audio_processing/utility/apm_rdft.c?r=6564
+// to see the initialization code.
+// Constants shared by all paths (C, SSE2, NEON, MIPS).
+const float rdft_w[64] = {
+    1.0000000000f, 0.0000000000f, 0.7071067691f, 0.7071067691f, 0.9238795638f,
+    0.3826834559f, 0.3826834559f, 0.9238795638f, 0.9807852507f, 0.1950903237f,
+    0.5555702448f, 0.8314695954f, 0.8314695954f, 0.5555702448f, 0.1950903237f,
+    0.9807852507f, 0.9951847196f, 0.0980171412f, 0.6343933344f, 0.7730104327f,
+    0.8819212914f, 0.4713967443f, 0.2902846634f, 0.9569403529f, 0.9569403529f,
+    0.2902846634f, 0.4713967443f, 0.8819212914f, 0.7730104327f, 0.6343933344f,
+    0.0980171412f, 0.9951847196f, 0.7071067691f, 0.4993977249f, 0.4975923598f,
+    0.4945882559f, 0.4903926253f, 0.4850156307f, 0.4784701765f, 0.4707720280f,
+    0.4619397819f, 0.4519946277f, 0.4409606457f, 0.4288643003f, 0.4157347977f,
+    0.4016037583f, 0.3865052164f, 0.3704755902f, 0.3535533845f, 0.3357794881f,
+    0.3171966672f, 0.2978496552f, 0.2777851224f, 0.2570513785f, 0.2356983721f,
+    0.2137775421f, 0.1913417280f, 0.1684449315f, 0.1451423317f, 0.1214900985f,
+    0.0975451618f, 0.0733652338f, 0.0490085706f, 0.0245338380f,
+};
+
+// Constants used by the C and MIPS paths.
+const float rdft_wk3ri_first[16] = {
+    1.000000000f, 0.000000000f, 0.382683456f,  0.923879564f,
+    0.831469536f, 0.555570245f, -0.195090353f, 0.980785251f,
+    0.956940353f, 0.290284693f, 0.098017156f,  0.995184720f,
+    0.634393334f, 0.773010492f, -0.471396863f, 0.881921172f,
+};
+const float rdft_wk3ri_second[16] = {
+    -0.707106769f, 0.707106769f,  -0.923879564f, -0.382683456f,
+    -0.980785251f, 0.195090353f,  -0.555570245f, -0.831469536f,
+    -0.881921172f, 0.471396863f,  -0.773010492f, -0.634393334f,
+    -0.995184720f, -0.098017156f, -0.290284693f, -0.956940353f,
+};
+
+}  // namespace webrtc
+
+#endif  // WEBRTC_MODULES_AUDIO_PROCESSING_UTILITY_OOURA_FFT_TABLES_COMMON_H_
diff --git a/webrtc/modules/audio_processing/utility/ooura_fft_tables_neon_sse2.h b/webrtc/modules/audio_processing/utility/ooura_fft_tables_neon_sse2.h
new file mode 100644
index 0000000..1ed646d
--- /dev/null
+++ b/webrtc/modules/audio_processing/utility/ooura_fft_tables_neon_sse2.h
@@ -0,0 +1,94 @@
+/*
+ *  Copyright (c) 2011 The WebRTC project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+#ifndef WEBRTC_MODULES_AUDIO_PROCESSING_UTILITY_OOURA_FFT_TABLES_NEON_SSE2_H_
+#define WEBRTC_MODULES_AUDIO_PROCESSING_UTILITY_OOURA_FFT_TABLES_NEON_SSE2_H_
+
+#include "webrtc/modules/audio_processing/utility/ooura_fft.h"
+
+#ifdef _MSC_VER /* visual c++ */
+#define ALIGN16_BEG __declspec(align(16))
+#define ALIGN16_END
+#else /* gcc or icc */
+#define ALIGN16_BEG
+#define ALIGN16_END __attribute__((aligned(16)))
+#endif
+
+namespace webrtc {
+
+// These tables used to be computed at run-time. For example, refer to:
+// https://code.google.com/p/webrtc/source/browse/trunk/webrtc/modules/audio_processing/utility/apm_rdft.c?r=6564
+// to see the initialization code.
+#if defined(WEBRTC_ARCH_X86_FAMILY) || defined(WEBRTC_HAS_NEON)
+// Constants used by the SSE2 and NEON paths.
+const ALIGN16_BEG float ALIGN16_END k_swap_sign[4] = {-1.f, 1.f, -1.f, 1.f};
+
+ALIGN16_BEG const float ALIGN16_END rdft_wk1r[32] = {
+    1.000000000f, 1.000000000f, 0.707106769f, 0.707106769f, 0.923879564f,
+    0.923879564f, 0.382683456f, 0.382683456f, 0.980785251f, 0.980785251f,
+    0.555570245f, 0.555570245f, 0.831469595f, 0.831469595f, 0.195090324f,
+    0.195090324f, 0.995184720f, 0.995184720f, 0.634393334f, 0.634393334f,
+    0.881921291f, 0.881921291f, 0.290284663f, 0.290284663f, 0.956940353f,
+    0.956940353f, 0.471396744f, 0.471396744f, 0.773010433f, 0.773010433f,
+    0.098017141f, 0.098017141f,
+};
+ALIGN16_BEG const float ALIGN16_END rdft_wk2r[32] = {
+    1.000000000f,  1.000000000f,  -0.000000000f, -0.000000000f, 0.707106769f,
+    0.707106769f,  -0.707106769f, -0.707106769f, 0.923879564f,  0.923879564f,
+    -0.382683456f, -0.382683456f, 0.382683456f,  0.382683456f,  -0.923879564f,
+    -0.923879564f, 0.980785251f,  0.980785251f,  -0.195090324f, -0.195090324f,
+    0.555570245f,  0.555570245f,  -0.831469595f, -0.831469595f, 0.831469595f,
+    0.831469595f,  -0.555570245f, -0.555570245f, 0.195090324f,  0.195090324f,
+    -0.980785251f, -0.980785251f,
+};
+ALIGN16_BEG const float ALIGN16_END rdft_wk3r[32] = {
+    1.000000000f,  1.000000000f,  -0.707106769f, -0.707106769f, 0.382683456f,
+    0.382683456f,  -0.923879564f, -0.923879564f, 0.831469536f,  0.831469536f,
+    -0.980785251f, -0.980785251f, -0.195090353f, -0.195090353f, -0.555570245f,
+    -0.555570245f, 0.956940353f,  0.956940353f,  -0.881921172f, -0.881921172f,
+    0.098017156f,  0.098017156f,  -0.773010492f, -0.773010492f, 0.634393334f,
+    0.634393334f,  -0.995184720f, -0.995184720f, -0.471396863f, -0.471396863f,
+    -0.290284693f, -0.290284693f,
+};
+ALIGN16_BEG const float ALIGN16_END rdft_wk1i[32] = {
+    -0.000000000f, 0.000000000f,  -0.707106769f, 0.707106769f,  -0.382683456f,
+    0.382683456f,  -0.923879564f, 0.923879564f,  -0.195090324f, 0.195090324f,
+    -0.831469595f, 0.831469595f,  -0.555570245f, 0.555570245f,  -0.980785251f,
+    0.980785251f,  -0.098017141f, 0.098017141f,  -0.773010433f, 0.773010433f,
+    -0.471396744f, 0.471396744f,  -0.956940353f, 0.956940353f,  -0.290284663f,
+    0.290284663f,  -0.881921291f, 0.881921291f,  -0.634393334f, 0.634393334f,
+    -0.995184720f, 0.995184720f,
+};
+ALIGN16_BEG const float ALIGN16_END rdft_wk2i[32] = {
+    -0.000000000f, 0.000000000f,  -1.000000000f, 1.000000000f,  -0.707106769f,
+    0.707106769f,  -0.707106769f, 0.707106769f,  -0.382683456f, 0.382683456f,
+    -0.923879564f, 0.923879564f,  -0.923879564f, 0.923879564f,  -0.382683456f,
+    0.382683456f,  -0.195090324f, 0.195090324f,  -0.980785251f, 0.980785251f,
+    -0.831469595f, 0.831469595f,  -0.555570245f, 0.555570245f,  -0.555570245f,
+    0.555570245f,  -0.831469595f, 0.831469595f,  -0.980785251f, 0.980785251f,
+    -0.195090324f, 0.195090324f,
+};
+ALIGN16_BEG const float ALIGN16_END rdft_wk3i[32] = {
+    -0.000000000f, 0.000000000f,  -0.707106769f, 0.707106769f,  -0.923879564f,
+    0.923879564f,  0.382683456f,  -0.382683456f, -0.555570245f, 0.555570245f,
+    -0.195090353f, 0.195090353f,  -0.980785251f, 0.980785251f,  0.831469536f,
+    -0.831469536f, -0.290284693f, 0.290284693f,  -0.471396863f, 0.471396863f,
+    -0.995184720f, 0.995184720f,  0.634393334f,  -0.634393334f, -0.773010492f,
+    0.773010492f,  0.098017156f,  -0.098017156f, -0.881921172f, 0.881921172f,
+    0.956940353f,  -0.956940353f,
+};
+ALIGN16_BEG const float ALIGN16_END cftmdl_wk1r[4] = {
+    0.707106769f, 0.707106769f, 0.707106769f, -0.707106769f,
+};
+#endif
+
+}  // namespace webrtc
+
+#endif  // WEBRTC_MODULES_AUDIO_PROCESSING_UTILITY_OOURA_FFT_TABLES_NEON_SSE2_H_