Targets for requantization tests and benchmarks

PiperOrigin-RevId: 299993900
diff --git a/BUILD.bazel b/BUILD.bazel
index 74bd94a..8986c2f 100644
--- a/BUILD.bazel
+++ b/BUILD.bazel
@@ -242,6 +242,10 @@
     "src/x8-zip/x3-scalar.c",
     "src/x8-zip/x4-scalar.c",
     "src/x8-zip/xm-scalar.c",
+    "src/requantization/precise-scalar.c",
+    "src/requantization/fp32-scalar.c",
+    "src/requantization/q31-scalar.c",
+    "src/requantization/gemmlowp-scalar.c",
 ]
 
 WASM_UKERNELS = [
@@ -424,6 +428,8 @@
     "src/x32-zip/x3-psimd.c",
     "src/x32-zip/x4-psimd.c",
     "src/x32-zip/xm-psimd.c",
+    "src/requantization/precise-psimd.c",
+    "src/requantization/fp32-psimd.c",
 ]
 
 PSIMD_ACCMATH_UKERNELS = [
@@ -616,6 +622,10 @@
     "src/math/sigmoid-neon-rr2-lut2048-p1-nr2recps.c",
     "src/math/sigmoid-neon-rr2-lut64-p2-nr2recps.c",
     "src/math/sigmoid-neon-rr2-p5-nr2recps.c",
+    "src/requantization/precise-neon.c",
+    "src/requantization/fp32-neon.c",
+    "src/requantization/q31-neon.c",
+    "src/requantization/gemmlowp-neon.c",
 ]
 
 NEONFMA_UKERNELS = [
@@ -986,6 +996,16 @@
     "src/math/exp-sse2-p5.c",
     "src/math/expminus-sse2-p5.c",
     "src/math/sigmoid-sse2-p5-div.c",
+    "src/requantization/precise-sse2.c",
+    "src/requantization/fp32-sse2.c",
+    "src/requantization/q31-sse2.c",
+    "src/requantization/gemmlowp-sse2.c",
+]
+
+SSSE3_UKERNELS = [
+    "src/requantization/precise-ssse3.c",
+    "src/requantization/q31-ssse3.c",
+    "src/requantization/gemmlowp-ssse3.c",
 ]
 
 SSE41_UKERNELS = [
@@ -997,6 +1017,9 @@
     "src/f32-sigmoid/gen/sse41-p5-div-x16.c",
     "src/f32-sigmoid/gen/sse41-p5-div-x20.c",
     "src/f32-sigmoid/gen/sse41-p5-div-x24.c",
+    "src/requantization/precise-sse4.c",
+    "src/requantization/q31-sse4.c",
+    "src/requantization/gemmlowp-sse4.c",
 ]
 
 AVX_UKERNELS = [
@@ -1439,6 +1462,7 @@
 ]
 
 INTERNAL_MICROKERNEL_HDRS = [
+    "src/requantization/gemmlowp-requantization.h",
     "src/xnnpack/argmaxpool.h",
     "src/xnnpack/avgpool.h",
     "src/xnnpack/bilinear.h",
@@ -1669,6 +1693,19 @@
 )
 
 xnnpack_cc_library(
+    name = "ssse3_ukernels",
+    hdrs = INTERNAL_HDRS,
+    copts = xnnpack_std_copts(),
+    x86_copts = ["-mssse3"],
+    x86_srcs = SSSE3_UKERNELS,
+    deps = [
+        ":tables",
+        "@FP16",
+        "@pthreadpool",
+    ],
+)
+
+xnnpack_cc_library(
     name = "sse41_ukernels",
     hdrs = INTERNAL_HDRS,
     copts = xnnpack_std_copts(),
@@ -1775,6 +1812,7 @@
         ":psimd_fastmath_ukernels",
         ":psimd_accmath_ukernels",
         ":sse2_ukernels",
+        ":ssse3_ukernels",
         ":sse41_ukernels",
         ":avx_ukernels",
         ":fma3_ukernels",
@@ -2126,6 +2164,16 @@
     deps = MICROKERNEL_BENCHMARK_DEPS + [":im2col"],
 )
 
+xnnpack_benchmark(
+    name = "requantization_bench",
+    srcs = [
+        "bench/requantization.cc",
+        "src/xnnpack/requantization-stubs.h",
+        "src/xnnpack/AlignedAllocator.h",
+    ] + MICROKERNEL_BENCHMARK_HDRS,
+    deps = MICROKERNEL_BENCHMARK_DEPS,
+)
+
 ########################### Benchmarks for operators ###########################
 
 xnnpack_benchmark(
@@ -2894,6 +2942,16 @@
     deps = MICROKERNEL_TEST_DEPS,
 )
 
+xnnpack_unit_test(
+    name = "requantization_test",
+    srcs = [
+        "src/xnnpack/requantization-stubs.h",
+        "test/requantization.cc",
+        "test/requantization-tester.h",
+    ] + MICROKERNEL_TEST_HDRS,
+    deps = MICROKERNEL_TEST_DEPS,
+)
+
 ########################### Size test for the library ##########################
 
 xnnpack_binary(
diff --git a/CMakeLists.txt b/CMakeLists.txt
index 8e6f5a2..d20a53d 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -340,7 +340,11 @@
   src/x8-zip/x2-scalar.c
   src/x8-zip/x3-scalar.c
   src/x8-zip/x4-scalar.c
-  src/x8-zip/xm-scalar.c)
+  src/x8-zip/xm-scalar.c
+  src/requantization/precise-scalar.c
+  src/requantization/fp32-scalar.c
+  src/requantization/q31-scalar.c
+  src/requantization/gemmlowp-scalar.c)
 
 SET(XNNPACK_PSIMD_FASTMATH_MICROKERNEL_SRCS
   src/f32-argmaxpool/4x-psimd-c4.c
@@ -439,7 +443,9 @@
   src/x32-zip/x2-psimd.c
   src/x32-zip/x3-psimd.c
   src/x32-zip/x4-psimd.c
-  src/x32-zip/xm-psimd.c)
+  src/x32-zip/xm-psimd.c
+  src/requantization/precise-psimd.c
+  src/requantization/fp32-psimd.c)
 
 SET(XNNPACK_PSIMD_ACCMATH_MICROKERNEL_SRCS
   src/f32-raddstoreexpminusmax/gen/psimd-p5-x4.c
@@ -628,7 +634,11 @@
   src/math/sigmoid-neon-rr1-p5-nr2recps.c
   src/math/sigmoid-neon-rr2-lut2048-p1-nr2recps.c
   src/math/sigmoid-neon-rr2-lut64-p2-nr2recps.c
-  src/math/sigmoid-neon-rr2-p5-nr2recps.c)
+  src/math/sigmoid-neon-rr2-p5-nr2recps.c
+  src/requantization/precise-neon.c
+  src/requantization/fp32-neon.c
+  src/requantization/q31-neon.c
+  src/requantization/gemmlowp-neon.c)
 
 SET(XNNPACK_NEONFMA_MICROKERNEL_SRCS
   src/f32-bilinear/gen/neonfma-c4.c
@@ -985,7 +995,16 @@
   src/x8-zip/xm-sse2.c
   src/math/exp-sse2-p5.c
   src/math/expminus-sse2-p5.c
-  src/math/sigmoid-sse2-p5-div.c)
+  src/math/sigmoid-sse2-p5-div.c
+  src/requantization/precise-sse2.c
+  src/requantization/fp32-sse2.c
+  src/requantization/q31-sse2.c
+  src/requantization/gemmlowp-sse2.c)
+
+SET(XNNPACK_SSSE3_MICROKERNEL_SRCS
+  src/requantization/precise-ssse3.c
+  src/requantization/q31-ssse3.c
+  src/requantization/gemmlowp-ssse3.c)
 
 SET(XNNPACK_SSE41_MICROKERNEL_SRCS
   src/f32-prelu/gen/sse41-2x4.c
@@ -995,7 +1014,10 @@
   src/f32-sigmoid/gen/sse41-p5-div-x12.c
   src/f32-sigmoid/gen/sse41-p5-div-x16.c
   src/f32-sigmoid/gen/sse41-p5-div-x20.c
-  src/f32-sigmoid/gen/sse41-p5-div-x24.c)
+  src/f32-sigmoid/gen/sse41-p5-div-x24.c
+  src/requantization/precise-sse4.c
+  src/requantization/q31-sse4.c
+  src/requantization/gemmlowp-sse4.c)
 
 SET(XNNPACK_AVX_MICROKERNEL_SRCS
   src/f32-clamp/avx.c
@@ -1450,6 +1472,7 @@
 IF(CMAKE_SYSTEM_PROCESSOR MATCHES "^(i[3-6]86|x86_64|AMD64)$" OR IOS_ARCH MATCHES "^(i386|x86_64|AMD64)$")
   LIST(APPEND XNNPACK_MICROKERNEL_SRCS ${XNNPACK_SSE_MICROKERNEL_SRCS})
   LIST(APPEND XNNPACK_MICROKERNEL_SRCS ${XNNPACK_SSE2_MICROKERNEL_SRCS})
+  LIST(APPEND XNNPACK_MICROKERNEL_SRCS ${XNNPACK_SSSE3_MICROKERNEL_SRCS})
   LIST(APPEND XNNPACK_MICROKERNEL_SRCS ${XNNPACK_SSE41_MICROKERNEL_SRCS})
   LIST(APPEND XNNPACK_MICROKERNEL_SRCS ${XNNPACK_AVX_MICROKERNEL_SRCS})
   LIST(APPEND XNNPACK_MICROKERNEL_SRCS ${XNNPACK_FMA3_MICROKERNEL_SRCS})
@@ -1487,6 +1510,7 @@
 IF(CMAKE_SYSTEM_PROCESSOR MATCHES "^(i[3-6]86|x86_64|AMD64)$" OR IOS_ARCH MATCHES "^(i386|x86_64|AMD64)$")
   SET_PROPERTY(SOURCE ${XNNPACK_SSE_MICROKERNEL_SRCS} APPEND_STRING PROPERTY COMPILE_FLAGS " -msse ")
   SET_PROPERTY(SOURCE ${XNNPACK_SSE2_MICROKERNEL_SRCS} APPEND_STRING PROPERTY COMPILE_FLAGS " -msse2 ")
+  SET_PROPERTY(SOURCE ${XNNPACK_SSSE3_MICROKERNEL_SRCS} APPEND_STRING PROPERTY COMPILE_FLAGS " -mssse3 ")
   SET_PROPERTY(SOURCE ${XNNPACK_SSE41_MICROKERNEL_SRCS} APPEND_STRING PROPERTY COMPILE_FLAGS " -msse4.1 ")
   SET_PROPERTY(SOURCE ${XNNPACK_AVX_MICROKERNEL_SRCS} APPEND_STRING PROPERTY COMPILE_FLAGS " -mavx ")
   SET_PROPERTY(SOURCE ${XNNPACK_FMA3_MICROKERNEL_SRCS} APPEND_STRING PROPERTY COMPILE_FLAGS " -mfma ")
@@ -2349,6 +2373,15 @@
   TARGET_INCLUDE_DIRECTORIES(x8-zip-test PRIVATE src test)
   TARGET_LINK_LIBRARIES(x8-zip-test PRIVATE XNNPACK cpuinfo fp16 gtest gtest_main)
   ADD_TEST(x8-zip-test x8-zip-test)
+
+  ADD_EXECUTABLE(requantization-test test/requantization.cc)
+  SET_TARGET_PROPERTIES(requantization-test PROPERTIES
+    CXX_STANDARD 11
+    CXX_STANDARD_REQUIRED YES
+    CXX_EXTENSIONS YES)
+  TARGET_INCLUDE_DIRECTORIES(requantization-test PRIVATE src test)
+  TARGET_LINK_LIBRARIES(requantization-test PRIVATE XNNPACK cpuinfo fp16 gtest gtest_main)
+  ADD_TEST(requantization-test requantization-test)
 ENDIF()
 
 # ---[ XNNPACK microbenchmarks
@@ -2631,4 +2664,13 @@
   TARGET_INCLUDE_DIRECTORIES(q8-gemm-bench PRIVATE src)
   TARGET_INCLUDE_DIRECTORIES(q8-gemm-bench PRIVATE "${CMAKE_CURRENT_SOURCE_DIR}")
   TARGET_LINK_LIBRARIES(q8-gemm-bench PRIVATE XNNPACK cpuinfo fp16 benchmark bench-utils)
+
+  ADD_EXECUTABLE(requantization-bench bench/requantization.cc)
+  SET_TARGET_PROPERTIES(requantization-bench PROPERTIES
+    CXX_STANDARD 11
+    CXX_STANDARD_REQUIRED YES
+    CXX_EXTENSIONS YES)
+  TARGET_INCLUDE_DIRECTORIES(requantization-bench PRIVATE src)
+  TARGET_INCLUDE_DIRECTORIES(requantization-bench PRIVATE "${CMAKE_CURRENT_SOURCE_DIR}")
+  TARGET_LINK_LIBRARIES(requantization-bench PRIVATE XNNPACK cpuinfo fp16 benchmark bench-utils)
 ENDIF()
diff --git a/bench/requantization.cc b/bench/requantization.cc
index d2db0b0..cfd95f7 100644
--- a/bench/requantization.cc
+++ b/bench/requantization.cc
@@ -21,18 +21,16 @@
 #include <xnnpack/common.h>
 #include <xnnpack/requantization-stubs.h>
 
-inline uint32_t divideRoundUp(uint32_t x, uint32_t q)
-{
+
+inline uint32_t divide_round_up(uint32_t x, uint32_t q) {
   return x / q + uint32_t(x % q != 0);
 }
 
-inline uint32_t roundUp(uint32_t x, uint32_t q)
-{
-  return q * divideRoundUp(x, q);
+inline uint32_t round_up(uint32_t x, uint32_t q) {
+  return q * divide_round_up(x, q);
 }
 
-inline uint32_t min(uint32_t a, uint32_t b)
-{
+inline uint32_t min(uint32_t a, uint32_t b) {
   return a < b ? a : b;
 }
 
@@ -88,184 +86,165 @@
   size_t n_;
 };
 
-BENCHMARK_F(Requantization, precise__scalar_unsigned32)(benchmark::State& state)
-{
+BENCHMARK_F(Requantization, precise__scalar_unsigned32)(benchmark::State& state) {
   for (auto _ : state) {
     xnn_requantize_precise__scalar_unsigned32(
         n(), input(), 0x1.0p-12f /* scale */, 128 /* zero point */, 1 /* qmin */, 254 /* qmax */, output());
   }
 }
 
-BENCHMARK_F(Requantization, precise__scalar_unsigned64)(benchmark::State& state)
-{
+BENCHMARK_F(Requantization, precise__scalar_unsigned64)(benchmark::State& state) {
   for (auto _ : state) {
     xnn_requantize_precise__scalar_unsigned64(
         n(), input(), 0x1.0p-12f /* scale */, 128 /* zero point */, 1 /* qmin */, 254 /* qmax */, output());
   }
 }
 
-BENCHMARK_F(Requantization, precise__scalar_signed64)(benchmark::State& state)
-{
+BENCHMARK_F(Requantization, precise__scalar_signed64)(benchmark::State& state) {
   for (auto _ : state) {
     xnn_requantize_precise__scalar_signed64(
         n(), input(), 0x1.0p-12f /* scale */, 128 /* zero point */, 1 /* qmin */, 254 /* qmax */, output());
   }
 }
 
-BENCHMARK_F(Requantization, fp32__scalar_lrintf)(benchmark::State& state)
-{
+BENCHMARK_F(Requantization, fp32__scalar_lrintf)(benchmark::State& state) {
   for (auto _ : state) {
     xnn_requantize_fp32__scalar_lrintf(
         n(), input(), 0x1.0p-12f /* scale */, 128 /* zero point */, 1 /* qmin */, 254 /* qmax */, output());
   }
 }
 
-BENCHMARK_F(Requantization, fp32__scalar_magic)(benchmark::State& state)
-{
+BENCHMARK_F(Requantization, fp32__scalar_magic)(benchmark::State& state) {
   for (auto _ : state) {
     xnn_requantize_fp32__scalar_magic(
         n(), input(), 0x1.0p-12f /* scale */, 128 /* zero point */, 1 /* qmin */, 254 /* qmax */, output());
   }
 }
 
-BENCHMARK_F(Requantization, gemmlowp__scalar)(benchmark::State& state)
-{
+BENCHMARK_F(Requantization, gemmlowp__scalar)(benchmark::State& state) {
   for (auto _ : state) {
     xnn_requantize_gemmlowp__scalar(
         n(), input(), 0x1.0p-12f /* scale */, 128 /* zero point */, 1 /* qmin */, 254 /* qmax */, output());
   }
 }
 
-BENCHMARK_F(Requantization, precise__psimd)(benchmark::State& state)
-{
-  for (auto _ : state) {
-    xnn_requantize_precise__psimd(
-        n(), input(), 0x1.0p-12f /* scale */, 128 /* zero point */, 1 /* qmin */, 254 /* qmax */, output());
+#if !XNN_ARCH_ASMJS && !XNN_ARCH_WASM
+  BENCHMARK_F(Requantization, precise__psimd)(benchmark::State& state) {
+    for (auto _ : state) {
+      xnn_requantize_precise__psimd(
+          n(), input(), 0x1.0p-12f /* scale */, 128 /* zero point */, 1 /* qmin */, 254 /* qmax */, output());
+    }
   }
-}
 
-BENCHMARK_F(Requantization, fp32__psimd)(benchmark::State& state)
-{
-  for (auto _ : state) {
-    xnn_requantize_fp32__psimd(
-        n(), input(), 0x1.0p-12f /* scale */, 128 /* zero point */, 1 /* qmin */, 254 /* qmax */, output());
+  BENCHMARK_F(Requantization, fp32__psimd)(benchmark::State& state) {
+    for (auto _ : state) {
+      xnn_requantize_fp32__psimd(
+          n(), input(), 0x1.0p-12f /* scale */, 128 /* zero point */, 1 /* qmin */, 254 /* qmax */, output());
+    }
   }
-}
+#endif  // !XNN_ARCH_ASMJS && !XNN_ARCH_WASM
+
 
 #if XNN_ARCH_ARM || XNN_ARCH_ARM64
-BENCHMARK_F(Requantization, precise__neon)(benchmark::State& state)
-{
-  for (auto _ : state) {
-    xnn_requantize_precise__neon(
-        n(), input(), 0x1.0p-12f /* scale */, 128 /* zero point */, 1 /* qmin */, 254 /* qmax */, output());
+  BENCHMARK_F(Requantization, precise__neon)(benchmark::State& state) {
+    for (auto _ : state) {
+      xnn_requantize_precise__neon(
+          n(), input(), 0x1.0p-12f /* scale */, 128 /* zero point */, 1 /* qmin */, 254 /* qmax */, output());
+    }
   }
-}
 
-BENCHMARK_F(Requantization, fp32__neon)(benchmark::State& state)
-{
-  for (auto _ : state) {
-    xnn_requantize_fp32__neon(
-        n(), input(), 0x1.0p-12f /* scale */, 128 /* zero point */, 1 /* qmin */, 254 /* qmax */, output());
+  BENCHMARK_F(Requantization, fp32__neon)(benchmark::State& state) {
+    for (auto _ : state) {
+      xnn_requantize_fp32__neon(
+          n(), input(), 0x1.0p-12f /* scale */, 128 /* zero point */, 1 /* qmin */, 254 /* qmax */, output());
+    }
   }
-}
 
-BENCHMARK_F(Requantization, q31__neon)(benchmark::State& state)
-{
-  for (auto _ : state) {
-    xnn_requantize_q31__neon(
-        n(), input(), 0x1.0p-12f /* scale */, 128 /* zero point */, 1 /* qmin */, 254 /* qmax */, output());
+  BENCHMARK_F(Requantization, q31__neon)(benchmark::State& state) {
+    for (auto _ : state) {
+      xnn_requantize_q31__neon(
+          n(), input(), 0x1.0p-12f /* scale */, 128 /* zero point */, 1 /* qmin */, 254 /* qmax */, output());
+    }
   }
-}
 
-BENCHMARK_F(Requantization, gemmlowp__neon)(benchmark::State& state)
-{
-  for (auto _ : state) {
-    xnn_requantize_gemmlowp__neon(
-        n(), input(), 0x1.0p-12f /* scale */, 128 /* zero point */, 1 /* qmin */, 254 /* qmax */, output());
+  BENCHMARK_F(Requantization, gemmlowp__neon)(benchmark::State& state) {
+    for (auto _ : state) {
+      xnn_requantize_gemmlowp__neon(
+          n(), input(), 0x1.0p-12f /* scale */, 128 /* zero point */, 1 /* qmin */, 254 /* qmax */, output());
+    }
   }
-}
 #endif
 
 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
-BENCHMARK_F(Requantization, precise__sse2)(benchmark::State& state)
-{
-  for (auto _ : state) {
-    xnn_requantize_precise__sse2(
-        n(), input(), 0x1.0p-12f /* scale */, 128 /* zero point */, 1 /* qmin */, 254 /* qmax */, output());
+  BENCHMARK_F(Requantization, precise__sse2)(benchmark::State& state) {
+    for (auto _ : state) {
+      xnn_requantize_precise__sse2(
+          n(), input(), 0x1.0p-12f /* scale */, 128 /* zero point */, 1 /* qmin */, 254 /* qmax */, output());
+    }
   }
-}
 
-BENCHMARK_F(Requantization, precise__ssse3)(benchmark::State& state)
-{
-  for (auto _ : state) {
-    xnn_requantize_precise__ssse3(
-        n(), input(), 0x1.0p-12f /* scale */, 128 /* zero point */, 1 /* qmin */, 254 /* qmax */, output());
+  BENCHMARK_F(Requantization, precise__ssse3)(benchmark::State& state) {
+    for (auto _ : state) {
+      xnn_requantize_precise__ssse3(
+          n(), input(), 0x1.0p-12f /* scale */, 128 /* zero point */, 1 /* qmin */, 254 /* qmax */, output());
+    }
   }
-}
 
-BENCHMARK_F(Requantization, precise__sse4)(benchmark::State& state)
-{
-  for (auto _ : state) {
-    xnn_requantize_precise__sse4(
-        n(), input(), 0x1.0p-12f /* scale */, 128 /* zero point */, 1 /* qmin */, 254 /* qmax */, output());
+  BENCHMARK_F(Requantization, precise__sse4)(benchmark::State& state) {
+    for (auto _ : state) {
+      xnn_requantize_precise__sse4(
+          n(), input(), 0x1.0p-12f /* scale */, 128 /* zero point */, 1 /* qmin */, 254 /* qmax */, output());
+    }
   }
-}
 
-BENCHMARK_F(Requantization, fp32__sse2)(benchmark::State& state)
-{
-  for (auto _ : state) {
-    xnn_requantize_fp32__sse2(
-        n(), input(), 0x1.0p-12f /* scale */, 128 /* zero point */, 1 /* qmin */, 254 /* qmax */, output());
+  BENCHMARK_F(Requantization, fp32__sse2)(benchmark::State& state) {
+    for (auto _ : state) {
+      xnn_requantize_fp32__sse2(
+          n(), input(), 0x1.0p-12f /* scale */, 128 /* zero point */, 1 /* qmin */, 254 /* qmax */, output());
+    }
   }
-}
 
-BENCHMARK_F(Requantization, q31__sse2)(benchmark::State& state)
-{
-  for (auto _ : state) {
-    xnn_requantize_q31__sse2(
-        n(), input(), 0x1.0p-12f /* scale */, 128 /* zero point */, 1 /* qmin */, 254 /* qmax */, output());
+  BENCHMARK_F(Requantization, q31__sse2)(benchmark::State& state) {
+    for (auto _ : state) {
+      xnn_requantize_q31__sse2(
+          n(), input(), 0x1.0p-12f /* scale */, 128 /* zero point */, 1 /* qmin */, 254 /* qmax */, output());
+    }
   }
-}
 
-BENCHMARK_F(Requantization, q31__ssse3)(benchmark::State& state)
-{
-  for (auto _ : state) {
-    xnn_requantize_q31__ssse3(
-        n(), input(), 0x1.0p-12f /* scale */, 128 /* zero point */, 1 /* qmin */, 254 /* qmax */, output());
+  BENCHMARK_F(Requantization, q31__ssse3)(benchmark::State& state) {
+    for (auto _ : state) {
+      xnn_requantize_q31__ssse3(
+          n(), input(), 0x1.0p-12f /* scale */, 128 /* zero point */, 1 /* qmin */, 254 /* qmax */, output());
+    }
   }
-}
 
-BENCHMARK_F(Requantization, q31__sse4)(benchmark::State& state)
-{
-  for (auto _ : state) {
-    xnn_requantize_q31__sse4(
-        n(), input(), 0x1.0p-12f /* scale */, 128 /* zero point */, 1 /* qmin */, 254 /* qmax */, output());
+  BENCHMARK_F(Requantization, q31__sse4)(benchmark::State& state) {
+    for (auto _ : state) {
+      xnn_requantize_q31__sse4(
+          n(), input(), 0x1.0p-12f /* scale */, 128 /* zero point */, 1 /* qmin */, 254 /* qmax */, output());
+    }
   }
-}
 
-BENCHMARK_F(Requantization, gemmlowp__sse2)(benchmark::State& state)
-{
-  for (auto _ : state) {
-    xnn_requantize_gemmlowp__sse2(
-        n(), input(), 0x1.0p-12f /* scale */, 128 /* zero point */, 1 /* qmin */, 254 /* qmax */, output());
+  BENCHMARK_F(Requantization, gemmlowp__sse2)(benchmark::State& state) {
+    for (auto _ : state) {
+      xnn_requantize_gemmlowp__sse2(
+          n(), input(), 0x1.0p-12f /* scale */, 128 /* zero point */, 1 /* qmin */, 254 /* qmax */, output());
+    }
   }
-}
 
-BENCHMARK_F(Requantization, gemmlowp__ssse3)(benchmark::State& state)
-{
-  for (auto _ : state) {
-    xnn_requantize_gemmlowp__ssse3(
-        n(), input(), 0x1.0p-12f /* scale */, 128 /* zero point */, 1 /* qmin */, 254 /* qmax */, output());
+  BENCHMARK_F(Requantization, gemmlowp__ssse3)(benchmark::State& state) {
+    for (auto _ : state) {
+      xnn_requantize_gemmlowp__ssse3(
+          n(), input(), 0x1.0p-12f /* scale */, 128 /* zero point */, 1 /* qmin */, 254 /* qmax */, output());
+    }
   }
-}
 
-BENCHMARK_F(Requantization, gemmlowp__sse4)(benchmark::State& state)
-{
-  for (auto _ : state) {
-    xnn_requantize_gemmlowp__sse4(
-        n(), input(), 0x1.0p-12f /* scale */, 128 /* zero point */, 1 /* qmin */, 254 /* qmax */, output());
+  BENCHMARK_F(Requantization, gemmlowp__sse4)(benchmark::State& state) {
+    for (auto _ : state) {
+      xnn_requantize_gemmlowp__sse4(
+          n(), input(), 0x1.0p-12f /* scale */, 128 /* zero point */, 1 /* qmin */, 254 /* qmax */, output());
+    }
   }
-}
 #endif
 
 #ifndef XNNPACK_BENCHMARK_NO_MAIN
diff --git a/src/requantization/gemmlowp-requantization.h b/src/requantization/gemmlowp-requantization.h
new file mode 100644
index 0000000..9ad0275
--- /dev/null
+++ b/src/requantization/gemmlowp-requantization.h
@@ -0,0 +1,143 @@
+// Copyright (c) Facebook, Inc. and its affiliates.
+// All rights reserved.
+//
+// Copyright 2019 Google LLC
+//
+// This source code is licensed under the BSD-style license found in the
+// LICENSE file in the root directory of this source tree.
+
+#pragma once
+
+#include <stdint.h>
+
+#include <xnnpack/common.h>
+#include <xnnpack/scalar-utils.h>
+
+#if XNN_ARCH_X86 || XNN_ARCH_X86_64
+  #include <immintrin.h>
+#endif
+
+// The code below is adapted from Google's gemmlowp library.
+// It is only used in XNNPACK unit tests and comparative benchmarks, but not the library itself.
+//
+// Copyright 2015 Google Inc. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+
+inline static int32_t gemmlowp_scalar_vqrdmulh_s32(int32_t a, int32_t b)
+{
+  const bool overflow = a == b && a == INT32_MIN;
+  const int64_t ab_64 = (int64_t) a * (int64_t) b;
+  const int32_t nudge = (a ^ b) >= 0 ? INT32_C(0x40000000) : -INT32_C(0x3FFFFFFF);
+  const int32_t ab_x2_high32 = (int32_t) ((ab_64 + nudge) / INT64_C(0x80000000));
+  return overflow ? INT32_MAX : ab_x2_high32;
+}
+
+inline static int32_t gemmlowp_scalar_rdivbypo2_s32(int32_t x, int exponent)
+{
+  const int32_t mask = ((1 << exponent) - 1);
+  const int32_t remainder = x & mask;
+  const int32_t threshold = (mask >> 1) + (int32_t) (x < 0);
+  return asr_s32(x, exponent) + (int32_t) (remainder > threshold);
+}
+
+#if XNN_ARCH_X86 || XNN_ARCH_X86_64
+  static inline __m128i gemmlowp_sse_rdivbypo2_s32(__m128i x, int exponent) {
+    const __m128i mask = _mm_set1_epi32((int32_t) ((UINT64_C(1) << exponent) - UINT64_C(1)));
+    const __m128i remainder = _mm_and_si128(x, mask);
+    const __m128i threshold = _mm_sub_epi32(
+        _mm_srli_epi32(mask, 1), _mm_cmplt_epi32(x, _mm_setzero_si128()));
+    return _mm_sub_epi32(
+        _mm_sra_epi32(x, _mm_cvtsi32_si128(exponent)),
+        _mm_cmpgt_epi32(remainder, threshold));
+  }
+
+  static inline __m128i gemmlowp_sse_mul_s32(__m128i a, __m128i b) {
+  #ifdef __SSE4_1__
+    return _mm_mul_epi32(a, b);
+  #else
+    __m128i sign, zero, mul_us, a_neg, b_neg, mul_us_neg;
+    sign = _mm_xor_si128(a, b);
+    sign = _mm_srai_epi32(sign, 31); // promote sign bit to all fields, all fff if
+                                     // negative and all 0 if positive
+    sign = _mm_shuffle_epi32(
+        sign,
+        _MM_SHUFFLE(2, 2, 0, 0)); // promote sign bit to 3 and 1st data lanes
+    zero = _mm_setzero_si128();
+  #ifdef __SSSE3__
+    a_neg = _mm_abs_epi32(a); // negate a and b
+    b_neg = _mm_abs_epi32(b); // negate a and b
+  #else  // pre-SSSE3
+    const __m128i a_neg_mask = _mm_cmplt_epi32(a, zero);
+    a_neg = _mm_sub_epi32(_mm_xor_si128(a, a_neg_mask), a_neg_mask);
+    const __m128i b_neg_mask = _mm_cmplt_epi32(b, zero);
+    b_neg = _mm_sub_epi32(_mm_xor_si128(b, b_neg_mask), b_neg_mask);
+  #endif  // pre-SSSE3
+    mul_us = _mm_mul_epu32(a_neg, b_neg); // uses 0 and 2nd data lanes, (abs), the
+                                          // multiplication gives 64 bit result
+    mul_us_neg = _mm_sub_epi64(zero, mul_us);
+    mul_us_neg = _mm_and_si128(sign, mul_us_neg);
+    mul_us = _mm_andnot_si128(sign, mul_us);
+    return _mm_or_si128(mul_us, mul_us_neg);
+  #endif
+  }
+
+  static inline __m128i gemmlowp_sse_vqrdmulh_s32(__m128i a, __m128i b) {
+    // saturation only happen if a == b == INT32_MIN
+    const __m128i min = _mm_set1_epi32(INT32_MIN);
+    const __m128i saturation_mask =
+        _mm_and_si128(_mm_cmpeq_epi32(a, b), _mm_cmpeq_epi32(a, min));
+
+    // a = a0 | a1 | a2 | a3
+    // b = b0 | b1 | b2 | b3
+    const __m128i a0_a2 = a;
+    const __m128i a1_a3 = _mm_srli_si128(a, 4);
+    const __m128i b0_b2 = b;
+    const __m128i b1_b3 = _mm_srli_si128(b, 4);
+
+    const __m128i a0b0_a2b2 = gemmlowp_sse_mul_s32(a0_a2, b0_b2);
+    const __m128i a1b1_a3b3 = gemmlowp_sse_mul_s32(a1_a3, b1_b3);
+
+    // do the rounding and take into account that it will be doubled
+    const __m128i nudge = _mm_set1_epi64x(1 << 30);
+    const __m128i a0b0_a2b2_rounded = _mm_add_epi64(a0b0_a2b2, nudge);
+    const __m128i a1b1_a3b3_rounded = _mm_add_epi64(a1b1_a3b3, nudge);
+
+    // do the doubling
+    const __m128i a0b0_a2b2_rounded_2x = _mm_slli_epi64(a0b0_a2b2_rounded, 1);
+    const __m128i a1b1_a3b3_rounded_2x = _mm_slli_epi64(a1b1_a3b3_rounded, 1);
+
+  // get the high part of the products
+  #ifdef __SSE4_1__
+    const __m128i result = _mm_blend_epi16(
+        _mm_srli_epi64(a0b0_a2b2_rounded_2x, 32), a1b1_a3b3_rounded_2x, 0xCC);
+  #else
+    const __m128i result0213 = _mm_castps_si128(_mm_shuffle_ps(
+        _mm_castsi128_ps(a0b0_a2b2_rounded_2x),
+        _mm_castsi128_ps(a1b1_a3b3_rounded_2x),
+        _MM_SHUFFLE(3, 1, 3, 1)));
+    const __m128i result = _mm_shuffle_epi32(result0213, _MM_SHUFFLE(3, 1, 2, 0));
+  #endif
+
+  // saturate those which overflowed
+  #ifdef __SSE4_1__
+    const __m128i saturated_result = _mm_blendv_epi8(result, min, saturation_mask);
+  #else
+    const __m128i saturated_result = _mm_or_si128(
+        _mm_and_si128(saturation_mask, min),
+        _mm_andnot_si128(saturation_mask, result));
+  #endif
+    return saturated_result;
+  }
+#endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
diff --git a/src/requantization/gemmlowp-scalar.c b/src/requantization/gemmlowp-scalar.c
index 23968b8..a6f9acb 100644
--- a/src/requantization/gemmlowp-scalar.c
+++ b/src/requantization/gemmlowp-scalar.c
@@ -14,7 +14,7 @@
 #include <xnnpack/requantization-stubs.h>
 #include <xnnpack/scalar-utils.h>
 
-#include "gemmlowp-scalar.h"
+#include "gemmlowp-requantization.h"
 
 
 void xnn_requantize_gemmlowp__scalar(
diff --git a/src/requantization/gemmlowp-scalar.h b/src/requantization/gemmlowp-scalar.h
deleted file mode 100644
index d4d85c9..0000000
--- a/src/requantization/gemmlowp-scalar.h
+++ /dev/null
@@ -1,47 +0,0 @@
-// Copyright (c) Facebook, Inc. and its affiliates.
-// All rights reserved.
-//
-// Copyright 2019 Google LLC
-//
-// This source code is licensed under the BSD-style license found in the
-// LICENSE file in the root directory of this source tree.
-
-#pragma once
-
-#include <stdint.h>
-#include <limits.h>
-
-// The code below is adapted from Google's gemmlowp library.
-// It is only used in XNNPACK unit tests and comparative benchmarks, but not the library itself.
-//
-// Copyright 2015 Google Inc. All Rights Reserved.
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-//     http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-
-inline static int32_t gemmlowp_scalar_vqrdmulh_s32(int32_t a, int32_t b)
-{
-  const bool overflow = a == b && a == INT32_MIN;
-  const int64_t ab_64 = (int64_t) a * (int64_t) b;
-  const int32_t nudge = (a ^ b) >= 0 ? INT32_C(0x40000000) : -INT32_C(0x3FFFFFFF);
-  const int32_t ab_x2_high32 = (int32_t) ((ab_64 + nudge) / INT64_C(0x80000000));
-  return overflow ? INT32_MAX : ab_x2_high32;
-}
-
-inline static int32_t gemmlowp_scalar_rdivbypo2_s32(int32_t x, int exponent)
-{
-  const int32_t mask = ((1 << exponent) - 1);
-  const int32_t remainder = x & mask;
-  const int32_t threshold = (mask >> 1) + (int32_t) (x < 0);
-  return asr_s32(x, exponent) + (int32_t) (remainder > threshold);
-}
diff --git a/src/requantization/gemmlowp-sse.h b/src/requantization/gemmlowp-sse.h
deleted file mode 100644
index d8e2cda..0000000
--- a/src/requantization/gemmlowp-sse.h
+++ /dev/null
@@ -1,119 +0,0 @@
-// Copyright (c) Facebook, Inc. and its affiliates.
-// All rights reserved.
-//
-// Copyright 2019 Google LLC
-//
-// This source code is licensed under the BSD-style license found in the
-// LICENSE file in the root directory of this source tree.
-
-#pragma once
-
-#include <limits.h>
-
-#include <immintrin.h>
-
-// The code below is adapted from Google's gemmlowp library.
-// It is only used in XNNPACK unit tests and comparative benchmarks,
-// but not the library itself.
-//
-// Copyright 2015 Google Inc. All Rights Reserved.
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-//     http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-static inline __m128i gemmlowp_sse_rdivbypo2_s32(__m128i x, int exponent) {
-  const __m128i mask = _mm_set1_epi32((int32_t) ((UINT64_C(1) << exponent) - UINT64_C(1)));
-  const __m128i remainder = _mm_and_si128(x, mask);
-  const __m128i threshold = _mm_sub_epi32(
-      _mm_srli_epi32(mask, 1), _mm_cmplt_epi32(x, _mm_setzero_si128()));
-  return _mm_sub_epi32(
-      _mm_sra_epi32(x, _mm_cvtsi32_si128(exponent)),
-      _mm_cmpgt_epi32(remainder, threshold));
-}
-
-static inline __m128i gemmlowp_sse_mul_s32(__m128i a, __m128i b) {
-#ifdef __SSE4_1__
-  return _mm_mul_epi32(a, b);
-#else
-  __m128i sign, zero, mul_us, a_neg, b_neg, mul_us_neg;
-  sign = _mm_xor_si128(a, b);
-  sign = _mm_srai_epi32(sign, 31); // promote sign bit to all fields, all fff if
-                                   // negative and all 0 if positive
-  sign = _mm_shuffle_epi32(
-      sign,
-      _MM_SHUFFLE(2, 2, 0, 0)); // promote sign bit to 3 and 1st data lanes
-  zero = _mm_setzero_si128();
-#ifdef __SSSE3__
-  a_neg = _mm_abs_epi32(a); // negate a and b
-  b_neg = _mm_abs_epi32(b); // negate a and b
-#else  // pre-SSSE3
-  const __m128i a_neg_mask = _mm_cmplt_epi32(a, zero);
-  a_neg = _mm_sub_epi32(_mm_xor_si128(a, a_neg_mask), a_neg_mask);
-  const __m128i b_neg_mask = _mm_cmplt_epi32(b, zero);
-  b_neg = _mm_sub_epi32(_mm_xor_si128(b, b_neg_mask), b_neg_mask);
-#endif  // pre-SSSE3
-  mul_us = _mm_mul_epu32(a_neg, b_neg); // uses 0 and 2nd data lanes, (abs), the
-                                        // multiplication gives 64 bit result
-  mul_us_neg = _mm_sub_epi64(zero, mul_us);
-  mul_us_neg = _mm_and_si128(sign, mul_us_neg);
-  mul_us = _mm_andnot_si128(sign, mul_us);
-  return _mm_or_si128(mul_us, mul_us_neg);
-#endif
-}
-
-static inline __m128i gemmlowp_sse_vqrdmulh_s32(__m128i a, __m128i b) {
-  // saturation only happen if a == b == INT32_MIN
-  const __m128i min = _mm_set1_epi32(INT32_MIN);
-  const __m128i saturation_mask =
-      _mm_and_si128(_mm_cmpeq_epi32(a, b), _mm_cmpeq_epi32(a, min));
-
-  // a = a0 | a1 | a2 | a3
-  // b = b0 | b1 | b2 | b3
-  const __m128i a0_a2 = a;
-  const __m128i a1_a3 = _mm_srli_si128(a, 4);
-  const __m128i b0_b2 = b;
-  const __m128i b1_b3 = _mm_srli_si128(b, 4);
-
-  const __m128i a0b0_a2b2 = gemmlowp_sse_mul_s32(a0_a2, b0_b2);
-  const __m128i a1b1_a3b3 = gemmlowp_sse_mul_s32(a1_a3, b1_b3);
-
-  // do the rounding and take into account that it will be doubled
-  const __m128i nudge = _mm_set1_epi64x(1 << 30);
-  const __m128i a0b0_a2b2_rounded = _mm_add_epi64(a0b0_a2b2, nudge);
-  const __m128i a1b1_a3b3_rounded = _mm_add_epi64(a1b1_a3b3, nudge);
-
-  // do the doubling
-  const __m128i a0b0_a2b2_rounded_2x = _mm_slli_epi64(a0b0_a2b2_rounded, 1);
-  const __m128i a1b1_a3b3_rounded_2x = _mm_slli_epi64(a1b1_a3b3_rounded, 1);
-
-// get the high part of the products
-#ifdef __SSE4_1__
-  const __m128i result = _mm_blend_epi16(
-      _mm_srli_epi64(a0b0_a2b2_rounded_2x, 32), a1b1_a3b3_rounded_2x, 0xCC);
-#else
-  const __m128i result0213 = _mm_castps_si128(_mm_shuffle_ps(
-      _mm_castsi128_ps(a0b0_a2b2_rounded_2x),
-      _mm_castsi128_ps(a1b1_a3b3_rounded_2x),
-      _MM_SHUFFLE(3, 1, 3, 1)));
-  const __m128i result = _mm_shuffle_epi32(result0213, _MM_SHUFFLE(3, 1, 2, 0));
-#endif
-
-// saturate those which overflowed
-#ifdef __SSE4_1__
-  const __m128i saturated_result = _mm_blendv_epi8(result, min, saturation_mask);
-#else
-  const __m128i saturated_result = _mm_or_si128(
-      _mm_and_si128(saturation_mask, min),
-      _mm_andnot_si128(saturation_mask, result));
-#endif
-  return saturated_result;
-}
diff --git a/src/requantization/gemmlowp-sse2.c b/src/requantization/gemmlowp-sse2.c
index 105bdc4..5c380ab 100644
--- a/src/requantization/gemmlowp-sse2.c
+++ b/src/requantization/gemmlowp-sse2.c
@@ -15,7 +15,7 @@
 
 #include <xnnpack/requantization-stubs.h>
 
-#include "gemmlowp-sse.h"
+#include "gemmlowp-requantization.h"
 
 
 void xnn_requantize_gemmlowp__sse2(
diff --git a/src/requantization/gemmlowp-sse4.c b/src/requantization/gemmlowp-sse4.c
index 1855a6d..533645d 100644
--- a/src/requantization/gemmlowp-sse4.c
+++ b/src/requantization/gemmlowp-sse4.c
@@ -15,7 +15,7 @@
 
 #include <xnnpack/requantization-stubs.h>
 
-#include "gemmlowp-sse.h"
+#include "gemmlowp-requantization.h"
 
 
 void xnn_requantize_gemmlowp__sse4(
diff --git a/src/requantization/gemmlowp-ssse3.c b/src/requantization/gemmlowp-ssse3.c
index ce2357f..5974541 100644
--- a/src/requantization/gemmlowp-ssse3.c
+++ b/src/requantization/gemmlowp-ssse3.c
@@ -15,7 +15,7 @@
 
 #include <xnnpack/requantization-stubs.h>
 
-#include "gemmlowp-sse.h"
+#include "gemmlowp-requantization.h"
 
 
 void xnn_requantize_gemmlowp__ssse3(
diff --git a/test/requantization-tester.h b/test/requantization-tester.h
index 289f926..bd1b4db 100644
--- a/test/requantization-tester.h
+++ b/test/requantization-tester.h
@@ -21,6 +21,7 @@
 #include <vector>
 
 #include <xnnpack/params.h>
+#include <xnnpack/requantization-stubs.h>
 #include <xnnpack/scalar-utils.h>
 
 
@@ -39,13 +40,13 @@
     return ldexpf(1.0f, -s());
   }
 
-  inline RequantizationTester& zeroPoint(int32_t zeroPoint) {
-    this->zeroPoint_ = zeroPoint;
+  inline RequantizationTester& zero_point(int32_t zero_point) {
+    this->zero_point_ = zero_point;
     return *this;
   }
 
-  inline int32_t zeroPoint() const {
-    return this->zeroPoint_;
+  inline int32_t zero_point() const {
+    return this->zero_point_;
   }
 
   inline RequantizationTester& qmin(uint8_t qmin) {
@@ -82,9 +83,9 @@
    * - no output clamping
    * produces exactly i, provided that ((i - zero point) * 2**s) does not overflow.
    */
-  void testExactDivideByPO2(requantization_function requantize) const {
-    ASSERT_GE(zeroPoint(), 0);
-    ASSERT_LE(zeroPoint(), 255);
+  void TestExactDivideByPO2(requantization_function requantize) const {
+    ASSERT_GE(zero_point(), 0);
+    ASSERT_LE(zero_point(), 255);
 
     /* Note: need s >= 1 to ensure scale = exp2(-s) < 1.0 */
     ASSERT_GE(s(), 1);
@@ -92,20 +93,20 @@
 
     std::vector<int32_t> inputs(256);
     std::vector<uint8_t> outputs(inputs.size());
-    const int32_t maxI = (uint32_t(std::numeric_limits<int32_t>::max()) >> s()) + zeroPoint();
-    const int32_t minI = -(-uint32_t(std::numeric_limits<int32_t>::min()) >> s()) + zeroPoint();
+    const int32_t maxI = (uint32_t(std::numeric_limits<int32_t>::max()) >> s()) + zero_point();
+    const int32_t minI = -(-uint32_t(std::numeric_limits<int32_t>::min()) >> s()) + zero_point();
     for (int32_t i = 0; i < 256; i++) {
       const int32_t clampedI = std::max(minI, std::min(maxI, i));
-      inputs[i] = int32_t(uint32_t(clampedI - zeroPoint()) << s());
+      inputs[i] = int32_t(uint32_t(clampedI - zero_point()) << s());
     }
     requantize(inputs.size(), inputs.data(),
-        scale(), zeroPoint(), qmin(), qmax(),
+        scale(), zero_point(), qmin(), qmax(),
         outputs.data());
     for (int32_t i = 0; i < 256; i++) {
       const int32_t clampedI = std::max(minI, std::min(maxI, i));
       ASSERT_EQ(clampedI, outputs[i]) << "i = " << i << ", clamped i = " << clampedI <<
         ", min i = " << minI << ", max i = " << maxI <<
-        ", s = " << s() << ", zero point = " << zeroPoint();
+        ", s = " << s() << ", zero point = " << zero_point();
     }
   }
 
@@ -116,9 +117,9 @@
    * - no output clamping
    * produces exactly i, provided that ((i - zero point) * 2**s) does not overflow.
    */
-  void testDivideByPO2WithRoundingUp(requantization_function requantize) {
-    ASSERT_GE(zeroPoint(), 0);
-    ASSERT_LE(zeroPoint(), 255);
+  void TestDivideByPO2WithRoundingUp(requantization_function requantize) {
+    ASSERT_GE(zero_point(), 0);
+    ASSERT_LE(zero_point(), 255);
 
     /* Note: need s >= 1 to ensure scale = exp2(-s) < 1.0 */
     ASSERT_GE(s(), 1);
@@ -127,19 +128,19 @@
     std::vector<int32_t> inputs(256);
     std::vector<uint8_t> outputs(inputs.size());
     for (int32_t i = 0; i < 256; i++) {
-      const int64_t input = RequantizationTester::shiftLeft(i - zeroPoint(), s()) -
-        (INT64_C(1) << (s() - 1)) + (int64_t) (i <= zeroPoint());
+      const int64_t input = RequantizationTester::ShiftLeft(i - zero_point(), s()) -
+        (INT64_C(1) << (s() - 1)) + (int64_t) (i <= zero_point());
       inputs[i] = int32_t(input);
     }
     requantize(inputs.size(), inputs.data(),
-        scale(), zeroPoint(), qmin(), qmax(),
+        scale(), zero_point(), qmin(), qmax(),
         outputs.data());
     for (int32_t i = 0; i < 256; i++) {
-      const int64_t input = RequantizationTester::shiftLeft(i - zeroPoint(), s()) -
-        (INT64_C(1) << (s() - 1)) + (int64_t) (i <= zeroPoint());
+      const int64_t input = RequantizationTester::ShiftLeft(i - zero_point(), s()) -
+        (INT64_C(1) << (s() - 1)) + (int64_t) (i <= zero_point());
       if (int32_t(input) == input) {
         ASSERT_EQ(i, uint32_t(outputs[i])) << "i = " << i << ", input = " << input <<
-          ", s = " << s() << ", zero point = " << zeroPoint();
+          ", s = " << s() << ", zero point = " << zero_point();
       }
     }
   }
@@ -151,9 +152,9 @@
    * - no output clamping
    * produces exactly i, provided that ((i - zero point) * 2**s) does not overflow.
    */
-  void testDivideByPO2WithRoundingDown(requantization_function requantize) {
-    ASSERT_GE(zeroPoint(), 0);
-    ASSERT_LE(zeroPoint(), 255);
+  void TestDivideByPO2WithRoundingDown(requantization_function requantize) {
+    ASSERT_GE(zero_point(), 0);
+    ASSERT_LE(zero_point(), 255);
 
     /* Note: need s >= 1 to ensure scale = exp2(-s) < 1.0 */
     ASSERT_GE(s(), 1);
@@ -162,26 +163,26 @@
     std::vector<int32_t> inputs(256);
     std::vector<uint8_t> outputs(inputs.size());
     for (int32_t i = 0; i < 256; i++) {
-      const int64_t input = RequantizationTester::shiftLeft(i - zeroPoint(), s()) +
-        (INT64_C(1) << (s() - 1)) - (int64_t) (i >= zeroPoint());
+      const int64_t input = RequantizationTester::ShiftLeft(i - zero_point(), s()) +
+        (INT64_C(1) << (s() - 1)) - (int64_t) (i >= zero_point());
       inputs[i] = int32_t(input);
     }
     requantize(inputs.size(), inputs.data(),
-        scale(), zeroPoint(), qmin(), qmax(),
+        scale(), zero_point(), qmin(), qmax(),
         outputs.data());
     for (int32_t i = 0; i < 256; i++) {
-      const int64_t input = RequantizationTester::shiftLeft(i - zeroPoint(), s()) +
-        (INT64_C(1) << (s() - 1)) - (int64_t) (i >= zeroPoint());
+      const int64_t input = RequantizationTester::ShiftLeft(i - zero_point(), s()) +
+        (INT64_C(1) << (s() - 1)) - (int64_t) (i >= zero_point());
       if (int32_t(input) == input) {
         ASSERT_EQ(i, uint32_t(outputs[i])) << "i = " << i << ", input = " << input <<
-          ", s = " << s() << ", zero point = " << zeroPoint();
+          ", s = " << s() << ", zero point = " << zero_point();
       }
     }
   }
 
-  void testDivideByPO2WithRoundingAway(requantization_function requantize) {
-    ASSERT_GE(zeroPoint(), 0);
-    ASSERT_LE(zeroPoint(), 255);
+  void TestDivideByPO2WithRoundingAway(requantization_function requantize) {
+    ASSERT_GE(zero_point(), 0);
+    ASSERT_LE(zero_point(), 255);
 
     /* Note: need s >= 1 to ensure scale = exp2(-s) < 1.0 */
     ASSERT_GE(s(), 1);
@@ -190,7 +191,7 @@
     std::vector<int32_t> inputs(256);
     std::vector<uint8_t> outputs(inputs.size());
     for (int32_t i = 0; i < 256; i++) {
-      int64_t input = RequantizationTester::shiftLeft(i - zeroPoint(), s());
+      int64_t input = RequantizationTester::ShiftLeft(i - zero_point(), s());
       if (input > 0) {
         input -= INT64_C(1) << (s() - 1);
       } else if (input < 0) {
@@ -199,10 +200,10 @@
       inputs[i] = int32_t(input);
     }
     requantize(inputs.size(), inputs.data(),
-        scale(), zeroPoint(), qmin(), qmax(),
+        scale(), zero_point(), qmin(), qmax(),
         outputs.data());
     for (uint32_t i = 0; i < 256; i++) {
-      int64_t input = RequantizationTester::shiftLeft(i - zeroPoint(), s());
+      int64_t input = RequantizationTester::ShiftLeft(i - zero_point(), s());
       if (input > 0) {
         input -= INT64_C(1) << (s() - 1);
       } else if (input < 0) {
@@ -210,26 +211,26 @@
       }
       if (int32_t(input) == input) {
         ASSERT_EQ(i, uint32_t(outputs[i])) << "i = " << i << ", input = " << input <<
-          ", s = " << s() << ", zero point = " << zeroPoint();
+          ", s = " << s() << ", zero point = " << zero_point();
       }
     }
   }
 
-  void testSpecialCases(requantization_function requantize) {
+  void TestSpecialCases(requantization_function requantize) {
     std::vector<int32_t> inputs(256);
     std::vector<uint8_t> outputs(inputs.size());
 
     std::fill(inputs.begin(), inputs.end(), std::numeric_limits<int32_t>::min());
-    for (int32_t zeroPoint = 0; zeroPoint < 256; zeroPoint++) {
+    for (int32_t zero_point = 0; zero_point < 256; zero_point++) {
       requantize(
           inputs.size(),
           inputs.data(),
           ldexpf(1.0f, -32) /* scale */,
-          zeroPoint /* zero point */,
+          zero_point /* zero point */,
           std::numeric_limits<uint8_t>::min(),
           std::numeric_limits<uint8_t>::max(),
           outputs.data());
-      ASSERT_EQ(std::max(int32_t(0), zeroPoint - 1), *std::min_element(outputs.cbegin(), outputs.cend()));
+      ASSERT_EQ(std::max(int32_t(0), zero_point - 1), *std::min_element(outputs.cbegin(), outputs.cend()));
     }
 
     std::fill(inputs.begin(), inputs.end(), std::numeric_limits<int32_t>::max());
@@ -246,7 +247,7 @@
     }
   }
 
-  void testRandomCasesPrecise(requantization_function requantize) {
+  void TestRandomCasesPrecise(requantization_function requantize) {
     std::random_device random_device;
     std::mt19937 mtRng(random_device());
     for (size_t iteration = 0; iteration < iterations(); iteration++) {
@@ -255,17 +256,17 @@
       std::vector<int32_t> inputs(4096);
       std::vector<uint8_t> outputs(inputs.size());
 
-      const uint8_t zeroPoint = UINT8_C(128);
-      std::uniform_real_distribution<float> scaleDistribution(0x1.000000p-23f, 0x1.FFFFFEp-1f);
-      const float scale = scaleDistribution(mtRng);
+      const uint8_t zero_point = UINT8_C(128);
+      std::uniform_real_distribution<float> scale_distribution(0x1.000000p-23f, 0x1.FFFFFEp-1f);
+      const float scale = scale_distribution(mtRng);
       for (size_t i = 0; i < inputs.size(); i++) {
-        const uint8_t approximateOutput = rng();
-        const int32_t input = int32_t(double(approximateOutput) / double(scale));
+        const uint8_t approximate_output = rng();
+        const int32_t input = int32_t(double(approximate_output) / double(scale));
         inputs[i] = input;
       }
 
       requantize(
-        inputs.size(), inputs.data(), scale, zeroPoint,
+        inputs.size(), inputs.data(), scale, zero_point,
         std::numeric_limits<uint8_t>::min(),
         std::numeric_limits<uint8_t>::max(),
         outputs.data());
@@ -276,17 +277,17 @@
         *std::min_element(outputs.cbegin(), outputs.cend()));
 
       for (size_t i = 0; i < inputs.size(); i++) {
-        const uint8_t referenceOutput =
+        const uint8_t reference_output =
           scalar_requantize_precise(
-            inputs[i], scale, zeroPoint,
+            inputs[i], scale, zero_point,
             std::numeric_limits<uint8_t>::min(),
             std::numeric_limits<uint8_t>::max());
-        ASSERT_EQ(uint32_t(referenceOutput), uint32_t(outputs[i]));
+        ASSERT_EQ(uint32_t(reference_output), uint32_t(outputs[i]));
       }
     }
   }
 
-  void testRandomCasesApproximate(requantization_function requantize) {
+  void TestRandomCasesApproximate(requantization_function requantize) {
     std::random_device random_device;
     std::mt19937 mtRng(random_device());
     for (size_t iteration = 0; iteration < iterations(); iteration++) {
@@ -295,17 +296,17 @@
       std::vector<int32_t> inputs(4096);
       std::vector<uint8_t> outputs(inputs.size());
 
-      const uint8_t zeroPoint = UINT8_C(128);
-      std::uniform_real_distribution<float> scaleDistribution(0x1.000000p-23f, 0x1.FFFFFEp-1f);
-      const float scale = scaleDistribution(mtRng);
+      const uint8_t zero_point = UINT8_C(128);
+      std::uniform_real_distribution<float> scale_distribution(0x1.000000p-23f, 0x1.FFFFFEp-1f);
+      const float scale = scale_distribution(mtRng);
       for (size_t i = 0; i < inputs.size(); i++) {
-        const uint8_t approximateOutput = rng();
-        const int32_t input = int32_t(double(approximateOutput) / double(scale));
+        const uint8_t approximate_output = rng();
+        const int32_t input = int32_t(double(approximate_output) / double(scale));
         inputs[i] = input;
       }
 
       requantize(
-        inputs.size(), inputs.data(), scale, zeroPoint,
+        inputs.size(), inputs.data(), scale, zero_point,
         std::numeric_limits<uint8_t>::min(),
         std::numeric_limits<uint8_t>::max(),
         outputs.data());
@@ -316,19 +317,19 @@
         *std::min_element(outputs.cbegin(), outputs.cend()));
 
       for (size_t i = 0; i < inputs.size(); i++) {
-        const double referenceOutput =
-          RequantizationTester::requantizeApproximate(
-            inputs[i], scale, zeroPoint,
+        const double reference_output =
+          RequantizationTester::RequantizeApproximate(
+            inputs[i], scale, zero_point,
             std::numeric_limits<uint8_t>::min(),
             std::numeric_limits<uint8_t>::max());
-        ASSERT_LE(fabs(referenceOutput - double(outputs[i])), 0.55) <<
+        ASSERT_LE(fabs(reference_output - double(outputs[i])), 0.55) <<
           "input = " << inputs[i] <<
-          ", output = " << uint32_t(outputs[i]) << ", reference output = " << referenceOutput;
+          ", output = " << uint32_t(outputs[i]) << ", reference output = " << reference_output;
       }
     }
   }
 
-  void testRandomCasesAgainstReference(requantization_function requantize, requantization_function requantizeReference) {
+  void TestRandomCasesAgainstReference(requantization_function requantize, requantization_function requantize_reference) {
     std::random_device random_device;
     std::mt19937 mtRng(random_device());
     for (size_t iteration = 0; iteration < iterations(); iteration++) {
@@ -336,28 +337,28 @@
 
       std::vector<int32_t> inputs(4096);
       std::vector<uint8_t> outputs(inputs.size());
-      std::vector<uint8_t> referenceOutputs(inputs.size());
+      std::vector<uint8_t> reference_outputs(inputs.size());
 
-      const uint8_t zeroPoint = UINT8_C(128);
-      std::uniform_real_distribution<float> scaleDistribution(0x1.000000p-23f, 0x1.FFFFFEp-1f);
-      const float scale = scaleDistribution(mtRng);
+      const uint8_t zero_point = UINT8_C(128);
+      std::uniform_real_distribution<float> scale_distribution(0x1.000000p-23f, 0x1.FFFFFEp-1f);
+      const float scale = scale_distribution(mtRng);
       for (size_t i = 0; i < inputs.size(); i++) {
-        const uint8_t approximateOutput = rng();
-        const int32_t input = int32_t(double(approximateOutput) / double(scale));
+        const uint8_t approximate_output = rng();
+        const int32_t input = int32_t(double(approximate_output) / double(scale));
         inputs[i] = input;
       }
 
       requantize(
-        inputs.size(), inputs.data(), scale, zeroPoint,
+        inputs.size(), inputs.data(), scale, zero_point,
         std::numeric_limits<uint8_t>::min(),
         std::numeric_limits<uint8_t>::max(),
         outputs.data());
 
-      requantizeReference(
-        inputs.size(), inputs.data(), scale, zeroPoint,
+      requantize_reference(
+        inputs.size(), inputs.data(), scale, zero_point,
         std::numeric_limits<uint8_t>::min(),
         std::numeric_limits<uint8_t>::max(),
-        referenceOutputs.data());
+        reference_outputs.data());
 
       /* Ensure that outputs are not all identical, as in this case Test doesn't validate much */
       ASSERT_NE(
@@ -365,42 +366,42 @@
         *std::min_element(outputs.cbegin(), outputs.cend()));
 
       for (size_t i = 0; i < inputs.size(); i++) {
-        ASSERT_EQ(uint32_t(referenceOutputs[i]), uint32_t(outputs[i]));
+        ASSERT_EQ(uint32_t(reference_outputs[i]), uint32_t(outputs[i]));
       }
     }
   }
 
-  static inline int64_t shiftLeft(int64_t w, uint32_t n) {
+  static inline int64_t ShiftLeft(int64_t w, uint32_t n) {
     return (int64_t) ((uint64_t) w << n);
   }
 
-  static inline double requantizeApproximate(
+  static inline double RequantizeApproximate(
     int32_t value,
     float scale,
-    uint8_t zeroPoint,
+    uint8_t zero_point,
     uint8_t qmin,
     uint8_t qmax)
   {
     assert(scale < 1.0f);
     assert(scale >= 0x1.0p-32f);
 
-    double clampedValue = double(value) * double(scale) + double(zeroPoint);
+    double clamped_value = double(value) * double(scale) + double(zero_point);
 
     const double fmin = double(qmin);
-    if (clampedValue < fmin) {
-      clampedValue = fmin;
+    if (clamped_value < fmin) {
+      clamped_value = fmin;
     }
 
     const double fmax = double(qmax);
-    if (clampedValue > fmax) {
-      clampedValue = fmax;
+    if (clamped_value > fmax) {
+      clamped_value = fmax;
     }
 
-    return clampedValue;
+    return clamped_value;
   }
 
  private:
-  size_t zeroPoint_{0};
+  size_t zero_point_{0};
   size_t s_{1};
   uint8_t qmin_{std::numeric_limits<uint8_t>::min()};
   uint8_t qmax_{std::numeric_limits<uint8_t>::max()};
diff --git a/test/requantization.cc b/test/requantization.cc
index 2337722..fe14d71 100644
--- a/test/requantization.cc
+++ b/test/requantization.cc
@@ -26,63 +26,63 @@
   for (uint32_t s = 1; s < 32; s++) {
     RequantizationTester()
       .s(s)
-      .testExactDivideByPO2(xnn_requantize_precise__scalar_unsigned32);
+      .TestExactDivideByPO2(xnn_requantize_precise__scalar_unsigned32);
   }
 }
 
 TEST(PRECISE__SCALAR_UNSIGNED32, exact_divide_by_po2_with_zero_point) {
-  for (int32_t zeroPoint = 1; zeroPoint < 256; zeroPoint++) {
+  for (int32_t zero_point = 1; zero_point < 256; zero_point++) {
     for (uint32_t s = 1; s < 32; s++) {
       RequantizationTester()
-        .zeroPoint(zeroPoint)
+        .zero_point(zero_point)
         .s(s)
-        .testExactDivideByPO2(xnn_requantize_precise__scalar_unsigned32);
+        .TestExactDivideByPO2(xnn_requantize_precise__scalar_unsigned32);
     }
   }
 }
 
 TEST(PRECISE__SCALAR_UNSIGNED32, divide_by_po2_with_rounding_up) {
-  for (int32_t zeroPoint = 0; zeroPoint < 256; zeroPoint++) {
+  for (int32_t zero_point = 0; zero_point < 256; zero_point++) {
     for (uint32_t s = 1; s < 32; s++) {
       RequantizationTester()
-        .zeroPoint(zeroPoint)
+        .zero_point(zero_point)
         .s(s)
-        .testDivideByPO2WithRoundingUp(xnn_requantize_precise__scalar_unsigned32);
+        .TestDivideByPO2WithRoundingUp(xnn_requantize_precise__scalar_unsigned32);
     }
   }
 }
 
 TEST(PRECISE__SCALAR_UNSIGNED32, divide_by_po2_with_rounding_down) {
-  for (int32_t zeroPoint = 0; zeroPoint < 256; zeroPoint++) {
+  for (int32_t zero_point = 0; zero_point < 256; zero_point++) {
     for (uint32_t s = 1; s < 32; s++) {
       RequantizationTester()
-        .zeroPoint(zeroPoint)
+        .zero_point(zero_point)
         .s(s)
-        .testDivideByPO2WithRoundingDown(xnn_requantize_precise__scalar_unsigned32);
+        .TestDivideByPO2WithRoundingDown(xnn_requantize_precise__scalar_unsigned32);
     }
   }
 }
 
 TEST(PRECISE__SCALAR_UNSIGNED32, divide_by_po2_with_rounding_away) {
-  for (int32_t zeroPoint = 0; zeroPoint < 256; zeroPoint++) {
+  for (int32_t zero_point = 0; zero_point < 256; zero_point++) {
     for (uint32_t s = 1; s < 32; s++) {
       RequantizationTester()
-        .zeroPoint(zeroPoint)
+        .zero_point(zero_point)
         .s(s)
-        .testDivideByPO2WithRoundingAway(xnn_requantize_precise__scalar_unsigned32);
+        .TestDivideByPO2WithRoundingAway(xnn_requantize_precise__scalar_unsigned32);
     }
   }
 }
 
 TEST(PRECISE__SCALAR_UNSIGNED32, special_cases) {
   RequantizationTester()
-    .testSpecialCases(xnn_requantize_precise__scalar_unsigned32);
+    .TestSpecialCases(xnn_requantize_precise__scalar_unsigned32);
 }
 
 TEST(PRECISE__SCALAR_UNSIGNED32, random_cases) {
   RequantizationTester()
     .iterations(100)
-    .testRandomCasesPrecise(xnn_requantize_precise__scalar_unsigned32);
+    .TestRandomCasesPrecise(xnn_requantize_precise__scalar_unsigned32);
 }
 
 
@@ -94,63 +94,63 @@
   for (uint32_t s = 1; s < 32; s++) {
     RequantizationTester()
       .s(s)
-      .testExactDivideByPO2(xnn_requantize_precise__scalar_unsigned64);
+      .TestExactDivideByPO2(xnn_requantize_precise__scalar_unsigned64);
   }
 }
 
 TEST(PRECISE__SCALAR_UNSIGNED64, exact_divide_by_po2_with_zero_point) {
-  for (int32_t zeroPoint = 1; zeroPoint < 256; zeroPoint++) {
+  for (int32_t zero_point = 1; zero_point < 256; zero_point++) {
     for (uint32_t s = 1; s < 32; s++) {
       RequantizationTester()
-        .zeroPoint(zeroPoint)
+        .zero_point(zero_point)
         .s(s)
-        .testExactDivideByPO2(xnn_requantize_precise__scalar_unsigned64);
+        .TestExactDivideByPO2(xnn_requantize_precise__scalar_unsigned64);
     }
   }
 }
 
 TEST(PRECISE__SCALAR_UNSIGNED64, divide_by_po2_with_rounding_up) {
-  for (int32_t zeroPoint = 0; zeroPoint < 256; zeroPoint++) {
+  for (int32_t zero_point = 0; zero_point < 256; zero_point++) {
     for (uint32_t s = 1; s < 32; s++) {
       RequantizationTester()
-        .zeroPoint(zeroPoint)
+        .zero_point(zero_point)
         .s(s)
-        .testDivideByPO2WithRoundingUp(xnn_requantize_precise__scalar_unsigned64);
+        .TestDivideByPO2WithRoundingUp(xnn_requantize_precise__scalar_unsigned64);
     }
   }
 }
 
 TEST(PRECISE__SCALAR_UNSIGNED64, divide_by_po2_with_rounding_down) {
-  for (int32_t zeroPoint = 0; zeroPoint < 256; zeroPoint++) {
+  for (int32_t zero_point = 0; zero_point < 256; zero_point++) {
     for (uint32_t s = 1; s < 32; s++) {
       RequantizationTester()
-        .zeroPoint(zeroPoint)
+        .zero_point(zero_point)
         .s(s)
-        .testDivideByPO2WithRoundingDown(xnn_requantize_precise__scalar_unsigned64);
+        .TestDivideByPO2WithRoundingDown(xnn_requantize_precise__scalar_unsigned64);
     }
   }
 }
 
 TEST(PRECISE__SCALAR_UNSIGNED64, divide_by_po2_with_rounding_away) {
-  for (int32_t zeroPoint = 0; zeroPoint < 256; zeroPoint++) {
+  for (int32_t zero_point = 0; zero_point < 256; zero_point++) {
     for (uint32_t s = 1; s < 32; s++) {
       RequantizationTester()
-        .zeroPoint(zeroPoint)
+        .zero_point(zero_point)
         .s(s)
-        .testDivideByPO2WithRoundingAway(xnn_requantize_precise__scalar_unsigned64);
+        .TestDivideByPO2WithRoundingAway(xnn_requantize_precise__scalar_unsigned64);
     }
   }
 }
 
 TEST(PRECISE__SCALAR_UNSIGNED64, special_cases) {
   RequantizationTester()
-    .testSpecialCases(xnn_requantize_precise__scalar_unsigned64);
+    .TestSpecialCases(xnn_requantize_precise__scalar_unsigned64);
 }
 
 TEST(PRECISE__SCALAR_UNSIGNED64, random_cases) {
   RequantizationTester()
     .iterations(100)
-    .testRandomCasesPrecise(xnn_requantize_precise__scalar_unsigned64);
+    .TestRandomCasesPrecise(xnn_requantize_precise__scalar_unsigned64);
 }
 
 
@@ -162,63 +162,63 @@
   for (uint32_t s = 1; s < 32; s++) {
     RequantizationTester()
       .s(s)
-      .testExactDivideByPO2(xnn_requantize_precise__scalar_signed64);
+      .TestExactDivideByPO2(xnn_requantize_precise__scalar_signed64);
   }
 }
 
 TEST(PRECISE__SCALAR_SIGNED64, exact_divide_by_po2_with_zero_point) {
-  for (int32_t zeroPoint = 1; zeroPoint < 256; zeroPoint++) {
+  for (int32_t zero_point = 1; zero_point < 256; zero_point++) {
     for (uint32_t s = 1; s < 32; s++) {
       RequantizationTester()
-        .zeroPoint(zeroPoint)
+        .zero_point(zero_point)
         .s(s)
-        .testExactDivideByPO2(xnn_requantize_precise__scalar_signed64);
+        .TestExactDivideByPO2(xnn_requantize_precise__scalar_signed64);
     }
   }
 }
 
 TEST(PRECISE__SCALAR_SIGNED64, divide_by_po2_with_rounding_up) {
-  for (int32_t zeroPoint = 0; zeroPoint < 256; zeroPoint++) {
+  for (int32_t zero_point = 0; zero_point < 256; zero_point++) {
     for (uint32_t s = 1; s < 32; s++) {
       RequantizationTester()
-        .zeroPoint(zeroPoint)
+        .zero_point(zero_point)
         .s(s)
-        .testDivideByPO2WithRoundingUp(xnn_requantize_precise__scalar_signed64);
+        .TestDivideByPO2WithRoundingUp(xnn_requantize_precise__scalar_signed64);
     }
   }
 }
 
 TEST(PRECISE__SCALAR_SIGNED64, divide_by_po2_with_rounding_down) {
-  for (int32_t zeroPoint = 0; zeroPoint < 256; zeroPoint++) {
+  for (int32_t zero_point = 0; zero_point < 256; zero_point++) {
     for (uint32_t s = 1; s < 32; s++) {
       RequantizationTester()
-        .zeroPoint(zeroPoint)
+        .zero_point(zero_point)
         .s(s)
-        .testDivideByPO2WithRoundingDown(xnn_requantize_precise__scalar_signed64);
+        .TestDivideByPO2WithRoundingDown(xnn_requantize_precise__scalar_signed64);
     }
   }
 }
 
 TEST(PRECISE__SCALAR_SIGNED64, divide_by_po2_with_rounding_away) {
-  for (int32_t zeroPoint = 0; zeroPoint < 256; zeroPoint++) {
+  for (int32_t zero_point = 0; zero_point < 256; zero_point++) {
     for (uint32_t s = 1; s < 32; s++) {
       RequantizationTester()
-        .zeroPoint(zeroPoint)
+        .zero_point(zero_point)
         .s(s)
-        .testDivideByPO2WithRoundingAway(xnn_requantize_precise__scalar_signed64);
+        .TestDivideByPO2WithRoundingAway(xnn_requantize_precise__scalar_signed64);
     }
   }
 }
 
 TEST(PRECISE__SCALAR_SIGNED64, special_cases) {
   RequantizationTester()
-    .testSpecialCases(xnn_requantize_precise__scalar_signed64);
+    .TestSpecialCases(xnn_requantize_precise__scalar_signed64);
 }
 
 TEST(PRECISE__SCALAR_SIGNED64, random_cases) {
   RequantizationTester()
     .iterations(100)
-    .testRandomCasesPrecise(xnn_requantize_precise__scalar_signed64);
+    .TestRandomCasesPrecise(xnn_requantize_precise__scalar_signed64);
 }
 
 
@@ -229,7 +229,7 @@
 TEST(FP32__SCALAR_LRINTF, random_cases) {
   RequantizationTester()
     .iterations(1000)
-    .testRandomCasesApproximate(xnn_requantize_fp32__scalar_lrintf);
+    .TestRandomCasesApproximate(xnn_requantize_fp32__scalar_lrintf);
 }
 
 
@@ -240,7 +240,7 @@
 TEST(FP32__SCALAR_MAGIC, random_cases) {
   RequantizationTester()
     .iterations(1000)
-    .testRandomCasesApproximate(xnn_requantize_fp32__scalar_magic);
+    .TestRandomCasesApproximate(xnn_requantize_fp32__scalar_magic);
 }
 
 
@@ -252,28 +252,28 @@
   for (uint32_t s = 1; s < 32; s++) {
     RequantizationTester()
       .s(s)
-      .testExactDivideByPO2(xnn_requantize_q31__scalar);
+      .TestExactDivideByPO2(xnn_requantize_q31__scalar);
   }
 }
 
 TEST(Q31__SCALAR, exact_divide_by_po2_with_zero_point) {
-  for (int32_t zeroPoint = 1; zeroPoint < 256; zeroPoint++) {
+  for (int32_t zero_point = 1; zero_point < 256; zero_point++) {
     for (uint32_t s = 1; s < 32; s++) {
       RequantizationTester()
-        .zeroPoint(zeroPoint)
+        .zero_point(zero_point)
         .s(s)
-        .testExactDivideByPO2(xnn_requantize_q31__scalar);
+        .TestExactDivideByPO2(xnn_requantize_q31__scalar);
     }
   }
 }
 
 TEST(Q31__SCALAR, divide_by_po2_with_rounding_up) {
-  for (int32_t zeroPoint = 0; zeroPoint < 256; zeroPoint++) {
+  for (int32_t zero_point = 0; zero_point < 256; zero_point++) {
     for (uint32_t s = 1; s < 32; s++) {
       RequantizationTester()
-        .zeroPoint(zeroPoint)
+        .zero_point(zero_point)
         .s(s)
-        .testDivideByPO2WithRoundingUp(xnn_requantize_q31__scalar);
+        .TestDivideByPO2WithRoundingUp(xnn_requantize_q31__scalar);
     }
   }
 }
@@ -281,31 +281,31 @@
 /* No rounding down Test - it fails because of upward bias in multiplication */
 
 TEST(Q31__SCALAR, divide_by_po2_with_rounding_away) {
-  for (int32_t zeroPoint = 0; zeroPoint < 256; zeroPoint++) {
+  for (int32_t zero_point = 0; zero_point < 256; zero_point++) {
     for (uint32_t s = 1; s < 32; s++) {
       RequantizationTester()
-        .zeroPoint(zeroPoint)
+        .zero_point(zero_point)
         .s(s)
-        .testDivideByPO2WithRoundingAway(xnn_requantize_q31__scalar);
+        .TestDivideByPO2WithRoundingAway(xnn_requantize_q31__scalar);
     }
   }
 }
 
 TEST(Q31__SCALAR, special_cases) {
   RequantizationTester()
-    .testSpecialCases(xnn_requantize_q31__scalar);
+    .TestSpecialCases(xnn_requantize_q31__scalar);
 }
 
 TEST(Q31__SCALAR, random_cases) {
   RequantizationTester()
     .iterations(100)
-    .testRandomCasesApproximate(xnn_requantize_q31__scalar);
+    .TestRandomCasesApproximate(xnn_requantize_q31__scalar);
 }
 
 TEST(Q31__SCALAR, random_match_gemmlowp) {
   RequantizationTester()
     .iterations(100)
-    .testRandomCasesAgainstReference(xnn_requantize_q31__scalar, xnn_requantize_gemmlowp__scalar);
+    .TestRandomCasesAgainstReference(xnn_requantize_q31__scalar, xnn_requantize_gemmlowp__scalar);
 }
 
 
@@ -316,87 +316,89 @@
 TEST(GEMMLOWP__SCALAR, random_cases) {
   RequantizationTester()
     .iterations(100)
-    .testRandomCasesApproximate(xnn_requantize_gemmlowp__scalar);
+    .TestRandomCasesApproximate(xnn_requantize_gemmlowp__scalar);
 }
 
 
-/*
- * Precise PSIMD implementation using unsigned 32-bit arithmetics.
- */
+#if !XNN_ARCH_ASMJS && !XNN_ARCH_WASM
+  /*
+   * Precise PSIMD implementation using unsigned 32-bit arithmetic.
+   */
 
-TEST(PRECISE__PSIMD, exact_divide_by_po2) {
-  for (uint32_t s = 1; s < 32; s++) {
+  TEST(PRECISE__PSIMD, exact_divide_by_po2) {
+    for (uint32_t s = 1; s < 32; s++) {
+      RequantizationTester()
+        .s(s)
+        .TestExactDivideByPO2(xnn_requantize_precise__psimd);
+    }
+  }
+
+  TEST(PRECISE__PSIMD, exact_divide_by_po2_with_zero_point) {
+    for (int32_t zero_point = 1; zero_point < 256; zero_point++) {
+      for (uint32_t s = 1; s < 32; s++) {
+        RequantizationTester()
+          .zero_point(zero_point)
+          .s(s)
+          .TestExactDivideByPO2(xnn_requantize_precise__psimd);
+      }
+    }
+  }
+
+  TEST(PRECISE__PSIMD, divide_by_po2_with_rounding_up) {
+    for (int32_t zero_point = 0; zero_point < 256; zero_point++) {
+      for (uint32_t s = 1; s < 32; s++) {
+        RequantizationTester()
+          .zero_point(zero_point)
+          .s(s)
+          .TestDivideByPO2WithRoundingUp(xnn_requantize_precise__psimd);
+      }
+    }
+  }
+
+  TEST(PRECISE__PSIMD, divide_by_po2_with_rounding_down) {
+    for (int32_t zero_point = 0; zero_point < 256; zero_point++) {
+      for (uint32_t s = 1; s < 32; s++) {
+        RequantizationTester()
+          .zero_point(zero_point)
+          .s(s)
+          .TestDivideByPO2WithRoundingDown(xnn_requantize_precise__psimd);
+      }
+    }
+  }
+
+  TEST(PRECISE__PSIMD, divide_by_po2_with_rounding_away) {
+    for (int32_t zero_point = 0; zero_point < 256; zero_point++) {
+      for (uint32_t s = 1; s < 32; s++) {
+        RequantizationTester()
+          .zero_point(zero_point)
+          .s(s)
+          .TestDivideByPO2WithRoundingAway(xnn_requantize_precise__psimd);
+      }
+    }
+  }
+
+  TEST(PRECISE__PSIMD, special_cases) {
     RequantizationTester()
-      .s(s)
-      .testExactDivideByPO2(xnn_requantize_precise__psimd);
+      .TestSpecialCases(xnn_requantize_precise__psimd);
   }
-}
 
-TEST(PRECISE__PSIMD, exact_divide_by_po2_with_zero_point) {
-  for (int32_t zeroPoint = 1; zeroPoint < 256; zeroPoint++) {
-    for (uint32_t s = 1; s < 32; s++) {
-      RequantizationTester()
-        .zeroPoint(zeroPoint)
-        .s(s)
-        .testExactDivideByPO2(xnn_requantize_precise__psimd);
-    }
+  TEST(PRECISE__PSIMD, random_cases) {
+    RequantizationTester()
+      .iterations(100)
+      .TestRandomCasesPrecise(xnn_requantize_precise__psimd);
   }
-}
 
-TEST(PRECISE__PSIMD, divide_by_po2_with_rounding_up) {
-  for (int32_t zeroPoint = 0; zeroPoint < 256; zeroPoint++) {
-    for (uint32_t s = 1; s < 32; s++) {
-      RequantizationTester()
-        .zeroPoint(zeroPoint)
-        .s(s)
-        .testDivideByPO2WithRoundingUp(xnn_requantize_precise__psimd);
-    }
+
+  /*
+   * FP32-based PSIMD implementation using a magic trick for FP32->INT32 conversion.
+   */
+
+  TEST(FP32__PSIMD, random_cases) {
+    RequantizationTester()
+      .iterations(1000)
+      .TestRandomCasesApproximate(xnn_requantize_fp32__psimd);
   }
-}
-
-TEST(PRECISE__PSIMD, divide_by_po2_with_rounding_down) {
-  for (int32_t zeroPoint = 0; zeroPoint < 256; zeroPoint++) {
-    for (uint32_t s = 1; s < 32; s++) {
-      RequantizationTester()
-        .zeroPoint(zeroPoint)
-        .s(s)
-        .testDivideByPO2WithRoundingDown(xnn_requantize_precise__psimd);
-    }
-  }
-}
-
-TEST(PRECISE__PSIMD, divide_by_po2_with_rounding_away) {
-  for (int32_t zeroPoint = 0; zeroPoint < 256; zeroPoint++) {
-    for (uint32_t s = 1; s < 32; s++) {
-      RequantizationTester()
-        .zeroPoint(zeroPoint)
-        .s(s)
-        .testDivideByPO2WithRoundingAway(xnn_requantize_precise__psimd);
-    }
-  }
-}
-
-TEST(PRECISE__PSIMD, special_cases) {
-  RequantizationTester()
-    .testSpecialCases(xnn_requantize_precise__psimd);
-}
-
-TEST(PRECISE__PSIMD, random_cases) {
-  RequantizationTester()
-    .iterations(100)
-    .testRandomCasesPrecise(xnn_requantize_precise__psimd);
-}
-
-
-/*
- * FP32-based PSIMD implementation using magic trick for FP32->INT32 conversion.
- */
-
-TEST(FP32__PSIMD, random_cases) {
-  RequantizationTester()
-    .iterations(1000)
-    .testRandomCasesApproximate(xnn_requantize_fp32__psimd);
-}
+#endif  // !XNN_ARCH_ASMJS && !XNN_ARCH_WASM
 
 
 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
@@ -408,63 +410,63 @@
     for (uint32_t s = 1; s < 32; s++) {
       RequantizationTester()
         .s(s)
-        .testExactDivideByPO2(xnn_requantize_precise__sse2);
+        .TestExactDivideByPO2(xnn_requantize_precise__sse2);
     }
   }
 
   TEST(PRECISE__SSE2, exact_divide_by_po2_with_zero_point) {
-    for (int32_t zeroPoint = 1; zeroPoint < 256; zeroPoint++) {
+    for (int32_t zero_point = 1; zero_point < 256; zero_point++) {
       for (uint32_t s = 1; s < 32; s++) {
         RequantizationTester()
-          .zeroPoint(zeroPoint)
+          .zero_point(zero_point)
           .s(s)
-          .testExactDivideByPO2(xnn_requantize_precise__sse2);
+          .TestExactDivideByPO2(xnn_requantize_precise__sse2);
       }
     }
   }
 
   TEST(PRECISE__SSE2, divide_by_po2_with_rounding_up) {
-    for (int32_t zeroPoint = 0; zeroPoint < 256; zeroPoint++) {
+    for (int32_t zero_point = 0; zero_point < 256; zero_point++) {
       for (uint32_t s = 1; s < 32; s++) {
         RequantizationTester()
-          .zeroPoint(zeroPoint)
+          .zero_point(zero_point)
           .s(s)
-          .testDivideByPO2WithRoundingUp(xnn_requantize_precise__sse2);
+          .TestDivideByPO2WithRoundingUp(xnn_requantize_precise__sse2);
       }
     }
   }
 
   TEST(PRECISE__SSE2, divide_by_po2_with_rounding_down) {
-    for (int32_t zeroPoint = 0; zeroPoint < 256; zeroPoint++) {
+    for (int32_t zero_point = 0; zero_point < 256; zero_point++) {
       for (uint32_t s = 1; s < 32; s++) {
         RequantizationTester()
-          .zeroPoint(zeroPoint)
+          .zero_point(zero_point)
           .s(s)
-          .testDivideByPO2WithRoundingDown(xnn_requantize_precise__sse2);
+          .TestDivideByPO2WithRoundingDown(xnn_requantize_precise__sse2);
       }
     }
   }
 
   TEST(PRECISE__SSE2, divide_by_po2_with_rounding_away) {
-    for (int32_t zeroPoint = 0; zeroPoint < 256; zeroPoint++) {
+    for (int32_t zero_point = 0; zero_point < 256; zero_point++) {
       for (uint32_t s = 1; s < 32; s++) {
         RequantizationTester()
-          .zeroPoint(zeroPoint)
+          .zero_point(zero_point)
           .s(s)
-          .testDivideByPO2WithRoundingAway(xnn_requantize_precise__sse2);
+          .TestDivideByPO2WithRoundingAway(xnn_requantize_precise__sse2);
       }
     }
   }
 
   TEST(PRECISE__SSE2, special_cases) {
     RequantizationTester()
-      .testSpecialCases(xnn_requantize_precise__sse2);
+      .TestSpecialCases(xnn_requantize_precise__sse2);
   }
 
   TEST(PRECISE__SSE2, random_cases) {
     RequantizationTester()
       .iterations(100)
-      .testRandomCasesPrecise(xnn_requantize_precise__sse2);
+      .TestRandomCasesPrecise(xnn_requantize_precise__sse2);
   }
 
 
@@ -476,63 +478,63 @@
     for (uint32_t s = 1; s < 32; s++) {
       RequantizationTester()
         .s(s)
-        .testExactDivideByPO2(xnn_requantize_precise__ssse3);
+        .TestExactDivideByPO2(xnn_requantize_precise__ssse3);
     }
   }
 
   TEST(PRECISE__SSSE3, exact_divide_by_po2_with_zero_point) {
-    for (int32_t zeroPoint = 1; zeroPoint < 256; zeroPoint++) {
+    for (int32_t zero_point = 1; zero_point < 256; zero_point++) {
       for (uint32_t s = 1; s < 32; s++) {
         RequantizationTester()
-          .zeroPoint(zeroPoint)
+          .zero_point(zero_point)
           .s(s)
-          .testExactDivideByPO2(xnn_requantize_precise__ssse3);
+          .TestExactDivideByPO2(xnn_requantize_precise__ssse3);
       }
     }
   }
 
   TEST(PRECISE__SSSE3, divide_by_po2_with_rounding_up) {
-    for (int32_t zeroPoint = 0; zeroPoint < 256; zeroPoint++) {
+    for (int32_t zero_point = 0; zero_point < 256; zero_point++) {
       for (uint32_t s = 1; s < 32; s++) {
         RequantizationTester()
-          .zeroPoint(zeroPoint)
+          .zero_point(zero_point)
           .s(s)
-          .testDivideByPO2WithRoundingUp(xnn_requantize_precise__ssse3);
+          .TestDivideByPO2WithRoundingUp(xnn_requantize_precise__ssse3);
       }
     }
   }
 
   TEST(PRECISE__SSSE3, divide_by_po2_with_rounding_down) {
-    for (int32_t zeroPoint = 0; zeroPoint < 256; zeroPoint++) {
+    for (int32_t zero_point = 0; zero_point < 256; zero_point++) {
       for (uint32_t s = 1; s < 32; s++) {
         RequantizationTester()
-          .zeroPoint(zeroPoint)
+          .zero_point(zero_point)
           .s(s)
-          .testDivideByPO2WithRoundingDown(xnn_requantize_precise__ssse3);
+          .TestDivideByPO2WithRoundingDown(xnn_requantize_precise__ssse3);
       }
     }
   }
 
   TEST(PRECISE__SSSE3, divide_by_po2_with_rounding_away) {
-    for (int32_t zeroPoint = 0; zeroPoint < 256; zeroPoint++) {
+    for (int32_t zero_point = 0; zero_point < 256; zero_point++) {
       for (uint32_t s = 1; s < 32; s++) {
         RequantizationTester()
-          .zeroPoint(zeroPoint)
+          .zero_point(zero_point)
           .s(s)
-          .testDivideByPO2WithRoundingAway(xnn_requantize_precise__ssse3);
+          .TestDivideByPO2WithRoundingAway(xnn_requantize_precise__ssse3);
       }
     }
   }
 
   TEST(PRECISE__SSSE3, special_cases) {
     RequantizationTester()
-      .testSpecialCases(xnn_requantize_precise__ssse3);
+      .TestSpecialCases(xnn_requantize_precise__ssse3);
   }
 
   TEST(PRECISE__SSSE3, random_cases) {
     RequantizationTester()
       .iterations(100)
-      .testRandomCasesPrecise(xnn_requantize_precise__ssse3);
+      .TestRandomCasesPrecise(xnn_requantize_precise__ssse3);
   }
 
 
@@ -544,63 +546,63 @@
     for (uint32_t s = 1; s < 32; s++) {
       RequantizationTester()
         .s(s)
-        .testExactDivideByPO2(xnn_requantize_precise__sse4);
+        .TestExactDivideByPO2(xnn_requantize_precise__sse4);
     }
   }
 
   TEST(PRECISE__SSE4, exact_divide_by_po2_with_zero_point) {
-    for (int32_t zeroPoint = 1; zeroPoint < 256; zeroPoint++) {
+    for (int32_t zero_point = 1; zero_point < 256; zero_point++) {
       for (uint32_t s = 1; s < 32; s++) {
         RequantizationTester()
-          .zeroPoint(zeroPoint)
+          .zero_point(zero_point)
           .s(s)
-          .testExactDivideByPO2(xnn_requantize_precise__sse4);
+          .TestExactDivideByPO2(xnn_requantize_precise__sse4);
       }
     }
   }
 
   TEST(PRECISE__SSE4, divide_by_po2_with_rounding_up) {
-    for (int32_t zeroPoint = 0; zeroPoint < 256; zeroPoint++) {
+    for (int32_t zero_point = 0; zero_point < 256; zero_point++) {
       for (uint32_t s = 1; s < 32; s++) {
         RequantizationTester()
-          .zeroPoint(zeroPoint)
+          .zero_point(zero_point)
           .s(s)
-          .testDivideByPO2WithRoundingUp(xnn_requantize_precise__sse4);
+          .TestDivideByPO2WithRoundingUp(xnn_requantize_precise__sse4);
       }
     }
   }
 
   TEST(PRECISE__SSE4, divide_by_po2_with_rounding_down) {
-    for (int32_t zeroPoint = 0; zeroPoint < 256; zeroPoint++) {
+    for (int32_t zero_point = 0; zero_point < 256; zero_point++) {
       for (uint32_t s = 1; s < 32; s++) {
         RequantizationTester()
-          .zeroPoint(zeroPoint)
+          .zero_point(zero_point)
           .s(s)
-          .testDivideByPO2WithRoundingDown(xnn_requantize_precise__sse4);
+          .TestDivideByPO2WithRoundingDown(xnn_requantize_precise__sse4);
       }
     }
   }
 
   TEST(PRECISE__SSE4, divide_by_po2_with_rounding_away) {
-    for (int32_t zeroPoint = 0; zeroPoint < 256; zeroPoint++) {
+    for (int32_t zero_point = 0; zero_point < 256; zero_point++) {
       for (uint32_t s = 1; s < 32; s++) {
         RequantizationTester()
-          .zeroPoint(zeroPoint)
+          .zero_point(zero_point)
           .s(s)
-          .testDivideByPO2WithRoundingAway(xnn_requantize_precise__sse4);
+          .TestDivideByPO2WithRoundingAway(xnn_requantize_precise__sse4);
       }
     }
   }
 
   TEST(PRECISE__SSE4, special_cases) {
     RequantizationTester()
-      .testSpecialCases(xnn_requantize_precise__sse4);
+      .TestSpecialCases(xnn_requantize_precise__sse4);
   }
 
   TEST(PRECISE__SSE4, random_cases) {
     RequantizationTester()
       .iterations(100)
-      .testRandomCasesPrecise(xnn_requantize_precise__sse4);
+      .TestRandomCasesPrecise(xnn_requantize_precise__sse4);
   }
 
 
@@ -611,7 +613,7 @@
   TEST(FP32__SSE2, random_cases) {
     RequantizationTester()
       .iterations(1000)
-      .testRandomCasesApproximate(xnn_requantize_fp32__sse2);
+      .TestRandomCasesApproximate(xnn_requantize_fp32__sse2);
   }
 
 
@@ -623,28 +625,28 @@
     for (uint32_t s = 1; s < 32; s++) {
       RequantizationTester()
         .s(s)
-        .testExactDivideByPO2(xnn_requantize_q31__sse2);
+        .TestExactDivideByPO2(xnn_requantize_q31__sse2);
     }
   }
 
   TEST(Q31__SSE2, exact_divide_by_po2_with_zero_point) {
-    for (int32_t zeroPoint = 1; zeroPoint < 256; zeroPoint++) {
+    for (int32_t zero_point = 1; zero_point < 256; zero_point++) {
       for (uint32_t s = 1; s < 32; s++) {
         RequantizationTester()
-          .zeroPoint(zeroPoint)
+          .zero_point(zero_point)
           .s(s)
-          .testExactDivideByPO2(xnn_requantize_q31__sse2);
+          .TestExactDivideByPO2(xnn_requantize_q31__sse2);
       }
     }
   }
 
   TEST(Q31__SSE2, divide_by_po2_with_rounding_up) {
-    for (int32_t zeroPoint = 0; zeroPoint < 256; zeroPoint++) {
+    for (int32_t zero_point = 0; zero_point < 256; zero_point++) {
       for (uint32_t s = 1; s < 32; s++) {
         RequantizationTester()
-          .zeroPoint(zeroPoint)
+          .zero_point(zero_point)
           .s(s)
-          .testDivideByPO2WithRoundingUp(xnn_requantize_q31__sse2);
+          .TestDivideByPO2WithRoundingUp(xnn_requantize_q31__sse2);
       }
     }
   }
@@ -652,31 +654,31 @@
   /* No rounding down Test - it fails because of upward bias in multiplication */
 
   TEST(Q31__SSE2, divide_by_po2_with_rounding_away) {
-    for (int32_t zeroPoint = 0; zeroPoint < 256; zeroPoint++) {
+    for (int32_t zero_point = 0; zero_point < 256; zero_point++) {
       for (uint32_t s = 1; s < 32; s++) {
         RequantizationTester()
-          .zeroPoint(zeroPoint)
+          .zero_point(zero_point)
           .s(s)
-          .testDivideByPO2WithRoundingAway(xnn_requantize_q31__sse2);
+          .TestDivideByPO2WithRoundingAway(xnn_requantize_q31__sse2);
       }
     }
   }
 
   TEST(Q31__SSE2, special_cases) {
     RequantizationTester()
-      .testSpecialCases(xnn_requantize_q31__sse2);
+      .TestSpecialCases(xnn_requantize_q31__sse2);
   }
 
   TEST(Q31__SSE2, random_cases) {
     RequantizationTester()
       .iterations(100)
-      .testRandomCasesApproximate(xnn_requantize_q31__sse2);
+      .TestRandomCasesApproximate(xnn_requantize_q31__sse2);
   }
 
   TEST(Q31__SSE2, random_match_gemmlowp) {
     RequantizationTester()
       .iterations(100)
-      .testRandomCasesAgainstReference(xnn_requantize_q31__sse2, xnn_requantize_gemmlowp__sse2);
+      .TestRandomCasesAgainstReference(xnn_requantize_q31__sse2, xnn_requantize_gemmlowp__sse2);
   }
 
 
@@ -688,28 +690,28 @@
     for (uint32_t s = 1; s < 32; s++) {
       RequantizationTester()
         .s(s)
-        .testExactDivideByPO2(xnn_requantize_q31__ssse3);
+        .TestExactDivideByPO2(xnn_requantize_q31__ssse3);
     }
   }
 
   TEST(Q31__SSSE3, exact_divide_by_po2_with_zero_point) {
-    for (int32_t zeroPoint = 1; zeroPoint < 256; zeroPoint++) {
+    for (int32_t zero_point = 1; zero_point < 256; zero_point++) {
       for (uint32_t s = 1; s < 32; s++) {
         RequantizationTester()
-          .zeroPoint(zeroPoint)
+          .zero_point(zero_point)
           .s(s)
-          .testExactDivideByPO2(xnn_requantize_q31__ssse3);
+          .TestExactDivideByPO2(xnn_requantize_q31__ssse3);
       }
     }
   }
 
   TEST(Q31__SSSE3, divide_by_po2_with_rounding_up) {
-    for (int32_t zeroPoint = 0; zeroPoint < 256; zeroPoint++) {
+    for (int32_t zero_point = 0; zero_point < 256; zero_point++) {
       for (uint32_t s = 1; s < 32; s++) {
         RequantizationTester()
-          .zeroPoint(zeroPoint)
+          .zero_point(zero_point)
           .s(s)
-          .testDivideByPO2WithRoundingUp(xnn_requantize_q31__ssse3);
+          .TestDivideByPO2WithRoundingUp(xnn_requantize_q31__ssse3);
       }
     }
   }
@@ -717,31 +719,31 @@
   /* No rounding down Test - it fails because of upward bias in multiplication */
 
   TEST(Q31__SSSE3, divide_by_po2_with_rounding_away) {
-    for (int32_t zeroPoint = 0; zeroPoint < 256; zeroPoint++) {
+    for (int32_t zero_point = 0; zero_point < 256; zero_point++) {
       for (uint32_t s = 1; s < 32; s++) {
         RequantizationTester()
-          .zeroPoint(zeroPoint)
+          .zero_point(zero_point)
           .s(s)
-          .testDivideByPO2WithRoundingAway(xnn_requantize_q31__ssse3);
+          .TestDivideByPO2WithRoundingAway(xnn_requantize_q31__ssse3);
       }
     }
   }
 
   TEST(Q31__SSSE3, special_cases) {
     RequantizationTester()
-      .testSpecialCases(xnn_requantize_q31__ssse3);
+      .TestSpecialCases(xnn_requantize_q31__ssse3);
   }
 
   TEST(Q31__SSSE3, random_cases) {
     RequantizationTester()
       .iterations(100)
-      .testRandomCasesApproximate(xnn_requantize_q31__ssse3);
+      .TestRandomCasesApproximate(xnn_requantize_q31__ssse3);
   }
 
   TEST(Q31__SSSE3, random_match_gemmlowp) {
     RequantizationTester()
       .iterations(100)
-      .testRandomCasesAgainstReference(xnn_requantize_q31__ssse3, xnn_requantize_gemmlowp__ssse3);
+      .TestRandomCasesAgainstReference(xnn_requantize_q31__ssse3, xnn_requantize_gemmlowp__ssse3);
   }
 
 
@@ -753,28 +755,28 @@
     for (uint32_t s = 1; s < 32; s++) {
       RequantizationTester()
         .s(s)
-        .testExactDivideByPO2(xnn_requantize_q31__sse4);
+        .TestExactDivideByPO2(xnn_requantize_q31__sse4);
     }
   }
 
   TEST(Q31__SSE4, exact_divide_by_po2_with_zero_point) {
-    for (int32_t zeroPoint = 1; zeroPoint < 256; zeroPoint++) {
+    for (int32_t zero_point = 1; zero_point < 256; zero_point++) {
       for (uint32_t s = 1; s < 32; s++) {
         RequantizationTester()
-          .zeroPoint(zeroPoint)
+          .zero_point(zero_point)
           .s(s)
-          .testExactDivideByPO2(xnn_requantize_q31__sse4);
+          .TestExactDivideByPO2(xnn_requantize_q31__sse4);
       }
     }
   }
 
   TEST(Q31__SSE4, divide_by_po2_with_rounding_up) {
-    for (int32_t zeroPoint = 0; zeroPoint < 256; zeroPoint++) {
+    for (int32_t zero_point = 0; zero_point < 256; zero_point++) {
       for (uint32_t s = 1; s < 32; s++) {
         RequantizationTester()
-          .zeroPoint(zeroPoint)
+          .zero_point(zero_point)
           .s(s)
-          .testDivideByPO2WithRoundingUp(xnn_requantize_q31__sse4);
+          .TestDivideByPO2WithRoundingUp(xnn_requantize_q31__sse4);
       }
     }
   }
@@ -782,31 +784,31 @@
   /* No rounding down Test - it fails because of upward bias in multiplication */
 
   TEST(Q31__SSE4, divide_by_po2_with_rounding_away) {
-    for (int32_t zeroPoint = 0; zeroPoint < 256; zeroPoint++) {
+    for (int32_t zero_point = 0; zero_point < 256; zero_point++) {
       for (uint32_t s = 1; s < 32; s++) {
         RequantizationTester()
-          .zeroPoint(zeroPoint)
+          .zero_point(zero_point)
           .s(s)
-          .testDivideByPO2WithRoundingAway(xnn_requantize_q31__sse4);
+          .TestDivideByPO2WithRoundingAway(xnn_requantize_q31__sse4);
       }
     }
   }
 
   TEST(Q31__SSE4, special_cases) {
     RequantizationTester()
-      .testSpecialCases(xnn_requantize_q31__sse4);
+      .TestSpecialCases(xnn_requantize_q31__sse4);
   }
 
   TEST(Q31__SSE4, random_cases) {
     RequantizationTester()
       .iterations(100)
-      .testRandomCasesApproximate(xnn_requantize_q31__sse4);
+      .TestRandomCasesApproximate(xnn_requantize_q31__sse4);
   }
 
   TEST(Q31__SSE4, random_match_gemmlowp) {
     RequantizationTester()
       .iterations(100)
-      .testRandomCasesAgainstReference(xnn_requantize_q31__sse4, xnn_requantize_gemmlowp__sse4);
+      .TestRandomCasesAgainstReference(xnn_requantize_q31__sse4, xnn_requantize_gemmlowp__sse4);
   }
 
 
@@ -818,28 +820,28 @@
     for (uint32_t s = 1; s < 32; s++) {
       RequantizationTester()
         .s(s)
-        .testExactDivideByPO2(xnn_requantize_gemmlowp__sse2);
+        .TestExactDivideByPO2(xnn_requantize_gemmlowp__sse2);
     }
   }
 
   TEST(GEMMLOWP__SSE2, exact_divide_by_po2_with_zero_point) {
-    for (int32_t zeroPoint = 1; zeroPoint < 256; zeroPoint++) {
+    for (int32_t zero_point = 1; zero_point < 256; zero_point++) {
       for (uint32_t s = 1; s < 32; s++) {
         RequantizationTester()
-          .zeroPoint(zeroPoint)
+          .zero_point(zero_point)
           .s(s)
-          .testExactDivideByPO2(xnn_requantize_gemmlowp__sse2);
+          .TestExactDivideByPO2(xnn_requantize_gemmlowp__sse2);
       }
     }
   }
 
   TEST(GEMMLOWP__SSE2, divide_by_po2_with_rounding_up) {
-    for (int32_t zeroPoint = 0; zeroPoint < 256; zeroPoint++) {
+    for (int32_t zero_point = 0; zero_point < 256; zero_point++) {
       for (uint32_t s = 1; s < 32; s++) {
         RequantizationTester()
-          .zeroPoint(zeroPoint)
+          .zero_point(zero_point)
           .s(s)
-          .testDivideByPO2WithRoundingUp(xnn_requantize_gemmlowp__sse2);
+          .TestDivideByPO2WithRoundingUp(xnn_requantize_gemmlowp__sse2);
       }
     }
   }
@@ -847,25 +849,25 @@
   /* No rounding down Test - it fails because of upward bias in multiplication */
 
   TEST(GEMMLOWP__SSE2, divide_by_po2_with_rounding_away) {
-    for (int32_t zeroPoint = 0; zeroPoint < 256; zeroPoint++) {
+    for (int32_t zero_point = 0; zero_point < 256; zero_point++) {
       for (uint32_t s = 1; s < 32; s++) {
         RequantizationTester()
-          .zeroPoint(zeroPoint)
+          .zero_point(zero_point)
           .s(s)
-          .testDivideByPO2WithRoundingAway(xnn_requantize_gemmlowp__sse2);
+          .TestDivideByPO2WithRoundingAway(xnn_requantize_gemmlowp__sse2);
       }
     }
   }
 
   TEST(GEMMLOWP__SSE2, special_cases) {
     RequantizationTester()
-      .testSpecialCases(xnn_requantize_gemmlowp__sse2);
+      .TestSpecialCases(xnn_requantize_gemmlowp__sse2);
   }
 
   TEST(GEMMLOWP__SSE2, random_cases) {
     RequantizationTester()
       .iterations(100)
-      .testRandomCasesApproximate(xnn_requantize_gemmlowp__sse2);
+      .TestRandomCasesApproximate(xnn_requantize_gemmlowp__sse2);
   }
 
 
@@ -877,28 +879,28 @@
     for (uint32_t s = 1; s < 32; s++) {
       RequantizationTester()
         .s(s)
-        .testExactDivideByPO2(xnn_requantize_gemmlowp__ssse3);
+        .TestExactDivideByPO2(xnn_requantize_gemmlowp__ssse3);
     }
   }
 
   TEST(GEMMLOWP__SSSE3, exact_divide_by_po2_with_zero_point) {
-    for (int32_t zeroPoint = 1; zeroPoint < 256; zeroPoint++) {
+    for (int32_t zero_point = 1; zero_point < 256; zero_point++) {
       for (uint32_t s = 1; s < 32; s++) {
         RequantizationTester()
-          .zeroPoint(zeroPoint)
+          .zero_point(zero_point)
           .s(s)
-          .testExactDivideByPO2(xnn_requantize_gemmlowp__ssse3);
+          .TestExactDivideByPO2(xnn_requantize_gemmlowp__ssse3);
       }
     }
   }
 
   TEST(GEMMLOWP__SSSE3, divide_by_po2_with_rounding_up) {
-    for (int32_t zeroPoint = 0; zeroPoint < 256; zeroPoint++) {
+    for (int32_t zero_point = 0; zero_point < 256; zero_point++) {
       for (uint32_t s = 1; s < 32; s++) {
         RequantizationTester()
-          .zeroPoint(zeroPoint)
+          .zero_point(zero_point)
           .s(s)
-          .testDivideByPO2WithRoundingUp(xnn_requantize_gemmlowp__ssse3);
+          .TestDivideByPO2WithRoundingUp(xnn_requantize_gemmlowp__ssse3);
       }
     }
   }
@@ -906,25 +908,25 @@
   /* No rounding down Test - it fails because of upward bias in multiplication */
 
   TEST(GEMMLOWP__SSSE3, divide_by_po2_with_rounding_away) {
-    for (int32_t zeroPoint = 0; zeroPoint < 256; zeroPoint++) {
+    for (int32_t zero_point = 0; zero_point < 256; zero_point++) {
       for (uint32_t s = 1; s < 32; s++) {
         RequantizationTester()
-          .zeroPoint(zeroPoint)
+          .zero_point(zero_point)
           .s(s)
-          .testDivideByPO2WithRoundingAway(xnn_requantize_gemmlowp__ssse3);
+          .TestDivideByPO2WithRoundingAway(xnn_requantize_gemmlowp__ssse3);
       }
     }
   }
 
   TEST(GEMMLOWP__SSSE3, special_cases) {
     RequantizationTester()
-      .testSpecialCases(xnn_requantize_gemmlowp__ssse3);
+      .TestSpecialCases(xnn_requantize_gemmlowp__ssse3);
   }
 
   TEST(GEMMLOWP__SSSE3, random_cases) {
     RequantizationTester()
       .iterations(100)
-      .testRandomCasesApproximate(xnn_requantize_gemmlowp__ssse3);
+      .TestRandomCasesApproximate(xnn_requantize_gemmlowp__ssse3);
   }
 
 
@@ -936,28 +938,28 @@
     for (uint32_t s = 1; s < 32; s++) {
       RequantizationTester()
         .s(s)
-        .testExactDivideByPO2(xnn_requantize_gemmlowp__sse4);
+        .TestExactDivideByPO2(xnn_requantize_gemmlowp__sse4);
     }
   }
 
   TEST(GEMMLOWP__SSE4, exact_divide_by_po2_with_zero_point) {
-    for (int32_t zeroPoint = 1; zeroPoint < 256; zeroPoint++) {
+    for (int32_t zero_point = 1; zero_point < 256; zero_point++) {
       for (uint32_t s = 1; s < 32; s++) {
         RequantizationTester()
-          .zeroPoint(zeroPoint)
+          .zero_point(zero_point)
           .s(s)
-          .testExactDivideByPO2(xnn_requantize_gemmlowp__sse4);
+          .TestExactDivideByPO2(xnn_requantize_gemmlowp__sse4);
       }
     }
   }
 
   TEST(GEMMLOWP__SSE4, divide_by_po2_with_rounding_up) {
-    for (int32_t zeroPoint = 0; zeroPoint < 256; zeroPoint++) {
+    for (int32_t zero_point = 0; zero_point < 256; zero_point++) {
       for (uint32_t s = 1; s < 32; s++) {
         RequantizationTester()
-          .zeroPoint(zeroPoint)
+          .zero_point(zero_point)
           .s(s)
-          .testDivideByPO2WithRoundingUp(xnn_requantize_gemmlowp__sse4);
+          .TestDivideByPO2WithRoundingUp(xnn_requantize_gemmlowp__sse4);
       }
     }
   }
@@ -965,25 +967,25 @@
   /* No rounding down Test - it fails because of upward bias in multiplication */
 
   TEST(GEMMLOWP__SSE4, divide_by_po2_with_rounding_away) {
-    for (int32_t zeroPoint = 0; zeroPoint < 256; zeroPoint++) {
+    for (int32_t zero_point = 0; zero_point < 256; zero_point++) {
       for (uint32_t s = 1; s < 32; s++) {
         RequantizationTester()
-          .zeroPoint(zeroPoint)
+          .zero_point(zero_point)
           .s(s)
-          .testDivideByPO2WithRoundingAway(xnn_requantize_gemmlowp__sse4);
+          .TestDivideByPO2WithRoundingAway(xnn_requantize_gemmlowp__sse4);
       }
     }
   }
 
   TEST(GEMMLOWP__SSE4, special_cases) {
     RequantizationTester()
-      .testSpecialCases(xnn_requantize_gemmlowp__sse4);
+      .TestSpecialCases(xnn_requantize_gemmlowp__sse4);
   }
 
   TEST(GEMMLOWP__SSE4, random_cases) {
     RequantizationTester()
       .iterations(100)
-      .testRandomCasesApproximate(xnn_requantize_gemmlowp__sse4);
+      .TestRandomCasesApproximate(xnn_requantize_gemmlowp__sse4);
   }
 #endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
 
@@ -996,63 +998,63 @@
     for (uint32_t s = 1; s < 32; s++) {
       RequantizationTester()
         .s(s)
-        .testExactDivideByPO2(xnn_requantize_precise__neon);
+        .TestExactDivideByPO2(xnn_requantize_precise__neon);
     }
   }
 
   TEST(PRECISE__NEON, exact_divide_by_po2_with_zero_point) {
-    for (int32_t zeroPoint = 1; zeroPoint < 256; zeroPoint++) {
+    for (int32_t zero_point = 1; zero_point < 256; zero_point++) {
       for (uint32_t s = 1; s < 32; s++) {
         RequantizationTester()
-          .zeroPoint(zeroPoint)
+          .zero_point(zero_point)
           .s(s)
-          .testExactDivideByPO2(xnn_requantize_precise__neon);
+          .TestExactDivideByPO2(xnn_requantize_precise__neon);
       }
     }
   }
 
   TEST(PRECISE__NEON, divide_by_po2_with_rounding_up) {
-    for (int32_t zeroPoint = 0; zeroPoint < 256; zeroPoint++) {
+    for (int32_t zero_point = 0; zero_point < 256; zero_point++) {
       for (uint32_t s = 1; s < 32; s++) {
         RequantizationTester()
-          .zeroPoint(zeroPoint)
+          .zero_point(zero_point)
           .s(s)
-          .testDivideByPO2WithRoundingUp(xnn_requantize_precise__neon);
+          .TestDivideByPO2WithRoundingUp(xnn_requantize_precise__neon);
       }
     }
   }
 
   TEST(PRECISE__NEON, divide_by_po2_with_rounding_down) {
-    for (int32_t zeroPoint = 0; zeroPoint < 256; zeroPoint++) {
+    for (int32_t zero_point = 0; zero_point < 256; zero_point++) {
       for (uint32_t s = 1; s < 32; s++) {
         RequantizationTester()
-          .zeroPoint(zeroPoint)
+          .zero_point(zero_point)
           .s(s)
-          .testDivideByPO2WithRoundingDown(xnn_requantize_precise__neon);
+          .TestDivideByPO2WithRoundingDown(xnn_requantize_precise__neon);
       }
     }
   }
 
   TEST(PRECISE__NEON, divide_by_po2_with_rounding_away) {
-    for (int32_t zeroPoint = 0; zeroPoint < 256; zeroPoint++) {
+    for (int32_t zero_point = 0; zero_point < 256; zero_point++) {
       for (uint32_t s = 1; s < 32; s++) {
         RequantizationTester()
-          .zeroPoint(zeroPoint)
+          .zero_point(zero_point)
           .s(s)
-          .testDivideByPO2WithRoundingAway(xnn_requantize_precise__neon);
+          .TestDivideByPO2WithRoundingAway(xnn_requantize_precise__neon);
       }
     }
   }
 
   TEST(PRECISE__NEON, special_cases) {
     RequantizationTester()
-      .testSpecialCases(xnn_requantize_precise__neon);
+      .TestSpecialCases(xnn_requantize_precise__neon);
   }
 
   TEST(PRECISE__NEON, random_cases) {
     RequantizationTester()
       .iterations(100)
-      .testRandomCasesPrecise(xnn_requantize_precise__neon);
+      .TestRandomCasesPrecise(xnn_requantize_precise__neon);
   }
 
 
@@ -1063,7 +1065,7 @@
   TEST(FP32__NEON, random_cases) {
     RequantizationTester()
       .iterations(1000)
-      .testRandomCasesApproximate(xnn_requantize_fp32__neon);
+      .TestRandomCasesApproximate(xnn_requantize_fp32__neon);
   }
 
 
@@ -1075,28 +1077,28 @@
     for (uint32_t s = 1; s < 32; s++) {
       RequantizationTester()
         .s(s)
-        .testExactDivideByPO2(xnn_requantize_q31__neon);
+        .TestExactDivideByPO2(xnn_requantize_q31__neon);
     }
   }
 
   TEST(Q31__NEON, exact_divide_by_po2_with_zero_point) {
-    for (int32_t zeroPoint = 1; zeroPoint < 256; zeroPoint++) {
+    for (int32_t zero_point = 1; zero_point < 256; zero_point++) {
       for (uint32_t s = 1; s < 32; s++) {
         RequantizationTester()
-          .zeroPoint(zeroPoint)
+          .zero_point(zero_point)
           .s(s)
-          .testExactDivideByPO2(xnn_requantize_q31__neon);
+          .TestExactDivideByPO2(xnn_requantize_q31__neon);
       }
     }
   }
 
   TEST(Q31__NEON, divide_by_po2_with_rounding_up) {
-    for (int32_t zeroPoint = 0; zeroPoint < 256; zeroPoint++) {
+    for (int32_t zero_point = 0; zero_point < 256; zero_point++) {
       for (uint32_t s = 1; s < 32; s++) {
         RequantizationTester()
-          .zeroPoint(zeroPoint)
+          .zero_point(zero_point)
           .s(s)
-          .testDivideByPO2WithRoundingUp(xnn_requantize_q31__neon);
+          .TestDivideByPO2WithRoundingUp(xnn_requantize_q31__neon);
       }
     }
   }
@@ -1104,31 +1106,31 @@
   /* No rounding down Test - it fails because of upward bias in multiplication */
 
   TEST(Q31__NEON, divide_by_po2_with_rounding_away) {
-    for (int32_t zeroPoint = 0; zeroPoint < 256; zeroPoint++) {
+    for (int32_t zero_point = 0; zero_point < 256; zero_point++) {
       for (uint32_t s = 1; s < 32; s++) {
         RequantizationTester()
-          .zeroPoint(zeroPoint)
+          .zero_point(zero_point)
           .s(s)
-          .testDivideByPO2WithRoundingAway(xnn_requantize_q31__neon);
+          .TestDivideByPO2WithRoundingAway(xnn_requantize_q31__neon);
       }
     }
   }
 
   TEST(Q31__NEON, special_cases) {
     RequantizationTester()
-      .testSpecialCases(xnn_requantize_q31__neon);
+      .TestSpecialCases(xnn_requantize_q31__neon);
   }
 
   TEST(Q31__NEON, random_cases) {
     RequantizationTester()
       .iterations(100)
-      .testRandomCasesApproximate(xnn_requantize_q31__neon);
+      .TestRandomCasesApproximate(xnn_requantize_q31__neon);
   }
 
   TEST(Q31__NEON, random_match_gemmlowp) {
     RequantizationTester()
       .iterations(100)
-      .testRandomCasesAgainstReference(xnn_requantize_q31__neon, xnn_requantize_gemmlowp__neon);
+      .TestRandomCasesAgainstReference(xnn_requantize_q31__neon, xnn_requantize_gemmlowp__neon);
   }
 
 
@@ -1139,6 +1141,6 @@
   TEST(GEMMLOWP__NEON, random_cases) {
     RequantizationTester()
       .iterations(100)
-      .testRandomCasesApproximate(xnn_requantize_gemmlowp__neon);
+      .TestRandomCasesApproximate(xnn_requantize_gemmlowp__neon);
   }
 #endif  // XNN_ARCH_ARM || XNN_ARCH_ARM64