Targets for requantization tests and benchmarks
PiperOrigin-RevId: 299993900
diff --git a/BUILD.bazel b/BUILD.bazel
index 74bd94a..8986c2f 100644
--- a/BUILD.bazel
+++ b/BUILD.bazel
@@ -242,6 +242,10 @@
"src/x8-zip/x3-scalar.c",
"src/x8-zip/x4-scalar.c",
"src/x8-zip/xm-scalar.c",
+ "src/requantization/precise-scalar.c",
+ "src/requantization/fp32-scalar.c",
+ "src/requantization/q31-scalar.c",
+ "src/requantization/gemmlowp-scalar.c",
]
WASM_UKERNELS = [
@@ -424,6 +428,8 @@
"src/x32-zip/x3-psimd.c",
"src/x32-zip/x4-psimd.c",
"src/x32-zip/xm-psimd.c",
+ "src/requantization/precise-psimd.c",
+ "src/requantization/fp32-psimd.c",
]
PSIMD_ACCMATH_UKERNELS = [
@@ -616,6 +622,10 @@
"src/math/sigmoid-neon-rr2-lut2048-p1-nr2recps.c",
"src/math/sigmoid-neon-rr2-lut64-p2-nr2recps.c",
"src/math/sigmoid-neon-rr2-p5-nr2recps.c",
+ "src/requantization/precise-neon.c",
+ "src/requantization/fp32-neon.c",
+ "src/requantization/q31-neon.c",
+ "src/requantization/gemmlowp-neon.c",
]
NEONFMA_UKERNELS = [
@@ -986,6 +996,16 @@
"src/math/exp-sse2-p5.c",
"src/math/expminus-sse2-p5.c",
"src/math/sigmoid-sse2-p5-div.c",
+ "src/requantization/precise-sse2.c",
+ "src/requantization/fp32-sse2.c",
+ "src/requantization/q31-sse2.c",
+ "src/requantization/gemmlowp-sse2.c",
+]
+
+SSSE3_UKERNELS = [
+ "src/requantization/precise-ssse3.c",
+ "src/requantization/q31-ssse3.c",
+ "src/requantization/gemmlowp-ssse3.c",
]
SSE41_UKERNELS = [
@@ -997,6 +1017,9 @@
"src/f32-sigmoid/gen/sse41-p5-div-x16.c",
"src/f32-sigmoid/gen/sse41-p5-div-x20.c",
"src/f32-sigmoid/gen/sse41-p5-div-x24.c",
+ "src/requantization/precise-sse4.c",
+ "src/requantization/q31-sse4.c",
+ "src/requantization/gemmlowp-sse4.c",
]
AVX_UKERNELS = [
@@ -1439,6 +1462,7 @@
]
INTERNAL_MICROKERNEL_HDRS = [
+ "src/requantization/gemmlowp-requantization.h",
"src/xnnpack/argmaxpool.h",
"src/xnnpack/avgpool.h",
"src/xnnpack/bilinear.h",
@@ -1669,6 +1693,19 @@
)
xnnpack_cc_library(
+ name = "ssse3_ukernels",
+ hdrs = INTERNAL_HDRS,
+ copts = xnnpack_std_copts(),
+ x86_copts = ["-mssse3"],
+ x86_srcs = SSSE3_UKERNELS,
+ deps = [
+ ":tables",
+ "@FP16",
+ "@pthreadpool",
+ ],
+)
+
+xnnpack_cc_library(
name = "sse41_ukernels",
hdrs = INTERNAL_HDRS,
copts = xnnpack_std_copts(),
@@ -1775,6 +1812,7 @@
":psimd_fastmath_ukernels",
":psimd_accmath_ukernels",
":sse2_ukernels",
+ ":ssse3_ukernels",
":sse41_ukernels",
":avx_ukernels",
":fma3_ukernels",
@@ -2126,6 +2164,16 @@
deps = MICROKERNEL_BENCHMARK_DEPS + [":im2col"],
)
+xnnpack_benchmark(
+ name = "requantization_bench",
+ srcs = [
+ "bench/requantization.cc",
+ "src/xnnpack/requantization-stubs.h",
+ "src/xnnpack/AlignedAllocator.h",
+ ] + MICROKERNEL_BENCHMARK_HDRS,
+ deps = MICROKERNEL_BENCHMARK_DEPS,
+)
+
########################### Benchmarks for operators ###########################
xnnpack_benchmark(
@@ -2894,6 +2942,16 @@
deps = MICROKERNEL_TEST_DEPS,
)
+xnnpack_unit_test(
+ name = "requantization_test",
+ srcs = [
+ "src/xnnpack/requantization-stubs.h",
+ "test/requantization.cc",
+ "test/requantization-tester.h",
+ ] + MICROKERNEL_TEST_HDRS,
+ deps = MICROKERNEL_TEST_DEPS,
+)
+
########################### Size test for the library ##########################
xnnpack_binary(
diff --git a/CMakeLists.txt b/CMakeLists.txt
index 8e6f5a2..d20a53d 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -340,7 +340,11 @@
src/x8-zip/x2-scalar.c
src/x8-zip/x3-scalar.c
src/x8-zip/x4-scalar.c
- src/x8-zip/xm-scalar.c)
+ src/x8-zip/xm-scalar.c
+ src/requantization/precise-scalar.c
+ src/requantization/fp32-scalar.c
+ src/requantization/q31-scalar.c
+ src/requantization/gemmlowp-scalar.c)
SET(XNNPACK_PSIMD_FASTMATH_MICROKERNEL_SRCS
src/f32-argmaxpool/4x-psimd-c4.c
@@ -439,7 +443,9 @@
src/x32-zip/x2-psimd.c
src/x32-zip/x3-psimd.c
src/x32-zip/x4-psimd.c
- src/x32-zip/xm-psimd.c)
+ src/x32-zip/xm-psimd.c
+ src/requantization/precise-psimd.c
+ src/requantization/fp32-psimd.c)
SET(XNNPACK_PSIMD_ACCMATH_MICROKERNEL_SRCS
src/f32-raddstoreexpminusmax/gen/psimd-p5-x4.c
@@ -628,7 +634,11 @@
src/math/sigmoid-neon-rr1-p5-nr2recps.c
src/math/sigmoid-neon-rr2-lut2048-p1-nr2recps.c
src/math/sigmoid-neon-rr2-lut64-p2-nr2recps.c
- src/math/sigmoid-neon-rr2-p5-nr2recps.c)
+ src/math/sigmoid-neon-rr2-p5-nr2recps.c
+ src/requantization/precise-neon.c
+ src/requantization/fp32-neon.c
+ src/requantization/q31-neon.c
+ src/requantization/gemmlowp-neon.c)
SET(XNNPACK_NEONFMA_MICROKERNEL_SRCS
src/f32-bilinear/gen/neonfma-c4.c
@@ -985,7 +995,16 @@
src/x8-zip/xm-sse2.c
src/math/exp-sse2-p5.c
src/math/expminus-sse2-p5.c
- src/math/sigmoid-sse2-p5-div.c)
+ src/math/sigmoid-sse2-p5-div.c
+ src/requantization/precise-sse2.c
+ src/requantization/fp32-sse2.c
+ src/requantization/q31-sse2.c
+ src/requantization/gemmlowp-sse2.c)
+
+SET(XNNPACK_SSSE3_MICROKERNEL_SRCS
+ src/requantization/precise-ssse3.c
+ src/requantization/q31-ssse3.c
+ src/requantization/gemmlowp-ssse3.c)
SET(XNNPACK_SSE41_MICROKERNEL_SRCS
src/f32-prelu/gen/sse41-2x4.c
@@ -995,7 +1014,10 @@
src/f32-sigmoid/gen/sse41-p5-div-x12.c
src/f32-sigmoid/gen/sse41-p5-div-x16.c
src/f32-sigmoid/gen/sse41-p5-div-x20.c
- src/f32-sigmoid/gen/sse41-p5-div-x24.c)
+ src/f32-sigmoid/gen/sse41-p5-div-x24.c
+ src/requantization/precise-sse4.c
+ src/requantization/q31-sse4.c
+ src/requantization/gemmlowp-sse4.c)
SET(XNNPACK_AVX_MICROKERNEL_SRCS
src/f32-clamp/avx.c
@@ -1450,6 +1472,7 @@
IF(CMAKE_SYSTEM_PROCESSOR MATCHES "^(i[3-6]86|x86_64|AMD64)$" OR IOS_ARCH MATCHES "^(i386|x86_64|AMD64)$")
LIST(APPEND XNNPACK_MICROKERNEL_SRCS ${XNNPACK_SSE_MICROKERNEL_SRCS})
LIST(APPEND XNNPACK_MICROKERNEL_SRCS ${XNNPACK_SSE2_MICROKERNEL_SRCS})
+ LIST(APPEND XNNPACK_MICROKERNEL_SRCS ${XNNPACK_SSSE3_MICROKERNEL_SRCS})
LIST(APPEND XNNPACK_MICROKERNEL_SRCS ${XNNPACK_SSE41_MICROKERNEL_SRCS})
LIST(APPEND XNNPACK_MICROKERNEL_SRCS ${XNNPACK_AVX_MICROKERNEL_SRCS})
LIST(APPEND XNNPACK_MICROKERNEL_SRCS ${XNNPACK_FMA3_MICROKERNEL_SRCS})
@@ -1487,6 +1510,7 @@
IF(CMAKE_SYSTEM_PROCESSOR MATCHES "^(i[3-6]86|x86_64|AMD64)$" OR IOS_ARCH MATCHES "^(i386|x86_64|AMD64)$")
SET_PROPERTY(SOURCE ${XNNPACK_SSE_MICROKERNEL_SRCS} APPEND_STRING PROPERTY COMPILE_FLAGS " -msse ")
SET_PROPERTY(SOURCE ${XNNPACK_SSE2_MICROKERNEL_SRCS} APPEND_STRING PROPERTY COMPILE_FLAGS " -msse2 ")
+ SET_PROPERTY(SOURCE ${XNNPACK_SSSE3_MICROKERNEL_SRCS} APPEND_STRING PROPERTY COMPILE_FLAGS " -mssse3 ")
SET_PROPERTY(SOURCE ${XNNPACK_SSE41_MICROKERNEL_SRCS} APPEND_STRING PROPERTY COMPILE_FLAGS " -msse4.1 ")
SET_PROPERTY(SOURCE ${XNNPACK_AVX_MICROKERNEL_SRCS} APPEND_STRING PROPERTY COMPILE_FLAGS " -mavx ")
SET_PROPERTY(SOURCE ${XNNPACK_FMA3_MICROKERNEL_SRCS} APPEND_STRING PROPERTY COMPILE_FLAGS " -mfma ")
@@ -2349,6 +2373,15 @@
TARGET_INCLUDE_DIRECTORIES(x8-zip-test PRIVATE src test)
TARGET_LINK_LIBRARIES(x8-zip-test PRIVATE XNNPACK cpuinfo fp16 gtest gtest_main)
ADD_TEST(x8-zip-test x8-zip-test)
+
+ ADD_EXECUTABLE(requantization-test test/requantization.cc)
+ SET_TARGET_PROPERTIES(requantization-test PROPERTIES
+ CXX_STANDARD 11
+ CXX_STANDARD_REQUIRED YES
+ CXX_EXTENSIONS YES)
+ TARGET_INCLUDE_DIRECTORIES(requantization-test PRIVATE src test)
+ TARGET_LINK_LIBRARIES(requantization-test PRIVATE XNNPACK cpuinfo fp16 gtest gtest_main)
+ ADD_TEST(requantization-test requantization-test)
ENDIF()
# ---[ XNNPACK microbenchmarks
@@ -2631,4 +2664,13 @@
TARGET_INCLUDE_DIRECTORIES(q8-gemm-bench PRIVATE src)
TARGET_INCLUDE_DIRECTORIES(q8-gemm-bench PRIVATE "${CMAKE_CURRENT_SOURCE_DIR}")
TARGET_LINK_LIBRARIES(q8-gemm-bench PRIVATE XNNPACK cpuinfo fp16 benchmark bench-utils)
+
+ ADD_EXECUTABLE(requantization-bench bench/requantization.cc)
+ SET_TARGET_PROPERTIES(requantization-bench PROPERTIES
+ CXX_STANDARD 11
+ CXX_STANDARD_REQUIRED YES
+ CXX_EXTENSIONS YES)
+ TARGET_INCLUDE_DIRECTORIES(requantization-bench PRIVATE src)
+ TARGET_INCLUDE_DIRECTORIES(requantization-bench PRIVATE "${CMAKE_CURRENT_SOURCE_DIR}")
+ TARGET_LINK_LIBRARIES(requantization-bench PRIVATE XNNPACK cpuinfo fp16 benchmark bench-utils)
ENDIF()
diff --git a/bench/requantization.cc b/bench/requantization.cc
index d2db0b0..cfd95f7 100644
--- a/bench/requantization.cc
+++ b/bench/requantization.cc
@@ -21,18 +21,16 @@
#include <xnnpack/common.h>
#include <xnnpack/requantization-stubs.h>
-inline uint32_t divideRoundUp(uint32_t x, uint32_t q)
-{
+
+inline uint32_t divide_round_up(uint32_t x, uint32_t q) {
return x / q + uint32_t(x % q != 0);
}
-inline uint32_t roundUp(uint32_t x, uint32_t q)
-{
- return q * divideRoundUp(x, q);
+inline uint32_t round_up(uint32_t x, uint32_t q) {
+ return q * divide_round_up(x, q);
}
-inline uint32_t min(uint32_t a, uint32_t b)
-{
+inline uint32_t min(uint32_t a, uint32_t b) {
return a < b ? a : b;
}
@@ -88,184 +86,165 @@
size_t n_;
};
-BENCHMARK_F(Requantization, precise__scalar_unsigned32)(benchmark::State& state)
-{
+BENCHMARK_F(Requantization, precise__scalar_unsigned32)(benchmark::State& state) {
for (auto _ : state) {
xnn_requantize_precise__scalar_unsigned32(
n(), input(), 0x1.0p-12f /* scale */, 128 /* zero point */, 1 /* qmin */, 254 /* qmax */, output());
}
}
-BENCHMARK_F(Requantization, precise__scalar_unsigned64)(benchmark::State& state)
-{
+BENCHMARK_F(Requantization, precise__scalar_unsigned64)(benchmark::State& state) {
for (auto _ : state) {
xnn_requantize_precise__scalar_unsigned64(
n(), input(), 0x1.0p-12f /* scale */, 128 /* zero point */, 1 /* qmin */, 254 /* qmax */, output());
}
}
-BENCHMARK_F(Requantization, precise__scalar_signed64)(benchmark::State& state)
-{
+BENCHMARK_F(Requantization, precise__scalar_signed64)(benchmark::State& state) {
for (auto _ : state) {
xnn_requantize_precise__scalar_signed64(
n(), input(), 0x1.0p-12f /* scale */, 128 /* zero point */, 1 /* qmin */, 254 /* qmax */, output());
}
}
-BENCHMARK_F(Requantization, fp32__scalar_lrintf)(benchmark::State& state)
-{
+BENCHMARK_F(Requantization, fp32__scalar_lrintf)(benchmark::State& state) {
for (auto _ : state) {
xnn_requantize_fp32__scalar_lrintf(
n(), input(), 0x1.0p-12f /* scale */, 128 /* zero point */, 1 /* qmin */, 254 /* qmax */, output());
}
}
-BENCHMARK_F(Requantization, fp32__scalar_magic)(benchmark::State& state)
-{
+BENCHMARK_F(Requantization, fp32__scalar_magic)(benchmark::State& state) {
for (auto _ : state) {
xnn_requantize_fp32__scalar_magic(
n(), input(), 0x1.0p-12f /* scale */, 128 /* zero point */, 1 /* qmin */, 254 /* qmax */, output());
}
}
-BENCHMARK_F(Requantization, gemmlowp__scalar)(benchmark::State& state)
-{
+BENCHMARK_F(Requantization, gemmlowp__scalar)(benchmark::State& state) {
for (auto _ : state) {
xnn_requantize_gemmlowp__scalar(
n(), input(), 0x1.0p-12f /* scale */, 128 /* zero point */, 1 /* qmin */, 254 /* qmax */, output());
}
}
-BENCHMARK_F(Requantization, precise__psimd)(benchmark::State& state)
-{
- for (auto _ : state) {
- xnn_requantize_precise__psimd(
- n(), input(), 0x1.0p-12f /* scale */, 128 /* zero point */, 1 /* qmin */, 254 /* qmax */, output());
+#if !XNN_ARCH_ASMJS && !XNN_ARCH_WASM
+ BENCHMARK_F(Requantization, precise__psimd)(benchmark::State& state) {
+ for (auto _ : state) {
+ xnn_requantize_precise__psimd(
+ n(), input(), 0x1.0p-12f /* scale */, 128 /* zero point */, 1 /* qmin */, 254 /* qmax */, output());
+ }
}
-}
-BENCHMARK_F(Requantization, fp32__psimd)(benchmark::State& state)
-{
- for (auto _ : state) {
- xnn_requantize_fp32__psimd(
- n(), input(), 0x1.0p-12f /* scale */, 128 /* zero point */, 1 /* qmin */, 254 /* qmax */, output());
+ BENCHMARK_F(Requantization, fp32__psimd)(benchmark::State& state) {
+ for (auto _ : state) {
+ xnn_requantize_fp32__psimd(
+ n(), input(), 0x1.0p-12f /* scale */, 128 /* zero point */, 1 /* qmin */, 254 /* qmax */, output());
+ }
}
-}
+#endif // !XNN_ARCH_ASMJS && !XNN_ARCH_WASM
+
#if XNN_ARCH_ARM || XNN_ARCH_ARM64
-BENCHMARK_F(Requantization, precise__neon)(benchmark::State& state)
-{
- for (auto _ : state) {
- xnn_requantize_precise__neon(
- n(), input(), 0x1.0p-12f /* scale */, 128 /* zero point */, 1 /* qmin */, 254 /* qmax */, output());
+ BENCHMARK_F(Requantization, precise__neon)(benchmark::State& state) {
+ for (auto _ : state) {
+ xnn_requantize_precise__neon(
+ n(), input(), 0x1.0p-12f /* scale */, 128 /* zero point */, 1 /* qmin */, 254 /* qmax */, output());
+ }
}
-}
-BENCHMARK_F(Requantization, fp32__neon)(benchmark::State& state)
-{
- for (auto _ : state) {
- xnn_requantize_fp32__neon(
- n(), input(), 0x1.0p-12f /* scale */, 128 /* zero point */, 1 /* qmin */, 254 /* qmax */, output());
+ BENCHMARK_F(Requantization, fp32__neon)(benchmark::State& state) {
+ for (auto _ : state) {
+ xnn_requantize_fp32__neon(
+ n(), input(), 0x1.0p-12f /* scale */, 128 /* zero point */, 1 /* qmin */, 254 /* qmax */, output());
+ }
}
-}
-BENCHMARK_F(Requantization, q31__neon)(benchmark::State& state)
-{
- for (auto _ : state) {
- xnn_requantize_q31__neon(
- n(), input(), 0x1.0p-12f /* scale */, 128 /* zero point */, 1 /* qmin */, 254 /* qmax */, output());
+ BENCHMARK_F(Requantization, q31__neon)(benchmark::State& state) {
+ for (auto _ : state) {
+ xnn_requantize_q31__neon(
+ n(), input(), 0x1.0p-12f /* scale */, 128 /* zero point */, 1 /* qmin */, 254 /* qmax */, output());
+ }
}
-}
-BENCHMARK_F(Requantization, gemmlowp__neon)(benchmark::State& state)
-{
- for (auto _ : state) {
- xnn_requantize_gemmlowp__neon(
- n(), input(), 0x1.0p-12f /* scale */, 128 /* zero point */, 1 /* qmin */, 254 /* qmax */, output());
+ BENCHMARK_F(Requantization, gemmlowp__neon)(benchmark::State& state) {
+ for (auto _ : state) {
+ xnn_requantize_gemmlowp__neon(
+ n(), input(), 0x1.0p-12f /* scale */, 128 /* zero point */, 1 /* qmin */, 254 /* qmax */, output());
+ }
}
-}
#endif
#if XNN_ARCH_X86 || XNN_ARCH_X86_64
-BENCHMARK_F(Requantization, precise__sse2)(benchmark::State& state)
-{
- for (auto _ : state) {
- xnn_requantize_precise__sse2(
- n(), input(), 0x1.0p-12f /* scale */, 128 /* zero point */, 1 /* qmin */, 254 /* qmax */, output());
+ BENCHMARK_F(Requantization, precise__sse2)(benchmark::State& state) {
+ for (auto _ : state) {
+ xnn_requantize_precise__sse2(
+ n(), input(), 0x1.0p-12f /* scale */, 128 /* zero point */, 1 /* qmin */, 254 /* qmax */, output());
+ }
}
-}
-BENCHMARK_F(Requantization, precise__ssse3)(benchmark::State& state)
-{
- for (auto _ : state) {
- xnn_requantize_precise__ssse3(
- n(), input(), 0x1.0p-12f /* scale */, 128 /* zero point */, 1 /* qmin */, 254 /* qmax */, output());
+ BENCHMARK_F(Requantization, precise__ssse3)(benchmark::State& state) {
+ for (auto _ : state) {
+ xnn_requantize_precise__ssse3(
+ n(), input(), 0x1.0p-12f /* scale */, 128 /* zero point */, 1 /* qmin */, 254 /* qmax */, output());
+ }
}
-}
-BENCHMARK_F(Requantization, precise__sse4)(benchmark::State& state)
-{
- for (auto _ : state) {
- xnn_requantize_precise__sse4(
- n(), input(), 0x1.0p-12f /* scale */, 128 /* zero point */, 1 /* qmin */, 254 /* qmax */, output());
+ BENCHMARK_F(Requantization, precise__sse4)(benchmark::State& state) {
+ for (auto _ : state) {
+ xnn_requantize_precise__sse4(
+ n(), input(), 0x1.0p-12f /* scale */, 128 /* zero point */, 1 /* qmin */, 254 /* qmax */, output());
+ }
}
-}
-BENCHMARK_F(Requantization, fp32__sse2)(benchmark::State& state)
-{
- for (auto _ : state) {
- xnn_requantize_fp32__sse2(
- n(), input(), 0x1.0p-12f /* scale */, 128 /* zero point */, 1 /* qmin */, 254 /* qmax */, output());
+ BENCHMARK_F(Requantization, fp32__sse2)(benchmark::State& state) {
+ for (auto _ : state) {
+ xnn_requantize_fp32__sse2(
+ n(), input(), 0x1.0p-12f /* scale */, 128 /* zero point */, 1 /* qmin */, 254 /* qmax */, output());
+ }
}
-}
-BENCHMARK_F(Requantization, q31__sse2)(benchmark::State& state)
-{
- for (auto _ : state) {
- xnn_requantize_q31__sse2(
- n(), input(), 0x1.0p-12f /* scale */, 128 /* zero point */, 1 /* qmin */, 254 /* qmax */, output());
+ BENCHMARK_F(Requantization, q31__sse2)(benchmark::State& state) {
+ for (auto _ : state) {
+ xnn_requantize_q31__sse2(
+ n(), input(), 0x1.0p-12f /* scale */, 128 /* zero point */, 1 /* qmin */, 254 /* qmax */, output());
+ }
}
-}
-BENCHMARK_F(Requantization, q31__ssse3)(benchmark::State& state)
-{
- for (auto _ : state) {
- xnn_requantize_q31__ssse3(
- n(), input(), 0x1.0p-12f /* scale */, 128 /* zero point */, 1 /* qmin */, 254 /* qmax */, output());
+ BENCHMARK_F(Requantization, q31__ssse3)(benchmark::State& state) {
+ for (auto _ : state) {
+ xnn_requantize_q31__ssse3(
+ n(), input(), 0x1.0p-12f /* scale */, 128 /* zero point */, 1 /* qmin */, 254 /* qmax */, output());
+ }
}
-}
-BENCHMARK_F(Requantization, q31__sse4)(benchmark::State& state)
-{
- for (auto _ : state) {
- xnn_requantize_q31__sse4(
- n(), input(), 0x1.0p-12f /* scale */, 128 /* zero point */, 1 /* qmin */, 254 /* qmax */, output());
+ BENCHMARK_F(Requantization, q31__sse4)(benchmark::State& state) {
+ for (auto _ : state) {
+ xnn_requantize_q31__sse4(
+ n(), input(), 0x1.0p-12f /* scale */, 128 /* zero point */, 1 /* qmin */, 254 /* qmax */, output());
+ }
}
-}
-BENCHMARK_F(Requantization, gemmlowp__sse2)(benchmark::State& state)
-{
- for (auto _ : state) {
- xnn_requantize_gemmlowp__sse2(
- n(), input(), 0x1.0p-12f /* scale */, 128 /* zero point */, 1 /* qmin */, 254 /* qmax */, output());
+ BENCHMARK_F(Requantization, gemmlowp__sse2)(benchmark::State& state) {
+ for (auto _ : state) {
+ xnn_requantize_gemmlowp__sse2(
+ n(), input(), 0x1.0p-12f /* scale */, 128 /* zero point */, 1 /* qmin */, 254 /* qmax */, output());
+ }
}
-}
-BENCHMARK_F(Requantization, gemmlowp__ssse3)(benchmark::State& state)
-{
- for (auto _ : state) {
- xnn_requantize_gemmlowp__ssse3(
- n(), input(), 0x1.0p-12f /* scale */, 128 /* zero point */, 1 /* qmin */, 254 /* qmax */, output());
+ BENCHMARK_F(Requantization, gemmlowp__ssse3)(benchmark::State& state) {
+ for (auto _ : state) {
+ xnn_requantize_gemmlowp__ssse3(
+ n(), input(), 0x1.0p-12f /* scale */, 128 /* zero point */, 1 /* qmin */, 254 /* qmax */, output());
+ }
}
-}
-BENCHMARK_F(Requantization, gemmlowp__sse4)(benchmark::State& state)
-{
- for (auto _ : state) {
- xnn_requantize_gemmlowp__sse4(
- n(), input(), 0x1.0p-12f /* scale */, 128 /* zero point */, 1 /* qmin */, 254 /* qmax */, output());
+ BENCHMARK_F(Requantization, gemmlowp__sse4)(benchmark::State& state) {
+ for (auto _ : state) {
+ xnn_requantize_gemmlowp__sse4(
+ n(), input(), 0x1.0p-12f /* scale */, 128 /* zero point */, 1 /* qmin */, 254 /* qmax */, output());
+ }
}
-}
#endif
#ifndef XNNPACK_BENCHMARK_NO_MAIN
diff --git a/src/requantization/gemmlowp-requantization.h b/src/requantization/gemmlowp-requantization.h
new file mode 100644
index 0000000..9ad0275
--- /dev/null
+++ b/src/requantization/gemmlowp-requantization.h
@@ -0,0 +1,143 @@
+// Copyright (c) Facebook, Inc. and its affiliates.
+// All rights reserved.
+//
+// Copyright 2019 Google LLC
+//
+// This source code is licensed under the BSD-style license found in the
+// LICENSE file in the root directory of this source tree.
+
+#pragma once
+
+#include <stdint.h>
+
+#include <xnnpack/common.h>
+#include <xnnpack/scalar-utils.h>
+
+#if XNN_ARCH_X86 || XNN_ARCH_X86_64
+ #include <immintrin.h>
+#endif
+
+// The code below is adapted from Google's gemmlowp library.
+// It is only used in XNNPACK unit tests and comparative benchmarks, but not the library itself.
+//
+// Copyright 2015 Google Inc. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+
+inline static int32_t gemmlowp_scalar_vqrdmulh_s32(int32_t a, int32_t b)
+{
+ const bool overflow = a == b && a == INT32_MIN; // only INT32_MIN * INT32_MIN overflows the doubled product
+ const int64_t ab_64 = (int64_t) a * (int64_t) b;
+ const int32_t nudge = (a ^ b) >= 0 ? INT32_C(0x40000000) : -INT32_C(0x3FFFFFFF); // sign-dependent rounding constant
+ const int32_t ab_x2_high32 = (int32_t) ((ab_64 + nudge) / INT64_C(0x80000000)); // high 32 bits of rounded 2*a*b
+ return overflow ? INT32_MAX : ab_x2_high32; // saturate the single overflowing case
+}
+
+inline static int32_t gemmlowp_scalar_rdivbypo2_s32(int32_t x, int exponent)
+{
+ const int32_t mask = ((1 << exponent) - 1); // low `exponent` bits, i.e. the discarded remainder
+ const int32_t remainder = x & mask;
+ const int32_t threshold = (mask >> 1) + (int32_t) (x < 0); // rounding threshold, biased by 1 for negative x
+ return asr_s32(x, exponent) + (int32_t) (remainder > threshold); // arithmetic shift, then round up past threshold
+}
+
+#if XNN_ARCH_X86 || XNN_ARCH_X86_64
+ static inline __m128i gemmlowp_sse_rdivbypo2_s32(__m128i x, int exponent) { // per-lane rounding arithmetic right shift
+ const __m128i mask = _mm_set1_epi32((int32_t) ((UINT64_C(1) << exponent) - UINT64_C(1))); // low `exponent` bits
+ const __m128i remainder = _mm_and_si128(x, mask);
+ const __m128i threshold = _mm_sub_epi32(
+ _mm_srli_epi32(mask, 1), _mm_cmplt_epi32(x, _mm_setzero_si128())); // (mask >> 1) + (x < 0): cmplt is -1, sub adds 1
+ return _mm_sub_epi32(
+ _mm_sra_epi32(x, _mm_cvtsi32_si128(exponent)),
+ _mm_cmpgt_epi32(remainder, threshold)); // cmpgt is -1 where round-up is needed; sub adds 1
+ }
+
+ static inline __m128i gemmlowp_sse_mul_s32(__m128i a, __m128i b) {
+ #ifdef __SSE4_1__
+ return _mm_mul_epi32(a, b);
+ #else
+ __m128i sign, zero, mul_us, a_neg, b_neg, mul_us_neg;
+ sign = _mm_xor_si128(a, b);
+ sign = _mm_srai_epi32(sign, 31); // broadcast sign of each product across its 32-bit lane:
+ // all-ones if negative, all-zeros if positive
+ sign = _mm_shuffle_epi32(
+ sign,
+ _MM_SHUFFLE(2, 2, 0, 0)); // spread signs of lanes 0 and 2 over the 64-bit result lanes
+ zero = _mm_setzero_si128();
+ #ifdef __SSSE3__
+ a_neg = _mm_abs_epi32(a); // absolute value of a
+ b_neg = _mm_abs_epi32(b); // absolute value of b
+ #else // pre-SSSE3
+ const __m128i a_neg_mask = _mm_cmplt_epi32(a, zero);
+ a_neg = _mm_sub_epi32(_mm_xor_si128(a, a_neg_mask), a_neg_mask);
+ const __m128i b_neg_mask = _mm_cmplt_epi32(b, zero);
+ b_neg = _mm_sub_epi32(_mm_xor_si128(b, b_neg_mask), b_neg_mask);
+ #endif // pre-SSSE3
+ mul_us = _mm_mul_epu32(a_neg, b_neg); // unsigned multiply of lanes 0 and 2 of |a| and |b|,
+ // producing two 64-bit results
+ mul_us_neg = _mm_sub_epi64(zero, mul_us); // negated products
+ mul_us_neg = _mm_and_si128(sign, mul_us_neg); // keep negated product where sign is set
+ mul_us = _mm_andnot_si128(sign, mul_us); // keep positive product where sign is clear
+ return _mm_or_si128(mul_us, mul_us_neg);
+ #endif
+ }
+
+ static inline __m128i gemmlowp_sse_vqrdmulh_s32(__m128i a, __m128i b) {
+ // saturation can only happen when a == b == INT32_MIN (doubled product overflows int32)
+ const __m128i min = _mm_set1_epi32(INT32_MIN);
+ const __m128i saturation_mask =
+ _mm_and_si128(_mm_cmpeq_epi32(a, b), _mm_cmpeq_epi32(a, min));
+
+ // a = a0 | a1 | a2 | a3
+ // b = b0 | b1 | b2 | b3
+ const __m128i a0_a2 = a;
+ const __m128i a1_a3 = _mm_srli_si128(a, 4);
+ const __m128i b0_b2 = b;
+ const __m128i b1_b3 = _mm_srli_si128(b, 4);
+
+ const __m128i a0b0_a2b2 = gemmlowp_sse_mul_s32(a0_a2, b0_b2);
+ const __m128i a1b1_a3b3 = gemmlowp_sse_mul_s32(a1_a3, b1_b3);
+
+ // do the rounding and take into account that it will be doubled
+ const __m128i nudge = _mm_set1_epi64x(1 << 30);
+ const __m128i a0b0_a2b2_rounded = _mm_add_epi64(a0b0_a2b2, nudge);
+ const __m128i a1b1_a3b3_rounded = _mm_add_epi64(a1b1_a3b3, nudge);
+
+ // do the doubling
+ const __m128i a0b0_a2b2_rounded_2x = _mm_slli_epi64(a0b0_a2b2_rounded, 1);
+ const __m128i a1b1_a3b3_rounded_2x = _mm_slli_epi64(a1b1_a3b3_rounded, 1);
+
+ // get the high part of the products
+ #ifdef __SSE4_1__
+ const __m128i result = _mm_blend_epi16(
+ _mm_srli_epi64(a0b0_a2b2_rounded_2x, 32), a1b1_a3b3_rounded_2x, 0xCC);
+ #else
+ const __m128i result0213 = _mm_castps_si128(_mm_shuffle_ps(
+ _mm_castsi128_ps(a0b0_a2b2_rounded_2x),
+ _mm_castsi128_ps(a1b1_a3b3_rounded_2x),
+ _MM_SHUFFLE(3, 1, 3, 1)));
+ const __m128i result = _mm_shuffle_epi32(result0213, _MM_SHUFFLE(3, 1, 2, 0));
+ #endif
+
+ // saturate those which overflowed
+ #ifdef __SSE4_1__
+ const __m128i saturated_result = _mm_blendv_epi8(result, min, saturation_mask);
+ #else
+ const __m128i saturated_result = _mm_or_si128(
+ _mm_and_si128(saturation_mask, min),
+ _mm_andnot_si128(saturation_mask, result));
+ #endif
+ return saturated_result;
+ }
+#endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
diff --git a/src/requantization/gemmlowp-scalar.c b/src/requantization/gemmlowp-scalar.c
index 23968b8..a6f9acb 100644
--- a/src/requantization/gemmlowp-scalar.c
+++ b/src/requantization/gemmlowp-scalar.c
@@ -14,7 +14,7 @@
#include <xnnpack/requantization-stubs.h>
#include <xnnpack/scalar-utils.h>
-#include "gemmlowp-scalar.h"
+#include "gemmlowp-requantization.h"
void xnn_requantize_gemmlowp__scalar(
diff --git a/src/requantization/gemmlowp-scalar.h b/src/requantization/gemmlowp-scalar.h
deleted file mode 100644
index d4d85c9..0000000
--- a/src/requantization/gemmlowp-scalar.h
+++ /dev/null
@@ -1,47 +0,0 @@
-// Copyright (c) Facebook, Inc. and its affiliates.
-// All rights reserved.
-//
-// Copyright 2019 Google LLC
-//
-// This source code is licensed under the BSD-style license found in the
-// LICENSE file in the root directory of this source tree.
-
-#pragma once
-
-#include <stdint.h>
-#include <limits.h>
-
-// The code below is adapted from Google's gemmlowp library.
-// It is only used in XNNPACK unit tests and comparative benchmarks, but not the library itself.
-//
-// Copyright 2015 Google Inc. All Rights Reserved.
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-
-inline static int32_t gemmlowp_scalar_vqrdmulh_s32(int32_t a, int32_t b)
-{
- const bool overflow = a == b && a == INT32_MIN;
- const int64_t ab_64 = (int64_t) a * (int64_t) b;
- const int32_t nudge = (a ^ b) >= 0 ? INT32_C(0x40000000) : -INT32_C(0x3FFFFFFF);
- const int32_t ab_x2_high32 = (int32_t) ((ab_64 + nudge) / INT64_C(0x80000000));
- return overflow ? INT32_MAX : ab_x2_high32;
-}
-
-inline static int32_t gemmlowp_scalar_rdivbypo2_s32(int32_t x, int exponent)
-{
- const int32_t mask = ((1 << exponent) - 1);
- const int32_t remainder = x & mask;
- const int32_t threshold = (mask >> 1) + (int32_t) (x < 0);
- return asr_s32(x, exponent) + (int32_t) (remainder > threshold);
-}
diff --git a/src/requantization/gemmlowp-sse.h b/src/requantization/gemmlowp-sse.h
deleted file mode 100644
index d8e2cda..0000000
--- a/src/requantization/gemmlowp-sse.h
+++ /dev/null
@@ -1,119 +0,0 @@
-// Copyright (c) Facebook, Inc. and its affiliates.
-// All rights reserved.
-//
-// Copyright 2019 Google LLC
-//
-// This source code is licensed under the BSD-style license found in the
-// LICENSE file in the root directory of this source tree.
-
-#pragma once
-
-#include <limits.h>
-
-#include <immintrin.h>
-
-// The code below is adapted from Google's gemmlowp library.
-// It is only used in XNNPACK unit tests and comparative benchmarks,
-// but not the library itself.
-//
-// Copyright 2015 Google Inc. All Rights Reserved.
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-static inline __m128i gemmlowp_sse_rdivbypo2_s32(__m128i x, int exponent) {
- const __m128i mask = _mm_set1_epi32((int32_t) ((UINT64_C(1) << exponent) - UINT64_C(1)));
- const __m128i remainder = _mm_and_si128(x, mask);
- const __m128i threshold = _mm_sub_epi32(
- _mm_srli_epi32(mask, 1), _mm_cmplt_epi32(x, _mm_setzero_si128()));
- return _mm_sub_epi32(
- _mm_sra_epi32(x, _mm_cvtsi32_si128(exponent)),
- _mm_cmpgt_epi32(remainder, threshold));
-}
-
-static inline __m128i gemmlowp_sse_mul_s32(__m128i a, __m128i b) {
-#ifdef __SSE4_1__
- return _mm_mul_epi32(a, b);
-#else
- __m128i sign, zero, mul_us, a_neg, b_neg, mul_us_neg;
- sign = _mm_xor_si128(a, b);
- sign = _mm_srai_epi32(sign, 31); // promote sign bit to all fields, all fff if
- // negative and all 0 if positive
- sign = _mm_shuffle_epi32(
- sign,
- _MM_SHUFFLE(2, 2, 0, 0)); // promote sign bit to 3 and 1st data lanes
- zero = _mm_setzero_si128();
-#ifdef __SSSE3__
- a_neg = _mm_abs_epi32(a); // negate a and b
- b_neg = _mm_abs_epi32(b); // negate a and b
-#else // pre-SSSE3
- const __m128i a_neg_mask = _mm_cmplt_epi32(a, zero);
- a_neg = _mm_sub_epi32(_mm_xor_si128(a, a_neg_mask), a_neg_mask);
- const __m128i b_neg_mask = _mm_cmplt_epi32(b, zero);
- b_neg = _mm_sub_epi32(_mm_xor_si128(b, b_neg_mask), b_neg_mask);
-#endif // pre-SSSE3
- mul_us = _mm_mul_epu32(a_neg, b_neg); // uses 0 and 2nd data lanes, (abs), the
- // multiplication gives 64 bit result
- mul_us_neg = _mm_sub_epi64(zero, mul_us);
- mul_us_neg = _mm_and_si128(sign, mul_us_neg);
- mul_us = _mm_andnot_si128(sign, mul_us);
- return _mm_or_si128(mul_us, mul_us_neg);
-#endif
-}
-
-static inline __m128i gemmlowp_sse_vqrdmulh_s32(__m128i a, __m128i b) {
- // saturation only happen if a == b == INT32_MIN
- const __m128i min = _mm_set1_epi32(INT32_MIN);
- const __m128i saturation_mask =
- _mm_and_si128(_mm_cmpeq_epi32(a, b), _mm_cmpeq_epi32(a, min));
-
- // a = a0 | a1 | a2 | a3
- // b = b0 | b1 | b2 | b3
- const __m128i a0_a2 = a;
- const __m128i a1_a3 = _mm_srli_si128(a, 4);
- const __m128i b0_b2 = b;
- const __m128i b1_b3 = _mm_srli_si128(b, 4);
-
- const __m128i a0b0_a2b2 = gemmlowp_sse_mul_s32(a0_a2, b0_b2);
- const __m128i a1b1_a3b3 = gemmlowp_sse_mul_s32(a1_a3, b1_b3);
-
- // do the rounding and take into account that it will be doubled
- const __m128i nudge = _mm_set1_epi64x(1 << 30);
- const __m128i a0b0_a2b2_rounded = _mm_add_epi64(a0b0_a2b2, nudge);
- const __m128i a1b1_a3b3_rounded = _mm_add_epi64(a1b1_a3b3, nudge);
-
- // do the doubling
- const __m128i a0b0_a2b2_rounded_2x = _mm_slli_epi64(a0b0_a2b2_rounded, 1);
- const __m128i a1b1_a3b3_rounded_2x = _mm_slli_epi64(a1b1_a3b3_rounded, 1);
-
-// get the high part of the products
-#ifdef __SSE4_1__
- const __m128i result = _mm_blend_epi16(
- _mm_srli_epi64(a0b0_a2b2_rounded_2x, 32), a1b1_a3b3_rounded_2x, 0xCC);
-#else
- const __m128i result0213 = _mm_castps_si128(_mm_shuffle_ps(
- _mm_castsi128_ps(a0b0_a2b2_rounded_2x),
- _mm_castsi128_ps(a1b1_a3b3_rounded_2x),
- _MM_SHUFFLE(3, 1, 3, 1)));
- const __m128i result = _mm_shuffle_epi32(result0213, _MM_SHUFFLE(3, 1, 2, 0));
-#endif
-
-// saturate those which overflowed
-#ifdef __SSE4_1__
- const __m128i saturated_result = _mm_blendv_epi8(result, min, saturation_mask);
-#else
- const __m128i saturated_result = _mm_or_si128(
- _mm_and_si128(saturation_mask, min),
- _mm_andnot_si128(saturation_mask, result));
-#endif
- return saturated_result;
-}
diff --git a/src/requantization/gemmlowp-sse2.c b/src/requantization/gemmlowp-sse2.c
index 105bdc4..5c380ab 100644
--- a/src/requantization/gemmlowp-sse2.c
+++ b/src/requantization/gemmlowp-sse2.c
@@ -15,7 +15,7 @@
#include <xnnpack/requantization-stubs.h>
-#include "gemmlowp-sse.h"
+#include "gemmlowp-requantization.h"
void xnn_requantize_gemmlowp__sse2(
diff --git a/src/requantization/gemmlowp-sse4.c b/src/requantization/gemmlowp-sse4.c
index 1855a6d..533645d 100644
--- a/src/requantization/gemmlowp-sse4.c
+++ b/src/requantization/gemmlowp-sse4.c
@@ -15,7 +15,7 @@
#include <xnnpack/requantization-stubs.h>
-#include "gemmlowp-sse.h"
+#include "gemmlowp-requantization.h"
void xnn_requantize_gemmlowp__sse4(
diff --git a/src/requantization/gemmlowp-ssse3.c b/src/requantization/gemmlowp-ssse3.c
index ce2357f..5974541 100644
--- a/src/requantization/gemmlowp-ssse3.c
+++ b/src/requantization/gemmlowp-ssse3.c
@@ -15,7 +15,7 @@
#include <xnnpack/requantization-stubs.h>
-#include "gemmlowp-sse.h"
+#include "gemmlowp-requantization.h"
void xnn_requantize_gemmlowp__ssse3(
diff --git a/test/requantization-tester.h b/test/requantization-tester.h
index 289f926..bd1b4db 100644
--- a/test/requantization-tester.h
+++ b/test/requantization-tester.h
@@ -21,6 +21,7 @@
#include <vector>
#include <xnnpack/params.h>
+#include <xnnpack/requantization-stubs.h>
#include <xnnpack/scalar-utils.h>
@@ -39,13 +40,13 @@
return ldexpf(1.0f, -s());
}
- inline RequantizationTester& zeroPoint(int32_t zeroPoint) {
- this->zeroPoint_ = zeroPoint;
+ inline RequantizationTester& zero_point(int32_t zero_point) {
+ this->zero_point_ = zero_point;
return *this;
}
- inline int32_t zeroPoint() const {
- return this->zeroPoint_;
+ inline int32_t zero_point() const {
+ return this->zero_point_;
}
inline RequantizationTester& qmin(uint8_t qmin) {
@@ -82,9 +83,9 @@
* - no output clamping
* produces exactly i, provided that ((i - zero point) * 2**s) does not overflow.
*/
- void testExactDivideByPO2(requantization_function requantize) const {
- ASSERT_GE(zeroPoint(), 0);
- ASSERT_LE(zeroPoint(), 255);
+ void TestExactDivideByPO2(requantization_function requantize) const {
+ ASSERT_GE(zero_point(), 0);
+ ASSERT_LE(zero_point(), 255);
/* Note: need s >= 1 to ensure scale = exp2(-s) < 1.0 */
ASSERT_GE(s(), 1);
@@ -92,20 +93,20 @@
std::vector<int32_t> inputs(256);
std::vector<uint8_t> outputs(inputs.size());
- const int32_t maxI = (uint32_t(std::numeric_limits<int32_t>::max()) >> s()) + zeroPoint();
- const int32_t minI = -(-uint32_t(std::numeric_limits<int32_t>::min()) >> s()) + zeroPoint();
+ const int32_t maxI = (uint32_t(std::numeric_limits<int32_t>::max()) >> s()) + zero_point();
+ const int32_t minI = -(-uint32_t(std::numeric_limits<int32_t>::min()) >> s()) + zero_point();
for (int32_t i = 0; i < 256; i++) {
const int32_t clampedI = std::max(minI, std::min(maxI, i));
- inputs[i] = int32_t(uint32_t(clampedI - zeroPoint()) << s());
+ inputs[i] = int32_t(uint32_t(clampedI - zero_point()) << s());
}
requantize(inputs.size(), inputs.data(),
- scale(), zeroPoint(), qmin(), qmax(),
+ scale(), zero_point(), qmin(), qmax(),
outputs.data());
for (int32_t i = 0; i < 256; i++) {
const int32_t clampedI = std::max(minI, std::min(maxI, i));
ASSERT_EQ(clampedI, outputs[i]) << "i = " << i << ", clamped i = " << clampedI <<
", min i = " << minI << ", max i = " << maxI <<
- ", s = " << s() << ", zero point = " << zeroPoint();
+ ", s = " << s() << ", zero point = " << zero_point();
}
}
@@ -116,9 +117,9 @@
* - no output clamping
* produces exactly i, provided that ((i - zero point) * 2**s) does not overflow.
*/
- void testDivideByPO2WithRoundingUp(requantization_function requantize) {
- ASSERT_GE(zeroPoint(), 0);
- ASSERT_LE(zeroPoint(), 255);
+ void TestDivideByPO2WithRoundingUp(requantization_function requantize) {
+ ASSERT_GE(zero_point(), 0);
+ ASSERT_LE(zero_point(), 255);
/* Note: need s >= 1 to ensure scale = exp2(-s) < 1.0 */
ASSERT_GE(s(), 1);
@@ -127,19 +128,19 @@
std::vector<int32_t> inputs(256);
std::vector<uint8_t> outputs(inputs.size());
for (int32_t i = 0; i < 256; i++) {
- const int64_t input = RequantizationTester::shiftLeft(i - zeroPoint(), s()) -
- (INT64_C(1) << (s() - 1)) + (int64_t) (i <= zeroPoint());
+ const int64_t input = RequantizationTester::ShiftLeft(i - zero_point(), s()) -
+ (INT64_C(1) << (s() - 1)) + (int64_t) (i <= zero_point());
inputs[i] = int32_t(input);
}
requantize(inputs.size(), inputs.data(),
- scale(), zeroPoint(), qmin(), qmax(),
+ scale(), zero_point(), qmin(), qmax(),
outputs.data());
for (int32_t i = 0; i < 256; i++) {
- const int64_t input = RequantizationTester::shiftLeft(i - zeroPoint(), s()) -
- (INT64_C(1) << (s() - 1)) + (int64_t) (i <= zeroPoint());
+ const int64_t input = RequantizationTester::ShiftLeft(i - zero_point(), s()) -
+ (INT64_C(1) << (s() - 1)) + (int64_t) (i <= zero_point());
if (int32_t(input) == input) {
ASSERT_EQ(i, uint32_t(outputs[i])) << "i = " << i << ", input = " << input <<
- ", s = " << s() << ", zero point = " << zeroPoint();
+ ", s = " << s() << ", zero point = " << zero_point();
}
}
}
@@ -151,9 +152,9 @@
* - no output clamping
* produces exactly i, provided that ((i - zero point) * 2**s) does not overflow.
*/
- void testDivideByPO2WithRoundingDown(requantization_function requantize) {
- ASSERT_GE(zeroPoint(), 0);
- ASSERT_LE(zeroPoint(), 255);
+ void TestDivideByPO2WithRoundingDown(requantization_function requantize) {
+ ASSERT_GE(zero_point(), 0);
+ ASSERT_LE(zero_point(), 255);
/* Note: need s >= 1 to ensure scale = exp2(-s) < 1.0 */
ASSERT_GE(s(), 1);
@@ -162,26 +163,26 @@
std::vector<int32_t> inputs(256);
std::vector<uint8_t> outputs(inputs.size());
for (int32_t i = 0; i < 256; i++) {
- const int64_t input = RequantizationTester::shiftLeft(i - zeroPoint(), s()) +
- (INT64_C(1) << (s() - 1)) - (int64_t) (i >= zeroPoint());
+ const int64_t input = RequantizationTester::ShiftLeft(i - zero_point(), s()) +
+ (INT64_C(1) << (s() - 1)) - (int64_t) (i >= zero_point());
inputs[i] = int32_t(input);
}
requantize(inputs.size(), inputs.data(),
- scale(), zeroPoint(), qmin(), qmax(),
+ scale(), zero_point(), qmin(), qmax(),
outputs.data());
for (int32_t i = 0; i < 256; i++) {
- const int64_t input = RequantizationTester::shiftLeft(i - zeroPoint(), s()) +
- (INT64_C(1) << (s() - 1)) - (int64_t) (i >= zeroPoint());
+ const int64_t input = RequantizationTester::ShiftLeft(i - zero_point(), s()) +
+ (INT64_C(1) << (s() - 1)) - (int64_t) (i >= zero_point());
if (int32_t(input) == input) {
ASSERT_EQ(i, uint32_t(outputs[i])) << "i = " << i << ", input = " << input <<
- ", s = " << s() << ", zero point = " << zeroPoint();
+ ", s = " << s() << ", zero point = " << zero_point();
}
}
}
- void testDivideByPO2WithRoundingAway(requantization_function requantize) {
- ASSERT_GE(zeroPoint(), 0);
- ASSERT_LE(zeroPoint(), 255);
+ void TestDivideByPO2WithRoundingAway(requantization_function requantize) {
+ ASSERT_GE(zero_point(), 0);
+ ASSERT_LE(zero_point(), 255);
/* Note: need s >= 1 to ensure scale = exp2(-s) < 1.0 */
ASSERT_GE(s(), 1);
@@ -190,7 +191,7 @@
std::vector<int32_t> inputs(256);
std::vector<uint8_t> outputs(inputs.size());
for (int32_t i = 0; i < 256; i++) {
- int64_t input = RequantizationTester::shiftLeft(i - zeroPoint(), s());
+ int64_t input = RequantizationTester::ShiftLeft(i - zero_point(), s());
if (input > 0) {
input -= INT64_C(1) << (s() - 1);
} else if (input < 0) {
@@ -199,10 +200,10 @@
inputs[i] = int32_t(input);
}
requantize(inputs.size(), inputs.data(),
- scale(), zeroPoint(), qmin(), qmax(),
+ scale(), zero_point(), qmin(), qmax(),
outputs.data());
for (uint32_t i = 0; i < 256; i++) {
- int64_t input = RequantizationTester::shiftLeft(i - zeroPoint(), s());
+ int64_t input = RequantizationTester::ShiftLeft(i - zero_point(), s());
if (input > 0) {
input -= INT64_C(1) << (s() - 1);
} else if (input < 0) {
@@ -210,26 +211,26 @@
}
if (int32_t(input) == input) {
ASSERT_EQ(i, uint32_t(outputs[i])) << "i = " << i << ", input = " << input <<
- ", s = " << s() << ", zero point = " << zeroPoint();
+ ", s = " << s() << ", zero point = " << zero_point();
}
}
}
- void testSpecialCases(requantization_function requantize) {
+ void TestSpecialCases(requantization_function requantize) {
std::vector<int32_t> inputs(256);
std::vector<uint8_t> outputs(inputs.size());
std::fill(inputs.begin(), inputs.end(), std::numeric_limits<int32_t>::min());
- for (int32_t zeroPoint = 0; zeroPoint < 256; zeroPoint++) {
+ for (int32_t zero_point = 0; zero_point < 256; zero_point++) {
requantize(
inputs.size(),
inputs.data(),
ldexpf(1.0f, -32) /* scale */,
- zeroPoint /* zero point */,
+ zero_point /* zero point */,
std::numeric_limits<uint8_t>::min(),
std::numeric_limits<uint8_t>::max(),
outputs.data());
- ASSERT_EQ(std::max(int32_t(0), zeroPoint - 1), *std::min_element(outputs.cbegin(), outputs.cend()));
+ ASSERT_EQ(std::max(int32_t(0), zero_point - 1), *std::min_element(outputs.cbegin(), outputs.cend()));
}
std::fill(inputs.begin(), inputs.end(), std::numeric_limits<int32_t>::max());
@@ -246,7 +247,7 @@
}
}
- void testRandomCasesPrecise(requantization_function requantize) {
+ void TestRandomCasesPrecise(requantization_function requantize) {
std::random_device random_device;
std::mt19937 mtRng(random_device());
for (size_t iteration = 0; iteration < iterations(); iteration++) {
@@ -255,17 +256,17 @@
std::vector<int32_t> inputs(4096);
std::vector<uint8_t> outputs(inputs.size());
- const uint8_t zeroPoint = UINT8_C(128);
- std::uniform_real_distribution<float> scaleDistribution(0x1.000000p-23f, 0x1.FFFFFEp-1f);
- const float scale = scaleDistribution(mtRng);
+ const uint8_t zero_point = UINT8_C(128);
+ std::uniform_real_distribution<float> scale_distribution(0x1.000000p-23f, 0x1.FFFFFEp-1f);
+ const float scale = scale_distribution(mtRng);
for (size_t i = 0; i < inputs.size(); i++) {
- const uint8_t approximateOutput = rng();
- const int32_t input = int32_t(double(approximateOutput) / double(scale));
+ const uint8_t approximate_output = rng();
+ const int32_t input = int32_t(double(approximate_output) / double(scale));
inputs[i] = input;
}
requantize(
- inputs.size(), inputs.data(), scale, zeroPoint,
+ inputs.size(), inputs.data(), scale, zero_point,
std::numeric_limits<uint8_t>::min(),
std::numeric_limits<uint8_t>::max(),
outputs.data());
@@ -276,17 +277,17 @@
*std::min_element(outputs.cbegin(), outputs.cend()));
for (size_t i = 0; i < inputs.size(); i++) {
- const uint8_t referenceOutput =
+ const uint8_t reference_output =
scalar_requantize_precise(
- inputs[i], scale, zeroPoint,
+ inputs[i], scale, zero_point,
std::numeric_limits<uint8_t>::min(),
std::numeric_limits<uint8_t>::max());
- ASSERT_EQ(uint32_t(referenceOutput), uint32_t(outputs[i]));
+ ASSERT_EQ(uint32_t(reference_output), uint32_t(outputs[i]));
}
}
}
- void testRandomCasesApproximate(requantization_function requantize) {
+ void TestRandomCasesApproximate(requantization_function requantize) {
std::random_device random_device;
std::mt19937 mtRng(random_device());
for (size_t iteration = 0; iteration < iterations(); iteration++) {
@@ -295,17 +296,17 @@
std::vector<int32_t> inputs(4096);
std::vector<uint8_t> outputs(inputs.size());
- const uint8_t zeroPoint = UINT8_C(128);
- std::uniform_real_distribution<float> scaleDistribution(0x1.000000p-23f, 0x1.FFFFFEp-1f);
- const float scale = scaleDistribution(mtRng);
+ const uint8_t zero_point = UINT8_C(128);
+ std::uniform_real_distribution<float> scale_distribution(0x1.000000p-23f, 0x1.FFFFFEp-1f);
+ const float scale = scale_distribution(mtRng);
for (size_t i = 0; i < inputs.size(); i++) {
- const uint8_t approximateOutput = rng();
- const int32_t input = int32_t(double(approximateOutput) / double(scale));
+ const uint8_t approximate_output = rng();
+ const int32_t input = int32_t(double(approximate_output) / double(scale));
inputs[i] = input;
}
requantize(
- inputs.size(), inputs.data(), scale, zeroPoint,
+ inputs.size(), inputs.data(), scale, zero_point,
std::numeric_limits<uint8_t>::min(),
std::numeric_limits<uint8_t>::max(),
outputs.data());
@@ -316,19 +317,19 @@
*std::min_element(outputs.cbegin(), outputs.cend()));
for (size_t i = 0; i < inputs.size(); i++) {
- const double referenceOutput =
- RequantizationTester::requantizeApproximate(
- inputs[i], scale, zeroPoint,
+ const double reference_output =
+ RequantizationTester::RequantizeApproximate(
+ inputs[i], scale, zero_point,
std::numeric_limits<uint8_t>::min(),
std::numeric_limits<uint8_t>::max());
- ASSERT_LE(fabs(referenceOutput - double(outputs[i])), 0.55) <<
+ ASSERT_LE(fabs(reference_output - double(outputs[i])), 0.55) <<
"input = " << inputs[i] <<
- ", output = " << uint32_t(outputs[i]) << ", reference output = " << referenceOutput;
+ ", output = " << uint32_t(outputs[i]) << ", reference output = " << reference_output;
}
}
}
- void testRandomCasesAgainstReference(requantization_function requantize, requantization_function requantizeReference) {
+ void TestRandomCasesAgainstReference(requantization_function requantize, requantization_function requantize_reference) {
std::random_device random_device;
std::mt19937 mtRng(random_device());
for (size_t iteration = 0; iteration < iterations(); iteration++) {
@@ -336,28 +337,28 @@
std::vector<int32_t> inputs(4096);
std::vector<uint8_t> outputs(inputs.size());
- std::vector<uint8_t> referenceOutputs(inputs.size());
+ std::vector<uint8_t> reference_outputs(inputs.size());
- const uint8_t zeroPoint = UINT8_C(128);
- std::uniform_real_distribution<float> scaleDistribution(0x1.000000p-23f, 0x1.FFFFFEp-1f);
- const float scale = scaleDistribution(mtRng);
+ const uint8_t zero_point = UINT8_C(128);
+ std::uniform_real_distribution<float> scale_distribution(0x1.000000p-23f, 0x1.FFFFFEp-1f);
+ const float scale = scale_distribution(mtRng);
for (size_t i = 0; i < inputs.size(); i++) {
- const uint8_t approximateOutput = rng();
- const int32_t input = int32_t(double(approximateOutput) / double(scale));
+ const uint8_t approximate_output = rng();
+ const int32_t input = int32_t(double(approximate_output) / double(scale));
inputs[i] = input;
}
requantize(
- inputs.size(), inputs.data(), scale, zeroPoint,
+ inputs.size(), inputs.data(), scale, zero_point,
std::numeric_limits<uint8_t>::min(),
std::numeric_limits<uint8_t>::max(),
outputs.data());
- requantizeReference(
- inputs.size(), inputs.data(), scale, zeroPoint,
+ requantize_reference(
+ inputs.size(), inputs.data(), scale, zero_point,
std::numeric_limits<uint8_t>::min(),
std::numeric_limits<uint8_t>::max(),
- referenceOutputs.data());
+ reference_outputs.data());
/* Ensure that outputs are not all identical, as in this case Test doesn't validate much */
ASSERT_NE(
@@ -365,42 +366,42 @@
*std::min_element(outputs.cbegin(), outputs.cend()));
for (size_t i = 0; i < inputs.size(); i++) {
- ASSERT_EQ(uint32_t(referenceOutputs[i]), uint32_t(outputs[i]));
+ ASSERT_EQ(uint32_t(reference_outputs[i]), uint32_t(outputs[i]));
}
}
}
- static inline int64_t shiftLeft(int64_t w, uint32_t n) {
+ static inline int64_t ShiftLeft(int64_t w, uint32_t n) {
return (int64_t) ((uint64_t) w << n);
}
- static inline double requantizeApproximate(
+ static inline double RequantizeApproximate(
int32_t value,
float scale,
- uint8_t zeroPoint,
+ uint8_t zero_point,
uint8_t qmin,
uint8_t qmax)
{
assert(scale < 1.0f);
assert(scale >= 0x1.0p-32f);
- double clampedValue = double(value) * double(scale) + double(zeroPoint);
+ double clamped_value = double(value) * double(scale) + double(zero_point);
const double fmin = double(qmin);
- if (clampedValue < fmin) {
- clampedValue = fmin;
+ if (clamped_value < fmin) {
+ clamped_value = fmin;
}
const double fmax = double(qmax);
- if (clampedValue > fmax) {
- clampedValue = fmax;
+ if (clamped_value > fmax) {
+ clamped_value = fmax;
}
- return clampedValue;
+ return clamped_value;
}
private:
- size_t zeroPoint_{0};
+ size_t zero_point_{0};
size_t s_{1};
uint8_t qmin_{std::numeric_limits<uint8_t>::min()};
uint8_t qmax_{std::numeric_limits<uint8_t>::max()};
diff --git a/test/requantization.cc b/test/requantization.cc
index 2337722..fe14d71 100644
--- a/test/requantization.cc
+++ b/test/requantization.cc
@@ -26,63 +26,63 @@
for (uint32_t s = 1; s < 32; s++) {
RequantizationTester()
.s(s)
- .testExactDivideByPO2(xnn_requantize_precise__scalar_unsigned32);
+ .TestExactDivideByPO2(xnn_requantize_precise__scalar_unsigned32);
}
}
TEST(PRECISE__SCALAR_UNSIGNED32, exact_divide_by_po2_with_zero_point) {
- for (int32_t zeroPoint = 1; zeroPoint < 256; zeroPoint++) {
+ for (int32_t zero_point = 1; zero_point < 256; zero_point++) {
for (uint32_t s = 1; s < 32; s++) {
RequantizationTester()
- .zeroPoint(zeroPoint)
+ .zero_point(zero_point)
.s(s)
- .testExactDivideByPO2(xnn_requantize_precise__scalar_unsigned32);
+ .TestExactDivideByPO2(xnn_requantize_precise__scalar_unsigned32);
}
}
}
TEST(PRECISE__SCALAR_UNSIGNED32, divide_by_po2_with_rounding_up) {
- for (int32_t zeroPoint = 0; zeroPoint < 256; zeroPoint++) {
+ for (int32_t zero_point = 0; zero_point < 256; zero_point++) {
for (uint32_t s = 1; s < 32; s++) {
RequantizationTester()
- .zeroPoint(zeroPoint)
+ .zero_point(zero_point)
.s(s)
- .testDivideByPO2WithRoundingUp(xnn_requantize_precise__scalar_unsigned32);
+ .TestDivideByPO2WithRoundingUp(xnn_requantize_precise__scalar_unsigned32);
}
}
}
TEST(PRECISE__SCALAR_UNSIGNED32, divide_by_po2_with_rounding_down) {
- for (int32_t zeroPoint = 0; zeroPoint < 256; zeroPoint++) {
+ for (int32_t zero_point = 0; zero_point < 256; zero_point++) {
for (uint32_t s = 1; s < 32; s++) {
RequantizationTester()
- .zeroPoint(zeroPoint)
+ .zero_point(zero_point)
.s(s)
- .testDivideByPO2WithRoundingDown(xnn_requantize_precise__scalar_unsigned32);
+ .TestDivideByPO2WithRoundingDown(xnn_requantize_precise__scalar_unsigned32);
}
}
}
TEST(PRECISE__SCALAR_UNSIGNED32, divide_by_po2_with_rounding_away) {
- for (int32_t zeroPoint = 0; zeroPoint < 256; zeroPoint++) {
+ for (int32_t zero_point = 0; zero_point < 256; zero_point++) {
for (uint32_t s = 1; s < 32; s++) {
RequantizationTester()
- .zeroPoint(zeroPoint)
+ .zero_point(zero_point)
.s(s)
- .testDivideByPO2WithRoundingAway(xnn_requantize_precise__scalar_unsigned32);
+ .TestDivideByPO2WithRoundingAway(xnn_requantize_precise__scalar_unsigned32);
}
}
}
TEST(PRECISE__SCALAR_UNSIGNED32, special_cases) {
RequantizationTester()
- .testSpecialCases(xnn_requantize_precise__scalar_unsigned32);
+ .TestSpecialCases(xnn_requantize_precise__scalar_unsigned32);
}
TEST(PRECISE__SCALAR_UNSIGNED32, random_cases) {
RequantizationTester()
.iterations(100)
- .testRandomCasesPrecise(xnn_requantize_precise__scalar_unsigned32);
+ .TestRandomCasesPrecise(xnn_requantize_precise__scalar_unsigned32);
}
@@ -94,63 +94,63 @@
for (uint32_t s = 1; s < 32; s++) {
RequantizationTester()
.s(s)
- .testExactDivideByPO2(xnn_requantize_precise__scalar_unsigned64);
+ .TestExactDivideByPO2(xnn_requantize_precise__scalar_unsigned64);
}
}
TEST(PRECISE__SCALAR_UNSIGNED64, exact_divide_by_po2_with_zero_point) {
- for (int32_t zeroPoint = 1; zeroPoint < 256; zeroPoint++) {
+ for (int32_t zero_point = 1; zero_point < 256; zero_point++) {
for (uint32_t s = 1; s < 32; s++) {
RequantizationTester()
- .zeroPoint(zeroPoint)
+ .zero_point(zero_point)
.s(s)
- .testExactDivideByPO2(xnn_requantize_precise__scalar_unsigned64);
+ .TestExactDivideByPO2(xnn_requantize_precise__scalar_unsigned64);
}
}
}
TEST(PRECISE__SCALAR_UNSIGNED64, divide_by_po2_with_rounding_up) {
- for (int32_t zeroPoint = 0; zeroPoint < 256; zeroPoint++) {
+ for (int32_t zero_point = 0; zero_point < 256; zero_point++) {
for (uint32_t s = 1; s < 32; s++) {
RequantizationTester()
- .zeroPoint(zeroPoint)
+ .zero_point(zero_point)
.s(s)
- .testDivideByPO2WithRoundingUp(xnn_requantize_precise__scalar_unsigned64);
+ .TestDivideByPO2WithRoundingUp(xnn_requantize_precise__scalar_unsigned64);
}
}
}
TEST(PRECISE__SCALAR_UNSIGNED64, divide_by_po2_with_rounding_down) {
- for (int32_t zeroPoint = 0; zeroPoint < 256; zeroPoint++) {
+ for (int32_t zero_point = 0; zero_point < 256; zero_point++) {
for (uint32_t s = 1; s < 32; s++) {
RequantizationTester()
- .zeroPoint(zeroPoint)
+ .zero_point(zero_point)
.s(s)
- .testDivideByPO2WithRoundingDown(xnn_requantize_precise__scalar_unsigned64);
+ .TestDivideByPO2WithRoundingDown(xnn_requantize_precise__scalar_unsigned64);
}
}
}
TEST(PRECISE__SCALAR_UNSIGNED64, divide_by_po2_with_rounding_away) {
- for (int32_t zeroPoint = 0; zeroPoint < 256; zeroPoint++) {
+ for (int32_t zero_point = 0; zero_point < 256; zero_point++) {
for (uint32_t s = 1; s < 32; s++) {
RequantizationTester()
- .zeroPoint(zeroPoint)
+ .zero_point(zero_point)
.s(s)
- .testDivideByPO2WithRoundingAway(xnn_requantize_precise__scalar_unsigned64);
+ .TestDivideByPO2WithRoundingAway(xnn_requantize_precise__scalar_unsigned64);
}
}
}
TEST(PRECISE__SCALAR_UNSIGNED64, special_cases) {
RequantizationTester()
- .testSpecialCases(xnn_requantize_precise__scalar_unsigned64);
+ .TestSpecialCases(xnn_requantize_precise__scalar_unsigned64);
}
TEST(PRECISE__SCALAR_UNSIGNED64, random_cases) {
RequantizationTester()
.iterations(100)
- .testRandomCasesPrecise(xnn_requantize_precise__scalar_unsigned64);
+ .TestRandomCasesPrecise(xnn_requantize_precise__scalar_unsigned64);
}
@@ -162,63 +162,63 @@
for (uint32_t s = 1; s < 32; s++) {
RequantizationTester()
.s(s)
- .testExactDivideByPO2(xnn_requantize_precise__scalar_signed64);
+ .TestExactDivideByPO2(xnn_requantize_precise__scalar_signed64);
}
}
TEST(PRECISE__SCALAR_SIGNED64, exact_divide_by_po2_with_zero_point) {
- for (int32_t zeroPoint = 1; zeroPoint < 256; zeroPoint++) {
+ for (int32_t zero_point = 1; zero_point < 256; zero_point++) {
for (uint32_t s = 1; s < 32; s++) {
RequantizationTester()
- .zeroPoint(zeroPoint)
+ .zero_point(zero_point)
.s(s)
- .testExactDivideByPO2(xnn_requantize_precise__scalar_signed64);
+ .TestExactDivideByPO2(xnn_requantize_precise__scalar_signed64);
}
}
}
TEST(PRECISE__SCALAR_SIGNED64, divide_by_po2_with_rounding_up) {
- for (int32_t zeroPoint = 0; zeroPoint < 256; zeroPoint++) {
+ for (int32_t zero_point = 0; zero_point < 256; zero_point++) {
for (uint32_t s = 1; s < 32; s++) {
RequantizationTester()
- .zeroPoint(zeroPoint)
+ .zero_point(zero_point)
.s(s)
- .testDivideByPO2WithRoundingUp(xnn_requantize_precise__scalar_signed64);
+ .TestDivideByPO2WithRoundingUp(xnn_requantize_precise__scalar_signed64);
}
}
}
TEST(PRECISE__SCALAR_SIGNED64, divide_by_po2_with_rounding_down) {
- for (int32_t zeroPoint = 0; zeroPoint < 256; zeroPoint++) {
+ for (int32_t zero_point = 0; zero_point < 256; zero_point++) {
for (uint32_t s = 1; s < 32; s++) {
RequantizationTester()
- .zeroPoint(zeroPoint)
+ .zero_point(zero_point)
.s(s)
- .testDivideByPO2WithRoundingDown(xnn_requantize_precise__scalar_signed64);
+ .TestDivideByPO2WithRoundingDown(xnn_requantize_precise__scalar_signed64);
}
}
}
TEST(PRECISE__SCALAR_SIGNED64, divide_by_po2_with_rounding_away) {
- for (int32_t zeroPoint = 0; zeroPoint < 256; zeroPoint++) {
+ for (int32_t zero_point = 0; zero_point < 256; zero_point++) {
for (uint32_t s = 1; s < 32; s++) {
RequantizationTester()
- .zeroPoint(zeroPoint)
+ .zero_point(zero_point)
.s(s)
- .testDivideByPO2WithRoundingAway(xnn_requantize_precise__scalar_signed64);
+ .TestDivideByPO2WithRoundingAway(xnn_requantize_precise__scalar_signed64);
}
}
}
TEST(PRECISE__SCALAR_SIGNED64, special_cases) {
RequantizationTester()
- .testSpecialCases(xnn_requantize_precise__scalar_signed64);
+ .TestSpecialCases(xnn_requantize_precise__scalar_signed64);
}
TEST(PRECISE__SCALAR_SIGNED64, random_cases) {
RequantizationTester()
.iterations(100)
- .testRandomCasesPrecise(xnn_requantize_precise__scalar_signed64);
+ .TestRandomCasesPrecise(xnn_requantize_precise__scalar_signed64);
}
@@ -229,7 +229,7 @@
TEST(FP32__SCALAR_LRINTF, random_cases) {
RequantizationTester()
.iterations(1000)
- .testRandomCasesApproximate(xnn_requantize_fp32__scalar_lrintf);
+ .TestRandomCasesApproximate(xnn_requantize_fp32__scalar_lrintf);
}
@@ -240,7 +240,7 @@
TEST(FP32__SCALAR_MAGIC, random_cases) {
RequantizationTester()
.iterations(1000)
- .testRandomCasesApproximate(xnn_requantize_fp32__scalar_magic);
+ .TestRandomCasesApproximate(xnn_requantize_fp32__scalar_magic);
}
@@ -252,28 +252,28 @@
for (uint32_t s = 1; s < 32; s++) {
RequantizationTester()
.s(s)
- .testExactDivideByPO2(xnn_requantize_q31__scalar);
+ .TestExactDivideByPO2(xnn_requantize_q31__scalar);
}
}
TEST(Q31__SCALAR, exact_divide_by_po2_with_zero_point) {
- for (int32_t zeroPoint = 1; zeroPoint < 256; zeroPoint++) {
+ for (int32_t zero_point = 1; zero_point < 256; zero_point++) {
for (uint32_t s = 1; s < 32; s++) {
RequantizationTester()
- .zeroPoint(zeroPoint)
+ .zero_point(zero_point)
.s(s)
- .testExactDivideByPO2(xnn_requantize_q31__scalar);
+ .TestExactDivideByPO2(xnn_requantize_q31__scalar);
}
}
}
TEST(Q31__SCALAR, divide_by_po2_with_rounding_up) {
- for (int32_t zeroPoint = 0; zeroPoint < 256; zeroPoint++) {
+ for (int32_t zero_point = 0; zero_point < 256; zero_point++) {
for (uint32_t s = 1; s < 32; s++) {
RequantizationTester()
- .zeroPoint(zeroPoint)
+ .zero_point(zero_point)
.s(s)
- .testDivideByPO2WithRoundingUp(xnn_requantize_q31__scalar);
+ .TestDivideByPO2WithRoundingUp(xnn_requantize_q31__scalar);
}
}
}
@@ -281,31 +281,31 @@
/* No rounding down Test - it fails because of upward bias in multiplication */
TEST(Q31__SCALAR, divide_by_po2_with_rounding_away) {
- for (int32_t zeroPoint = 0; zeroPoint < 256; zeroPoint++) {
+ for (int32_t zero_point = 0; zero_point < 256; zero_point++) {
for (uint32_t s = 1; s < 32; s++) {
RequantizationTester()
- .zeroPoint(zeroPoint)
+ .zero_point(zero_point)
.s(s)
- .testDivideByPO2WithRoundingAway(xnn_requantize_q31__scalar);
+ .TestDivideByPO2WithRoundingAway(xnn_requantize_q31__scalar);
}
}
}
TEST(Q31__SCALAR, special_cases) {
RequantizationTester()
- .testSpecialCases(xnn_requantize_q31__scalar);
+ .TestSpecialCases(xnn_requantize_q31__scalar);
}
TEST(Q31__SCALAR, random_cases) {
RequantizationTester()
.iterations(100)
- .testRandomCasesApproximate(xnn_requantize_q31__scalar);
+ .TestRandomCasesApproximate(xnn_requantize_q31__scalar);
}
TEST(Q31__SCALAR, random_match_gemmlowp) {
RequantizationTester()
.iterations(100)
- .testRandomCasesAgainstReference(xnn_requantize_q31__scalar, xnn_requantize_gemmlowp__scalar);
+ .TestRandomCasesAgainstReference(xnn_requantize_q31__scalar, xnn_requantize_gemmlowp__scalar);
}
@@ -316,87 +316,89 @@
TEST(GEMMLOWP__SCALAR, random_cases) {
RequantizationTester()
.iterations(100)
- .testRandomCasesApproximate(xnn_requantize_gemmlowp__scalar);
+ .TestRandomCasesApproximate(xnn_requantize_gemmlowp__scalar);
}
-/*
- * Precise PSIMD implementation using unsigned 32-bit arithmetics.
- */
+#if !XNN_ARCH_ASMJS && !XNN_ARCH_WASM
+ /*
+ * Precise PSIMD implementation using unsigned 32-bit arithmetics.
+ */
-TEST(PRECISE__PSIMD, exact_divide_by_po2) {
- for (uint32_t s = 1; s < 32; s++) {
+ TEST(PRECISE__PSIMD, exact_divide_by_po2) {
+ for (uint32_t s = 1; s < 32; s++) {
+ RequantizationTester()
+ .s(s)
+ .TestExactDivideByPO2(xnn_requantize_precise__psimd);
+ }
+ }
+
+ TEST(PRECISE__PSIMD, exact_divide_by_po2_with_zero_point) {
+ for (int32_t zero_point = 1; zero_point < 256; zero_point++) {
+ for (uint32_t s = 1; s < 32; s++) {
+ RequantizationTester()
+ .zero_point(zero_point)
+ .s(s)
+ .TestExactDivideByPO2(xnn_requantize_precise__psimd);
+ }
+ }
+ }
+
+ TEST(PRECISE__PSIMD, divide_by_po2_with_rounding_up) {
+ for (int32_t zero_point = 0; zero_point < 256; zero_point++) {
+ for (uint32_t s = 1; s < 32; s++) {
+ RequantizationTester()
+ .zero_point(zero_point)
+ .s(s)
+ .TestDivideByPO2WithRoundingUp(xnn_requantize_precise__psimd);
+ }
+ }
+ }
+
+ TEST(PRECISE__PSIMD, divide_by_po2_with_rounding_down) {
+ for (int32_t zero_point = 0; zero_point < 256; zero_point++) {
+ for (uint32_t s = 1; s < 32; s++) {
+ RequantizationTester()
+ .zero_point(zero_point)
+ .s(s)
+ .TestDivideByPO2WithRoundingDown(xnn_requantize_precise__psimd);
+ }
+ }
+ }
+
+ TEST(PRECISE__PSIMD, divide_by_po2_with_rounding_away) {
+ for (int32_t zero_point = 0; zero_point < 256; zero_point++) {
+ for (uint32_t s = 1; s < 32; s++) {
+ RequantizationTester()
+ .zero_point(zero_point)
+ .s(s)
+ .TestDivideByPO2WithRoundingAway(xnn_requantize_precise__psimd);
+ }
+ }
+ }
+
+ TEST(PRECISE__PSIMD, special_cases) {
RequantizationTester()
- .s(s)
- .testExactDivideByPO2(xnn_requantize_precise__psimd);
+ .TestSpecialCases(xnn_requantize_precise__psimd);
}
-}
-TEST(PRECISE__PSIMD, exact_divide_by_po2_with_zero_point) {
- for (int32_t zeroPoint = 1; zeroPoint < 256; zeroPoint++) {
- for (uint32_t s = 1; s < 32; s++) {
- RequantizationTester()
- .zeroPoint(zeroPoint)
- .s(s)
- .testExactDivideByPO2(xnn_requantize_precise__psimd);
- }
+ TEST(PRECISE__PSIMD, random_cases) {
+ RequantizationTester()
+ .iterations(100)
+ .TestRandomCasesPrecise(xnn_requantize_precise__psimd);
}
-}
-TEST(PRECISE__PSIMD, divide_by_po2_with_rounding_up) {
- for (int32_t zeroPoint = 0; zeroPoint < 256; zeroPoint++) {
- for (uint32_t s = 1; s < 32; s++) {
- RequantizationTester()
- .zeroPoint(zeroPoint)
- .s(s)
- .testDivideByPO2WithRoundingUp(xnn_requantize_precise__psimd);
- }
+
+ /*
+ * FP32-based PSIMD implementation using magic trick for FP32->INT32 conversion.
+ */
+
+ TEST(FP32__PSIMD, random_cases) {
+ RequantizationTester()
+ .iterations(1000)
+ .TestRandomCasesApproximate(xnn_requantize_fp32__psimd);
}
-}
-
-TEST(PRECISE__PSIMD, divide_by_po2_with_rounding_down) {
- for (int32_t zeroPoint = 0; zeroPoint < 256; zeroPoint++) {
- for (uint32_t s = 1; s < 32; s++) {
- RequantizationTester()
- .zeroPoint(zeroPoint)
- .s(s)
- .testDivideByPO2WithRoundingDown(xnn_requantize_precise__psimd);
- }
- }
-}
-
-TEST(PRECISE__PSIMD, divide_by_po2_with_rounding_away) {
- for (int32_t zeroPoint = 0; zeroPoint < 256; zeroPoint++) {
- for (uint32_t s = 1; s < 32; s++) {
- RequantizationTester()
- .zeroPoint(zeroPoint)
- .s(s)
- .testDivideByPO2WithRoundingAway(xnn_requantize_precise__psimd);
- }
- }
-}
-
-TEST(PRECISE__PSIMD, special_cases) {
- RequantizationTester()
- .testSpecialCases(xnn_requantize_precise__psimd);
-}
-
-TEST(PRECISE__PSIMD, random_cases) {
- RequantizationTester()
- .iterations(100)
- .testRandomCasesPrecise(xnn_requantize_precise__psimd);
-}
-
-
-/*
- * FP32-based PSIMD implementation using magic trick for FP32->INT32 conversion.
- */
-
-TEST(FP32__PSIMD, random_cases) {
- RequantizationTester()
- .iterations(1000)
- .testRandomCasesApproximate(xnn_requantize_fp32__psimd);
-}
+#endif // !XNN_ARCH_ASMJS && !XNN_ARCH_WASM
#if XNN_ARCH_X86 || XNN_ARCH_X86_64
@@ -408,63 +410,63 @@
for (uint32_t s = 1; s < 32; s++) {
RequantizationTester()
.s(s)
- .testExactDivideByPO2(xnn_requantize_precise__sse2);
+ .TestExactDivideByPO2(xnn_requantize_precise__sse2);
}
}
TEST(PRECISE__SSE2, exact_divide_by_po2_with_zero_point) {
- for (int32_t zeroPoint = 1; zeroPoint < 256; zeroPoint++) {
+ for (int32_t zero_point = 1; zero_point < 256; zero_point++) {
for (uint32_t s = 1; s < 32; s++) {
RequantizationTester()
- .zeroPoint(zeroPoint)
+ .zero_point(zero_point)
.s(s)
- .testExactDivideByPO2(xnn_requantize_precise__sse2);
+ .TestExactDivideByPO2(xnn_requantize_precise__sse2);
}
}
}
TEST(PRECISE__SSE2, divide_by_po2_with_rounding_up) {
- for (int32_t zeroPoint = 0; zeroPoint < 256; zeroPoint++) {
+ for (int32_t zero_point = 0; zero_point < 256; zero_point++) {
for (uint32_t s = 1; s < 32; s++) {
RequantizationTester()
- .zeroPoint(zeroPoint)
+ .zero_point(zero_point)
.s(s)
- .testDivideByPO2WithRoundingUp(xnn_requantize_precise__sse2);
+ .TestDivideByPO2WithRoundingUp(xnn_requantize_precise__sse2);
}
}
}
TEST(PRECISE__SSE2, divide_by_po2_with_rounding_down) {
- for (int32_t zeroPoint = 0; zeroPoint < 256; zeroPoint++) {
+ for (int32_t zero_point = 0; zero_point < 256; zero_point++) {
for (uint32_t s = 1; s < 32; s++) {
RequantizationTester()
- .zeroPoint(zeroPoint)
+ .zero_point(zero_point)
.s(s)
- .testDivideByPO2WithRoundingDown(xnn_requantize_precise__sse2);
+ .TestDivideByPO2WithRoundingDown(xnn_requantize_precise__sse2);
}
}
}
TEST(PRECISE__SSE2, divide_by_po2_with_rounding_away) {
- for (int32_t zeroPoint = 0; zeroPoint < 256; zeroPoint++) {
+ for (int32_t zero_point = 0; zero_point < 256; zero_point++) {
for (uint32_t s = 1; s < 32; s++) {
RequantizationTester()
- .zeroPoint(zeroPoint)
+ .zero_point(zero_point)
.s(s)
- .testDivideByPO2WithRoundingAway(xnn_requantize_precise__sse2);
+ .TestDivideByPO2WithRoundingAway(xnn_requantize_precise__sse2);
}
}
}
TEST(PRECISE__SSE2, special_cases) {
RequantizationTester()
- .testSpecialCases(xnn_requantize_precise__sse2);
+ .TestSpecialCases(xnn_requantize_precise__sse2);
}
TEST(PRECISE__SSE2, random_cases) {
RequantizationTester()
.iterations(100)
- .testRandomCasesPrecise(xnn_requantize_precise__sse2);
+ .TestRandomCasesPrecise(xnn_requantize_precise__sse2);
}
@@ -476,63 +478,63 @@
for (uint32_t s = 1; s < 32; s++) {
RequantizationTester()
.s(s)
- .testExactDivideByPO2(xnn_requantize_precise__ssse3);
+ .TestExactDivideByPO2(xnn_requantize_precise__ssse3);
}
}
TEST(PRECISE__SSSE3, exact_divide_by_po2_with_zero_point) {
- for (int32_t zeroPoint = 1; zeroPoint < 256; zeroPoint++) {
+ for (int32_t zero_point = 1; zero_point < 256; zero_point++) {
for (uint32_t s = 1; s < 32; s++) {
RequantizationTester()
- .zeroPoint(zeroPoint)
+ .zero_point(zero_point)
.s(s)
- .testExactDivideByPO2(xnn_requantize_precise__ssse3);
+ .TestExactDivideByPO2(xnn_requantize_precise__ssse3);
}
}
}
TEST(PRECISE__SSSE3, divide_by_po2_with_rounding_up) {
- for (int32_t zeroPoint = 0; zeroPoint < 256; zeroPoint++) {
+ for (int32_t zero_point = 0; zero_point < 256; zero_point++) {
for (uint32_t s = 1; s < 32; s++) {
RequantizationTester()
- .zeroPoint(zeroPoint)
+ .zero_point(zero_point)
.s(s)
- .testDivideByPO2WithRoundingUp(xnn_requantize_precise__ssse3);
+ .TestDivideByPO2WithRoundingUp(xnn_requantize_precise__ssse3);
}
}
}
TEST(PRECISE__SSSE3, divide_by_po2_with_rounding_down) {
- for (int32_t zeroPoint = 0; zeroPoint < 256; zeroPoint++) {
+ for (int32_t zero_point = 0; zero_point < 256; zero_point++) {
for (uint32_t s = 1; s < 32; s++) {
RequantizationTester()
- .zeroPoint(zeroPoint)
+ .zero_point(zero_point)
.s(s)
- .testDivideByPO2WithRoundingDown(xnn_requantize_precise__ssse3);
+ .TestDivideByPO2WithRoundingDown(xnn_requantize_precise__ssse3);
}
}
}
TEST(PRECISE__SSSE3, divide_by_po2_with_rounding_away) {
- for (int32_t zeroPoint = 0; zeroPoint < 256; zeroPoint++) {
+ for (int32_t zero_point = 0; zero_point < 256; zero_point++) {
for (uint32_t s = 1; s < 32; s++) {
RequantizationTester()
- .zeroPoint(zeroPoint)
+ .zero_point(zero_point)
.s(s)
- .testDivideByPO2WithRoundingAway(xnn_requantize_precise__ssse3);
+ .TestDivideByPO2WithRoundingAway(xnn_requantize_precise__ssse3);
}
}
}
TEST(PRECISE__SSSE3, special_cases) {
RequantizationTester()
- .testSpecialCases(xnn_requantize_precise__ssse3);
+ .TestSpecialCases(xnn_requantize_precise__ssse3);
}
TEST(PRECISE__SSSE3, random_cases) {
RequantizationTester()
.iterations(100)
- .testRandomCasesPrecise(xnn_requantize_precise__ssse3);
+ .TestRandomCasesPrecise(xnn_requantize_precise__ssse3);
}
@@ -544,63 +546,63 @@
for (uint32_t s = 1; s < 32; s++) {
RequantizationTester()
.s(s)
- .testExactDivideByPO2(xnn_requantize_precise__sse4);
+ .TestExactDivideByPO2(xnn_requantize_precise__sse4);
}
}
TEST(PRECISE__SSE4, exact_divide_by_po2_with_zero_point) {
- for (int32_t zeroPoint = 1; zeroPoint < 256; zeroPoint++) {
+ for (int32_t zero_point = 1; zero_point < 256; zero_point++) {
for (uint32_t s = 1; s < 32; s++) {
RequantizationTester()
- .zeroPoint(zeroPoint)
+ .zero_point(zero_point)
.s(s)
- .testExactDivideByPO2(xnn_requantize_precise__sse4);
+ .TestExactDivideByPO2(xnn_requantize_precise__sse4);
}
}
}
TEST(PRECISE__SSE4, divide_by_po2_with_rounding_up) {
- for (int32_t zeroPoint = 0; zeroPoint < 256; zeroPoint++) {
+ for (int32_t zero_point = 0; zero_point < 256; zero_point++) {
for (uint32_t s = 1; s < 32; s++) {
RequantizationTester()
- .zeroPoint(zeroPoint)
+ .zero_point(zero_point)
.s(s)
- .testDivideByPO2WithRoundingUp(xnn_requantize_precise__sse4);
+ .TestDivideByPO2WithRoundingUp(xnn_requantize_precise__sse4);
}
}
}
TEST(PRECISE__SSE4, divide_by_po2_with_rounding_down) {
- for (int32_t zeroPoint = 0; zeroPoint < 256; zeroPoint++) {
+ for (int32_t zero_point = 0; zero_point < 256; zero_point++) {
for (uint32_t s = 1; s < 32; s++) {
RequantizationTester()
- .zeroPoint(zeroPoint)
+ .zero_point(zero_point)
.s(s)
- .testDivideByPO2WithRoundingDown(xnn_requantize_precise__sse4);
+ .TestDivideByPO2WithRoundingDown(xnn_requantize_precise__sse4);
}
}
}
TEST(PRECISE__SSE4, divide_by_po2_with_rounding_away) {
- for (int32_t zeroPoint = 0; zeroPoint < 256; zeroPoint++) {
+ for (int32_t zero_point = 0; zero_point < 256; zero_point++) {
for (uint32_t s = 1; s < 32; s++) {
RequantizationTester()
- .zeroPoint(zeroPoint)
+ .zero_point(zero_point)
.s(s)
- .testDivideByPO2WithRoundingAway(xnn_requantize_precise__sse4);
+ .TestDivideByPO2WithRoundingAway(xnn_requantize_precise__sse4);
}
}
}
TEST(PRECISE__SSE4, special_cases) {
RequantizationTester()
- .testSpecialCases(xnn_requantize_precise__sse4);
+ .TestSpecialCases(xnn_requantize_precise__sse4);
}
TEST(PRECISE__SSE4, random_cases) {
RequantizationTester()
.iterations(100)
- .testRandomCasesPrecise(xnn_requantize_precise__sse4);
+ .TestRandomCasesPrecise(xnn_requantize_precise__sse4);
}
@@ -611,7 +613,7 @@
TEST(FP32__SSE2, random_cases) {
RequantizationTester()
.iterations(1000)
- .testRandomCasesApproximate(xnn_requantize_fp32__sse2);
+ .TestRandomCasesApproximate(xnn_requantize_fp32__sse2);
}
@@ -623,28 +625,28 @@
for (uint32_t s = 1; s < 32; s++) {
RequantizationTester()
.s(s)
- .testExactDivideByPO2(xnn_requantize_q31__sse2);
+ .TestExactDivideByPO2(xnn_requantize_q31__sse2);
}
}
TEST(Q31__SSE2, exact_divide_by_po2_with_zero_point) {
- for (int32_t zeroPoint = 1; zeroPoint < 256; zeroPoint++) {
+ for (int32_t zero_point = 1; zero_point < 256; zero_point++) {
for (uint32_t s = 1; s < 32; s++) {
RequantizationTester()
- .zeroPoint(zeroPoint)
+ .zero_point(zero_point)
.s(s)
- .testExactDivideByPO2(xnn_requantize_q31__sse2);
+ .TestExactDivideByPO2(xnn_requantize_q31__sse2);
}
}
}
TEST(Q31__SSE2, divide_by_po2_with_rounding_up) {
- for (int32_t zeroPoint = 0; zeroPoint < 256; zeroPoint++) {
+ for (int32_t zero_point = 0; zero_point < 256; zero_point++) {
for (uint32_t s = 1; s < 32; s++) {
RequantizationTester()
- .zeroPoint(zeroPoint)
+ .zero_point(zero_point)
.s(s)
- .testDivideByPO2WithRoundingUp(xnn_requantize_q31__sse2);
+ .TestDivideByPO2WithRoundingUp(xnn_requantize_q31__sse2);
}
}
}
@@ -652,31 +654,31 @@
/* No rounding down Test - it fails because of upward bias in multiplication */
TEST(Q31__SSE2, divide_by_po2_with_rounding_away) {
- for (int32_t zeroPoint = 0; zeroPoint < 256; zeroPoint++) {
+ for (int32_t zero_point = 0; zero_point < 256; zero_point++) {
for (uint32_t s = 1; s < 32; s++) {
RequantizationTester()
- .zeroPoint(zeroPoint)
+ .zero_point(zero_point)
.s(s)
- .testDivideByPO2WithRoundingAway(xnn_requantize_q31__sse2);
+ .TestDivideByPO2WithRoundingAway(xnn_requantize_q31__sse2);
}
}
}
TEST(Q31__SSE2, special_cases) {
RequantizationTester()
- .testSpecialCases(xnn_requantize_q31__sse2);
+ .TestSpecialCases(xnn_requantize_q31__sse2);
}
TEST(Q31__SSE2, random_cases) {
RequantizationTester()
.iterations(100)
- .testRandomCasesApproximate(xnn_requantize_q31__sse2);
+ .TestRandomCasesApproximate(xnn_requantize_q31__sse2);
}
TEST(Q31__SSE2, random_match_gemmlowp) {
RequantizationTester()
.iterations(100)
- .testRandomCasesAgainstReference(xnn_requantize_q31__sse2, xnn_requantize_gemmlowp__sse2);
+ .TestRandomCasesAgainstReference(xnn_requantize_q31__sse2, xnn_requantize_gemmlowp__sse2);
}
@@ -688,28 +690,28 @@
for (uint32_t s = 1; s < 32; s++) {
RequantizationTester()
.s(s)
- .testExactDivideByPO2(xnn_requantize_q31__ssse3);
+ .TestExactDivideByPO2(xnn_requantize_q31__ssse3);
}
}
TEST(Q31__SSSE3, exact_divide_by_po2_with_zero_point) {
- for (int32_t zeroPoint = 1; zeroPoint < 256; zeroPoint++) {
+ for (int32_t zero_point = 1; zero_point < 256; zero_point++) {
for (uint32_t s = 1; s < 32; s++) {
RequantizationTester()
- .zeroPoint(zeroPoint)
+ .zero_point(zero_point)
.s(s)
- .testExactDivideByPO2(xnn_requantize_q31__ssse3);
+ .TestExactDivideByPO2(xnn_requantize_q31__ssse3);
}
}
}
TEST(Q31__SSSE3, divide_by_po2_with_rounding_up) {
- for (int32_t zeroPoint = 0; zeroPoint < 256; zeroPoint++) {
+ for (int32_t zero_point = 0; zero_point < 256; zero_point++) {
for (uint32_t s = 1; s < 32; s++) {
RequantizationTester()
- .zeroPoint(zeroPoint)
+ .zero_point(zero_point)
.s(s)
- .testDivideByPO2WithRoundingUp(xnn_requantize_q31__ssse3);
+ .TestDivideByPO2WithRoundingUp(xnn_requantize_q31__ssse3);
}
}
}
@@ -717,31 +719,31 @@
/* No rounding down Test - it fails because of upward bias in multiplication */
TEST(Q31__SSSE3, divide_by_po2_with_rounding_away) {
- for (int32_t zeroPoint = 0; zeroPoint < 256; zeroPoint++) {
+ for (int32_t zero_point = 0; zero_point < 256; zero_point++) {
for (uint32_t s = 1; s < 32; s++) {
RequantizationTester()
- .zeroPoint(zeroPoint)
+ .zero_point(zero_point)
.s(s)
- .testDivideByPO2WithRoundingAway(xnn_requantize_q31__ssse3);
+ .TestDivideByPO2WithRoundingAway(xnn_requantize_q31__ssse3);
}
}
}
TEST(Q31__SSSE3, special_cases) {
RequantizationTester()
- .testSpecialCases(xnn_requantize_q31__ssse3);
+ .TestSpecialCases(xnn_requantize_q31__ssse3);
}
TEST(Q31__SSSE3, random_cases) {
RequantizationTester()
.iterations(100)
- .testRandomCasesApproximate(xnn_requantize_q31__ssse3);
+ .TestRandomCasesApproximate(xnn_requantize_q31__ssse3);
}
TEST(Q31__SSSE3, random_match_gemmlowp) {
RequantizationTester()
.iterations(100)
- .testRandomCasesAgainstReference(xnn_requantize_q31__ssse3, xnn_requantize_gemmlowp__ssse3);
+ .TestRandomCasesAgainstReference(xnn_requantize_q31__ssse3, xnn_requantize_gemmlowp__ssse3);
}
@@ -753,28 +755,28 @@
for (uint32_t s = 1; s < 32; s++) {
RequantizationTester()
.s(s)
- .testExactDivideByPO2(xnn_requantize_q31__sse4);
+ .TestExactDivideByPO2(xnn_requantize_q31__sse4);
}
}
TEST(Q31__SSE4, exact_divide_by_po2_with_zero_point) {
- for (int32_t zeroPoint = 1; zeroPoint < 256; zeroPoint++) {
+ for (int32_t zero_point = 1; zero_point < 256; zero_point++) {
for (uint32_t s = 1; s < 32; s++) {
RequantizationTester()
- .zeroPoint(zeroPoint)
+ .zero_point(zero_point)
.s(s)
- .testExactDivideByPO2(xnn_requantize_q31__sse4);
+ .TestExactDivideByPO2(xnn_requantize_q31__sse4);
}
}
}
TEST(Q31__SSE4, divide_by_po2_with_rounding_up) {
- for (int32_t zeroPoint = 0; zeroPoint < 256; zeroPoint++) {
+ for (int32_t zero_point = 0; zero_point < 256; zero_point++) {
for (uint32_t s = 1; s < 32; s++) {
RequantizationTester()
- .zeroPoint(zeroPoint)
+ .zero_point(zero_point)
.s(s)
- .testDivideByPO2WithRoundingUp(xnn_requantize_q31__sse4);
+ .TestDivideByPO2WithRoundingUp(xnn_requantize_q31__sse4);
}
}
}
@@ -782,31 +784,31 @@
/* No rounding down Test - it fails because of upward bias in multiplication */
TEST(Q31__SSE4, divide_by_po2_with_rounding_away) {
- for (int32_t zeroPoint = 0; zeroPoint < 256; zeroPoint++) {
+ for (int32_t zero_point = 0; zero_point < 256; zero_point++) {
for (uint32_t s = 1; s < 32; s++) {
RequantizationTester()
- .zeroPoint(zeroPoint)
+ .zero_point(zero_point)
.s(s)
- .testDivideByPO2WithRoundingAway(xnn_requantize_q31__sse4);
+ .TestDivideByPO2WithRoundingAway(xnn_requantize_q31__sse4);
}
}
}
TEST(Q31__SSE4, special_cases) {
RequantizationTester()
- .testSpecialCases(xnn_requantize_q31__sse4);
+ .TestSpecialCases(xnn_requantize_q31__sse4);
}
TEST(Q31__SSE4, random_cases) {
RequantizationTester()
.iterations(100)
- .testRandomCasesApproximate(xnn_requantize_q31__sse4);
+ .TestRandomCasesApproximate(xnn_requantize_q31__sse4);
}
TEST(Q31__SSE4, random_match_gemmlowp) {
RequantizationTester()
.iterations(100)
- .testRandomCasesAgainstReference(xnn_requantize_q31__sse4, xnn_requantize_gemmlowp__sse4);
+ .TestRandomCasesAgainstReference(xnn_requantize_q31__sse4, xnn_requantize_gemmlowp__sse4);
}
@@ -818,28 +820,28 @@
for (uint32_t s = 1; s < 32; s++) {
RequantizationTester()
.s(s)
- .testExactDivideByPO2(xnn_requantize_gemmlowp__sse2);
+ .TestExactDivideByPO2(xnn_requantize_gemmlowp__sse2);
}
}
TEST(GEMMLOWP__SSE2, exact_divide_by_po2_with_zero_point) {
- for (int32_t zeroPoint = 1; zeroPoint < 256; zeroPoint++) {
+ for (int32_t zero_point = 1; zero_point < 256; zero_point++) {
for (uint32_t s = 1; s < 32; s++) {
RequantizationTester()
- .zeroPoint(zeroPoint)
+ .zero_point(zero_point)
.s(s)
- .testExactDivideByPO2(xnn_requantize_gemmlowp__sse2);
+ .TestExactDivideByPO2(xnn_requantize_gemmlowp__sse2);
}
}
}
TEST(GEMMLOWP__SSE2, divide_by_po2_with_rounding_up) {
- for (int32_t zeroPoint = 0; zeroPoint < 256; zeroPoint++) {
+ for (int32_t zero_point = 0; zero_point < 256; zero_point++) {
for (uint32_t s = 1; s < 32; s++) {
RequantizationTester()
- .zeroPoint(zeroPoint)
+ .zero_point(zero_point)
.s(s)
- .testDivideByPO2WithRoundingUp(xnn_requantize_gemmlowp__sse2);
+ .TestDivideByPO2WithRoundingUp(xnn_requantize_gemmlowp__sse2);
}
}
}
@@ -847,25 +849,25 @@
/* No rounding down Test - it fails because of upward bias in multiplication */
TEST(GEMMLOWP__SSE2, divide_by_po2_with_rounding_away) {
- for (int32_t zeroPoint = 0; zeroPoint < 256; zeroPoint++) {
+ for (int32_t zero_point = 0; zero_point < 256; zero_point++) {
for (uint32_t s = 1; s < 32; s++) {
RequantizationTester()
- .zeroPoint(zeroPoint)
+ .zero_point(zero_point)
.s(s)
- .testDivideByPO2WithRoundingAway(xnn_requantize_gemmlowp__sse2);
+ .TestDivideByPO2WithRoundingAway(xnn_requantize_gemmlowp__sse2);
}
}
}
TEST(GEMMLOWP__SSE2, special_cases) {
RequantizationTester()
- .testSpecialCases(xnn_requantize_gemmlowp__sse2);
+ .TestSpecialCases(xnn_requantize_gemmlowp__sse2);
}
TEST(GEMMLOWP__SSE2, random_cases) {
RequantizationTester()
.iterations(100)
- .testRandomCasesApproximate(xnn_requantize_gemmlowp__sse2);
+ .TestRandomCasesApproximate(xnn_requantize_gemmlowp__sse2);
}
@@ -877,28 +879,28 @@
for (uint32_t s = 1; s < 32; s++) {
RequantizationTester()
.s(s)
- .testExactDivideByPO2(xnn_requantize_gemmlowp__ssse3);
+ .TestExactDivideByPO2(xnn_requantize_gemmlowp__ssse3);
}
}
TEST(GEMMLOWP__SSSE3, exact_divide_by_po2_with_zero_point) {
- for (int32_t zeroPoint = 1; zeroPoint < 256; zeroPoint++) {
+ for (int32_t zero_point = 1; zero_point < 256; zero_point++) {
for (uint32_t s = 1; s < 32; s++) {
RequantizationTester()
- .zeroPoint(zeroPoint)
+ .zero_point(zero_point)
.s(s)
- .testExactDivideByPO2(xnn_requantize_gemmlowp__ssse3);
+ .TestExactDivideByPO2(xnn_requantize_gemmlowp__ssse3);
}
}
}
TEST(GEMMLOWP__SSSE3, divide_by_po2_with_rounding_up) {
- for (int32_t zeroPoint = 0; zeroPoint < 256; zeroPoint++) {
+ for (int32_t zero_point = 0; zero_point < 256; zero_point++) {
for (uint32_t s = 1; s < 32; s++) {
RequantizationTester()
- .zeroPoint(zeroPoint)
+ .zero_point(zero_point)
.s(s)
- .testDivideByPO2WithRoundingUp(xnn_requantize_gemmlowp__ssse3);
+ .TestDivideByPO2WithRoundingUp(xnn_requantize_gemmlowp__ssse3);
}
}
}
@@ -906,25 +908,25 @@
/* No rounding down Test - it fails because of upward bias in multiplication */
TEST(GEMMLOWP__SSSE3, divide_by_po2_with_rounding_away) {
- for (int32_t zeroPoint = 0; zeroPoint < 256; zeroPoint++) {
+ for (int32_t zero_point = 0; zero_point < 256; zero_point++) {
for (uint32_t s = 1; s < 32; s++) {
RequantizationTester()
- .zeroPoint(zeroPoint)
+ .zero_point(zero_point)
.s(s)
- .testDivideByPO2WithRoundingAway(xnn_requantize_gemmlowp__ssse3);
+ .TestDivideByPO2WithRoundingAway(xnn_requantize_gemmlowp__ssse3);
}
}
}
TEST(GEMMLOWP__SSSE3, special_cases) {
RequantizationTester()
- .testSpecialCases(xnn_requantize_gemmlowp__ssse3);
+ .TestSpecialCases(xnn_requantize_gemmlowp__ssse3);
}
TEST(GEMMLOWP__SSSE3, random_cases) {
RequantizationTester()
.iterations(100)
- .testRandomCasesApproximate(xnn_requantize_gemmlowp__ssse3);
+ .TestRandomCasesApproximate(xnn_requantize_gemmlowp__ssse3);
}
@@ -936,28 +938,28 @@
for (uint32_t s = 1; s < 32; s++) {
RequantizationTester()
.s(s)
- .testExactDivideByPO2(xnn_requantize_gemmlowp__sse4);
+ .TestExactDivideByPO2(xnn_requantize_gemmlowp__sse4);
}
}
TEST(GEMMLOWP__SSE4, exact_divide_by_po2_with_zero_point) {
- for (int32_t zeroPoint = 1; zeroPoint < 256; zeroPoint++) {
+ for (int32_t zero_point = 1; zero_point < 256; zero_point++) {
for (uint32_t s = 1; s < 32; s++) {
RequantizationTester()
- .zeroPoint(zeroPoint)
+ .zero_point(zero_point)
.s(s)
- .testExactDivideByPO2(xnn_requantize_gemmlowp__sse4);
+ .TestExactDivideByPO2(xnn_requantize_gemmlowp__sse4);
}
}
}
TEST(GEMMLOWP__SSE4, divide_by_po2_with_rounding_up) {
- for (int32_t zeroPoint = 0; zeroPoint < 256; zeroPoint++) {
+ for (int32_t zero_point = 0; zero_point < 256; zero_point++) {
for (uint32_t s = 1; s < 32; s++) {
RequantizationTester()
- .zeroPoint(zeroPoint)
+ .zero_point(zero_point)
.s(s)
- .testDivideByPO2WithRoundingUp(xnn_requantize_gemmlowp__sse4);
+ .TestDivideByPO2WithRoundingUp(xnn_requantize_gemmlowp__sse4);
}
}
}
@@ -965,25 +967,25 @@
/* No rounding down Test - it fails because of upward bias in multiplication */
TEST(GEMMLOWP__SSE4, divide_by_po2_with_rounding_away) {
- for (int32_t zeroPoint = 0; zeroPoint < 256; zeroPoint++) {
+ for (int32_t zero_point = 0; zero_point < 256; zero_point++) {
for (uint32_t s = 1; s < 32; s++) {
RequantizationTester()
- .zeroPoint(zeroPoint)
+ .zero_point(zero_point)
.s(s)
- .testDivideByPO2WithRoundingAway(xnn_requantize_gemmlowp__sse4);
+ .TestDivideByPO2WithRoundingAway(xnn_requantize_gemmlowp__sse4);
}
}
}
TEST(GEMMLOWP__SSE4, special_cases) {
RequantizationTester()
- .testSpecialCases(xnn_requantize_gemmlowp__sse4);
+ .TestSpecialCases(xnn_requantize_gemmlowp__sse4);
}
TEST(GEMMLOWP__SSE4, random_cases) {
RequantizationTester()
.iterations(100)
- .testRandomCasesApproximate(xnn_requantize_gemmlowp__sse4);
+ .TestRandomCasesApproximate(xnn_requantize_gemmlowp__sse4);
}
#endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
@@ -996,63 +998,63 @@
for (uint32_t s = 1; s < 32; s++) {
RequantizationTester()
.s(s)
- .testExactDivideByPO2(xnn_requantize_precise__neon);
+ .TestExactDivideByPO2(xnn_requantize_precise__neon);
}
}
TEST(PRECISE__NEON, exact_divide_by_po2_with_zero_point) {
- for (int32_t zeroPoint = 1; zeroPoint < 256; zeroPoint++) {
+ for (int32_t zero_point = 1; zero_point < 256; zero_point++) {
for (uint32_t s = 1; s < 32; s++) {
RequantizationTester()
- .zeroPoint(zeroPoint)
+ .zero_point(zero_point)
.s(s)
- .testExactDivideByPO2(xnn_requantize_precise__neon);
+ .TestExactDivideByPO2(xnn_requantize_precise__neon);
}
}
}
TEST(PRECISE__NEON, divide_by_po2_with_rounding_up) {
- for (int32_t zeroPoint = 0; zeroPoint < 256; zeroPoint++) {
+ for (int32_t zero_point = 0; zero_point < 256; zero_point++) {
for (uint32_t s = 1; s < 32; s++) {
RequantizationTester()
- .zeroPoint(zeroPoint)
+ .zero_point(zero_point)
.s(s)
- .testDivideByPO2WithRoundingUp(xnn_requantize_precise__neon);
+ .TestDivideByPO2WithRoundingUp(xnn_requantize_precise__neon);
}
}
}
TEST(PRECISE__NEON, divide_by_po2_with_rounding_down) {
- for (int32_t zeroPoint = 0; zeroPoint < 256; zeroPoint++) {
+ for (int32_t zero_point = 0; zero_point < 256; zero_point++) {
for (uint32_t s = 1; s < 32; s++) {
RequantizationTester()
- .zeroPoint(zeroPoint)
+ .zero_point(zero_point)
.s(s)
- .testDivideByPO2WithRoundingDown(xnn_requantize_precise__neon);
+ .TestDivideByPO2WithRoundingDown(xnn_requantize_precise__neon);
}
}
}
TEST(PRECISE__NEON, divide_by_po2_with_rounding_away) {
- for (int32_t zeroPoint = 0; zeroPoint < 256; zeroPoint++) {
+ for (int32_t zero_point = 0; zero_point < 256; zero_point++) {
for (uint32_t s = 1; s < 32; s++) {
RequantizationTester()
- .zeroPoint(zeroPoint)
+ .zero_point(zero_point)
.s(s)
- .testDivideByPO2WithRoundingAway(xnn_requantize_precise__neon);
+ .TestDivideByPO2WithRoundingAway(xnn_requantize_precise__neon);
}
}
}
TEST(PRECISE__NEON, special_cases) {
RequantizationTester()
- .testSpecialCases(xnn_requantize_precise__neon);
+ .TestSpecialCases(xnn_requantize_precise__neon);
}
TEST(PRECISE__NEON, random_cases) {
RequantizationTester()
.iterations(100)
- .testRandomCasesPrecise(xnn_requantize_precise__neon);
+ .TestRandomCasesPrecise(xnn_requantize_precise__neon);
}
@@ -1063,7 +1065,7 @@
TEST(FP32__NEON, random_cases) {
RequantizationTester()
.iterations(1000)
- .testRandomCasesApproximate(xnn_requantize_fp32__neon);
+ .TestRandomCasesApproximate(xnn_requantize_fp32__neon);
}
@@ -1075,28 +1077,28 @@
for (uint32_t s = 1; s < 32; s++) {
RequantizationTester()
.s(s)
- .testExactDivideByPO2(xnn_requantize_q31__neon);
+ .TestExactDivideByPO2(xnn_requantize_q31__neon);
}
}
TEST(Q31__NEON, exact_divide_by_po2_with_zero_point) {
- for (int32_t zeroPoint = 1; zeroPoint < 256; zeroPoint++) {
+ for (int32_t zero_point = 1; zero_point < 256; zero_point++) {
for (uint32_t s = 1; s < 32; s++) {
RequantizationTester()
- .zeroPoint(zeroPoint)
+ .zero_point(zero_point)
.s(s)
- .testExactDivideByPO2(xnn_requantize_q31__neon);
+ .TestExactDivideByPO2(xnn_requantize_q31__neon);
}
}
}
TEST(Q31__NEON, divide_by_po2_with_rounding_up) {
- for (int32_t zeroPoint = 0; zeroPoint < 256; zeroPoint++) {
+ for (int32_t zero_point = 0; zero_point < 256; zero_point++) {
for (uint32_t s = 1; s < 32; s++) {
RequantizationTester()
- .zeroPoint(zeroPoint)
+ .zero_point(zero_point)
.s(s)
- .testDivideByPO2WithRoundingUp(xnn_requantize_q31__neon);
+ .TestDivideByPO2WithRoundingUp(xnn_requantize_q31__neon);
}
}
}
@@ -1104,31 +1106,31 @@
/* No rounding down Test - it fails because of upward bias in multiplication */
TEST(Q31__NEON, divide_by_po2_with_rounding_away) {
- for (int32_t zeroPoint = 0; zeroPoint < 256; zeroPoint++) {
+ for (int32_t zero_point = 0; zero_point < 256; zero_point++) {
for (uint32_t s = 1; s < 32; s++) {
RequantizationTester()
- .zeroPoint(zeroPoint)
+ .zero_point(zero_point)
.s(s)
- .testDivideByPO2WithRoundingAway(xnn_requantize_q31__neon);
+ .TestDivideByPO2WithRoundingAway(xnn_requantize_q31__neon);
}
}
}
TEST(Q31__NEON, special_cases) {
RequantizationTester()
- .testSpecialCases(xnn_requantize_q31__neon);
+ .TestSpecialCases(xnn_requantize_q31__neon);
}
TEST(Q31__NEON, random_cases) {
RequantizationTester()
.iterations(100)
- .testRandomCasesApproximate(xnn_requantize_q31__neon);
+ .TestRandomCasesApproximate(xnn_requantize_q31__neon);
}
TEST(Q31__NEON, random_match_gemmlowp) {
RequantizationTester()
.iterations(100)
- .testRandomCasesAgainstReference(xnn_requantize_q31__neon, xnn_requantize_gemmlowp__neon);
+ .TestRandomCasesAgainstReference(xnn_requantize_q31__neon, xnn_requantize_gemmlowp__neon);
}
@@ -1139,6 +1141,6 @@
TEST(GEMMLOWP__NEON, random_cases) {
RequantizationTester()
.iterations(100)
- .testRandomCasesApproximate(xnn_requantize_gemmlowp__neon);
+ .TestRandomCasesApproximate(xnn_requantize_gemmlowp__neon);
}
#endif // XNN_ARCH_ARM || XNN_ARCH_ARM64