Refactor CHW micro-kernels
Rename SpCHW -> CHW (and SpCHW -> CW for the f32-gavgpool micro-kernels)
PiperOrigin-RevId: 311861144
diff --git a/BUILD.bazel b/BUILD.bazel
index 5598d11..11984cc 100644
--- a/BUILD.bazel
+++ b/BUILD.bazel
@@ -89,11 +89,11 @@
"src/f32-clamp/gen/scalar-x4.c",
"src/f32-conv-hwc/3x3s2p0p1c3x4-scalar-1x1.c",
"src/f32-conv-hwc/3x3s2p1c3x4-scalar-1x1.c",
- "src/f32-conv-hwc2spchw/3x3s2p1c3x4-scalar-1x1.c",
- "src/f32-dwconv-spchw/3x3p1-scalar.c",
- "src/f32-dwconv-spchw/3x3s2p1-scalar.c",
- "src/f32-dwconv-spchw/5x5p2-scalar.c",
- "src/f32-dwconv-spchw/5x5s2p2-scalar.c",
+ "src/f32-conv-hwc2chw/3x3s2p1c3x4-scalar-1x1.c",
+ "src/f32-dwconv-chw/3x3p1-scalar.c",
+ "src/f32-dwconv-chw/3x3s2p1-scalar.c",
+ "src/f32-dwconv-chw/5x5p2-scalar.c",
+ "src/f32-dwconv-chw/5x5s2p2-scalar.c",
"src/f32-dwconv/gen/up1x4-scalar-acc2.c",
"src/f32-dwconv/gen/up1x4-scalar.c",
"src/f32-dwconv/gen/up1x9-scalar-acc2.c",
@@ -118,7 +118,7 @@
"src/f32-dwconv/gen/up2x9-minmax-scalar.c",
"src/f32-dwconv/gen/up2x25-minmax-scalar-acc2.c",
"src/f32-dwconv/gen/up2x25-minmax-scalar.c",
- "src/f32-gavgpool-spchw/scalar-x1.c",
+ "src/f32-gavgpool-cw/scalar-x1.c",
"src/f32-gavgpool/7p7x-minmax-scalar-c1.c",
"src/f32-gavgpool/7x-minmax-scalar-c1.c",
"src/f32-gemm/gen-inc/1x4inc-minmax-scalar.c",
@@ -533,7 +533,7 @@
"src/f32-dwconv/gen/up4x25-minmax-neon-acc2.c",
"src/f32-dwconv/gen/up8x25-minmax-neon.c",
"src/f32-dwconv/gen/up8x25-minmax-neon-acc2.c",
- "src/f32-gavgpool-spchw/neon-x4.c",
+ "src/f32-gavgpool-cw/neon-x4.c",
"src/f32-gavgpool/7p7x-minmax-neon-c4.c",
"src/f32-gavgpool/7x-minmax-neon-c4.c",
"src/f32-gemm/gen/1x8-minmax-neon-lane-ld64.c",
@@ -887,11 +887,11 @@
"src/f32-conv-hwc/3x3s2p0p1c3x4-neonfma-2x2.c",
"src/f32-conv-hwc/3x3s2p1c3x4-neonfma-2x2.c",
"src/f32-conv-hwc/3x3s2p1c3x8-neonfma-2x2.c",
- "src/f32-conv-hwc2spchw/3x3s2p1c3x4-neonfma-2x2.c",
- "src/f32-dwconv-spchw/3x3p1-neonfma.c",
- "src/f32-dwconv-spchw/5x5p2-neonfma.c",
- "src/f32-dwconv-spchw/3x3s2p1-neonfma.c",
- "src/f32-dwconv-spchw/5x5s2p2-neonfma.c",
+ "src/f32-conv-hwc2chw/3x3s2p1c3x4-neonfma-2x2.c",
+ "src/f32-dwconv-chw/3x3p1-neonfma.c",
+ "src/f32-dwconv-chw/5x5p2-neonfma.c",
+ "src/f32-dwconv-chw/3x3s2p1-neonfma.c",
+ "src/f32-dwconv-chw/5x5s2p2-neonfma.c",
"src/f32-sigmoid/gen/neonfma-rr1-p5-div-x4.c",
"src/f32-sigmoid/gen/neonfma-rr1-p5-div-x8.c",
"src/f32-sigmoid/gen/neonfma-rr1-p5-div-x12.c",
@@ -1017,8 +1017,8 @@
"src/f32-avgpool/9x-minmax-sse-c4.c",
"src/f32-clamp/gen/sse-x4.c",
"src/f32-clamp/gen/sse-x8.c",
- "src/f32-dwconv-spchw/3x3p1-sse.c",
- "src/f32-dwconv-spchw/3x3s2p1-sse.c",
+ "src/f32-dwconv-chw/3x3p1-sse.c",
+ "src/f32-dwconv-chw/3x3s2p1-sse.c",
"src/f32-dwconv/gen/up4x25-minmax-sse-acc2.c",
"src/f32-dwconv/gen/up4x25-minmax-sse.c",
"src/f32-dwconv/gen/up4x4-minmax-sse-acc2.c",
@@ -1031,7 +1031,7 @@
"src/f32-dwconv/gen/up8x4-minmax-sse.c",
"src/f32-dwconv/gen/up8x9-minmax-sse-acc2.c",
"src/f32-dwconv/gen/up8x9-minmax-sse.c",
- "src/f32-gavgpool-spchw/sse-x4.c",
+ "src/f32-gavgpool-cw/sse-x4.c",
"src/f32-gavgpool/7p7x-minmax-sse-c4.c",
"src/f32-gavgpool/7x-minmax-sse-c4.c",
"src/f32-gemm/gen/1x8-minmax-sse-dup.c",
@@ -2304,9 +2304,9 @@
)
xnnpack_benchmark(
- name = "f32_conv_hwc2spchw_bench",
+ name = "f32_conv_hwc2chw_bench",
srcs = [
- "bench/f32-conv-hwc2spchw.cc",
+ "bench/f32-conv-hwc2chw.cc",
"bench/dconv.h",
"src/xnnpack/AlignedAllocator.h",
] + WEIGHTS_PACK_HDRS + MICROKERNEL_BENCHMARK_HDRS,
@@ -2324,9 +2324,9 @@
)
xnnpack_benchmark(
- name = "f32_dwconv_spchw_bench",
+ name = "f32_dwconv_chw_bench",
srcs = [
- "bench/f32-dwconv-spchw.cc",
+ "bench/f32-dwconv-chw.cc",
"bench/dwconv.h",
"src/xnnpack/AlignedAllocator.h",
] + WEIGHTS_PACK_HDRS + MICROKERNEL_BENCHMARK_HDRS,
@@ -2820,10 +2820,10 @@
)
xnnpack_unit_test(
- name = "f32_conv_hwc2spchw_test",
+ name = "f32_conv_hwc2chw_test",
srcs = [
- "test/f32-conv-hwc2spchw.cc",
- "test/conv-hwc2spchw-microkernel-tester.h",
+ "test/f32-conv-hwc2chw.cc",
+ "test/conv-hwc2chw-microkernel-tester.h",
"src/xnnpack/AlignedAllocator.h",
] + WEIGHTS_PACK_HDRS + MICROKERNEL_TEST_HDRS,
deps = MICROKERNEL_TEST_DEPS,
@@ -2850,10 +2850,10 @@
)
xnnpack_unit_test(
- name = "f32_dwconv_spchw_test",
+ name = "f32_dwconv_chw_test",
srcs = [
- "test/f32-dwconv-spchw.cc",
- "test/dwconv-spchw-microkernel-tester.h",
+ "test/f32-dwconv-chw.cc",
+ "test/dwconv-chw-microkernel-tester.h",
"src/xnnpack/AlignedAllocator.h",
] + WEIGHTS_PACK_HDRS + MICROKERNEL_TEST_HDRS,
deps = MICROKERNEL_TEST_DEPS,
@@ -2870,10 +2870,10 @@
)
xnnpack_unit_test(
- name = "f32_gavgpool_spchw_test",
+ name = "f32_gavgpool_cw_test",
srcs = [
- "test/f32-gavgpool-spchw.cc",
- "test/gavgpool-spchw-microkernel-tester.h",
+ "test/f32-gavgpool-cw.cc",
+ "test/gavgpool-cw-microkernel-tester.h",
"src/xnnpack/AlignedAllocator.h",
] + MICROKERNEL_TEST_HDRS,
deps = MICROKERNEL_TEST_DEPS,
diff --git a/CMakeLists.txt b/CMakeLists.txt
index 15088a5..4ef5181 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -209,11 +209,11 @@
src/f32-clamp/gen/scalar-x4.c
src/f32-conv-hwc/3x3s2p1c3x4-scalar-1x1.c
src/f32-conv-hwc/3x3s2p0p1c3x4-scalar-1x1.c
- src/f32-conv-hwc2spchw/3x3s2p1c3x4-scalar-1x1.c
- src/f32-dwconv-spchw/3x3p1-scalar.c
- src/f32-dwconv-spchw/3x3s2p1-scalar.c
- src/f32-dwconv-spchw/5x5p2-scalar.c
- src/f32-dwconv-spchw/5x5s2p2-scalar.c
+ src/f32-conv-hwc2chw/3x3s2p1c3x4-scalar-1x1.c
+ src/f32-dwconv-chw/3x3p1-scalar.c
+ src/f32-dwconv-chw/3x3s2p1-scalar.c
+ src/f32-dwconv-chw/5x5p2-scalar.c
+ src/f32-dwconv-chw/5x5s2p2-scalar.c
src/f32-dwconv/gen/up1x4-scalar.c
src/f32-dwconv/gen/up1x4-scalar-acc2.c
src/f32-dwconv/gen/up1x9-scalar.c
@@ -238,7 +238,7 @@
src/f32-dwconv/gen/up2x9-minmax-scalar-acc2.c
src/f32-dwconv/gen/up2x25-minmax-scalar.c
src/f32-dwconv/gen/up2x25-minmax-scalar-acc2.c
- src/f32-gavgpool-spchw/scalar-x1.c
+ src/f32-gavgpool-cw/scalar-x1.c
src/f32-gavgpool/7p7x-minmax-scalar-c1.c
src/f32-gavgpool/7x-minmax-scalar-c1.c
src/f32-gemm/gen/1x4-scalar.c
@@ -545,7 +545,7 @@
src/f32-dwconv/gen/up4x25-minmax-neon-acc2.c
src/f32-dwconv/gen/up8x25-minmax-neon.c
src/f32-dwconv/gen/up8x25-minmax-neon-acc2.c
- src/f32-gavgpool-spchw/neon-x4.c
+ src/f32-gavgpool-cw/neon-x4.c
src/f32-gavgpool/7p7x-minmax-neon-c4.c
src/f32-gavgpool/7x-minmax-neon-c4.c
src/f32-gemm/gen/1x8-minmax-neon-lane-ld64.c
@@ -903,11 +903,11 @@
src/f32-conv-hwc/3x3s2p0p1c3x4-neonfma-2x2.c
src/f32-conv-hwc/3x3s2p1c3x4-neonfma-2x2.c
src/f32-conv-hwc/3x3s2p1c3x8-neonfma-2x2.c
- src/f32-conv-hwc2spchw/3x3s2p1c3x4-neonfma-2x2.c
- src/f32-dwconv-spchw/3x3p1-neonfma.c
- src/f32-dwconv-spchw/5x5p2-neonfma.c
- src/f32-dwconv-spchw/3x3s2p1-neonfma.c
- src/f32-dwconv-spchw/5x5s2p2-neonfma.c
+ src/f32-conv-hwc2chw/3x3s2p1c3x4-neonfma-2x2.c
+ src/f32-dwconv-chw/3x3p1-neonfma.c
+ src/f32-dwconv-chw/5x5p2-neonfma.c
+ src/f32-dwconv-chw/3x3s2p1-neonfma.c
+ src/f32-dwconv-chw/5x5s2p2-neonfma.c
src/f32-sigmoid/gen/neonfma-rr1-p5-div-x4.c
src/f32-sigmoid/gen/neonfma-rr1-p5-div-x8.c
src/f32-sigmoid/gen/neonfma-rr1-p5-div-x12.c
@@ -1024,8 +1024,8 @@
src/f32-avgpool/9x-minmax-sse-c4.c
src/f32-clamp/gen/sse-x4.c
src/f32-clamp/gen/sse-x8.c
- src/f32-dwconv-spchw/3x3p1-sse.c
- src/f32-dwconv-spchw/3x3s2p1-sse.c
+ src/f32-dwconv-chw/3x3p1-sse.c
+ src/f32-dwconv-chw/3x3s2p1-sse.c
src/f32-dwconv/gen/up4x25-minmax-sse-acc2.c
src/f32-dwconv/gen/up4x25-minmax-sse.c
src/f32-dwconv/gen/up4x4-minmax-sse-acc2.c
@@ -1038,7 +1038,7 @@
src/f32-dwconv/gen/up8x4-minmax-sse.c
src/f32-dwconv/gen/up8x9-minmax-sse-acc2.c
src/f32-dwconv/gen/up8x9-minmax-sse.c
- src/f32-gavgpool-spchw/sse-x4.c
+ src/f32-gavgpool-cw/sse-x4.c
src/f32-gavgpool/7p7x-minmax-sse-c4.c
src/f32-gavgpool/7x-minmax-sse-c4.c
src/f32-gemm/gen/1x8-minmax-sse-dup.c
@@ -2214,23 +2214,23 @@
TARGET_LINK_LIBRARIES(f32-conv-hwc-test PRIVATE XNNPACK cpuinfo fp16 gtest gtest_main)
ADD_TEST(f32-conv-hwc-test f32-conv-hwc-test)
- ADD_EXECUTABLE(f32-conv-hwc2spchw-test test/f32-conv-hwc2spchw.cc)
- SET_TARGET_PROPERTIES(f32-conv-hwc2spchw-test PROPERTIES
+ ADD_EXECUTABLE(f32-conv-hwc2chw-test test/f32-conv-hwc2chw.cc)
+ SET_TARGET_PROPERTIES(f32-conv-hwc2chw-test PROPERTIES
CXX_STANDARD 11
CXX_STANDARD_REQUIRED YES
CXX_EXTENSIONS YES)
- TARGET_INCLUDE_DIRECTORIES(f32-conv-hwc2spchw-test PRIVATE src test)
- TARGET_LINK_LIBRARIES(f32-conv-hwc2spchw-test PRIVATE XNNPACK cpuinfo fp16 gtest gtest_main)
- ADD_TEST(f32-conv-hwc2spchw-test f32-conv-hwc2spchw-test)
+ TARGET_INCLUDE_DIRECTORIES(f32-conv-hwc2chw-test PRIVATE src test)
+ TARGET_LINK_LIBRARIES(f32-conv-hwc2chw-test PRIVATE XNNPACK cpuinfo fp16 gtest gtest_main)
+ ADD_TEST(f32-conv-hwc2chw-test f32-conv-hwc2chw-test)
- ADD_EXECUTABLE(f32-dwconv-spchw-test test/f32-dwconv-spchw.cc)
- SET_TARGET_PROPERTIES(f32-dwconv-spchw-test PROPERTIES
+ ADD_EXECUTABLE(f32-dwconv-chw-test test/f32-dwconv-chw.cc)
+ SET_TARGET_PROPERTIES(f32-dwconv-chw-test PROPERTIES
CXX_STANDARD 11
CXX_STANDARD_REQUIRED YES
CXX_EXTENSIONS YES)
- TARGET_INCLUDE_DIRECTORIES(f32-dwconv-spchw-test PRIVATE src test)
- TARGET_LINK_LIBRARIES(f32-dwconv-spchw-test PRIVATE XNNPACK cpuinfo fp16 gtest gtest_main)
- ADD_TEST(f32-dwconv-spchw-test f32-dwconv-spchw-test)
+ TARGET_INCLUDE_DIRECTORIES(f32-dwconv-chw-test PRIVATE src test)
+ TARGET_LINK_LIBRARIES(f32-dwconv-chw-test PRIVATE XNNPACK cpuinfo fp16 gtest gtest_main)
+ ADD_TEST(f32-dwconv-chw-test f32-dwconv-chw-test)
ADD_EXECUTABLE(f32-dwconv-test test/f32-dwconv.cc)
SET_TARGET_PROPERTIES(f32-dwconv-test PROPERTIES
@@ -2250,14 +2250,14 @@
TARGET_LINK_LIBRARIES(f32-dwconv-minmax-test PRIVATE XNNPACK cpuinfo fp16 gtest gtest_main)
ADD_TEST(f32-dwconv-minmax-test f32-dwconv-minmax-test)
- ADD_EXECUTABLE(f32-gavgpool-spchw-test test/f32-gavgpool-spchw.cc)
- SET_TARGET_PROPERTIES(f32-gavgpool-spchw-test PROPERTIES
+ ADD_EXECUTABLE(f32-gavgpool-cw-test test/f32-gavgpool-cw.cc)
+ SET_TARGET_PROPERTIES(f32-gavgpool-cw-test PROPERTIES
CXX_STANDARD 11
CXX_STANDARD_REQUIRED YES
CXX_EXTENSIONS YES)
- TARGET_INCLUDE_DIRECTORIES(f32-gavgpool-spchw-test PRIVATE src test)
- TARGET_LINK_LIBRARIES(f32-gavgpool-spchw-test PRIVATE XNNPACK cpuinfo fp16 gtest gtest_main)
- ADD_TEST(f32-gavgpool-spchw-test f32-gavgpool-spchw-test)
+ TARGET_INCLUDE_DIRECTORIES(f32-gavgpool-cw-test PRIVATE src test)
+ TARGET_LINK_LIBRARIES(f32-gavgpool-cw-test PRIVATE XNNPACK cpuinfo fp16 gtest gtest_main)
+ ADD_TEST(f32-gavgpool-cw-test f32-gavgpool-cw-test)
ADD_EXECUTABLE(f32-gavgpool-minmax-test test/f32-gavgpool-minmax.cc)
SET_TARGET_PROPERTIES(f32-gavgpool-minmax-test PROPERTIES
@@ -3114,14 +3114,14 @@
TARGET_INCLUDE_DIRECTORIES(f32-conv-hwc-bench PRIVATE "${CMAKE_CURRENT_SOURCE_DIR}")
TARGET_LINK_LIBRARIES(f32-conv-hwc-bench PRIVATE XNNPACK cpuinfo fp16 benchmark bench-utils)
- ADD_EXECUTABLE(f32-dwconv-spchw-bench bench/f32-dwconv-spchw.cc)
- SET_TARGET_PROPERTIES(f32-dwconv-spchw-bench PROPERTIES
+ ADD_EXECUTABLE(f32-dwconv-chw-bench bench/f32-dwconv-chw.cc)
+ SET_TARGET_PROPERTIES(f32-dwconv-chw-bench PROPERTIES
CXX_STANDARD 11
CXX_STANDARD_REQUIRED YES
CXX_EXTENSIONS YES)
- TARGET_INCLUDE_DIRECTORIES(f32-dwconv-spchw-bench PRIVATE src)
- TARGET_INCLUDE_DIRECTORIES(f32-dwconv-spchw-bench PRIVATE "${CMAKE_CURRENT_SOURCE_DIR}")
- TARGET_LINK_LIBRARIES(f32-dwconv-spchw-bench PRIVATE XNNPACK cpuinfo fp16 benchmark bench-utils)
+ TARGET_INCLUDE_DIRECTORIES(f32-dwconv-chw-bench PRIVATE src)
+ TARGET_INCLUDE_DIRECTORIES(f32-dwconv-chw-bench PRIVATE "${CMAKE_CURRENT_SOURCE_DIR}")
+ TARGET_LINK_LIBRARIES(f32-dwconv-chw-bench PRIVATE XNNPACK cpuinfo fp16 benchmark bench-utils)
ADD_EXECUTABLE(f32-dwconv-bench bench/f32-dwconv.cc)
SET_TARGET_PROPERTIES(f32-dwconv-bench PROPERTIES
diff --git a/bench/f32-conv-hwc2spchw.cc b/bench/f32-conv-hwc2chw.cc
similarity index 86%
rename from bench/f32-conv-hwc2spchw.cc
rename to bench/f32-conv-hwc2chw.cc
index 242808f..7063144 100644
--- a/bench/f32-conv-hwc2spchw.cc
+++ b/bench/f32-conv-hwc2chw.cc
@@ -23,8 +23,8 @@
#include <xnnpack/params.h>
-static void DConvHWC2SpCHW3X3S2P1Benchmark(benchmark::State& state,
- xnn_f32_conv_hwc2spchw_ukernel_function conv,
+static void DConvHWC2CHW3X3S2P1Benchmark(benchmark::State& state,
+ xnn_f32_conv_hwc2chw_ukernel_function conv,
uint32_t output_channels_tile)
{
if (!cpuinfo_initialize()) {
@@ -110,17 +110,17 @@
}
#if XNN_ARCH_ARM64
- static void f32_conv_hwc2spchw_3x3s2p1c3x4__neonfma_2x2(benchmark::State& state, const char* net) {
- DConvHWC2SpCHW3X3S2P1Benchmark(state, xnn_f32_conv_hwc2spchw_ukernel_3x3s2p1c3x4__neonfma_2x2, 4);
+ static void f32_conv_hwc2chw_3x3s2p1c3x4__neonfma_2x2(benchmark::State& state, const char* net) {
+ DConvHWC2CHW3X3S2P1Benchmark(state, xnn_f32_conv_hwc2chw_ukernel_3x3s2p1c3x4__neonfma_2x2, 4);
}
- BENCHMARK_DCONV(f32_conv_hwc2spchw_3x3s2p1c3x4__neonfma_2x2);
+ BENCHMARK_DCONV(f32_conv_hwc2chw_3x3s2p1c3x4__neonfma_2x2);
#endif
- static void f32_conv_hwc2spchw_3x3s2p1c3x4__scalar_1x1(benchmark::State& state, const char* net) {
- DConvHWC2SpCHW3X3S2P1Benchmark(state, xnn_f32_conv_hwc2spchw_ukernel_3x3s2p1c3x4__scalar_1x1, 4);
+ static void f32_conv_hwc2chw_3x3s2p1c3x4__scalar_1x1(benchmark::State& state, const char* net) {
+ DConvHWC2CHW3X3S2P1Benchmark(state, xnn_f32_conv_hwc2chw_ukernel_3x3s2p1c3x4__scalar_1x1, 4);
}
- BENCHMARK_DCONV(f32_conv_hwc2spchw_3x3s2p1c3x4__scalar_1x1);
+ BENCHMARK_DCONV(f32_conv_hwc2chw_3x3s2p1c3x4__scalar_1x1);
#ifndef XNNPACK_BENCHMARK_NO_MAIN
BENCHMARK_MAIN();
diff --git a/bench/f32-dwconv-spchw.cc b/bench/f32-dwconv-chw.cc
similarity index 83%
rename from bench/f32-dwconv-spchw.cc
rename to bench/f32-dwconv-chw.cc
index 8178ee7..f12195f 100644
--- a/bench/f32-dwconv-spchw.cc
+++ b/bench/f32-dwconv-chw.cc
@@ -26,7 +26,7 @@
static void DWConvCHWBenchmark(benchmark::State& state,
- xnn_f32_dwconv_spchw_ukernel_function dwconv,
+ xnn_f32_dwconv_chw_ukernel_function dwconv,
uint32_t it, uint32_t ot, uint32_t kh, uint32_t kw, uint32_t pw, uint32_t s)
{
if (!cpuinfo_initialize()) {
@@ -111,8 +111,8 @@
std::vector<float> output(o_elements * num_buffers);
std::fill(output.begin(), output.end(), std::nanf(""));
- xnn_f32_spchw_params spchw_params =
- xnn_init_f32_spchw_params(input_width, -std::numeric_limits<float>::infinity(), +std::numeric_limits<float>::infinity());
+ xnn_f32_chw_params chw_params =
+ xnn_init_f32_chw_params(input_width, -std::numeric_limits<float>::infinity(), +std::numeric_limits<float>::infinity());
size_t buffer_index = 0;
for (auto _ : state) {
@@ -131,7 +131,7 @@
padding_height / 2, // padding_top
it * sizeof(float), ot * sizeof(float),
input_width * sizeof(float), output_width * sizeof(float),
- &spchw_params);
+ &chw_params);
}
}
@@ -146,7 +146,7 @@
}
static void DWConvHWoTCTBenchmark(benchmark::State& state,
- xnn_f32_dwconv_spchw_ukernel_function dwconv,
+ xnn_f32_dwconv_chw_ukernel_function dwconv,
uint32_t it, uint32_t ot, uint32_t kh, uint32_t kw, uint32_t pw, uint32_t s)
{
if (!cpuinfo_initialize()) {
@@ -231,8 +231,8 @@
std::vector<float> output(o_elements * num_buffers);
std::fill(output.begin(), output.end(), std::nanf(""));
- xnn_f32_spchw_params spchw_params =
- xnn_init_f32_spchw_params(input_width, -std::numeric_limits<float>::infinity(), +std::numeric_limits<float>::infinity());
+ xnn_f32_chw_params chw_params =
+ xnn_init_f32_chw_params(input_width, -std::numeric_limits<float>::infinity(), +std::numeric_limits<float>::infinity());
size_t buffer_index = 0;
for (auto _ : state) {
@@ -252,7 +252,7 @@
it * channels * sizeof(float), ot * channels * sizeof(float),
benchmark::utils::RoundUp<size_t>(input_width, it) * channels * sizeof(float),
benchmark::utils::RoundUp<size_t>(output_width, ot) * channels * sizeof(float),
- &spchw_params);
+ &chw_params);
}
}
@@ -268,35 +268,35 @@
#if XNN_ARCH_ARM64
static void CHW_3x3p1__neonfma(benchmark::State& state, const char* net) {
- DWConvCHWBenchmark(state, xnn_f32_dwconv_spchw_ukernel_3x3p1__neonfma, 4, 4, 3, 3, 1, 1);
+ DWConvCHWBenchmark(state, xnn_f32_dwconv_chw_ukernel_3x3p1__neonfma, 4, 4, 3, 3, 1, 1);
}
static void CHW_5x5p2__neonfma(benchmark::State& state, const char* net) {
- DWConvCHWBenchmark(state, xnn_f32_dwconv_spchw_ukernel_5x5p2__neonfma, 4, 4, 5, 5, 2, 1);
+ DWConvCHWBenchmark(state, xnn_f32_dwconv_chw_ukernel_5x5p2__neonfma, 4, 4, 5, 5, 2, 1);
}
static void CHW_3x3s2p1__neonfma(benchmark::State& state, const char* net) {
- DWConvCHWBenchmark(state, xnn_f32_dwconv_spchw_ukernel_3x3s2p1__neonfma, 4, 4, 3, 3, 1, 2);
+ DWConvCHWBenchmark(state, xnn_f32_dwconv_chw_ukernel_3x3s2p1__neonfma, 4, 4, 3, 3, 1, 2);
}
static void CHW_5x5s2p2__neonfma(benchmark::State& state, const char* net) {
- DWConvCHWBenchmark(state, xnn_f32_dwconv_spchw_ukernel_5x5s2p2__neonfma, 4, 4, 5, 5, 2, 2);
+ DWConvCHWBenchmark(state, xnn_f32_dwconv_chw_ukernel_5x5s2p2__neonfma, 4, 4, 5, 5, 2, 2);
}
static void HWo4C4_3x3p1__neonfma(benchmark::State& state, const char* net) {
- DWConvHWoTCTBenchmark(state, xnn_f32_dwconv_spchw_ukernel_3x3p1__neonfma, 4, 4, 3, 3, 1, 1);
+ DWConvHWoTCTBenchmark(state, xnn_f32_dwconv_chw_ukernel_3x3p1__neonfma, 4, 4, 3, 3, 1, 1);
}
static void HWo4C4_5x5p2__neonfma(benchmark::State& state, const char* net) {
- DWConvHWoTCTBenchmark(state, xnn_f32_dwconv_spchw_ukernel_5x5p2__neonfma, 4, 4, 5, 5, 2, 1);
+ DWConvHWoTCTBenchmark(state, xnn_f32_dwconv_chw_ukernel_5x5p2__neonfma, 4, 4, 5, 5, 2, 1);
}
static void HWo4C4_3x3s2p1__neonfma(benchmark::State& state, const char* net) {
- DWConvHWoTCTBenchmark(state, xnn_f32_dwconv_spchw_ukernel_3x3s2p1__neonfma, 4, 4, 3, 3, 1, 2);
+ DWConvHWoTCTBenchmark(state, xnn_f32_dwconv_chw_ukernel_3x3s2p1__neonfma, 4, 4, 3, 3, 1, 2);
}
static void HWo4C4_5x5s2p2__neonfma(benchmark::State& state, const char* net) {
- DWConvHWoTCTBenchmark(state, xnn_f32_dwconv_spchw_ukernel_5x5s2p2__neonfma, 4, 4, 5, 5, 2, 2);
+ DWConvHWoTCTBenchmark(state, xnn_f32_dwconv_chw_ukernel_5x5s2p2__neonfma, 4, 4, 5, 5, 2, 2);
}
BENCHMARK_DWCONV(CHW_3x3p1__neonfma)
@@ -312,19 +312,19 @@
#if XNN_ARCH_X86 || XNN_ARCH_X86_64
static void CHW_3x3p1__sse(benchmark::State& state, const char* net) {
- DWConvCHWBenchmark(state, xnn_f32_dwconv_spchw_ukernel_3x3p1__sse, 4, 4, 3, 3, 1, 1);
+ DWConvCHWBenchmark(state, xnn_f32_dwconv_chw_ukernel_3x3p1__sse, 4, 4, 3, 3, 1, 1);
}
static void CHW_3x3s2p1__sse(benchmark::State& state, const char* net) {
- DWConvCHWBenchmark(state, xnn_f32_dwconv_spchw_ukernel_3x3s2p1__sse, 4, 4, 3, 3, 1, 2);
+ DWConvCHWBenchmark(state, xnn_f32_dwconv_chw_ukernel_3x3s2p1__sse, 4, 4, 3, 3, 1, 2);
}
static void HWo4C4_3x3p1__sse(benchmark::State& state, const char* net) {
- DWConvHWoTCTBenchmark(state, xnn_f32_dwconv_spchw_ukernel_3x3p1__sse, 4, 4, 3, 3, 1, 1);
+ DWConvHWoTCTBenchmark(state, xnn_f32_dwconv_chw_ukernel_3x3p1__sse, 4, 4, 3, 3, 1, 1);
}
static void HWo4C4_3x3s2p1__sse(benchmark::State& state, const char* net) {
- DWConvHWoTCTBenchmark(state, xnn_f32_dwconv_spchw_ukernel_3x3s2p1__sse, 4, 4, 3, 3, 1, 2);
+ DWConvHWoTCTBenchmark(state, xnn_f32_dwconv_chw_ukernel_3x3s2p1__sse, 4, 4, 3, 3, 1, 2);
}
BENCHMARK_DWCONV(CHW_3x3p1__sse)
@@ -334,35 +334,35 @@
#endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
static void CHW_3x3p1__scalar(benchmark::State& state, const char* net) {
- DWConvCHWBenchmark(state, xnn_f32_dwconv_spchw_ukernel_3x3p1__scalar, 1, 1, 3, 3, 1, 1);
+ DWConvCHWBenchmark(state, xnn_f32_dwconv_chw_ukernel_3x3p1__scalar, 1, 1, 3, 3, 1, 1);
}
static void CHW_5x5p2__scalar(benchmark::State& state, const char* net) {
- DWConvCHWBenchmark(state, xnn_f32_dwconv_spchw_ukernel_5x5p2__scalar, 1, 1, 5, 5, 2, 1);
+ DWConvCHWBenchmark(state, xnn_f32_dwconv_chw_ukernel_5x5p2__scalar, 1, 1, 5, 5, 2, 1);
}
static void CHW_3x3s2p1__scalar(benchmark::State& state, const char* net) {
- DWConvCHWBenchmark(state, xnn_f32_dwconv_spchw_ukernel_3x3s2p1__scalar, 1, 1, 3, 3, 1, 2);
+ DWConvCHWBenchmark(state, xnn_f32_dwconv_chw_ukernel_3x3s2p1__scalar, 1, 1, 3, 3, 1, 2);
}
static void CHW_5x5s2p2__scalar(benchmark::State& state, const char* net) {
- DWConvCHWBenchmark(state, xnn_f32_dwconv_spchw_ukernel_5x5s2p2__scalar, 1, 1, 5, 5, 2, 2);
+ DWConvCHWBenchmark(state, xnn_f32_dwconv_chw_ukernel_5x5s2p2__scalar, 1, 1, 5, 5, 2, 2);
}
static void HWC_3x3p1__scalar(benchmark::State& state, const char* net) {
- DWConvHWoTCTBenchmark(state, xnn_f32_dwconv_spchw_ukernel_3x3p1__scalar, 1, 1, 3, 3, 1, 1);
+ DWConvHWoTCTBenchmark(state, xnn_f32_dwconv_chw_ukernel_3x3p1__scalar, 1, 1, 3, 3, 1, 1);
}
static void HWC_5x5p2__scalar(benchmark::State& state, const char* net) {
- DWConvHWoTCTBenchmark(state, xnn_f32_dwconv_spchw_ukernel_5x5p2__scalar, 1, 1, 5, 5, 2, 1);
+ DWConvHWoTCTBenchmark(state, xnn_f32_dwconv_chw_ukernel_5x5p2__scalar, 1, 1, 5, 5, 2, 1);
}
static void HWC_3x3s2p1__scalar(benchmark::State& state, const char* net) {
- DWConvHWoTCTBenchmark(state, xnn_f32_dwconv_spchw_ukernel_3x3s2p1__scalar, 1, 1, 3, 3, 1, 2);
+ DWConvHWoTCTBenchmark(state, xnn_f32_dwconv_chw_ukernel_3x3s2p1__scalar, 1, 1, 3, 3, 1, 2);
}
static void HWC_5x5s2p2__scalar(benchmark::State& state, const char* net) {
- DWConvHWoTCTBenchmark(state, xnn_f32_dwconv_spchw_ukernel_5x5s2p2__scalar, 1, 1, 5, 5, 2, 2);
+ DWConvHWoTCTBenchmark(state, xnn_f32_dwconv_chw_ukernel_5x5s2p2__scalar, 1, 1, 5, 5, 2, 2);
}
diff --git a/src/f32-conv-hwc2spchw/3x3s2p1c3x4-neonfma-2x2.c b/src/f32-conv-hwc2chw/3x3s2p1c3x4-neonfma-2x2.c
similarity index 99%
rename from src/f32-conv-hwc2spchw/3x3s2p1c3x4-neonfma-2x2.c
rename to src/f32-conv-hwc2chw/3x3s2p1c3x4-neonfma-2x2.c
index e01d9ab..50961ab 100644
--- a/src/f32-conv-hwc2spchw/3x3s2p1c3x4-neonfma-2x2.c
+++ b/src/f32-conv-hwc2chw/3x3s2p1c3x4-neonfma-2x2.c
@@ -11,7 +11,7 @@
#include <xnnpack/math.h>
-void xnn_f32_conv_hwc2spchw_ukernel_3x3s2p1c3x4__neonfma_2x2(
+void xnn_f32_conv_hwc2chw_ukernel_3x3s2p1c3x4__neonfma_2x2(
size_t input_height,
size_t input_width,
size_t output_y_start,
diff --git a/src/f32-conv-hwc2spchw/3x3s2p1c3x4-scalar-1x1.c b/src/f32-conv-hwc2chw/3x3s2p1c3x4-scalar-1x1.c
similarity index 99%
rename from src/f32-conv-hwc2spchw/3x3s2p1c3x4-scalar-1x1.c
rename to src/f32-conv-hwc2chw/3x3s2p1c3x4-scalar-1x1.c
index 9e26884..d1aca6d 100644
--- a/src/f32-conv-hwc2spchw/3x3s2p1c3x4-scalar-1x1.c
+++ b/src/f32-conv-hwc2chw/3x3s2p1c3x4-scalar-1x1.c
@@ -9,7 +9,7 @@
#include <xnnpack/math.h>
-void xnn_f32_conv_hwc2spchw_ukernel_3x3s2p1c3x4__scalar_1x1(
+void xnn_f32_conv_hwc2chw_ukernel_3x3s2p1c3x4__scalar_1x1(
size_t input_height,
size_t input_width,
size_t output_y_start,
diff --git a/src/f32-dwconv-spchw/3x3p1-neonfma.c b/src/f32-dwconv-chw/3x3p1-neonfma.c
similarity index 98%
rename from src/f32-dwconv-spchw/3x3p1-neonfma.c
rename to src/f32-dwconv-chw/3x3p1-neonfma.c
index 34ea960..822a231 100644
--- a/src/f32-dwconv-spchw/3x3p1-neonfma.c
+++ b/src/f32-dwconv-chw/3x3p1-neonfma.c
@@ -11,7 +11,7 @@
#include <xnnpack/math.h>
-void xnn_f32_dwconv_spchw_ukernel_3x3p1__neonfma(
+void xnn_f32_dwconv_chw_ukernel_3x3p1__neonfma(
size_t input_height,
size_t input_width,
const float* input,
@@ -23,7 +23,7 @@
size_t output_tuple_stride,
size_t input_width_stride,
size_t output_width_stride,
- const union xnn_f32_spchw_params params[restrict XNN_MIN_ELEMENTS(1)])
+ const union xnn_f32_chw_params params[restrict XNN_MIN_ELEMENTS(1)])
{
assert(input_width != 0);
assert(input_height != 0);
diff --git a/src/f32-dwconv-spchw/3x3p1-scalar.c b/src/f32-dwconv-chw/3x3p1-scalar.c
similarity index 96%
rename from src/f32-dwconv-spchw/3x3p1-scalar.c
rename to src/f32-dwconv-chw/3x3p1-scalar.c
index 607cbf7..17ebf1b 100644
--- a/src/f32-dwconv-spchw/3x3p1-scalar.c
+++ b/src/f32-dwconv-chw/3x3p1-scalar.c
@@ -9,7 +9,7 @@
#include <xnnpack/math.h>
-void xnn_f32_dwconv_spchw_ukernel_3x3p1__scalar(
+void xnn_f32_dwconv_chw_ukernel_3x3p1__scalar(
size_t input_height,
size_t input_width,
const float* input,
@@ -21,7 +21,7 @@
size_t output_tuple_stride,
size_t input_width_stride,
size_t output_width_stride,
- const union xnn_f32_spchw_params params[restrict XNN_MIN_ELEMENTS(1)])
+ const union xnn_f32_chw_params params[restrict XNN_MIN_ELEMENTS(1)])
{
assert(input_width != 0);
assert(input_height != 0);
diff --git a/src/f32-dwconv-spchw/3x3p1-sse.c b/src/f32-dwconv-chw/3x3p1-sse.c
similarity index 98%
rename from src/f32-dwconv-spchw/3x3p1-sse.c
rename to src/f32-dwconv-chw/3x3p1-sse.c
index d8a5d03..1e89228 100644
--- a/src/f32-dwconv-spchw/3x3p1-sse.c
+++ b/src/f32-dwconv-chw/3x3p1-sse.c
@@ -11,7 +11,7 @@
#include <xnnpack/math.h>
-void xnn_f32_dwconv_spchw_ukernel_3x3p1__sse(
+void xnn_f32_dwconv_chw_ukernel_3x3p1__sse(
size_t input_height,
size_t input_width,
const float* input,
@@ -23,7 +23,7 @@
size_t output_tuple_stride,
size_t input_width_stride,
size_t output_width_stride,
- const union xnn_f32_spchw_params params[restrict XNN_MIN_ELEMENTS(1)])
+ const union xnn_f32_chw_params params[restrict XNN_MIN_ELEMENTS(1)])
{
assert(input_width != 0);
assert(input_height != 0);
diff --git a/src/f32-dwconv-spchw/3x3s2p1-neonfma.c b/src/f32-dwconv-chw/3x3s2p1-neonfma.c
similarity index 98%
rename from src/f32-dwconv-spchw/3x3s2p1-neonfma.c
rename to src/f32-dwconv-chw/3x3s2p1-neonfma.c
index f7b7395..1f936f6 100644
--- a/src/f32-dwconv-spchw/3x3s2p1-neonfma.c
+++ b/src/f32-dwconv-chw/3x3s2p1-neonfma.c
@@ -11,7 +11,7 @@
#include <xnnpack/math.h>
-void xnn_f32_dwconv_spchw_ukernel_3x3s2p1__neonfma(
+void xnn_f32_dwconv_chw_ukernel_3x3s2p1__neonfma(
size_t input_height,
size_t input_width,
const float* input,
@@ -23,7 +23,7 @@
size_t output_tuple_stride,
size_t input_width_stride,
size_t output_width_stride,
- const union xnn_f32_spchw_params params[restrict XNN_MIN_ELEMENTS(1)])
+ const union xnn_f32_chw_params params[restrict XNN_MIN_ELEMENTS(1)])
{
assert(input_height!= 0);
assert(input_width != 0);
diff --git a/src/f32-dwconv-spchw/3x3s2p1-scalar.c b/src/f32-dwconv-chw/3x3s2p1-scalar.c
similarity index 97%
rename from src/f32-dwconv-spchw/3x3s2p1-scalar.c
rename to src/f32-dwconv-chw/3x3s2p1-scalar.c
index 31398c7..44f91e0 100644
--- a/src/f32-dwconv-spchw/3x3s2p1-scalar.c
+++ b/src/f32-dwconv-chw/3x3s2p1-scalar.c
@@ -11,7 +11,7 @@
#include <xnnpack/math.h>
-void xnn_f32_dwconv_spchw_ukernel_3x3s2p1__scalar(
+void xnn_f32_dwconv_chw_ukernel_3x3s2p1__scalar(
size_t input_height,
size_t input_width,
const float* input,
@@ -23,7 +23,7 @@
size_t output_tuple_stride,
size_t input_width_stride,
size_t output_width_stride,
- const union xnn_f32_spchw_params params[restrict XNN_MIN_ELEMENTS(1)])
+ const union xnn_f32_chw_params params[restrict XNN_MIN_ELEMENTS(1)])
{
assert(input_height!= 0);
assert(input_width != 0);
diff --git a/src/f32-dwconv-spchw/3x3s2p1-sse.c b/src/f32-dwconv-chw/3x3s2p1-sse.c
similarity index 98%
rename from src/f32-dwconv-spchw/3x3s2p1-sse.c
rename to src/f32-dwconv-chw/3x3s2p1-sse.c
index 0bb4446..74167fd 100644
--- a/src/f32-dwconv-spchw/3x3s2p1-sse.c
+++ b/src/f32-dwconv-chw/3x3s2p1-sse.c
@@ -11,7 +11,7 @@
#include <xnnpack/math.h>
-void xnn_f32_dwconv_spchw_ukernel_3x3s2p1__sse(
+void xnn_f32_dwconv_chw_ukernel_3x3s2p1__sse(
size_t input_height,
size_t input_width,
const float* input,
@@ -23,7 +23,7 @@
size_t output_tuple_stride,
size_t input_width_stride,
size_t output_width_stride,
- const union xnn_f32_spchw_params params[restrict XNN_MIN_ELEMENTS(1)])
+ const union xnn_f32_chw_params params[restrict XNN_MIN_ELEMENTS(1)])
{
assert(input_height!= 0);
assert(input_width != 0);
diff --git a/src/f32-dwconv-spchw/5x5p2-neonfma.c b/src/f32-dwconv-chw/5x5p2-neonfma.c
similarity index 99%
rename from src/f32-dwconv-spchw/5x5p2-neonfma.c
rename to src/f32-dwconv-chw/5x5p2-neonfma.c
index 215bf77..37a1914 100644
--- a/src/f32-dwconv-spchw/5x5p2-neonfma.c
+++ b/src/f32-dwconv-chw/5x5p2-neonfma.c
@@ -11,7 +11,7 @@
#include <xnnpack/math.h>
-void xnn_f32_dwconv_spchw_ukernel_5x5p2__neonfma(
+void xnn_f32_dwconv_chw_ukernel_5x5p2__neonfma(
size_t input_height,
size_t input_width,
const float* input,
@@ -23,7 +23,7 @@
size_t output_tuple_stride,
size_t input_width_stride,
size_t output_width_stride,
- const union xnn_f32_spchw_params params[restrict XNN_MIN_ELEMENTS(1)])
+ const union xnn_f32_chw_params params[restrict XNN_MIN_ELEMENTS(1)])
{
assert(input_width != 0);
assert(input_height != 0);
diff --git a/src/f32-dwconv-spchw/5x5p2-scalar.c b/src/f32-dwconv-chw/5x5p2-scalar.c
similarity index 98%
rename from src/f32-dwconv-spchw/5x5p2-scalar.c
rename to src/f32-dwconv-chw/5x5p2-scalar.c
index 7a705fc..b41011b 100644
--- a/src/f32-dwconv-spchw/5x5p2-scalar.c
+++ b/src/f32-dwconv-chw/5x5p2-scalar.c
@@ -9,7 +9,7 @@
#include <xnnpack/math.h>
-void xnn_f32_dwconv_spchw_ukernel_5x5p2__scalar(
+void xnn_f32_dwconv_chw_ukernel_5x5p2__scalar(
size_t input_height,
size_t input_width,
const float* input,
@@ -21,7 +21,7 @@
size_t output_tuple_stride,
size_t input_width_stride,
size_t output_width_stride,
- const union xnn_f32_spchw_params params[restrict XNN_MIN_ELEMENTS(1)])
+ const union xnn_f32_chw_params params[restrict XNN_MIN_ELEMENTS(1)])
{
assert(input_width != 0);
assert(input_height != 0);
diff --git a/src/f32-dwconv-spchw/5x5s2p2-neonfma.c b/src/f32-dwconv-chw/5x5s2p2-neonfma.c
similarity index 99%
rename from src/f32-dwconv-spchw/5x5s2p2-neonfma.c
rename to src/f32-dwconv-chw/5x5s2p2-neonfma.c
index 9f21352..acfe8fe 100644
--- a/src/f32-dwconv-spchw/5x5s2p2-neonfma.c
+++ b/src/f32-dwconv-chw/5x5s2p2-neonfma.c
@@ -11,7 +11,7 @@
#include <xnnpack/math.h>
-void xnn_f32_dwconv_spchw_ukernel_5x5s2p2__neonfma(
+void xnn_f32_dwconv_chw_ukernel_5x5s2p2__neonfma(
size_t input_height,
size_t input_width,
const float* input,
@@ -23,7 +23,7 @@
size_t output_tuple_stride,
size_t input_width_stride,
size_t output_width_stride,
- const union xnn_f32_spchw_params params[restrict XNN_MIN_ELEMENTS(1)])
+ const union xnn_f32_chw_params params[restrict XNN_MIN_ELEMENTS(1)])
{
assert(input_width != 0);
assert(input_height != 0);
diff --git a/src/f32-dwconv-spchw/5x5s2p2-scalar.c b/src/f32-dwconv-chw/5x5s2p2-scalar.c
similarity index 98%
rename from src/f32-dwconv-spchw/5x5s2p2-scalar.c
rename to src/f32-dwconv-chw/5x5s2p2-scalar.c
index 2b16c0a..e4cc53c 100644
--- a/src/f32-dwconv-spchw/5x5s2p2-scalar.c
+++ b/src/f32-dwconv-chw/5x5s2p2-scalar.c
@@ -9,7 +9,7 @@
#include <xnnpack/math.h>
-void xnn_f32_dwconv_spchw_ukernel_5x5s2p2__scalar(
+void xnn_f32_dwconv_chw_ukernel_5x5s2p2__scalar(
size_t input_height,
size_t input_width,
const float* input,
@@ -21,7 +21,7 @@
size_t output_tuple_stride,
size_t input_width_stride,
size_t output_width_stride,
- const union xnn_f32_spchw_params params[restrict XNN_MIN_ELEMENTS(1)])
+ const union xnn_f32_chw_params params[restrict XNN_MIN_ELEMENTS(1)])
{
assert(input_width != 0);
assert(input_height != 0);
diff --git a/src/f32-gavgpool-spchw/neon-x4.c b/src/f32-gavgpool-cw/neon-x4.c
similarity index 98%
rename from src/f32-gavgpool-spchw/neon-x4.c
rename to src/f32-gavgpool-cw/neon-x4.c
index 849ca22..f0daeca 100644
--- a/src/f32-gavgpool-spchw/neon-x4.c
+++ b/src/f32-gavgpool-cw/neon-x4.c
@@ -11,7 +11,7 @@
#include <xnnpack/math.h>
-void xnn_f32_gavgpool_spchw_ukernel__neon_x4(
+void xnn_f32_gavgpool_cw_ukernel__neon_x4(
size_t elements,
size_t channels,
const float* input,
diff --git a/src/f32-gavgpool-spchw/scalar-x1.c b/src/f32-gavgpool-cw/scalar-x1.c
similarity index 96%
rename from src/f32-gavgpool-spchw/scalar-x1.c
rename to src/f32-gavgpool-cw/scalar-x1.c
index 96e7977..6805f56 100644
--- a/src/f32-gavgpool-spchw/scalar-x1.c
+++ b/src/f32-gavgpool-cw/scalar-x1.c
@@ -9,7 +9,7 @@
#include <xnnpack/math.h>
-void xnn_f32_gavgpool_spchw_ukernel__scalar_x1(
+void xnn_f32_gavgpool_cw_ukernel__scalar_x1(
size_t elements,
size_t channels,
const float* input,
diff --git a/src/f32-gavgpool-spchw/sse-x4.c b/src/f32-gavgpool-cw/sse-x4.c
similarity index 98%
rename from src/f32-gavgpool-spchw/sse-x4.c
rename to src/f32-gavgpool-cw/sse-x4.c
index 8a100b4..52ba111 100644
--- a/src/f32-gavgpool-spchw/sse-x4.c
+++ b/src/f32-gavgpool-cw/sse-x4.c
@@ -11,7 +11,7 @@
#include <xnnpack/math.h>
-void xnn_f32_gavgpool_spchw_ukernel__sse_x4(
+void xnn_f32_gavgpool_cw_ukernel__sse_x4(
size_t elements,
size_t channels,
const float* input,
diff --git a/src/init.c b/src/init.c
index 3194357..1d3bba1 100644
--- a/src/init.c
+++ b/src/init.c
@@ -551,39 +551,39 @@
.mr = 8,
.nr = 4,
};
- xnn_params.f32.hwc2spchw_dconv3x3c3s2 = (struct hwc2spchw_dconv_parameters) {
+ xnn_params.f32.conv_hwc2chw_3x3c3s2 = (struct conv_hwc2chw_parameters) {
.ukernel_with_symm_padding =
- (xnn_conv_hwc2spchw_ukernel_function) xnn_f32_conv_hwc2spchw_ukernel_3x3s2p1c3x4__scalar_1x1,
+ (xnn_conv_hwc2chw_ukernel_function) xnn_f32_conv_hwc2chw_ukernel_3x3s2p1c3x4__scalar_1x1,
.output_channel_tile = 4,
.output_height_tile = 1,
.output_width_tile = 1,
};
- xnn_params.f32.spchw_dwconv3x3 = (struct spchw_dwconv_parameters) {
- .ukernel = (xnn_dwconv_spchw_ukernel_function) xnn_f32_dwconv_spchw_ukernel_3x3p1__scalar,
+ xnn_params.f32.dwconv_chw_3x3 = (struct dwconv_chw_parameters) {
+ .ukernel = (xnn_dwconv_chw_ukernel_function) xnn_f32_dwconv_chw_ukernel_3x3p1__scalar,
.input_width_tile = 1,
.output_width_tile = 1,
.output_height_tile = 1,
};
- xnn_params.f32.spchw_dwconv3x3s2 = (struct spchw_dwconv_parameters) {
- .ukernel = (xnn_dwconv_spchw_ukernel_function) xnn_f32_dwconv_spchw_ukernel_3x3s2p1__scalar,
+ xnn_params.f32.dwconv_chw_3x3s2 = (struct dwconv_chw_parameters) {
+ .ukernel = (xnn_dwconv_chw_ukernel_function) xnn_f32_dwconv_chw_ukernel_3x3s2p1__scalar,
.input_width_tile = 1,
.output_width_tile = 1,
.output_height_tile = 1,
};
- xnn_params.f32.spchw_dwconv5x5 = (struct spchw_dwconv_parameters) {
- .ukernel = (xnn_dwconv_spchw_ukernel_function) xnn_f32_dwconv_spchw_ukernel_5x5p2__scalar,
+ xnn_params.f32.dwconv_chw_5x5 = (struct dwconv_chw_parameters) {
+ .ukernel = (xnn_dwconv_chw_ukernel_function) xnn_f32_dwconv_chw_ukernel_5x5p2__scalar,
.input_width_tile = 1,
.output_width_tile = 1,
.output_height_tile = 1,
};
- xnn_params.f32.spchw_dwconv5x5s2 = (struct spchw_dwconv_parameters) {
- .ukernel = (xnn_dwconv_spchw_ukernel_function) xnn_f32_dwconv_spchw_ukernel_5x5s2p2__scalar,
+ xnn_params.f32.dwconv_chw_5x5s2 = (struct dwconv_chw_parameters) {
+ .ukernel = (xnn_dwconv_chw_ukernel_function) xnn_f32_dwconv_chw_ukernel_5x5s2p2__scalar,
.input_width_tile = 1,
.output_width_tile = 1,
.output_height_tile = 1,
};
- xnn_params.f32.spchw_gavgpool = (struct spchw_gavgpool_parameters) {
- .ukernel = (xnn_gavgpool_spchw_ukernel_function) xnn_f32_gavgpool_spchw_ukernel__scalar_x1,
+ xnn_params.f32.gavgpool_cw = (struct gavgpool_cw_parameters) {
+ .ukernel = (xnn_gavgpool_cw_ukernel_function) xnn_f32_gavgpool_cw_ukernel__scalar_x1,
.channel_tile = 1,
};
#endif // XNN_NO_NCHW_OPERATORS
@@ -954,39 +954,39 @@
.mr = 16,
.nr = 4,
};
- xnn_params.f32.hwc2spchw_dconv3x3c3s2 = (struct hwc2spchw_dconv_parameters) {
+ xnn_params.f32.conv_hwc2chw_3x3c3s2 = (struct conv_hwc2chw_parameters) {
.ukernel_with_symm_padding =
- (xnn_conv_hwc2spchw_ukernel_function) xnn_f32_conv_hwc2spchw_ukernel_3x3s2p1c3x4__neonfma_2x2,
+ (xnn_conv_hwc2chw_ukernel_function) xnn_f32_conv_hwc2chw_ukernel_3x3s2p1c3x4__neonfma_2x2,
.output_channel_tile = 4,
.output_height_tile = 2,
.output_width_tile = 2,
};
- xnn_params.f32.spchw_dwconv3x3 = (struct spchw_dwconv_parameters) {
- .ukernel = (xnn_dwconv_spchw_ukernel_function) xnn_f32_dwconv_spchw_ukernel_3x3p1__neonfma,
+ xnn_params.f32.dwconv_chw_3x3 = (struct dwconv_chw_parameters) {
+ .ukernel = (xnn_dwconv_chw_ukernel_function) xnn_f32_dwconv_chw_ukernel_3x3p1__neonfma,
.input_width_tile = 4,
.output_width_tile = 4,
.output_height_tile = 3,
};
- xnn_params.f32.spchw_dwconv3x3s2 = (struct spchw_dwconv_parameters) {
- .ukernel = (xnn_dwconv_spchw_ukernel_function) xnn_f32_dwconv_spchw_ukernel_3x3s2p1__neonfma,
+ xnn_params.f32.dwconv_chw_3x3s2 = (struct dwconv_chw_parameters) {
+ .ukernel = (xnn_dwconv_chw_ukernel_function) xnn_f32_dwconv_chw_ukernel_3x3s2p1__neonfma,
.input_width_tile = 4,
.output_width_tile = 4,
.output_height_tile = 1,
};
- xnn_params.f32.spchw_dwconv5x5 = (struct spchw_dwconv_parameters) {
- .ukernel = (xnn_dwconv_spchw_ukernel_function) xnn_f32_dwconv_spchw_ukernel_5x5p2__neonfma,
+ xnn_params.f32.dwconv_chw_5x5 = (struct dwconv_chw_parameters) {
+ .ukernel = (xnn_dwconv_chw_ukernel_function) xnn_f32_dwconv_chw_ukernel_5x5p2__neonfma,
.input_width_tile = 4,
.output_width_tile = 4,
.output_height_tile = 3,
};
- xnn_params.f32.spchw_dwconv5x5s2 = (struct spchw_dwconv_parameters) {
- .ukernel = (xnn_dwconv_spchw_ukernel_function) xnn_f32_dwconv_spchw_ukernel_5x5s2p2__neonfma,
+ xnn_params.f32.dwconv_chw_5x5s2 = (struct dwconv_chw_parameters) {
+ .ukernel = (xnn_dwconv_chw_ukernel_function) xnn_f32_dwconv_chw_ukernel_5x5s2p2__neonfma,
.input_width_tile = 4,
.output_width_tile = 4,
.output_height_tile = 1,
};
- xnn_params.f32.spchw_gavgpool = (struct spchw_gavgpool_parameters) {
- .ukernel = (xnn_gavgpool_spchw_ukernel_function) xnn_f32_gavgpool_spchw_ukernel__neon_x4,
+ xnn_params.f32.gavgpool_cw = (struct gavgpool_cw_parameters) {
+ .ukernel = (xnn_gavgpool_cw_ukernel_function) xnn_f32_gavgpool_cw_ukernel__neon_x4,
.channel_tile = 4,
};
#endif // XNN_NO_NCHW_OPERATORS
@@ -1367,20 +1367,20 @@
.mr = 4,
.nr = 1,
};
- xnn_params.f32.spchw_dwconv3x3 = (struct spchw_dwconv_parameters) {
- .ukernel = (xnn_dwconv_spchw_ukernel_function) xnn_f32_dwconv_spchw_ukernel_3x3p1__sse,
+ xnn_params.f32.dwconv_chw_3x3 = (struct dwconv_chw_parameters) {
+ .ukernel = (xnn_dwconv_chw_ukernel_function) xnn_f32_dwconv_chw_ukernel_3x3p1__sse,
.input_width_tile = 4,
.output_width_tile = 4,
.output_height_tile = 1,
};
- xnn_params.f32.spchw_dwconv3x3s2 = (struct spchw_dwconv_parameters) {
- .ukernel = (xnn_dwconv_spchw_ukernel_function) xnn_f32_dwconv_spchw_ukernel_3x3s2p1__sse,
+ xnn_params.f32.dwconv_chw_3x3s2 = (struct dwconv_chw_parameters) {
+ .ukernel = (xnn_dwconv_chw_ukernel_function) xnn_f32_dwconv_chw_ukernel_3x3s2p1__sse,
.input_width_tile = 4,
.output_width_tile = 4,
.output_height_tile = 1,
};
- xnn_params.f32.spchw_gavgpool = (struct spchw_gavgpool_parameters) {
- .ukernel = (xnn_gavgpool_spchw_ukernel_function) xnn_f32_gavgpool_spchw_ukernel__sse_x4,
+ xnn_params.f32.gavgpool_cw = (struct gavgpool_cw_parameters) {
+ .ukernel = (xnn_gavgpool_cw_ukernel_function) xnn_f32_gavgpool_cw_ukernel__sse_x4,
.channel_tile = 4,
};
#endif // XNN_NO_NCHW_OPERATORS
@@ -1797,39 +1797,39 @@
.mr = 8,
.nr = 4,
};
- xnn_params.f32.hwc2spchw_dconv3x3c3s2 = (struct hwc2spchw_dconv_parameters) {
+ xnn_params.f32.conv_hwc2chw_3x3c3s2 = (struct conv_hwc2chw_parameters) {
.ukernel_with_symm_padding =
- (xnn_conv_hwc2spchw_ukernel_function) xnn_f32_conv_hwc2spchw_ukernel_3x3s2p1c3x4__scalar_1x1,
+ (xnn_conv_hwc2chw_ukernel_function) xnn_f32_conv_hwc2chw_ukernel_3x3s2p1c3x4__scalar_1x1,
.output_channel_tile = 4,
.output_height_tile = 1,
.output_width_tile = 1,
};
- xnn_params.f32.spchw_dwconv3x3 = (struct spchw_dwconv_parameters) {
- .ukernel = (xnn_dwconv_spchw_ukernel_function) xnn_f32_dwconv_spchw_ukernel_3x3p1__scalar,
+ xnn_params.f32.dwconv_chw_3x3 = (struct dwconv_chw_parameters) {
+ .ukernel = (xnn_dwconv_chw_ukernel_function) xnn_f32_dwconv_chw_ukernel_3x3p1__scalar,
.input_width_tile = 1,
.output_width_tile = 1,
.output_height_tile = 1,
};
- xnn_params.f32.spchw_dwconv3x3s2 = (struct spchw_dwconv_parameters) {
- .ukernel = (xnn_dwconv_spchw_ukernel_function) xnn_f32_dwconv_spchw_ukernel_3x3s2p1__scalar,
+ xnn_params.f32.dwconv_chw_3x3s2 = (struct dwconv_chw_parameters) {
+ .ukernel = (xnn_dwconv_chw_ukernel_function) xnn_f32_dwconv_chw_ukernel_3x3s2p1__scalar,
.input_width_tile = 1,
.output_width_tile = 1,
.output_height_tile = 1,
};
- xnn_params.f32.spchw_dwconv5x5 = (struct spchw_dwconv_parameters) {
- .ukernel = (xnn_dwconv_spchw_ukernel_function) xnn_f32_dwconv_spchw_ukernel_5x5p2__scalar,
+ xnn_params.f32.dwconv_chw_5x5 = (struct dwconv_chw_parameters) {
+ .ukernel = (xnn_dwconv_chw_ukernel_function) xnn_f32_dwconv_chw_ukernel_5x5p2__scalar,
.input_width_tile = 1,
.output_width_tile = 1,
.output_height_tile = 1,
};
- xnn_params.f32.spchw_dwconv5x5s2 = (struct spchw_dwconv_parameters) {
- .ukernel = (xnn_dwconv_spchw_ukernel_function) xnn_f32_dwconv_spchw_ukernel_5x5s2p2__scalar,
+ xnn_params.f32.dwconv_chw_5x5s2 = (struct dwconv_chw_parameters) {
+ .ukernel = (xnn_dwconv_chw_ukernel_function) xnn_f32_dwconv_chw_ukernel_5x5s2p2__scalar,
.input_width_tile = 1,
.output_width_tile = 1,
.output_height_tile = 1,
};
- xnn_params.f32.spchw_gavgpool = (struct spchw_gavgpool_parameters) {
- .ukernel = (xnn_gavgpool_spchw_ukernel_function) xnn_f32_gavgpool_spchw_ukernel__scalar_x1,
+ xnn_params.f32.gavgpool_cw = (struct gavgpool_cw_parameters) {
+ .ukernel = (xnn_gavgpool_cw_ukernel_function) xnn_f32_gavgpool_cw_ukernel__scalar_x1,
.channel_tile = 1,
};
#endif // XNN_NO_NCHW_OPERATORS
diff --git a/src/operator-run.c b/src/operator-run.c
index 45b5e84..fe05f8d 100644
--- a/src/operator-run.c
+++ b/src/operator-run.c
@@ -291,13 +291,13 @@
&context->params);
}
-void xnn_compute_dconv2d_hwc2spchw(
- const struct dconv2d_context context[restrict XNN_MIN_ELEMENTS(1)],
+void xnn_compute_conv2d_hwc2chw(
+ const struct conv2d_context context[restrict XNN_MIN_ELEMENTS(1)],
size_t batch_index,
size_t output_y_start,
size_t output_y_slice)
{
- context->hwc2spchw_ukernel(
+ context->hwc2chw_ukernel(
context->input_height,
context->input_width,
output_y_start,
@@ -328,12 +328,12 @@
&context->params);
}
-void xnn_compute_dwconv2d_spchw(
+void xnn_compute_dwconv2d_chw(
const struct dwconv2d_context context[restrict XNN_MIN_ELEMENTS(1)],
size_t batch_index,
size_t channel)
{
- context->spchw_ukernel(
+ context->chw_ukernel(
context->input_height,
context->input_width,
(const void*) ((uintptr_t) context->input + channel * context->input_channel_stride + batch_index * context->input_batch_stride),
diff --git a/src/operators/convolution-nchw.c b/src/operators/convolution-nchw.c
index d959ff4..f7b47bb 100644
--- a/src/operators/convolution-nchw.c
+++ b/src/operators/convolution-nchw.c
@@ -141,7 +141,7 @@
status = xnn_status_unsupported_parameter;
enum xnn_ukernel_type ukernel_type;
- struct spchw_dwconv_parameters* dwconv_parameters = NULL;
+ struct dwconv_chw_parameters* dwconv_parameters = NULL;
// Supported cases:
// + 1x1 convolution (no groups)
// + 3x3 stride-2 with 3 input channels and NHWC input layout
@@ -158,33 +158,33 @@
ukernel_type = xnn_ukernel_type_spmm;
} else if (is_3x3 && subsampling_height == 2 && subsampling_width == 2 &&
input_padding_top == 1 && input_padding_left == 1 && input_padding_bottom == 1 && input_padding_right == 1 &&
- nhwc_input && groups == 1 && xnn_params.f32.hwc2spchw_dconv3x3c3s2.ukernel_with_symm_padding != NULL)
+ nhwc_input && groups == 1 && xnn_params.f32.conv_hwc2chw_3x3c3s2.ukernel_with_symm_padding != NULL)
{
- ukernel_type = xnn_ukernel_type_dconv2d_hwc2spchw;
+ ukernel_type = xnn_ukernel_type_conv2d_hwc2chw;
} else if (is_3x3 && subsampling_height == 1 && subsampling_width == 1 &&
input_padding_top == 1 && input_padding_left == 1 && input_padding_bottom == 1 && input_padding_right == 1 &&
- !nhwc_input && group_input_channels == 1 && group_output_channels == 1 && xnn_params.f32.spchw_dwconv3x3.ukernel != NULL)
+ !nhwc_input && group_input_channels == 1 && group_output_channels == 1 && xnn_params.f32.dwconv_chw_3x3.ukernel != NULL)
{
ukernel_type = xnn_ukernel_type_dwconv;
- dwconv_parameters = &xnn_params.f32.spchw_dwconv3x3;
+ dwconv_parameters = &xnn_params.f32.dwconv_chw_3x3;
} else if (is_3x3 && subsampling_height == 2 && subsampling_width == 2 &&
(input_padding_top == 0 || input_padding_top == 1) && input_padding_left == 1 && input_padding_bottom == 1 && input_padding_right == 1 &&
- !nhwc_input && group_input_channels == 1 && group_output_channels == 1 && xnn_params.f32.spchw_dwconv3x3s2.ukernel != NULL)
+ !nhwc_input && group_input_channels == 1 && group_output_channels == 1 && xnn_params.f32.dwconv_chw_3x3s2.ukernel != NULL)
{
ukernel_type = xnn_ukernel_type_dwconv;
- dwconv_parameters = &xnn_params.f32.spchw_dwconv3x3s2;
+ dwconv_parameters = &xnn_params.f32.dwconv_chw_3x3s2;
} else if (is_5x5 && subsampling_height == 1 && subsampling_width == 1 &&
input_padding_top == 2 && input_padding_left == 2 && input_padding_bottom == 2 && input_padding_right == 2 &&
- !nhwc_input && group_input_channels == 1 && group_output_channels == 1 && xnn_params.f32.spchw_dwconv5x5.ukernel != NULL)
+ !nhwc_input && group_input_channels == 1 && group_output_channels == 1 && xnn_params.f32.dwconv_chw_5x5.ukernel != NULL)
{
ukernel_type = xnn_ukernel_type_dwconv;
- dwconv_parameters = &xnn_params.f32.spchw_dwconv5x5;
+ dwconv_parameters = &xnn_params.f32.dwconv_chw_5x5;
} else if (is_5x5 && subsampling_height == 2 && subsampling_width == 2 &&
(input_padding_top == 1 || input_padding_top == 2) && input_padding_left == 2 && input_padding_bottom == 2 && input_padding_right == 2 &&
- !nhwc_input && group_input_channels == 1 && group_output_channels == 1 && xnn_params.f32.spchw_dwconv5x5s2.ukernel != NULL)
+ !nhwc_input && group_input_channels == 1 && group_output_channels == 1 && xnn_params.f32.dwconv_chw_5x5s2.ukernel != NULL)
{
ukernel_type = xnn_ukernel_type_dwconv;
- dwconv_parameters = &xnn_params.f32.spchw_dwconv5x5s2;
+ dwconv_parameters = &xnn_params.f32.dwconv_chw_5x5s2;
} else {
xnn_log_error(
"failed to create Convolution operator: only selected Convolution parameters are supported");
@@ -376,12 +376,12 @@
break;
}
- case xnn_ukernel_type_dconv2d_hwc2spchw:
+ case xnn_ukernel_type_conv2d_hwc2chw:
{
assert(groups == 1);
const size_t packed_group_output_channels =
- round_up(group_output_channels, xnn_params.f32.hwc2spchw_dconv3x3c3s2.output_channel_tile);
+ round_up(group_output_channels, xnn_params.f32.conv_hwc2chw_3x3c3s2.output_channel_tile);
const size_t packed_weights_size = groups * packed_group_output_channels *
(group_input_channels * kernel_height * kernel_width + 1 /* bias */) * sizeof(float);
convolution_op->packed_weights = xnn_allocate_simd_memory(packed_weights_size);
@@ -393,14 +393,14 @@
xnn_pack_f32_dconv_oki_w(
group_output_channels,
group_input_channels,
- xnn_params.f32.hwc2spchw_dconv3x3c3s2.output_channel_tile,
+ xnn_params.f32.conv_hwc2chw_3x3c3s2.output_channel_tile,
kernel_height, kernel_width,
kernel, bias, convolution_op->packed_weights);
- convolution_op->ukernel.dconv2d = (struct xnn_ukernel_dconv2d) {
- .hwc2spchw_function = xnn_params.f32.hwc2spchw_dconv3x3c3s2.ukernel_with_symm_padding,
- .output_height_tile = xnn_params.f32.hwc2spchw_dconv3x3c3s2.output_height_tile,
- .output_channel_tile = xnn_params.f32.hwc2spchw_dconv3x3c3s2.output_channel_tile,
+ convolution_op->ukernel.conv2d = (struct xnn_ukernel_conv2d) {
+ .hwc2chw_function = xnn_params.f32.conv_hwc2chw_3x3c3s2.ukernel_with_symm_padding,
+ .output_height_tile = xnn_params.f32.conv_hwc2chw_3x3c3s2.output_height_tile,
+ .output_channel_tile = xnn_params.f32.conv_hwc2chw_3x3c3s2.output_channel_tile,
};
break;
@@ -418,12 +418,12 @@
goto error;
}
- xnn_pack_f32_spchw_dwconv_ghw_w(
+ xnn_pack_f32_chw_dwconv_ghw_w(
kernel_height * kernel_width, groups,
kernel, bias, convolution_op->packed_weights);
convolution_op->ukernel.dwconv2d = (struct xnn_ukernel_dwconv2d) {
- .spchw_function = dwconv_parameters->ukernel,
+ .chw_function = dwconv_parameters->ukernel,
.input_width_tile = dwconv_parameters->input_width_tile,
.output_width_tile = dwconv_parameters->output_width_tile,
};
@@ -450,7 +450,7 @@
convolution_op->group_output_channels = group_output_channels;
if (ukernel_type == xnn_ukernel_type_dwconv) {
- convolution_op->f32_spchw_params = xnn_init_f32_spchw_params(0, output_min, output_max);
+ convolution_op->f32_chw_params = xnn_init_f32_chw_params(0, output_min, output_max);
} else {
convolution_op->f32_minmax_params = xnn_init_f32_minmax_params(output_min, output_max);
}
@@ -482,7 +482,7 @@
uint32_t bias_element_size,
uint32_t log2_output_element_size,
const void* params,
- const void* spchw_params,
+ const void* chw_params,
size_t num_threads)
{
convolution_op->state = xnn_run_state_invalid;
@@ -601,7 +601,7 @@
return xnn_status_success;
}
- case xnn_ukernel_type_dconv2d_hwc2spchw:
+ case xnn_ukernel_type_conv2d_hwc2chw:
{
const size_t zero_size = (input_width * convolution_op->group_input_channels << log2_input_element_size) + XNN_EXTRA_BYTES;
void* zero_buffer = xnn_reallocate_memory(convolution_op->zero_buffer, zero_size);
@@ -612,7 +612,7 @@
memset(zero_buffer, 0, zero_size);
convolution_op->zero_buffer = zero_buffer;
- convolution_op->context.dconv2d = (struct dconv2d_context) {
+ convolution_op->context.conv2d = (struct conv2d_context) {
.input_height = input_height,
.input_width = input_width,
.input = input,
@@ -625,12 +625,12 @@
.output_channels = convolution_op->group_output_channels,
.output_height_stride = output_width << log2_output_element_size,
.output_channel_stride = output_height * output_width << log2_output_element_size,
- .hwc2spchw_ukernel = convolution_op->ukernel.dconv2d.hwc2spchw_function,
+ .hwc2chw_ukernel = convolution_op->ukernel.conv2d.hwc2chw_function,
};
- memcpy(&convolution_op->context.dconv2d.params, params, sizeof(convolution_op->context.dconv2d.params));
+ memcpy(&convolution_op->context.conv2d.params, params, sizeof(convolution_op->context.conv2d.params));
size_t output_height_slice = output_height;
- const size_t output_height_tile = convolution_op->ukernel.dconv2d.output_height_tile;
+ const size_t output_height_tile = convolution_op->ukernel.conv2d.output_height_tile;
if (num_threads > 1) {
const size_t target_tiles_per_thread = 5;
const size_t max_output_height_slice = divide_round_up(output_height, num_threads * target_tiles_per_thread);
@@ -640,7 +640,7 @@
}
}
convolution_op->compute.type = xnn_parallelization_type_2d_tile_1d;
- convolution_op->compute.task_2d_tile_1d = (pthreadpool_task_2d_tile_1d_t) xnn_compute_dconv2d_hwc2spchw;
+ convolution_op->compute.task_2d_tile_1d = (pthreadpool_task_2d_tile_1d_t) xnn_compute_conv2d_hwc2chw;
convolution_op->compute.range[0] = batch_size;
convolution_op->compute.range[1] = output_height;
convolution_op->compute.tile[0] = output_height_slice;
@@ -659,7 +659,7 @@
memset(zero_buffer, 0, zero_size);
convolution_op->zero_buffer = zero_buffer;
- xnn_update_f32_spchw_params((union xnn_f32_spchw_params*) spchw_params, (uint32_t) input_width);
+ xnn_update_f32_chw_params((union xnn_f32_chw_params*) chw_params, (uint32_t) input_width);
convolution_op->context.dwconv2d = (struct dwconv2d_context) {
.input_height = input_height,
.input_width = input_width,
@@ -678,12 +678,12 @@
.output_tuple_stride = convolution_op->ukernel.dwconv2d.output_width_tile << log2_output_element_size,
.input_pixel_stride = input_width << log2_input_element_size,
.output_pixel_stride = output_width << log2_output_element_size,
- .spchw_ukernel = convolution_op->ukernel.dwconv2d.spchw_function,
+ .chw_ukernel = convolution_op->ukernel.dwconv2d.chw_function,
};
- memcpy(&convolution_op->context.dwconv2d.params, spchw_params, sizeof(convolution_op->context.dwconv2d.params));
+ memcpy(&convolution_op->context.dwconv2d.params, chw_params, sizeof(convolution_op->context.dwconv2d.params));
convolution_op->compute.type = xnn_parallelization_type_2d;
- convolution_op->compute.task_2d = (pthreadpool_task_2d_t) xnn_compute_dwconv2d_spchw;
+ convolution_op->compute.task_2d = (pthreadpool_task_2d_t) xnn_compute_dwconv2d_chw;
convolution_op->compute.range[0] = batch_size;
convolution_op->compute.range[1] = groups;
convolution_op->state = xnn_run_state_ready;
@@ -721,6 +721,6 @@
sizeof(float) /* sizeof(bias element) */,
2 /* log2(sizeof(output element)) = log2(sizeof(float)) */,
&convolution_op->f32_minmax_params,
- &convolution_op->f32_spchw_params,
+ &convolution_op->f32_chw_params,
pthreadpool_get_threads_count(threadpool));
}
diff --git a/src/operators/global-average-pooling-ncw.c b/src/operators/global-average-pooling-ncw.c
index 2f6db07..a18f4ac 100644
--- a/src/operators/global-average-pooling-ncw.c
+++ b/src/operators/global-average-pooling-ncw.c
@@ -62,7 +62,7 @@
}
status = xnn_status_unsupported_parameter;
- if (xnn_params.f32.spchw_gavgpool.ukernel == NULL) {
+ if (xnn_params.f32.gavgpool_cw.ukernel == NULL) {
xnn_log_error(
"failed to create Global Average Pooling operator: "
"only selected configurations parameters are supported");
@@ -133,7 +133,7 @@
.output = output,
.output_channel_stride = sizeof(float),
.output_batch_stride = global_average_pooling_op->channels * sizeof(float),
- .ukernel = xnn_params.f32.spchw_gavgpool.ukernel,
+ .ukernel = xnn_params.f32.gavgpool_cw.ukernel,
.params.f32 = global_average_pooling_op->f32_gavgpool_params,
};
@@ -142,7 +142,7 @@
(pthreadpool_task_2d_tile_1d_t) xnn_compute_global_average_pooling_ncw;
global_average_pooling_op->compute.range[0] = batch_size;
global_average_pooling_op->compute.range[1] = global_average_pooling_op->channels;
- global_average_pooling_op->compute.tile[0] = global_average_pooling_op->channels; //xnn_params.f32.spchw_gavgpool.channel_tile;
+ global_average_pooling_op->compute.tile[0] = global_average_pooling_op->channels; //xnn_params.f32.gavgpool_cw.channel_tile;
global_average_pooling_op->state = xnn_run_state_ready;
diff --git a/src/xnnpack/compute.h b/src/xnnpack/compute.h
index 540fd29..a5d53a2 100644
--- a/src/xnnpack/compute.h
+++ b/src/xnnpack/compute.h
@@ -301,7 +301,7 @@
size_t nr_block_size);
#endif
-struct dconv2d_context {
+struct conv2d_context {
size_t input_height;
size_t input_width;
const void* input;
@@ -315,7 +315,7 @@
size_t output_height_stride;
size_t output_channel_stride;
union {
- xnn_conv_hwc2spchw_ukernel_function hwc2spchw_ukernel;
+ xnn_conv_hwc2chw_ukernel_function hwc2chw_ukernel;
};
union {
union xnn_f32_minmax_params f32;
@@ -323,8 +323,8 @@
};
#ifndef __cplusplus
- XNN_PRIVATE void xnn_compute_dconv2d_hwc2spchw(
- const struct dconv2d_context context[restrict XNN_MIN_ELEMENTS(1)],
+ XNN_PRIVATE void xnn_compute_conv2d_hwc2chw(
+ const struct conv2d_context context[restrict XNN_MIN_ELEMENTS(1)],
size_t batch_index,
size_t output_y_start,
size_t output_y_slice);
@@ -373,15 +373,15 @@
size_t input_pixel_stride;
size_t output_pixel_stride;
union {
- union xnn_f32_spchw_params f32;
+ union xnn_f32_chw_params f32;
} params;
union {
- xnn_dwconv_spchw_ukernel_function spchw_ukernel;
+ xnn_dwconv_chw_ukernel_function chw_ukernel;
};
};
#ifndef __cplusplus
- XNN_PRIVATE void xnn_compute_dwconv2d_spchw(
+ XNN_PRIVATE void xnn_compute_dwconv2d_chw(
const struct dwconv2d_context context[restrict XNN_MIN_ELEMENTS(1)],
size_t batch_index,
size_t channel);
@@ -585,7 +585,7 @@
void* output;
size_t output_channel_stride;
size_t output_batch_stride;
- xnn_gavgpool_spchw_ukernel_function ukernel;
+ xnn_gavgpool_cw_ukernel_function ukernel;
union {
union xnn_f32_gavgpool_params f32;
} params;
diff --git a/src/xnnpack/conv.h b/src/xnnpack/conv.h
index 892e12c..43b0cf9 100644
--- a/src/xnnpack/conv.h
+++ b/src/xnnpack/conv.h
@@ -43,7 +43,7 @@
DECLARE_F32_CONV_HWC_UKERNEL_FUNCTION(xnn_f32_conv_hwc_ukernel_3x3s2p1c3x4__scalar_1x1)
-#define DECLARE_F32_CONV_HWC2SPCHW_UKERNEL_FUNCTION(fn_name) \
+#define DECLARE_F32_CONV_HWC2CHW_UKERNEL_FUNCTION(fn_name) \
XNN_INTERNAL void fn_name( \
size_t input_height, \
size_t input_width, \
@@ -59,8 +59,8 @@
size_t output_channel_stride, \
const union xnn_f32_minmax_params* params);
-DECLARE_F32_CONV_HWC2SPCHW_UKERNEL_FUNCTION(xnn_f32_conv_hwc2spchw_ukernel_3x3s2p1c3x4__neonfma_2x2)
-DECLARE_F32_CONV_HWC2SPCHW_UKERNEL_FUNCTION(xnn_f32_conv_hwc2spchw_ukernel_3x3s2p1c3x4__scalar_1x1)
+DECLARE_F32_CONV_HWC2CHW_UKERNEL_FUNCTION(xnn_f32_conv_hwc2chw_ukernel_3x3s2p1c3x4__neonfma_2x2)
+DECLARE_F32_CONV_HWC2CHW_UKERNEL_FUNCTION(xnn_f32_conv_hwc2chw_ukernel_3x3s2p1c3x4__scalar_1x1)
#ifdef __cplusplus
diff --git a/src/xnnpack/dwconv.h b/src/xnnpack/dwconv.h
index 0ec0e30..fef788b 100644
--- a/src/xnnpack/dwconv.h
+++ b/src/xnnpack/dwconv.h
@@ -206,31 +206,31 @@
DECLARE_Q8_DWCONV_MINMAX_UNIPASS_UKERNEL_FUNCTION(xnn_q8_dwconv_minmax_ukernel_up8x9__sse2)
-#define DECLARE_F32_DWCONV_SPCHW_UKERNEL_FUNCTION(fn_name) \
- XNN_INTERNAL void fn_name( \
- size_t input_height, \
- size_t input_width, \
- const float* input, \
- const float* weights, \
- const float* zero, \
- float* output, \
- uint32_t padding_top, \
- size_t input_tuple_stride, \
- size_t output_tuple_stride, \
- size_t input_height_stride, \
- size_t output_height_stride, \
- const union xnn_f32_spchw_params* params);
+#define DECLARE_F32_DWCONV_CHW_UKERNEL_FUNCTION(fn_name) \
+ XNN_INTERNAL void fn_name( \
+ size_t input_height, \
+ size_t input_width, \
+ const float* input, \
+ const float* weights, \
+ const float* zero, \
+ float* output, \
+ uint32_t padding_top, \
+ size_t input_tuple_stride, \
+ size_t output_tuple_stride, \
+ size_t input_height_stride, \
+ size_t output_height_stride, \
+ const union xnn_f32_chw_params* params);
-DECLARE_F32_DWCONV_SPCHW_UKERNEL_FUNCTION(xnn_f32_dwconv_spchw_ukernel_3x3p1__scalar)
-DECLARE_F32_DWCONV_SPCHW_UKERNEL_FUNCTION(xnn_f32_dwconv_spchw_ukernel_5x5p2__scalar)
-DECLARE_F32_DWCONV_SPCHW_UKERNEL_FUNCTION(xnn_f32_dwconv_spchw_ukernel_3x3s2p1__scalar)
-DECLARE_F32_DWCONV_SPCHW_UKERNEL_FUNCTION(xnn_f32_dwconv_spchw_ukernel_5x5s2p2__scalar)
-DECLARE_F32_DWCONV_SPCHW_UKERNEL_FUNCTION(xnn_f32_dwconv_spchw_ukernel_3x3p1__neonfma)
-DECLARE_F32_DWCONV_SPCHW_UKERNEL_FUNCTION(xnn_f32_dwconv_spchw_ukernel_5x5p2__neonfma)
-DECLARE_F32_DWCONV_SPCHW_UKERNEL_FUNCTION(xnn_f32_dwconv_spchw_ukernel_3x3p1__sse)
-DECLARE_F32_DWCONV_SPCHW_UKERNEL_FUNCTION(xnn_f32_dwconv_spchw_ukernel_3x3s2p1__neonfma)
-DECLARE_F32_DWCONV_SPCHW_UKERNEL_FUNCTION(xnn_f32_dwconv_spchw_ukernel_5x5s2p2__neonfma)
-DECLARE_F32_DWCONV_SPCHW_UKERNEL_FUNCTION(xnn_f32_dwconv_spchw_ukernel_3x3s2p1__sse)
+DECLARE_F32_DWCONV_CHW_UKERNEL_FUNCTION(xnn_f32_dwconv_chw_ukernel_3x3p1__scalar)
+DECLARE_F32_DWCONV_CHW_UKERNEL_FUNCTION(xnn_f32_dwconv_chw_ukernel_5x5p2__scalar)
+DECLARE_F32_DWCONV_CHW_UKERNEL_FUNCTION(xnn_f32_dwconv_chw_ukernel_3x3s2p1__scalar)
+DECLARE_F32_DWCONV_CHW_UKERNEL_FUNCTION(xnn_f32_dwconv_chw_ukernel_5x5s2p2__scalar)
+DECLARE_F32_DWCONV_CHW_UKERNEL_FUNCTION(xnn_f32_dwconv_chw_ukernel_3x3p1__neonfma)
+DECLARE_F32_DWCONV_CHW_UKERNEL_FUNCTION(xnn_f32_dwconv_chw_ukernel_5x5p2__neonfma)
+DECLARE_F32_DWCONV_CHW_UKERNEL_FUNCTION(xnn_f32_dwconv_chw_ukernel_3x3p1__sse)
+DECLARE_F32_DWCONV_CHW_UKERNEL_FUNCTION(xnn_f32_dwconv_chw_ukernel_3x3s2p1__neonfma)
+DECLARE_F32_DWCONV_CHW_UKERNEL_FUNCTION(xnn_f32_dwconv_chw_ukernel_5x5s2p2__neonfma)
+DECLARE_F32_DWCONV_CHW_UKERNEL_FUNCTION(xnn_f32_dwconv_chw_ukernel_3x3s2p1__sse)
#ifdef __cplusplus
diff --git a/src/xnnpack/gavgpool.h b/src/xnnpack/gavgpool.h
index 41bda93..7a904b6 100644
--- a/src/xnnpack/gavgpool.h
+++ b/src/xnnpack/gavgpool.h
@@ -84,17 +84,17 @@
DECLARE_Q8_GAVGPOOL_MINMAX_UNIPASS_UKERNEL_FUNCTION(xnn_q8_gavgpool_minmax_ukernel_7x__scalar_c1)
-#define DECLARE_F32_GAVGPOOL_SPCHW_UKERNEL_FUNCTION(fn_name) \
- XNN_INTERNAL void fn_name( \
- size_t elements, \
- size_t channels, \
- const float* input, \
- float* output, \
+#define DECLARE_F32_GAVGPOOL_CW_UKERNEL_FUNCTION(fn_name) \
+ XNN_INTERNAL void fn_name( \
+ size_t elements, \
+ size_t channels, \
+ const float* input, \
+ float* output, \
const union xnn_f32_gavgpool_params* params);
-DECLARE_F32_GAVGPOOL_SPCHW_UKERNEL_FUNCTION(xnn_f32_gavgpool_spchw_ukernel__neon_x4)
-DECLARE_F32_GAVGPOOL_SPCHW_UKERNEL_FUNCTION(xnn_f32_gavgpool_spchw_ukernel__sse_x4)
-DECLARE_F32_GAVGPOOL_SPCHW_UKERNEL_FUNCTION(xnn_f32_gavgpool_spchw_ukernel__scalar_x1)
+DECLARE_F32_GAVGPOOL_CW_UKERNEL_FUNCTION(xnn_f32_gavgpool_cw_ukernel__neon_x4)
+DECLARE_F32_GAVGPOOL_CW_UKERNEL_FUNCTION(xnn_f32_gavgpool_cw_ukernel__sse_x4)
+DECLARE_F32_GAVGPOOL_CW_UKERNEL_FUNCTION(xnn_f32_gavgpool_cw_ukernel__scalar_x1)
#ifdef __cplusplus
diff --git a/src/xnnpack/operator.h b/src/xnnpack/operator.h
index c9002ea..ded7c73 100644
--- a/src/xnnpack/operator.h
+++ b/src/xnnpack/operator.h
@@ -25,7 +25,7 @@
xnn_ukernel_type_binary_elementwise,
xnn_ukernel_type_channel_shuffle,
xnn_ukernel_type_clamp,
- xnn_ukernel_type_dconv2d_hwc2spchw,
+ xnn_ukernel_type_conv2d_hwc2chw,
xnn_ukernel_type_dwconv,
xnn_ukernel_type_gemm,
xnn_ukernel_type_global_average_pooling,
@@ -85,9 +85,9 @@
xnn_operator_type_unpooling_nhwc_x32,
};
-struct xnn_ukernel_dconv2d {
+struct xnn_ukernel_conv2d {
union {
- xnn_conv_hwc2spchw_ukernel_function hwc2spchw_function;
+ xnn_conv_hwc2chw_ukernel_function hwc2chw_function;
xnn_conv_hwc_ukernel_function hwc_function;
};
uint8_t output_height_tile;
@@ -106,7 +106,7 @@
// Direct 2D Depthwise Convolution
struct xnn_ukernel_dwconv2d {
union {
- xnn_dwconv_spchw_ukernel_function spchw_function;
+ xnn_dwconv_chw_ukernel_function chw_function;
};
uint8_t input_width_tile;
uint8_t output_width_tile;
@@ -142,7 +142,7 @@
struct xnn_ukernel {
enum xnn_ukernel_type type;
union {
- struct xnn_ukernel_dconv2d dconv2d;
+ struct xnn_ukernel_conv2d conv2d;
struct xnn_ukernel_dwconv dwconv;
struct xnn_ukernel_dwconv2d dwconv2d;
struct xnn_ukernel_gemm gemm;
@@ -249,7 +249,7 @@
union xnn_f32_scaleminmax_params f32_scaleminmax_params;
union xnn_f32_minmax_params f32_minmax_params;
};
- union xnn_f32_spchw_params f32_spchw_params;
+ union xnn_f32_chw_params f32_chw_params;
union xnn_q8_add_params q8_add_params;
union xnn_q8_gemm_params q8_gemm_params;
// Average Pooling normally use q8_avgpool_params, but also initialize q8_gavgpool_params in case it needs to switch
@@ -272,7 +272,7 @@
struct average_pooling_context average_pooling;
struct channel_pad_context channel_pad;
struct channel_shuffle_context channel_shuffle;
- struct dconv2d_context dconv2d;
+ struct conv2d_context conv2d;
struct dwconv2d_context dwconv2d;
struct dwconv_context dwconv;
struct elementwise_binary_context elementwise_binary;
diff --git a/src/xnnpack/pack.h b/src/xnnpack/pack.h
index 64d17b3..0535435 100644
--- a/src/xnnpack/pack.h
+++ b/src/xnnpack/pack.h
@@ -778,7 +778,7 @@
}
}
-static inline void xnn_pack_f16_spchw_dwconv_ghw_w(
+static inline void xnn_pack_f16_chw_dwconv_ghw_w(
size_t kernel_size,
size_t groups,
const uint16_t* kernel,
@@ -1205,7 +1205,7 @@
}
}
-static inline void xnn_pack_f32_spchw_dwconv_ghw_w(
+static inline void xnn_pack_f32_chw_dwconv_ghw_w(
size_t kernel_size,
size_t groups,
const float* kernel,
diff --git a/src/xnnpack/params-init.h b/src/xnnpack/params-init.h
index 50ab491..0916e93 100644
--- a/src/xnnpack/params-init.h
+++ b/src/xnnpack/params-init.h
@@ -455,12 +455,12 @@
return params;
}
-static inline union xnn_f32_spchw_params xnn_init_f32_spchw_params(
+static inline union xnn_f32_chw_params xnn_init_f32_chw_params(
uint32_t width,
float output_min,
float output_max)
{
- union xnn_f32_spchw_params params;
+ union xnn_f32_chw_params params;
#if XNN_ARCH_X86 || XNN_ARCH_X86_64
for (uint32_t i = 0; i < 4; i++) {
params.sse.min[i] = output_min;
@@ -508,8 +508,8 @@
return params;
}
-static inline void xnn_update_f32_spchw_params(
- union xnn_f32_spchw_params* params,
+static inline void xnn_update_f32_chw_params(
+ union xnn_f32_chw_params* params,
uint32_t width)
{
#if XNN_ARCH_X86 || XNN_ARCH_X86_64
@@ -547,12 +547,12 @@
#endif
}
-static inline union xnn_f32_spchw_params xnn_init_scalar_f32_spchw_params(
+static inline union xnn_f32_chw_params xnn_init_scalar_f32_chw_params(
uint32_t width,
float output_min,
float output_max)
{
- union xnn_f32_spchw_params params;
+ union xnn_f32_chw_params params;
params.scalar.min = output_min;
params.scalar.max = output_max;
return params;
diff --git a/src/xnnpack/params.h b/src/xnnpack/params.h
index 804d7d3..bd0ebe4 100644
--- a/src/xnnpack/params.h
+++ b/src/xnnpack/params.h
@@ -50,7 +50,7 @@
#endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
};
-union xnn_f32_spchw_params {
+union xnn_f32_chw_params {
struct {
float min;
float max;
@@ -577,7 +577,7 @@
size_t output_width_stride,
const union xnn_f32_minmax_params* params);
-typedef void (*xnn_conv_hwc2spchw_ukernel_function)(
+typedef void (*xnn_conv_hwc2chw_ukernel_function)(
size_t input_height,
size_t input_width,
size_t output_y_start,
@@ -592,7 +592,7 @@
size_t output_channel_stride,
const void* params);
-typedef void (*xnn_f32_conv_hwc2spchw_ukernel_function)(
+typedef void (*xnn_f32_conv_hwc2chw_ukernel_function)(
size_t input_height,
size_t input_width,
size_t output_y_start,
@@ -717,7 +717,7 @@
const uint8_t* t,
uint8_t* y);
-typedef void (*xnn_dwconv_spchw_ukernel_function)(
+typedef void (*xnn_dwconv_chw_ukernel_function)(
size_t input_height,
size_t input_width,
const void* input,
@@ -731,7 +731,7 @@
size_t output_height_stride,
const void* params);
-typedef void (*xnn_f32_dwconv_spchw_ukernel_function)(
+typedef void (*xnn_f32_dwconv_chw_ukernel_function)(
size_t input_height,
size_t input_width,
const float* input,
@@ -743,7 +743,7 @@
size_t output_tuple_stride,
size_t input_height_stride,
size_t output_height_stride,
- const union xnn_f32_spchw_params* params);
+ const union xnn_f32_chw_params* params);
typedef void (*xnn_dwconv_unipass_ukernel_function)(
size_t channels,
@@ -871,14 +871,14 @@
uint8_t* output,
const union xnn_q8_avgpool_params* params);
-typedef void (*xnn_gavgpool_spchw_ukernel_function)(
+typedef void (*xnn_gavgpool_cw_ukernel_function)(
size_t elements,
size_t channels,
const float* input,
float* output,
const void* params);
-typedef void (*xnn_f32_gavgpool_spchw_ukernel_function)(
+typedef void (*xnn_f32_gavgpool_cw_ukernel_function)(
size_t elements,
size_t channels,
const float* input,
@@ -1390,8 +1390,8 @@
uint8_t nr;
};
-struct hwc2spchw_dconv_parameters {
- xnn_conv_hwc2spchw_ukernel_function ukernel_with_symm_padding;
+struct conv_hwc2chw_parameters {
+ xnn_conv_hwc2chw_ukernel_function ukernel_with_symm_padding;
// Number of output channels in a tile.
// This parameter must be passed as is to weight packing function.
uint8_t output_channel_tile;
@@ -1402,8 +1402,8 @@
uint8_t output_width_tile;
};
-struct spchw_dwconv_parameters {
- xnn_dwconv_spchw_ukernel_function ukernel;
+struct dwconv_chw_parameters {
+ xnn_dwconv_chw_ukernel_function ukernel;
// Number of input width pixels in a tile.
uint8_t input_width_tile;
// Number of output width pixels in a tile.
@@ -1413,8 +1413,8 @@
uint8_t output_height_tile;
};
-struct spchw_gavgpool_parameters {
- xnn_gavgpool_spchw_ukernel_function ukernel;
+struct gavgpool_cw_parameters {
+ xnn_gavgpool_cw_ukernel_function ukernel;
// Number of channels in a tile.
// For best efficiency, micro-kernel must process a multiple of this number of channels in each call.
uint8_t channel_tile;
@@ -1556,18 +1556,18 @@
struct spmm_parameters spmm2;
// Sparse Matrix-Dense Matrix Multiplication (NR=4 block).
struct spmm_parameters spmm4;
- // Direct 3x3 stride-2 Convolution with 3 input channels and HWC->SpCHW layout conversion.
- struct hwc2spchw_dconv_parameters hwc2spchw_dconv3x3c3s2;
- // Direct 3x3 stride-1 Convolution with padding 1 on left and right in SpCHW layout.
- struct spchw_dwconv_parameters spchw_dwconv3x3;
- // Direct 3x3 stride-2 Convolution with padding 1 on left and right in SpCHW layout.
- struct spchw_dwconv_parameters spchw_dwconv3x3s2;
- // Direct 5x5 stride-1 Convolution with padding 2 on left and right in SpCHW layout.
- struct spchw_dwconv_parameters spchw_dwconv5x5;
- // Direct 5x5 stride-2 Convolution with padding 2 on left and right in SpCHW layout.
- struct spchw_dwconv_parameters spchw_dwconv5x5s2;
- // Global Average Pooling in SpCHW layout.
- struct spchw_gavgpool_parameters spchw_gavgpool;
+ // Direct 3x3 stride-2 Convolution with 3 input channels and HWC->CHW layout conversion.
+ struct conv_hwc2chw_parameters conv_hwc2chw_3x3c3s2;
+ // Direct 3x3 stride-1 Convolution with padding 1 on left and right in CHW layout.
+ struct dwconv_chw_parameters dwconv_chw_3x3;
+ // Direct 3x3 stride-2 Convolution with padding 1 on left and right in CHW layout.
+ struct dwconv_chw_parameters dwconv_chw_3x3s2;
+ // Direct 5x5 stride-1 Convolution with padding 2 on left and right in CHW layout.
+ struct dwconv_chw_parameters dwconv_chw_5x5;
+ // Direct 5x5 stride-2 Convolution with padding 2 on left and right in CHW layout.
+ struct dwconv_chw_parameters dwconv_chw_5x5s2;
+ // Global Average Pooling in CW layout.
+ struct gavgpool_cw_parameters gavgpool_cw;
} f32;
struct {
struct pad_parameters pad;
diff --git a/test/conv-hwc2spchw-microkernel-tester.h b/test/conv-hwc2chw-microkernel-tester.h
similarity index 84%
rename from test/conv-hwc2spchw-microkernel-tester.h
rename to test/conv-hwc2chw-microkernel-tester.h
index 5b2e505..c680c35 100644
--- a/test/conv-hwc2spchw-microkernel-tester.h
+++ b/test/conv-hwc2chw-microkernel-tester.h
@@ -24,14 +24,14 @@
#include <xnnpack.h>
-class ConvHWC2SpCHWMicrokernelTester {
+class ConvHWC2CHWMicrokernelTester {
public:
enum class Variant {
Native,
Scalar,
};
- inline ConvHWC2SpCHWMicrokernelTester& output_channels_tile(uint32_t output_channels_tile) {
+ inline ConvHWC2CHWMicrokernelTester& output_channels_tile(uint32_t output_channels_tile) {
this->output_channels_tile_ = output_channels_tile;
return *this;
}
@@ -40,7 +40,7 @@
return this->output_channels_tile_;
}
- inline ConvHWC2SpCHWMicrokernelTester& padding(uint32_t padding) {
+ inline ConvHWC2CHWMicrokernelTester& padding(uint32_t padding) {
this->padding_top_ = padding;
this->padding_right_ = padding;
this->padding_bottom_ = padding;
@@ -48,19 +48,19 @@
return *this;
}
- inline ConvHWC2SpCHWMicrokernelTester& padding_height(uint32_t padding_height) {
+ inline ConvHWC2CHWMicrokernelTester& padding_height(uint32_t padding_height) {
this->padding_top_ = padding_height;
this->padding_bottom_ = padding_height;
return *this;
}
- inline ConvHWC2SpCHWMicrokernelTester& padding_width(uint32_t padding_width) {
+ inline ConvHWC2CHWMicrokernelTester& padding_width(uint32_t padding_width) {
this->padding_right_ = padding_width;
this->padding_left_ = padding_width;
return *this;
}
- inline ConvHWC2SpCHWMicrokernelTester& padding_top(uint32_t padding_top) {
+ inline ConvHWC2CHWMicrokernelTester& padding_top(uint32_t padding_top) {
this->padding_top_ = padding_top;
return *this;
}
@@ -69,7 +69,7 @@
return this->padding_top_;
}
- inline ConvHWC2SpCHWMicrokernelTester& padding_right(uint32_t padding_right) {
+ inline ConvHWC2CHWMicrokernelTester& padding_right(uint32_t padding_right) {
this->padding_right_ = padding_right;
return *this;
}
@@ -78,7 +78,7 @@
return this->padding_right_;
}
- inline ConvHWC2SpCHWMicrokernelTester& padding_bottom(uint32_t padding_bottom) {
+ inline ConvHWC2CHWMicrokernelTester& padding_bottom(uint32_t padding_bottom) {
this->padding_bottom_ = padding_bottom;
return *this;
}
@@ -87,7 +87,7 @@
return this->padding_bottom_;
}
- inline ConvHWC2SpCHWMicrokernelTester& padding_left(uint32_t padding_left) {
+ inline ConvHWC2CHWMicrokernelTester& padding_left(uint32_t padding_left) {
this->padding_left_ = padding_left;
return *this;
}
@@ -96,7 +96,7 @@
return this->padding_left_;
}
- inline ConvHWC2SpCHWMicrokernelTester& input_size(uint32_t input_height, uint32_t input_width) {
+ inline ConvHWC2CHWMicrokernelTester& input_size(uint32_t input_height, uint32_t input_width) {
assert(input_height >= 1);
assert(input_width >= 1);
this->input_height_ = input_height;
@@ -104,7 +104,7 @@
return *this;
}
- inline ConvHWC2SpCHWMicrokernelTester& input_height(uint32_t input_height) {
+ inline ConvHWC2CHWMicrokernelTester& input_height(uint32_t input_height) {
assert(input_height >= 1);
this->input_height_ = input_height;
return *this;
@@ -114,7 +114,7 @@
return this->input_height_;
}
- inline ConvHWC2SpCHWMicrokernelTester& input_width(uint32_t input_width) {
+ inline ConvHWC2CHWMicrokernelTester& input_width(uint32_t input_width) {
assert(input_width >= 1);
this->input_width_ = input_width;
return *this;
@@ -124,7 +124,7 @@
return this->input_width_;
}
- inline ConvHWC2SpCHWMicrokernelTester& input_channels(size_t input_channels) {
+ inline ConvHWC2CHWMicrokernelTester& input_channels(size_t input_channels) {
assert(input_channels >= 1);
this->input_channels_ = input_channels;
return *this;
@@ -134,7 +134,7 @@
return this->input_channels_;
}
- inline ConvHWC2SpCHWMicrokernelTester& output_channels(size_t output_channels) {
+ inline ConvHWC2CHWMicrokernelTester& output_channels(size_t output_channels) {
assert(output_channels >= 1);
this->output_channels_ = output_channels;
return *this;
@@ -148,7 +148,7 @@
return output_channels() % output_channels_tile() == 0 ? output_channels() : output_channels() / output_channels_tile() * output_channels_tile() + output_channels_tile();
}
- inline ConvHWC2SpCHWMicrokernelTester& batch_size(size_t batch_size) {
+ inline ConvHWC2CHWMicrokernelTester& batch_size(size_t batch_size) {
assert(batch_size >= 1);
this->batch_size_ = batch_size;
return *this;
@@ -158,14 +158,14 @@
return this->batch_size_;
}
- inline ConvHWC2SpCHWMicrokernelTester& kernel_size(uint32_t kernel_size) {
+ inline ConvHWC2CHWMicrokernelTester& kernel_size(uint32_t kernel_size) {
assert(kernel_size >= 1);
this->kernel_height_ = kernel_size;
this->kernel_width_ = kernel_size;
return *this;
}
- inline ConvHWC2SpCHWMicrokernelTester& kernel_height(uint32_t kernel_height) {
+ inline ConvHWC2CHWMicrokernelTester& kernel_height(uint32_t kernel_height) {
assert(kernel_height >= 1);
this->kernel_height_ = kernel_height;
return *this;
@@ -175,7 +175,7 @@
return this->kernel_height_;
}
- inline ConvHWC2SpCHWMicrokernelTester& kernel_width(uint32_t kernel_width) {
+ inline ConvHWC2CHWMicrokernelTester& kernel_width(uint32_t kernel_width) {
assert(kernel_width >= 1);
this->kernel_width_ = kernel_width;
return *this;
@@ -185,14 +185,14 @@
return this->kernel_width_;
}
- inline ConvHWC2SpCHWMicrokernelTester& subsampling(uint32_t subsampling) {
+ inline ConvHWC2CHWMicrokernelTester& subsampling(uint32_t subsampling) {
assert(subsampling >= 1);
this->subsampling_height_ = subsampling;
this->subsampling_width_ = subsampling;
return *this;
}
- inline ConvHWC2SpCHWMicrokernelTester& subsampling_height(uint32_t subsampling_height) {
+ inline ConvHWC2CHWMicrokernelTester& subsampling_height(uint32_t subsampling_height) {
assert(subsampling_height >= 1);
this->subsampling_height_ = subsampling_height;
return *this;
@@ -202,7 +202,7 @@
return this->subsampling_height_;
}
- inline ConvHWC2SpCHWMicrokernelTester& subsampling_width(uint32_t subsampling_width) {
+ inline ConvHWC2CHWMicrokernelTester& subsampling_width(uint32_t subsampling_width) {
assert(subsampling_width >= 1);
this->subsampling_width_ = subsampling_width;
return *this;
@@ -212,7 +212,7 @@
return this->subsampling_width_;
}
- inline ConvHWC2SpCHWMicrokernelTester& output_y_start(uint32_t output_y_start) {
+ inline ConvHWC2CHWMicrokernelTester& output_y_start(uint32_t output_y_start) {
this->output_y_start_ = output_y_start;
return *this;
}
@@ -221,7 +221,7 @@
return this->output_y_start_;
}
- inline ConvHWC2SpCHWMicrokernelTester& output_y_end(uint32_t output_y_end) {
+ inline ConvHWC2CHWMicrokernelTester& output_y_end(uint32_t output_y_end) {
this->output_y_end_ = output_y_end;
return *this;
}
@@ -260,7 +260,7 @@
}
}
- inline ConvHWC2SpCHWMicrokernelTester& qmin(uint8_t qmin) {
+ inline ConvHWC2CHWMicrokernelTester& qmin(uint8_t qmin) {
this->qmin_ = qmin;
return *this;
}
@@ -269,7 +269,7 @@
return this->qmin_;
}
- inline ConvHWC2SpCHWMicrokernelTester& qmax(uint8_t qmax) {
+ inline ConvHWC2CHWMicrokernelTester& qmax(uint8_t qmax) {
this->qmax_ = qmax;
return *this;
}
@@ -278,7 +278,7 @@
return this->qmax_;
}
- inline ConvHWC2SpCHWMicrokernelTester& iterations(size_t iterations) {
+ inline ConvHWC2CHWMicrokernelTester& iterations(size_t iterations) {
this->iterations_ = iterations;
return *this;
}
@@ -287,7 +287,7 @@
return this->iterations_;
}
- void Test(xnn_f32_conv_hwc2spchw_ukernel_function conv, Variant variant = Variant::Native) const {
+ void Test(xnn_f32_conv_hwc2chw_ukernel_function conv, Variant variant = Variant::Native) const {
ASSERT_LT(output_y_start(), output_height());
ASSERT_LE(output_y_end(), output_height());
ASSERT_GT(output_y_end(), output_y_start());
diff --git a/test/convolution-nchw.cc b/test/convolution-nchw.cc
index fb1fe68..97315be 100644
--- a/test/convolution-nchw.cc
+++ b/test/convolution-nchw.cc
@@ -385,7 +385,7 @@
.TestNCHWxF32();
}
-/**************************** DConv 3x3c3s2 HWC->SpCHW path, batched ****************************/
+/**************************** DConv 3x3c3s2 HWC->CHW path, batched ****************************/
TEST(CONVOLUTION_NHWC2NCHW_OP_F32, batched_3x3c3s2) {
ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
diff --git a/test/dwconv-spchw-microkernel-tester.h b/test/dwconv-chw-microkernel-tester.h
similarity index 83%
rename from test/dwconv-spchw-microkernel-tester.h
rename to test/dwconv-chw-microkernel-tester.h
index a5b4354..aea6ddf 100644
--- a/test/dwconv-spchw-microkernel-tester.h
+++ b/test/dwconv-chw-microkernel-tester.h
@@ -27,14 +27,14 @@
#include <xnnpack/params.h>
-class DWConvSpCHWMicrokernelTester {
+class DWConvCHWMicrokernelTester {
public:
enum class Variant {
Native,
Scalar,
};
- inline DWConvSpCHWMicrokernelTester& input_tuple_size(uint32_t input_tuple_size) {
+ inline DWConvCHWMicrokernelTester& input_tuple_size(uint32_t input_tuple_size) {
this->input_tuple_size_ = input_tuple_size;
return *this;
}
@@ -43,7 +43,7 @@
return this->input_tuple_size_;
}
- inline DWConvSpCHWMicrokernelTester& output_tuple_size(uint32_t output_tuple_size) {
+ inline DWConvCHWMicrokernelTester& output_tuple_size(uint32_t output_tuple_size) {
this->output_tuple_size_ = output_tuple_size;
return *this;
}
@@ -52,7 +52,7 @@
return this->output_tuple_size_;
}
- inline DWConvSpCHWMicrokernelTester& padding_left(uint32_t padding_left) {
+ inline DWConvCHWMicrokernelTester& padding_left(uint32_t padding_left) {
this->padding_left_ = padding_left;
return *this;
}
@@ -61,7 +61,7 @@
return this->padding_left_;
}
- inline DWConvSpCHWMicrokernelTester& padding_right(uint32_t padding_right) {
+ inline DWConvCHWMicrokernelTester& padding_right(uint32_t padding_right) {
this->padding_right_ = padding_right;
return *this;
}
@@ -70,7 +70,7 @@
return this->padding_right_;
}
- inline DWConvSpCHWMicrokernelTester& padding_top(uint32_t padding_top) {
+ inline DWConvCHWMicrokernelTester& padding_top(uint32_t padding_top) {
this->padding_top_ = padding_top;
return *this;
}
@@ -80,7 +80,7 @@
}
- inline DWConvSpCHWMicrokernelTester& padding_bottom(uint32_t padding_bottom) {
+ inline DWConvCHWMicrokernelTester& padding_bottom(uint32_t padding_bottom) {
this->padding_bottom_ = padding_bottom;
return *this;
}
@@ -92,7 +92,7 @@
return (output_height() - 1) * subsampling() + kernel_height() - padding_top() - padding_bottom();
}
- inline DWConvSpCHWMicrokernelTester& input_width(uint32_t input_width) {
+ inline DWConvCHWMicrokernelTester& input_width(uint32_t input_width) {
assert(input_width >= 1);
this->input_width_ = input_width;
return *this;
@@ -102,7 +102,7 @@
return this->input_width_;
}
- inline DWConvSpCHWMicrokernelTester& subsampling(uint32_t subsampling) {
+ inline DWConvCHWMicrokernelTester& subsampling(uint32_t subsampling) {
assert(subsampling >= 1);
this->subsampling_ = subsampling;
return *this;
@@ -112,7 +112,7 @@
return this->subsampling_;
}
- inline DWConvSpCHWMicrokernelTester& kernel_height(uint32_t kernel_height) {
+ inline DWConvCHWMicrokernelTester& kernel_height(uint32_t kernel_height) {
assert(kernel_height != 0);
this->kernel_height_ = kernel_height;
return *this;
@@ -122,7 +122,7 @@
return this->kernel_height_;
}
- inline DWConvSpCHWMicrokernelTester& kernel_width(uint32_t kernel_width) {
+ inline DWConvCHWMicrokernelTester& kernel_width(uint32_t kernel_width) {
assert(kernel_width != 0);
this->kernel_width_ = kernel_width;
return *this;
@@ -136,7 +136,7 @@
return kernel_height() * kernel_width();
}
- inline DWConvSpCHWMicrokernelTester& output_height(uint32_t output_height) {
+ inline DWConvCHWMicrokernelTester& output_height(uint32_t output_height) {
assert(output_height >= 1);
this->output_height_ = output_height;
return *this;
@@ -155,7 +155,7 @@
}
}
- inline DWConvSpCHWMicrokernelTester& input_tuple_stride(uint32_t input_tuple_stride) {
+ inline DWConvCHWMicrokernelTester& input_tuple_stride(uint32_t input_tuple_stride) {
assert(input_tuple_stride != 0);
this->input_tuple_stride_ = input_tuple_stride;
return *this;
@@ -169,7 +169,7 @@
}
}
- inline DWConvSpCHWMicrokernelTester& output_tuple_stride(uint32_t output_tuple_stride) {
+ inline DWConvCHWMicrokernelTester& output_tuple_stride(uint32_t output_tuple_stride) {
assert(output_tuple_stride != 0);
this->output_tuple_stride_ = output_tuple_stride;
return *this;
@@ -183,7 +183,7 @@
}
}
- inline DWConvSpCHWMicrokernelTester& input_width_stride(uint32_t input_width_stride) {
+ inline DWConvCHWMicrokernelTester& input_width_stride(uint32_t input_width_stride) {
assert(input_width_stride != 0);
this->input_width_stride_ = input_width_stride;
return *this;
@@ -197,7 +197,7 @@
}
}
- inline DWConvSpCHWMicrokernelTester& output_width_stride(uint32_t output_width_stride) {
+ inline DWConvCHWMicrokernelTester& output_width_stride(uint32_t output_width_stride) {
assert(output_width_stride != 0);
this->output_width_stride_ = output_width_stride;
return *this;
@@ -211,7 +211,7 @@
}
}
- inline DWConvSpCHWMicrokernelTester& qmin(uint8_t qmin) {
+ inline DWConvCHWMicrokernelTester& qmin(uint8_t qmin) {
this->qmin_ = qmin;
return *this;
}
@@ -220,7 +220,7 @@
return this->qmin_;
}
- inline DWConvSpCHWMicrokernelTester& qmax(uint8_t qmax) {
+ inline DWConvCHWMicrokernelTester& qmax(uint8_t qmax) {
this->qmax_ = qmax;
return *this;
}
@@ -229,7 +229,7 @@
return this->qmax_;
}
- inline DWConvSpCHWMicrokernelTester& iterations(size_t iterations) {
+ inline DWConvCHWMicrokernelTester& iterations(size_t iterations) {
this->iterations_ = iterations;
return *this;
}
@@ -238,7 +238,7 @@
return this->iterations_;
}
- void Test(xnn_f32_dwconv_spchw_ukernel_function dwconv, Variant variant = Variant::Native) const {
+ void Test(xnn_f32_dwconv_chw_ukernel_function dwconv, Variant variant = Variant::Native) const {
ASSERT_EQ(0, input_tuple_stride() % input_tuple_size());
ASSERT_EQ(0, output_tuple_stride() % output_tuple_size());
@@ -285,13 +285,13 @@
const float output_max = accumulated_max - accumulated_range / 255.0f * float(255 - qmax());
// Prepare output parameters.
- xnn_f32_spchw_params spchw_params = { };
+ xnn_f32_chw_params chw_params = { };
switch (variant) {
case Variant::Native:
- spchw_params = xnn_init_f32_spchw_params(input_width(), output_min, output_max);
+ chw_params = xnn_init_f32_chw_params(input_width(), output_min, output_max);
break;
case Variant::Scalar:
- spchw_params = xnn_init_scalar_f32_spchw_params(input_width(), output_min, output_max);
+ chw_params = xnn_init_scalar_f32_chw_params(input_width(), output_min, output_max);
break;
}
@@ -307,7 +307,7 @@
padding_top(),
input_tuple_stride() * sizeof(float), output_tuple_stride() * sizeof(float),
input_width_stride() * sizeof(float), output_width_stride() * sizeof(float),
- &spchw_params);
+ &chw_params);
// Verify results.
for (size_t y = 0; y < output_height(); y++) {
diff --git a/test/f32-conv-hwc2spchw.cc b/test/f32-conv-hwc2chw.cc
similarity index 69%
rename from test/f32-conv-hwc2spchw.cc
rename to test/f32-conv-hwc2chw.cc
index 7227ac3..5cba25f 100644
--- a/test/f32-conv-hwc2spchw.cc
+++ b/test/f32-conv-hwc2chw.cc
@@ -9,13 +9,13 @@
#include <xnnpack/isa-checks.h>
#include <xnnpack/conv.h>
-#include "conv-hwc2spchw-microkernel-tester.h"
+#include "conv-hwc2chw-microkernel-tester.h"
#if XNN_ARCH_ARM64
- TEST(F32_CONV_HWC2SPCHW_3X3S2P1C3X4__NEONFMA_2X2, input_width_eq_4) {
+ TEST(F32_CONV_HWC2CHW_3X3S2P1C3X4__NEONFMA_2X2, input_width_eq_4) {
TEST_REQUIRES_ARM_NEON_FMA;
- ConvHWC2SpCHWMicrokernelTester()
+ ConvHWC2CHWMicrokernelTester()
.kernel_size(3)
.subsampling(2)
.padding_width(1)
@@ -24,13 +24,13 @@
.output_channels(4)
.input_width(4)
.input_height(3)
- .Test(xnn_f32_conv_hwc2spchw_ukernel_3x3s2p1c3x4__neonfma_2x2);
+ .Test(xnn_f32_conv_hwc2chw_ukernel_3x3s2p1c3x4__neonfma_2x2);
}
- TEST(F32_CONV_HWC2SPCHW_3X3S2P1C3X4__NEONFMA_2X2, input_width_div_4) {
+ TEST(F32_CONV_HWC2CHW_3X3S2P1C3X4__NEONFMA_2X2, input_width_div_4) {
TEST_REQUIRES_ARM_NEON_FMA;
for (size_t input_width = 8; input_width <= 32; input_width += 12) {
- ConvHWC2SpCHWMicrokernelTester()
+ ConvHWC2CHWMicrokernelTester()
.kernel_size(3)
.subsampling(2)
.padding_width(1)
@@ -39,14 +39,14 @@
.output_channels(4)
.input_width(input_width)
.input_height(3)
- .Test(xnn_f32_conv_hwc2spchw_ukernel_3x3s2p1c3x4__neonfma_2x2);
+ .Test(xnn_f32_conv_hwc2chw_ukernel_3x3s2p1c3x4__neonfma_2x2);
}
}
- TEST(F32_CONV_HWC2SPCHW_3X3S2P1C3X4__NEONFMA_2X2, input_width_lt_4) {
+ TEST(F32_CONV_HWC2CHW_3X3S2P1C3X4__NEONFMA_2X2, input_width_lt_4) {
TEST_REQUIRES_ARM_NEON_FMA;
for (size_t input_width = 1; input_width < 4; input_width++) {
- ConvHWC2SpCHWMicrokernelTester()
+ ConvHWC2CHWMicrokernelTester()
.kernel_size(3)
.subsampling(2)
.padding_width(1)
@@ -55,14 +55,14 @@
.output_channels(4)
.input_width(input_width)
.input_height(3)
- .Test(xnn_f32_conv_hwc2spchw_ukernel_3x3s2p1c3x4__neonfma_2x2);
+ .Test(xnn_f32_conv_hwc2chw_ukernel_3x3s2p1c3x4__neonfma_2x2);
}
}
- TEST(F32_CONV_HWC2SPCHW_3X3S2P1C3X4__NEONFMA_2X2, input_width_gt_4) {
+ TEST(F32_CONV_HWC2CHW_3X3S2P1C3X4__NEONFMA_2X2, input_width_gt_4) {
TEST_REQUIRES_ARM_NEON_FMA;
for (size_t input_width = 5; input_width < 8; input_width++) {
- ConvHWC2SpCHWMicrokernelTester()
+ ConvHWC2CHWMicrokernelTester()
.kernel_size(3)
.subsampling(2)
.padding_width(1)
@@ -71,15 +71,15 @@
.output_channels(4)
.input_width(input_width)
.input_height(3)
- .Test(xnn_f32_conv_hwc2spchw_ukernel_3x3s2p1c3x4__neonfma_2x2);
+ .Test(xnn_f32_conv_hwc2chw_ukernel_3x3s2p1c3x4__neonfma_2x2);
}
}
- TEST(F32_CONV_HWC2SPCHW_3X3S2P1C3X4__NEONFMA_2X2, output_channels_lt_4) {
+ TEST(F32_CONV_HWC2CHW_3X3S2P1C3X4__NEONFMA_2X2, output_channels_lt_4) {
TEST_REQUIRES_ARM_NEON_FMA;
for (size_t output_channels = 1; output_channels < 4; output_channels++) {
for (size_t input_width = 1; input_width < 32; input_width += 7) {
- ConvHWC2SpCHWMicrokernelTester()
+ ConvHWC2CHWMicrokernelTester()
.kernel_size(3)
.subsampling(2)
.padding_width(1)
@@ -88,16 +88,16 @@
.output_channels(output_channels)
.input_width(input_width)
.input_height(3)
- .Test(xnn_f32_conv_hwc2spchw_ukernel_3x3s2p1c3x4__neonfma_2x2);
+ .Test(xnn_f32_conv_hwc2chw_ukernel_3x3s2p1c3x4__neonfma_2x2);
}
}
}
- TEST(F32_CONV_HWC2SPCHW_3X3S2P1C3X4__NEONFMA_2X2, output_channels_div_4) {
+ TEST(F32_CONV_HWC2CHW_3X3S2P1C3X4__NEONFMA_2X2, output_channels_div_4) {
TEST_REQUIRES_ARM_NEON_FMA;
for (size_t output_channels = 8; output_channels <= 16; output_channels += 4) {
for (size_t input_width = 1; input_width < 32; input_width += 7) {
- ConvHWC2SpCHWMicrokernelTester()
+ ConvHWC2CHWMicrokernelTester()
.kernel_size(3)
.subsampling(2)
.padding_width(1)
@@ -106,16 +106,16 @@
.output_channels(output_channels)
.input_width(input_width)
.input_height(3)
- .Test(xnn_f32_conv_hwc2spchw_ukernel_3x3s2p1c3x4__neonfma_2x2);
+ .Test(xnn_f32_conv_hwc2chw_ukernel_3x3s2p1c3x4__neonfma_2x2);
}
}
}
- TEST(F32_CONV_HWC2SPCHW_3X3S2P1C3X4__NEONFMA_2X2, output_channels_gt_4) {
+ TEST(F32_CONV_HWC2CHW_3X3S2P1C3X4__NEONFMA_2X2, output_channels_gt_4) {
TEST_REQUIRES_ARM_NEON_FMA;
for (size_t output_channels = 5; output_channels < 8; output_channels++) {
for (size_t input_width = 1; input_width < 32; input_width += 7) {
- ConvHWC2SpCHWMicrokernelTester()
+ ConvHWC2CHWMicrokernelTester()
.kernel_size(3)
.subsampling(2)
.padding_width(1)
@@ -124,17 +124,17 @@
.output_channels(output_channels)
.input_width(input_width)
.input_height(3)
- .Test(xnn_f32_conv_hwc2spchw_ukernel_3x3s2p1c3x4__neonfma_2x2);
+ .Test(xnn_f32_conv_hwc2chw_ukernel_3x3s2p1c3x4__neonfma_2x2);
}
}
}
- TEST(F32_CONV_HWC2SPCHW_3X3S2P1C3X4__NEONFMA_2X2, input_height_lt_3) {
+ TEST(F32_CONV_HWC2CHW_3X3S2P1C3X4__NEONFMA_2X2, input_height_lt_3) {
TEST_REQUIRES_ARM_NEON_FMA;
for (size_t input_height = 1; input_height < 3; input_height++) {
for (size_t output_channels = 1; output_channels < 8; output_channels += 3) {
for (size_t input_width = 1; input_width < 32; input_width += 7) {
- ConvHWC2SpCHWMicrokernelTester()
+ ConvHWC2CHWMicrokernelTester()
.kernel_size(3)
.subsampling(2)
.padding(1)
@@ -143,18 +143,18 @@
.output_channels(output_channels)
.input_width(input_width)
.input_height(input_height)
- .Test(xnn_f32_conv_hwc2spchw_ukernel_3x3s2p1c3x4__neonfma_2x2);
+ .Test(xnn_f32_conv_hwc2chw_ukernel_3x3s2p1c3x4__neonfma_2x2);
}
}
}
}
- TEST(F32_CONV_HWC2SPCHW_3X3S2P1C3X4__NEONFMA_2X2, input_height_gt_3) {
+ TEST(F32_CONV_HWC2CHW_3X3S2P1C3X4__NEONFMA_2X2, input_height_gt_3) {
TEST_REQUIRES_ARM_NEON_FMA;
for (size_t input_height = 4; input_height <= 9; input_height++) {
for (size_t output_channels = 1; output_channels < 8; output_channels += 3) {
for (size_t input_width = 1; input_width < 32; input_width += 7) {
- ConvHWC2SpCHWMicrokernelTester()
+ ConvHWC2CHWMicrokernelTester()
.kernel_size(3)
.subsampling(2)
.padding_width(1)
@@ -163,18 +163,18 @@
.output_channels(output_channels)
.input_width(input_width)
.input_height(input_height)
- .Test(xnn_f32_conv_hwc2spchw_ukernel_3x3s2p1c3x4__neonfma_2x2);
+ .Test(xnn_f32_conv_hwc2chw_ukernel_3x3s2p1c3x4__neonfma_2x2);
}
}
}
}
- TEST(F32_CONV_HWC2SPCHW_3X3S2P1C3X4__NEONFMA_2X2, padding_top) {
+ TEST(F32_CONV_HWC2CHW_3X3S2P1C3X4__NEONFMA_2X2, padding_top) {
TEST_REQUIRES_ARM_NEON_FMA;
for (size_t padding_top = 0; padding_top <= 1; padding_top++) {
for (size_t output_channels = 1; output_channels < 16; output_channels += 7) {
for (size_t input_width = 1; input_width < 32; input_width += 7) {
- ConvHWC2SpCHWMicrokernelTester()
+ ConvHWC2CHWMicrokernelTester()
.kernel_size(3)
.subsampling(2)
.padding_width(1)
@@ -184,18 +184,18 @@
.output_channels(output_channels)
.input_width(input_width)
.input_height(9)
- .Test(xnn_f32_conv_hwc2spchw_ukernel_3x3s2p1c3x4__neonfma_2x2);
+ .Test(xnn_f32_conv_hwc2chw_ukernel_3x3s2p1c3x4__neonfma_2x2);
}
}
}
}
- TEST(F32_CONV_HWC2SPCHW_3X3S2P1C3X4__NEONFMA_2X2, padding_bottom) {
+ TEST(F32_CONV_HWC2CHW_3X3S2P1C3X4__NEONFMA_2X2, padding_bottom) {
TEST_REQUIRES_ARM_NEON_FMA;
for (size_t padding_bottom = 0; padding_bottom <= 1; padding_bottom++) {
for (size_t output_channels = 1; output_channels < 16; output_channels += 7) {
for (size_t input_width = 1; input_width < 32; input_width += 7) {
- ConvHWC2SpCHWMicrokernelTester()
+ ConvHWC2CHWMicrokernelTester()
.kernel_size(3)
.subsampling(2)
.padding_width(1)
@@ -205,18 +205,18 @@
.output_channels(output_channels)
.input_width(input_width)
.input_height(9)
- .Test(xnn_f32_conv_hwc2spchw_ukernel_3x3s2p1c3x4__neonfma_2x2);
+ .Test(xnn_f32_conv_hwc2chw_ukernel_3x3s2p1c3x4__neonfma_2x2);
}
}
}
}
- TEST(F32_CONV_HWC2SPCHW_3X3S2P1C3X4__NEONFMA_2X2, output_y_start) {
+ TEST(F32_CONV_HWC2CHW_3X3S2P1C3X4__NEONFMA_2X2, output_y_start) {
TEST_REQUIRES_ARM_NEON_FMA;
for (size_t output_y_start = 1; output_y_start <= 3; output_y_start++) {
for (size_t output_channels = 1; output_channels < 8; output_channels += 3) {
for (size_t input_width = 1; input_width < 32; input_width += 7) {
- ConvHWC2SpCHWMicrokernelTester()
+ ConvHWC2CHWMicrokernelTester()
.kernel_size(3)
.subsampling(2)
.padding_width(1)
@@ -226,18 +226,18 @@
.input_width(input_width)
.input_height(9)
.output_y_start(output_y_start)
- .Test(xnn_f32_conv_hwc2spchw_ukernel_3x3s2p1c3x4__neonfma_2x2);
+ .Test(xnn_f32_conv_hwc2chw_ukernel_3x3s2p1c3x4__neonfma_2x2);
}
}
}
}
- TEST(F32_CONV_HWC2SPCHW_3X3S2P1C3X4__NEONFMA_2X2, output_y_end) {
+ TEST(F32_CONV_HWC2CHW_3X3S2P1C3X4__NEONFMA_2X2, output_y_end) {
TEST_REQUIRES_ARM_NEON_FMA;
for (size_t output_y_end = 2; output_y_end < 5; output_y_end++) {
for (size_t output_channels = 1; output_channels < 8; output_channels += 3) {
for (size_t input_width = 1; input_width < 32; input_width += 7) {
- ConvHWC2SpCHWMicrokernelTester()
+ ConvHWC2CHWMicrokernelTester()
.kernel_size(3)
.subsampling(2)
.padding_width(1)
@@ -247,17 +247,17 @@
.input_width(input_width)
.input_height(9)
.output_y_end(output_y_end)
- .Test(xnn_f32_conv_hwc2spchw_ukernel_3x3s2p1c3x4__neonfma_2x2);
+ .Test(xnn_f32_conv_hwc2chw_ukernel_3x3s2p1c3x4__neonfma_2x2);
}
}
}
}
- TEST(F32_CONV_HWC2SPCHW_3X3S2P1C3X4__NEONFMA_2X2, qmin) {
+ TEST(F32_CONV_HWC2CHW_3X3S2P1C3X4__NEONFMA_2X2, qmin) {
TEST_REQUIRES_ARM_NEON_FMA;
for (size_t output_channels = 1; output_channels < 8; output_channels += 3) {
for (size_t input_width = 1; input_width < 32; input_width += 7) {
- ConvHWC2SpCHWMicrokernelTester()
+ ConvHWC2CHWMicrokernelTester()
.kernel_size(3)
.subsampling(2)
.padding_width(1)
@@ -267,16 +267,16 @@
.input_width(input_width)
.input_height(6)
.qmin(128)
- .Test(xnn_f32_conv_hwc2spchw_ukernel_3x3s2p1c3x4__neonfma_2x2);
+ .Test(xnn_f32_conv_hwc2chw_ukernel_3x3s2p1c3x4__neonfma_2x2);
}
}
}
- TEST(F32_CONV_HWC2SPCHW_3X3S2P1C3X4__NEONFMA_2X2, qmax) {
+ TEST(F32_CONV_HWC2CHW_3X3S2P1C3X4__NEONFMA_2X2, qmax) {
TEST_REQUIRES_ARM_NEON_FMA;
for (size_t output_channels = 1; output_channels < 8; output_channels += 3) {
for (size_t input_width = 1; input_width < 32; input_width += 7) {
- ConvHWC2SpCHWMicrokernelTester()
+ ConvHWC2CHWMicrokernelTester()
.kernel_size(3)
.subsampling(2)
.padding_width(1)
@@ -286,14 +286,14 @@
.input_width(input_width)
.input_height(6)
.qmax(128)
- .Test(xnn_f32_conv_hwc2spchw_ukernel_3x3s2p1c3x4__neonfma_2x2);
+ .Test(xnn_f32_conv_hwc2chw_ukernel_3x3s2p1c3x4__neonfma_2x2);
}
}
}
#endif // XNN_ARCH_ARM64
-TEST(F32_CONV_HWC2SPCHW_3X3S2P1C3X4__SCALAR_1X1, input_width_eq_1) {
- ConvHWC2SpCHWMicrokernelTester()
+TEST(F32_CONV_HWC2CHW_3X3S2P1C3X4__SCALAR_1X1, input_width_eq_1) {
+ ConvHWC2CHWMicrokernelTester()
.kernel_size(3)
.subsampling(2)
.padding_width(1)
@@ -302,13 +302,13 @@
.output_channels(4)
.input_width(4)
.input_height(3)
- .Test(xnn_f32_conv_hwc2spchw_ukernel_3x3s2p1c3x4__scalar_1x1, ConvHWC2SpCHWMicrokernelTester::Variant::Scalar);
+ .Test(xnn_f32_conv_hwc2chw_ukernel_3x3s2p1c3x4__scalar_1x1, ConvHWC2CHWMicrokernelTester::Variant::Scalar);
}
-TEST(F32_CONV_HWC2SPCHW_3X3S2P1C3X4__SCALAR_1X1, input_width_gt_1) {
+TEST(F32_CONV_HWC2CHW_3X3S2P1C3X4__SCALAR_1X1, input_width_gt_1) {
for (size_t input_width = 2; input_width < 33; input_width++) {
- ConvHWC2SpCHWMicrokernelTester()
+ ConvHWC2CHWMicrokernelTester()
.kernel_size(3)
.subsampling(2)
.padding_width(1)
@@ -317,14 +317,14 @@
.output_channels(4)
.input_width(input_width)
.input_height(3)
- .Test(xnn_f32_conv_hwc2spchw_ukernel_3x3s2p1c3x4__scalar_1x1, ConvHWC2SpCHWMicrokernelTester::Variant::Scalar);
+ .Test(xnn_f32_conv_hwc2chw_ukernel_3x3s2p1c3x4__scalar_1x1, ConvHWC2CHWMicrokernelTester::Variant::Scalar);
}
}
-TEST(F32_CONV_HWC2SPCHW_3X3S2P1C3X4__SCALAR_1X1, output_channels_lt_4) {
+TEST(F32_CONV_HWC2CHW_3X3S2P1C3X4__SCALAR_1X1, output_channels_lt_4) {
for (size_t output_channels = 1; output_channels < 4; output_channels++) {
for (size_t input_width = 1; input_width < 32; input_width += 7) {
- ConvHWC2SpCHWMicrokernelTester()
+ ConvHWC2CHWMicrokernelTester()
.kernel_size(3)
.subsampling(2)
.padding_width(1)
@@ -333,15 +333,15 @@
.output_channels(output_channels)
.input_width(input_width)
.input_height(3)
- .Test(xnn_f32_conv_hwc2spchw_ukernel_3x3s2p1c3x4__scalar_1x1, ConvHWC2SpCHWMicrokernelTester::Variant::Scalar);
+ .Test(xnn_f32_conv_hwc2chw_ukernel_3x3s2p1c3x4__scalar_1x1, ConvHWC2CHWMicrokernelTester::Variant::Scalar);
}
}
}
-TEST(F32_CONV_HWC2SPCHW_3X3S2P1C3X4__SCALAR_1X1, output_channels_div_4) {
+TEST(F32_CONV_HWC2CHW_3X3S2P1C3X4__SCALAR_1X1, output_channels_div_4) {
for (size_t output_channels = 8; output_channels <= 16; output_channels += 4) {
for (size_t input_width = 1; input_width < 32; input_width += 7) {
- ConvHWC2SpCHWMicrokernelTester()
+ ConvHWC2CHWMicrokernelTester()
.kernel_size(3)
.subsampling(2)
.padding_width(1)
@@ -350,15 +350,15 @@
.output_channels(output_channels)
.input_width(input_width)
.input_height(3)
- .Test(xnn_f32_conv_hwc2spchw_ukernel_3x3s2p1c3x4__scalar_1x1, ConvHWC2SpCHWMicrokernelTester::Variant::Scalar);
+ .Test(xnn_f32_conv_hwc2chw_ukernel_3x3s2p1c3x4__scalar_1x1, ConvHWC2CHWMicrokernelTester::Variant::Scalar);
}
}
}
-TEST(F32_CONV_HWC2SPCHW_3X3S2P1C3X4__SCALAR_1X1, output_channels_gt_4) {
+TEST(F32_CONV_HWC2CHW_3X3S2P1C3X4__SCALAR_1X1, output_channels_gt_4) {
for (size_t output_channels = 5; output_channels < 8; output_channels++) {
for (size_t input_width = 1; input_width < 32; input_width += 7) {
- ConvHWC2SpCHWMicrokernelTester()
+ ConvHWC2CHWMicrokernelTester()
.kernel_size(3)
.subsampling(2)
.padding_width(1)
@@ -367,16 +367,16 @@
.output_channels(output_channels)
.input_width(input_width)
.input_height(3)
- .Test(xnn_f32_conv_hwc2spchw_ukernel_3x3s2p1c3x4__scalar_1x1, ConvHWC2SpCHWMicrokernelTester::Variant::Scalar);
+ .Test(xnn_f32_conv_hwc2chw_ukernel_3x3s2p1c3x4__scalar_1x1, ConvHWC2CHWMicrokernelTester::Variant::Scalar);
}
}
}
-TEST(F32_CONV_HWC2SPCHW_3X3S2P1C3X4__SCALAR_1X1, input_height_lt_3) {
+TEST(F32_CONV_HWC2CHW_3X3S2P1C3X4__SCALAR_1X1, input_height_lt_3) {
for (size_t input_height = 1; input_height < 3; input_height++) {
for (size_t output_channels = 1; output_channels < 8; output_channels += 3) {
for (size_t input_width = 1; input_width < 32; input_width += 7) {
- ConvHWC2SpCHWMicrokernelTester()
+ ConvHWC2CHWMicrokernelTester()
.kernel_size(3)
.subsampling(2)
.padding(1)
@@ -385,17 +385,17 @@
.output_channels(output_channels)
.input_width(input_width)
.input_height(input_height)
- .Test(xnn_f32_conv_hwc2spchw_ukernel_3x3s2p1c3x4__scalar_1x1, ConvHWC2SpCHWMicrokernelTester::Variant::Scalar);
+ .Test(xnn_f32_conv_hwc2chw_ukernel_3x3s2p1c3x4__scalar_1x1, ConvHWC2CHWMicrokernelTester::Variant::Scalar);
}
}
}
}
-TEST(F32_CONV_HWC2SPCHW_3X3S2P1C3X4__SCALAR_1X1, input_height_gt_3) {
+TEST(F32_CONV_HWC2CHW_3X3S2P1C3X4__SCALAR_1X1, input_height_gt_3) {
for (size_t input_height = 4; input_height <= 9; input_height++) {
for (size_t output_channels = 1; output_channels < 8; output_channels += 3) {
for (size_t input_width = 1; input_width < 32; input_width += 7) {
- ConvHWC2SpCHWMicrokernelTester()
+ ConvHWC2CHWMicrokernelTester()
.kernel_size(3)
.subsampling(2)
.padding_width(1)
@@ -404,17 +404,17 @@
.output_channels(output_channels)
.input_width(input_width)
.input_height(input_height)
- .Test(xnn_f32_conv_hwc2spchw_ukernel_3x3s2p1c3x4__scalar_1x1, ConvHWC2SpCHWMicrokernelTester::Variant::Scalar);
+ .Test(xnn_f32_conv_hwc2chw_ukernel_3x3s2p1c3x4__scalar_1x1, ConvHWC2CHWMicrokernelTester::Variant::Scalar);
}
}
}
}
-TEST(F32_CONV_HWC2SPCHW_3X3S2P1C3X4__SCALAR_1X1, padding_top) {
+TEST(F32_CONV_HWC2CHW_3X3S2P1C3X4__SCALAR_1X1, padding_top) {
for (size_t padding_top = 0; padding_top <= 1; padding_top++) {
for (size_t output_channels = 1; output_channels < 16; output_channels += 7) {
for (size_t input_width = 1; input_width < 32; input_width += 7) {
- ConvHWC2SpCHWMicrokernelTester()
+ ConvHWC2CHWMicrokernelTester()
.kernel_size(3)
.subsampling(2)
.padding_width(1)
@@ -424,17 +424,17 @@
.output_channels(output_channels)
.input_width(input_width)
.input_height(9)
- .Test(xnn_f32_conv_hwc2spchw_ukernel_3x3s2p1c3x4__scalar_1x1, ConvHWC2SpCHWMicrokernelTester::Variant::Scalar);
+ .Test(xnn_f32_conv_hwc2chw_ukernel_3x3s2p1c3x4__scalar_1x1, ConvHWC2CHWMicrokernelTester::Variant::Scalar);
}
}
}
}
-TEST(F32_CONV_HWC2SPCHW_3X3S2P1C3X4__SCALAR_1X1, padding_bottom) {
+TEST(F32_CONV_HWC2CHW_3X3S2P1C3X4__SCALAR_1X1, padding_bottom) {
for (size_t padding_bottom = 0; padding_bottom <= 1; padding_bottom++) {
for (size_t output_channels = 1; output_channels < 16; output_channels += 7) {
for (size_t input_width = 1; input_width < 32; input_width += 7) {
- ConvHWC2SpCHWMicrokernelTester()
+ ConvHWC2CHWMicrokernelTester()
.kernel_size(3)
.subsampling(2)
.padding_width(1)
@@ -444,17 +444,17 @@
.output_channels(output_channels)
.input_width(input_width)
.input_height(9)
- .Test(xnn_f32_conv_hwc2spchw_ukernel_3x3s2p1c3x4__scalar_1x1, ConvHWC2SpCHWMicrokernelTester::Variant::Scalar);
+ .Test(xnn_f32_conv_hwc2chw_ukernel_3x3s2p1c3x4__scalar_1x1, ConvHWC2CHWMicrokernelTester::Variant::Scalar);
}
}
}
}
-TEST(F32_CONV_HWC2SPCHW_3X3S2P1C3X4__SCALAR_1X1, output_y_start) {
+TEST(F32_CONV_HWC2CHW_3X3S2P1C3X4__SCALAR_1X1, output_y_start) {
for (size_t output_y_start = 1; output_y_start <= 3; output_y_start++) {
for (size_t output_channels = 1; output_channels < 8; output_channels += 3) {
for (size_t input_width = 1; input_width < 32; input_width += 7) {
- ConvHWC2SpCHWMicrokernelTester()
+ ConvHWC2CHWMicrokernelTester()
.kernel_size(3)
.subsampling(2)
.padding_width(1)
@@ -464,17 +464,17 @@
.input_width(input_width)
.input_height(9)
.output_y_start(output_y_start)
- .Test(xnn_f32_conv_hwc2spchw_ukernel_3x3s2p1c3x4__scalar_1x1, ConvHWC2SpCHWMicrokernelTester::Variant::Scalar);
+ .Test(xnn_f32_conv_hwc2chw_ukernel_3x3s2p1c3x4__scalar_1x1, ConvHWC2CHWMicrokernelTester::Variant::Scalar);
}
}
}
}
-TEST(F32_CONV_HWC2SPCHW_3X3S2P1C3X4__SCALAR_1X1, output_y_end) {
+TEST(F32_CONV_HWC2CHW_3X3S2P1C3X4__SCALAR_1X1, output_y_end) {
for (size_t output_y_end = 2; output_y_end < 5; output_y_end++) {
for (size_t output_channels = 1; output_channels < 8; output_channels += 3) {
for (size_t input_width = 1; input_width < 32; input_width += 7) {
- ConvHWC2SpCHWMicrokernelTester()
+ ConvHWC2CHWMicrokernelTester()
.kernel_size(3)
.subsampling(2)
.padding_width(1)
@@ -484,16 +484,16 @@
.input_width(input_width)
.input_height(9)
.output_y_end(output_y_end)
- .Test(xnn_f32_conv_hwc2spchw_ukernel_3x3s2p1c3x4__scalar_1x1, ConvHWC2SpCHWMicrokernelTester::Variant::Scalar);
+ .Test(xnn_f32_conv_hwc2chw_ukernel_3x3s2p1c3x4__scalar_1x1, ConvHWC2CHWMicrokernelTester::Variant::Scalar);
}
}
}
}
-TEST(F32_CONV_HWC2SPCHW_3X3S2P1C3X4__SCALAR_1X1, qmin) {
+TEST(F32_CONV_HWC2CHW_3X3S2P1C3X4__SCALAR_1X1, qmin) {
for (size_t output_channels = 1; output_channels < 8; output_channels += 3) {
for (size_t input_width = 1; input_width < 32; input_width += 7) {
- ConvHWC2SpCHWMicrokernelTester()
+ ConvHWC2CHWMicrokernelTester()
.kernel_size(3)
.subsampling(2)
.padding_width(1)
@@ -503,15 +503,15 @@
.input_width(input_width)
.input_height(6)
.qmin(128)
- .Test(xnn_f32_conv_hwc2spchw_ukernel_3x3s2p1c3x4__scalar_1x1, ConvHWC2SpCHWMicrokernelTester::Variant::Scalar);
+ .Test(xnn_f32_conv_hwc2chw_ukernel_3x3s2p1c3x4__scalar_1x1, ConvHWC2CHWMicrokernelTester::Variant::Scalar);
}
}
}
-TEST(F32_CONV_HWC2SPCHW_3X3S2P1C3X4__SCALAR_1X1, qmax) {
+TEST(F32_CONV_HWC2CHW_3X3S2P1C3X4__SCALAR_1X1, qmax) {
for (size_t output_channels = 1; output_channels < 8; output_channels += 3) {
for (size_t input_width = 1; input_width < 32; input_width += 7) {
- ConvHWC2SpCHWMicrokernelTester()
+ ConvHWC2CHWMicrokernelTester()
.kernel_size(3)
.subsampling(2)
.padding_width(1)
@@ -521,7 +521,7 @@
.input_width(input_width)
.input_height(6)
.qmax(128)
- .Test(xnn_f32_conv_hwc2spchw_ukernel_3x3s2p1c3x4__scalar_1x1, ConvHWC2SpCHWMicrokernelTester::Variant::Scalar);
+ .Test(xnn_f32_conv_hwc2chw_ukernel_3x3s2p1c3x4__scalar_1x1, ConvHWC2CHWMicrokernelTester::Variant::Scalar);
}
}
}
diff --git a/test/f32-dwconv-spchw.cc b/test/f32-dwconv-chw.cc
similarity index 70%
rename from test/f32-dwconv-spchw.cc
rename to test/f32-dwconv-chw.cc
index 173c5e0..c7434d4 100644
--- a/test/f32-dwconv-spchw.cc
+++ b/test/f32-dwconv-chw.cc
@@ -9,12 +9,12 @@
#include <xnnpack/isa-checks.h>
#include <xnnpack/dwconv.h>
-#include "dwconv-spchw-microkernel-tester.h"
+#include "dwconv-chw-microkernel-tester.h"
#if XNN_ARCH_X86 || XNN_ARCH_X86_64
- TEST(F32_DWCONV_SPCHW_3X3P1__SSE, input_width_eq_4) {
+ TEST(F32_DWCONV_CHW_3X3P1__SSE, input_width_eq_4) {
TEST_REQUIRES_X86_SSE;
- DWConvSpCHWMicrokernelTester()
+ DWConvCHWMicrokernelTester()
.input_tuple_size(4)
.output_tuple_size(4)
.input_width(4)
@@ -25,13 +25,13 @@
.kernel_height(3)
.kernel_width(3)
.output_height(1)
- .Test(xnn_f32_dwconv_spchw_ukernel_3x3p1__sse);
+ .Test(xnn_f32_dwconv_chw_ukernel_3x3p1__sse);
}
- TEST(F32_DWCONV_SPCHW_3X3P1__SSE, input_width_lt_4) {
+ TEST(F32_DWCONV_CHW_3X3P1__SSE, input_width_lt_4) {
TEST_REQUIRES_X86_SSE;
for (size_t input_width = 1; input_width < 4; input_width++) {
- DWConvSpCHWMicrokernelTester()
+ DWConvCHWMicrokernelTester()
.input_tuple_size(4)
.output_tuple_size(4)
.input_width(input_width)
@@ -42,14 +42,14 @@
.kernel_height(3)
.kernel_width(3)
.output_height(1)
- .Test(xnn_f32_dwconv_spchw_ukernel_3x3p1__sse);
+ .Test(xnn_f32_dwconv_chw_ukernel_3x3p1__sse);
}
}
- TEST(F32_DWCONV_SPCHW_3X3P1__SSE, input_width_gt_4) {
+ TEST(F32_DWCONV_CHW_3X3P1__SSE, input_width_gt_4) {
TEST_REQUIRES_X86_SSE;
for (size_t input_width = 5; input_width < 8; input_width++) {
- DWConvSpCHWMicrokernelTester()
+ DWConvCHWMicrokernelTester()
.input_tuple_size(4)
.output_tuple_size(4)
.input_width(input_width)
@@ -60,14 +60,14 @@
.kernel_height(3)
.kernel_width(3)
.output_height(1)
- .Test(xnn_f32_dwconv_spchw_ukernel_3x3p1__sse);
+ .Test(xnn_f32_dwconv_chw_ukernel_3x3p1__sse);
}
}
- TEST(F32_DWCONV_SPCHW_3X3P1__SSE, input_width_div_4) {
+ TEST(F32_DWCONV_CHW_3X3P1__SSE, input_width_div_4) {
TEST_REQUIRES_X86_SSE;
for (size_t input_width = 8; input_width < 32; input_width += 4) {
- DWConvSpCHWMicrokernelTester()
+ DWConvCHWMicrokernelTester()
.input_tuple_size(4)
.output_tuple_size(4)
.input_width(input_width)
@@ -78,14 +78,14 @@
.kernel_height(3)
.kernel_width(3)
.output_height(1)
- .Test(xnn_f32_dwconv_spchw_ukernel_3x3p1__sse);
+ .Test(xnn_f32_dwconv_chw_ukernel_3x3p1__sse);
}
}
- TEST(F32_DWCONV_SPCHW_3X3P1__SSE, input_width_stride) {
+ TEST(F32_DWCONV_CHW_3X3P1__SSE, input_width_stride) {
TEST_REQUIRES_X86_SSE;
for (size_t input_width = 1; input_width < 32; input_width += 3) {
- DWConvSpCHWMicrokernelTester()
+ DWConvCHWMicrokernelTester()
.input_tuple_size(4)
.output_tuple_size(4)
.input_width(input_width)
@@ -97,14 +97,14 @@
.kernel_height(3)
.kernel_width(3)
.output_height(1)
- .Test(xnn_f32_dwconv_spchw_ukernel_3x3p1__sse);
+ .Test(xnn_f32_dwconv_chw_ukernel_3x3p1__sse);
}
}
- TEST(F32_DWCONV_SPCHW_3X3P1__SSE, input_tuple_stride) {
+ TEST(F32_DWCONV_CHW_3X3P1__SSE, input_tuple_stride) {
TEST_REQUIRES_X86_SSE;
for (size_t input_width = 1; input_width < 32; input_width += 5) {
- DWConvSpCHWMicrokernelTester()
+ DWConvCHWMicrokernelTester()
.input_tuple_size(4)
.output_tuple_size(4)
.input_width(input_width)
@@ -117,15 +117,15 @@
.kernel_height(3)
.kernel_width(3)
.output_height(1)
- .Test(xnn_f32_dwconv_spchw_ukernel_3x3p1__sse);
+ .Test(xnn_f32_dwconv_chw_ukernel_3x3p1__sse);
}
}
- TEST(F32_DWCONV_SPCHW_3X3P1__SSE, output_height_gt_1) {
+ TEST(F32_DWCONV_CHW_3X3P1__SSE, output_height_gt_1) {
TEST_REQUIRES_X86_SSE;
for (size_t output_height = 2; output_height < 5; output_height++) {
for (size_t input_width = 1; input_width < 32; input_width += 3) {
- DWConvSpCHWMicrokernelTester()
+ DWConvCHWMicrokernelTester()
.input_tuple_size(4)
.output_tuple_size(4)
.input_width(input_width)
@@ -136,15 +136,15 @@
.kernel_height(3)
.kernel_width(3)
.output_height(output_height)
- .Test(xnn_f32_dwconv_spchw_ukernel_3x3p1__sse);
+ .Test(xnn_f32_dwconv_chw_ukernel_3x3p1__sse);
}
}
}
- TEST(F32_DWCONV_SPCHW_3X3P1__SSE, output_width_stride) {
+ TEST(F32_DWCONV_CHW_3X3P1__SSE, output_width_stride) {
TEST_REQUIRES_X86_SSE;
for (size_t input_width = 1; input_width < 32; input_width += 3) {
- DWConvSpCHWMicrokernelTester()
+ DWConvCHWMicrokernelTester()
.input_tuple_size(4)
.output_tuple_size(4)
.input_width(input_width)
@@ -156,14 +156,14 @@
.kernel_width(3)
.output_height(5)
.output_width_stride(36)
- .Test(xnn_f32_dwconv_spchw_ukernel_3x3p1__sse);
+ .Test(xnn_f32_dwconv_chw_ukernel_3x3p1__sse);
}
}
- TEST(F32_DWCONV_SPCHW_3X3P1__SSE, output_tuple_stride) {
+ TEST(F32_DWCONV_CHW_3X3P1__SSE, output_tuple_stride) {
TEST_REQUIRES_X86_SSE;
for (size_t input_width = 1; input_width < 32; input_width += 3) {
- DWConvSpCHWMicrokernelTester()
+ DWConvCHWMicrokernelTester()
.input_tuple_size(4)
.output_tuple_size(4)
.input_width(input_width)
@@ -176,14 +176,14 @@
.output_height(5)
.output_width_stride(4)
.output_tuple_stride(5 * 4)
- .Test(xnn_f32_dwconv_spchw_ukernel_3x3p1__sse);
+ .Test(xnn_f32_dwconv_chw_ukernel_3x3p1__sse);
}
}
- TEST(F32_DWCONV_SPCHW_3X3P1__SSE, chw_layout) {
+ TEST(F32_DWCONV_CHW_3X3P1__SSE, chw_layout) {
TEST_REQUIRES_X86_SSE;
for (size_t input_width = 1; input_width < 32; input_width += 3) {
- DWConvSpCHWMicrokernelTester()
+ DWConvCHWMicrokernelTester()
.input_tuple_size(4)
.output_tuple_size(4)
.input_width(input_width)
@@ -196,15 +196,15 @@
.kernel_width(3)
.output_height(5)
.output_width_stride(input_width)
- .Test(xnn_f32_dwconv_spchw_ukernel_3x3p1__sse);
+ .Test(xnn_f32_dwconv_chw_ukernel_3x3p1__sse);
}
}
#endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
#if XNN_ARCH_X86 || XNN_ARCH_X86_64
- TEST(F32_DWCONV_SPCHW_3X3S2P1__SSE, input_width_eq_4_pad0) {
+ TEST(F32_DWCONV_CHW_3X3S2P1__SSE, input_width_eq_4_pad0) {
TEST_REQUIRES_X86_SSE;
- DWConvSpCHWMicrokernelTester()
+ DWConvCHWMicrokernelTester()
.input_tuple_size(4)
.output_tuple_size(4)
.input_width(4)
@@ -216,13 +216,13 @@
.kernel_width(3)
.subsampling(2)
.output_height(1)
- .Test(xnn_f32_dwconv_spchw_ukernel_3x3s2p1__sse);
+ .Test(xnn_f32_dwconv_chw_ukernel_3x3s2p1__sse);
}
- TEST(F32_DWCONV_SPCHW_3X3S2P1__SSE, input_width_lt_4_pad0) {
+ TEST(F32_DWCONV_CHW_3X3S2P1__SSE, input_width_lt_4_pad0) {
TEST_REQUIRES_X86_SSE;
for (size_t input_width = 1; input_width < 4; input_width++) {
- DWConvSpCHWMicrokernelTester()
+ DWConvCHWMicrokernelTester()
.input_tuple_size(4)
.output_tuple_size(4)
.input_width(input_width)
@@ -234,14 +234,14 @@
.kernel_width(3)
.subsampling(2)
.output_height(1)
- .Test(xnn_f32_dwconv_spchw_ukernel_3x3s2p1__sse);
+ .Test(xnn_f32_dwconv_chw_ukernel_3x3s2p1__sse);
}
}
- TEST(F32_DWCONV_SPCHW_3X3S2P1__SSE, input_width_gt_4_pad0) {
+ TEST(F32_DWCONV_CHW_3X3S2P1__SSE, input_width_gt_4_pad0) {
TEST_REQUIRES_X86_SSE;
for (size_t input_width = 5; input_width < 8; input_width++) {
- DWConvSpCHWMicrokernelTester()
+ DWConvCHWMicrokernelTester()
.input_tuple_size(4)
.output_tuple_size(4)
.input_width(input_width)
@@ -253,14 +253,14 @@
.kernel_width(3)
.subsampling(2)
.output_height(1)
- .Test(xnn_f32_dwconv_spchw_ukernel_3x3s2p1__sse);
+ .Test(xnn_f32_dwconv_chw_ukernel_3x3s2p1__sse);
}
}
- TEST(F32_DWCONV_SPCHW_3X3S2P1__SSE, input_width_div_4_pad0) {
+ TEST(F32_DWCONV_CHW_3X3S2P1__SSE, input_width_div_4_pad0) {
TEST_REQUIRES_X86_SSE;
for (size_t input_width = 8; input_width < 32; input_width += 4) {
- DWConvSpCHWMicrokernelTester()
+ DWConvCHWMicrokernelTester()
.input_tuple_size(4)
.output_tuple_size(4)
.input_width(input_width)
@@ -272,14 +272,14 @@
.kernel_width(3)
.subsampling(2)
.output_height(1)
- .Test(xnn_f32_dwconv_spchw_ukernel_3x3s2p1__sse);
+ .Test(xnn_f32_dwconv_chw_ukernel_3x3s2p1__sse);
}
}
- TEST(F32_DWCONV_SPCHW_3X3S2P1__SSE, input_width_stride_pad0) {
+ TEST(F32_DWCONV_CHW_3X3S2P1__SSE, input_width_stride_pad0) {
TEST_REQUIRES_X86_SSE;
for (size_t input_width = 1; input_width < 32; input_width += 3) {
- DWConvSpCHWMicrokernelTester()
+ DWConvCHWMicrokernelTester()
.input_tuple_size(4)
.output_tuple_size(4)
.input_width(input_width)
@@ -292,14 +292,14 @@
.kernel_width(3)
.subsampling(2)
.output_height(1)
- .Test(xnn_f32_dwconv_spchw_ukernel_3x3s2p1__sse);
+ .Test(xnn_f32_dwconv_chw_ukernel_3x3s2p1__sse);
}
}
- TEST(F32_DWCONV_SPCHW_3X3S2P1__SSE, input_tuple_stride_pad0) {
+ TEST(F32_DWCONV_CHW_3X3S2P1__SSE, input_tuple_stride_pad0) {
TEST_REQUIRES_X86_SSE;
for (size_t input_width = 1; input_width < 32; input_width += 5) {
- DWConvSpCHWMicrokernelTester()
+ DWConvCHWMicrokernelTester()
.input_tuple_size(4)
.output_tuple_size(4)
.input_width(input_width)
@@ -313,15 +313,15 @@
.kernel_width(3)
.subsampling(2)
.output_height(1)
- .Test(xnn_f32_dwconv_spchw_ukernel_3x3s2p1__sse);
+ .Test(xnn_f32_dwconv_chw_ukernel_3x3s2p1__sse);
}
}
- TEST(F32_DWCONV_SPCHW_3X3S2P1__SSE, output_height_gt_1_pad0) {
+ TEST(F32_DWCONV_CHW_3X3S2P1__SSE, output_height_gt_1_pad0) {
TEST_REQUIRES_X86_SSE;
for (size_t output_height = 2; output_height < 5; output_height++) {
for (size_t input_width = 1; input_width < 32; input_width += 3) {
- DWConvSpCHWMicrokernelTester()
+ DWConvCHWMicrokernelTester()
.input_tuple_size(4)
.output_tuple_size(4)
.input_width(input_width)
@@ -333,15 +333,15 @@
.kernel_width(3)
.subsampling(2)
.output_height(output_height)
- .Test(xnn_f32_dwconv_spchw_ukernel_3x3s2p1__sse);
+ .Test(xnn_f32_dwconv_chw_ukernel_3x3s2p1__sse);
}
}
}
- TEST(F32_DWCONV_SPCHW_3X3S2P1__SSE, output_width_stride_pad0) {
+ TEST(F32_DWCONV_CHW_3X3S2P1__SSE, output_width_stride_pad0) {
TEST_REQUIRES_X86_SSE;
for (size_t input_width = 1; input_width < 32; input_width += 3) {
- DWConvSpCHWMicrokernelTester()
+ DWConvCHWMicrokernelTester()
.input_tuple_size(4)
.output_tuple_size(4)
.input_width(input_width)
@@ -354,14 +354,14 @@
.subsampling(2)
.output_height(5)
.output_width_stride(36)
- .Test(xnn_f32_dwconv_spchw_ukernel_3x3s2p1__sse);
+ .Test(xnn_f32_dwconv_chw_ukernel_3x3s2p1__sse);
}
}
- TEST(F32_DWCONV_SPCHW_3X3S2P1__SSE, output_tuple_stride_pad0) {
+ TEST(F32_DWCONV_CHW_3X3S2P1__SSE, output_tuple_stride_pad0) {
TEST_REQUIRES_X86_SSE;
for (size_t input_width = 1; input_width < 32; input_width += 3) {
- DWConvSpCHWMicrokernelTester()
+ DWConvCHWMicrokernelTester()
.input_tuple_size(4)
.output_tuple_size(4)
.input_width(input_width)
@@ -375,14 +375,14 @@
.output_height(5)
.output_width_stride(4)
.output_tuple_stride(5 * 4)
- .Test(xnn_f32_dwconv_spchw_ukernel_3x3s2p1__sse);
+ .Test(xnn_f32_dwconv_chw_ukernel_3x3s2p1__sse);
}
}
- TEST(F32_DWCONV_SPCHW_3X3S2P1__SSE, chw_layout_pad0) {
+ TEST(F32_DWCONV_CHW_3X3S2P1__SSE, chw_layout_pad0) {
TEST_REQUIRES_X86_SSE;
for (size_t input_width = 1; input_width < 32; input_width += 3) {
- DWConvSpCHWMicrokernelTester()
+ DWConvCHWMicrokernelTester()
.input_tuple_size(4)
.output_tuple_size(4)
.input_width(input_width)
@@ -396,13 +396,13 @@
.subsampling(2)
.output_height(5)
.output_width_stride((input_width - 1) / 2 + 1)
- .Test(xnn_f32_dwconv_spchw_ukernel_3x3s2p1__sse);
+ .Test(xnn_f32_dwconv_chw_ukernel_3x3s2p1__sse);
}
}
- TEST(F32_DWCONV_SPCHW_3X3S2P1__SSE, input_width_eq_4_pad1) {
+ TEST(F32_DWCONV_CHW_3X3S2P1__SSE, input_width_eq_4_pad1) {
TEST_REQUIRES_X86_SSE;
- DWConvSpCHWMicrokernelTester()
+ DWConvCHWMicrokernelTester()
.input_tuple_size(4)
.output_tuple_size(4)
.input_width(4)
@@ -414,13 +414,13 @@
.kernel_width(3)
.subsampling(2)
.output_height(1)
- .Test(xnn_f32_dwconv_spchw_ukernel_3x3s2p1__sse);
+ .Test(xnn_f32_dwconv_chw_ukernel_3x3s2p1__sse);
}
- TEST(F32_DWCONV_SPCHW_3X3S2P1__SSE, input_width_lt_4_pad1) {
+ TEST(F32_DWCONV_CHW_3X3S2P1__SSE, input_width_lt_4_pad1) {
TEST_REQUIRES_X86_SSE;
for (size_t input_width = 1; input_width < 4; input_width++) {
- DWConvSpCHWMicrokernelTester()
+ DWConvCHWMicrokernelTester()
.input_tuple_size(4)
.output_tuple_size(4)
.input_width(input_width)
@@ -432,14 +432,14 @@
.kernel_width(3)
.subsampling(2)
.output_height(1)
- .Test(xnn_f32_dwconv_spchw_ukernel_3x3s2p1__sse);
+ .Test(xnn_f32_dwconv_chw_ukernel_3x3s2p1__sse);
}
}
- TEST(F32_DWCONV_SPCHW_3X3S2P1__SSE, input_width_gt_4_pad1) {
+ TEST(F32_DWCONV_CHW_3X3S2P1__SSE, input_width_gt_4_pad1) {
TEST_REQUIRES_X86_SSE;
for (size_t input_width = 5; input_width < 8; input_width++) {
- DWConvSpCHWMicrokernelTester()
+ DWConvCHWMicrokernelTester()
.input_tuple_size(4)
.output_tuple_size(4)
.input_width(input_width)
@@ -451,14 +451,14 @@
.kernel_width(3)
.subsampling(2)
.output_height(1)
- .Test(xnn_f32_dwconv_spchw_ukernel_3x3s2p1__sse);
+ .Test(xnn_f32_dwconv_chw_ukernel_3x3s2p1__sse);
}
}
- TEST(F32_DWCONV_SPCHW_3X3S2P1__SSE, input_width_div_4_pad1) {
+ TEST(F32_DWCONV_CHW_3X3S2P1__SSE, input_width_div_4_pad1) {
TEST_REQUIRES_X86_SSE;
for (size_t input_width = 8; input_width < 32; input_width += 4) {
- DWConvSpCHWMicrokernelTester()
+ DWConvCHWMicrokernelTester()
.input_tuple_size(4)
.output_tuple_size(4)
.input_width(input_width)
@@ -470,14 +470,14 @@
.kernel_width(3)
.subsampling(2)
.output_height(1)
- .Test(xnn_f32_dwconv_spchw_ukernel_3x3s2p1__sse);
+ .Test(xnn_f32_dwconv_chw_ukernel_3x3s2p1__sse);
}
}
- TEST(F32_DWCONV_SPCHW_3X3S2P1__SSE, input_width_stride_pad1) {
+ TEST(F32_DWCONV_CHW_3X3S2P1__SSE, input_width_stride_pad1) {
TEST_REQUIRES_X86_SSE;
for (size_t input_width = 1; input_width < 32; input_width += 3) {
- DWConvSpCHWMicrokernelTester()
+ DWConvCHWMicrokernelTester()
.input_tuple_size(4)
.output_tuple_size(4)
.input_width(input_width)
@@ -490,14 +490,14 @@
.kernel_width(3)
.subsampling(2)
.output_height(1)
- .Test(xnn_f32_dwconv_spchw_ukernel_3x3s2p1__sse);
+ .Test(xnn_f32_dwconv_chw_ukernel_3x3s2p1__sse);
}
}
- TEST(F32_DWCONV_SPCHW_3X3S2P1__SSE, input_tuple_stride_pad1) {
+ TEST(F32_DWCONV_CHW_3X3S2P1__SSE, input_tuple_stride_pad1) {
TEST_REQUIRES_X86_SSE;
for (size_t input_width = 1; input_width < 32; input_width += 5) {
- DWConvSpCHWMicrokernelTester()
+ DWConvCHWMicrokernelTester()
.input_tuple_size(4)
.output_tuple_size(4)
.input_width(input_width)
@@ -511,15 +511,15 @@
.kernel_width(3)
.subsampling(2)
.output_height(1)
- .Test(xnn_f32_dwconv_spchw_ukernel_3x3s2p1__sse);
+ .Test(xnn_f32_dwconv_chw_ukernel_3x3s2p1__sse);
}
}
- TEST(F32_DWCONV_SPCHW_3X3S2P1__SSE, output_height_gt_1_pad1) {
+ TEST(F32_DWCONV_CHW_3X3S2P1__SSE, output_height_gt_1_pad1) {
TEST_REQUIRES_X86_SSE;
for (size_t output_height = 2; output_height < 5; output_height++) {
for (size_t input_width = 1; input_width < 32; input_width += 3) {
- DWConvSpCHWMicrokernelTester()
+ DWConvCHWMicrokernelTester()
.input_tuple_size(4)
.output_tuple_size(4)
.input_width(input_width)
@@ -531,15 +531,15 @@
.kernel_width(3)
.subsampling(2)
.output_height(output_height)
- .Test(xnn_f32_dwconv_spchw_ukernel_3x3s2p1__sse);
+ .Test(xnn_f32_dwconv_chw_ukernel_3x3s2p1__sse);
}
}
}
- TEST(F32_DWCONV_SPCHW_3X3S2P1__SSE, output_width_stride_pad1) {
+ TEST(F32_DWCONV_CHW_3X3S2P1__SSE, output_width_stride_pad1) {
TEST_REQUIRES_X86_SSE;
for (size_t input_width = 1; input_width < 32; input_width += 3) {
- DWConvSpCHWMicrokernelTester()
+ DWConvCHWMicrokernelTester()
.input_tuple_size(4)
.output_tuple_size(4)
.input_width(input_width)
@@ -552,14 +552,14 @@
.subsampling(2)
.output_height(5)
.output_width_stride(36)
- .Test(xnn_f32_dwconv_spchw_ukernel_3x3s2p1__sse);
+ .Test(xnn_f32_dwconv_chw_ukernel_3x3s2p1__sse);
}
}
- TEST(F32_DWCONV_SPCHW_3X3S2P1__SSE, output_tuple_stride_pad1) {
+ TEST(F32_DWCONV_CHW_3X3S2P1__SSE, output_tuple_stride_pad1) {
TEST_REQUIRES_X86_SSE;
for (size_t input_width = 1; input_width < 32; input_width += 3) {
- DWConvSpCHWMicrokernelTester()
+ DWConvCHWMicrokernelTester()
.input_tuple_size(4)
.output_tuple_size(4)
.input_width(input_width)
@@ -573,14 +573,14 @@
.output_height(5)
.output_width_stride(4)
.output_tuple_stride(5 * 4)
- .Test(xnn_f32_dwconv_spchw_ukernel_3x3s2p1__sse);
+ .Test(xnn_f32_dwconv_chw_ukernel_3x3s2p1__sse);
}
}
- TEST(F32_DWCONV_SPCHW_3X3S2P1__SSE, chw_layout_pad1) {
+ TEST(F32_DWCONV_CHW_3X3S2P1__SSE, chw_layout_pad1) {
TEST_REQUIRES_X86_SSE;
for (size_t input_width = 1; input_width < 32; input_width += 3) {
- DWConvSpCHWMicrokernelTester()
+ DWConvCHWMicrokernelTester()
.input_tuple_size(4)
.output_tuple_size(4)
.input_width(input_width)
@@ -594,16 +594,16 @@
.subsampling(2)
.output_height(5)
.output_width_stride((input_width - 1) / 2 + 1)
- .Test(xnn_f32_dwconv_spchw_ukernel_3x3s2p1__sse);
+ .Test(xnn_f32_dwconv_chw_ukernel_3x3s2p1__sse);
}
}
#endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
#if XNN_ARCH_ARM64
- TEST(F32_DWCONV_SPCHW_3X3P1__NEONFMA, input_width_eq_4_pad1) {
+ TEST(F32_DWCONV_CHW_3X3P1__NEONFMA, input_width_eq_4_pad1) {
TEST_REQUIRES_ARM_NEON_FMA;
- DWConvSpCHWMicrokernelTester()
+ DWConvCHWMicrokernelTester()
.input_tuple_size(4)
.output_tuple_size(4)
.input_width(4)
@@ -614,13 +614,13 @@
.kernel_height(3)
.kernel_width(3)
.output_height(1)
- .Test(xnn_f32_dwconv_spchw_ukernel_3x3p1__neonfma);
+ .Test(xnn_f32_dwconv_chw_ukernel_3x3p1__neonfma);
}
- TEST(F32_DWCONV_SPCHW_3X3P1__NEONFMA, input_width_lt_4_pad1) {
+ TEST(F32_DWCONV_CHW_3X3P1__NEONFMA, input_width_lt_4_pad1) {
TEST_REQUIRES_ARM_NEON_FMA;
for (size_t input_width = 1; input_width < 4; input_width++) {
- DWConvSpCHWMicrokernelTester()
+ DWConvCHWMicrokernelTester()
.input_tuple_size(4)
.output_tuple_size(4)
.input_width(input_width)
@@ -631,14 +631,14 @@
.kernel_height(3)
.kernel_width(3)
.output_height(1)
- .Test(xnn_f32_dwconv_spchw_ukernel_3x3p1__neonfma);
+ .Test(xnn_f32_dwconv_chw_ukernel_3x3p1__neonfma);
}
}
- TEST(F32_DWCONV_SPCHW_3X3P1__NEONFMA, input_width_gt_4_pad1) {
+ TEST(F32_DWCONV_CHW_3X3P1__NEONFMA, input_width_gt_4_pad1) {
TEST_REQUIRES_ARM_NEON_FMA;
for (size_t input_width = 5; input_width < 8; input_width++) {
- DWConvSpCHWMicrokernelTester()
+ DWConvCHWMicrokernelTester()
.input_tuple_size(4)
.output_tuple_size(4)
.input_width(input_width)
@@ -649,14 +649,14 @@
.kernel_height(3)
.kernel_width(3)
.output_height(1)
- .Test(xnn_f32_dwconv_spchw_ukernel_3x3p1__neonfma);
+ .Test(xnn_f32_dwconv_chw_ukernel_3x3p1__neonfma);
}
}
- TEST(F32_DWCONV_SPCHW_3X3P1__NEONFMA, input_width_div_4_pad1) {
+ TEST(F32_DWCONV_CHW_3X3P1__NEONFMA, input_width_div_4_pad1) {
TEST_REQUIRES_ARM_NEON_FMA;
for (size_t input_width = 8; input_width < 32; input_width += 4) {
- DWConvSpCHWMicrokernelTester()
+ DWConvCHWMicrokernelTester()
.input_tuple_size(4)
.output_tuple_size(4)
.input_width(input_width)
@@ -667,14 +667,14 @@
.kernel_height(3)
.kernel_width(3)
.output_height(1)
- .Test(xnn_f32_dwconv_spchw_ukernel_3x3p1__neonfma);
+ .Test(xnn_f32_dwconv_chw_ukernel_3x3p1__neonfma);
}
}
- TEST(F32_DWCONV_SPCHW_3X3P1__NEONFMA, input_width_stride_pad1) {
+ TEST(F32_DWCONV_CHW_3X3P1__NEONFMA, input_width_stride_pad1) {
TEST_REQUIRES_ARM_NEON_FMA;
for (size_t input_width = 1; input_width < 32; input_width += 3) {
- DWConvSpCHWMicrokernelTester()
+ DWConvCHWMicrokernelTester()
.input_tuple_size(4)
.output_tuple_size(4)
.input_width(input_width)
@@ -686,14 +686,14 @@
.kernel_height(3)
.kernel_width(3)
.output_height(1)
- .Test(xnn_f32_dwconv_spchw_ukernel_3x3p1__neonfma);
+ .Test(xnn_f32_dwconv_chw_ukernel_3x3p1__neonfma);
}
}
- TEST(F32_DWCONV_SPCHW_3X3P1__NEONFMA, input_tuple_stride_pad1) {
+ TEST(F32_DWCONV_CHW_3X3P1__NEONFMA, input_tuple_stride_pad1) {
TEST_REQUIRES_ARM_NEON_FMA;
for (size_t input_width = 1; input_width < 32; input_width += 5) {
- DWConvSpCHWMicrokernelTester()
+ DWConvCHWMicrokernelTester()
.input_tuple_size(4)
.output_tuple_size(4)
.input_width(input_width)
@@ -706,15 +706,15 @@
.kernel_height(3)
.kernel_width(3)
.output_height(1)
- .Test(xnn_f32_dwconv_spchw_ukernel_3x3p1__neonfma);
+ .Test(xnn_f32_dwconv_chw_ukernel_3x3p1__neonfma);
}
}
- TEST(F32_DWCONV_SPCHW_3X3P1__NEONFMA, output_height_gt_1_pad1) {
+ TEST(F32_DWCONV_CHW_3X3P1__NEONFMA, output_height_gt_1_pad1) {
TEST_REQUIRES_ARM_NEON_FMA;
for (size_t output_height = 2; output_height <= 5; output_height++) {
for (size_t input_width = 8; input_width < 9; input_width += 3) {
- DWConvSpCHWMicrokernelTester()
+ DWConvCHWMicrokernelTester()
.input_tuple_size(4)
.output_tuple_size(4)
.input_width(input_width)
@@ -725,15 +725,15 @@
.kernel_height(3)
.kernel_width(3)
.output_height(output_height)
- .Test(xnn_f32_dwconv_spchw_ukernel_3x3p1__neonfma);
+ .Test(xnn_f32_dwconv_chw_ukernel_3x3p1__neonfma);
}
}
}
- TEST(F32_DWCONV_SPCHW_3X3P1__NEONFMA, output_width_stride_pad1) {
+ TEST(F32_DWCONV_CHW_3X3P1__NEONFMA, output_width_stride_pad1) {
TEST_REQUIRES_ARM_NEON_FMA;
for (size_t input_width = 1; input_width < 32; input_width += 3) {
- DWConvSpCHWMicrokernelTester()
+ DWConvCHWMicrokernelTester()
.input_tuple_size(4)
.output_tuple_size(4)
.input_width(input_width)
@@ -745,14 +745,14 @@
.kernel_width(3)
.output_height(5)
.output_width_stride(36)
- .Test(xnn_f32_dwconv_spchw_ukernel_3x3p1__neonfma);
+ .Test(xnn_f32_dwconv_chw_ukernel_3x3p1__neonfma);
}
}
- TEST(F32_DWCONV_SPCHW_3X3P1__NEONFMA, output_tuple_stride_pad1) {
+ TEST(F32_DWCONV_CHW_3X3P1__NEONFMA, output_tuple_stride_pad1) {
TEST_REQUIRES_ARM_NEON_FMA;
for (size_t input_width = 1; input_width < 32; input_width += 3) {
- DWConvSpCHWMicrokernelTester()
+ DWConvCHWMicrokernelTester()
.input_tuple_size(4)
.output_tuple_size(4)
.input_width(input_width)
@@ -765,14 +765,14 @@
.output_height(5)
.output_width_stride(4)
.output_tuple_stride(5 * 4)
- .Test(xnn_f32_dwconv_spchw_ukernel_3x3p1__neonfma);
+ .Test(xnn_f32_dwconv_chw_ukernel_3x3p1__neonfma);
}
}
- TEST(F32_DWCONV_SPCHW_3X3P1__NEONFMA, chw_layout_pad1) {
+ TEST(F32_DWCONV_CHW_3X3P1__NEONFMA, chw_layout_pad1) {
TEST_REQUIRES_ARM_NEON_FMA;
for (size_t input_width = 1; input_width < 32; input_width += 3) {
- DWConvSpCHWMicrokernelTester()
+ DWConvCHWMicrokernelTester()
.input_tuple_size(4)
.output_tuple_size(4)
.input_width(input_width)
@@ -785,16 +785,16 @@
.kernel_width(3)
.output_height(5)
.output_width_stride(input_width)
- .Test(xnn_f32_dwconv_spchw_ukernel_3x3p1__neonfma);
+ .Test(xnn_f32_dwconv_chw_ukernel_3x3p1__neonfma);
}
}
#endif // XNN_ARCH_ARM64
#if XNN_ARCH_ARM64
- TEST(F32_DWCONV_SPCHW_3X3S2P1__NEONFMA, input_width_eq_4_pad0) {
+ TEST(F32_DWCONV_CHW_3X3S2P1__NEONFMA, input_width_eq_4_pad0) {
TEST_REQUIRES_ARM_NEON_FMA;
- DWConvSpCHWMicrokernelTester()
+ DWConvCHWMicrokernelTester()
.input_tuple_size(4)
.output_tuple_size(4)
.input_width(4)
@@ -806,12 +806,12 @@
.kernel_width(3)
.subsampling(2)
.output_height(1)
- .Test(xnn_f32_dwconv_spchw_ukernel_3x3s2p1__neonfma);
+ .Test(xnn_f32_dwconv_chw_ukernel_3x3s2p1__neonfma);
}
- TEST(F32_DWCONV_SPCHW_3X3S2P1__NEONFMA, input_width_eq_4_pad1) {
+ TEST(F32_DWCONV_CHW_3X3S2P1__NEONFMA, input_width_eq_4_pad1) {
TEST_REQUIRES_ARM_NEON_FMA;
- DWConvSpCHWMicrokernelTester()
+ DWConvCHWMicrokernelTester()
.input_tuple_size(4)
.output_tuple_size(4)
.input_width(4)
@@ -823,13 +823,13 @@
.kernel_width(3)
.subsampling(2)
.output_height(1)
- .Test(xnn_f32_dwconv_spchw_ukernel_3x3s2p1__neonfma);
+ .Test(xnn_f32_dwconv_chw_ukernel_3x3s2p1__neonfma);
}
- TEST(F32_DWCONV_SPCHW_3X3S2P1__NEONFMA, input_width_lt_4_pad0) {
+ TEST(F32_DWCONV_CHW_3X3S2P1__NEONFMA, input_width_lt_4_pad0) {
TEST_REQUIRES_ARM_NEON_FMA;
for (size_t input_width = 1; input_width < 4; input_width++) {
- DWConvSpCHWMicrokernelTester()
+ DWConvCHWMicrokernelTester()
.input_tuple_size(4)
.output_tuple_size(4)
.input_width(input_width)
@@ -841,14 +841,14 @@
.kernel_width(3)
.subsampling(2)
.output_height(1)
- .Test(xnn_f32_dwconv_spchw_ukernel_3x3s2p1__neonfma);
+ .Test(xnn_f32_dwconv_chw_ukernel_3x3s2p1__neonfma);
}
}
- TEST(F32_DWCONV_SPCHW_3X3S2P1__NEONFMA, input_width_lt_4_pad1) {
+ TEST(F32_DWCONV_CHW_3X3S2P1__NEONFMA, input_width_lt_4_pad1) {
TEST_REQUIRES_ARM_NEON_FMA;
for (size_t input_width = 1; input_width < 4; input_width++) {
- DWConvSpCHWMicrokernelTester()
+ DWConvCHWMicrokernelTester()
.input_tuple_size(4)
.output_tuple_size(4)
.input_width(input_width)
@@ -860,14 +860,14 @@
.kernel_width(3)
.subsampling(2)
.output_height(1)
- .Test(xnn_f32_dwconv_spchw_ukernel_3x3s2p1__neonfma);
+ .Test(xnn_f32_dwconv_chw_ukernel_3x3s2p1__neonfma);
}
}
- TEST(F32_DWCONV_SPCHW_3X3S2P1__NEONFMA, input_width_gt_4_pad0) {
+ TEST(F32_DWCONV_CHW_3X3S2P1__NEONFMA, input_width_gt_4_pad0) {
TEST_REQUIRES_ARM_NEON_FMA;
for (size_t input_width = 5; input_width < 8; input_width++) {
- DWConvSpCHWMicrokernelTester()
+ DWConvCHWMicrokernelTester()
.input_tuple_size(4)
.output_tuple_size(4)
.input_width(input_width)
@@ -879,14 +879,14 @@
.kernel_width(3)
.subsampling(2)
.output_height(1)
- .Test(xnn_f32_dwconv_spchw_ukernel_3x3s2p1__neonfma);
+ .Test(xnn_f32_dwconv_chw_ukernel_3x3s2p1__neonfma);
}
}
- TEST(F32_DWCONV_SPCHW_3X3S2P1__NEONFMA, input_width_gt_4_pad1) {
+ TEST(F32_DWCONV_CHW_3X3S2P1__NEONFMA, input_width_gt_4_pad1) {
TEST_REQUIRES_ARM_NEON_FMA;
for (size_t input_width = 5; input_width < 8; input_width++) {
- DWConvSpCHWMicrokernelTester()
+ DWConvCHWMicrokernelTester()
.input_tuple_size(4)
.output_tuple_size(4)
.input_width(input_width)
@@ -898,14 +898,14 @@
.kernel_width(3)
.subsampling(2)
.output_height(1)
- .Test(xnn_f32_dwconv_spchw_ukernel_3x3s2p1__neonfma);
+ .Test(xnn_f32_dwconv_chw_ukernel_3x3s2p1__neonfma);
}
}
- TEST(F32_DWCONV_SPCHW_3X3S2P1__NEONFMA, input_width_div_4_pad0) {
+ TEST(F32_DWCONV_CHW_3X3S2P1__NEONFMA, input_width_div_4_pad0) {
TEST_REQUIRES_ARM_NEON_FMA;
for (size_t input_width = 8; input_width < 32; input_width += 4) {
- DWConvSpCHWMicrokernelTester()
+ DWConvCHWMicrokernelTester()
.input_tuple_size(4)
.output_tuple_size(4)
.input_width(input_width)
@@ -917,14 +917,14 @@
.kernel_width(3)
.subsampling(2)
.output_height(1)
- .Test(xnn_f32_dwconv_spchw_ukernel_3x3s2p1__neonfma);
+ .Test(xnn_f32_dwconv_chw_ukernel_3x3s2p1__neonfma);
}
}
- TEST(F32_DWCONV_SPCHW_3X3S2P1__NEONFMA, input_width_div_4_pad1) {
+ TEST(F32_DWCONV_CHW_3X3S2P1__NEONFMA, input_width_div_4_pad1) {
TEST_REQUIRES_ARM_NEON_FMA;
for (size_t input_width = 8; input_width < 32; input_width += 4) {
- DWConvSpCHWMicrokernelTester()
+ DWConvCHWMicrokernelTester()
.input_tuple_size(4)
.output_tuple_size(4)
.input_width(input_width)
@@ -936,14 +936,14 @@
.kernel_width(3)
.subsampling(2)
.output_height(1)
- .Test(xnn_f32_dwconv_spchw_ukernel_3x3s2p1__neonfma);
+ .Test(xnn_f32_dwconv_chw_ukernel_3x3s2p1__neonfma);
}
}
- TEST(F32_DWCONV_SPCHW_3X3S2P1__NEONFMA, input_width_stride_pad0) {
+ TEST(F32_DWCONV_CHW_3X3S2P1__NEONFMA, input_width_stride_pad0) {
TEST_REQUIRES_ARM_NEON_FMA;
for (size_t input_width = 1; input_width < 32; input_width += 3) {
- DWConvSpCHWMicrokernelTester()
+ DWConvCHWMicrokernelTester()
.input_tuple_size(4)
.output_tuple_size(4)
.input_width(input_width)
@@ -956,14 +956,14 @@
.kernel_width(3)
.subsampling(2)
.output_height(1)
- .Test(xnn_f32_dwconv_spchw_ukernel_3x3s2p1__neonfma);
+ .Test(xnn_f32_dwconv_chw_ukernel_3x3s2p1__neonfma);
}
}
- TEST(F32_DWCONV_SPCHW_3X3S2P1__NEONFMA, input_width_stride_pad1) {
+ TEST(F32_DWCONV_CHW_3X3S2P1__NEONFMA, input_width_stride_pad1) {
TEST_REQUIRES_ARM_NEON_FMA;
for (size_t input_width = 1; input_width < 32; input_width += 3) {
- DWConvSpCHWMicrokernelTester()
+ DWConvCHWMicrokernelTester()
.input_tuple_size(4)
.output_tuple_size(4)
.input_width(input_width)
@@ -976,15 +976,15 @@
.kernel_width(3)
.subsampling(2)
.output_height(1)
- .Test(xnn_f32_dwconv_spchw_ukernel_3x3s2p1__neonfma);
+ .Test(xnn_f32_dwconv_chw_ukernel_3x3s2p1__neonfma);
}
}
- TEST(F32_DWCONV_SPCHW_3X3S2P1__NEONFMA, input_tuple_stride_pad0) {
+ TEST(F32_DWCONV_CHW_3X3S2P1__NEONFMA, input_tuple_stride_pad0) {
TEST_REQUIRES_ARM_NEON_FMA;
for (size_t input_width = 1; input_width < 32; input_width += 5) {
- DWConvSpCHWMicrokernelTester()
+ DWConvCHWMicrokernelTester()
.input_tuple_size(4)
.output_tuple_size(4)
.input_width(input_width)
@@ -998,14 +998,14 @@
.kernel_width(3)
.subsampling(2)
.output_height(1)
- .Test(xnn_f32_dwconv_spchw_ukernel_3x3s2p1__neonfma);
+ .Test(xnn_f32_dwconv_chw_ukernel_3x3s2p1__neonfma);
}
}
- TEST(F32_DWCONV_SPCHW_3X3S2P1__NEONFMA, input_tuple_stride_pad1) {
+ TEST(F32_DWCONV_CHW_3X3S2P1__NEONFMA, input_tuple_stride_pad1) {
TEST_REQUIRES_ARM_NEON_FMA;
for (size_t input_width = 1; input_width < 32; input_width += 5) {
- DWConvSpCHWMicrokernelTester()
+ DWConvCHWMicrokernelTester()
.input_tuple_size(4)
.output_tuple_size(4)
.input_width(input_width)
@@ -1019,15 +1019,15 @@
.kernel_width(3)
.subsampling(2)
.output_height(1)
- .Test(xnn_f32_dwconv_spchw_ukernel_3x3s2p1__neonfma);
+ .Test(xnn_f32_dwconv_chw_ukernel_3x3s2p1__neonfma);
}
}
- TEST(F32_DWCONV_SPCHW_3X3S2P1__NEONFMA, output_height_gt_1_pad0) {
+ TEST(F32_DWCONV_CHW_3X3S2P1__NEONFMA, output_height_gt_1_pad0) {
TEST_REQUIRES_ARM_NEON_FMA;
for (size_t output_height = 2; output_height < 5; output_height++) {
for (size_t input_width = 1; input_width < 32; input_width += 3) {
- DWConvSpCHWMicrokernelTester()
+ DWConvCHWMicrokernelTester()
.input_tuple_size(4)
.output_tuple_size(4)
.input_width(input_width)
@@ -1039,16 +1039,16 @@
.kernel_width(3)
.subsampling(2)
.output_height(output_height)
- .Test(xnn_f32_dwconv_spchw_ukernel_3x3s2p1__neonfma);
+ .Test(xnn_f32_dwconv_chw_ukernel_3x3s2p1__neonfma);
}
}
}
- TEST(F32_DWCONV_SPCHW_3X3S2P1__NEONFMA, output_height_gt_1_pad1) {
+ TEST(F32_DWCONV_CHW_3X3S2P1__NEONFMA, output_height_gt_1_pad1) {
TEST_REQUIRES_ARM_NEON_FMA;
for (size_t output_height = 2; output_height < 5; output_height++) {
for (size_t input_width = 1; input_width < 32; input_width += 3) {
- DWConvSpCHWMicrokernelTester()
+ DWConvCHWMicrokernelTester()
.input_tuple_size(4)
.output_tuple_size(4)
.input_width(input_width)
@@ -1060,15 +1060,15 @@
.kernel_width(3)
.subsampling(2)
.output_height(output_height)
- .Test(xnn_f32_dwconv_spchw_ukernel_3x3s2p1__neonfma);
+ .Test(xnn_f32_dwconv_chw_ukernel_3x3s2p1__neonfma);
}
}
}
- TEST(F32_DWCONV_SPCHW_3X3S2P1__NEONFMA, output_width_stride_pad0) {
+ TEST(F32_DWCONV_CHW_3X3S2P1__NEONFMA, output_width_stride_pad0) {
TEST_REQUIRES_ARM_NEON_FMA;
for (size_t input_width = 1; input_width < 32; input_width += 3) {
- DWConvSpCHWMicrokernelTester()
+ DWConvCHWMicrokernelTester()
.input_tuple_size(4)
.output_tuple_size(4)
.input_width(input_width)
@@ -1081,14 +1081,14 @@
.subsampling(2)
.output_height(5)
.output_width_stride(36)
- .Test(xnn_f32_dwconv_spchw_ukernel_3x3s2p1__neonfma);
+ .Test(xnn_f32_dwconv_chw_ukernel_3x3s2p1__neonfma);
}
}
- TEST(F32_DWCONV_SPCHW_3X3S2P1__NEONFMA, output_width_stride_pad1) {
+ TEST(F32_DWCONV_CHW_3X3S2P1__NEONFMA, output_width_stride_pad1) {
TEST_REQUIRES_ARM_NEON_FMA;
for (size_t input_width = 1; input_width < 32; input_width += 3) {
- DWConvSpCHWMicrokernelTester()
+ DWConvCHWMicrokernelTester()
.input_tuple_size(4)
.output_tuple_size(4)
.input_width(input_width)
@@ -1101,14 +1101,14 @@
.subsampling(2)
.output_height(5)
.output_width_stride(36)
- .Test(xnn_f32_dwconv_spchw_ukernel_3x3s2p1__neonfma);
+ .Test(xnn_f32_dwconv_chw_ukernel_3x3s2p1__neonfma);
}
}
- TEST(F32_DWCONV_SPCHW_3X3S2P1__NEONFMA, output_tuple_stride_pad0) {
+ TEST(F32_DWCONV_CHW_3X3S2P1__NEONFMA, output_tuple_stride_pad0) {
TEST_REQUIRES_ARM_NEON_FMA;
for (size_t input_width = 1; input_width < 32; input_width += 3) {
- DWConvSpCHWMicrokernelTester()
+ DWConvCHWMicrokernelTester()
.input_tuple_size(4)
.output_tuple_size(4)
.input_width(input_width)
@@ -1122,14 +1122,14 @@
.output_height(5)
.output_width_stride(4)
.output_tuple_stride(5 * 4)
- .Test(xnn_f32_dwconv_spchw_ukernel_3x3s2p1__neonfma);
+ .Test(xnn_f32_dwconv_chw_ukernel_3x3s2p1__neonfma);
}
}
- TEST(F32_DWCONV_SPCHW_3X3S2P1__NEONFMA, output_tuple_stride_pad1) {
+ TEST(F32_DWCONV_CHW_3X3S2P1__NEONFMA, output_tuple_stride_pad1) {
TEST_REQUIRES_ARM_NEON_FMA;
for (size_t input_width = 1; input_width < 32; input_width += 3) {
- DWConvSpCHWMicrokernelTester()
+ DWConvCHWMicrokernelTester()
.input_tuple_size(4)
.output_tuple_size(4)
.input_width(input_width)
@@ -1143,14 +1143,14 @@
.output_height(5)
.output_width_stride(4)
.output_tuple_stride(5 * 4)
- .Test(xnn_f32_dwconv_spchw_ukernel_3x3s2p1__neonfma);
+ .Test(xnn_f32_dwconv_chw_ukernel_3x3s2p1__neonfma);
}
}
- TEST(F32_DWCONV_SPCHW_3X3S2P1__NEONFMA, chw_layout_pad0) {
+ TEST(F32_DWCONV_CHW_3X3S2P1__NEONFMA, chw_layout_pad0) {
TEST_REQUIRES_ARM_NEON_FMA;
for (size_t input_width = 1; input_width < 32; input_width += 3) {
- DWConvSpCHWMicrokernelTester()
+ DWConvCHWMicrokernelTester()
.input_tuple_size(4)
.output_tuple_size(4)
.input_width(input_width)
@@ -1164,14 +1164,14 @@
.subsampling(2)
.output_height(5)
.output_width_stride((input_width - 1) / 2 + 1)
- .Test(xnn_f32_dwconv_spchw_ukernel_3x3s2p1__neonfma);
+ .Test(xnn_f32_dwconv_chw_ukernel_3x3s2p1__neonfma);
}
}
- TEST(F32_DWCONV_SPCHW_3X3S2P1__NEONFMA, chw_layout_pad1) {
+ TEST(F32_DWCONV_CHW_3X3S2P1__NEONFMA, chw_layout_pad1) {
TEST_REQUIRES_ARM_NEON_FMA;
for (size_t input_width = 1; input_width < 32; input_width += 3) {
- DWConvSpCHWMicrokernelTester()
+ DWConvCHWMicrokernelTester()
.input_tuple_size(4)
.output_tuple_size(4)
.input_width(input_width)
@@ -1185,16 +1185,16 @@
.subsampling(2)
.output_height(5)
.output_width_stride((input_width - 1) / 2 + 1)
- .Test(xnn_f32_dwconv_spchw_ukernel_3x3s2p1__neonfma);
+ .Test(xnn_f32_dwconv_chw_ukernel_3x3s2p1__neonfma);
}
}
#endif // XNN_ARCH_ARM64
#if XNN_ARCH_ARM64
- TEST(F32_DWCONV_SPCHW_5X5P2__NEONFMA, input_width_eq_4_pad2) {
+ TEST(F32_DWCONV_CHW_5X5P2__NEONFMA, input_width_eq_4_pad2) {
TEST_REQUIRES_ARM_NEON_FMA;
- DWConvSpCHWMicrokernelTester()
+ DWConvCHWMicrokernelTester()
.input_tuple_size(4)
.output_tuple_size(4)
.input_width(4)
@@ -1205,13 +1205,13 @@
.kernel_height(5)
.kernel_width(5)
.output_height(1)
- .Test(xnn_f32_dwconv_spchw_ukernel_5x5p2__neonfma);
+ .Test(xnn_f32_dwconv_chw_ukernel_5x5p2__neonfma);
}
- TEST(F32_DWCONV_SPCHW_5X5P2__NEONFMA, input_width_lt_4_pad2) {
+ TEST(F32_DWCONV_CHW_5X5P2__NEONFMA, input_width_lt_4_pad2) {
TEST_REQUIRES_ARM_NEON_FMA;
for (size_t input_width = 1; input_width < 4; input_width++) {
- DWConvSpCHWMicrokernelTester()
+ DWConvCHWMicrokernelTester()
.input_tuple_size(4)
.output_tuple_size(4)
.input_width(input_width)
@@ -1222,14 +1222,14 @@
.kernel_height(5)
.kernel_width(5)
.output_height(1)
- .Test(xnn_f32_dwconv_spchw_ukernel_5x5p2__neonfma);
+ .Test(xnn_f32_dwconv_chw_ukernel_5x5p2__neonfma);
}
}
- TEST(F32_DWCONV_SPCHW_5X5P2__NEONFMA, input_width_gt_4_pad2) {
+ TEST(F32_DWCONV_CHW_5X5P2__NEONFMA, input_width_gt_4_pad2) {
TEST_REQUIRES_ARM_NEON_FMA;
for (size_t input_width = 5; input_width < 8; input_width++) {
- DWConvSpCHWMicrokernelTester()
+ DWConvCHWMicrokernelTester()
.input_tuple_size(4)
.output_tuple_size(4)
.input_width(input_width)
@@ -1240,14 +1240,14 @@
.kernel_height(5)
.kernel_width(5)
.output_height(1)
- .Test(xnn_f32_dwconv_spchw_ukernel_5x5p2__neonfma);
+ .Test(xnn_f32_dwconv_chw_ukernel_5x5p2__neonfma);
}
}
- TEST(F32_DWCONV_SPCHW_5X5P2__NEONFMA, input_width_div_4_pad2) {
+ TEST(F32_DWCONV_CHW_5X5P2__NEONFMA, input_width_div_4_pad2) {
TEST_REQUIRES_ARM_NEON_FMA;
for (size_t input_width = 8; input_width < 32; input_width += 4) {
- DWConvSpCHWMicrokernelTester()
+ DWConvCHWMicrokernelTester()
.input_tuple_size(4)
.output_tuple_size(4)
.input_width(input_width)
@@ -1258,14 +1258,14 @@
.kernel_height(5)
.kernel_width(5)
.output_height(1)
- .Test(xnn_f32_dwconv_spchw_ukernel_5x5p2__neonfma);
+ .Test(xnn_f32_dwconv_chw_ukernel_5x5p2__neonfma);
}
}
- TEST(F32_DWCONV_SPCHW_5X5P2__NEONFMA, input_width_stride_pad2) {
+ TEST(F32_DWCONV_CHW_5X5P2__NEONFMA, input_width_stride_pad2) {
TEST_REQUIRES_ARM_NEON_FMA;
for (size_t input_width = 1; input_width < 32; input_width += 3) {
- DWConvSpCHWMicrokernelTester()
+ DWConvCHWMicrokernelTester()
.input_tuple_size(4)
.output_tuple_size(4)
.input_width(input_width)
@@ -1277,14 +1277,14 @@
.kernel_height(5)
.kernel_width(5)
.output_height(1)
- .Test(xnn_f32_dwconv_spchw_ukernel_5x5p2__neonfma);
+ .Test(xnn_f32_dwconv_chw_ukernel_5x5p2__neonfma);
}
}
- TEST(F32_DWCONV_SPCHW_5X5P2__NEONFMA, input_tuple_stride_pad2) {
+ TEST(F32_DWCONV_CHW_5X5P2__NEONFMA, input_tuple_stride_pad2) {
TEST_REQUIRES_ARM_NEON_FMA;
for (size_t input_width = 1; input_width < 32; input_width += 5) {
- DWConvSpCHWMicrokernelTester()
+ DWConvCHWMicrokernelTester()
.input_tuple_size(4)
.output_tuple_size(4)
.input_width(input_width)
@@ -1297,14 +1297,14 @@
.kernel_height(5)
.kernel_width(5)
.output_height(1)
- .Test(xnn_f32_dwconv_spchw_ukernel_5x5p2__neonfma);
+ .Test(xnn_f32_dwconv_chw_ukernel_5x5p2__neonfma);
}
}
- TEST(F32_DWCONV_SPCHW_5X5P2__NEONFMA, output_height_eq_2_pad2) {
+ TEST(F32_DWCONV_CHW_5X5P2__NEONFMA, output_height_eq_2_pad2) {
TEST_REQUIRES_ARM_NEON_FMA;
for (size_t input_width = 1; input_width < 32; input_width += 3) {
- DWConvSpCHWMicrokernelTester()
+ DWConvCHWMicrokernelTester()
.input_tuple_size(4)
.output_tuple_size(4)
.input_width(input_width)
@@ -1315,15 +1315,15 @@
.kernel_height(5)
.kernel_width(5)
.output_height(2)
- .Test(xnn_f32_dwconv_spchw_ukernel_5x5p2__neonfma);
+ .Test(xnn_f32_dwconv_chw_ukernel_5x5p2__neonfma);
}
}
- TEST(F32_DWCONV_SPCHW_5X5P2__NEONFMA, output_height_gt_2_pad2) {
+ TEST(F32_DWCONV_CHW_5X5P2__NEONFMA, output_height_gt_2_pad2) {
TEST_REQUIRES_ARM_NEON_FMA;
for (size_t output_height = 3; output_height < 5; output_height++) {
for (size_t input_width = 1; input_width < 32; input_width += 3) {
- DWConvSpCHWMicrokernelTester()
+ DWConvCHWMicrokernelTester()
.input_tuple_size(4)
.output_tuple_size(4)
.input_width(input_width)
@@ -1334,15 +1334,15 @@
.kernel_height(5)
.kernel_width(5)
.output_height(output_height)
- .Test(xnn_f32_dwconv_spchw_ukernel_5x5p2__neonfma);
+ .Test(xnn_f32_dwconv_chw_ukernel_5x5p2__neonfma);
}
}
}
- TEST(F32_DWCONV_SPCHW_5X5P2__NEONFMA, output_width_stride_pad2) {
+ TEST(F32_DWCONV_CHW_5X5P2__NEONFMA, output_width_stride_pad2) {
TEST_REQUIRES_ARM_NEON_FMA;
for (size_t input_width = 1; input_width < 32; input_width += 3) {
- DWConvSpCHWMicrokernelTester()
+ DWConvCHWMicrokernelTester()
.input_tuple_size(4)
.output_tuple_size(4)
.input_width(input_width)
@@ -1354,14 +1354,14 @@
.kernel_width(5)
.output_height(5)
.output_width_stride(36)
- .Test(xnn_f32_dwconv_spchw_ukernel_5x5p2__neonfma);
+ .Test(xnn_f32_dwconv_chw_ukernel_5x5p2__neonfma);
}
}
- TEST(F32_DWCONV_SPCHW_5X5P2__NEONFMA, output_tuple_stride_pad2) {
+ TEST(F32_DWCONV_CHW_5X5P2__NEONFMA, output_tuple_stride_pad2) {
TEST_REQUIRES_ARM_NEON_FMA;
for (size_t input_width = 1; input_width < 32; input_width += 3) {
- DWConvSpCHWMicrokernelTester()
+ DWConvCHWMicrokernelTester()
.input_tuple_size(4)
.output_tuple_size(4)
.input_width(input_width)
@@ -1374,15 +1374,15 @@
.output_height(5)
.output_width_stride(4)
.output_tuple_stride(5 * 4)
- .Test(xnn_f32_dwconv_spchw_ukernel_5x5p2__neonfma);
+ .Test(xnn_f32_dwconv_chw_ukernel_5x5p2__neonfma);
}
}
- TEST(F32_DWCONV_SPCHW_5X5P2__NEONFMA, chw_layout_pad2) {
+ TEST(F32_DWCONV_CHW_5X5P2__NEONFMA, chw_layout_pad2) {
TEST_REQUIRES_ARM_NEON_FMA;
for (size_t input_width = 1; input_width < 32; input_width += 3) {
for (size_t output_height = 1; output_height < 32; output_height += 3) {
- DWConvSpCHWMicrokernelTester()
+ DWConvCHWMicrokernelTester()
.input_tuple_size(4)
.output_tuple_size(4)
.input_width(input_width)
@@ -1395,7 +1395,7 @@
.kernel_width(5)
.output_height(5)
.output_width_stride(input_width)
- .Test(xnn_f32_dwconv_spchw_ukernel_5x5p2__neonfma);
+ .Test(xnn_f32_dwconv_chw_ukernel_5x5p2__neonfma);
}
}
}
@@ -1403,9 +1403,9 @@
#if XNN_ARCH_ARM64
- TEST(F32_DWCONV_SPCHW_5X5S2P2__NEONFMA, input_width_eq_8_pad2) {
+ TEST(F32_DWCONV_CHW_5X5S2P2__NEONFMA, input_width_eq_8_pad2) {
TEST_REQUIRES_ARM_NEON_FMA;
- DWConvSpCHWMicrokernelTester()
+ DWConvCHWMicrokernelTester()
.input_tuple_size(4)
.output_tuple_size(4)
.input_width(8)
@@ -1417,12 +1417,12 @@
.kernel_width(5)
.subsampling(2)
.output_height(1)
- .Test(xnn_f32_dwconv_spchw_ukernel_5x5s2p2__neonfma);
+ .Test(xnn_f32_dwconv_chw_ukernel_5x5s2p2__neonfma);
}
- TEST(F32_DWCONV_SPCHW_5X5S2P2__NEONFMA, input_width_eq_8_pad1) {
+ TEST(F32_DWCONV_CHW_5X5S2P2__NEONFMA, input_width_eq_8_pad1) {
TEST_REQUIRES_ARM_NEON_FMA;
- DWConvSpCHWMicrokernelTester()
+ DWConvCHWMicrokernelTester()
.input_tuple_size(4)
.output_tuple_size(4)
.input_width(8)
@@ -1434,13 +1434,13 @@
.kernel_width(5)
.subsampling(2)
.output_height(1)
- .Test(xnn_f32_dwconv_spchw_ukernel_5x5s2p2__neonfma);
+ .Test(xnn_f32_dwconv_chw_ukernel_5x5s2p2__neonfma);
}
- TEST(F32_DWCONV_SPCHW_5X5S2P2__NEONFMA, input_width_lt_8_pad1) {
+ TEST(F32_DWCONV_CHW_5X5S2P2__NEONFMA, input_width_lt_8_pad1) {
TEST_REQUIRES_ARM_NEON_FMA;
for (size_t input_width = 1; input_width < 8; input_width++) {
- DWConvSpCHWMicrokernelTester()
+ DWConvCHWMicrokernelTester()
.input_tuple_size(4)
.output_tuple_size(4)
.input_width(input_width)
@@ -1452,14 +1452,14 @@
.kernel_width(5)
.subsampling(2)
.output_height(1)
- .Test(xnn_f32_dwconv_spchw_ukernel_5x5s2p2__neonfma);
+ .Test(xnn_f32_dwconv_chw_ukernel_5x5s2p2__neonfma);
}
}
- TEST(F32_DWCONV_SPCHW_5X5S2P2__NEONFMA, input_width_lt_8_pad2) {
+ TEST(F32_DWCONV_CHW_5X5S2P2__NEONFMA, input_width_lt_8_pad2) {
TEST_REQUIRES_ARM_NEON_FMA;
for (size_t input_width = 1; input_width < 8; input_width++) {
- DWConvSpCHWMicrokernelTester()
+ DWConvCHWMicrokernelTester()
.input_tuple_size(4)
.output_tuple_size(4)
.input_width(input_width)
@@ -1471,14 +1471,14 @@
.kernel_width(5)
.subsampling(2)
.output_height(1)
- .Test(xnn_f32_dwconv_spchw_ukernel_5x5s2p2__neonfma);
+ .Test(xnn_f32_dwconv_chw_ukernel_5x5s2p2__neonfma);
}
}
- TEST(F32_DWCONV_SPCHW_5X5S2P2__NEONFMA, input_width_gt_8_pad1) {
+ TEST(F32_DWCONV_CHW_5X5S2P2__NEONFMA, input_width_gt_8_pad1) {
TEST_REQUIRES_ARM_NEON_FMA;
for (size_t input_width = 8; input_width < 16; input_width++) {
- DWConvSpCHWMicrokernelTester()
+ DWConvCHWMicrokernelTester()
.input_tuple_size(4)
.output_tuple_size(4)
.input_width(input_width)
@@ -1490,14 +1490,14 @@
.kernel_width(5)
.subsampling(2)
.output_height(1)
- .Test(xnn_f32_dwconv_spchw_ukernel_5x5s2p2__neonfma);
+ .Test(xnn_f32_dwconv_chw_ukernel_5x5s2p2__neonfma);
}
}
- TEST(F32_DWCONV_SPCHW_5X5S2P2__NEONFMA, input_width_gt_8_pad2) {
+ TEST(F32_DWCONV_CHW_5X5S2P2__NEONFMA, input_width_gt_8_pad2) {
TEST_REQUIRES_ARM_NEON_FMA;
for (size_t input_width = 8; input_width < 16; input_width++) {
- DWConvSpCHWMicrokernelTester()
+ DWConvCHWMicrokernelTester()
.input_tuple_size(4)
.output_tuple_size(4)
.input_width(input_width)
@@ -1509,14 +1509,14 @@
.kernel_width(5)
.subsampling(2)
.output_height(1)
- .Test(xnn_f32_dwconv_spchw_ukernel_5x5s2p2__neonfma);
+ .Test(xnn_f32_dwconv_chw_ukernel_5x5s2p2__neonfma);
}
}
- TEST(F32_DWCONV_SPCHW_5X5S2P2__NEONFMA, input_width_div_4_pad1) {
+ TEST(F32_DWCONV_CHW_5X5S2P2__NEONFMA, input_width_div_4_pad1) {
TEST_REQUIRES_ARM_NEON_FMA;
for (size_t input_width = 16; input_width < 32; input_width += 4) {
- DWConvSpCHWMicrokernelTester()
+ DWConvCHWMicrokernelTester()
.input_tuple_size(4)
.output_tuple_size(4)
.input_width(input_width)
@@ -1528,14 +1528,14 @@
.kernel_width(5)
.subsampling(2)
.output_height(1)
- .Test(xnn_f32_dwconv_spchw_ukernel_5x5s2p2__neonfma);
+ .Test(xnn_f32_dwconv_chw_ukernel_5x5s2p2__neonfma);
}
}
- TEST(F32_DWCONV_SPCHW_5X5S2P2__NEONFMA, input_width_div_4_pad2) {
+ TEST(F32_DWCONV_CHW_5X5S2P2__NEONFMA, input_width_div_4_pad2) {
TEST_REQUIRES_ARM_NEON_FMA;
for (size_t input_width = 16; input_width < 32; input_width += 4) {
- DWConvSpCHWMicrokernelTester()
+ DWConvCHWMicrokernelTester()
.input_tuple_size(4)
.output_tuple_size(4)
.input_width(input_width)
@@ -1547,14 +1547,14 @@
.kernel_width(5)
.subsampling(2)
.output_height(1)
- .Test(xnn_f32_dwconv_spchw_ukernel_5x5s2p2__neonfma);
+ .Test(xnn_f32_dwconv_chw_ukernel_5x5s2p2__neonfma);
}
}
- TEST(F32_DWCONV_SPCHW_5X5S2P2__NEONFMA, input_width_stride_pad1) {
+ TEST(F32_DWCONV_CHW_5X5S2P2__NEONFMA, input_width_stride_pad1) {
TEST_REQUIRES_ARM_NEON_FMA;
for (size_t input_width = 1; input_width < 32; input_width += 3) {
- DWConvSpCHWMicrokernelTester()
+ DWConvCHWMicrokernelTester()
.input_tuple_size(4)
.output_tuple_size(4)
.input_width(input_width)
@@ -1567,14 +1567,14 @@
.kernel_width(5)
.subsampling(2)
.output_height(1)
- .Test(xnn_f32_dwconv_spchw_ukernel_5x5s2p2__neonfma);
+ .Test(xnn_f32_dwconv_chw_ukernel_5x5s2p2__neonfma);
}
}
- TEST(F32_DWCONV_SPCHW_5X5S2P2__NEONFMA, input_width_stride_pad2) {
+ TEST(F32_DWCONV_CHW_5X5S2P2__NEONFMA, input_width_stride_pad2) {
TEST_REQUIRES_ARM_NEON_FMA;
for (size_t input_width = 1; input_width < 32; input_width += 3) {
- DWConvSpCHWMicrokernelTester()
+ DWConvCHWMicrokernelTester()
.input_tuple_size(4)
.output_tuple_size(4)
.input_width(input_width)
@@ -1587,14 +1587,14 @@
.kernel_width(5)
.subsampling(2)
.output_height(1)
- .Test(xnn_f32_dwconv_spchw_ukernel_5x5s2p2__neonfma);
+ .Test(xnn_f32_dwconv_chw_ukernel_5x5s2p2__neonfma);
}
}
- TEST(F32_DWCONV_SPCHW_5X5S2P2__NEONFMA, input_tuple_stride_pad1) {
+ TEST(F32_DWCONV_CHW_5X5S2P2__NEONFMA, input_tuple_stride_pad1) {
TEST_REQUIRES_ARM_NEON_FMA;
for (size_t input_width = 1; input_width < 32; input_width += 5) {
- DWConvSpCHWMicrokernelTester()
+ DWConvCHWMicrokernelTester()
.input_tuple_size(4)
.output_tuple_size(4)
.input_width(input_width)
@@ -1608,14 +1608,14 @@
.kernel_width(5)
.subsampling(2)
.output_height(1)
- .Test(xnn_f32_dwconv_spchw_ukernel_5x5s2p2__neonfma);
+ .Test(xnn_f32_dwconv_chw_ukernel_5x5s2p2__neonfma);
}
}
- TEST(F32_DWCONV_SPCHW_5X5S2P2__NEONFMA, input_tuple_stride_pad2) {
+ TEST(F32_DWCONV_CHW_5X5S2P2__NEONFMA, input_tuple_stride_pad2) {
TEST_REQUIRES_ARM_NEON_FMA;
for (size_t input_width = 1; input_width < 32; input_width += 5) {
- DWConvSpCHWMicrokernelTester()
+ DWConvCHWMicrokernelTester()
.input_tuple_size(4)
.output_tuple_size(4)
.input_width(input_width)
@@ -1629,15 +1629,15 @@
.kernel_width(5)
.subsampling(2)
.output_height(1)
- .Test(xnn_f32_dwconv_spchw_ukernel_5x5s2p2__neonfma);
+ .Test(xnn_f32_dwconv_chw_ukernel_5x5s2p2__neonfma);
}
}
- TEST(F32_DWCONV_SPCHW_5X5S2P2__NEONFMA, output_height_gt_1_pad1) {
+ TEST(F32_DWCONV_CHW_5X5S2P2__NEONFMA, output_height_gt_1_pad1) {
TEST_REQUIRES_ARM_NEON_FMA;
for (size_t output_height = 3; output_height < 4; output_height++) {
for (size_t input_width = 4; input_width < 5; input_width += 3) {
- DWConvSpCHWMicrokernelTester()
+ DWConvCHWMicrokernelTester()
.input_tuple_size(4)
.output_tuple_size(4)
.input_width(input_width)
@@ -1649,16 +1649,16 @@
.kernel_width(5)
.subsampling(2)
.output_height(output_height)
- .Test(xnn_f32_dwconv_spchw_ukernel_5x5s2p2__neonfma);
+ .Test(xnn_f32_dwconv_chw_ukernel_5x5s2p2__neonfma);
}
}
}
- TEST(F32_DWCONV_SPCHW_5X5S2P2__NEONFMA, output_height_gt_1_pad2) {
+ TEST(F32_DWCONV_CHW_5X5S2P2__NEONFMA, output_height_gt_1_pad2) {
TEST_REQUIRES_ARM_NEON_FMA;
for (size_t output_height = 3; output_height < 4; output_height++) {
for (size_t input_width = 4; input_width < 5; input_width += 3) {
- DWConvSpCHWMicrokernelTester()
+ DWConvCHWMicrokernelTester()
.input_tuple_size(4)
.output_tuple_size(4)
.input_width(input_width)
@@ -1670,15 +1670,15 @@
.kernel_width(5)
.subsampling(2)
.output_height(output_height)
- .Test(xnn_f32_dwconv_spchw_ukernel_5x5s2p2__neonfma);
+ .Test(xnn_f32_dwconv_chw_ukernel_5x5s2p2__neonfma);
}
}
}
- TEST(F32_DWCONV_SPCHW_5X5S2P2__NEONFMA, output_width_stride_pad1) {
+ TEST(F32_DWCONV_CHW_5X5S2P2__NEONFMA, output_width_stride_pad1) {
TEST_REQUIRES_ARM_NEON_FMA;
for (size_t input_width = 1; input_width < 32; input_width += 3) {
- DWConvSpCHWMicrokernelTester()
+ DWConvCHWMicrokernelTester()
.input_tuple_size(4)
.output_tuple_size(4)
.input_width(input_width)
@@ -1691,14 +1691,14 @@
.subsampling(2)
.output_height(5)
.output_width_stride(36)
- .Test(xnn_f32_dwconv_spchw_ukernel_5x5s2p2__neonfma);
+ .Test(xnn_f32_dwconv_chw_ukernel_5x5s2p2__neonfma);
}
}
- TEST(F32_DWCONV_SPCHW_5X5S2P2__NEONFMA, output_width_stride_pad2) {
+ TEST(F32_DWCONV_CHW_5X5S2P2__NEONFMA, output_width_stride_pad2) {
TEST_REQUIRES_ARM_NEON_FMA;
for (size_t input_width = 1; input_width < 32; input_width += 3) {
- DWConvSpCHWMicrokernelTester()
+ DWConvCHWMicrokernelTester()
.input_tuple_size(4)
.output_tuple_size(4)
.input_width(input_width)
@@ -1711,14 +1711,14 @@
.subsampling(2)
.output_height(5)
.output_width_stride(36)
- .Test(xnn_f32_dwconv_spchw_ukernel_5x5s2p2__neonfma);
+ .Test(xnn_f32_dwconv_chw_ukernel_5x5s2p2__neonfma);
}
}
- TEST(F32_DWCONV_SPCHW_5X5S2P2__NEONFMA, output_tuple_stride_pad1) {
+ TEST(F32_DWCONV_CHW_5X5S2P2__NEONFMA, output_tuple_stride_pad1) {
TEST_REQUIRES_ARM_NEON_FMA;
for (size_t input_width = 1; input_width < 32; input_width += 3) {
- DWConvSpCHWMicrokernelTester()
+ DWConvCHWMicrokernelTester()
.input_tuple_size(4)
.output_tuple_size(4)
.input_width(input_width)
@@ -1732,14 +1732,14 @@
.output_height(5)
.output_width_stride(4)
.output_tuple_stride(5 * 4)
- .Test(xnn_f32_dwconv_spchw_ukernel_5x5s2p2__neonfma);
+ .Test(xnn_f32_dwconv_chw_ukernel_5x5s2p2__neonfma);
}
}
- TEST(F32_DWCONV_SPCHW_5X5S2P2__NEONFMA, output_tuple_stride_pad2) {
+ TEST(F32_DWCONV_CHW_5X5S2P2__NEONFMA, output_tuple_stride_pad2) {
TEST_REQUIRES_ARM_NEON_FMA;
for (size_t input_width = 1; input_width < 32; input_width += 3) {
- DWConvSpCHWMicrokernelTester()
+ DWConvCHWMicrokernelTester()
.input_tuple_size(4)
.output_tuple_size(4)
.input_width(input_width)
@@ -1753,14 +1753,14 @@
.output_height(5)
.output_width_stride(4)
.output_tuple_stride(5 * 4)
- .Test(xnn_f32_dwconv_spchw_ukernel_5x5s2p2__neonfma);
+ .Test(xnn_f32_dwconv_chw_ukernel_5x5s2p2__neonfma);
}
}
- TEST(F32_DWCONV_SPCHW_5X5S2P2__NEONFMA, chw_layout_pad1) {
+ TEST(F32_DWCONV_CHW_5X5S2P2__NEONFMA, chw_layout_pad1) {
TEST_REQUIRES_ARM_NEON_FMA;
for (size_t input_width = 1; input_width < 32; input_width += 1) {
- DWConvSpCHWMicrokernelTester()
+ DWConvCHWMicrokernelTester()
.input_tuple_size(4)
.output_tuple_size(4)
.input_width(input_width)
@@ -1774,14 +1774,14 @@
.subsampling(2)
.output_height(5)
.output_width_stride((input_width - 1) / 2 + 1)
- .Test(xnn_f32_dwconv_spchw_ukernel_5x5s2p2__neonfma);
+ .Test(xnn_f32_dwconv_chw_ukernel_5x5s2p2__neonfma);
}
}
- TEST(F32_DWCONV_SPCHW_5X5S2P2__NEONFMA, chw_layout_pad2) {
+ TEST(F32_DWCONV_CHW_5X5S2P2__NEONFMA, chw_layout_pad2) {
TEST_REQUIRES_ARM_NEON_FMA;
for (size_t input_width = 1; input_width < 32; input_width += 1) {
- DWConvSpCHWMicrokernelTester()
+ DWConvCHWMicrokernelTester()
.input_tuple_size(4)
.output_tuple_size(4)
.input_width(input_width)
@@ -1795,14 +1795,14 @@
.subsampling(2)
.output_height(5)
.output_width_stride((input_width - 1) / 2 + 1)
- .Test(xnn_f32_dwconv_spchw_ukernel_5x5s2p2__neonfma);
+ .Test(xnn_f32_dwconv_chw_ukernel_5x5s2p2__neonfma);
}
}
#endif // XNN_ARCH_ARM64
-TEST(F32_DWCONV_SPCHW_3X3P1__SCALAR, input_width_eq_1) {
- DWConvSpCHWMicrokernelTester()
+TEST(F32_DWCONV_CHW_3X3P1__SCALAR, input_width_eq_1) {
+ DWConvCHWMicrokernelTester()
.input_tuple_size(1)
.output_tuple_size(1)
.input_width(1)
@@ -1813,12 +1813,12 @@
.kernel_height(3)
.kernel_width(3)
.output_height(1)
- .Test(xnn_f32_dwconv_spchw_ukernel_3x3p1__scalar, DWConvSpCHWMicrokernelTester::Variant::Scalar);
+ .Test(xnn_f32_dwconv_chw_ukernel_3x3p1__scalar, DWConvCHWMicrokernelTester::Variant::Scalar);
}
-TEST(F32_DWCONV_SPCHW_3X3P1__SCALAR, input_width_gt_1) {
+TEST(F32_DWCONV_CHW_3X3P1__SCALAR, input_width_gt_1) {
for (size_t input_width = 2; input_width < 32; input_width++) {
- DWConvSpCHWMicrokernelTester()
+ DWConvCHWMicrokernelTester()
.input_tuple_size(1)
.output_tuple_size(1)
.input_width(input_width)
@@ -1829,13 +1829,13 @@
.kernel_height(3)
.kernel_width(3)
.output_height(1)
- .Test(xnn_f32_dwconv_spchw_ukernel_3x3p1__scalar, DWConvSpCHWMicrokernelTester::Variant::Scalar);
+ .Test(xnn_f32_dwconv_chw_ukernel_3x3p1__scalar, DWConvCHWMicrokernelTester::Variant::Scalar);
}
}
-TEST(F32_DWCONV_SPCHW_3X3P1__SCALAR, input_width_stride) {
+TEST(F32_DWCONV_CHW_3X3P1__SCALAR, input_width_stride) {
for (size_t input_width = 1; input_width < 32; input_width += 3) {
- DWConvSpCHWMicrokernelTester()
+ DWConvCHWMicrokernelTester()
.input_tuple_size(1)
.output_tuple_size(1)
.input_width(input_width)
@@ -1847,13 +1847,13 @@
.kernel_height(3)
.kernel_width(3)
.output_height(1)
- .Test(xnn_f32_dwconv_spchw_ukernel_3x3p1__scalar, DWConvSpCHWMicrokernelTester::Variant::Scalar);
+ .Test(xnn_f32_dwconv_chw_ukernel_3x3p1__scalar, DWConvCHWMicrokernelTester::Variant::Scalar);
}
}
-TEST(F32_DWCONV_SPCHW_3X3P1__SCALAR, input_tuple_stride) {
+TEST(F32_DWCONV_CHW_3X3P1__SCALAR, input_tuple_stride) {
for (size_t input_width = 1; input_width < 32; input_width += 5) {
- DWConvSpCHWMicrokernelTester()
+ DWConvCHWMicrokernelTester()
.input_tuple_size(1)
.output_tuple_size(1)
.input_width(input_width)
@@ -1866,14 +1866,14 @@
.kernel_height(3)
.kernel_width(3)
.output_height(1)
- .Test(xnn_f32_dwconv_spchw_ukernel_3x3p1__scalar, DWConvSpCHWMicrokernelTester::Variant::Scalar);
+ .Test(xnn_f32_dwconv_chw_ukernel_3x3p1__scalar, DWConvCHWMicrokernelTester::Variant::Scalar);
}
}
-TEST(F32_DWCONV_SPCHW_3X3P1__SCALAR, output_height_gt_1) {
+TEST(F32_DWCONV_CHW_3X3P1__SCALAR, output_height_gt_1) {
for (size_t output_height = 2; output_height < 5; output_height++) {
for (size_t input_width = 1; input_width < 32; input_width += 3) {
- DWConvSpCHWMicrokernelTester()
+ DWConvCHWMicrokernelTester()
.input_tuple_size(1)
.output_tuple_size(1)
.input_width(input_width)
@@ -1884,14 +1884,14 @@
.kernel_height(3)
.kernel_width(3)
.output_height(output_height)
- .Test(xnn_f32_dwconv_spchw_ukernel_3x3p1__scalar, DWConvSpCHWMicrokernelTester::Variant::Scalar);
+ .Test(xnn_f32_dwconv_chw_ukernel_3x3p1__scalar, DWConvCHWMicrokernelTester::Variant::Scalar);
}
}
}
-TEST(F32_DWCONV_SPCHW_3X3P1__SCALAR, output_width_stride) {
+TEST(F32_DWCONV_CHW_3X3P1__SCALAR, output_width_stride) {
for (size_t input_width = 1; input_width < 32; input_width += 3) {
- DWConvSpCHWMicrokernelTester()
+ DWConvCHWMicrokernelTester()
.input_tuple_size(1)
.output_tuple_size(1)
.input_width(input_width)
@@ -1903,13 +1903,13 @@
.kernel_width(3)
.output_height(5)
.output_width_stride(36)
- .Test(xnn_f32_dwconv_spchw_ukernel_3x3p1__scalar, DWConvSpCHWMicrokernelTester::Variant::Scalar);
+ .Test(xnn_f32_dwconv_chw_ukernel_3x3p1__scalar, DWConvCHWMicrokernelTester::Variant::Scalar);
}
}
-TEST(F32_DWCONV_SPCHW_3X3P1__SCALAR, output_tuple_stride) {
+TEST(F32_DWCONV_CHW_3X3P1__SCALAR, output_tuple_stride) {
for (size_t input_width = 1; input_width < 32; input_width += 3) {
- DWConvSpCHWMicrokernelTester()
+ DWConvCHWMicrokernelTester()
.input_tuple_size(1)
.output_tuple_size(1)
.input_width(input_width)
@@ -1922,13 +1922,13 @@
.output_height(5)
.output_width_stride(4)
.output_tuple_stride(5 * 4)
- .Test(xnn_f32_dwconv_spchw_ukernel_3x3p1__scalar, DWConvSpCHWMicrokernelTester::Variant::Scalar);
+ .Test(xnn_f32_dwconv_chw_ukernel_3x3p1__scalar, DWConvCHWMicrokernelTester::Variant::Scalar);
}
}
-TEST(F32_DWCONV_SPCHW_3X3P1__SCALAR, chw_layout) {
+TEST(F32_DWCONV_CHW_3X3P1__SCALAR, chw_layout) {
for (size_t input_width = 1; input_width < 32; input_width += 3) {
- DWConvSpCHWMicrokernelTester()
+ DWConvCHWMicrokernelTester()
.input_tuple_size(1)
.output_tuple_size(1)
.input_width(input_width)
@@ -1941,12 +1941,12 @@
.kernel_width(3)
.output_height(5)
.output_width_stride(input_width)
- .Test(xnn_f32_dwconv_spchw_ukernel_3x3p1__scalar, DWConvSpCHWMicrokernelTester::Variant::Scalar);
+ .Test(xnn_f32_dwconv_chw_ukernel_3x3p1__scalar, DWConvCHWMicrokernelTester::Variant::Scalar);
}
}
-TEST(F32_DWCONV_SPCHW_3X3S2P1__SCALAR, input_width_eq_1_pad0) {
- DWConvSpCHWMicrokernelTester()
+TEST(F32_DWCONV_CHW_3X3S2P1__SCALAR, input_width_eq_1_pad0) {
+ DWConvCHWMicrokernelTester()
.input_tuple_size(1)
.output_tuple_size(1)
.input_width(1)
@@ -1958,12 +1958,12 @@
.kernel_width(3)
.subsampling(2)
.output_height(1)
- .Test(xnn_f32_dwconv_spchw_ukernel_3x3s2p1__scalar, DWConvSpCHWMicrokernelTester::Variant::Scalar);
+ .Test(xnn_f32_dwconv_chw_ukernel_3x3s2p1__scalar, DWConvCHWMicrokernelTester::Variant::Scalar);
}
-TEST(F32_DWCONV_SPCHW_3X3S2P1__SCALAR, input_width_gt_1_pad0) {
+TEST(F32_DWCONV_CHW_3X3S2P1__SCALAR, input_width_gt_1_pad0) {
for (size_t input_width = 2; input_width < 32; input_width++) {
- DWConvSpCHWMicrokernelTester()
+ DWConvCHWMicrokernelTester()
.input_tuple_size(1)
.output_tuple_size(1)
.input_width(input_width)
@@ -1975,13 +1975,13 @@
.kernel_width(3)
.subsampling(2)
.output_height(1)
- .Test(xnn_f32_dwconv_spchw_ukernel_3x3s2p1__scalar, DWConvSpCHWMicrokernelTester::Variant::Scalar);
+ .Test(xnn_f32_dwconv_chw_ukernel_3x3s2p1__scalar, DWConvCHWMicrokernelTester::Variant::Scalar);
}
}
-TEST(F32_DWCONV_SPCHW_3X3S2P1__SCALAR, input_width_stride_pad0) {
+TEST(F32_DWCONV_CHW_3X3S2P1__SCALAR, input_width_stride_pad0) {
for (size_t input_width = 1; input_width < 32; input_width += 3) {
- DWConvSpCHWMicrokernelTester()
+ DWConvCHWMicrokernelTester()
.input_tuple_size(1)
.output_tuple_size(1)
.input_width(input_width)
@@ -1994,13 +1994,13 @@
.kernel_width(3)
.subsampling(2)
.output_height(1)
- .Test(xnn_f32_dwconv_spchw_ukernel_3x3s2p1__scalar, DWConvSpCHWMicrokernelTester::Variant::Scalar);
+ .Test(xnn_f32_dwconv_chw_ukernel_3x3s2p1__scalar, DWConvCHWMicrokernelTester::Variant::Scalar);
}
}
-TEST(F32_DWCONV_SPCHW_3X3S2P1__SCALAR, input_tuple_stride_pad0) {
+TEST(F32_DWCONV_CHW_3X3S2P1__SCALAR, input_tuple_stride_pad0) {
for (size_t input_width = 1; input_width < 32; input_width += 5) {
- DWConvSpCHWMicrokernelTester()
+ DWConvCHWMicrokernelTester()
.input_tuple_size(1)
.output_tuple_size(1)
.input_width(input_width)
@@ -2014,14 +2014,14 @@
.kernel_width(3)
.subsampling(2)
.output_height(1)
- .Test(xnn_f32_dwconv_spchw_ukernel_3x3s2p1__scalar, DWConvSpCHWMicrokernelTester::Variant::Scalar);
+ .Test(xnn_f32_dwconv_chw_ukernel_3x3s2p1__scalar, DWConvCHWMicrokernelTester::Variant::Scalar);
}
}
-TEST(F32_DWCONV_SPCHW_3X3S2P1__SCALAR, output_height_gt_1_pad0) {
+TEST(F32_DWCONV_CHW_3X3S2P1__SCALAR, output_height_gt_1_pad0) {
for (size_t output_height = 2; output_height < 5; output_height++) {
for (size_t input_width = 1; input_width < 32; input_width += 3) {
- DWConvSpCHWMicrokernelTester()
+ DWConvCHWMicrokernelTester()
.input_tuple_size(1)
.output_tuple_size(1)
.input_width(input_width)
@@ -2033,14 +2033,14 @@
.kernel_width(3)
.subsampling(2)
.output_height(output_height)
- .Test(xnn_f32_dwconv_spchw_ukernel_3x3s2p1__scalar, DWConvSpCHWMicrokernelTester::Variant::Scalar);
+ .Test(xnn_f32_dwconv_chw_ukernel_3x3s2p1__scalar, DWConvCHWMicrokernelTester::Variant::Scalar);
}
}
}
-TEST(F32_DWCONV_SPCHW_3X3S2P1__SCALAR, output_width_stride_pad0) {
+TEST(F32_DWCONV_CHW_3X3S2P1__SCALAR, output_width_stride_pad0) {
for (size_t input_width = 1; input_width < 32; input_width += 3) {
- DWConvSpCHWMicrokernelTester()
+ DWConvCHWMicrokernelTester()
.input_tuple_size(1)
.output_tuple_size(1)
.input_width(input_width)
@@ -2053,13 +2053,13 @@
.subsampling(2)
.output_height(5)
.output_width_stride(36)
- .Test(xnn_f32_dwconv_spchw_ukernel_3x3s2p1__scalar, DWConvSpCHWMicrokernelTester::Variant::Scalar);
+ .Test(xnn_f32_dwconv_chw_ukernel_3x3s2p1__scalar, DWConvCHWMicrokernelTester::Variant::Scalar);
}
}
-TEST(F32_DWCONV_SPCHW_3X3S2P1__SCALAR, output_tuple_stride_pad0) {
+TEST(F32_DWCONV_CHW_3X3S2P1__SCALAR, output_tuple_stride_pad0) {
for (size_t input_width = 1; input_width < 32; input_width += 3) {
- DWConvSpCHWMicrokernelTester()
+ DWConvCHWMicrokernelTester()
.input_tuple_size(1)
.output_tuple_size(1)
.input_width(input_width)
@@ -2073,13 +2073,13 @@
.output_height(5)
.output_width_stride(4)
.output_tuple_stride(5 * 4)
- .Test(xnn_f32_dwconv_spchw_ukernel_3x3s2p1__scalar, DWConvSpCHWMicrokernelTester::Variant::Scalar);
+ .Test(xnn_f32_dwconv_chw_ukernel_3x3s2p1__scalar, DWConvCHWMicrokernelTester::Variant::Scalar);
}
}
-TEST(F32_DWCONV_SPCHW_3X3S2P1__SCALAR, chw_layout_pad0) {
+TEST(F32_DWCONV_CHW_3X3S2P1__SCALAR, chw_layout_pad0) {
for (size_t input_width = 1; input_width < 32; input_width += 3) {
- DWConvSpCHWMicrokernelTester()
+ DWConvCHWMicrokernelTester()
.input_tuple_size(1)
.output_tuple_size(1)
.input_width(input_width)
@@ -2093,12 +2093,12 @@
.subsampling(2)
.output_height(5)
.output_width_stride(input_width)
- .Test(xnn_f32_dwconv_spchw_ukernel_3x3s2p1__scalar, DWConvSpCHWMicrokernelTester::Variant::Scalar);
+ .Test(xnn_f32_dwconv_chw_ukernel_3x3s2p1__scalar, DWConvCHWMicrokernelTester::Variant::Scalar);
}
}
-TEST(F32_DWCONV_SPCHW_3X3S2P1__SCALAR, input_width_eq_1_pad1) {
- DWConvSpCHWMicrokernelTester()
+TEST(F32_DWCONV_CHW_3X3S2P1__SCALAR, input_width_eq_1_pad1) {
+ DWConvCHWMicrokernelTester()
.input_tuple_size(1)
.output_tuple_size(1)
.input_width(1)
@@ -2110,12 +2110,12 @@
.kernel_width(3)
.subsampling(2)
.output_height(1)
- .Test(xnn_f32_dwconv_spchw_ukernel_3x3s2p1__scalar, DWConvSpCHWMicrokernelTester::Variant::Scalar);
+ .Test(xnn_f32_dwconv_chw_ukernel_3x3s2p1__scalar, DWConvCHWMicrokernelTester::Variant::Scalar);
}
-TEST(F32_DWCONV_SPCHW_3X3S2P1__SCALAR, input_width_gt_1_pad1) {
+TEST(F32_DWCONV_CHW_3X3S2P1__SCALAR, input_width_gt_1_pad1) {
for (size_t input_width = 2; input_width < 32; input_width++) {
- DWConvSpCHWMicrokernelTester()
+ DWConvCHWMicrokernelTester()
.input_tuple_size(1)
.output_tuple_size(1)
.input_width(input_width)
@@ -2127,13 +2127,13 @@
.kernel_width(3)
.subsampling(2)
.output_height(1)
- .Test(xnn_f32_dwconv_spchw_ukernel_3x3s2p1__scalar, DWConvSpCHWMicrokernelTester::Variant::Scalar);
+ .Test(xnn_f32_dwconv_chw_ukernel_3x3s2p1__scalar, DWConvCHWMicrokernelTester::Variant::Scalar);
}
}
-TEST(F32_DWCONV_SPCHW_3X3S2P1__SCALAR, input_width_stride_pad1) {
+TEST(F32_DWCONV_CHW_3X3S2P1__SCALAR, input_width_stride_pad1) {
for (size_t input_width = 1; input_width < 32; input_width += 3) {
- DWConvSpCHWMicrokernelTester()
+ DWConvCHWMicrokernelTester()
.input_tuple_size(1)
.output_tuple_size(1)
.input_width(input_width)
@@ -2146,13 +2146,13 @@
.kernel_width(3)
.subsampling(2)
.output_height(1)
- .Test(xnn_f32_dwconv_spchw_ukernel_3x3s2p1__scalar, DWConvSpCHWMicrokernelTester::Variant::Scalar);
+ .Test(xnn_f32_dwconv_chw_ukernel_3x3s2p1__scalar, DWConvCHWMicrokernelTester::Variant::Scalar);
}
}
-TEST(F32_DWCONV_SPCHW_3X3S2P1__SCALAR, input_tuple_stride_pad1) {
+TEST(F32_DWCONV_CHW_3X3S2P1__SCALAR, input_tuple_stride_pad1) {
for (size_t input_width = 1; input_width < 32; input_width += 5) {
- DWConvSpCHWMicrokernelTester()
+ DWConvCHWMicrokernelTester()
.input_tuple_size(1)
.output_tuple_size(1)
.input_width(input_width)
@@ -2166,14 +2166,14 @@
.kernel_width(3)
.subsampling(2)
.output_height(1)
- .Test(xnn_f32_dwconv_spchw_ukernel_3x3s2p1__scalar, DWConvSpCHWMicrokernelTester::Variant::Scalar);
+ .Test(xnn_f32_dwconv_chw_ukernel_3x3s2p1__scalar, DWConvCHWMicrokernelTester::Variant::Scalar);
}
}
-TEST(F32_DWCONV_SPCHW_3X3S2P1__SCALAR, output_height_gt_1_pad1) {
+TEST(F32_DWCONV_CHW_3X3S2P1__SCALAR, output_height_gt_1_pad1) {
for (size_t output_height = 2; output_height < 5; output_height++) {
for (size_t input_width = 1; input_width < 32; input_width += 3) {
- DWConvSpCHWMicrokernelTester()
+ DWConvCHWMicrokernelTester()
.input_tuple_size(1)
.output_tuple_size(1)
.input_width(input_width)
@@ -2185,14 +2185,14 @@
.kernel_width(3)
.subsampling(2)
.output_height(output_height)
- .Test(xnn_f32_dwconv_spchw_ukernel_3x3s2p1__scalar, DWConvSpCHWMicrokernelTester::Variant::Scalar);
+ .Test(xnn_f32_dwconv_chw_ukernel_3x3s2p1__scalar, DWConvCHWMicrokernelTester::Variant::Scalar);
}
}
}
-TEST(F32_DWCONV_SPCHW_3X3S2P1__SCALAR, output_width_stride_pad1) {
+TEST(F32_DWCONV_CHW_3X3S2P1__SCALAR, output_width_stride_pad1) {
for (size_t input_width = 1; input_width < 32; input_width += 3) {
- DWConvSpCHWMicrokernelTester()
+ DWConvCHWMicrokernelTester()
.input_tuple_size(1)
.output_tuple_size(1)
.input_width(input_width)
@@ -2205,13 +2205,13 @@
.subsampling(2)
.output_height(5)
.output_width_stride(36)
- .Test(xnn_f32_dwconv_spchw_ukernel_3x3s2p1__scalar, DWConvSpCHWMicrokernelTester::Variant::Scalar);
+ .Test(xnn_f32_dwconv_chw_ukernel_3x3s2p1__scalar, DWConvCHWMicrokernelTester::Variant::Scalar);
}
}
-TEST(F32_DWCONV_SPCHW_3X3S2P1__SCALAR, output_tuple_stride_pad1) {
+TEST(F32_DWCONV_CHW_3X3S2P1__SCALAR, output_tuple_stride_pad1) {
for (size_t input_width = 1; input_width < 32; input_width += 3) {
- DWConvSpCHWMicrokernelTester()
+ DWConvCHWMicrokernelTester()
.input_tuple_size(1)
.output_tuple_size(1)
.input_width(input_width)
@@ -2225,13 +2225,13 @@
.output_height(5)
.output_width_stride(4)
.output_tuple_stride(5 * 4)
- .Test(xnn_f32_dwconv_spchw_ukernel_3x3s2p1__scalar, DWConvSpCHWMicrokernelTester::Variant::Scalar);
+ .Test(xnn_f32_dwconv_chw_ukernel_3x3s2p1__scalar, DWConvCHWMicrokernelTester::Variant::Scalar);
}
}
-TEST(F32_DWCONV_SPCHW_3X3S2P1__SCALAR, chw_layout_pad1) {
+TEST(F32_DWCONV_CHW_3X3S2P1__SCALAR, chw_layout_pad1) {
for (size_t input_width = 1; input_width < 32; input_width += 3) {
- DWConvSpCHWMicrokernelTester()
+ DWConvCHWMicrokernelTester()
.input_tuple_size(1)
.output_tuple_size(1)
.input_width(input_width)
@@ -2245,12 +2245,12 @@
.subsampling(2)
.output_height(5)
.output_width_stride(input_width)
- .Test(xnn_f32_dwconv_spchw_ukernel_3x3s2p1__scalar, DWConvSpCHWMicrokernelTester::Variant::Scalar);
+ .Test(xnn_f32_dwconv_chw_ukernel_3x3s2p1__scalar, DWConvCHWMicrokernelTester::Variant::Scalar);
}
}
-TEST(F32_DWCONV_SPCHW_5X5P2__SCALAR, input_width_eq_1_pad2) {
- DWConvSpCHWMicrokernelTester()
+TEST(F32_DWCONV_CHW_5X5P2__SCALAR, input_width_eq_1_pad2) {
+ DWConvCHWMicrokernelTester()
.input_tuple_size(1)
.output_tuple_size(1)
.input_width(1)
@@ -2261,12 +2261,12 @@
.kernel_height(5)
.kernel_width(5)
.output_height(1)
- .Test(xnn_f32_dwconv_spchw_ukernel_5x5p2__scalar, DWConvSpCHWMicrokernelTester::Variant::Scalar);
+ .Test(xnn_f32_dwconv_chw_ukernel_5x5p2__scalar, DWConvCHWMicrokernelTester::Variant::Scalar);
}
-TEST(F32_DWCONV_SPCHW_5X5P2__SCALAR, input_width_gt_1_pad2) {
+TEST(F32_DWCONV_CHW_5X5P2__SCALAR, input_width_gt_1_pad2) {
for (size_t input_width = 2; input_width < 32; input_width++) {
- DWConvSpCHWMicrokernelTester()
+ DWConvCHWMicrokernelTester()
.input_tuple_size(1)
.output_tuple_size(1)
.input_width(input_width)
@@ -2277,13 +2277,13 @@
.kernel_height(5)
.kernel_width(5)
.output_height(1)
- .Test(xnn_f32_dwconv_spchw_ukernel_5x5p2__scalar, DWConvSpCHWMicrokernelTester::Variant::Scalar);
+ .Test(xnn_f32_dwconv_chw_ukernel_5x5p2__scalar, DWConvCHWMicrokernelTester::Variant::Scalar);
}
}
-TEST(F32_DWCONV_SPCHW_5X5P2__SCALAR, input_width_stride_pad2) {
+TEST(F32_DWCONV_CHW_5X5P2__SCALAR, input_width_stride_pad2) {
for (size_t input_width = 1; input_width < 32; input_width += 3) {
- DWConvSpCHWMicrokernelTester()
+ DWConvCHWMicrokernelTester()
.input_tuple_size(1)
.output_tuple_size(1)
.input_width(input_width)
@@ -2295,13 +2295,13 @@
.kernel_height(5)
.kernel_width(5)
.output_height(1)
- .Test(xnn_f32_dwconv_spchw_ukernel_5x5p2__scalar, DWConvSpCHWMicrokernelTester::Variant::Scalar);
+ .Test(xnn_f32_dwconv_chw_ukernel_5x5p2__scalar, DWConvCHWMicrokernelTester::Variant::Scalar);
}
}
-TEST(F32_DWCONV_SPCHW_5X5P2__SCALAR, input_tuple_stride_pad2) {
+TEST(F32_DWCONV_CHW_5X5P2__SCALAR, input_tuple_stride_pad2) {
for (size_t input_width = 1; input_width < 32; input_width += 5) {
- DWConvSpCHWMicrokernelTester()
+ DWConvCHWMicrokernelTester()
.input_tuple_size(1)
.output_tuple_size(1)
.input_width(input_width)
@@ -2314,14 +2314,14 @@
.kernel_height(5)
.kernel_width(5)
.output_height(1)
- .Test(xnn_f32_dwconv_spchw_ukernel_5x5p2__scalar, DWConvSpCHWMicrokernelTester::Variant::Scalar);
+ .Test(xnn_f32_dwconv_chw_ukernel_5x5p2__scalar, DWConvCHWMicrokernelTester::Variant::Scalar);
}
}
-TEST(F32_DWCONV_SPCHW_5X5P2__SCALAR, output_height_gt_1_pad2) {
+TEST(F32_DWCONV_CHW_5X5P2__SCALAR, output_height_gt_1_pad2) {
for (size_t output_height = 2; output_height < 5; output_height++) {
for (size_t input_width = 1; input_width < 32; input_width += 3) {
- DWConvSpCHWMicrokernelTester()
+ DWConvCHWMicrokernelTester()
.input_tuple_size(1)
.output_tuple_size(1)
.input_width(input_width)
@@ -2332,14 +2332,14 @@
.kernel_height(5)
.kernel_width(5)
.output_height(output_height)
- .Test(xnn_f32_dwconv_spchw_ukernel_5x5p2__scalar, DWConvSpCHWMicrokernelTester::Variant::Scalar);
+ .Test(xnn_f32_dwconv_chw_ukernel_5x5p2__scalar, DWConvCHWMicrokernelTester::Variant::Scalar);
}
}
}
-TEST(F32_DWCONV_SPCHW_5X5P2__SCALAR, output_width_stride_pad2) {
+TEST(F32_DWCONV_CHW_5X5P2__SCALAR, output_width_stride_pad2) {
for (size_t input_width = 1; input_width < 32; input_width += 3) {
- DWConvSpCHWMicrokernelTester()
+ DWConvCHWMicrokernelTester()
.input_tuple_size(1)
.output_tuple_size(1)
.input_width(input_width)
@@ -2351,13 +2351,13 @@
.kernel_width(5)
.output_height(5)
.output_width_stride(36)
- .Test(xnn_f32_dwconv_spchw_ukernel_5x5p2__scalar, DWConvSpCHWMicrokernelTester::Variant::Scalar);
+ .Test(xnn_f32_dwconv_chw_ukernel_5x5p2__scalar, DWConvCHWMicrokernelTester::Variant::Scalar);
}
}
-TEST(F32_DWCONV_SPCHW_5X5P2__SCALAR, output_tuple_stride_pad2) {
+TEST(F32_DWCONV_CHW_5X5P2__SCALAR, output_tuple_stride_pad2) {
for (size_t input_width = 1; input_width < 32; input_width += 3) {
- DWConvSpCHWMicrokernelTester()
+ DWConvCHWMicrokernelTester()
.input_tuple_size(1)
.output_tuple_size(1)
.input_width(input_width)
@@ -2370,13 +2370,13 @@
.output_height(5)
.output_width_stride(4)
.output_tuple_stride(5 * 4)
- .Test(xnn_f32_dwconv_spchw_ukernel_5x5p2__scalar, DWConvSpCHWMicrokernelTester::Variant::Scalar);
+ .Test(xnn_f32_dwconv_chw_ukernel_5x5p2__scalar, DWConvCHWMicrokernelTester::Variant::Scalar);
}
}
-TEST(F32_DWCONV_SPCHW_5X5P2__SCALAR, chw_layout_pad2) {
+TEST(F32_DWCONV_CHW_5X5P2__SCALAR, chw_layout_pad2) {
for (size_t input_width = 1; input_width < 32; input_width += 3) {
- DWConvSpCHWMicrokernelTester()
+ DWConvCHWMicrokernelTester()
.input_tuple_size(1)
.output_tuple_size(1)
.input_width(input_width)
@@ -2389,12 +2389,12 @@
.kernel_width(5)
.output_height(5)
.output_width_stride(input_width)
- .Test(xnn_f32_dwconv_spchw_ukernel_5x5p2__scalar, DWConvSpCHWMicrokernelTester::Variant::Scalar);
+ .Test(xnn_f32_dwconv_chw_ukernel_5x5p2__scalar, DWConvCHWMicrokernelTester::Variant::Scalar);
}
}
-TEST(F32_DWCONV_SPCHW_5X5S2P2__SCALAR, input_width_eq_1_pad1) {
- DWConvSpCHWMicrokernelTester()
+TEST(F32_DWCONV_CHW_5X5S2P2__SCALAR, input_width_eq_1_pad1) {
+ DWConvCHWMicrokernelTester()
.input_tuple_size(1)
.output_tuple_size(1)
.input_width(1)
@@ -2406,12 +2406,12 @@
.kernel_width(5)
.output_height(1)
.subsampling(2)
- .Test(xnn_f32_dwconv_spchw_ukernel_5x5s2p2__scalar, DWConvSpCHWMicrokernelTester::Variant::Scalar);
+ .Test(xnn_f32_dwconv_chw_ukernel_5x5s2p2__scalar, DWConvCHWMicrokernelTester::Variant::Scalar);
}
-TEST(F32_DWCONV_SPCHW_5X5S2P2__SCALAR, input_width_gt_1_pad1) {
+TEST(F32_DWCONV_CHW_5X5S2P2__SCALAR, input_width_gt_1_pad1) {
for (size_t input_width = 2; input_width < 32; input_width++) {
- DWConvSpCHWMicrokernelTester()
+ DWConvCHWMicrokernelTester()
.input_tuple_size(1)
.output_tuple_size(1)
.input_width(input_width)
@@ -2423,13 +2423,13 @@
.kernel_width(5)
.output_height(1)
.subsampling(2)
- .Test(xnn_f32_dwconv_spchw_ukernel_5x5s2p2__scalar, DWConvSpCHWMicrokernelTester::Variant::Scalar);
+ .Test(xnn_f32_dwconv_chw_ukernel_5x5s2p2__scalar, DWConvCHWMicrokernelTester::Variant::Scalar);
}
}
-TEST(F32_DWCONV_SPCHW_5X5S2P2__SCALAR, input_width_stride_pad1) {
+TEST(F32_DWCONV_CHW_5X5S2P2__SCALAR, input_width_stride_pad1) {
for (size_t input_width = 1; input_width < 32; input_width += 3) {
- DWConvSpCHWMicrokernelTester()
+ DWConvCHWMicrokernelTester()
.input_tuple_size(1)
.output_tuple_size(1)
.input_width(input_width)
@@ -2442,13 +2442,13 @@
.kernel_width(5)
.output_height(1)
.subsampling(2)
- .Test(xnn_f32_dwconv_spchw_ukernel_5x5s2p2__scalar, DWConvSpCHWMicrokernelTester::Variant::Scalar);
+ .Test(xnn_f32_dwconv_chw_ukernel_5x5s2p2__scalar, DWConvCHWMicrokernelTester::Variant::Scalar);
}
}
-TEST(F32_DWCONV_SPCHW_5X5S2P2__SCALAR, input_tuple_stride_pad1) {
+TEST(F32_DWCONV_CHW_5X5S2P2__SCALAR, input_tuple_stride_pad1) {
for (size_t input_width = 1; input_width < 32; input_width += 5) {
- DWConvSpCHWMicrokernelTester()
+ DWConvCHWMicrokernelTester()
.input_tuple_size(1)
.output_tuple_size(1)
.input_width(input_width)
@@ -2462,14 +2462,14 @@
.kernel_width(5)
.output_height(1)
.subsampling(2)
- .Test(xnn_f32_dwconv_spchw_ukernel_5x5s2p2__scalar, DWConvSpCHWMicrokernelTester::Variant::Scalar);
+ .Test(xnn_f32_dwconv_chw_ukernel_5x5s2p2__scalar, DWConvCHWMicrokernelTester::Variant::Scalar);
}
}
-TEST(F32_DWCONV_SPCHW_5X5S2P2__SCALAR, output_height_gt_1_pad1) {
+TEST(F32_DWCONV_CHW_5X5S2P2__SCALAR, output_height_gt_1_pad1) {
for (size_t output_height = 2; output_height < 5; output_height++) {
for (size_t input_width = 1; input_width < 32; input_width += 3) {
- DWConvSpCHWMicrokernelTester()
+ DWConvCHWMicrokernelTester()
.input_tuple_size(1)
.output_tuple_size(1)
.input_width(input_width)
@@ -2481,14 +2481,14 @@
.kernel_width(5)
.output_height(output_height)
.subsampling(2)
- .Test(xnn_f32_dwconv_spchw_ukernel_5x5s2p2__scalar, DWConvSpCHWMicrokernelTester::Variant::Scalar);
+ .Test(xnn_f32_dwconv_chw_ukernel_5x5s2p2__scalar, DWConvCHWMicrokernelTester::Variant::Scalar);
}
}
}
-TEST(F32_DWCONV_SPCHW_5X5S2P2__SCALAR, output_width_stride_pad1) {
+TEST(F32_DWCONV_CHW_5X5S2P2__SCALAR, output_width_stride_pad1) {
for (size_t input_width = 1; input_width < 32; input_width += 3) {
- DWConvSpCHWMicrokernelTester()
+ DWConvCHWMicrokernelTester()
.input_tuple_size(1)
.output_tuple_size(1)
.input_width(input_width)
@@ -2501,13 +2501,13 @@
.output_height(5)
.output_width_stride(36)
.subsampling(2)
- .Test(xnn_f32_dwconv_spchw_ukernel_5x5s2p2__scalar, DWConvSpCHWMicrokernelTester::Variant::Scalar);
+ .Test(xnn_f32_dwconv_chw_ukernel_5x5s2p2__scalar, DWConvCHWMicrokernelTester::Variant::Scalar);
}
}
-TEST(F32_DWCONV_SPCHW_5X5S2P2__SCALAR, output_tuple_stride_pad1) {
+TEST(F32_DWCONV_CHW_5X5S2P2__SCALAR, output_tuple_stride_pad1) {
for (size_t input_width = 1; input_width < 32; input_width += 3) {
- DWConvSpCHWMicrokernelTester()
+ DWConvCHWMicrokernelTester()
.input_tuple_size(1)
.output_tuple_size(1)
.input_width(input_width)
@@ -2521,13 +2521,13 @@
.output_width_stride(4)
.output_tuple_stride(5 * 4)
.subsampling(2)
- .Test(xnn_f32_dwconv_spchw_ukernel_5x5s2p2__scalar, DWConvSpCHWMicrokernelTester::Variant::Scalar);
+ .Test(xnn_f32_dwconv_chw_ukernel_5x5s2p2__scalar, DWConvCHWMicrokernelTester::Variant::Scalar);
}
}
-TEST(F32_DWCONV_SPCHW_5X5S2P2__SCALAR, chw_layout_pad1) {
+TEST(F32_DWCONV_CHW_5X5S2P2__SCALAR, chw_layout_pad1) {
for (size_t input_width = 1; input_width < 32; input_width += 3) {
- DWConvSpCHWMicrokernelTester()
+ DWConvCHWMicrokernelTester()
.input_tuple_size(1)
.output_tuple_size(1)
.input_width(input_width)
@@ -2541,12 +2541,12 @@
.output_height(5)
.output_width_stride(input_width)
.subsampling(2)
- .Test(xnn_f32_dwconv_spchw_ukernel_5x5s2p2__scalar, DWConvSpCHWMicrokernelTester::Variant::Scalar);
+ .Test(xnn_f32_dwconv_chw_ukernel_5x5s2p2__scalar, DWConvCHWMicrokernelTester::Variant::Scalar);
}
}
-TEST(F32_DWCONV_SPCHW_5X5S2P2__SCALAR, input_width_eq_1_pad2) {
- DWConvSpCHWMicrokernelTester()
+TEST(F32_DWCONV_CHW_5X5S2P2__SCALAR, input_width_eq_1_pad2) {
+ DWConvCHWMicrokernelTester()
.input_tuple_size(1)
.output_tuple_size(1)
.input_width(1)
@@ -2558,12 +2558,12 @@
.kernel_width(5)
.output_height(1)
.subsampling(2)
- .Test(xnn_f32_dwconv_spchw_ukernel_5x5s2p2__scalar, DWConvSpCHWMicrokernelTester::Variant::Scalar);
+ .Test(xnn_f32_dwconv_chw_ukernel_5x5s2p2__scalar, DWConvCHWMicrokernelTester::Variant::Scalar);
}
-TEST(F32_DWCONV_SPCHW_5X5S2P2__SCALAR, input_width_gt_1_pad2) {
+TEST(F32_DWCONV_CHW_5X5S2P2__SCALAR, input_width_gt_1_pad2) {
for (size_t input_width = 2; input_width < 32; input_width++) {
- DWConvSpCHWMicrokernelTester()
+ DWConvCHWMicrokernelTester()
.input_tuple_size(1)
.output_tuple_size(1)
.input_width(input_width)
@@ -2575,13 +2575,13 @@
.kernel_width(5)
.output_height(1)
.subsampling(2)
- .Test(xnn_f32_dwconv_spchw_ukernel_5x5s2p2__scalar, DWConvSpCHWMicrokernelTester::Variant::Scalar);
+ .Test(xnn_f32_dwconv_chw_ukernel_5x5s2p2__scalar, DWConvCHWMicrokernelTester::Variant::Scalar);
}
}
-TEST(F32_DWCONV_SPCHW_5X5S2P2__SCALAR, input_width_stride_pad2) {
+TEST(F32_DWCONV_CHW_5X5S2P2__SCALAR, input_width_stride_pad2) {
for (size_t input_width = 1; input_width < 32; input_width += 3) {
- DWConvSpCHWMicrokernelTester()
+ DWConvCHWMicrokernelTester()
.input_tuple_size(1)
.output_tuple_size(1)
.input_width(input_width)
@@ -2594,13 +2594,13 @@
.kernel_width(5)
.output_height(1)
.subsampling(2)
- .Test(xnn_f32_dwconv_spchw_ukernel_5x5s2p2__scalar, DWConvSpCHWMicrokernelTester::Variant::Scalar);
+ .Test(xnn_f32_dwconv_chw_ukernel_5x5s2p2__scalar, DWConvCHWMicrokernelTester::Variant::Scalar);
}
}
-TEST(F32_DWCONV_SPCHW_5X5S2P2__SCALAR, input_tuple_stride_pad2) {
+TEST(F32_DWCONV_CHW_5X5S2P2__SCALAR, input_tuple_stride_pad2) {
for (size_t input_width = 1; input_width < 32; input_width += 5) {
- DWConvSpCHWMicrokernelTester()
+ DWConvCHWMicrokernelTester()
.input_tuple_size(1)
.output_tuple_size(1)
.input_width(input_width)
@@ -2614,14 +2614,14 @@
.kernel_width(5)
.output_height(1)
.subsampling(2)
- .Test(xnn_f32_dwconv_spchw_ukernel_5x5s2p2__scalar, DWConvSpCHWMicrokernelTester::Variant::Scalar);
+ .Test(xnn_f32_dwconv_chw_ukernel_5x5s2p2__scalar, DWConvCHWMicrokernelTester::Variant::Scalar);
}
}
-TEST(F32_DWCONV_SPCHW_5X5S2P2__SCALAR, output_height_gt_1_pad2) {
+TEST(F32_DWCONV_CHW_5X5S2P2__SCALAR, output_height_gt_1_pad2) {
for (size_t output_height = 2; output_height < 5; output_height++) {
for (size_t input_width = 1; input_width < 32; input_width += 3) {
- DWConvSpCHWMicrokernelTester()
+ DWConvCHWMicrokernelTester()
.input_tuple_size(1)
.output_tuple_size(1)
.input_width(input_width)
@@ -2633,14 +2633,14 @@
.kernel_width(5)
.output_height(output_height)
.subsampling(2)
- .Test(xnn_f32_dwconv_spchw_ukernel_5x5s2p2__scalar, DWConvSpCHWMicrokernelTester::Variant::Scalar);
+ .Test(xnn_f32_dwconv_chw_ukernel_5x5s2p2__scalar, DWConvCHWMicrokernelTester::Variant::Scalar);
}
}
}
-TEST(F32_DWCONV_SPCHW_5X5S2P2__SCALAR, output_width_stride_pad2) {
+TEST(F32_DWCONV_CHW_5X5S2P2__SCALAR, output_width_stride_pad2) {
for (size_t input_width = 1; input_width < 32; input_width += 3) {
- DWConvSpCHWMicrokernelTester()
+ DWConvCHWMicrokernelTester()
.input_tuple_size(1)
.output_tuple_size(1)
.input_width(input_width)
@@ -2653,13 +2653,13 @@
.output_height(5)
.output_width_stride(36)
.subsampling(2)
- .Test(xnn_f32_dwconv_spchw_ukernel_5x5s2p2__scalar, DWConvSpCHWMicrokernelTester::Variant::Scalar);
+ .Test(xnn_f32_dwconv_chw_ukernel_5x5s2p2__scalar, DWConvCHWMicrokernelTester::Variant::Scalar);
}
}
-TEST(F32_DWCONV_SPCHW_5X5S2P2__SCALAR, output_tuple_stride_pad2) {
+TEST(F32_DWCONV_CHW_5X5S2P2__SCALAR, output_tuple_stride_pad2) {
for (size_t input_width = 1; input_width < 32; input_width += 3) {
- DWConvSpCHWMicrokernelTester()
+ DWConvCHWMicrokernelTester()
.input_tuple_size(1)
.output_tuple_size(1)
.input_width(input_width)
@@ -2673,13 +2673,13 @@
.output_width_stride(4)
.output_tuple_stride(5 * 4)
.subsampling(2)
- .Test(xnn_f32_dwconv_spchw_ukernel_5x5s2p2__scalar, DWConvSpCHWMicrokernelTester::Variant::Scalar);
+ .Test(xnn_f32_dwconv_chw_ukernel_5x5s2p2__scalar, DWConvCHWMicrokernelTester::Variant::Scalar);
}
}
-TEST(F32_DWCONV_SPCHW_5X5S2P2__SCALAR, chw_layout_pad2) {
+TEST(F32_DWCONV_CHW_5X5S2P2__SCALAR, chw_layout_pad2) {
for (size_t input_width = 1; input_width < 32; input_width += 3) {
- DWConvSpCHWMicrokernelTester()
+ DWConvCHWMicrokernelTester()
.input_tuple_size(1)
.output_tuple_size(1)
.input_width(input_width)
@@ -2693,6 +2693,6 @@
.output_height(5)
.output_width_stride(input_width)
.subsampling(2)
- .Test(xnn_f32_dwconv_spchw_ukernel_5x5s2p2__scalar, DWConvSpCHWMicrokernelTester::Variant::Scalar);
+ .Test(xnn_f32_dwconv_chw_ukernel_5x5s2p2__scalar, DWConvCHWMicrokernelTester::Variant::Scalar);
}
}
diff --git a/test/f32-gavgpool-cw.cc b/test/f32-gavgpool-cw.cc
new file mode 100644
index 0000000..6f12f07
--- /dev/null
+++ b/test/f32-gavgpool-cw.cc
@@ -0,0 +1,276 @@
+// Copyright 2019 Google LLC
+//
+// This source code is licensed under the BSD-style license found in the
+// LICENSE file in the root directory of this source tree.
+
+#include <gtest/gtest.h>
+
+#include <xnnpack/common.h>
+#include <xnnpack/isa-checks.h>
+
+#include <xnnpack/gavgpool.h>
+#include "gavgpool-cw-microkernel-tester.h"
+
+
+#if XNN_ARCH_ARM || XNN_ARCH_ARM64
+ TEST(F32_GAVGPOOL_CW__NEON_X4, elements_eq_4) {
+ TEST_REQUIRES_ARM_NEON;
+ GAvgPoolCWMicrokernelTester()
+ .elements(4)
+ .channels(4)
+ .Test(xnn_f32_gavgpool_cw_ukernel__neon_x4);
+ }
+
+ TEST(F32_GAVGPOOL_CW__NEON_X4, elements_div_4) {
+ TEST_REQUIRES_ARM_NEON;
+ for (size_t elements = 8; elements < 32; elements += 4) {
+ GAvgPoolCWMicrokernelTester()
+ .elements(elements)
+ .channels(4)
+ .Test(xnn_f32_gavgpool_cw_ukernel__neon_x4);
+ }
+ }
+
+ TEST(F32_GAVGPOOL_CW__NEON_X4, elements_lt_4) {
+ TEST_REQUIRES_ARM_NEON;
+ for (size_t elements = 1; elements < 4; elements++) {
+ GAvgPoolCWMicrokernelTester()
+ .elements(elements)
+ .channels(4)
+ .Test(xnn_f32_gavgpool_cw_ukernel__neon_x4);
+ }
+ }
+
+ TEST(F32_GAVGPOOL_CW__NEON_X4, elements_gt_4) {
+ TEST_REQUIRES_ARM_NEON;
+ for (size_t elements = 5; elements < 8; elements++) {
+ GAvgPoolCWMicrokernelTester()
+ .elements(elements)
+ .channels(4)
+ .Test(xnn_f32_gavgpool_cw_ukernel__neon_x4);
+ }
+ }
+
+ TEST(F32_GAVGPOOL_CW__NEON_X4, channels_lt_4) {
+ TEST_REQUIRES_ARM_NEON;
+ for (size_t channels = 1; channels < 4; channels++) {
+ for (size_t elements = 1; elements < 16; elements += 3) {
+ GAvgPoolCWMicrokernelTester()
+ .elements(elements)
+ .channels(channels)
+ .Test(xnn_f32_gavgpool_cw_ukernel__neon_x4);
+ }
+ }
+ }
+
+ TEST(F32_GAVGPOOL_CW__NEON_X4, channels_gt_4) {
+ TEST_REQUIRES_ARM_NEON;
+ for (size_t channels = 5; channels < 8; channels++) {
+ for (size_t elements = 1; elements < 16; elements += 3) {
+ GAvgPoolCWMicrokernelTester()
+ .elements(elements)
+ .channels(channels)
+ .Test(xnn_f32_gavgpool_cw_ukernel__neon_x4);
+ }
+ }
+ }
+
+ TEST(F32_GAVGPOOL_CW__NEON_X4, channels_div_4) {
+ TEST_REQUIRES_ARM_NEON;
+ for (size_t channels = 8; channels <= 16; channels += 4) {
+ for (size_t elements = 1; elements < 16; elements += 3) {
+ GAvgPoolCWMicrokernelTester()
+ .elements(elements)
+ .channels(channels)
+ .Test(xnn_f32_gavgpool_cw_ukernel__neon_x4);
+ }
+ }
+ }
+
+ TEST(F32_GAVGPOOL_CW__NEON_X4, qmin) {
+ TEST_REQUIRES_ARM_NEON;
+ for (size_t elements = 1; elements < 16; elements += 3) {
+ GAvgPoolCWMicrokernelTester()
+ .elements(elements)
+ .channels(4)
+ .qmin(128)
+ .Test(xnn_f32_gavgpool_cw_ukernel__neon_x4);
+ }
+ }
+
+ TEST(F32_GAVGPOOL_CW__NEON_X4, qmax) {
+ TEST_REQUIRES_ARM_NEON;
+ for (size_t elements = 1; elements < 16; elements += 3) {
+ GAvgPoolCWMicrokernelTester()
+ .elements(elements)
+ .channels(4)
+ .qmax(128)
+ .Test(xnn_f32_gavgpool_cw_ukernel__neon_x4);
+ }
+ }
+#endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
+
+
+#if XNN_ARCH_X86 || XNN_ARCH_X86_64
+ TEST(F32_GAVGPOOL_CW__SSE_X4, elements_eq_4) {
+ TEST_REQUIRES_X86_SSE;
+ GAvgPoolCWMicrokernelTester()
+ .elements(4)
+ .channels(4)
+ .Test(xnn_f32_gavgpool_cw_ukernel__sse_x4);
+ }
+
+ TEST(F32_GAVGPOOL_CW__SSE_X4, elements_div_4) {
+ TEST_REQUIRES_X86_SSE;
+ for (size_t elements = 8; elements < 32; elements += 4) {
+ GAvgPoolCWMicrokernelTester()
+ .elements(elements)
+ .channels(4)
+ .Test(xnn_f32_gavgpool_cw_ukernel__sse_x4);
+ }
+ }
+
+ TEST(F32_GAVGPOOL_CW__SSE_X4, elements_lt_4) {
+ TEST_REQUIRES_X86_SSE;
+ for (size_t elements = 1; elements < 4; elements++) {
+ GAvgPoolCWMicrokernelTester()
+ .elements(elements)
+ .channels(4)
+ .Test(xnn_f32_gavgpool_cw_ukernel__sse_x4);
+ }
+ }
+
+ TEST(F32_GAVGPOOL_CW__SSE_X4, elements_gt_4) {
+ TEST_REQUIRES_X86_SSE;
+ for (size_t elements = 5; elements < 8; elements++) {
+ GAvgPoolCWMicrokernelTester()
+ .elements(elements)
+ .channels(4)
+ .Test(xnn_f32_gavgpool_cw_ukernel__sse_x4);
+ }
+ }
+
+ TEST(F32_GAVGPOOL_CW__SSE_X4, channels_lt_4) {
+ TEST_REQUIRES_X86_SSE;
+ for (size_t channels = 1; channels < 4; channels++) {
+ for (size_t elements = 1; elements < 16; elements += 3) {
+ GAvgPoolCWMicrokernelTester()
+ .elements(elements)
+ .channels(channels)
+ .Test(xnn_f32_gavgpool_cw_ukernel__sse_x4);
+ }
+ }
+ }
+
+ TEST(F32_GAVGPOOL_CW__SSE_X4, channels_gt_4) {
+ TEST_REQUIRES_X86_SSE;
+ for (size_t channels = 5; channels < 8; channels++) {
+ for (size_t elements = 1; elements < 16; elements += 3) {
+ GAvgPoolCWMicrokernelTester()
+ .elements(elements)
+ .channels(channels)
+ .Test(xnn_f32_gavgpool_cw_ukernel__sse_x4);
+ }
+ }
+ }
+
+ TEST(F32_GAVGPOOL_CW__SSE_X4, channels_div_4) {
+ TEST_REQUIRES_X86_SSE;
+ for (size_t channels = 8; channels <= 16; channels += 4) {
+ for (size_t elements = 1; elements < 16; elements += 3) {
+ GAvgPoolCWMicrokernelTester()
+ .elements(elements)
+ .channels(channels)
+ .Test(xnn_f32_gavgpool_cw_ukernel__sse_x4);
+ }
+ }
+ }
+
+ TEST(F32_GAVGPOOL_CW__SSE_X4, qmin) {
+ TEST_REQUIRES_X86_SSE;
+ for (size_t elements = 1; elements < 16; elements += 3) {
+ GAvgPoolCWMicrokernelTester()
+ .elements(elements)
+ .channels(4)
+ .qmin(128)
+ .Test(xnn_f32_gavgpool_cw_ukernel__sse_x4);
+ }
+ }
+
+ TEST(F32_GAVGPOOL_CW__SSE_X4, qmax) {
+ TEST_REQUIRES_X86_SSE;
+ for (size_t elements = 1; elements < 16; elements += 3) {
+ GAvgPoolCWMicrokernelTester()
+ .elements(elements)
+ .channels(4)
+ .qmax(128)
+ .Test(xnn_f32_gavgpool_cw_ukernel__sse_x4);
+ }
+ }
+#endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
+
+
+TEST(F32_GAVGPOOL_CW__SCALAR_X1, elements_eq_4) {
+ GAvgPoolCWMicrokernelTester()
+ .elements(4)
+ .channels(1)
+ .Test(xnn_f32_gavgpool_cw_ukernel__scalar_x1, GAvgPoolCWMicrokernelTester::Variant::Scalar);
+}
+
+TEST(F32_GAVGPOOL_CW__SCALAR_X1, elements_div_4) {
+ for (size_t elements = 8; elements < 32; elements += 4) {
+ GAvgPoolCWMicrokernelTester()
+ .elements(elements)
+ .channels(1)
+ .Test(xnn_f32_gavgpool_cw_ukernel__scalar_x1, GAvgPoolCWMicrokernelTester::Variant::Scalar);
+ }
+}
+
+TEST(F32_GAVGPOOL_CW__SCALAR_X1, elements_lt_4) {
+ for (size_t elements = 1; elements < 4; elements++) {
+ GAvgPoolCWMicrokernelTester()
+ .elements(elements)
+ .channels(1)
+ .Test(xnn_f32_gavgpool_cw_ukernel__scalar_x1, GAvgPoolCWMicrokernelTester::Variant::Scalar);
+ }
+}
+
+TEST(F32_GAVGPOOL_CW__SCALAR_X1, elements_gt_4) {
+ for (size_t elements = 5; elements < 8; elements++) {
+ GAvgPoolCWMicrokernelTester()
+ .elements(elements)
+ .channels(1)
+ .Test(xnn_f32_gavgpool_cw_ukernel__scalar_x1, GAvgPoolCWMicrokernelTester::Variant::Scalar);
+ }
+}
+
+TEST(F32_GAVGPOOL_CW__SCALAR_X1, channels_gt_1) {
+ for (size_t channels = 2; channels < 5; channels++) {
+ for (size_t elements = 1; elements < 16; elements += 3) {
+ GAvgPoolCWMicrokernelTester()
+ .elements(elements)
+ .channels(channels)
+ .Test(xnn_f32_gavgpool_cw_ukernel__scalar_x1, GAvgPoolCWMicrokernelTester::Variant::Scalar);
+ }
+ }
+}
+
+TEST(F32_GAVGPOOL_CW__SCALAR_X1, qmin) {
+ for (size_t elements = 1; elements < 16; elements += 3) {
+ GAvgPoolCWMicrokernelTester()
+ .elements(elements)
+ .channels(4)
+ .qmin(128)
+ .Test(xnn_f32_gavgpool_cw_ukernel__scalar_x1, GAvgPoolCWMicrokernelTester::Variant::Scalar);
+ }
+}
+
+TEST(F32_GAVGPOOL_CW__SCALAR_X1, qmax) {
+ for (size_t elements = 1; elements < 16; elements += 3) {
+ GAvgPoolCWMicrokernelTester()
+ .elements(elements)
+ .channels(4)
+ .qmax(128)
+ .Test(xnn_f32_gavgpool_cw_ukernel__scalar_x1, GAvgPoolCWMicrokernelTester::Variant::Scalar);
+ }
+}
diff --git a/test/f32-gavgpool-spchw.cc b/test/f32-gavgpool-spchw.cc
deleted file mode 100644
index b2b2b40..0000000
--- a/test/f32-gavgpool-spchw.cc
+++ /dev/null
@@ -1,276 +0,0 @@
-// Copyright 2019 Google LLC
-//
-// This source code is licensed under the BSD-style license found in the
-// LICENSE file in the root directory of this source tree.
-
-#include <gtest/gtest.h>
-
-#include <xnnpack/common.h>
-#include <xnnpack/isa-checks.h>
-
-#include <xnnpack/gavgpool.h>
-#include "gavgpool-spchw-microkernel-tester.h"
-
-
-#if XNN_ARCH_ARM || XNN_ARCH_ARM64
- TEST(F32_GAVGPOOL_SPCHW__NEON_X4, elements_eq_4) {
- TEST_REQUIRES_ARM_NEON;
- GAvgPoolSpCHWMicrokernelTester()
- .elements(4)
- .channels(4)
- .Test(xnn_f32_gavgpool_spchw_ukernel__neon_x4);
- }
-
- TEST(F32_GAVGPOOL_SPCHW__NEON_X4, elements_div_4) {
- TEST_REQUIRES_ARM_NEON;
- for (size_t elements = 8; elements < 32; elements += 4) {
- GAvgPoolSpCHWMicrokernelTester()
- .elements(elements)
- .channels(4)
- .Test(xnn_f32_gavgpool_spchw_ukernel__neon_x4);
- }
- }
-
- TEST(F32_GAVGPOOL_SPCHW__NEON_X4, elements_lt_4) {
- TEST_REQUIRES_ARM_NEON;
- for (size_t elements = 1; elements < 4; elements++) {
- GAvgPoolSpCHWMicrokernelTester()
- .elements(elements)
- .channels(4)
- .Test(xnn_f32_gavgpool_spchw_ukernel__neon_x4);
- }
- }
-
- TEST(F32_GAVGPOOL_SPCHW__NEON_X4, elements_gt_4) {
- TEST_REQUIRES_ARM_NEON;
- for (size_t elements = 5; elements < 8; elements++) {
- GAvgPoolSpCHWMicrokernelTester()
- .elements(elements)
- .channels(4)
- .Test(xnn_f32_gavgpool_spchw_ukernel__neon_x4);
- }
- }
-
- TEST(F32_GAVGPOOL_SPCHW__NEON_X4, channels_lt_4) {
- TEST_REQUIRES_ARM_NEON;
- for (size_t channels = 1; channels < 4; channels++) {
- for (size_t elements = 1; elements < 16; elements += 3) {
- GAvgPoolSpCHWMicrokernelTester()
- .elements(elements)
- .channels(channels)
- .Test(xnn_f32_gavgpool_spchw_ukernel__neon_x4);
- }
- }
- }
-
- TEST(F32_GAVGPOOL_SPCHW__NEON_X4, channels_gt_4) {
- TEST_REQUIRES_ARM_NEON;
- for (size_t channels = 5; channels < 8; channels++) {
- for (size_t elements = 1; elements < 16; elements += 3) {
- GAvgPoolSpCHWMicrokernelTester()
- .elements(elements)
- .channels(channels)
- .Test(xnn_f32_gavgpool_spchw_ukernel__neon_x4);
- }
- }
- }
-
- TEST(F32_GAVGPOOL_SPCHW__NEON_X4, channels_div_4) {
- TEST_REQUIRES_ARM_NEON;
- for (size_t channels = 8; channels <= 16; channels += 4) {
- for (size_t elements = 1; elements < 16; elements += 3) {
- GAvgPoolSpCHWMicrokernelTester()
- .elements(elements)
- .channels(channels)
- .Test(xnn_f32_gavgpool_spchw_ukernel__neon_x4);
- }
- }
- }
-
- TEST(F32_GAVGPOOL_SPCHW__NEON_X4, qmin) {
- TEST_REQUIRES_ARM_NEON;
- for (size_t elements = 1; elements < 16; elements += 3) {
- GAvgPoolSpCHWMicrokernelTester()
- .elements(elements)
- .channels(4)
- .qmin(128)
- .Test(xnn_f32_gavgpool_spchw_ukernel__neon_x4);
- }
- }
-
- TEST(F32_GAVGPOOL_SPCHW__NEON_X4, qmax) {
- TEST_REQUIRES_ARM_NEON;
- for (size_t elements = 1; elements < 16; elements += 3) {
- GAvgPoolSpCHWMicrokernelTester()
- .elements(elements)
- .channels(4)
- .qmax(128)
- .Test(xnn_f32_gavgpool_spchw_ukernel__neon_x4);
- }
- }
-#endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
-
-
-#if XNN_ARCH_X86 || XNN_ARCH_X86_64
- TEST(F32_GAVGPOOL_SPCHW__SSE_X4, elements_eq_4) {
- TEST_REQUIRES_X86_SSE;
- GAvgPoolSpCHWMicrokernelTester()
- .elements(4)
- .channels(4)
- .Test(xnn_f32_gavgpool_spchw_ukernel__sse_x4);
- }
-
- TEST(F32_GAVGPOOL_SPCHW__SSE_X4, elements_div_4) {
- TEST_REQUIRES_X86_SSE;
- for (size_t elements = 8; elements < 32; elements += 4) {
- GAvgPoolSpCHWMicrokernelTester()
- .elements(elements)
- .channels(4)
- .Test(xnn_f32_gavgpool_spchw_ukernel__sse_x4);
- }
- }
-
- TEST(F32_GAVGPOOL_SPCHW__SSE_X4, elements_lt_4) {
- TEST_REQUIRES_X86_SSE;
- for (size_t elements = 1; elements < 4; elements++) {
- GAvgPoolSpCHWMicrokernelTester()
- .elements(elements)
- .channels(4)
- .Test(xnn_f32_gavgpool_spchw_ukernel__sse_x4);
- }
- }
-
- TEST(F32_GAVGPOOL_SPCHW__SSE_X4, elements_gt_4) {
- TEST_REQUIRES_X86_SSE;
- for (size_t elements = 5; elements < 8; elements++) {
- GAvgPoolSpCHWMicrokernelTester()
- .elements(elements)
- .channels(4)
- .Test(xnn_f32_gavgpool_spchw_ukernel__sse_x4);
- }
- }
-
- TEST(F32_GAVGPOOL_SPCHW__SSE_X4, channels_lt_4) {
- TEST_REQUIRES_X86_SSE;
- for (size_t channels = 1; channels < 4; channels++) {
- for (size_t elements = 1; elements < 16; elements += 3) {
- GAvgPoolSpCHWMicrokernelTester()
- .elements(elements)
- .channels(channels)
- .Test(xnn_f32_gavgpool_spchw_ukernel__sse_x4);
- }
- }
- }
-
- TEST(F32_GAVGPOOL_SPCHW__SSE_X4, channels_gt_4) {
- TEST_REQUIRES_X86_SSE;
- for (size_t channels = 5; channels < 8; channels++) {
- for (size_t elements = 1; elements < 16; elements += 3) {
- GAvgPoolSpCHWMicrokernelTester()
- .elements(elements)
- .channels(channels)
- .Test(xnn_f32_gavgpool_spchw_ukernel__sse_x4);
- }
- }
- }
-
- TEST(F32_GAVGPOOL_SPCHW__SSE_X4, channels_div_4) {
- TEST_REQUIRES_X86_SSE;
- for (size_t channels = 8; channels <= 16; channels += 4) {
- for (size_t elements = 1; elements < 16; elements += 3) {
- GAvgPoolSpCHWMicrokernelTester()
- .elements(elements)
- .channels(channels)
- .Test(xnn_f32_gavgpool_spchw_ukernel__sse_x4);
- }
- }
- }
-
- TEST(F32_GAVGPOOL_SPCHW__SSE_X4, qmin) {
- TEST_REQUIRES_X86_SSE;
- for (size_t elements = 1; elements < 16; elements += 3) {
- GAvgPoolSpCHWMicrokernelTester()
- .elements(elements)
- .channels(4)
- .qmin(128)
- .Test(xnn_f32_gavgpool_spchw_ukernel__sse_x4);
- }
- }
-
- TEST(F32_GAVGPOOL_SPCHW__SSE_X4, qmax) {
- TEST_REQUIRES_X86_SSE;
- for (size_t elements = 1; elements < 16; elements += 3) {
- GAvgPoolSpCHWMicrokernelTester()
- .elements(elements)
- .channels(4)
- .qmax(128)
- .Test(xnn_f32_gavgpool_spchw_ukernel__sse_x4);
- }
- }
-#endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
-
-
-TEST(F32_GAVGPOOL_SPCHW__SCALAR_X1, elements_eq_4) {
- GAvgPoolSpCHWMicrokernelTester()
- .elements(4)
- .channels(1)
- .Test(xnn_f32_gavgpool_spchw_ukernel__scalar_x1, GAvgPoolSpCHWMicrokernelTester::Variant::Scalar);
-}
-
-TEST(F32_GAVGPOOL_SPCHW__SCALAR_X1, elements_div_4) {
- for (size_t elements = 8; elements < 32; elements += 4) {
- GAvgPoolSpCHWMicrokernelTester()
- .elements(elements)
- .channels(1)
- .Test(xnn_f32_gavgpool_spchw_ukernel__scalar_x1, GAvgPoolSpCHWMicrokernelTester::Variant::Scalar);
- }
-}
-
-TEST(F32_GAVGPOOL_SPCHW__SCALAR_X1, elements_lt_4) {
- for (size_t elements = 1; elements < 4; elements++) {
- GAvgPoolSpCHWMicrokernelTester()
- .elements(elements)
- .channels(1)
- .Test(xnn_f32_gavgpool_spchw_ukernel__scalar_x1, GAvgPoolSpCHWMicrokernelTester::Variant::Scalar);
- }
-}
-
-TEST(F32_GAVGPOOL_SPCHW__SCALAR_X1, elements_gt_4) {
- for (size_t elements = 5; elements < 8; elements++) {
- GAvgPoolSpCHWMicrokernelTester()
- .elements(elements)
- .channels(1)
- .Test(xnn_f32_gavgpool_spchw_ukernel__scalar_x1, GAvgPoolSpCHWMicrokernelTester::Variant::Scalar);
- }
-}
-
-TEST(F32_GAVGPOOL_SPCHW__SCALAR_X1, channels_gt_1) {
- for (size_t channels = 2; channels < 5; channels++) {
- for (size_t elements = 1; elements < 16; elements += 3) {
- GAvgPoolSpCHWMicrokernelTester()
- .elements(elements)
- .channels(channels)
- .Test(xnn_f32_gavgpool_spchw_ukernel__scalar_x1, GAvgPoolSpCHWMicrokernelTester::Variant::Scalar);
- }
- }
-}
-
-TEST(F32_GAVGPOOL_SPCHW__SCALAR_X1, qmin) {
- for (size_t elements = 1; elements < 16; elements += 3) {
- GAvgPoolSpCHWMicrokernelTester()
- .elements(elements)
- .channels(4)
- .qmin(128)
- .Test(xnn_f32_gavgpool_spchw_ukernel__scalar_x1, GAvgPoolSpCHWMicrokernelTester::Variant::Scalar);
- }
-}
-
-TEST(F32_GAVGPOOL_SPCHW__SCALAR_X1, qmax) {
- for (size_t elements = 1; elements < 16; elements += 3) {
- GAvgPoolSpCHWMicrokernelTester()
- .elements(elements)
- .channels(4)
- .qmax(128)
- .Test(xnn_f32_gavgpool_spchw_ukernel__scalar_x1, GAvgPoolSpCHWMicrokernelTester::Variant::Scalar);
- }
-}
diff --git a/test/gavgpool-spchw-microkernel-tester.h b/test/gavgpool-cw-microkernel-tester.h
similarity index 88%
rename from test/gavgpool-spchw-microkernel-tester.h
rename to test/gavgpool-cw-microkernel-tester.h
index 113f470..76ce050 100644
--- a/test/gavgpool-spchw-microkernel-tester.h
+++ b/test/gavgpool-cw-microkernel-tester.h
@@ -22,14 +22,14 @@
#include <xnnpack/params.h>
-class GAvgPoolSpCHWMicrokernelTester {
+class GAvgPoolCWMicrokernelTester {
public:
enum class Variant {
Native,
Scalar,
};
- inline GAvgPoolSpCHWMicrokernelTester& elements(size_t elements) {
+ inline GAvgPoolCWMicrokernelTester& elements(size_t elements) {
assert(elements != 0);
this->elements_ = elements;
return *this;
@@ -39,7 +39,7 @@
return this->elements_;
}
- inline GAvgPoolSpCHWMicrokernelTester& channels(size_t channels) {
+ inline GAvgPoolCWMicrokernelTester& channels(size_t channels) {
assert(channels != 0);
this->channels_ = channels;
return *this;
@@ -49,7 +49,7 @@
return this->channels_;
}
- inline GAvgPoolSpCHWMicrokernelTester& qmin(uint8_t qmin) {
+ inline GAvgPoolCWMicrokernelTester& qmin(uint8_t qmin) {
this->qmin_ = qmin;
return *this;
}
@@ -58,7 +58,7 @@
return this->qmin_;
}
- inline GAvgPoolSpCHWMicrokernelTester& qmax(uint8_t qmax) {
+ inline GAvgPoolCWMicrokernelTester& qmax(uint8_t qmax) {
this->qmax_ = qmax;
return *this;
}
@@ -67,7 +67,7 @@
return this->qmax_;
}
- inline GAvgPoolSpCHWMicrokernelTester& iterations(size_t iterations) {
+ inline GAvgPoolCWMicrokernelTester& iterations(size_t iterations) {
this->iterations_ = iterations;
return *this;
}
@@ -77,7 +77,7 @@
}
- void Test(xnn_f32_gavgpool_spchw_ukernel_function gavgpool, Variant variant = Variant::Native) const {
+ void Test(xnn_f32_gavgpool_cw_ukernel_function gavgpool, Variant variant = Variant::Native) const {
std::random_device random_device;
auto rng = std::mt19937(random_device());
auto f32rng = std::bind(std::uniform_real_distribution<float>(), rng);