| // Copyright 2019 Google LLC |
| // |
| // This source code is licensed under the BSD-style license found in the |
| // LICENSE file in the root directory of this source tree. |
| |
| #include <gtest/gtest.h> |
| |
| #include <xnnpack/common.h> |
| #include <xnnpack/isa-checks.h> |
| |
| #include <xnnpack/conv.h> |
| #include "conv-hwc-microkernel-tester.h" |
| |
| |
| #if XNN_ARCH_ARM || XNN_ARCH_ARM64 |
| TEST(F32_CONV_3X3S2P1C3X8__NEON_2X2, input_width_eq_4) { |
| TEST_REQUIRES_ARM_NEON; |
| ConvHWCMicrokernelTester() |
| .kernel_size(3) |
| .subsampling(2) |
| .padding_width(1) |
| .input_channels(3) |
| .output_channels_tile(8) |
| .output_channels(8) |
| .input_width(4) |
| .input_height(3) |
| .Test(xnn_f32_conv_hwc_ukernel_3x3s2p1c3x8__neon_2x2); |
| } |
| |
| TEST(F32_CONV_3X3S2P1C3X8__NEON_2X2, input_width_div_4) { |
| TEST_REQUIRES_ARM_NEON; |
| for (size_t input_width = 8; input_width <= 32; input_width += 12) { |
| ConvHWCMicrokernelTester() |
| .kernel_size(3) |
| .subsampling(2) |
| .padding_width(1) |
| .input_channels(3) |
| .output_channels_tile(8) |
| .output_channels(8) |
| .input_width(input_width) |
| .input_height(3) |
| .Test(xnn_f32_conv_hwc_ukernel_3x3s2p1c3x8__neon_2x2); |
| } |
| } |
| |
| TEST(F32_CONV_3X3S2P1C3X8__NEON_2X2, input_width_lt_4) { |
| TEST_REQUIRES_ARM_NEON; |
| for (size_t input_width = 1; input_width < 4; input_width++) { |
| ConvHWCMicrokernelTester() |
| .kernel_size(3) |
| .subsampling(2) |
| .padding_width(1) |
| .input_channels(3) |
| .output_channels_tile(8) |
| .output_channels(8) |
| .input_width(input_width) |
| .input_height(3) |
| .Test(xnn_f32_conv_hwc_ukernel_3x3s2p1c3x8__neon_2x2); |
| } |
| } |
| |
| TEST(F32_CONV_3X3S2P1C3X8__NEON_2X2, input_width_gt_4) { |
| TEST_REQUIRES_ARM_NEON; |
| for (size_t input_width = 5; input_width < 8; input_width++) { |
| ConvHWCMicrokernelTester() |
| .kernel_size(3) |
| .subsampling(2) |
| .padding_width(1) |
| .input_channels(3) |
| .output_channels_tile(8) |
| .output_channels(8) |
| .input_width(input_width) |
| .input_height(3) |
| .Test(xnn_f32_conv_hwc_ukernel_3x3s2p1c3x8__neon_2x2); |
| } |
| } |
| |
| TEST(F32_CONV_3X3S2P1C3X8__NEON_2X2, output_channels_lt_8) { |
| TEST_REQUIRES_ARM_NEON; |
| for (size_t output_channels = 1; output_channels < 8; output_channels++) { |
| for (size_t input_width = 1; input_width < 32; input_width += 7) { |
| ConvHWCMicrokernelTester() |
| .kernel_size(3) |
| .subsampling(2) |
| .padding_width(1) |
| .input_channels(3) |
| .output_channels_tile(8) |
| .output_channels(output_channels) |
| .input_width(input_width) |
| .input_height(3) |
| .Test(xnn_f32_conv_hwc_ukernel_3x3s2p1c3x8__neon_2x2); |
| } |
| } |
| } |
| |
| TEST(F32_CONV_3X3S2P1C3X8__NEON_2X2, output_channels_div_8) { |
| TEST_REQUIRES_ARM_NEON; |
| for (size_t output_channels = 16; output_channels <= 32; output_channels += 8) { |
| for (size_t input_width = 1; input_width < 32; input_width += 7) { |
| ConvHWCMicrokernelTester() |
| .kernel_size(3) |
| .subsampling(2) |
| .padding_width(1) |
| .input_channels(3) |
| .output_channels_tile(8) |
| .output_channels(output_channels) |
| .input_width(input_width) |
| .input_height(3) |
| .Test(xnn_f32_conv_hwc_ukernel_3x3s2p1c3x8__neon_2x2); |
| } |
| } |
| } |
| |
| TEST(F32_CONV_3X3S2P1C3X8__NEON_2X2, output_channels_gt_8) { |
| TEST_REQUIRES_ARM_NEON; |
| for (size_t output_channels = 9; output_channels < 16; output_channels++) { |
| for (size_t input_width = 1; input_width < 32; input_width += 7) { |
| ConvHWCMicrokernelTester() |
| .kernel_size(3) |
| .subsampling(2) |
| .padding_width(1) |
| .input_channels(3) |
| .output_channels_tile(8) |
| .output_channels(output_channels) |
| .input_width(input_width) |
| .input_height(3) |
| .Test(xnn_f32_conv_hwc_ukernel_3x3s2p1c3x8__neon_2x2); |
| } |
| } |
| } |
| |
| TEST(F32_CONV_3X3S2P1C3X8__NEON_2X2, input_height_lt_3) { |
| TEST_REQUIRES_ARM_NEON; |
| for (size_t input_height = 1; input_height < 3; input_height++) { |
| for (size_t output_channels = 1; output_channels < 16; output_channels += 7) { |
| for (size_t input_width = 1; input_width < 32; input_width += 7) { |
| ConvHWCMicrokernelTester() |
| .kernel_size(3) |
| .subsampling(2) |
| .padding(1) // padded input height of at least 3 required |
| .input_channels(3) |
| .output_channels_tile(8) |
| .output_channels(output_channels) |
| .input_width(input_width) |
| .input_height(input_height) |
| .Test(xnn_f32_conv_hwc_ukernel_3x3s2p1c3x8__neon_2x2); |
| } |
| } |
| } |
| } |
| |
| TEST(F32_CONV_3X3S2P1C3X8__NEON_2X2, input_height_gt_3) { |
| TEST_REQUIRES_ARM_NEON; |
| for (size_t input_height = 4; input_height <= 9; input_height++) { |
| for (size_t output_channels = 1; output_channels < 16; output_channels += 7) { |
| for (size_t input_width = 1; input_width < 32; input_width += 7) { |
| ConvHWCMicrokernelTester() |
| .kernel_size(3) |
| .subsampling(2) |
| .padding_width(1) |
| .input_channels(3) |
| .output_channels_tile(8) |
| .output_channels(output_channels) |
| .input_width(input_width) |
| .input_height(input_height) |
| .Test(xnn_f32_conv_hwc_ukernel_3x3s2p1c3x8__neon_2x2); |
| } |
| } |
| } |
| } |
| |
| TEST(F32_CONV_3X3S2P1C3X8__NEON_2X2, padding_top) { |
| TEST_REQUIRES_ARM_NEON; |
| for (size_t padding_top = 0; padding_top <= 1; padding_top++) { |
| for (size_t output_channels = 1; output_channels < 16; output_channels += 7) { |
| for (size_t input_width = 1; input_width < 32; input_width += 7) { |
| ConvHWCMicrokernelTester() |
| .kernel_size(3) |
| .subsampling(2) |
| .padding_width(1) |
| .padding_top(padding_top) |
| .input_channels(3) |
| .output_channels_tile(8) |
| .output_channels(output_channels) |
| .input_width(input_width) |
| .input_height(9) |
| .Test(xnn_f32_conv_hwc_ukernel_3x3s2p1c3x8__neon_2x2); |
| } |
| } |
| } |
| } |
| |
| TEST(F32_CONV_3X3S2P1C3X8__NEON_2X2, padding_bottom) { |
| TEST_REQUIRES_ARM_NEON; |
| for (size_t padding_bottom = 0; padding_bottom <= 1; padding_bottom++) { |
| for (size_t output_channels = 1; output_channels < 16; output_channels += 7) { |
| for (size_t input_width = 1; input_width < 32; input_width += 7) { |
| ConvHWCMicrokernelTester() |
| .kernel_size(3) |
| .subsampling(2) |
| .padding_width(1) |
| .padding_bottom(padding_bottom) |
| .input_channels(3) |
| .output_channels_tile(8) |
| .output_channels(output_channels) |
| .input_width(input_width) |
| .input_height(9) |
| .Test(xnn_f32_conv_hwc_ukernel_3x3s2p1c3x8__neon_2x2); |
| } |
| } |
| } |
| } |
| |
| TEST(F32_CONV_3X3S2P1C3X8__NEON_2X2, output_y_start) { |
| TEST_REQUIRES_ARM_NEON; |
| for (size_t output_y_start = 1; output_y_start <= 3; output_y_start++) { |
| for (size_t output_channels = 1; output_channels < 16; output_channels += 7) { |
| for (size_t input_width = 1; input_width < 32; input_width += 7) { |
| ConvHWCMicrokernelTester() |
| .kernel_size(3) |
| .subsampling(2) |
| .padding_width(1) |
| .input_channels(3) |
| .output_channels_tile(8) |
| .output_channels(output_channels) |
| .input_width(input_width) |
| .input_height(9) |
| .output_y_start(output_y_start) |
| .Test(xnn_f32_conv_hwc_ukernel_3x3s2p1c3x8__neon_2x2); |
| } |
| } |
| } |
| } |
| |
| TEST(F32_CONV_3X3S2P1C3X8__NEON_2X2, output_y_end) { |
| TEST_REQUIRES_ARM_NEON; |
| for (size_t output_y_end = 2; output_y_end < 5; output_y_end++) { |
| for (size_t output_channels = 1; output_channels < 16; output_channels += 7) { |
| for (size_t input_width = 1; input_width < 32; input_width += 7) { |
| ConvHWCMicrokernelTester() |
| .kernel_size(3) |
| .subsampling(2) |
| .padding_width(1) |
| .input_channels(3) |
| .output_channels_tile(8) |
| .output_channels(output_channels) |
| .input_width(input_width) |
| .input_height(9) |
| .output_y_end(output_y_end) |
| .Test(xnn_f32_conv_hwc_ukernel_3x3s2p1c3x8__neon_2x2); |
| } |
| } |
| } |
| } |
| |
| TEST(F32_CONV_3X3S2P1C3X8__NEON_2X2, qmin) { |
| TEST_REQUIRES_ARM_NEON; |
| for (size_t output_channels = 1; output_channels < 16; output_channels += 7) { |
| for (size_t input_width = 1; input_width < 32; input_width += 7) { |
| ConvHWCMicrokernelTester() |
| .kernel_size(3) |
| .subsampling(2) |
| .padding_width(1) |
| .input_channels(3) |
| .output_channels_tile(8) |
| .output_channels(output_channels) |
| .input_width(input_width) |
| .input_height(6) |
| .qmin(128) |
| .Test(xnn_f32_conv_hwc_ukernel_3x3s2p1c3x8__neon_2x2); |
| } |
| } |
| } |
| |
| TEST(F32_CONV_3X3S2P1C3X8__NEON_2X2, qmax) { |
| TEST_REQUIRES_ARM_NEON; |
| for (size_t output_channels = 1; output_channels < 16; output_channels += 7) { |
| for (size_t input_width = 1; input_width < 32; input_width += 7) { |
| ConvHWCMicrokernelTester() |
| .kernel_size(3) |
| .subsampling(2) |
| .padding_width(1) |
| .input_channels(3) |
| .output_channels_tile(8) |
| .output_channels(output_channels) |
| .input_width(input_width) |
| .input_height(6) |
| .qmax(128) |
| .Test(xnn_f32_conv_hwc_ukernel_3x3s2p1c3x8__neon_2x2); |
| } |
| } |
| } |
| #endif // XNN_ARCH_ARM || XNN_ARCH_ARM64 |
| |
| #if XNN_ARCH_ARM || XNN_ARCH_ARM64 |
| TEST(F32_CONV_3X3S2P1C3X4__NEON_2X2, input_width_eq_4) { |
| TEST_REQUIRES_ARM_NEON; |
| ConvHWCMicrokernelTester() |
| .kernel_size(3) |
| .subsampling(2) |
| .padding_width(1) |
| .input_channels(3) |
| .output_channels_tile(4) |
| .output_channels(4) |
| .input_width(4) |
| .input_height(3) |
| .Test(xnn_f32_conv_hwc_ukernel_3x3s2p1c3x4__neon_2x2); |
| } |
| |
| TEST(F32_CONV_3X3S2P1C3X4__NEON_2X2, input_width_div_4) { |
| TEST_REQUIRES_ARM_NEON; |
| for (size_t input_width = 8; input_width <= 32; input_width += 12) { |
| ConvHWCMicrokernelTester() |
| .kernel_size(3) |
| .subsampling(2) |
| .padding_width(1) |
| .input_channels(3) |
| .output_channels_tile(4) |
| .output_channels(4) |
| .input_width(input_width) |
| .input_height(3) |
| .Test(xnn_f32_conv_hwc_ukernel_3x3s2p1c3x4__neon_2x2); |
| } |
| } |
| |
| TEST(F32_CONV_3X3S2P1C3X4__NEON_2X2, input_width_lt_4) { |
| TEST_REQUIRES_ARM_NEON; |
| for (size_t input_width = 1; input_width < 4; input_width++) { |
| ConvHWCMicrokernelTester() |
| .kernel_size(3) |
| .subsampling(2) |
| .padding_width(1) |
| .input_channels(3) |
| .output_channels_tile(4) |
| .output_channels(4) |
| .input_width(input_width) |
| .input_height(3) |
| .Test(xnn_f32_conv_hwc_ukernel_3x3s2p1c3x4__neon_2x2); |
| } |
| } |
| |
| TEST(F32_CONV_3X3S2P1C3X4__NEON_2X2, input_width_gt_4) { |
| TEST_REQUIRES_ARM_NEON; |
| for (size_t input_width = 5; input_width < 8; input_width++) { |
| ConvHWCMicrokernelTester() |
| .kernel_size(3) |
| .subsampling(2) |
| .padding_width(1) |
| .input_channels(3) |
| .output_channels_tile(4) |
| .output_channels(4) |
| .input_width(input_width) |
| .input_height(3) |
| .Test(xnn_f32_conv_hwc_ukernel_3x3s2p1c3x4__neon_2x2); |
| } |
| } |
| |
| TEST(F32_CONV_3X3S2P1C3X4__NEON_2X2, output_channels_lt_4) { |
| TEST_REQUIRES_ARM_NEON; |
| for (size_t output_channels = 1; output_channels < 4; output_channels++) { |
| for (size_t input_width = 1; input_width < 32; input_width += 7) { |
| ConvHWCMicrokernelTester() |
| .kernel_size(3) |
| .subsampling(2) |
| .padding_width(1) |
| .input_channels(3) |
| .output_channels_tile(4) |
| .output_channels(output_channels) |
| .input_width(input_width) |
| .input_height(3) |
| .Test(xnn_f32_conv_hwc_ukernel_3x3s2p1c3x4__neon_2x2); |
| } |
| } |
| } |
| |
| TEST(F32_CONV_3X3S2P1C3X4__NEON_2X2, output_channels_div_4) { |
| TEST_REQUIRES_ARM_NEON; |
| for (size_t output_channels = 8; output_channels <= 16; output_channels += 4) { |
| for (size_t input_width = 1; input_width < 32; input_width += 7) { |
| ConvHWCMicrokernelTester() |
| .kernel_size(3) |
| .subsampling(2) |
| .padding_width(1) |
| .input_channels(3) |
| .output_channels_tile(4) |
| .output_channels(output_channels) |
| .input_width(input_width) |
| .input_height(3) |
| .Test(xnn_f32_conv_hwc_ukernel_3x3s2p1c3x4__neon_2x2); |
| } |
| } |
| } |
| |
| TEST(F32_CONV_3X3S2P1C3X4__NEON_2X2, output_channels_gt_4) { |
| TEST_REQUIRES_ARM_NEON; |
| for (size_t output_channels = 5; output_channels < 8; output_channels++) { |
| for (size_t input_width = 1; input_width < 32; input_width += 7) { |
| ConvHWCMicrokernelTester() |
| .kernel_size(3) |
| .subsampling(2) |
| .padding_width(1) |
| .input_channels(3) |
| .output_channels_tile(4) |
| .output_channels(output_channels) |
| .input_width(input_width) |
| .input_height(3) |
| .Test(xnn_f32_conv_hwc_ukernel_3x3s2p1c3x4__neon_2x2); |
| } |
| } |
| } |
| |
| TEST(F32_CONV_3X3S2P1C3X4__NEON_2X2, input_height_lt_3) { |
| TEST_REQUIRES_ARM_NEON; |
| for (size_t input_height = 1; input_height < 3; input_height++) { |
| for (size_t output_channels = 1; output_channels < 8; output_channels += 3) { |
| for (size_t input_width = 1; input_width < 32; input_width += 7) { |
| ConvHWCMicrokernelTester() |
| .kernel_size(3) |
| .subsampling(2) |
| .padding(1) |
| .input_channels(3) // padded input height of at least 3 required |
| .output_channels_tile(4) |
| .output_channels(output_channels) |
| .input_width(input_width) |
| .input_height(input_height) |
| .Test(xnn_f32_conv_hwc_ukernel_3x3s2p1c3x4__neon_2x2); |
| } |
| } |
| } |
| } |
| |
| TEST(F32_CONV_3X3S2P1C3X4__NEON_2X2, input_height_gt_3) { |
| TEST_REQUIRES_ARM_NEON; |
| for (size_t input_height = 4; input_height <= 9; input_height++) { |
| for (size_t output_channels = 1; output_channels < 8; output_channels += 3) { |
| for (size_t input_width = 1; input_width < 32; input_width += 7) { |
| ConvHWCMicrokernelTester() |
| .kernel_size(3) |
| .subsampling(2) |
| .padding_width(1) |
| .input_channels(3) |
| .output_channels_tile(4) |
| .output_channels(output_channels) |
| .input_width(input_width) |
| .input_height(input_height) |
| .Test(xnn_f32_conv_hwc_ukernel_3x3s2p1c3x4__neon_2x2); |
| } |
| } |
| } |
| } |
| |
| TEST(F32_CONV_3X3S2P1C3X4__NEON_2X2, padding_top) { |
| TEST_REQUIRES_ARM_NEON; |
| for (size_t padding_top = 0; padding_top <= 1; padding_top++) { |
| for (size_t output_channels = 1; output_channels < 16; output_channels += 7) { |
| for (size_t input_width = 1; input_width < 32; input_width += 7) { |
| ConvHWCMicrokernelTester() |
| .kernel_size(3) |
| .subsampling(2) |
| .padding_width(1) |
| .padding_top(padding_top) |
| .input_channels(3) |
| .output_channels_tile(4) |
| .output_channels(output_channels) |
| .input_width(input_width) |
| .input_height(9) |
| .Test(xnn_f32_conv_hwc_ukernel_3x3s2p1c3x4__neon_2x2); |
| } |
| } |
| } |
| } |
| |
| TEST(F32_CONV_3X3S2P1C3X4__NEON_2X2, padding_bottom) { |
| TEST_REQUIRES_ARM_NEON; |
| for (size_t padding_bottom = 0; padding_bottom <= 1; padding_bottom++) { |
| for (size_t output_channels = 1; output_channels < 16; output_channels += 7) { |
| for (size_t input_width = 1; input_width < 32; input_width += 7) { |
| ConvHWCMicrokernelTester() |
| .kernel_size(3) |
| .subsampling(2) |
| .padding_width(1) |
| .padding_bottom(padding_bottom) |
| .input_channels(3) |
| .output_channels_tile(4) |
| .output_channels(output_channels) |
| .input_width(input_width) |
| .input_height(9) |
| .Test(xnn_f32_conv_hwc_ukernel_3x3s2p1c3x4__neon_2x2); |
| } |
| } |
| } |
| } |
| |
| TEST(F32_CONV_3X3S2P1C3X4__NEON_2X2, output_y_start) { |
| TEST_REQUIRES_ARM_NEON; |
| for (size_t output_y_start = 1; output_y_start <= 3; output_y_start++) { |
| for (size_t output_channels = 1; output_channels < 8; output_channels += 3) { |
| for (size_t input_width = 1; input_width < 32; input_width += 7) { |
| ConvHWCMicrokernelTester() |
| .kernel_size(3) |
| .subsampling(2) |
| .padding_width(1) |
| .input_channels(3) |
| .output_channels_tile(4) |
| .output_channels(output_channels) |
| .input_width(input_width) |
| .input_height(9) |
| .output_y_start(output_y_start) |
| .Test(xnn_f32_conv_hwc_ukernel_3x3s2p1c3x4__neon_2x2); |
| } |
| } |
| } |
| } |
| |
| TEST(F32_CONV_3X3S2P1C3X4__NEON_2X2, output_y_end) { |
| TEST_REQUIRES_ARM_NEON; |
| for (size_t output_y_end = 2; output_y_end < 5; output_y_end++) { |
| for (size_t output_channels = 1; output_channels < 8; output_channels += 3) { |
| for (size_t input_width = 1; input_width < 32; input_width += 7) { |
| ConvHWCMicrokernelTester() |
| .kernel_size(3) |
| .subsampling(2) |
| .padding_width(1) |
| .input_channels(3) |
| .output_channels_tile(4) |
| .output_channels(output_channels) |
| .input_width(input_width) |
| .input_height(9) |
| .output_y_end(output_y_end) |
| .Test(xnn_f32_conv_hwc_ukernel_3x3s2p1c3x4__neon_2x2); |
| } |
| } |
| } |
| } |
| |
| TEST(F32_CONV_3X3S2P1C3X4__NEON_2X2, qmin) { |
| TEST_REQUIRES_ARM_NEON; |
| for (size_t output_channels = 1; output_channels < 8; output_channels += 3) { |
| for (size_t input_width = 1; input_width < 32; input_width += 7) { |
| ConvHWCMicrokernelTester() |
| .kernel_size(3) |
| .subsampling(2) |
| .padding_width(1) |
| .input_channels(3) |
| .output_channels_tile(4) |
| .output_channels(output_channels) |
| .input_width(input_width) |
| .input_height(6) |
| .qmin(128) |
| .Test(xnn_f32_conv_hwc_ukernel_3x3s2p1c3x4__neon_2x2); |
| } |
| } |
| } |
| |
| TEST(F32_CONV_3X3S2P1C3X4__NEON_2X2, qmax) { |
| TEST_REQUIRES_ARM_NEON; |
| for (size_t output_channels = 1; output_channels < 8; output_channels += 3) { |
| for (size_t input_width = 1; input_width < 32; input_width += 7) { |
| ConvHWCMicrokernelTester() |
| .kernel_size(3) |
| .subsampling(2) |
| .padding_width(1) |
| .input_channels(3) |
| .output_channels_tile(4) |
| .output_channels(output_channels) |
| .input_width(input_width) |
| .input_height(6) |
| .qmax(128) |
| .Test(xnn_f32_conv_hwc_ukernel_3x3s2p1c3x4__neon_2x2); |
| } |
| } |
| } |
| #endif // XNN_ARCH_ARM || XNN_ARCH_ARM64 |
| |
| #if XNN_ARCH_ARM || XNN_ARCH_ARM64 |
| TEST(F32_CONV_3X3S2P0P1C3X8__NEON_2X2, input_width_eq_4) { |
| TEST_REQUIRES_ARM_NEON; |
| ConvHWCMicrokernelTester() |
| .kernel_size(3) |
| .subsampling(2) |
| .padding_right(1) |
| .input_channels(3) |
| .output_channels_tile(8) |
| .output_channels(8) |
| .input_width(4) |
| .input_height(3) |
| .Test(xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x8__neon_2x2); |
| } |
| |
| TEST(F32_CONV_3X3S2P0P1C3X8__NEON_2X2, input_width_div_4) { |
| TEST_REQUIRES_ARM_NEON; |
| for (size_t input_width = 8; input_width <= 32; input_width += 12) { |
| ConvHWCMicrokernelTester() |
| .kernel_size(3) |
| .subsampling(2) |
| .padding_right(1) |
| .input_channels(3) |
| .output_channels_tile(8) |
| .output_channels(8) |
| .input_width(input_width) |
| .input_height(3) |
| .Test(xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x8__neon_2x2); |
| } |
| } |
| |
| TEST(F32_CONV_3X3S2P0P1C3X8__NEON_2X2, input_width_lt_4) { |
| TEST_REQUIRES_ARM_NEON; |
| for (size_t input_width = 2; input_width < 4; input_width++) { |
| ConvHWCMicrokernelTester() |
| .kernel_size(3) |
| .subsampling(2) |
| .padding_right(1) |
| .input_channels(3) |
| .output_channels_tile(8) |
| .output_channels(8) |
| .input_width(input_width) |
| .input_height(3) |
| .Test(xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x8__neon_2x2); |
| } |
| } |
| |
| TEST(F32_CONV_3X3S2P0P1C3X8__NEON_2X2, input_width_gt_4) { |
| TEST_REQUIRES_ARM_NEON; |
| for (size_t input_width = 5; input_width < 8; input_width++) { |
| ConvHWCMicrokernelTester() |
| .kernel_size(3) |
| .subsampling(2) |
| .padding_right(1) |
| .input_channels(3) |
| .output_channels_tile(8) |
| .output_channels(8) |
| .input_width(input_width) |
| .input_height(3) |
| .Test(xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x8__neon_2x2); |
| } |
| } |
| |
| TEST(F32_CONV_3X3S2P0P1C3X8__NEON_2X2, output_channels_lt_8) { |
| TEST_REQUIRES_ARM_NEON; |
| for (size_t output_channels = 1; output_channels < 8; output_channels++) { |
| for (size_t input_width = 2; input_width < 32; input_width += 7) { |
| ConvHWCMicrokernelTester() |
| .kernel_size(3) |
| .subsampling(2) |
| .padding_right(1) |
| .input_channels(3) |
| .output_channels_tile(8) |
| .output_channels(output_channels) |
| .input_width(input_width) |
| .input_height(3) |
| .Test(xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x8__neon_2x2); |
| } |
| } |
| } |
| |
| TEST(F32_CONV_3X3S2P0P1C3X8__NEON_2X2, output_channels_div_8) { |
| TEST_REQUIRES_ARM_NEON; |
| for (size_t output_channels = 16; output_channels <= 32; output_channels += 8) { |
| for (size_t input_width = 2; input_width < 32; input_width += 7) { |
| ConvHWCMicrokernelTester() |
| .kernel_size(3) |
| .subsampling(2) |
| .padding_right(1) |
| .input_channels(3) |
| .output_channels_tile(8) |
| .output_channels(output_channels) |
| .input_width(input_width) |
| .input_height(3) |
| .Test(xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x8__neon_2x2); |
| } |
| } |
| } |
| |
| TEST(F32_CONV_3X3S2P0P1C3X8__NEON_2X2, output_channels_gt_8) { |
| TEST_REQUIRES_ARM_NEON; |
| for (size_t output_channels = 9; output_channels < 16; output_channels++) { |
| for (size_t input_width = 2; input_width < 32; input_width += 7) { |
| ConvHWCMicrokernelTester() |
| .kernel_size(3) |
| .subsampling(2) |
| .padding_right(1) |
| .input_channels(3) |
| .output_channels_tile(8) |
| .output_channels(output_channels) |
| .input_width(input_width) |
| .input_height(3) |
| .Test(xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x8__neon_2x2); |
| } |
| } |
| } |
| |
| TEST(F32_CONV_3X3S2P0P1C3X8__NEON_2X2, input_height_lt_3) { |
| TEST_REQUIRES_ARM_NEON; |
| for (size_t input_height = 1; input_height < 3; input_height++) { |
| for (size_t output_channels = 1; output_channels < 16; output_channels += 7) { |
| for (size_t input_width = 2; input_width < 32; input_width += 7) { |
| ConvHWCMicrokernelTester() |
| .kernel_size(3) |
| .subsampling(2) |
| .padding_height(1) |
| .padding_right(1) |
| .input_channels(3) |
| .output_channels_tile(8) |
| .output_channels(output_channels) |
| .input_width(input_width) |
| .input_height(input_height) |
| .Test(xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x8__neon_2x2); |
| } |
| } |
| } |
| } |
| |
| TEST(F32_CONV_3X3S2P0P1C3X8__NEON_2X2, input_height_gt_3) { |
| TEST_REQUIRES_ARM_NEON; |
| for (size_t input_height = 4; input_height <= 9; input_height++) { |
| for (size_t output_channels = 1; output_channels < 16; output_channels += 7) { |
| for (size_t input_width = 2; input_width < 32; input_width += 7) { |
| ConvHWCMicrokernelTester() |
| .kernel_size(3) |
| .subsampling(2) |
| .padding_right(1) |
| .input_channels(3) |
| .output_channels_tile(8) |
| .output_channels(output_channels) |
| .input_width(input_width) |
| .input_height(input_height) |
| .Test(xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x8__neon_2x2); |
| } |
| } |
| } |
| } |
| |
| TEST(F32_CONV_3X3S2P0P1C3X8__NEON_2X2, padding_top) { |
| TEST_REQUIRES_ARM_NEON; |
| for (size_t padding_top = 0; padding_top <= 1; padding_top++) { |
| for (size_t output_channels = 1; output_channels < 16; output_channels += 7) { |
| for (size_t input_width = 2; input_width < 32; input_width += 7) { |
| ConvHWCMicrokernelTester() |
| .kernel_size(3) |
| .subsampling(2) |
| .padding_right(1) |
| .padding_top(padding_top) |
| .input_channels(3) |
| .output_channels_tile(8) |
| .output_channels(output_channels) |
| .input_width(input_width) |
| .input_height(9) |
| .Test(xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x8__neon_2x2); |
| } |
| } |
| } |
| } |
| |
| TEST(F32_CONV_3X3S2P0P1C3X8__NEON_2X2, padding_bottom) { |
| TEST_REQUIRES_ARM_NEON; |
| for (size_t padding_bottom = 0; padding_bottom <= 1; padding_bottom++) { |
| for (size_t output_channels = 1; output_channels < 16; output_channels += 7) { |
| for (size_t input_width = 2; input_width < 32; input_width += 7) { |
| ConvHWCMicrokernelTester() |
| .kernel_size(3) |
| .subsampling(2) |
| .padding_right(1) |
| .padding_bottom(padding_bottom) |
| .input_channels(3) |
| .output_channels_tile(8) |
| .output_channels(output_channels) |
| .input_width(input_width) |
| .input_height(9) |
| .Test(xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x8__neon_2x2); |
| } |
| } |
| } |
| } |
| |
| TEST(F32_CONV_3X3S2P0P1C3X8__NEON_2X2, output_y_start) { |
| TEST_REQUIRES_ARM_NEON; |
| for (size_t output_y_start = 1; output_y_start <= 3; output_y_start++) { |
| for (size_t output_channels = 1; output_channels < 16; output_channels += 7) { |
| for (size_t input_width = 2; input_width < 32; input_width += 7) { |
| ConvHWCMicrokernelTester() |
| .kernel_size(3) |
| .subsampling(2) |
| .padding_right(1) |
| .input_channels(3) |
| .output_channels_tile(8) |
| .output_channels(output_channels) |
| .input_width(input_width) |
| .input_height(9) |
| .output_y_start(output_y_start) |
| .Test(xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x8__neon_2x2); |
| } |
| } |
| } |
| } |
| |
| TEST(F32_CONV_3X3S2P0P1C3X8__NEON_2X2, output_y_end) { |
| TEST_REQUIRES_ARM_NEON; |
| for (size_t output_y_end = 2; output_y_end < 5; output_y_end++) { |
| for (size_t output_channels = 1; output_channels < 16; output_channels += 7) { |
| for (size_t input_width = 2; input_width < 32; input_width += 7) { |
| ConvHWCMicrokernelTester() |
| .kernel_size(3) |
| .subsampling(2) |
| .padding_right(1) |
| .input_channels(3) |
| .output_channels_tile(8) |
| .output_channels(output_channels) |
| .input_width(input_width) |
| .input_height(9) |
| .output_y_end(output_y_end) |
| .Test(xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x8__neon_2x2); |
| } |
| } |
| } |
| } |
| |
| TEST(F32_CONV_3X3S2P0P1C3X8__NEON_2X2, qmin) { |
| TEST_REQUIRES_ARM_NEON; |
| for (size_t output_channels = 1; output_channels < 16; output_channels += 7) { |
| for (size_t input_width = 2; input_width < 32; input_width += 7) { |
| ConvHWCMicrokernelTester() |
| .kernel_size(3) |
| .subsampling(2) |
| .padding_right(1) |
| .input_channels(3) |
| .output_channels_tile(8) |
| .output_channels(output_channels) |
| .input_width(input_width) |
| .input_height(6) |
| .qmin(128) |
| .Test(xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x8__neon_2x2); |
| } |
| } |
| } |
| |
| TEST(F32_CONV_3X3S2P0P1C3X8__NEON_2X2, qmax) { |
| TEST_REQUIRES_ARM_NEON; |
| for (size_t output_channels = 1; output_channels < 16; output_channels += 7) { |
| for (size_t input_width = 2; input_width < 32; input_width += 7) { |
| ConvHWCMicrokernelTester() |
| .kernel_size(3) |
| .subsampling(2) |
| .padding_right(1) |
| .input_channels(3) |
| .output_channels_tile(8) |
| .output_channels(output_channels) |
| .input_width(input_width) |
| .input_height(6) |
| .qmax(128) |
| .Test(xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x8__neon_2x2); |
| } |
| } |
| } |
| #endif // XNN_ARCH_ARM || XNN_ARCH_ARM64 |
| |
| #if XNN_ARCH_ARM || XNN_ARCH_ARM64 |
| TEST(F32_CONV_3X3S2P0P1C3X4__NEON_2X2, input_width_eq_4) { |
| TEST_REQUIRES_ARM_NEON; |
| ConvHWCMicrokernelTester() |
| .kernel_size(3) |
| .subsampling(2) |
| .padding_right(1) |
| .input_channels(3) |
| .output_channels_tile(4) |
| .output_channels(4) |
| .input_width(5) |
| .input_height(3) |
| .Test(xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x4__neon_2x2); |
| } |
| |
| TEST(F32_CONV_3X3S2P0P1C3X4__NEON_2X2, input_width_div_4) { |
| TEST_REQUIRES_ARM_NEON; |
| for (size_t input_width = 8; input_width <= 32; input_width += 12) { |
| ConvHWCMicrokernelTester() |
| .kernel_size(3) |
| .subsampling(2) |
| .padding_right(1) |
| .input_channels(3) |
| .output_channels_tile(4) |
| .output_channels(4) |
| .input_width(input_width) |
| .input_height(3) |
| .Test(xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x4__neon_2x2); |
| } |
| } |
| |
| TEST(F32_CONV_3X3S2P0P1C3X4__NEON_2X2, input_width_lt_4) { |
| TEST_REQUIRES_ARM_NEON; |
| for (size_t input_width = 2; input_width < 4; input_width++) { |
| ConvHWCMicrokernelTester() |
| .kernel_size(3) |
| .subsampling(2) |
| .padding_right(1) |
| .input_channels(3) |
| .output_channels_tile(4) |
| .output_channels(4) |
| .input_width(input_width) |
| .input_height(3) |
| .Test(xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x4__neon_2x2); |
| } |
| } |
| |
| TEST(F32_CONV_3X3S2P0P1C3X4__NEON_2X2, input_width_gt_4) { |
| TEST_REQUIRES_ARM_NEON; |
| for (size_t input_width = 5; input_width < 8; input_width++) { |
| ConvHWCMicrokernelTester() |
| .kernel_size(3) |
| .subsampling(2) |
| .padding_right(1) |
| .input_channels(3) |
| .output_channels_tile(4) |
| .output_channels(4) |
| .input_width(input_width) |
| .input_height(3) |
| .Test(xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x4__neon_2x2); |
| } |
| } |
| |
| TEST(F32_CONV_3X3S2P0P1C3X4__NEON_2X2, output_channels_lt_4) { |
| TEST_REQUIRES_ARM_NEON; |
| for (size_t output_channels = 1; output_channels < 4; output_channels++) { |
| for (size_t input_width = 2; input_width < 32; input_width += 7) { |
| ConvHWCMicrokernelTester() |
| .kernel_size(3) |
| .subsampling(2) |
| .padding_right(1) |
| .input_channels(3) |
| .output_channels_tile(4) |
| .output_channels(output_channels) |
| .input_width(input_width) |
| .input_height(3) |
| .Test(xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x4__neon_2x2); |
| } |
| } |
| } |
| |
| TEST(F32_CONV_3X3S2P0P1C3X4__NEON_2X2, output_channels_div_4) { |
| TEST_REQUIRES_ARM_NEON; |
| for (size_t output_channels = 8; output_channels <= 16; output_channels += 4) { |
| for (size_t input_width = 2; input_width < 32; input_width += 7) { |
| ConvHWCMicrokernelTester() |
| .kernel_size(3) |
| .subsampling(2) |
| .padding_right(1) |
| .input_channels(3) |
| .output_channels_tile(4) |
| .output_channels(output_channels) |
| .input_width(input_width) |
| .input_height(3) |
| .Test(xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x4__neon_2x2); |
| } |
| } |
| } |
| |
| TEST(F32_CONV_3X3S2P0P1C3X4__NEON_2X2, output_channels_gt_4) { |
| TEST_REQUIRES_ARM_NEON; |
| for (size_t output_channels = 5; output_channels < 8; output_channels++) { |
| for (size_t input_width = 2; input_width < 32; input_width += 7) { |
| ConvHWCMicrokernelTester() |
| .kernel_size(3) |
| .subsampling(2) |
| .padding_right(1) |
| .input_channels(3) |
| .output_channels_tile(4) |
| .output_channels(output_channels) |
| .input_width(input_width) |
| .input_height(3) |
| .Test(xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x4__neon_2x2); |
| } |
| } |
| } |
| |
| TEST(F32_CONV_3X3S2P0P1C3X4__NEON_2X2, input_height_lt_3) { |
| TEST_REQUIRES_ARM_NEON; |
| for (size_t input_height = 1; input_height < 3; input_height++) { |
| for (size_t output_channels = 1; output_channels < 8; output_channels += 3) { |
| for (size_t input_width = 2; input_width < 32; input_width += 7) { |
| ConvHWCMicrokernelTester() |
| .kernel_size(3) |
| .subsampling(2) |
| .padding_right(1) |
| .padding_height(1) // padded input height of at least 3 required |
| .input_channels(3) |
| .output_channels_tile(4) |
| .output_channels(output_channels) |
| .input_width(input_width) |
| .input_height(input_height) |
| .Test(xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x4__neon_2x2); |
| } |
| } |
| } |
| } |
| |
| TEST(F32_CONV_3X3S2P0P1C3X4__NEON_2X2, input_height_gt_3) { |
| TEST_REQUIRES_ARM_NEON; |
| for (size_t input_height = 4; input_height <= 9; input_height++) { |
| for (size_t output_channels = 1; output_channels < 8; output_channels += 3) { |
| for (size_t input_width = 2; input_width < 32; input_width += 7) { |
| ConvHWCMicrokernelTester() |
| .kernel_size(3) |
| .subsampling(2) |
| .padding_right(1) |
| .input_channels(3) |
| .output_channels_tile(4) |
| .output_channels(output_channels) |
| .input_width(input_width) |
| .input_height(input_height) |
| .Test(xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x4__neon_2x2); |
| } |
| } |
| } |
| } |
| |
| TEST(F32_CONV_3X3S2P0P1C3X4__NEON_2X2, padding_top) { |
| TEST_REQUIRES_ARM_NEON; |
| for (size_t padding_top = 0; padding_top <= 1; padding_top++) { |
| for (size_t output_channels = 1; output_channels < 16; output_channels += 7) { |
| for (size_t input_width = 2; input_width < 32; input_width += 7) { |
| ConvHWCMicrokernelTester() |
| .kernel_size(3) |
| .subsampling(2) |
| .padding_right(1) |
| .padding_top(padding_top) |
| .input_channels(3) |
| .output_channels_tile(4) |
| .output_channels(output_channels) |
| .input_width(input_width) |
| .input_height(9) |
| .Test(xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x4__neon_2x2); |
| } |
| } |
| } |
| } |
| |
| TEST(F32_CONV_3X3S2P0P1C3X4__NEON_2X2, padding_bottom) { |
| TEST_REQUIRES_ARM_NEON; |
| for (size_t padding_bottom = 0; padding_bottom <= 1; padding_bottom++) { |
| for (size_t output_channels = 1; output_channels < 16; output_channels += 7) { |
| for (size_t input_width = 2; input_width < 32; input_width += 7) { |
| ConvHWCMicrokernelTester() |
| .kernel_size(3) |
| .subsampling(2) |
| .padding_right(1) |
| .padding_bottom(padding_bottom) |
| .input_channels(3) |
| .output_channels_tile(4) |
| .output_channels(output_channels) |
| .input_width(input_width) |
| .input_height(9) |
| .Test(xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x4__neon_2x2); |
| } |
| } |
| } |
| } |
| |
| TEST(F32_CONV_3X3S2P0P1C3X4__NEON_2X2, output_y_start) { |
| TEST_REQUIRES_ARM_NEON; |
| for (size_t output_y_start = 1; output_y_start <= 3; output_y_start++) { |
| for (size_t output_channels = 1; output_channels < 8; output_channels += 3) { |
| for (size_t input_width = 2; input_width < 32; input_width += 7) { |
| ConvHWCMicrokernelTester() |
| .kernel_size(3) |
| .subsampling(2) |
| .padding_right(1) |
| .input_channels(3) |
| .output_channels_tile(4) |
| .output_channels(output_channels) |
| .input_width(input_width) |
| .input_height(9) |
| .output_y_start(output_y_start) |
| .Test(xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x4__neon_2x2); |
| } |
| } |
| } |
| } |
| |
| TEST(F32_CONV_3X3S2P0P1C3X4__NEON_2X2, output_y_end) { |
| TEST_REQUIRES_ARM_NEON; |
| for (size_t output_y_end = 2; output_y_end < 5; output_y_end++) { |
| for (size_t output_channels = 1; output_channels < 8; output_channels += 3) { |
| for (size_t input_width = 2; input_width < 32; input_width += 7) { |
| ConvHWCMicrokernelTester() |
| .kernel_size(3) |
| .subsampling(2) |
| .padding_right(1) |
| .input_channels(3) |
| .output_channels_tile(4) |
| .output_channels(output_channels) |
| .input_width(input_width) |
| .input_height(9) |
| .output_y_end(output_y_end) |
| .Test(xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x4__neon_2x2); |
| } |
| } |
| } |
| } |
| |
| TEST(F32_CONV_3X3S2P0P1C3X4__NEON_2X2, qmin) { |
| TEST_REQUIRES_ARM_NEON; |
| for (size_t output_channels = 1; output_channels < 8; output_channels += 3) { |
| for (size_t input_width = 2; input_width < 32; input_width += 7) { |
| ConvHWCMicrokernelTester() |
| .kernel_size(3) |
| .subsampling(2) |
| .padding_right(1) |
| .input_channels(3) |
| .output_channels_tile(4) |
| .output_channels(output_channels) |
| .input_width(input_width) |
| .input_height(6) |
| .qmin(128) |
| .Test(xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x4__neon_2x2); |
| } |
| } |
| } |
| |
| TEST(F32_CONV_3X3S2P0P1C3X4__NEON_2X2, qmax) { |
| TEST_REQUIRES_ARM_NEON; |
| for (size_t output_channels = 1; output_channels < 8; output_channels += 3) { |
| for (size_t input_width = 2; input_width < 32; input_width += 7) { |
| ConvHWCMicrokernelTester() |
| .kernel_size(3) |
| .subsampling(2) |
| .padding_right(1) |
| .input_channels(3) |
| .output_channels_tile(4) |
| .output_channels(output_channels) |
| .input_width(input_width) |
| .input_height(6) |
| .qmax(128) |
| .Test(xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x4__neon_2x2); |
| } |
| } |
| } |
| #endif // XNN_ARCH_ARM || XNN_ARCH_ARM64 |
| |
| #if XNN_ARCH_ARM64 |
| TEST(F32_CONV_3X3S2P1C3X8__NEONFMA_2X2, input_width_eq_4) { |
| TEST_REQUIRES_ARM_NEON_FMA; |
| ConvHWCMicrokernelTester() |
| .kernel_size(3) |
| .subsampling(2) |
| .padding_width(1) |
| .input_channels(3) |
| .output_channels_tile(8) |
| .output_channels(8) |
| .input_width(4) |
| .input_height(3) |
| .Test(xnn_f32_conv_hwc_ukernel_3x3s2p1c3x8__neonfma_2x2); |
| } |
| |
| TEST(F32_CONV_3X3S2P1C3X8__NEONFMA_2X2, input_width_div_4) { |
| TEST_REQUIRES_ARM_NEON_FMA; |
| for (size_t input_width = 8; input_width <= 32; input_width += 12) { |
| ConvHWCMicrokernelTester() |
| .kernel_size(3) |
| .subsampling(2) |
| .padding_width(1) |
| .input_channels(3) |
| .output_channels_tile(8) |
| .output_channels(8) |
| .input_width(input_width) |
| .input_height(3) |
| .Test(xnn_f32_conv_hwc_ukernel_3x3s2p1c3x8__neonfma_2x2); |
| } |
| } |
| |
| TEST(F32_CONV_3X3S2P1C3X8__NEONFMA_2X2, input_width_lt_4) { |
| TEST_REQUIRES_ARM_NEON_FMA; |
| for (size_t input_width = 1; input_width < 4; input_width++) { |
| ConvHWCMicrokernelTester() |
| .kernel_size(3) |
| .subsampling(2) |
| .padding_width(1) |
| .input_channels(3) |
| .output_channels_tile(8) |
| .output_channels(8) |
| .input_width(input_width) |
| .input_height(3) |
| .Test(xnn_f32_conv_hwc_ukernel_3x3s2p1c3x8__neonfma_2x2); |
| } |
| } |
| |
| TEST(F32_CONV_3X3S2P1C3X8__NEONFMA_2X2, input_width_gt_4) { |
| TEST_REQUIRES_ARM_NEON_FMA; |
| for (size_t input_width = 5; input_width < 8; input_width++) { |
| ConvHWCMicrokernelTester() |
| .kernel_size(3) |
| .subsampling(2) |
| .padding_width(1) |
| .input_channels(3) |
| .output_channels_tile(8) |
| .output_channels(8) |
| .input_width(input_width) |
| .input_height(3) |
| .Test(xnn_f32_conv_hwc_ukernel_3x3s2p1c3x8__neonfma_2x2); |
| } |
| } |
| |
| TEST(F32_CONV_3X3S2P1C3X8__NEONFMA_2X2, output_channels_lt_8) { |
| TEST_REQUIRES_ARM_NEON_FMA; |
| for (size_t output_channels = 1; output_channels < 8; output_channels++) { |
| for (size_t input_width = 1; input_width < 32; input_width += 7) { |
| ConvHWCMicrokernelTester() |
| .kernel_size(3) |
| .subsampling(2) |
| .padding_width(1) |
| .input_channels(3) |
| .output_channels_tile(8) |
| .output_channels(output_channels) |
| .input_width(input_width) |
| .input_height(3) |
| .Test(xnn_f32_conv_hwc_ukernel_3x3s2p1c3x8__neonfma_2x2); |
| } |
| } |
| } |
| |
| TEST(F32_CONV_3X3S2P1C3X8__NEONFMA_2X2, output_channels_div_8) { |
| TEST_REQUIRES_ARM_NEON_FMA; |
| for (size_t output_channels = 16; output_channels <= 32; output_channels += 8) { |
| for (size_t input_width = 1; input_width < 32; input_width += 7) { |
| ConvHWCMicrokernelTester() |
| .kernel_size(3) |
| .subsampling(2) |
| .padding_width(1) |
| .input_channels(3) |
| .output_channels_tile(8) |
| .output_channels(output_channels) |
| .input_width(input_width) |
| .input_height(3) |
| .Test(xnn_f32_conv_hwc_ukernel_3x3s2p1c3x8__neonfma_2x2); |
| } |
| } |
| } |
| |
| TEST(F32_CONV_3X3S2P1C3X8__NEONFMA_2X2, output_channels_gt_8) { |
| TEST_REQUIRES_ARM_NEON_FMA; |
| for (size_t output_channels = 9; output_channels < 16; output_channels++) { |
| for (size_t input_width = 1; input_width < 32; input_width += 7) { |
| ConvHWCMicrokernelTester() |
| .kernel_size(3) |
| .subsampling(2) |
| .padding_width(1) |
| .input_channels(3) |
| .output_channels_tile(8) |
| .output_channels(output_channels) |
| .input_width(input_width) |
| .input_height(3) |
| .Test(xnn_f32_conv_hwc_ukernel_3x3s2p1c3x8__neonfma_2x2); |
| } |
| } |
| } |
| |
| TEST(F32_CONV_3X3S2P1C3X8__NEONFMA_2X2, input_height_lt_3) { |
| TEST_REQUIRES_ARM_NEON_FMA; |
| for (size_t input_height = 1; input_height < 3; input_height++) { |
| for (size_t output_channels = 1; output_channels < 16; output_channels += 7) { |
| for (size_t input_width = 1; input_width < 32; input_width += 7) { |
| ConvHWCMicrokernelTester() |
| .kernel_size(3) |
| .subsampling(2) |
| .padding(1) // padded input height of at least 3 required |
| .input_channels(3) |
| .output_channels_tile(8) |
| .output_channels(output_channels) |
| .input_width(input_width) |
| .input_height(input_height) |
| .Test(xnn_f32_conv_hwc_ukernel_3x3s2p1c3x8__neonfma_2x2); |
| } |
| } |
| } |
| } |
| |
| TEST(F32_CONV_3X3S2P1C3X8__NEONFMA_2X2, input_height_gt_3) { |
| TEST_REQUIRES_ARM_NEON_FMA; |
| for (size_t input_height = 4; input_height <= 9; input_height++) { |
| for (size_t output_channels = 1; output_channels < 16; output_channels += 7) { |
| for (size_t input_width = 1; input_width < 32; input_width += 7) { |
| ConvHWCMicrokernelTester() |
| .kernel_size(3) |
| .subsampling(2) |
| .padding_width(1) |
| .input_channels(3) |
| .output_channels_tile(8) |
| .output_channels(output_channels) |
| .input_width(input_width) |
| .input_height(input_height) |
| .Test(xnn_f32_conv_hwc_ukernel_3x3s2p1c3x8__neonfma_2x2); |
| } |
| } |
| } |
| } |
| |
| TEST(F32_CONV_3X3S2P1C3X8__NEONFMA_2X2, padding_top) { |
| TEST_REQUIRES_ARM_NEON_FMA; |
| for (size_t padding_top = 0; padding_top <= 1; padding_top++) { |
| for (size_t output_channels = 1; output_channels < 16; output_channels += 7) { |
| for (size_t input_width = 1; input_width < 32; input_width += 7) { |
| ConvHWCMicrokernelTester() |
| .kernel_size(3) |
| .subsampling(2) |
| .padding_width(1) |
| .padding_top(padding_top) |
| .input_channels(3) |
| .output_channels_tile(8) |
| .output_channels(output_channels) |
| .input_width(input_width) |
| .input_height(9) |
| .Test(xnn_f32_conv_hwc_ukernel_3x3s2p1c3x8__neonfma_2x2); |
| } |
| } |
| } |
| } |
| |
| TEST(F32_CONV_3X3S2P1C3X8__NEONFMA_2X2, padding_bottom) { |
| TEST_REQUIRES_ARM_NEON_FMA; |
| for (size_t padding_bottom = 0; padding_bottom <= 1; padding_bottom++) { |
| for (size_t output_channels = 1; output_channels < 16; output_channels += 7) { |
| for (size_t input_width = 1; input_width < 32; input_width += 7) { |
| ConvHWCMicrokernelTester() |
| .kernel_size(3) |
| .subsampling(2) |
| .padding_width(1) |
| .padding_bottom(padding_bottom) |
| .input_channels(3) |
| .output_channels_tile(8) |
| .output_channels(output_channels) |
| .input_width(input_width) |
| .input_height(9) |
| .Test(xnn_f32_conv_hwc_ukernel_3x3s2p1c3x8__neonfma_2x2); |
| } |
| } |
| } |
| } |
| |
| TEST(F32_CONV_3X3S2P1C3X8__NEONFMA_2X2, output_y_start) { |
| TEST_REQUIRES_ARM_NEON_FMA; |
| for (size_t output_y_start = 1; output_y_start <= 3; output_y_start++) { |
| for (size_t output_channels = 1; output_channels < 16; output_channels += 7) { |
| for (size_t input_width = 1; input_width < 32; input_width += 7) { |
| ConvHWCMicrokernelTester() |
| .kernel_size(3) |
| .subsampling(2) |
| .padding_width(1) |
| .input_channels(3) |
| .output_channels_tile(8) |
| .output_channels(output_channels) |
| .input_width(input_width) |
| .input_height(9) |
| .output_y_start(output_y_start) |
| .Test(xnn_f32_conv_hwc_ukernel_3x3s2p1c3x8__neonfma_2x2); |
| } |
| } |
| } |
| } |
| |
| TEST(F32_CONV_3X3S2P1C3X8__NEONFMA_2X2, output_y_end) { |
| TEST_REQUIRES_ARM_NEON_FMA; |
| for (size_t output_y_end = 2; output_y_end < 5; output_y_end++) { |
| for (size_t output_channels = 1; output_channels < 16; output_channels += 7) { |
| for (size_t input_width = 1; input_width < 32; input_width += 7) { |
| ConvHWCMicrokernelTester() |
| .kernel_size(3) |
| .subsampling(2) |
| .padding_width(1) |
| .input_channels(3) |
| .output_channels_tile(8) |
| .output_channels(output_channels) |
| .input_width(input_width) |
| .input_height(9) |
| .output_y_end(output_y_end) |
| .Test(xnn_f32_conv_hwc_ukernel_3x3s2p1c3x8__neonfma_2x2); |
| } |
| } |
| } |
| } |
| |
| TEST(F32_CONV_3X3S2P1C3X8__NEONFMA_2X2, qmin) { |
| TEST_REQUIRES_ARM_NEON_FMA; |
| for (size_t output_channels = 1; output_channels < 16; output_channels += 7) { |
| for (size_t input_width = 1; input_width < 32; input_width += 7) { |
| ConvHWCMicrokernelTester() |
| .kernel_size(3) |
| .subsampling(2) |
| .padding_width(1) |
| .input_channels(3) |
| .output_channels_tile(8) |
| .output_channels(output_channels) |
| .input_width(input_width) |
| .input_height(6) |
| .qmin(128) |
| .Test(xnn_f32_conv_hwc_ukernel_3x3s2p1c3x8__neonfma_2x2); |
| } |
| } |
| } |
| |
| TEST(F32_CONV_3X3S2P1C3X8__NEONFMA_2X2, qmax) { |
| TEST_REQUIRES_ARM_NEON_FMA; |
| for (size_t output_channels = 1; output_channels < 16; output_channels += 7) { |
| for (size_t input_width = 1; input_width < 32; input_width += 7) { |
| ConvHWCMicrokernelTester() |
| .kernel_size(3) |
| .subsampling(2) |
| .padding_width(1) |
| .input_channels(3) |
| .output_channels_tile(8) |
| .output_channels(output_channels) |
| .input_width(input_width) |
| .input_height(6) |
| .qmax(128) |
| .Test(xnn_f32_conv_hwc_ukernel_3x3s2p1c3x8__neonfma_2x2); |
| } |
| } |
| } |
| #endif // XNN_ARCH_ARM64 |
| |
| #if XNN_ARCH_ARM64 |
| TEST(F32_CONV_3X3S2P1C3X4__NEONFMA_2X2, input_width_eq_4) { |
| TEST_REQUIRES_ARM_NEON_FMA; |
| ConvHWCMicrokernelTester() |
| .kernel_size(3) |
| .subsampling(2) |
| .padding_width(1) |
| .input_channels(3) |
| .output_channels_tile(4) |
| .output_channels(4) |
| .input_width(4) |
| .input_height(3) |
| .Test(xnn_f32_conv_hwc_ukernel_3x3s2p1c3x4__neonfma_2x2); |
| } |
| |
| TEST(F32_CONV_3X3S2P1C3X4__NEONFMA_2X2, input_width_div_4) { |
| TEST_REQUIRES_ARM_NEON_FMA; |
| for (size_t input_width = 8; input_width <= 32; input_width += 12) { |
| ConvHWCMicrokernelTester() |
| .kernel_size(3) |
| .subsampling(2) |
| .padding_width(1) |
| .input_channels(3) |
| .output_channels_tile(4) |
| .output_channels(4) |
| .input_width(input_width) |
| .input_height(3) |
| .Test(xnn_f32_conv_hwc_ukernel_3x3s2p1c3x4__neonfma_2x2); |
| } |
| } |
| |
| TEST(F32_CONV_3X3S2P1C3X4__NEONFMA_2X2, input_width_lt_4) { |
| TEST_REQUIRES_ARM_NEON_FMA; |
| for (size_t input_width = 1; input_width < 4; input_width++) { |
| ConvHWCMicrokernelTester() |
| .kernel_size(3) |
| .subsampling(2) |
| .padding_width(1) |
| .input_channels(3) |
| .output_channels_tile(4) |
| .output_channels(4) |
| .input_width(input_width) |
| .input_height(3) |
| .Test(xnn_f32_conv_hwc_ukernel_3x3s2p1c3x4__neonfma_2x2); |
| } |
| } |
| |
| TEST(F32_CONV_3X3S2P1C3X4__NEONFMA_2X2, input_width_gt_4) { |
| TEST_REQUIRES_ARM_NEON_FMA; |
| for (size_t input_width = 5; input_width < 8; input_width++) { |
| ConvHWCMicrokernelTester() |
| .kernel_size(3) |
| .subsampling(2) |
| .padding_width(1) |
| .input_channels(3) |
| .output_channels_tile(4) |
| .output_channels(4) |
| .input_width(input_width) |
| .input_height(3) |
| .Test(xnn_f32_conv_hwc_ukernel_3x3s2p1c3x4__neonfma_2x2); |
| } |
| } |
| |
| TEST(F32_CONV_3X3S2P1C3X4__NEONFMA_2X2, output_channels_lt_4) { |
| TEST_REQUIRES_ARM_NEON_FMA; |
| for (size_t output_channels = 1; output_channels < 4; output_channels++) { |
| for (size_t input_width = 1; input_width < 32; input_width += 7) { |
| ConvHWCMicrokernelTester() |
| .kernel_size(3) |
| .subsampling(2) |
| .padding_width(1) |
| .input_channels(3) |
| .output_channels_tile(4) |
| .output_channels(output_channels) |
| .input_width(input_width) |
| .input_height(3) |
| .Test(xnn_f32_conv_hwc_ukernel_3x3s2p1c3x4__neonfma_2x2); |
| } |
| } |
| } |
| |
| TEST(F32_CONV_3X3S2P1C3X4__NEONFMA_2X2, output_channels_div_4) { |
| TEST_REQUIRES_ARM_NEON_FMA; |
| for (size_t output_channels = 8; output_channels <= 16; output_channels += 4) { |
| for (size_t input_width = 1; input_width < 32; input_width += 7) { |
| ConvHWCMicrokernelTester() |
| .kernel_size(3) |
| .subsampling(2) |
| .padding_width(1) |
| .input_channels(3) |
| .output_channels_tile(4) |
| .output_channels(output_channels) |
| .input_width(input_width) |
| .input_height(3) |
| .Test(xnn_f32_conv_hwc_ukernel_3x3s2p1c3x4__neonfma_2x2); |
| } |
| } |
| } |
| |
| TEST(F32_CONV_3X3S2P1C3X4__NEONFMA_2X2, output_channels_gt_4) { |
| TEST_REQUIRES_ARM_NEON_FMA; |
| for (size_t output_channels = 5; output_channels < 8; output_channels++) { |
| for (size_t input_width = 1; input_width < 32; input_width += 7) { |
| ConvHWCMicrokernelTester() |
| .kernel_size(3) |
| .subsampling(2) |
| .padding_width(1) |
| .input_channels(3) |
| .output_channels_tile(4) |
| .output_channels(output_channels) |
| .input_width(input_width) |
| .input_height(3) |
| .Test(xnn_f32_conv_hwc_ukernel_3x3s2p1c3x4__neonfma_2x2); |
| } |
| } |
| } |
| |
| TEST(F32_CONV_3X3S2P1C3X4__NEONFMA_2X2, input_height_lt_3) { |
| TEST_REQUIRES_ARM_NEON_FMA; |
| for (size_t input_height = 1; input_height < 3; input_height++) { |
| for (size_t output_channels = 1; output_channels < 8; output_channels += 3) { |
| for (size_t input_width = 1; input_width < 32; input_width += 7) { |
| ConvHWCMicrokernelTester() |
| .kernel_size(3) |
| .subsampling(2) |
| .padding(1) |
| .input_channels(3) // padded input height of at least 3 required |
| .output_channels_tile(4) |
| .output_channels(output_channels) |
| .input_width(input_width) |
| .input_height(input_height) |
| .Test(xnn_f32_conv_hwc_ukernel_3x3s2p1c3x4__neonfma_2x2); |
| } |
| } |
| } |
| } |
| |
| TEST(F32_CONV_3X3S2P1C3X4__NEONFMA_2X2, input_height_gt_3) { |
| TEST_REQUIRES_ARM_NEON_FMA; |
| for (size_t input_height = 4; input_height <= 9; input_height++) { |
| for (size_t output_channels = 1; output_channels < 8; output_channels += 3) { |
| for (size_t input_width = 1; input_width < 32; input_width += 7) { |
| ConvHWCMicrokernelTester() |
| .kernel_size(3) |
| .subsampling(2) |
| .padding_width(1) |
| .input_channels(3) |
| .output_channels_tile(4) |
| .output_channels(output_channels) |
| .input_width(input_width) |
| .input_height(input_height) |
| .Test(xnn_f32_conv_hwc_ukernel_3x3s2p1c3x4__neonfma_2x2); |
| } |
| } |
| } |
| } |
| |
| TEST(F32_CONV_3X3S2P1C3X4__NEONFMA_2X2, padding_top) { |
| TEST_REQUIRES_ARM_NEON_FMA; |
| for (size_t padding_top = 0; padding_top <= 1; padding_top++) { |
| for (size_t output_channels = 1; output_channels < 16; output_channels += 7) { |
| for (size_t input_width = 1; input_width < 32; input_width += 7) { |
| ConvHWCMicrokernelTester() |
| .kernel_size(3) |
| .subsampling(2) |
| .padding_width(1) |
| .padding_top(padding_top) |
| .input_channels(3) |
| .output_channels_tile(4) |
| .output_channels(output_channels) |
| .input_width(input_width) |
| .input_height(9) |
| .Test(xnn_f32_conv_hwc_ukernel_3x3s2p1c3x4__neonfma_2x2); |
| } |
| } |
| } |
| } |
| |
| TEST(F32_CONV_3X3S2P1C3X4__NEONFMA_2X2, padding_bottom) { |
| TEST_REQUIRES_ARM_NEON_FMA; |
| for (size_t padding_bottom = 0; padding_bottom <= 1; padding_bottom++) { |
| for (size_t output_channels = 1; output_channels < 16; output_channels += 7) { |
| for (size_t input_width = 1; input_width < 32; input_width += 7) { |
| ConvHWCMicrokernelTester() |
| .kernel_size(3) |
| .subsampling(2) |
| .padding_width(1) |
| .padding_bottom(padding_bottom) |
| .input_channels(3) |
| .output_channels_tile(4) |
| .output_channels(output_channels) |
| .input_width(input_width) |
| .input_height(9) |
| .Test(xnn_f32_conv_hwc_ukernel_3x3s2p1c3x4__neonfma_2x2); |
| } |
| } |
| } |
| } |
| |
| TEST(F32_CONV_3X3S2P1C3X4__NEONFMA_2X2, output_y_start) { |
| TEST_REQUIRES_ARM_NEON_FMA; |
| for (size_t output_y_start = 1; output_y_start <= 3; output_y_start++) { |
| for (size_t output_channels = 1; output_channels < 8; output_channels += 3) { |
| for (size_t input_width = 1; input_width < 32; input_width += 7) { |
| ConvHWCMicrokernelTester() |
| .kernel_size(3) |
| .subsampling(2) |
| .padding_width(1) |
| .input_channels(3) |
| .output_channels_tile(4) |
| .output_channels(output_channels) |
| .input_width(input_width) |
| .input_height(9) |
| .output_y_start(output_y_start) |
| .Test(xnn_f32_conv_hwc_ukernel_3x3s2p1c3x4__neonfma_2x2); |
| } |
| } |
| } |
| } |
| |
| TEST(F32_CONV_3X3S2P1C3X4__NEONFMA_2X2, output_y_end) { |
| TEST_REQUIRES_ARM_NEON_FMA; |
| for (size_t output_y_end = 2; output_y_end < 5; output_y_end++) { |
| for (size_t output_channels = 1; output_channels < 8; output_channels += 3) { |
| for (size_t input_width = 1; input_width < 32; input_width += 7) { |
| ConvHWCMicrokernelTester() |
| .kernel_size(3) |
| .subsampling(2) |
| .padding_width(1) |
| .input_channels(3) |
| .output_channels_tile(4) |
| .output_channels(output_channels) |
| .input_width(input_width) |
| .input_height(9) |
| .output_y_end(output_y_end) |
| .Test(xnn_f32_conv_hwc_ukernel_3x3s2p1c3x4__neonfma_2x2); |
| } |
| } |
| } |
| } |
| |
| TEST(F32_CONV_3X3S2P1C3X4__NEONFMA_2X2, qmin) { |
| TEST_REQUIRES_ARM_NEON_FMA; |
| for (size_t output_channels = 1; output_channels < 8; output_channels += 3) { |
| for (size_t input_width = 1; input_width < 32; input_width += 7) { |
| ConvHWCMicrokernelTester() |
| .kernel_size(3) |
| .subsampling(2) |
| .padding_width(1) |
| .input_channels(3) |
| .output_channels_tile(4) |
| .output_channels(output_channels) |
| .input_width(input_width) |
| .input_height(6) |
| .qmin(128) |
| .Test(xnn_f32_conv_hwc_ukernel_3x3s2p1c3x4__neonfma_2x2); |
| } |
| } |
| } |
| |
| TEST(F32_CONV_3X3S2P1C3X4__NEONFMA_2X2, qmax) { |
| TEST_REQUIRES_ARM_NEON_FMA; |
| for (size_t output_channels = 1; output_channels < 8; output_channels += 3) { |
| for (size_t input_width = 1; input_width < 32; input_width += 7) { |
| ConvHWCMicrokernelTester() |
| .kernel_size(3) |
| .subsampling(2) |
| .padding_width(1) |
| .input_channels(3) |
| .output_channels_tile(4) |
| .output_channels(output_channels) |
| .input_width(input_width) |
| .input_height(6) |
| .qmax(128) |
| .Test(xnn_f32_conv_hwc_ukernel_3x3s2p1c3x4__neonfma_2x2); |
| } |
| } |
| } |
| #endif // XNN_ARCH_ARM64 |
| |
| #if XNN_ARCH_ARM64 |
| TEST(F32_CONV_3X3S2P0P1C3X8__NEONFMA_2X2, input_width_eq_4) { |
| TEST_REQUIRES_ARM_NEON_FMA; |
| ConvHWCMicrokernelTester() |
| .kernel_size(3) |
| .subsampling(2) |
| .padding_right(1) |
| .input_channels(3) |
| .output_channels_tile(8) |
| .output_channels(8) |
| .input_width(4) |
| .input_height(3) |
| .Test(xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x8__neonfma_2x2); |
| } |
| |
| TEST(F32_CONV_3X3S2P0P1C3X8__NEONFMA_2X2, input_width_div_4) { |
| TEST_REQUIRES_ARM_NEON_FMA; |
| for (size_t input_width = 8; input_width <= 32; input_width += 12) { |
| ConvHWCMicrokernelTester() |
| .kernel_size(3) |
| .subsampling(2) |
| .padding_right(1) |
| .input_channels(3) |
| .output_channels_tile(8) |
| .output_channels(8) |
| .input_width(input_width) |
| .input_height(3) |
| .Test(xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x8__neonfma_2x2); |
| } |
| } |
| |
| TEST(F32_CONV_3X3S2P0P1C3X8__NEONFMA_2X2, input_width_lt_4) { |
| TEST_REQUIRES_ARM_NEON_FMA; |
| for (size_t input_width = 2; input_width < 4; input_width++) { |
| ConvHWCMicrokernelTester() |
| .kernel_size(3) |
| .subsampling(2) |
| .padding_right(1) |
| .input_channels(3) |
| .output_channels_tile(8) |
| .output_channels(8) |
| .input_width(input_width) |
| .input_height(3) |
| .Test(xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x8__neonfma_2x2); |
| } |
| } |
| |
| TEST(F32_CONV_3X3S2P0P1C3X8__NEONFMA_2X2, input_width_gt_4) { |
| TEST_REQUIRES_ARM_NEON_FMA; |
| for (size_t input_width = 5; input_width < 8; input_width++) { |
| ConvHWCMicrokernelTester() |
| .kernel_size(3) |
| .subsampling(2) |
| .padding_right(1) |
| .input_channels(3) |
| .output_channels_tile(8) |
| .output_channels(8) |
| .input_width(input_width) |
| .input_height(3) |
| .Test(xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x8__neonfma_2x2); |
| } |
| } |
| |
| TEST(F32_CONV_3X3S2P0P1C3X8__NEONFMA_2X2, output_channels_lt_8) { |
| TEST_REQUIRES_ARM_NEON_FMA; |
| for (size_t output_channels = 1; output_channels < 8; output_channels++) { |
| for (size_t input_width = 2; input_width < 32; input_width += 7) { |
| ConvHWCMicrokernelTester() |
| .kernel_size(3) |
| .subsampling(2) |
| .padding_right(1) |
| .input_channels(3) |
| .output_channels_tile(8) |
| .output_channels(output_channels) |
| .input_width(input_width) |
| .input_height(3) |
| .Test(xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x8__neonfma_2x2); |
| } |
| } |
| } |
| |
| TEST(F32_CONV_3X3S2P0P1C3X8__NEONFMA_2X2, output_channels_div_8) { |
| TEST_REQUIRES_ARM_NEON_FMA; |
| for (size_t output_channels = 16; output_channels <= 32; output_channels += 8) { |
| for (size_t input_width = 2; input_width < 32; input_width += 7) { |
| ConvHWCMicrokernelTester() |
| .kernel_size(3) |
| .subsampling(2) |
| .padding_right(1) |
| .input_channels(3) |
| .output_channels_tile(8) |
| .output_channels(output_channels) |
| .input_width(input_width) |
| .input_height(3) |
| .Test(xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x8__neonfma_2x2); |
| } |
| } |
| } |
| |
| TEST(F32_CONV_3X3S2P0P1C3X8__NEONFMA_2X2, output_channels_gt_8) { |
| TEST_REQUIRES_ARM_NEON_FMA; |
| for (size_t output_channels = 9; output_channels < 16; output_channels++) { |
| for (size_t input_width = 2; input_width < 32; input_width += 7) { |
| ConvHWCMicrokernelTester() |
| .kernel_size(3) |
| .subsampling(2) |
| .padding_right(1) |
| .input_channels(3) |
| .output_channels_tile(8) |
| .output_channels(output_channels) |
| .input_width(input_width) |
| .input_height(3) |
| .Test(xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x8__neonfma_2x2); |
| } |
| } |
| } |
| |
| TEST(F32_CONV_3X3S2P0P1C3X8__NEONFMA_2X2, input_height_lt_3) { |
| TEST_REQUIRES_ARM_NEON_FMA; |
| for (size_t input_height = 1; input_height < 3; input_height++) { |
| for (size_t output_channels = 1; output_channels < 16; output_channels += 7) { |
| for (size_t input_width = 2; input_width < 32; input_width += 7) { |
| ConvHWCMicrokernelTester() |
| .kernel_size(3) |
| .subsampling(2) |
| .padding_height(1) |
| .padding_right(1) |
| .input_channels(3) |
| .output_channels_tile(8) |
| .output_channels(output_channels) |
| .input_width(input_width) |
| .input_height(input_height) |
| .Test(xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x8__neonfma_2x2); |
| } |
| } |
| } |
| } |
| |
| TEST(F32_CONV_3X3S2P0P1C3X8__NEONFMA_2X2, input_height_gt_3) { |
| TEST_REQUIRES_ARM_NEON_FMA; |
| for (size_t input_height = 4; input_height <= 9; input_height++) { |
| for (size_t output_channels = 1; output_channels < 16; output_channels += 7) { |
| for (size_t input_width = 2; input_width < 32; input_width += 7) { |
| ConvHWCMicrokernelTester() |
| .kernel_size(3) |
| .subsampling(2) |
| .padding_right(1) |
| .input_channels(3) |
| .output_channels_tile(8) |
| .output_channels(output_channels) |
| .input_width(input_width) |
| .input_height(input_height) |
| .Test(xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x8__neonfma_2x2); |
| } |
| } |
| } |
| } |
| |
| TEST(F32_CONV_3X3S2P0P1C3X8__NEONFMA_2X2, padding_top) { |
| TEST_REQUIRES_ARM_NEON_FMA; |
| for (size_t padding_top = 0; padding_top <= 1; padding_top++) { |
| for (size_t output_channels = 1; output_channels < 16; output_channels += 7) { |
| for (size_t input_width = 2; input_width < 32; input_width += 7) { |
| ConvHWCMicrokernelTester() |
| .kernel_size(3) |
| .subsampling(2) |
| .padding_right(1) |
| .padding_top(padding_top) |
| .input_channels(3) |
| .output_channels_tile(8) |
| .output_channels(output_channels) |
| .input_width(input_width) |
| .input_height(9) |
| .Test(xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x8__neonfma_2x2); |
| } |
| } |
| } |
| } |
| |
| TEST(F32_CONV_3X3S2P0P1C3X8__NEONFMA_2X2, padding_bottom) { |
| TEST_REQUIRES_ARM_NEON_FMA; |
| for (size_t padding_bottom = 0; padding_bottom <= 1; padding_bottom++) { |
| for (size_t output_channels = 1; output_channels < 16; output_channels += 7) { |
| for (size_t input_width = 2; input_width < 32; input_width += 7) { |
| ConvHWCMicrokernelTester() |
| .kernel_size(3) |
| .subsampling(2) |
| .padding_right(1) |
| .padding_bottom(padding_bottom) |
| .input_channels(3) |
| .output_channels_tile(8) |
| .output_channels(output_channels) |
| .input_width(input_width) |
| .input_height(9) |
| .Test(xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x8__neonfma_2x2); |
| } |
| } |
| } |
| } |
| |
| TEST(F32_CONV_3X3S2P0P1C3X8__NEONFMA_2X2, output_y_start) { |
| TEST_REQUIRES_ARM_NEON_FMA; |
| for (size_t output_y_start = 1; output_y_start <= 3; output_y_start++) { |
| for (size_t output_channels = 1; output_channels < 16; output_channels += 7) { |
| for (size_t input_width = 2; input_width < 32; input_width += 7) { |
| ConvHWCMicrokernelTester() |
| .kernel_size(3) |
| .subsampling(2) |
| .padding_right(1) |
| .input_channels(3) |
| .output_channels_tile(8) |
| .output_channels(output_channels) |
| .input_width(input_width) |
| .input_height(9) |
| .output_y_start(output_y_start) |
| .Test(xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x8__neonfma_2x2); |
| } |
| } |
| } |
| } |
| |
| TEST(F32_CONV_3X3S2P0P1C3X8__NEONFMA_2X2, output_y_end) { |
| TEST_REQUIRES_ARM_NEON_FMA; |
| for (size_t output_y_end = 2; output_y_end < 5; output_y_end++) { |
| for (size_t output_channels = 1; output_channels < 16; output_channels += 7) { |
| for (size_t input_width = 2; input_width < 32; input_width += 7) { |
| ConvHWCMicrokernelTester() |
| .kernel_size(3) |
| .subsampling(2) |
| .padding_right(1) |
| .input_channels(3) |
| .output_channels_tile(8) |
| .output_channels(output_channels) |
| .input_width(input_width) |
| .input_height(9) |
| .output_y_end(output_y_end) |
| .Test(xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x8__neonfma_2x2); |
| } |
| } |
| } |
| } |
| |
| TEST(F32_CONV_3X3S2P0P1C3X8__NEONFMA_2X2, qmin) { |
| TEST_REQUIRES_ARM_NEON_FMA; |
| for (size_t output_channels = 1; output_channels < 16; output_channels += 7) { |
| for (size_t input_width = 2; input_width < 32; input_width += 7) { |
| ConvHWCMicrokernelTester() |
| .kernel_size(3) |
| .subsampling(2) |
| .padding_right(1) |
| .input_channels(3) |
| .output_channels_tile(8) |
| .output_channels(output_channels) |
| .input_width(input_width) |
| .input_height(6) |
| .qmin(128) |
| .Test(xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x8__neonfma_2x2); |
| } |
| } |
| } |
| |
| TEST(F32_CONV_3X3S2P0P1C3X8__NEONFMA_2X2, qmax) { |
| TEST_REQUIRES_ARM_NEON_FMA; |
| for (size_t output_channels = 1; output_channels < 16; output_channels += 7) { |
| for (size_t input_width = 2; input_width < 32; input_width += 7) { |
| ConvHWCMicrokernelTester() |
| .kernel_size(3) |
| .subsampling(2) |
| .padding_right(1) |
| .input_channels(3) |
| .output_channels_tile(8) |
| .output_channels(output_channels) |
| .input_width(input_width) |
| .input_height(6) |
| .qmax(128) |
| .Test(xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x8__neonfma_2x2); |
| } |
| } |
| } |
| #endif // XNN_ARCH_ARM64 |
| |
| #if XNN_ARCH_ARM64 |
| TEST(F32_CONV_3X3S2P0P1C3X4__NEONFMA_2X2, input_width_eq_4) { |
| TEST_REQUIRES_ARM_NEON_FMA; |
| ConvHWCMicrokernelTester() |
| .kernel_size(3) |
| .subsampling(2) |
| .padding_right(1) |
| .input_channels(3) |
| .output_channels_tile(4) |
| .output_channels(4) |
| .input_width(5) |
| .input_height(3) |
| .Test(xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x4__neonfma_2x2); |
| } |
| |
| TEST(F32_CONV_3X3S2P0P1C3X4__NEONFMA_2X2, input_width_div_4) { |
| TEST_REQUIRES_ARM_NEON_FMA; |
| for (size_t input_width = 8; input_width <= 32; input_width += 12) { |
| ConvHWCMicrokernelTester() |
| .kernel_size(3) |
| .subsampling(2) |
| .padding_right(1) |
| .input_channels(3) |
| .output_channels_tile(4) |
| .output_channels(4) |
| .input_width(input_width) |
| .input_height(3) |
| .Test(xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x4__neonfma_2x2); |
| } |
| } |
| |
| TEST(F32_CONV_3X3S2P0P1C3X4__NEONFMA_2X2, input_width_lt_4) { |
| TEST_REQUIRES_ARM_NEON_FMA; |
| for (size_t input_width = 2; input_width < 4; input_width++) { |
| ConvHWCMicrokernelTester() |
| .kernel_size(3) |
| .subsampling(2) |
| .padding_right(1) |
| .input_channels(3) |
| .output_channels_tile(4) |
| .output_channels(4) |
| .input_width(input_width) |
| .input_height(3) |
| .Test(xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x4__neonfma_2x2); |
| } |
| } |
| |
| TEST(F32_CONV_3X3S2P0P1C3X4__NEONFMA_2X2, input_width_gt_4) { |
| TEST_REQUIRES_ARM_NEON_FMA; |
| for (size_t input_width = 5; input_width < 8; input_width++) { |
| ConvHWCMicrokernelTester() |
| .kernel_size(3) |
| .subsampling(2) |
| .padding_right(1) |
| .input_channels(3) |
| .output_channels_tile(4) |
| .output_channels(4) |
| .input_width(input_width) |
| .input_height(3) |
| .Test(xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x4__neonfma_2x2); |
| } |
| } |
| |
| TEST(F32_CONV_3X3S2P0P1C3X4__NEONFMA_2X2, output_channels_lt_4) { |
| TEST_REQUIRES_ARM_NEON_FMA; |
| for (size_t output_channels = 1; output_channels < 4; output_channels++) { |
| for (size_t input_width = 2; input_width < 32; input_width += 7) { |
| ConvHWCMicrokernelTester() |
| .kernel_size(3) |
| .subsampling(2) |
| .padding_right(1) |
| .input_channels(3) |
| .output_channels_tile(4) |
| .output_channels(output_channels) |
| .input_width(input_width) |
| .input_height(3) |
| .Test(xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x4__neonfma_2x2); |
| } |
| } |
| } |
| |
| TEST(F32_CONV_3X3S2P0P1C3X4__NEONFMA_2X2, output_channels_div_4) { |
| TEST_REQUIRES_ARM_NEON_FMA; |
| for (size_t output_channels = 8; output_channels <= 16; output_channels += 4) { |
| for (size_t input_width = 2; input_width < 32; input_width += 7) { |
| ConvHWCMicrokernelTester() |
| .kernel_size(3) |
| .subsampling(2) |
| .padding_right(1) |
| .input_channels(3) |
| .output_channels_tile(4) |
| .output_channels(output_channels) |
| .input_width(input_width) |
| .input_height(3) |
| .Test(xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x4__neonfma_2x2); |
| } |
| } |
| } |
| |
| TEST(F32_CONV_3X3S2P0P1C3X4__NEONFMA_2X2, output_channels_gt_4) { |
| TEST_REQUIRES_ARM_NEON_FMA; |
| for (size_t output_channels = 5; output_channels < 8; output_channels++) { |
| for (size_t input_width = 2; input_width < 32; input_width += 7) { |
| ConvHWCMicrokernelTester() |
| .kernel_size(3) |
| .subsampling(2) |
| .padding_right(1) |
| .input_channels(3) |
| .output_channels_tile(4) |
| .output_channels(output_channels) |
| .input_width(input_width) |
| .input_height(3) |
| .Test(xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x4__neonfma_2x2); |
| } |
| } |
| } |
| |
| TEST(F32_CONV_3X3S2P0P1C3X4__NEONFMA_2X2, input_height_lt_3) { |
| TEST_REQUIRES_ARM_NEON_FMA; |
| for (size_t input_height = 1; input_height < 3; input_height++) { |
| for (size_t output_channels = 1; output_channels < 8; output_channels += 3) { |
| for (size_t input_width = 2; input_width < 32; input_width += 7) { |
| ConvHWCMicrokernelTester() |
| .kernel_size(3) |
| .subsampling(2) |
| .padding_right(1) |
| .padding_height(1) // padded input height of at least 3 required |
| .input_channels(3) |
| .output_channels_tile(4) |
| .output_channels(output_channels) |
| .input_width(input_width) |
| .input_height(input_height) |
| .Test(xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x4__neonfma_2x2); |
| } |
| } |
| } |
| } |
| |
| TEST(F32_CONV_3X3S2P0P1C3X4__NEONFMA_2X2, input_height_gt_3) { |
| TEST_REQUIRES_ARM_NEON_FMA; |
| for (size_t input_height = 4; input_height <= 9; input_height++) { |
| for (size_t output_channels = 1; output_channels < 8; output_channels += 3) { |
| for (size_t input_width = 2; input_width < 32; input_width += 7) { |
| ConvHWCMicrokernelTester() |
| .kernel_size(3) |
| .subsampling(2) |
| .padding_right(1) |
| .input_channels(3) |
| .output_channels_tile(4) |
| .output_channels(output_channels) |
| .input_width(input_width) |
| .input_height(input_height) |
| .Test(xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x4__neonfma_2x2); |
| } |
| } |
| } |
| } |
| |
| TEST(F32_CONV_3X3S2P0P1C3X4__NEONFMA_2X2, padding_top) { |
| TEST_REQUIRES_ARM_NEON_FMA; |
| for (size_t padding_top = 0; padding_top <= 1; padding_top++) { |
| for (size_t output_channels = 1; output_channels < 16; output_channels += 7) { |
| for (size_t input_width = 2; input_width < 32; input_width += 7) { |
| ConvHWCMicrokernelTester() |
| .kernel_size(3) |
| .subsampling(2) |
| .padding_right(1) |
| .padding_top(padding_top) |
| .input_channels(3) |
| .output_channels_tile(4) |
| .output_channels(output_channels) |
| .input_width(input_width) |
| .input_height(9) |
| .Test(xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x4__neonfma_2x2); |
| } |
| } |
| } |
| } |
| |
| TEST(F32_CONV_3X3S2P0P1C3X4__NEONFMA_2X2, padding_bottom) { |
| TEST_REQUIRES_ARM_NEON_FMA; |
| for (size_t padding_bottom = 0; padding_bottom <= 1; padding_bottom++) { |
| for (size_t output_channels = 1; output_channels < 16; output_channels += 7) { |
| for (size_t input_width = 2; input_width < 32; input_width += 7) { |
| ConvHWCMicrokernelTester() |
| .kernel_size(3) |
| .subsampling(2) |
| .padding_right(1) |
| .padding_bottom(padding_bottom) |
| .input_channels(3) |
| .output_channels_tile(4) |
| .output_channels(output_channels) |
| .input_width(input_width) |
| .input_height(9) |
| .Test(xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x4__neonfma_2x2); |
| } |
| } |
| } |
| } |
| |
| TEST(F32_CONV_3X3S2P0P1C3X4__NEONFMA_2X2, output_y_start) { |
| TEST_REQUIRES_ARM_NEON_FMA; |
| for (size_t output_y_start = 1; output_y_start <= 3; output_y_start++) { |
| for (size_t output_channels = 1; output_channels < 8; output_channels += 3) { |
| for (size_t input_width = 2; input_width < 32; input_width += 7) { |
| ConvHWCMicrokernelTester() |
| .kernel_size(3) |
| .subsampling(2) |
| .padding_right(1) |
| .input_channels(3) |
| .output_channels_tile(4) |
| .output_channels(output_channels) |
| .input_width(input_width) |
| .input_height(9) |
| .output_y_start(output_y_start) |
| .Test(xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x4__neonfma_2x2); |
| } |
| } |
| } |
| } |
| |
| TEST(F32_CONV_3X3S2P0P1C3X4__NEONFMA_2X2, output_y_end) { |
| TEST_REQUIRES_ARM_NEON_FMA; |
| for (size_t output_y_end = 2; output_y_end < 5; output_y_end++) { |
| for (size_t output_channels = 1; output_channels < 8; output_channels += 3) { |
| for (size_t input_width = 2; input_width < 32; input_width += 7) { |
| ConvHWCMicrokernelTester() |
| .kernel_size(3) |
| .subsampling(2) |
| .padding_right(1) |
| .input_channels(3) |
| .output_channels_tile(4) |
| .output_channels(output_channels) |
| .input_width(input_width) |
| .input_height(9) |
| .output_y_end(output_y_end) |
| .Test(xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x4__neonfma_2x2); |
| } |
| } |
| } |
| } |
| |
| TEST(F32_CONV_3X3S2P0P1C3X4__NEONFMA_2X2, qmin) { |
| TEST_REQUIRES_ARM_NEON_FMA; |
| for (size_t output_channels = 1; output_channels < 8; output_channels += 3) { |
| for (size_t input_width = 2; input_width < 32; input_width += 7) { |
| ConvHWCMicrokernelTester() |
| .kernel_size(3) |
| .subsampling(2) |
| .padding_right(1) |
| .input_channels(3) |
| .output_channels_tile(4) |
| .output_channels(output_channels) |
| .input_width(input_width) |
| .input_height(6) |
| .qmin(128) |
| .Test(xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x4__neonfma_2x2); |
| } |
| } |
| } |
| |
| TEST(F32_CONV_3X3S2P0P1C3X4__NEONFMA_2X2, qmax) { |
| TEST_REQUIRES_ARM_NEON_FMA; |
| for (size_t output_channels = 1; output_channels < 8; output_channels += 3) { |
| for (size_t input_width = 2; input_width < 32; input_width += 7) { |
| ConvHWCMicrokernelTester() |
| .kernel_size(3) |
| .subsampling(2) |
| .padding_right(1) |
| .input_channels(3) |
| .output_channels_tile(4) |
| .output_channels(output_channels) |
| .input_width(input_width) |
| .input_height(6) |
| .qmax(128) |
| .Test(xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x4__neonfma_2x2); |
| } |
| } |
| } |
| #endif // XNN_ARCH_ARM64 |
| |
| #if XNN_ARCH_ARM64 |
| TEST(F32_CONV_3X3S2P0P1C3X8__NEONFMA_2X1, input_width_eq_2) { |
| TEST_REQUIRES_ARM_NEON_FMA; |
| ConvHWCMicrokernelTester() |
| .kernel_size(3) |
| .subsampling(2) |
| .padding_right(1) |
| .input_channels(3) |
| .output_channels_tile(8) |
| .output_channels(8) |
| .input_width(2) |
| .input_height(3) |
| .Test(xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x8__neonfma_2x1); |
| } |
| |
| TEST(F32_CONV_3X3S2P0P1C3X8__NEONFMA_2X1, input_width_div_2) { |
| TEST_REQUIRES_ARM_NEON_FMA; |
| for (size_t input_width = 4; input_width <= 16; input_width += 6) { |
| ConvHWCMicrokernelTester() |
| .kernel_size(3) |
| .subsampling(2) |
| .padding_right(1) |
| .input_channels(3) |
| .output_channels_tile(8) |
| .output_channels(8) |
| .input_width(input_width) |
| .input_height(3) |
| .Test(xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x8__neonfma_2x1); |
| } |
| } |
| |
| TEST(F32_CONV_3X3S2P0P1C3X8__NEONFMA_2X1, input_width_gt_2) { |
| TEST_REQUIRES_ARM_NEON_FMA; |
| for (size_t input_width = 3; input_width < 4; input_width++) { |
| ConvHWCMicrokernelTester() |
| .kernel_size(3) |
| .subsampling(2) |
| .padding_right(1) |
| .input_channels(3) |
| .output_channels_tile(8) |
| .output_channels(8) |
| .input_width(input_width) |
| .input_height(3) |
| .Test(xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x8__neonfma_2x1); |
| } |
| } |
| |
| TEST(F32_CONV_3X3S2P0P1C3X8__NEONFMA_2X1, output_channels_lt_8) { |
| TEST_REQUIRES_ARM_NEON_FMA; |
| for (size_t output_channels = 1; output_channels < 8; output_channels++) { |
| for (size_t input_width = 2; input_width < 16; input_width += 3) { |
| ConvHWCMicrokernelTester() |
| .kernel_size(3) |
| .subsampling(2) |
| .padding_right(1) |
| .input_channels(3) |
| .output_channels_tile(8) |
| .output_channels(output_channels) |
| .input_width(input_width) |
| .input_height(3) |
| .Test(xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x8__neonfma_2x1); |
| } |
| } |
| } |
| |
| TEST(F32_CONV_3X3S2P0P1C3X8__NEONFMA_2X1, output_channels_div_8) { |
| TEST_REQUIRES_ARM_NEON_FMA; |
| for (size_t output_channels = 16; output_channels <= 32; output_channels += 8) { |
| for (size_t input_width = 2; input_width < 16; input_width += 3) { |
| ConvHWCMicrokernelTester() |
| .kernel_size(3) |
| .subsampling(2) |
| .padding_right(1) |
| .input_channels(3) |
| .output_channels_tile(8) |
| .output_channels(output_channels) |
| .input_width(input_width) |
| .input_height(3) |
| .Test(xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x8__neonfma_2x1); |
| } |
| } |
| } |
| |
| TEST(F32_CONV_3X3S2P0P1C3X8__NEONFMA_2X1, output_channels_gt_8) { |
| TEST_REQUIRES_ARM_NEON_FMA; |
| for (size_t output_channels = 9; output_channels < 16; output_channels++) { |
| for (size_t input_width = 2; input_width < 16; input_width += 3) { |
| ConvHWCMicrokernelTester() |
| .kernel_size(3) |
| .subsampling(2) |
| .padding_right(1) |
| .input_channels(3) |
| .output_channels_tile(8) |
| .output_channels(output_channels) |
| .input_width(input_width) |
| .input_height(3) |
| .Test(xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x8__neonfma_2x1); |
| } |
| } |
| } |
| |
| TEST(F32_CONV_3X3S2P0P1C3X8__NEONFMA_2X1, input_height_lt_3) { |
| TEST_REQUIRES_ARM_NEON_FMA; |
| for (size_t input_height = 1; input_height < 3; input_height++) { |
| for (size_t output_channels = 1; output_channels < 16; output_channels += 7) { |
| for (size_t input_width = 2; input_width < 16; input_width += 3) { |
| ConvHWCMicrokernelTester() |
| .kernel_size(3) |
| .subsampling(2) |
| .padding_height(1) |
| .padding_right(1) |
| .input_channels(3) |
| .output_channels_tile(8) |
| .output_channels(output_channels) |
| .input_width(input_width) |
| .input_height(input_height) |
| .Test(xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x8__neonfma_2x1); |
| } |
| } |
| } |
| } |
| |
| TEST(F32_CONV_3X3S2P0P1C3X8__NEONFMA_2X1, input_height_gt_3) { |
| TEST_REQUIRES_ARM_NEON_FMA; |
| for (size_t input_height = 4; input_height <= 9; input_height++) { |
| for (size_t output_channels = 1; output_channels < 16; output_channels += 7) { |
| for (size_t input_width = 2; input_width < 16; input_width += 3) { |
| ConvHWCMicrokernelTester() |
| .kernel_size(3) |
| .subsampling(2) |
| .padding_right(1) |
| .input_channels(3) |
| .output_channels_tile(8) |
| .output_channels(output_channels) |
| .input_width(input_width) |
| .input_height(input_height) |
| .Test(xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x8__neonfma_2x1); |
| } |
| } |
| } |
| } |
| |
| TEST(F32_CONV_3X3S2P0P1C3X8__NEONFMA_2X1, padding_top) { |
| TEST_REQUIRES_ARM_NEON_FMA; |
| for (size_t padding_top = 0; padding_top <= 1; padding_top++) { |
| for (size_t output_channels = 1; output_channels < 16; output_channels += 7) { |
| for (size_t input_width = 2; input_width < 16; input_width += 3) { |
| ConvHWCMicrokernelTester() |
| .kernel_size(3) |
| .subsampling(2) |
| .padding_right(1) |
| .padding_top(padding_top) |
| .input_channels(3) |
| .output_channels_tile(8) |
| .output_channels(output_channels) |
| .input_width(input_width) |
| .input_height(9) |
| .Test(xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x8__neonfma_2x1); |
| } |
| } |
| } |
| } |
| |
| TEST(F32_CONV_3X3S2P0P1C3X8__NEONFMA_2X1, padding_bottom) { |
| TEST_REQUIRES_ARM_NEON_FMA; |
| for (size_t padding_bottom = 0; padding_bottom <= 1; padding_bottom++) { |
| for (size_t output_channels = 1; output_channels < 16; output_channels += 7) { |
| for (size_t input_width = 2; input_width < 16; input_width += 3) { |
| ConvHWCMicrokernelTester() |
| .kernel_size(3) |
| .subsampling(2) |
| .padding_right(1) |
| .padding_bottom(padding_bottom) |
| .input_channels(3) |
| .output_channels_tile(8) |
| .output_channels(output_channels) |
| .input_width(input_width) |
| .input_height(9) |
| .Test(xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x8__neonfma_2x1); |
| } |
| } |
| } |
| } |
| |
| TEST(F32_CONV_3X3S2P0P1C3X8__NEONFMA_2X1, output_y_start) { |
| TEST_REQUIRES_ARM_NEON_FMA; |
| for (size_t output_y_start = 1; output_y_start <= 3; output_y_start++) { |
| for (size_t output_channels = 1; output_channels < 16; output_channels += 7) { |
| for (size_t input_width = 2; input_width < 16; input_width += 3) { |
| ConvHWCMicrokernelTester() |
| .kernel_size(3) |
| .subsampling(2) |
| .padding_right(1) |
| .input_channels(3) |
| .output_channels_tile(8) |
| .output_channels(output_channels) |
| .input_width(input_width) |
| .input_height(9) |
| .output_y_start(output_y_start) |
| .Test(xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x8__neonfma_2x1); |
| } |
| } |
| } |
| } |
| |
| TEST(F32_CONV_3X3S2P0P1C3X8__NEONFMA_2X1, output_y_end) { |
| TEST_REQUIRES_ARM_NEON_FMA; |
| for (size_t output_y_end = 2; output_y_end < 5; output_y_end++) { |
| for (size_t output_channels = 1; output_channels < 16; output_channels += 7) { |
| for (size_t input_width = 2; input_width < 16; input_width += 3) { |
| ConvHWCMicrokernelTester() |
| .kernel_size(3) |
| .subsampling(2) |
| .padding_right(1) |
| .input_channels(3) |
| .output_channels_tile(8) |
| .output_channels(output_channels) |
| .input_width(input_width) |
| .input_height(9) |
| .output_y_end(output_y_end) |
| .Test(xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x8__neonfma_2x1); |
| } |
| } |
| } |
| } |
| |
| TEST(F32_CONV_3X3S2P0P1C3X8__NEONFMA_2X1, qmin) { |
| TEST_REQUIRES_ARM_NEON_FMA; |
| for (size_t output_channels = 1; output_channels < 16; output_channels += 7) { |
| for (size_t input_width = 2; input_width < 16; input_width += 3) { |
| ConvHWCMicrokernelTester() |
| .kernel_size(3) |
| .subsampling(2) |
| .padding_right(1) |
| .input_channels(3) |
| .output_channels_tile(8) |
| .output_channels(output_channels) |
| .input_width(input_width) |
| .input_height(6) |
| .qmin(128) |
| .Test(xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x8__neonfma_2x1); |
| } |
| } |
| } |
| |
| TEST(F32_CONV_3X3S2P0P1C3X8__NEONFMA_2X1, qmax) { |
| TEST_REQUIRES_ARM_NEON_FMA; |
| for (size_t output_channels = 1; output_channels < 16; output_channels += 7) { |
| for (size_t input_width = 2; input_width < 16; input_width += 3) { |
| ConvHWCMicrokernelTester() |
| .kernel_size(3) |
| .subsampling(2) |
| .padding_right(1) |
| .input_channels(3) |
| .output_channels_tile(8) |
| .output_channels(output_channels) |
| .input_width(input_width) |
| .input_height(6) |
| .qmax(128) |
| .Test(xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x8__neonfma_2x1); |
| } |
| } |
| } |
| #endif // XNN_ARCH_ARM64 |
| |
| #if XNN_ARCH_ARM64 |
| TEST(F32_CONV_3X3S2P0P1C3X4__NEONFMA_2X1, input_width_eq_2) { |
| TEST_REQUIRES_ARM_NEON_FMA; |
| ConvHWCMicrokernelTester() |
| .kernel_size(3) |
| .subsampling(2) |
| .padding_right(1) |
| .input_channels(3) |
| .output_channels_tile(4) |
| .output_channels(4) |
| .input_width(2) |
| .input_height(3) |
| .Test(xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x4__neonfma_2x1); |
| } |
| |
| TEST(F32_CONV_3X3S2P0P1C3X4__NEONFMA_2X1, input_width_div_2) { |
| TEST_REQUIRES_ARM_NEON_FMA; |
| for (size_t input_width = 4; input_width <= 16; input_width += 6) { |
| ConvHWCMicrokernelTester() |
| .kernel_size(3) |
| .subsampling(2) |
| .padding_right(1) |
| .input_channels(3) |
| .output_channels_tile(4) |
| .output_channels(4) |
| .input_width(input_width) |
| .input_height(3) |
| .Test(xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x4__neonfma_2x1); |
| } |
| } |
| |
| TEST(F32_CONV_3X3S2P0P1C3X4__NEONFMA_2X1, input_width_gt_2) { |
| TEST_REQUIRES_ARM_NEON_FMA; |
| for (size_t input_width = 3; input_width < 4; input_width++) { |
| ConvHWCMicrokernelTester() |
| .kernel_size(3) |
| .subsampling(2) |
| .padding_right(1) |
| .input_channels(3) |
| .output_channels_tile(4) |
| .output_channels(4) |
| .input_width(input_width) |
| .input_height(3) |
| .Test(xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x4__neonfma_2x1); |
| } |
| } |
| |
| TEST(F32_CONV_3X3S2P0P1C3X4__NEONFMA_2X1, output_channels_lt_4) { |
| TEST_REQUIRES_ARM_NEON_FMA; |
| for (size_t output_channels = 1; output_channels < 4; output_channels++) { |
| for (size_t input_width = 2; input_width < 16; input_width += 3) { |
| ConvHWCMicrokernelTester() |
| .kernel_size(3) |
| .subsampling(2) |
| .padding_right(1) |
| .input_channels(3) |
| .output_channels_tile(4) |
| .output_channels(output_channels) |
| .input_width(input_width) |
| .input_height(3) |
| .Test(xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x4__neonfma_2x1); |
| } |
| } |
| } |
| |
| TEST(F32_CONV_3X3S2P0P1C3X4__NEONFMA_2X1, output_channels_div_4) { |
| TEST_REQUIRES_ARM_NEON_FMA; |
| for (size_t output_channels = 8; output_channels <= 16; output_channels += 4) { |
| for (size_t input_width = 2; input_width < 16; input_width += 3) { |
| ConvHWCMicrokernelTester() |
| .kernel_size(3) |
| .subsampling(2) |
| .padding_right(1) |
| .input_channels(3) |
| .output_channels_tile(4) |
| .output_channels(output_channels) |
| .input_width(input_width) |
| .input_height(3) |
| .Test(xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x4__neonfma_2x1); |
| } |
| } |
| } |
| |
| TEST(F32_CONV_3X3S2P0P1C3X4__NEONFMA_2X1, output_channels_gt_4) { |
| TEST_REQUIRES_ARM_NEON_FMA; |
| for (size_t output_channels = 5; output_channels < 8; output_channels++) { |
| for (size_t input_width = 2; input_width < 16; input_width += 3) { |
| ConvHWCMicrokernelTester() |
| .kernel_size(3) |
| .subsampling(2) |
| .padding_right(1) |
| .input_channels(3) |
| .output_channels_tile(4) |
| .output_channels(output_channels) |
| .input_width(input_width) |
| .input_height(3) |
| .Test(xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x4__neonfma_2x1); |
| } |
| } |
| } |
| |
| TEST(F32_CONV_3X3S2P0P1C3X4__NEONFMA_2X1, input_height_lt_3) { |
| TEST_REQUIRES_ARM_NEON_FMA; |
| for (size_t input_height = 1; input_height < 3; input_height++) { |
| for (size_t output_channels = 1; output_channels < 8; output_channels += 3) { |
| for (size_t input_width = 2; input_width < 16; input_width += 3) { |
| ConvHWCMicrokernelTester() |
| .kernel_size(3) |
| .subsampling(2) |
| .padding_right(1) |
| .padding_height(1) // padded input height of at least 3 required |
| .input_channels(3) |
| .output_channels_tile(4) |
| .output_channels(output_channels) |
| .input_width(input_width) |
| .input_height(input_height) |
| .Test(xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x4__neonfma_2x1); |
| } |
| } |
| } |
| } |
| |
| TEST(F32_CONV_3X3S2P0P1C3X4__NEONFMA_2X1, input_height_gt_3) { |
| TEST_REQUIRES_ARM_NEON_FMA; |
| for (size_t input_height = 4; input_height <= 9; input_height++) { |
| for (size_t output_channels = 1; output_channels < 8; output_channels += 3) { |
| for (size_t input_width = 2; input_width < 16; input_width += 3) { |
| ConvHWCMicrokernelTester() |
| .kernel_size(3) |
| .subsampling(2) |
| .padding_right(1) |
| .input_channels(3) |
| .output_channels_tile(4) |
| .output_channels(output_channels) |
| .input_width(input_width) |
| .input_height(input_height) |
| .Test(xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x4__neonfma_2x1); |
| } |
| } |
| } |
| } |
| |
| TEST(F32_CONV_3X3S2P0P1C3X4__NEONFMA_2X1, padding_top) { |
| TEST_REQUIRES_ARM_NEON_FMA; |
| for (size_t padding_top = 0; padding_top <= 1; padding_top++) { |
| for (size_t output_channels = 1; output_channels < 16; output_channels += 7) { |
| for (size_t input_width = 2; input_width < 16; input_width += 3) { |
| ConvHWCMicrokernelTester() |
| .kernel_size(3) |
| .subsampling(2) |
| .padding_right(1) |
| .padding_top(padding_top) |
| .input_channels(3) |
| .output_channels_tile(4) |
| .output_channels(output_channels) |
| .input_width(input_width) |
| .input_height(9) |
| .Test(xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x4__neonfma_2x1); |
| } |
| } |
| } |
| } |
| |
| TEST(F32_CONV_3X3S2P0P1C3X4__NEONFMA_2X1, padding_bottom) { |
| TEST_REQUIRES_ARM_NEON_FMA; |
| for (size_t padding_bottom = 0; padding_bottom <= 1; padding_bottom++) { |
| for (size_t output_channels = 1; output_channels < 16; output_channels += 7) { |
| for (size_t input_width = 2; input_width < 16; input_width += 3) { |
| ConvHWCMicrokernelTester() |
| .kernel_size(3) |
| .subsampling(2) |
| .padding_right(1) |
| .padding_bottom(padding_bottom) |
| .input_channels(3) |
| .output_channels_tile(4) |
| .output_channels(output_channels) |
| .input_width(input_width) |
| .input_height(9) |
| .Test(xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x4__neonfma_2x1); |
| } |
| } |
| } |
| } |
| |
| TEST(F32_CONV_3X3S2P0P1C3X4__NEONFMA_2X1, output_y_start) { |
| TEST_REQUIRES_ARM_NEON_FMA; |
| for (size_t output_y_start = 1; output_y_start <= 3; output_y_start++) { |
| for (size_t output_channels = 1; output_channels < 8; output_channels += 3) { |
| for (size_t input_width = 2; input_width < 16; input_width += 3) { |
| ConvHWCMicrokernelTester() |
| .kernel_size(3) |
| .subsampling(2) |
| .padding_right(1) |
| .input_channels(3) |
| .output_channels_tile(4) |
| .output_channels(output_channels) |
| .input_width(input_width) |
| .input_height(9) |
| .output_y_start(output_y_start) |
| .Test(xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x4__neonfma_2x1); |
| } |
| } |
| } |
| } |
| |
| TEST(F32_CONV_3X3S2P0P1C3X4__NEONFMA_2X1, output_y_end) { |
| TEST_REQUIRES_ARM_NEON_FMA; |
| for (size_t output_y_end = 2; output_y_end < 5; output_y_end++) { |
| for (size_t output_channels = 1; output_channels < 8; output_channels += 3) { |
| for (size_t input_width = 2; input_width < 16; input_width += 3) { |
| ConvHWCMicrokernelTester() |
| .kernel_size(3) |
| .subsampling(2) |
| .padding_right(1) |
| .input_channels(3) |
| .output_channels_tile(4) |
| .output_channels(output_channels) |
| .input_width(input_width) |
| .input_height(9) |
| .output_y_end(output_y_end) |
| .Test(xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x4__neonfma_2x1); |
| } |
| } |
| } |
| } |
| |
| TEST(F32_CONV_3X3S2P0P1C3X4__NEONFMA_2X1, qmin) { |
| TEST_REQUIRES_ARM_NEON_FMA; |
| for (size_t output_channels = 1; output_channels < 8; output_channels += 3) { |
| for (size_t input_width = 2; input_width < 16; input_width += 3) { |
| ConvHWCMicrokernelTester() |
| .kernel_size(3) |
| .subsampling(2) |
| .padding_right(1) |
| .input_channels(3) |
| .output_channels_tile(4) |
| .output_channels(output_channels) |
| .input_width(input_width) |
| .input_height(6) |
| .qmin(128) |
| .Test(xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x4__neonfma_2x1); |
| } |
| } |
| } |
| |
| TEST(F32_CONV_3X3S2P0P1C3X4__NEONFMA_2X1, qmax) { |
| TEST_REQUIRES_ARM_NEON_FMA; |
| for (size_t output_channels = 1; output_channels < 8; output_channels += 3) { |
| for (size_t input_width = 2; input_width < 16; input_width += 3) { |
| ConvHWCMicrokernelTester() |
| .kernel_size(3) |
| .subsampling(2) |
| .padding_right(1) |
| .input_channels(3) |
| .output_channels_tile(4) |
| .output_channels(output_channels) |
| .input_width(input_width) |
| .input_height(6) |
| .qmax(128) |
| .Test(xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x4__neonfma_2x1); |
| } |
| } |
| } |
| #endif // XNN_ARCH_ARM64 |
| |
| #if XNN_ARCH_ARM64 |
| TEST(F32_CONV_3X3S2P1C3X8__NEONFMA_2X1, input_width_eq_2) { |
| TEST_REQUIRES_ARM_NEON_FMA; |
| ConvHWCMicrokernelTester() |
| .kernel_size(3) |
| .subsampling(2) |
| .padding_width(1) |
| .input_channels(3) |
| .output_channels_tile(8) |
| .output_channels(8) |
| .input_width(2) |
| .input_height(3) |
| .Test(xnn_f32_conv_hwc_ukernel_3x3s2p1c3x8__neonfma_2x1); |
| } |
| |
| TEST(F32_CONV_3X3S2P1C3X8__NEONFMA_2X1, input_width_div_2) { |
| TEST_REQUIRES_ARM_NEON_FMA; |
| for (size_t input_width = 4; input_width <= 16; input_width += 6) { |
| ConvHWCMicrokernelTester() |
| .kernel_size(3) |
| .subsampling(2) |
| .padding_width(1) |
| .input_channels(3) |
| .output_channels_tile(8) |
| .output_channels(8) |
| .input_width(input_width) |
| .input_height(3) |
| .Test(xnn_f32_conv_hwc_ukernel_3x3s2p1c3x8__neonfma_2x1); |
| } |
| } |
| |
| TEST(F32_CONV_3X3S2P1C3X8__NEONFMA_2X1, input_width_gt_2) { |
| TEST_REQUIRES_ARM_NEON_FMA; |
| for (size_t input_width = 3; input_width < 4; input_width++) { |
| ConvHWCMicrokernelTester() |
| .kernel_size(3) |
| .subsampling(2) |
| .padding_width(1) |
| .input_channels(3) |
| .output_channels_tile(8) |
| .output_channels(8) |
| .input_width(input_width) |
| .input_height(3) |
| .Test(xnn_f32_conv_hwc_ukernel_3x3s2p1c3x8__neonfma_2x1); |
| } |
| } |
| |
| TEST(F32_CONV_3X3S2P1C3X8__NEONFMA_2X1, output_channels_lt_8) { |
| TEST_REQUIRES_ARM_NEON_FMA; |
| for (size_t output_channels = 1; output_channels < 8; output_channels++) { |
| for (size_t input_width = 1; input_width < 16; input_width += 3) { |
| ConvHWCMicrokernelTester() |
| .kernel_size(3) |
| .subsampling(2) |
| .padding_width(1) |
| .input_channels(3) |
| .output_channels_tile(8) |
| .output_channels(output_channels) |
| .input_width(input_width) |
| .input_height(3) |
| .Test(xnn_f32_conv_hwc_ukernel_3x3s2p1c3x8__neonfma_2x1); |
| } |
| } |
| } |
| |
| TEST(F32_CONV_3X3S2P1C3X8__NEONFMA_2X1, output_channels_div_8) { |
| TEST_REQUIRES_ARM_NEON_FMA; |
| for (size_t output_channels = 16; output_channels <= 32; output_channels += 8) { |
| for (size_t input_width = 1; input_width < 16; input_width += 3) { |
| ConvHWCMicrokernelTester() |
| .kernel_size(3) |
| .subsampling(2) |
| .padding_width(1) |
| .input_channels(3) |
| .output_channels_tile(8) |
| .output_channels(output_channels) |
| .input_width(input_width) |
| .input_height(3) |
| .Test(xnn_f32_conv_hwc_ukernel_3x3s2p1c3x8__neonfma_2x1); |
| } |
| } |
| } |
| |
| TEST(F32_CONV_3X3S2P1C3X8__NEONFMA_2X1, output_channels_gt_8) { |
| TEST_REQUIRES_ARM_NEON_FMA; |
| for (size_t output_channels = 9; output_channels < 16; output_channels++) { |
| for (size_t input_width = 1; input_width < 16; input_width += 3) { |
| ConvHWCMicrokernelTester() |
| .kernel_size(3) |
| .subsampling(2) |
| .padding_width(1) |
| .input_channels(3) |
| .output_channels_tile(8) |
| .output_channels(output_channels) |
| .input_width(input_width) |
| .input_height(3) |
| .Test(xnn_f32_conv_hwc_ukernel_3x3s2p1c3x8__neonfma_2x1); |
| } |
| } |
| } |
| |
| TEST(F32_CONV_3X3S2P1C3X8__NEONFMA_2X1, input_height_lt_3) { |
| TEST_REQUIRES_ARM_NEON_FMA; |
| for (size_t input_height = 1; input_height < 3; input_height++) { |
| for (size_t output_channels = 1; output_channels < 16; output_channels += 7) { |
| for (size_t input_width = 1; input_width < 16; input_width += 3) { |
| ConvHWCMicrokernelTester() |
| .kernel_size(3) |
| .subsampling(2) |
| .padding_height(1) |
| .padding_width(1) |
| .input_channels(3) |
| .output_channels_tile(8) |
| .output_channels(output_channels) |
| .input_width(input_width) |
| .input_height(input_height) |
| .Test(xnn_f32_conv_hwc_ukernel_3x3s2p1c3x8__neonfma_2x1); |
| } |
| } |
| } |
| } |
| |
| TEST(F32_CONV_3X3S2P1C3X8__NEONFMA_2X1, input_height_gt_3) { |
| TEST_REQUIRES_ARM_NEON_FMA; |
| for (size_t input_height = 4; input_height <= 9; input_height++) { |
| for (size_t output_channels = 1; output_channels < 16; output_channels += 7) { |
| for (size_t input_width = 1; input_width < 16; input_width += 3) { |
| ConvHWCMicrokernelTester() |
| .kernel_size(3) |
| .subsampling(2) |
| .padding_width(1) |
| .input_channels(3) |
| .output_channels_tile(8) |
| .output_channels(output_channels) |
| .input_width(input_width) |
| .input_height(input_height) |
| .Test(xnn_f32_conv_hwc_ukernel_3x3s2p1c3x8__neonfma_2x1); |
| } |
| } |
| } |
| } |
| |
| TEST(F32_CONV_3X3S2P1C3X8__NEONFMA_2X1, padding_top) { |
| TEST_REQUIRES_ARM_NEON_FMA; |
| for (size_t padding_top = 0; padding_top <= 1; padding_top++) { |
| for (size_t output_channels = 1; output_channels < 16; output_channels += 7) { |
| for (size_t input_width = 1; input_width < 16; input_width += 3) { |
| ConvHWCMicrokernelTester() |
| .kernel_size(3) |
| .subsampling(2) |
| .padding_width(1) |
| .padding_top(padding_top) |
| .input_channels(3) |
| .output_channels_tile(8) |
| .output_channels(output_channels) |
| .input_width(input_width) |
| .input_height(9) |
| .Test(xnn_f32_conv_hwc_ukernel_3x3s2p1c3x8__neonfma_2x1); |
| } |
| } |
| } |
| } |
| |
| TEST(F32_CONV_3X3S2P1C3X8__NEONFMA_2X1, padding_bottom) { |
| TEST_REQUIRES_ARM_NEON_FMA; |
| for (size_t padding_bottom = 0; padding_bottom <= 1; padding_bottom++) { |
| for (size_t output_channels = 1; output_channels < 16; output_channels += 7) { |
| for (size_t input_width = 1; input_width < 16; input_width += 3) { |
| ConvHWCMicrokernelTester() |
| .kernel_size(3) |
| .subsampling(2) |
| .padding_width(1) |
| .padding_bottom(padding_bottom) |
| .input_channels(3) |
| .output_channels_tile(8) |
| .output_channels(output_channels) |
| .input_width(input_width) |
| .input_height(9) |
| .Test(xnn_f32_conv_hwc_ukernel_3x3s2p1c3x8__neonfma_2x1); |
| } |
| } |
| } |
| } |
| |
| TEST(F32_CONV_3X3S2P1C3X8__NEONFMA_2X1, output_y_start) { |
| TEST_REQUIRES_ARM_NEON_FMA; |
| for (size_t output_y_start = 1; output_y_start <= 3; output_y_start++) { |
| for (size_t output_channels = 1; output_channels < 16; output_channels += 7) { |
| for (size_t input_width = 1; input_width < 16; input_width += 3) { |
| ConvHWCMicrokernelTester() |
| .kernel_size(3) |
| .subsampling(2) |
| .padding_width(1) |
| .input_channels(3) |
| .output_channels_tile(8) |
| .output_channels(output_channels) |
| .input_width(input_width) |
| .input_height(9) |
| .output_y_start(output_y_start) |
| .Test(xnn_f32_conv_hwc_ukernel_3x3s2p1c3x8__neonfma_2x1); |
| } |
| } |
| } |
| } |
| |
| TEST(F32_CONV_3X3S2P1C3X8__NEONFMA_2X1, output_y_end) { |
| TEST_REQUIRES_ARM_NEON_FMA; |
| for (size_t output_y_end = 2; output_y_end < 5; output_y_end++) { |
| for (size_t output_channels = 1; output_channels < 16; output_channels += 7) { |
| for (size_t input_width = 1; input_width < 16; input_width += 3) { |
| ConvHWCMicrokernelTester() |
| .kernel_size(3) |
| .subsampling(2) |
| .padding_width(1) |
| .input_channels(3) |
| .output_channels_tile(8) |
| .output_channels(output_channels) |
| .input_width(input_width) |
| .input_height(9) |
| .output_y_end(output_y_end) |
| .Test(xnn_f32_conv_hwc_ukernel_3x3s2p1c3x8__neonfma_2x1); |
| } |
| } |
| } |
| } |
| |
| TEST(F32_CONV_3X3S2P1C3X8__NEONFMA_2X1, qmin) { |
| TEST_REQUIRES_ARM_NEON_FMA; |
| for (size_t output_channels = 1; output_channels < 16; output_channels += 7) { |
| for (size_t input_width = 1; input_width < 16; input_width += 3) { |
| ConvHWCMicrokernelTester() |
| .kernel_size(3) |
| .subsampling(2) |
| .padding_width(1) |
| .input_channels(3) |
| .output_channels_tile(8) |
| .output_channels(output_channels) |
| .input_width(input_width) |
| .input_height(6) |
| .qmin(128) |
| .Test(xnn_f32_conv_hwc_ukernel_3x3s2p1c3x8__neonfma_2x1); |
| } |
| } |
| } |
| |
| TEST(F32_CONV_3X3S2P1C3X8__NEONFMA_2X1, qmax) { |
| TEST_REQUIRES_ARM_NEON_FMA; |
| for (size_t output_channels = 1; output_channels < 16; output_channels += 7) { |
| for (size_t input_width = 1; input_width < 16; input_width += 3) { |
| ConvHWCMicrokernelTester() |
| .kernel_size(3) |
| .subsampling(2) |
| .padding_width(1) |
| .input_channels(3) |
| .output_channels_tile(8) |
| .output_channels(output_channels) |
| .input_width(input_width) |
| .input_height(6) |
| .qmax(128) |
| .Test(xnn_f32_conv_hwc_ukernel_3x3s2p1c3x8__neonfma_2x1); |
| } |
| } |
| } |
| #endif // XNN_ARCH_ARM64 |
| |
| #if XNN_ARCH_ARM64 |
| TEST(F32_CONV_3X3S2P1C3X4__NEONFMA_2X1, input_width_eq_2) { |
| TEST_REQUIRES_ARM_NEON_FMA; |
| ConvHWCMicrokernelTester() |
| .kernel_size(3) |
| .subsampling(2) |
| .padding_width(1) |
| .input_channels(3) |
| .output_channels_tile(4) |
| .output_channels(4) |
| .input_width(2) |
| .input_height(3) |
| .Test(xnn_f32_conv_hwc_ukernel_3x3s2p1c3x4__neonfma_2x1); |
| } |
| |
| TEST(F32_CONV_3X3S2P1C3X4__NEONFMA_2X1, input_width_div_2) { |
| TEST_REQUIRES_ARM_NEON_FMA; |
| for (size_t input_width = 4; input_width <= 16; input_width += 6) { |
| ConvHWCMicrokernelTester() |
| .kernel_size(3) |
| .subsampling(2) |
| .padding_width(1) |
| .input_channels(3) |
| .output_channels_tile(4) |
| .output_channels(4) |
| .input_width(input_width) |
| .input_height(3) |
| .Test(xnn_f32_conv_hwc_ukernel_3x3s2p1c3x4__neonfma_2x1); |
| } |
| } |
| |
| TEST(F32_CONV_3X3S2P1C3X4__NEONFMA_2X1, input_width_gt_2) { |
| TEST_REQUIRES_ARM_NEON_FMA; |
| for (size_t input_width = 3; input_width < 4; input_width++) { |
| ConvHWCMicrokernelTester() |
| .kernel_size(3) |
| .subsampling(2) |
| .padding_width(1) |
| .input_channels(3) |
| .output_channels_tile(4) |
| .output_channels(4) |
| .input_width(input_width) |
| .input_height(3) |
| .Test(xnn_f32_conv_hwc_ukernel_3x3s2p1c3x4__neonfma_2x1); |
| } |
| } |
| |
| TEST(F32_CONV_3X3S2P1C3X4__NEONFMA_2X1, output_channels_lt_4) { |
| TEST_REQUIRES_ARM_NEON_FMA; |
| for (size_t output_channels = 1; output_channels < 4; output_channels++) { |
| for (size_t input_width = 1; input_width < 16; input_width += 3) { |
| ConvHWCMicrokernelTester() |
| .kernel_size(3) |
| .subsampling(2) |
| .padding_width(1) |
| .input_channels(3) |
| .output_channels_tile(4) |
| .output_channels(output_channels) |
| .input_width(input_width) |
| .input_height(3) |
| .Test(xnn_f32_conv_hwc_ukernel_3x3s2p1c3x4__neonfma_2x1); |
| } |
| } |
| } |
| |
| TEST(F32_CONV_3X3S2P1C3X4__NEONFMA_2X1, output_channels_div_4) { |
| TEST_REQUIRES_ARM_NEON_FMA; |
| for (size_t output_channels = 8; output_channels <= 16; output_channels += 4) { |
| for (size_t input_width = 1; input_width < 16; input_width += 3) { |
| ConvHWCMicrokernelTester() |
| .kernel_size(3) |
| .subsampling(2) |
| .padding_width(1) |
| .input_channels(3) |
| .output_channels_tile(4) |
| .output_channels(output_channels) |
| .input_width(input_width) |
| .input_height(3) |
| .Test(xnn_f32_conv_hwc_ukernel_3x3s2p1c3x4__neonfma_2x1); |
| } |
| } |
| } |
| |
| TEST(F32_CONV_3X3S2P1C3X4__NEONFMA_2X1, output_channels_gt_4) { |
| TEST_REQUIRES_ARM_NEON_FMA; |
| for (size_t output_channels = 5; output_channels < 8; output_channels++) { |
| for (size_t input_width = 1; input_width < 16; input_width += 3) { |
| ConvHWCMicrokernelTester() |
| .kernel_size(3) |
| .subsampling(2) |
| .padding_width(1) |
| .input_channels(3) |
| .output_channels_tile(4) |
| .output_channels(output_channels) |
| .input_width(input_width) |
| .input_height(3) |
| .Test(xnn_f32_conv_hwc_ukernel_3x3s2p1c3x4__neonfma_2x1); |
| } |
| } |
| } |
| |
| TEST(F32_CONV_3X3S2P1C3X4__NEONFMA_2X1, input_height_lt_3) { |
| TEST_REQUIRES_ARM_NEON_FMA; |
| for (size_t input_height = 1; input_height < 3; input_height++) { |
| for (size_t output_channels = 1; output_channels < 8; output_channels += 3) { |
| for (size_t input_width = 1; input_width < 16; input_width += 3) { |
| ConvHWCMicrokernelTester() |
| .kernel_size(3) |
| .subsampling(2) |
| .padding_width(1) |
| .padding_height(1) // padded input height of at least 3 required |
| .input_channels(3) |
| .output_channels_tile(4) |
| .output_channels(output_channels) |
| .input_width(input_width) |
| .input_height(input_height) |
| .Test(xnn_f32_conv_hwc_ukernel_3x3s2p1c3x4__neonfma_2x1); |
| } |
| } |
| } |
| } |
| |
| TEST(F32_CONV_3X3S2P1C3X4__NEONFMA_2X1, input_height_gt_3) { |
| TEST_REQUIRES_ARM_NEON_FMA; |
| for (size_t input_height = 4; input_height <= 9; input_height++) { |
| for (size_t output_channels = 1; output_channels < 8; output_channels += 3) { |
| for (size_t input_width = 1; input_width < 16; input_width += 3) { |
| ConvHWCMicrokernelTester() |
| .kernel_size(3) |
| .subsampling(2) |
| .padding_width(1) |
| .input_channels(3) |
| .output_channels_tile(4) |
| .output_channels(output_channels) |
| .input_width(input_width) |
| .input_height(input_height) |
| .Test(xnn_f32_conv_hwc_ukernel_3x3s2p1c3x4__neonfma_2x1); |
| } |
| } |
| } |
| } |
| |
| TEST(F32_CONV_3X3S2P1C3X4__NEONFMA_2X1, padding_top) { |
| TEST_REQUIRES_ARM_NEON_FMA; |
| for (size_t padding_top = 0; padding_top <= 1; padding_top++) { |
| for (size_t output_channels = 1; output_channels < 16; output_channels += 7) { |
| for (size_t input_width = 1; input_width < 16; input_width += 3) { |
| ConvHWCMicrokernelTester() |
| .kernel_size(3) |
| .subsampling(2) |
| .padding_width(1) |
| .padding_top(padding_top) |
| .input_channels(3) |
| .output_channels_tile(4) |
| .output_channels(output_channels) |
| .input_width(input_width) |
| .input_height(9) |
| .Test(xnn_f32_conv_hwc_ukernel_3x3s2p1c3x4__neonfma_2x1); |
| } |
| } |
| } |
| } |
| |
| TEST(F32_CONV_3X3S2P1C3X4__NEONFMA_2X1, padding_bottom) { |
| TEST_REQUIRES_ARM_NEON_FMA; |
| for (size_t padding_bottom = 0; padding_bottom <= 1; padding_bottom++) { |
| for (size_t output_channels = 1; output_channels < 16; output_channels += 7) { |
| for (size_t input_width = 1; input_width < 16; input_width += 3) { |
| ConvHWCMicrokernelTester() |
| .kernel_size(3) |
| .subsampling(2) |
| .padding_width(1) |
| .padding_bottom(padding_bottom) |
| .input_channels(3) |
| .output_channels_tile(4) |
| .output_channels(output_channels) |
| .input_width(input_width) |
| .input_height(9) |
| .Test(xnn_f32_conv_hwc_ukernel_3x3s2p1c3x4__neonfma_2x1); |
| } |
| } |
| } |
| } |
| |
| TEST(F32_CONV_3X3S2P1C3X4__NEONFMA_2X1, output_y_start) { |
| TEST_REQUIRES_ARM_NEON_FMA; |
| for (size_t output_y_start = 1; output_y_start <= 3; output_y_start++) { |
| for (size_t output_channels = 1; output_channels < 8; output_channels += 3) { |
| for (size_t input_width = 1; input_width < 16; input_width += 3) { |
| ConvHWCMicrokernelTester() |
| .kernel_size(3) |
| .subsampling(2) |
| .padding_width(1) |
| .input_channels(3) |
| .output_channels_tile(4) |
| .output_channels(output_channels) |
| .input_width(input_width) |
| .input_height(9) |
| .output_y_start(output_y_start) |
| .Test(xnn_f32_conv_hwc_ukernel_3x3s2p1c3x4__neonfma_2x1); |
| } |
| } |
| } |
| } |
| |
| TEST(F32_CONV_3X3S2P1C3X4__NEONFMA_2X1, output_y_end) { |
| TEST_REQUIRES_ARM_NEON_FMA; |
| for (size_t output_y_end = 2; output_y_end < 5; output_y_end++) { |
| for (size_t output_channels = 1; output_channels < 8; output_channels += 3) { |
| for (size_t input_width = 1; input_width < 16; input_width += 3) { |
| ConvHWCMicrokernelTester() |
| .kernel_size(3) |
| .subsampling(2) |
| .padding_width(1) |
| .input_channels(3) |
| .output_channels_tile(4) |
| .output_channels(output_channels) |
| .input_width(input_width) |
| .input_height(9) |
| .output_y_end(output_y_end) |
| .Test(xnn_f32_conv_hwc_ukernel_3x3s2p1c3x4__neonfma_2x1); |
| } |
| } |
| } |
| } |
| |
| TEST(F32_CONV_3X3S2P1C3X4__NEONFMA_2X1, qmin) { |
| TEST_REQUIRES_ARM_NEON_FMA; |
| for (size_t output_channels = 1; output_channels < 8; output_channels += 3) { |
| for (size_t input_width = 1; input_width < 16; input_width += 3) { |
| ConvHWCMicrokernelTester() |
| .kernel_size(3) |
| .subsampling(2) |
| .padding_width(1) |
| .input_channels(3) |
| .output_channels_tile(4) |
| .output_channels(output_channels) |
| .input_width(input_width) |
| .input_height(6) |
| .qmin(128) |
| .Test(xnn_f32_conv_hwc_ukernel_3x3s2p1c3x4__neonfma_2x1); |
| } |
| } |
| } |
| |
| TEST(F32_CONV_3X3S2P1C3X4__NEONFMA_2X1, qmax) { |
| TEST_REQUIRES_ARM_NEON_FMA; |
| for (size_t output_channels = 1; output_channels < 8; output_channels += 3) { |
| for (size_t input_width = 1; input_width < 16; input_width += 3) { |
| ConvHWCMicrokernelTester() |
| .kernel_size(3) |
| .subsampling(2) |
| .padding_width(1) |
| .input_channels(3) |
| .output_channels_tile(4) |
| .output_channels(output_channels) |
| .input_width(input_width) |
| .input_height(6) |
| .qmax(128) |
| .Test(xnn_f32_conv_hwc_ukernel_3x3s2p1c3x4__neonfma_2x1); |
| } |
| } |
| } |
| #endif // XNN_ARCH_ARM64 |
| |
| #if XNN_ARCH_ARM || XNN_ARCH_ARM64 |
| TEST(F32_CONV_3X3S2P1C3X8__NEON_2X1, input_width_eq_2) { |
| TEST_REQUIRES_ARM_NEON; |
| ConvHWCMicrokernelTester() |
| .kernel_size(3) |
| .subsampling(2) |
| .padding_width(1) |
| .input_channels(3) |
| .output_channels_tile(8) |
| .output_channels(8) |
| .input_width(2) |
| .input_height(3) |
| .Test(xnn_f32_conv_hwc_ukernel_3x3s2p1c3x8__neon_2x1); |
| } |
| |
| TEST(F32_CONV_3X3S2P1C3X8__NEON_2X1, input_width_div_2) { |
| TEST_REQUIRES_ARM_NEON; |
| for (size_t input_width = 4; input_width <= 16; input_width += 6) { |
| ConvHWCMicrokernelTester() |
| .kernel_size(3) |
| .subsampling(2) |
| .padding_width(1) |
| .input_channels(3) |
| .output_channels_tile(8) |
| .output_channels(8) |
| .input_width(input_width) |
| .input_height(3) |
| .Test(xnn_f32_conv_hwc_ukernel_3x3s2p1c3x8__neon_2x1); |
| } |
| } |
| |
| TEST(F32_CONV_3X3S2P1C3X8__NEON_2X1, input_width_gt_2) { |
| TEST_REQUIRES_ARM_NEON; |
| for (size_t input_width = 3; input_width < 4; input_width++) { |
| ConvHWCMicrokernelTester() |
| .kernel_size(3) |
| .subsampling(2) |
| .padding_width(1) |
| .input_channels(3) |
| .output_channels_tile(8) |
| .output_channels(8) |
| .input_width(input_width) |
| .input_height(3) |
| .Test(xnn_f32_conv_hwc_ukernel_3x3s2p1c3x8__neon_2x1); |
| } |
| } |
| |
| TEST(F32_CONV_3X3S2P1C3X8__NEON_2X1, output_channels_lt_8) { |
| TEST_REQUIRES_ARM_NEON; |
| for (size_t output_channels = 1; output_channels < 8; output_channels++) { |
| for (size_t input_width = 1; input_width < 16; input_width += 3) { |
| ConvHWCMicrokernelTester() |
| .kernel_size(3) |
| .subsampling(2) |
| .padding_width(1) |
| .input_channels(3) |
| .output_channels_tile(8) |
| .output_channels(output_channels) |
| .input_width(input_width) |
| .input_height(3) |
| .Test(xnn_f32_conv_hwc_ukernel_3x3s2p1c3x8__neon_2x1); |
| } |
| } |
| } |
| |
| TEST(F32_CONV_3X3S2P1C3X8__NEON_2X1, output_channels_div_8) { |
| TEST_REQUIRES_ARM_NEON; |
| for (size_t output_channels = 16; output_channels <= 32; output_channels += 8) { |
| for (size_t input_width = 1; input_width < 16; input_width += 3) { |
| ConvHWCMicrokernelTester() |
| .kernel_size(3) |
| .subsampling(2) |
| .padding_width(1) |
| .input_channels(3) |
| .output_channels_tile(8) |
| .output_channels(output_channels) |
| .input_width(input_width) |
| .input_height(3) |
| .Test(xnn_f32_conv_hwc_ukernel_3x3s2p1c3x8__neon_2x1); |
| } |
| } |
| } |
| |
| TEST(F32_CONV_3X3S2P1C3X8__NEON_2X1, output_channels_gt_8) { |
| TEST_REQUIRES_ARM_NEON; |
| for (size_t output_channels = 9; output_channels < 16; output_channels++) { |
| for (size_t input_width = 1; input_width < 16; input_width += 3) { |
| ConvHWCMicrokernelTester() |
| .kernel_size(3) |
| .subsampling(2) |
| .padding_width(1) |
| .input_channels(3) |
| .output_channels_tile(8) |
| .output_channels(output_channels) |
| .input_width(input_width) |
| .input_height(3) |
| .Test(xnn_f32_conv_hwc_ukernel_3x3s2p1c3x8__neon_2x1); |
| } |
| } |
| } |
| |
| TEST(F32_CONV_3X3S2P1C3X8__NEON_2X1, input_height_lt_3) { |
| TEST_REQUIRES_ARM_NEON; |
| for (size_t input_height = 1; input_height < 3; input_height++) { |
| for (size_t output_channels = 1; output_channels < 16; output_channels += 7) { |
| for (size_t input_width = 1; input_width < 16; input_width += 3) { |
| ConvHWCMicrokernelTester() |
| .kernel_size(3) |
| .subsampling(2) |
| .padding_height(1) |
| .padding_width(1) |
| .input_channels(3) |
| .output_channels_tile(8) |
| .output_channels(output_channels) |
| .input_width(input_width) |
| .input_height(input_height) |
| .Test(xnn_f32_conv_hwc_ukernel_3x3s2p1c3x8__neon_2x1); |
| } |
| } |
| } |
| } |
| |
| TEST(F32_CONV_3X3S2P1C3X8__NEON_2X1, input_height_gt_3) { |
| TEST_REQUIRES_ARM_NEON; |
| for (size_t input_height = 4; input_height <= 9; input_height++) { |
| for (size_t output_channels = 1; output_channels < 16; output_channels += 7) { |
| for (size_t input_width = 1; input_width < 16; input_width += 3) { |
| ConvHWCMicrokernelTester() |
| .kernel_size(3) |
| .subsampling(2) |
| .padding_width(1) |
| .input_channels(3) |
| .output_channels_tile(8) |
| .output_channels(output_channels) |
| .input_width(input_width) |
| .input_height(input_height) |
| .Test(xnn_f32_conv_hwc_ukernel_3x3s2p1c3x8__neon_2x1); |
| } |
| } |
| } |
| } |
| |
| TEST(F32_CONV_3X3S2P1C3X8__NEON_2X1, padding_top) { |
| TEST_REQUIRES_ARM_NEON; |
| for (size_t padding_top = 0; padding_top <= 1; padding_top++) { |
| for (size_t output_channels = 1; output_channels < 16; output_channels += 7) { |
| for (size_t input_width = 1; input_width < 16; input_width += 3) { |
| ConvHWCMicrokernelTester() |
| .kernel_size(3) |
| .subsampling(2) |
| .padding_width(1) |
| .padding_top(padding_top) |
| .input_channels(3) |
| .output_channels_tile(8) |
| .output_channels(output_channels) |
| .input_width(input_width) |
| .input_height(9) |
| .Test(xnn_f32_conv_hwc_ukernel_3x3s2p1c3x8__neon_2x1); |
| } |
| } |
| } |
| } |
| |
| TEST(F32_CONV_3X3S2P1C3X8__NEON_2X1, padding_bottom) { |
| TEST_REQUIRES_ARM_NEON; |
| for (size_t padding_bottom = 0; padding_bottom <= 1; padding_bottom++) { |
| for (size_t output_channels = 1; output_channels < 16; output_channels += 7) { |
| for (size_t input_width = 1; input_width < 16; input_width += 3) { |
| ConvHWCMicrokernelTester() |
| .kernel_size(3) |
| .subsampling(2) |
| .padding_width(1) |
| .padding_bottom(padding_bottom) |
| .input_channels(3) |
| .output_channels_tile(8) |
| .output_channels(output_channels) |
| .input_width(input_width) |
| .input_height(9) |
| .Test(xnn_f32_conv_hwc_ukernel_3x3s2p1c3x8__neon_2x1); |
| } |
| } |
| } |
| } |
| |
| TEST(F32_CONV_3X3S2P1C3X8__NEON_2X1, output_y_start) { |
| TEST_REQUIRES_ARM_NEON; |
| for (size_t output_y_start = 1; output_y_start <= 3; output_y_start++) { |
| for (size_t output_channels = 1; output_channels < 16; output_channels += 7) { |
| for (size_t input_width = 1; input_width < 16; input_width += 3) { |
| ConvHWCMicrokernelTester() |
| .kernel_size(3) |
| .subsampling(2) |
| .padding_width(1) |
| .input_channels(3) |
| .output_channels_tile(8) |
| .output_channels(output_channels) |
| .input_width(input_width) |
| .input_height(9) |
| .output_y_start(output_y_start) |
| .Test(xnn_f32_conv_hwc_ukernel_3x3s2p1c3x8__neon_2x1); |
| } |
| } |
| } |
| } |
| |
| TEST(F32_CONV_3X3S2P1C3X8__NEON_2X1, output_y_end) { |
| TEST_REQUIRES_ARM_NEON; |
| for (size_t output_y_end = 2; output_y_end < 5; output_y_end++) { |
| for (size_t output_channels = 1; output_channels < 16; output_channels += 7) { |
| for (size_t input_width = 1; input_width < 16; input_width += 3) { |
| ConvHWCMicrokernelTester() |
| .kernel_size(3) |
| .subsampling(2) |
| .padding_width(1) |
| .input_channels(3) |
| .output_channels_tile(8) |
| .output_channels(output_channels) |
| .input_width(input_width) |
| .input_height(9) |
| .output_y_end(output_y_end) |
| .Test(xnn_f32_conv_hwc_ukernel_3x3s2p1c3x8__neon_2x1); |
| } |
| } |
| } |
| } |
| |
| TEST(F32_CONV_3X3S2P1C3X8__NEON_2X1, qmin) { |
| TEST_REQUIRES_ARM_NEON; |
| for (size_t output_channels = 1; output_channels < 16; output_channels += 7) { |
| for (size_t input_width = 1; input_width < 16; input_width += 3) { |
| ConvHWCMicrokernelTester() |
| .kernel_size(3) |
| .subsampling(2) |
| .padding_width(1) |
| .input_channels(3) |
| .output_channels_tile(8) |
| .output_channels(output_channels) |
| .input_width(input_width) |
| .input_height(6) |
| .qmin(128) |
| .Test(xnn_f32_conv_hwc_ukernel_3x3s2p1c3x8__neon_2x1); |
| } |
| } |
| } |
| |
| TEST(F32_CONV_3X3S2P1C3X8__NEON_2X1, qmax) { |
| TEST_REQUIRES_ARM_NEON; |
| for (size_t output_channels = 1; output_channels < 16; output_channels += 7) { |
| for (size_t input_width = 1; input_width < 16; input_width += 3) { |
| ConvHWCMicrokernelTester() |
| .kernel_size(3) |
| .subsampling(2) |
| .padding_width(1) |
| .input_channels(3) |
| .output_channels_tile(8) |
| .output_channels(output_channels) |
| .input_width(input_width) |
| .input_height(6) |
| .qmax(128) |
| .Test(xnn_f32_conv_hwc_ukernel_3x3s2p1c3x8__neon_2x1); |
| } |
| } |
| } |
| #endif // XNN_ARCH_ARM || XNN_ARCH_ARM64 |
| |
| #if XNN_ARCH_ARM || XNN_ARCH_ARM64 |
| TEST(F32_CONV_3X3S2P1C3X4__NEON_2X1, input_width_eq_2) { |
| TEST_REQUIRES_ARM_NEON; |
| ConvHWCMicrokernelTester() |
| .kernel_size(3) |
| .subsampling(2) |
| .padding_width(1) |
| .input_channels(3) |
| .output_channels_tile(4) |
| .output_channels(4) |
| .input_width(2) |
| .input_height(3) |
| .Test(xnn_f32_conv_hwc_ukernel_3x3s2p1c3x4__neon_2x1); |
| } |
| |
| TEST(F32_CONV_3X3S2P1C3X4__NEON_2X1, input_width_div_2) { |
| TEST_REQUIRES_ARM_NEON; |
| for (size_t input_width = 4; input_width <= 16; input_width += 6) { |
| ConvHWCMicrokernelTester() |
| .kernel_size(3) |
| .subsampling(2) |
| .padding_width(1) |
| .input_channels(3) |
| .output_channels_tile(4) |
| .output_channels(4) |
| .input_width(input_width) |
| .input_height(3) |
| .Test(xnn_f32_conv_hwc_ukernel_3x3s2p1c3x4__neon_2x1); |
| } |
| } |
| |
| TEST(F32_CONV_3X3S2P1C3X4__NEON_2X1, input_width_gt_2) { |
| TEST_REQUIRES_ARM_NEON; |
| for (size_t input_width = 3; input_width < 4; input_width++) { |
| ConvHWCMicrokernelTester() |
| .kernel_size(3) |
| .subsampling(2) |
| .padding_width(1) |
| .input_channels(3) |
| .output_channels_tile(4) |
| .output_channels(4) |
| .input_width(input_width) |
| .input_height(3) |
| .Test(xnn_f32_conv_hwc_ukernel_3x3s2p1c3x4__neon_2x1); |
| } |
| } |
| |
| TEST(F32_CONV_3X3S2P1C3X4__NEON_2X1, output_channels_lt_4) { |
| TEST_REQUIRES_ARM_NEON; |
| for (size_t output_channels = 1; output_channels < 4; output_channels++) { |
| for (size_t input_width = 1; input_width < 16; input_width += 3) { |
| ConvHWCMicrokernelTester() |
| .kernel_size(3) |
| .subsampling(2) |
| .padding_width(1) |
| .input_channels(3) |
| .output_channels_tile(4) |
| .output_channels(output_channels) |
| .input_width(input_width) |
| .input_height(3) |
| .Test(xnn_f32_conv_hwc_ukernel_3x3s2p1c3x4__neon_2x1); |
| } |
| } |
| } |
| |
| TEST(F32_CONV_3X3S2P1C3X4__NEON_2X1, output_channels_div_4) { |
| TEST_REQUIRES_ARM_NEON; |
| for (size_t output_channels = 8; output_channels <= 16; output_channels += 4) { |
| for (size_t input_width = 1; input_width < 16; input_width += 3) { |
| ConvHWCMicrokernelTester() |
| .kernel_size(3) |
| .subsampling(2) |
| .padding_width(1) |
| .input_channels(3) |
| .output_channels_tile(4) |
| .output_channels(output_channels) |
| .input_width(input_width) |
| .input_height(3) |
| .Test(xnn_f32_conv_hwc_ukernel_3x3s2p1c3x4__neon_2x1); |
| } |
| } |
| } |
| |
| TEST(F32_CONV_3X3S2P1C3X4__NEON_2X1, output_channels_gt_4) { |
| TEST_REQUIRES_ARM_NEON; |
| for (size_t output_channels = 5; output_channels < 8; output_channels++) { |
| for (size_t input_width = 1; input_width < 16; input_width += 3) { |
| ConvHWCMicrokernelTester() |
| .kernel_size(3) |
| .subsampling(2) |
| .padding_width(1) |
| .input_channels(3) |
| .output_channels_tile(4) |
| .output_channels(output_channels) |
| .input_width(input_width) |
| .input_height(3) |
| .Test(xnn_f32_conv_hwc_ukernel_3x3s2p1c3x4__neon_2x1); |
| } |
| } |
| } |
| |
| TEST(F32_CONV_3X3S2P1C3X4__NEON_2X1, input_height_lt_3) { |
| TEST_REQUIRES_ARM_NEON; |
| for (size_t input_height = 1; input_height < 3; input_height++) { |
| for (size_t output_channels = 1; output_channels < 8; output_channels += 3) { |
| for (size_t input_width = 1; input_width < 16; input_width += 3) { |
| ConvHWCMicrokernelTester() |
| .kernel_size(3) |
| .subsampling(2) |
| .padding_width(1) |
| .padding_height(1) // padded input height of at least 3 required |
| .input_channels(3) |
| .output_channels_tile(4) |
| .output_channels(output_channels) |
| .input_width(input_width) |
| .input_height(input_height) |
| .Test(xnn_f32_conv_hwc_ukernel_3x3s2p1c3x4__neon_2x1); |
| } |
| } |
| } |
| } |
| |
| TEST(F32_CONV_3X3S2P1C3X4__NEON_2X1, input_height_gt_3) { |
| TEST_REQUIRES_ARM_NEON; |
| for (size_t input_height = 4; input_height <= 9; input_height++) { |
| for (size_t output_channels = 1; output_channels < 8; output_channels += 3) { |
| for (size_t input_width = 1; input_width < 16; input_width += 3) { |
| ConvHWCMicrokernelTester() |
| .kernel_size(3) |
| .subsampling(2) |
| .padding_width(1) |
| .input_channels(3) |
| .output_channels_tile(4) |
| .output_channels(output_channels) |
| .input_width(input_width) |
| .input_height(input_height) |
| .Test(xnn_f32_conv_hwc_ukernel_3x3s2p1c3x4__neon_2x1); |
| } |
| } |
| } |
| } |
| |
| TEST(F32_CONV_3X3S2P1C3X4__NEON_2X1, padding_top) { |
| TEST_REQUIRES_ARM_NEON; |
| for (size_t padding_top = 0; padding_top <= 1; padding_top++) { |
| for (size_t output_channels = 1; output_channels < 16; output_channels += 7) { |
| for (size_t input_width = 1; input_width < 16; input_width += 3) { |
| ConvHWCMicrokernelTester() |
| .kernel_size(3) |
| .subsampling(2) |
| .padding_width(1) |
| .padding_top(padding_top) |
| .input_channels(3) |
| .output_channels_tile(4) |
| .output_channels(output_channels) |
| .input_width(input_width) |
| .input_height(9) |
| .Test(xnn_f32_conv_hwc_ukernel_3x3s2p1c3x4__neon_2x1); |
| } |
| } |
| } |
| } |
| |
| TEST(F32_CONV_3X3S2P1C3X4__NEON_2X1, padding_bottom) { |
| TEST_REQUIRES_ARM_NEON; |
| for (size_t padding_bottom = 0; padding_bottom <= 1; padding_bottom++) { |
| for (size_t output_channels = 1; output_channels < 16; output_channels += 7) { |
| for (size_t input_width = 1; input_width < 16; input_width += 3) { |
| ConvHWCMicrokernelTester() |
| .kernel_size(3) |
| .subsampling(2) |
| .padding_width(1) |
| .padding_bottom(padding_bottom) |
| .input_channels(3) |
| .output_channels_tile(4) |
| .output_channels(output_channels) |
| .input_width(input_width) |
| .input_height(9) |
| .Test(xnn_f32_conv_hwc_ukernel_3x3s2p1c3x4__neon_2x1); |
| } |
| } |
| } |
| } |
| |
| TEST(F32_CONV_3X3S2P1C3X4__NEON_2X1, output_y_start) { |
| TEST_REQUIRES_ARM_NEON; |
| for (size_t output_y_start = 1; output_y_start <= 3; output_y_start++) { |
| for (size_t output_channels = 1; output_channels < 8; output_channels += 3) { |
| for (size_t input_width = 1; input_width < 16; input_width += 3) { |
| ConvHWCMicrokernelTester() |
| .kernel_size(3) |
| .subsampling(2) |
| .padding_width(1) |
| .input_channels(3) |
| .output_channels_tile(4) |
| .output_channels(output_channels) |
| .input_width(input_width) |
| .input_height(9) |
| .output_y_start(output_y_start) |
| .Test(xnn_f32_conv_hwc_ukernel_3x3s2p1c3x4__neon_2x1); |
| } |
| } |
| } |
| } |
| |
| TEST(F32_CONV_3X3S2P1C3X4__NEON_2X1, output_y_end) { |
| TEST_REQUIRES_ARM_NEON; |
| for (size_t output_y_end = 2; output_y_end < 5; output_y_end++) { |
| for (size_t output_channels = 1; output_channels < 8; output_channels += 3) { |
| for (size_t input_width = 1; input_width < 16; input_width += 3) { |
| ConvHWCMicrokernelTester() |
| .kernel_size(3) |
| .subsampling(2) |
| .padding_width(1) |
| .input_channels(3) |
| .output_channels_tile(4) |
| .output_channels(output_channels) |
| .input_width(input_width) |
| .input_height(9) |
| .output_y_end(output_y_end) |
| .Test(xnn_f32_conv_hwc_ukernel_3x3s2p1c3x4__neon_2x1); |
| } |
| } |
| } |
| } |
| |
| TEST(F32_CONV_3X3S2P1C3X4__NEON_2X1, qmin) { |
| TEST_REQUIRES_ARM_NEON; |
| for (size_t output_channels = 1; output_channels < 8; output_channels += 3) { |
| for (size_t input_width = 1; input_width < 16; input_width += 3) { |
| ConvHWCMicrokernelTester() |
| .kernel_size(3) |
| .subsampling(2) |
| .padding_width(1) |
| .input_channels(3) |
| .output_channels_tile(4) |
| .output_channels(output_channels) |
| .input_width(input_width) |
| .input_height(6) |
| .qmin(128) |
| .Test(xnn_f32_conv_hwc_ukernel_3x3s2p1c3x4__neon_2x1); |
| } |
| } |
| } |
| |
| TEST(F32_CONV_3X3S2P1C3X4__NEON_2X1, qmax) { |
| TEST_REQUIRES_ARM_NEON; |
| for (size_t output_channels = 1; output_channels < 8; output_channels += 3) { |
| for (size_t input_width = 1; input_width < 16; input_width += 3) { |
| ConvHWCMicrokernelTester() |
| .kernel_size(3) |
| .subsampling(2) |
| .padding_width(1) |
| .input_channels(3) |
| .output_channels_tile(4) |
| .output_channels(output_channels) |
| .input_width(input_width) |
| .input_height(6) |
| .qmax(128) |
| .Test(xnn_f32_conv_hwc_ukernel_3x3s2p1c3x4__neon_2x1); |
| } |
| } |
| } |
| #endif // XNN_ARCH_ARM || XNN_ARCH_ARM64 |
| |
| TEST(F32_CONV_3X3S2P1C3X4__SCALAR_1X1, input_width_eq_2) { |
| TEST_REQUIRES_ARM_NEON_FMA; |
| ConvHWCMicrokernelTester() |
| .kernel_size(3) |
| .subsampling(2) |
| .padding_width(1) |
| .input_channels(3) |
| .output_channels_tile(4) |
| .output_channels(4) |
| .input_width(2) |
| .input_height(3) |
| .Test(xnn_f32_conv_hwc_ukernel_3x3s2p1c3x4__scalar_1x1); |
| } |
| |
| TEST(F32_CONV_3X3S2P1C3X4__SCALAR_1X1, input_width_div_2) { |
| TEST_REQUIRES_ARM_NEON_FMA; |
| for (size_t input_width = 4; input_width <= 16; input_width += 6) { |
| ConvHWCMicrokernelTester() |
| .kernel_size(3) |
| .subsampling(2) |
| .padding_width(1) |
| .input_channels(3) |
| .output_channels_tile(4) |
| .output_channels(4) |
| .input_width(input_width) |
| .input_height(3) |
| .Test(xnn_f32_conv_hwc_ukernel_3x3s2p1c3x4__scalar_1x1); |
| } |
| } |
| |
| TEST(F32_CONV_3X3S2P1C3X4__SCALAR_1X1, input_width_lt_2) { |
| TEST_REQUIRES_ARM_NEON_FMA; |
| for (size_t input_width = 1; input_width < 2; input_width++) { |
| ConvHWCMicrokernelTester() |
| .kernel_size(3) |
| .subsampling(2) |
| .padding_width(1) |
| .input_channels(3) |
| .output_channels_tile(4) |
| .output_channels(4) |
| .input_width(input_width) |
| .input_height(3) |
| .Test(xnn_f32_conv_hwc_ukernel_3x3s2p1c3x4__scalar_1x1); |
| } |
| } |
| |
| TEST(F32_CONV_3X3S2P1C3X4__SCALAR_1X1, input_width_gt_2) { |
| TEST_REQUIRES_ARM_NEON_FMA; |
| for (size_t input_width = 3; input_width < 4; input_width++) { |
| ConvHWCMicrokernelTester() |
| .kernel_size(3) |
| .subsampling(2) |
| .padding_width(1) |
| .input_channels(3) |
| .output_channels_tile(4) |
| .output_channels(4) |
| .input_width(input_width) |
| .input_height(3) |
| .Test(xnn_f32_conv_hwc_ukernel_3x3s2p1c3x4__scalar_1x1); |
| } |
| } |
| |
| TEST(F32_CONV_3X3S2P1C3X4__SCALAR_1X1, output_channels_lt_4) { |
| TEST_REQUIRES_ARM_NEON_FMA; |
| for (size_t output_channels = 1; output_channels < 4; output_channels++) { |
| for (size_t input_width = 1; input_width < 16; input_width += 3) { |
| ConvHWCMicrokernelTester() |
| .kernel_size(3) |
| .subsampling(2) |
| .padding_width(1) |
| .input_channels(3) |
| .output_channels_tile(4) |
| .output_channels(output_channels) |
| .input_width(input_width) |
| .input_height(3) |
| .Test(xnn_f32_conv_hwc_ukernel_3x3s2p1c3x4__scalar_1x1); |
| } |
| } |
| } |
| |
| TEST(F32_CONV_3X3S2P1C3X4__SCALAR_1X1, output_channels_div_4) { |
| TEST_REQUIRES_ARM_NEON_FMA; |
| for (size_t output_channels = 8; output_channels <= 16; output_channels += 4) { |
| for (size_t input_width = 1; input_width < 16; input_width += 3) { |
| ConvHWCMicrokernelTester() |
| .kernel_size(3) |
| .subsampling(2) |
| .padding_width(1) |
| .input_channels(3) |
| .output_channels_tile(4) |
| .output_channels(output_channels) |
| .input_width(input_width) |
| .input_height(3) |
| .Test(xnn_f32_conv_hwc_ukernel_3x3s2p1c3x4__scalar_1x1); |
| } |
| } |
| } |
| |
| TEST(F32_CONV_3X3S2P1C3X4__SCALAR_1X1, output_channels_gt_4) { |
| TEST_REQUIRES_ARM_NEON_FMA; |
| for (size_t output_channels = 5; output_channels < 8; output_channels++) { |
| for (size_t input_width = 1; input_width < 16; input_width += 3) { |
| ConvHWCMicrokernelTester() |
| .kernel_size(3) |
| .subsampling(2) |
| .padding_width(1) |
| .input_channels(3) |
| .output_channels_tile(4) |
| .output_channels(output_channels) |
| .input_width(input_width) |
| .input_height(3) |
| .Test(xnn_f32_conv_hwc_ukernel_3x3s2p1c3x4__scalar_1x1); |
| } |
| } |
| } |
| |
| TEST(F32_CONV_3X3S2P1C3X4__SCALAR_1X1, input_height_lt_3) { |
| TEST_REQUIRES_ARM_NEON_FMA; |
| for (size_t input_height = 1; input_height < 3; input_height++) { |
| for (size_t output_channels = 1; output_channels < 8; output_channels += 3) { |
| for (size_t input_width = 1; input_width < 16; input_width += 3) { |
| ConvHWCMicrokernelTester() |
| .kernel_size(3) |
| .subsampling(2) |
| .padding(1) |
| .input_channels(3) // padded input height of at least 3 required |
| .output_channels_tile(4) |
| .output_channels(output_channels) |
| .input_width(input_width) |
| .input_height(input_height) |
| .Test(xnn_f32_conv_hwc_ukernel_3x3s2p1c3x4__scalar_1x1); |
| } |
| } |
| } |
| } |
| |
| TEST(F32_CONV_3X3S2P1C3X4__SCALAR_1X1, input_height_gt_3) { |
| TEST_REQUIRES_ARM_NEON_FMA; |
| for (size_t input_height = 4; input_height <= 9; input_height++) { |
| for (size_t output_channels = 1; output_channels < 8; output_channels += 3) { |
| for (size_t input_width = 1; input_width < 16; input_width += 3) { |
| ConvHWCMicrokernelTester() |
| .kernel_size(3) |
| .subsampling(2) |
| .padding_width(1) |
| .input_channels(3) |
| .output_channels_tile(4) |
| .output_channels(output_channels) |
| .input_width(input_width) |
| .input_height(input_height) |
| .Test(xnn_f32_conv_hwc_ukernel_3x3s2p1c3x4__scalar_1x1); |
| } |
| } |
| } |
| } |
| |
| TEST(F32_CONV_3X3S2P1C3X4__SCALAR_1X1, padding_top) { |
| TEST_REQUIRES_ARM_NEON_FMA; |
| for (size_t padding_top = 0; padding_top <= 1; padding_top++) { |
| for (size_t output_channels = 1; output_channels < 16; output_channels += 7) { |
| for (size_t input_width = 1; input_width < 16; input_width += 3) { |
| ConvHWCMicrokernelTester() |
| .kernel_size(3) |
| .subsampling(2) |
| .padding_width(1) |
| .padding_top(padding_top) |
| .input_channels(3) |
| .output_channels_tile(4) |
| .output_channels(output_channels) |
| .input_width(input_width) |
| .input_height(9) |
| .Test(xnn_f32_conv_hwc_ukernel_3x3s2p1c3x4__scalar_1x1); |
| } |
| } |
| } |
| } |
| |
| TEST(F32_CONV_3X3S2P1C3X4__SCALAR_1X1, padding_bottom) { |
| TEST_REQUIRES_ARM_NEON_FMA; |
| for (size_t padding_bottom = 0; padding_bottom <= 1; padding_bottom++) { |
| for (size_t output_channels = 1; output_channels < 16; output_channels += 7) { |
| for (size_t input_width = 1; input_width < 16; input_width += 3) { |
| ConvHWCMicrokernelTester() |
| .kernel_size(3) |
| .subsampling(2) |
| .padding_width(1) |
| .padding_bottom(padding_bottom) |
| .input_channels(3) |
| .output_channels_tile(4) |
| .output_channels(output_channels) |
| .input_width(input_width) |
| .input_height(9) |
| .Test(xnn_f32_conv_hwc_ukernel_3x3s2p1c3x4__scalar_1x1); |
| } |
| } |
| } |
| } |
| |
| TEST(F32_CONV_3X3S2P1C3X4__SCALAR_1X1, output_y_start) { |
| TEST_REQUIRES_ARM_NEON_FMA; |
| for (size_t output_y_start = 1; output_y_start <= 3; output_y_start++) { |
| for (size_t output_channels = 1; output_channels < 8; output_channels += 3) { |
| for (size_t input_width = 1; input_width < 16; input_width += 3) { |
| ConvHWCMicrokernelTester() |
| .kernel_size(3) |
| .subsampling(2) |
| .padding_width(1) |
| .input_channels(3) |
| .output_channels_tile(4) |
| .output_channels(output_channels) |
| .input_width(input_width) |
| .input_height(9) |
| .output_y_start(output_y_start) |
| .Test(xnn_f32_conv_hwc_ukernel_3x3s2p1c3x4__scalar_1x1); |
| } |
| } |
| } |
| } |
| |
| TEST(F32_CONV_3X3S2P1C3X4__SCALAR_1X1, output_y_end) { |
| TEST_REQUIRES_ARM_NEON_FMA; |
| for (size_t output_y_end = 2; output_y_end < 5; output_y_end++) { |
| for (size_t output_channels = 1; output_channels < 8; output_channels += 3) { |
| for (size_t input_width = 1; input_width < 16; input_width += 3) { |
| ConvHWCMicrokernelTester() |
| .kernel_size(3) |
| .subsampling(2) |
| .padding_width(1) |
| .input_channels(3) |
| .output_channels_tile(4) |
| .output_channels(output_channels) |
| .input_width(input_width) |
| .input_height(9) |
| .output_y_end(output_y_end) |
| .Test(xnn_f32_conv_hwc_ukernel_3x3s2p1c3x4__scalar_1x1); |
| } |
| } |
| } |
| } |
| |
| TEST(F32_CONV_3X3S2P1C3X4__SCALAR_1X1, qmin) { |
| TEST_REQUIRES_ARM_NEON_FMA; |
| for (size_t output_channels = 1; output_channels < 8; output_channels += 3) { |
| for (size_t input_width = 1; input_width < 16; input_width += 3) { |
| ConvHWCMicrokernelTester() |
| .kernel_size(3) |
| .subsampling(2) |
| .padding_width(1) |
| .input_channels(3) |
| .output_channels_tile(4) |
| .output_channels(output_channels) |
| .input_width(input_width) |
| .input_height(6) |
| .qmin(128) |
| .Test(xnn_f32_conv_hwc_ukernel_3x3s2p1c3x4__scalar_1x1); |
| } |
| } |
| } |
| |
| TEST(F32_CONV_3X3S2P1C3X4__SCALAR_1X1, qmax) { |
| TEST_REQUIRES_ARM_NEON_FMA; |
| for (size_t output_channels = 1; output_channels < 8; output_channels += 3) { |
| for (size_t input_width = 1; input_width < 16; input_width += 3) { |
| ConvHWCMicrokernelTester() |
| .kernel_size(3) |
| .subsampling(2) |
| .padding_width(1) |
| .input_channels(3) |
| .output_channels_tile(4) |
| .output_channels(output_channels) |
| .input_width(input_width) |
| .input_height(6) |
| .qmax(128) |
| .Test(xnn_f32_conv_hwc_ukernel_3x3s2p1c3x4__scalar_1x1); |
| } |
| } |
| } |
| |
| |
| TEST(F32_CONV_3X3S2P0P1C3X4__SCALAR_1X1, input_width_eq_2) { |
| TEST_REQUIRES_ARM_NEON_FMA; |
| ConvHWCMicrokernelTester() |
| .kernel_size(3) |
| .subsampling(2) |
| .padding_right(1) |
| .input_channels(3) |
| .output_channels_tile(4) |
| .output_channels(4) |
| .input_width(2) |
| .input_height(3) |
| .Test(xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x4__scalar_1x1); |
| } |
| |
| TEST(F32_CONV_3X3S2P0P1C3X4__SCALAR_1X1, input_width_div_2) { |
| TEST_REQUIRES_ARM_NEON_FMA; |
| for (size_t input_width = 4; input_width <= 16; input_width += 6) { |
| ConvHWCMicrokernelTester() |
| .kernel_size(3) |
| .subsampling(2) |
| .padding_right(1) |
| .input_channels(3) |
| .output_channels_tile(4) |
| .output_channels(4) |
| .input_width(input_width) |
| .input_height(3) |
| .Test(xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x4__scalar_1x1); |
| } |
| } |
| |
| TEST(F32_CONV_3X3S2P0P1C3X4__SCALAR_1X1, input_width_gt_2) { |
| TEST_REQUIRES_ARM_NEON_FMA; |
| for (size_t input_width = 3; input_width < 4; input_width++) { |
| ConvHWCMicrokernelTester() |
| .kernel_size(3) |
| .subsampling(2) |
| .padding_right(1) |
| .input_channels(3) |
| .output_channels_tile(4) |
| .output_channels(4) |
| .input_width(input_width) |
| .input_height(3) |
| .Test(xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x4__scalar_1x1); |
| } |
| } |
| |
| TEST(F32_CONV_3X3S2P0P1C3X4__SCALAR_1X1, output_channels_lt_4) { |
| TEST_REQUIRES_ARM_NEON_FMA; |
| for (size_t output_channels = 1; output_channels < 4; output_channels++) { |
| for (size_t input_width = 2; input_width < 16; input_width += 3) { |
| ConvHWCMicrokernelTester() |
| .kernel_size(3) |
| .subsampling(2) |
| .padding_right(1) |
| .input_channels(3) |
| .output_channels_tile(4) |
| .output_channels(output_channels) |
| .input_width(input_width) |
| .input_height(3) |
| .Test(xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x4__scalar_1x1); |
| } |
| } |
| } |
| |
| TEST(F32_CONV_3X3S2P0P1C3X4__SCALAR_1X1, output_channels_div_4) { |
| TEST_REQUIRES_ARM_NEON_FMA; |
| for (size_t output_channels = 8; output_channels <= 16; output_channels += 4) { |
| for (size_t input_width = 2; input_width < 16; input_width += 3) { |
| ConvHWCMicrokernelTester() |
| .kernel_size(3) |
| .subsampling(2) |
| .padding_right(1) |
| .input_channels(3) |
| .output_channels_tile(4) |
| .output_channels(output_channels) |
| .input_width(input_width) |
| .input_height(3) |
| .Test(xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x4__scalar_1x1); |
| } |
| } |
| } |
| |
| TEST(F32_CONV_3X3S2P0P1C3X4__SCALAR_1X1, output_channels_gt_4) { |
| TEST_REQUIRES_ARM_NEON_FMA; |
| for (size_t output_channels = 5; output_channels < 8; output_channels++) { |
| for (size_t input_width = 2; input_width < 16; input_width += 3) { |
| ConvHWCMicrokernelTester() |
| .kernel_size(3) |
| .subsampling(2) |
| .padding_right(1) |
| .input_channels(3) |
| .output_channels_tile(4) |
| .output_channels(output_channels) |
| .input_width(input_width) |
| .input_height(3) |
| .Test(xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x4__scalar_1x1); |
| } |
| } |
| } |
| |
| TEST(F32_CONV_3X3S2P0P1C3X4__SCALAR_1X1, input_height_lt_3) { |
| TEST_REQUIRES_ARM_NEON_FMA; |
| for (size_t input_height = 1; input_height < 3; input_height++) { |
| for (size_t output_channels = 1; output_channels < 8; output_channels += 3) { |
| for (size_t input_width = 2; input_width < 16; input_width += 3) { |
| ConvHWCMicrokernelTester() |
| .kernel_size(3) |
| .subsampling(2) |
| .padding_right(1) |
| .padding_height(1) // padded input height of at least 3 required |
| .input_channels(3) |
| .output_channels_tile(4) |
| .output_channels(output_channels) |
| .input_width(input_width) |
| .input_height(input_height) |
| .Test(xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x4__scalar_1x1); |
| } |
| } |
| } |
| } |
| |
| TEST(F32_CONV_3X3S2P0P1C3X4__SCALAR_1X1, input_height_gt_3) { |
| TEST_REQUIRES_ARM_NEON_FMA; |
| for (size_t input_height = 4; input_height <= 9; input_height++) { |
| for (size_t output_channels = 1; output_channels < 8; output_channels += 3) { |
| for (size_t input_width = 2; input_width < 16; input_width += 3) { |
| ConvHWCMicrokernelTester() |
| .kernel_size(3) |
| .subsampling(2) |
| .padding_right(1) |
| .input_channels(3) |
| .output_channels_tile(4) |
| .output_channels(output_channels) |
| .input_width(input_width) |
| .input_height(input_height) |
| .Test(xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x4__scalar_1x1); |
| } |
| } |
| } |
| } |
| |
| TEST(F32_CONV_3X3S2P0P1C3X4__SCALAR_1X1, padding_top) { |
| TEST_REQUIRES_ARM_NEON_FMA; |
| for (size_t padding_top = 0; padding_top <= 1; padding_top++) { |
| for (size_t output_channels = 1; output_channels < 16; output_channels += 7) { |
| for (size_t input_width = 2; input_width < 16; input_width += 3) { |
| ConvHWCMicrokernelTester() |
| .kernel_size(3) |
| .subsampling(2) |
| .padding_right(1) |
| .padding_top(padding_top) |
| .input_channels(3) |
| .output_channels_tile(4) |
| .output_channels(output_channels) |
| .input_width(input_width) |
| .input_height(9) |
| .Test(xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x4__scalar_1x1); |
| } |
| } |
| } |
| } |
| |
| TEST(F32_CONV_3X3S2P0P1C3X4__SCALAR_1X1, padding_bottom) { |
| TEST_REQUIRES_ARM_NEON_FMA; |
| for (size_t padding_bottom = 0; padding_bottom <= 1; padding_bottom++) { |
| for (size_t output_channels = 1; output_channels < 16; output_channels += 7) { |
| for (size_t input_width = 2; input_width < 16; input_width += 3) { |
| ConvHWCMicrokernelTester() |
| .kernel_size(3) |
| .subsampling(2) |
| .padding_right(1) |
| .padding_bottom(padding_bottom) |
| .input_channels(3) |
| .output_channels_tile(4) |
| .output_channels(output_channels) |
| .input_width(input_width) |
| .input_height(9) |
| .Test(xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x4__scalar_1x1); |
| } |
| } |
| } |
| } |
| |
| TEST(F32_CONV_3X3S2P0P1C3X4__SCALAR_1X1, output_y_start) { |
| TEST_REQUIRES_ARM_NEON_FMA; |
| for (size_t output_y_start = 1; output_y_start <= 3; output_y_start++) { |
| for (size_t output_channels = 1; output_channels < 8; output_channels += 3) { |
| for (size_t input_width = 2; input_width < 16; input_width += 3) { |
| ConvHWCMicrokernelTester() |
| .kernel_size(3) |
| .subsampling(2) |
| .padding_right(1) |
| .input_channels(3) |
| .output_channels_tile(4) |
| .output_channels(output_channels) |
| .input_width(input_width) |
| .input_height(9) |
| .output_y_start(output_y_start) |
| .Test(xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x4__scalar_1x1); |
| } |
| } |
| } |
| } |
| |
| TEST(F32_CONV_3X3S2P0P1C3X4__SCALAR_1X1, output_y_end) { |
| TEST_REQUIRES_ARM_NEON_FMA; |
| for (size_t output_y_end = 2; output_y_end < 5; output_y_end++) { |
| for (size_t output_channels = 1; output_channels < 8; output_channels += 3) { |
| for (size_t input_width = 2; input_width < 16; input_width += 3) { |
| ConvHWCMicrokernelTester() |
| .kernel_size(3) |
| .subsampling(2) |
| .padding_right(1) |
| .input_channels(3) |
| .output_channels_tile(4) |
| .output_channels(output_channels) |
| .input_width(input_width) |
| .input_height(9) |
| .output_y_end(output_y_end) |
| .Test(xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x4__scalar_1x1); |
| } |
| } |
| } |
| } |
| |
| TEST(F32_CONV_3X3S2P0P1C3X4__SCALAR_1X1, qmin) { |
| TEST_REQUIRES_ARM_NEON_FMA; |
| for (size_t output_channels = 1; output_channels < 8; output_channels += 3) { |
| for (size_t input_width = 2; input_width < 16; input_width += 3) { |
| ConvHWCMicrokernelTester() |
| .kernel_size(3) |
| .subsampling(2) |
| .padding_right(1) |
| .input_channels(3) |
| .output_channels_tile(4) |
| .output_channels(output_channels) |
| .input_width(input_width) |
| .input_height(6) |
| .qmin(128) |
| .Test(xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x4__scalar_1x1); |
| } |
| } |
| } |
| |
| TEST(F32_CONV_3X3S2P0P1C3X4__SCALAR_1X1, qmax) { |
| TEST_REQUIRES_ARM_NEON_FMA; |
| for (size_t output_channels = 1; output_channels < 8; output_channels += 3) { |
| for (size_t input_width = 2; input_width < 16; input_width += 3) { |
| ConvHWCMicrokernelTester() |
| .kernel_size(3) |
| .subsampling(2) |
| .padding_right(1) |
| .input_channels(3) |
| .output_channels_tile(4) |
| .output_channels(output_channels) |
| .input_width(input_width) |
| .input_height(6) |
| .qmax(128) |
| .Test(xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x4__scalar_1x1); |
| } |
| } |
| } |