Rename BILINEAR micro-kernels to IBILINEAR
- Indicate the use of indirection buffers
- Prepare for alternative BILINEAR micro-kernels without indirection buffers
PiperOrigin-RevId: 300064208
diff --git a/BUILD.bazel b/BUILD.bazel
index b015dec..6c5d50c 100644
--- a/BUILD.bazel
+++ b/BUILD.bazel
@@ -84,9 +84,6 @@
"src/f32-argmaxpool/9x-scalar-c1.c",
"src/f32-avgpool/9p8x-scalar-c1.c",
"src/f32-avgpool/9x-scalar-c1.c",
- "src/f32-bilinear/gen/scalar-c1.c",
- "src/f32-bilinear/gen/scalar-c2.c",
- "src/f32-bilinear/gen/scalar-c4.c",
"src/f32-clamp/scalar.c",
"src/f32-conv-hwc/3x3s2p0p1c3x4-scalar-1x1.c",
"src/f32-conv-hwc/3x3s2p1c3x4-scalar-1x1.c",
@@ -120,6 +117,9 @@
"src/f32-hswish/gen/scalar-x1.c",
"src/f32-hswish/gen/scalar-x2.c",
"src/f32-hswish/gen/scalar-x4.c",
+ "src/f32-ibilinear/gen/scalar-c1.c",
+ "src/f32-ibilinear/gen/scalar-c2.c",
+ "src/f32-ibilinear/gen/scalar-c4.c",
"src/f32-igemm/gen/1x4-scalar.c",
"src/f32-igemm/gen/2x4-scalar.c",
"src/f32-igemm/gen/4x2-scalar.c",
@@ -336,8 +336,6 @@
"src/f32-argmaxpool/9x-psimd-c4.c",
"src/f32-avgpool/9p8x-psimd-c4.c",
"src/f32-avgpool/9x-psimd-c4.c",
- "src/f32-bilinear/gen/psimd-c4.c",
- "src/f32-bilinear/gen/psimd-c8.c",
"src/f32-clamp/psimd.c",
"src/f32-dwconv/gen/up4x25-psimd-acc2.c",
"src/f32-dwconv/gen/up4x25-psimd.c",
@@ -374,6 +372,8 @@
"src/f32-gemm/gen-inc/6x8s4-psimd.c",
"src/f32-hswish/gen/psimd-x4.c",
"src/f32-hswish/gen/psimd-x8.c",
+ "src/f32-ibilinear/gen/psimd-c4.c",
+ "src/f32-ibilinear/gen/psimd-c8.c",
"src/f32-igemm/gen/1x8-psimd-loadsplat.c",
"src/f32-igemm/gen/1x8-psimd-splat.c",
"src/f32-igemm/gen/1x8s4-psimd.c",
@@ -458,8 +458,6 @@
NEON_UKERNELS = [
"src/f32-avgpool/9p8x-neon-c4.c",
"src/f32-avgpool/9x-neon-c4.c",
- "src/f32-bilinear/gen/neon-c4.c",
- "src/f32-bilinear/gen/neon-c8.c",
"src/f32-clamp/neon.c",
"src/f32-dwconv/gen/up4x9-neon.c",
"src/f32-dwconv/gen/up4x9-neon-acc2.c",
@@ -501,6 +499,8 @@
"src/f32-gemm/gen-inc/8x8s4-neon.c",
"src/f32-hswish/gen/neon-x4.c",
"src/f32-hswish/gen/neon-x8.c",
+ "src/f32-ibilinear/gen/neon-c4.c",
+ "src/f32-ibilinear/gen/neon-c8.c",
"src/f32-igemm/gen/1x8-neon-lane-ld64.c",
"src/f32-igemm/gen/4x2-neon-lane-ld64.c",
"src/f32-igemm/gen/4x4-neon-lane-ld64.c",
@@ -629,8 +629,8 @@
]
NEONFMA_UKERNELS = [
- "src/f32-bilinear/gen/neonfma-c4.c",
- "src/f32-bilinear/gen/neonfma-c8.c",
+ "src/f32-ibilinear/gen/neonfma-c4.c",
+ "src/f32-ibilinear/gen/neonfma-c8.c",
"src/f32-igemm/gen/1x8-neonfma-dup-ld64.c",
"src/f32-igemm/gen/4x8-neonfma-dup-ld128.c",
"src/f32-igemm/gen/4x8-neonfma-dup-ld64.c",
@@ -866,8 +866,6 @@
SSE_UKERNELS = [
"src/f32-avgpool/9p8x-sse-c4.c",
"src/f32-avgpool/9x-sse-c4.c",
- "src/f32-bilinear/gen/sse-c4.c",
- "src/f32-bilinear/gen/sse-c8.c",
"src/f32-clamp/sse.c",
"src/f32-dwconv-spchw/3x3p1-sse.c",
"src/f32-dwconv-spchw/3x3s2p1-sse.c",
@@ -901,6 +899,8 @@
"src/f32-gemm/gen-inc/4x8s4-sse.c",
"src/f32-hswish/gen/sse-x4.c",
"src/f32-hswish/gen/sse-x8.c",
+ "src/f32-ibilinear/gen/sse-c4.c",
+ "src/f32-ibilinear/gen/sse-c8.c",
"src/f32-igemm/gen/1x8-sse-dup.c",
"src/f32-igemm/gen/1x8-sse-load1.c",
"src/f32-igemm/gen/1x8s4-sse.c",
@@ -1465,7 +1465,6 @@
"src/requantization/gemmlowp-requantization.h",
"src/xnnpack/argmaxpool.h",
"src/xnnpack/avgpool.h",
- "src/xnnpack/bilinear.h",
"src/xnnpack/clamp.h",
"src/xnnpack/common.h",
"src/xnnpack/conv.h",
@@ -1473,6 +1472,7 @@
"src/xnnpack/gavgpool.h",
"src/xnnpack/gemm.h",
"src/xnnpack/hswish.h",
+ "src/xnnpack/ibilinear.h",
"src/xnnpack/igemm.h",
"src/xnnpack/intrinsics-polyfill.h",
"src/xnnpack/lut.h",
@@ -2418,10 +2418,10 @@
)
xnnpack_unit_test(
- name = "f32_bilinear_test",
+ name = "f32_ibilinear_test",
srcs = [
- "test/f32-bilinear.cc",
- "test/bilinear-microkernel-tester.h",
+ "test/f32-ibilinear.cc",
+ "test/ibilinear-microkernel-tester.h",
"src/xnnpack/AlignedAllocator.h",
] + MICROKERNEL_TEST_HDRS,
deps = MICROKERNEL_TEST_DEPS,
diff --git a/CMakeLists.txt b/CMakeLists.txt
index ad936e5..549afa9 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -183,9 +183,6 @@
src/f32-argmaxpool/9x-scalar-c1.c
src/f32-avgpool/9p8x-scalar-c1.c
src/f32-avgpool/9x-scalar-c1.c
- src/f32-bilinear/gen/scalar-c1.c
- src/f32-bilinear/gen/scalar-c2.c
- src/f32-bilinear/gen/scalar-c4.c
src/f32-clamp/scalar.c
src/f32-conv-hwc/3x3s2p1c3x4-scalar-1x1.c
src/f32-conv-hwc/3x3s2p0p1c3x4-scalar-1x1.c
@@ -219,6 +216,9 @@
src/f32-hswish/gen/scalar-x1.c
src/f32-hswish/gen/scalar-x2.c
src/f32-hswish/gen/scalar-x4.c
+ src/f32-ibilinear/gen/scalar-c1.c
+ src/f32-ibilinear/gen/scalar-c2.c
+ src/f32-ibilinear/gen/scalar-c4.c
src/f32-igemm/gen/1x4-scalar.c
src/f32-igemm/gen/2x4-scalar.c
src/f32-igemm/gen/4x2-scalar.c
@@ -352,8 +352,6 @@
src/f32-argmaxpool/9x-psimd-c4.c
src/f32-avgpool/9p8x-psimd-c4.c
src/f32-avgpool/9x-psimd-c4.c
- src/f32-bilinear/gen/psimd-c4.c
- src/f32-bilinear/gen/psimd-c8.c
src/f32-clamp/psimd.c
src/f32-dwconv/gen/up4x25-psimd-acc2.c
src/f32-dwconv/gen/up4x25-psimd.c
@@ -390,6 +388,8 @@
src/f32-gemm/gen-inc/6x8s4-psimd.c
src/f32-hswish/gen/psimd-x4.c
src/f32-hswish/gen/psimd-x8.c
+ src/f32-ibilinear/gen/psimd-c4.c
+ src/f32-ibilinear/gen/psimd-c8.c
src/f32-igemm/gen/1x8-psimd-loadsplat.c
src/f32-igemm/gen/1x8-psimd-splat.c
src/f32-igemm/gen/1x8s4-psimd.c
@@ -471,8 +471,6 @@
SET(XNNPACK_NEON_MICROKERNEL_SRCS
src/f32-avgpool/9p8x-neon-c4.c
src/f32-avgpool/9x-neon-c4.c
- src/f32-bilinear/gen/neon-c4.c
- src/f32-bilinear/gen/neon-c8.c
src/f32-clamp/neon.c
src/f32-dwconv/gen/up4x9-neon.c
src/f32-dwconv/gen/up4x9-neon-acc2.c
@@ -514,6 +512,8 @@
src/f32-gemm/gen-inc/8x8s4-neon.c
src/f32-hswish/gen/neon-x4.c
src/f32-hswish/gen/neon-x8.c
+ src/f32-ibilinear/gen/neon-c4.c
+ src/f32-ibilinear/gen/neon-c8.c
src/f32-igemm/gen/1x8-neon-lane-ld64.c
src/f32-igemm/gen/4x2-neon-lane-ld64.c
src/f32-igemm/gen/4x4-neon-lane-ld64.c
@@ -641,8 +641,8 @@
src/requantization/gemmlowp-neon.c)
SET(XNNPACK_NEONFMA_MICROKERNEL_SRCS
- src/f32-bilinear/gen/neonfma-c4.c
- src/f32-bilinear/gen/neonfma-c8.c
+ src/f32-ibilinear/gen/neonfma-c4.c
+ src/f32-ibilinear/gen/neonfma-c8.c
src/f32-igemm/gen/1x8-neonfma-dup-ld64.c
src/f32-igemm/gen/4x8-neonfma-dup-ld128.c
src/f32-igemm/gen/4x8-neonfma-dup-ld64.c
@@ -867,8 +867,6 @@
SET(XNNPACK_SSE_MICROKERNEL_SRCS
src/f32-avgpool/9p8x-sse-c4.c
src/f32-avgpool/9x-sse-c4.c
- src/f32-bilinear/gen/sse-c4.c
- src/f32-bilinear/gen/sse-c8.c
src/f32-clamp/sse.c
src/f32-dwconv-spchw/3x3p1-sse.c
src/f32-dwconv-spchw/3x3s2p1-sse.c
@@ -902,6 +900,8 @@
src/f32-gemm/gen-inc/4x8s4-sse.c
src/f32-hswish/gen/sse-x4.c
src/f32-hswish/gen/sse-x8.c
+ src/f32-ibilinear/gen/sse-c4.c
+ src/f32-ibilinear/gen/sse-c8.c
src/f32-igemm/gen/1x8-sse-dup.c
src/f32-igemm/gen/1x8-sse-load1.c
src/f32-igemm/gen/1x8s4-sse.c
@@ -1893,15 +1893,6 @@
TARGET_LINK_LIBRARIES(f32-avgpool-test PRIVATE XNNPACK cpuinfo fp16 gtest gtest_main)
ADD_TEST(f32-avgpool-test f32-avgpool-test)
- ADD_EXECUTABLE(f32-bilinear-test test/f32-bilinear.cc)
- SET_TARGET_PROPERTIES(f32-bilinear-test PROPERTIES
- CXX_STANDARD 11
- CXX_STANDARD_REQUIRED YES
- CXX_EXTENSIONS YES)
- TARGET_INCLUDE_DIRECTORIES(f32-bilinear-test PRIVATE src test)
- TARGET_LINK_LIBRARIES(f32-bilinear-test PRIVATE XNNPACK cpuinfo fp16 gtest gtest_main)
- ADD_TEST(f32-bilinear-test f32-bilinear-test)
-
ADD_EXECUTABLE(f32-clamp-test test/f32-clamp.cc)
SET_TARGET_PROPERTIES(f32-clamp-test PROPERTIES
CXX_STANDARD 11
@@ -1992,6 +1983,15 @@
TARGET_LINK_LIBRARIES(f32-hswish-test PRIVATE XNNPACK cpuinfo fp16 gtest gtest_main)
ADD_TEST(f32-hswish-test f32-hswish-test)
+ ADD_EXECUTABLE(f32-ibilinear-test test/f32-ibilinear.cc)
+ SET_TARGET_PROPERTIES(f32-ibilinear-test PROPERTIES
+ CXX_STANDARD 11
+ CXX_STANDARD_REQUIRED YES
+ CXX_EXTENSIONS YES)
+ TARGET_INCLUDE_DIRECTORIES(f32-ibilinear-test PRIVATE src test)
+ TARGET_LINK_LIBRARIES(f32-ibilinear-test PRIVATE XNNPACK cpuinfo fp16 gtest gtest_main)
+ ADD_TEST(f32-ibilinear-test f32-ibilinear-test)
+
ADD_EXECUTABLE(f32-igemm-test test/f32-igemm.cc)
SET_TARGET_PROPERTIES(f32-igemm-test PROPERTIES
CXX_STANDARD 11
diff --git a/scripts/generate-f32-bilinear.sh b/scripts/generate-f32-bilinear.sh
deleted file mode 100755
index 75fe719..0000000
--- a/scripts/generate-f32-bilinear.sh
+++ /dev/null
@@ -1,28 +0,0 @@
-#!/bin/sh
-# Copyright 2019 Google LLC
-#
-# This source code is licensed under the BSD-style license found in the
-# LICENSE file in the root directory of this source tree.
-
-#################################### Scalar ###################################
-tools/xngen src/f32-bilinear/scalar.c.in -D CHANNEL_TILE=1 -D PIXEL_TILE=1 -o src/f32-bilinear/gen/scalar-c1.c
-tools/xngen src/f32-bilinear/scalar.c.in -D CHANNEL_TILE=2 -D PIXEL_TILE=1 -o src/f32-bilinear/gen/scalar-c2.c
-tools/xngen src/f32-bilinear/scalar.c.in -D CHANNEL_TILE=4 -D PIXEL_TILE=1 -o src/f32-bilinear/gen/scalar-c4.c
-
-################################### ARM NEON ##################################
-tools/xngen src/f32-bilinear/neon.c.in -D CHANNEL_TILE=4 -D PIXEL_TILE=1 -D FMA=0 -o src/f32-bilinear/gen/neon-c4.c
-tools/xngen src/f32-bilinear/neon.c.in -D CHANNEL_TILE=8 -D PIXEL_TILE=1 -D FMA=0 -o src/f32-bilinear/gen/neon-c8.c
-
-tools/xngen src/f32-bilinear/neon.c.in -D CHANNEL_TILE=4 -D PIXEL_TILE=1 -D FMA=1 -o src/f32-bilinear/gen/neonfma-c4.c
-tools/xngen src/f32-bilinear/neon.c.in -D CHANNEL_TILE=8 -D PIXEL_TILE=1 -D FMA=1 -o src/f32-bilinear/gen/neonfma-c8.c
-
-#################################### PSIMD ####################################
-tools/xngen src/f32-bilinear/psimd.c.in -D CHANNEL_TILE=4 -D PIXEL_TILE=1 -o src/f32-bilinear/gen/psimd-c4.c
-tools/xngen src/f32-bilinear/psimd.c.in -D CHANNEL_TILE=8 -D PIXEL_TILE=1 -o src/f32-bilinear/gen/psimd-c8.c
-
-################################### x86 SSE ###################################
-tools/xngen src/f32-bilinear/sse.c.in -D CHANNEL_TILE=4 -D PIXEL_TILE=1 -o src/f32-bilinear/gen/sse-c4.c
-tools/xngen src/f32-bilinear/sse.c.in -D CHANNEL_TILE=8 -D PIXEL_TILE=1 -o src/f32-bilinear/gen/sse-c8.c
-
-################################## Unit tests #################################
-tools/generate-bilinear-test.py --spec test/f32-bilinear.yaml --output test/f32-bilinear.cc
diff --git a/scripts/generate-f32-ibilinear.sh b/scripts/generate-f32-ibilinear.sh
new file mode 100755
index 0000000..dc76d0c
--- /dev/null
+++ b/scripts/generate-f32-ibilinear.sh
@@ -0,0 +1,28 @@
+#!/bin/sh
+# Copyright 2019 Google LLC
+#
+# This source code is licensed under the BSD-style license found in the
+# LICENSE file in the root directory of this source tree.
+
+#################################### Scalar ###################################
+tools/xngen src/f32-ibilinear/scalar.c.in -D CHANNEL_TILE=1 -D PIXEL_TILE=1 -o src/f32-ibilinear/gen/scalar-c1.c
+tools/xngen src/f32-ibilinear/scalar.c.in -D CHANNEL_TILE=2 -D PIXEL_TILE=1 -o src/f32-ibilinear/gen/scalar-c2.c
+tools/xngen src/f32-ibilinear/scalar.c.in -D CHANNEL_TILE=4 -D PIXEL_TILE=1 -o src/f32-ibilinear/gen/scalar-c4.c
+
+################################### ARM NEON ##################################
+tools/xngen src/f32-ibilinear/neon.c.in -D CHANNEL_TILE=4 -D PIXEL_TILE=1 -D FMA=0 -o src/f32-ibilinear/gen/neon-c4.c
+tools/xngen src/f32-ibilinear/neon.c.in -D CHANNEL_TILE=8 -D PIXEL_TILE=1 -D FMA=0 -o src/f32-ibilinear/gen/neon-c8.c
+
+tools/xngen src/f32-ibilinear/neon.c.in -D CHANNEL_TILE=4 -D PIXEL_TILE=1 -D FMA=1 -o src/f32-ibilinear/gen/neonfma-c4.c
+tools/xngen src/f32-ibilinear/neon.c.in -D CHANNEL_TILE=8 -D PIXEL_TILE=1 -D FMA=1 -o src/f32-ibilinear/gen/neonfma-c8.c
+
+#################################### PSIMD ####################################
+tools/xngen src/f32-ibilinear/psimd.c.in -D CHANNEL_TILE=4 -D PIXEL_TILE=1 -o src/f32-ibilinear/gen/psimd-c4.c
+tools/xngen src/f32-ibilinear/psimd.c.in -D CHANNEL_TILE=8 -D PIXEL_TILE=1 -o src/f32-ibilinear/gen/psimd-c8.c
+
+################################### x86 SSE ###################################
+tools/xngen src/f32-ibilinear/sse.c.in -D CHANNEL_TILE=4 -D PIXEL_TILE=1 -o src/f32-ibilinear/gen/sse-c4.c
+tools/xngen src/f32-ibilinear/sse.c.in -D CHANNEL_TILE=8 -D PIXEL_TILE=1 -o src/f32-ibilinear/gen/sse-c8.c
+
+################################## Unit tests #################################
+tools/generate-ibilinear-test.py --spec test/f32-ibilinear.yaml --output test/f32-ibilinear.cc
diff --git a/src/f32-bilinear/gen/neon-c4.c b/src/f32-ibilinear/gen/neon-c4.c
similarity index 95%
rename from src/f32-bilinear/gen/neon-c4.c
rename to src/f32-ibilinear/gen/neon-c4.c
index 95b7bc0..bc29c74 100644
--- a/src/f32-bilinear/gen/neon-c4.c
+++ b/src/f32-ibilinear/gen/neon-c4.c
@@ -1,5 +1,5 @@
// Auto-generated file. Do not edit!
-// Template: src/f32-bilinear/neon.c.in
+// Template: src/f32-ibilinear/neon.c.in
// Generator: tools/xngen
//
// Copyright 2019 Google LLC
@@ -12,10 +12,10 @@
#include <arm_neon.h>
#include <xnnpack/common.h>
-#include <xnnpack/bilinear.h>
+#include <xnnpack/ibilinear.h>
-void xnn_f32_bilinear_ukernel__neon_c4(
+void xnn_f32_ibilinear_ukernel__neon_c4(
size_t output_pixels,
size_t channels,
const float**restrict input,
diff --git a/src/f32-bilinear/gen/neon-c8.c b/src/f32-ibilinear/gen/neon-c8.c
similarity index 97%
rename from src/f32-bilinear/gen/neon-c8.c
rename to src/f32-ibilinear/gen/neon-c8.c
index 9551bd4..4baf353 100644
--- a/src/f32-bilinear/gen/neon-c8.c
+++ b/src/f32-ibilinear/gen/neon-c8.c
@@ -1,5 +1,5 @@
// Auto-generated file. Do not edit!
-// Template: src/f32-bilinear/neon.c.in
+// Template: src/f32-ibilinear/neon.c.in
// Generator: tools/xngen
//
// Copyright 2019 Google LLC
@@ -12,10 +12,10 @@
#include <arm_neon.h>
#include <xnnpack/common.h>
-#include <xnnpack/bilinear.h>
+#include <xnnpack/ibilinear.h>
-void xnn_f32_bilinear_ukernel__neon_c8(
+void xnn_f32_ibilinear_ukernel__neon_c8(
size_t output_pixels,
size_t channels,
const float**restrict input,
diff --git a/src/f32-bilinear/gen/neonfma-c4.c b/src/f32-ibilinear/gen/neonfma-c4.c
similarity index 96%
rename from src/f32-bilinear/gen/neonfma-c4.c
rename to src/f32-ibilinear/gen/neonfma-c4.c
index b3b14d0..11c3c3c 100644
--- a/src/f32-bilinear/gen/neonfma-c4.c
+++ b/src/f32-ibilinear/gen/neonfma-c4.c
@@ -1,5 +1,5 @@
// Auto-generated file. Do not edit!
-// Template: src/f32-bilinear/neon.c.in
+// Template: src/f32-ibilinear/neon.c.in
// Generator: tools/xngen
//
// Copyright 2019 Google LLC
@@ -12,10 +12,10 @@
#include <arm_neon.h>
#include <xnnpack/common.h>
-#include <xnnpack/bilinear.h>
+#include <xnnpack/ibilinear.h>
-void xnn_f32_bilinear_ukernel__neonfma_c4(
+void xnn_f32_ibilinear_ukernel__neonfma_c4(
size_t output_pixels,
size_t channels,
const float**restrict input,
diff --git a/src/f32-bilinear/gen/neonfma-c8.c b/src/f32-ibilinear/gen/neonfma-c8.c
similarity index 97%
rename from src/f32-bilinear/gen/neonfma-c8.c
rename to src/f32-ibilinear/gen/neonfma-c8.c
index 36db988..4a60fa4 100644
--- a/src/f32-bilinear/gen/neonfma-c8.c
+++ b/src/f32-ibilinear/gen/neonfma-c8.c
@@ -1,5 +1,5 @@
// Auto-generated file. Do not edit!
-// Template: src/f32-bilinear/neon.c.in
+// Template: src/f32-ibilinear/neon.c.in
// Generator: tools/xngen
//
// Copyright 2019 Google LLC
@@ -12,10 +12,10 @@
#include <arm_neon.h>
#include <xnnpack/common.h>
-#include <xnnpack/bilinear.h>
+#include <xnnpack/ibilinear.h>
-void xnn_f32_bilinear_ukernel__neonfma_c8(
+void xnn_f32_ibilinear_ukernel__neonfma_c8(
size_t output_pixels,
size_t channels,
const float**restrict input,
diff --git a/src/f32-bilinear/gen/psimd-c4.c b/src/f32-ibilinear/gen/psimd-c4.c
similarity index 95%
rename from src/f32-bilinear/gen/psimd-c4.c
rename to src/f32-ibilinear/gen/psimd-c4.c
index efa9947..d86fb90 100644
--- a/src/f32-bilinear/gen/psimd-c4.c
+++ b/src/f32-ibilinear/gen/psimd-c4.c
@@ -1,5 +1,5 @@
// Auto-generated file. Do not edit!
-// Template: src/f32-bilinear/psimd.c.in
+// Template: src/f32-ibilinear/psimd.c.in
// Generator: tools/xngen
//
// Copyright 2019 Google LLC
@@ -11,10 +11,10 @@
#include <psimd.h>
-#include <xnnpack/bilinear.h>
+#include <xnnpack/ibilinear.h>
-void xnn_f32_bilinear_ukernel__psimd_c4(
+void xnn_f32_ibilinear_ukernel__psimd_c4(
size_t output_pixels,
size_t channels,
const float**restrict input,
diff --git a/src/f32-bilinear/gen/psimd-c8.c b/src/f32-ibilinear/gen/psimd-c8.c
similarity index 97%
rename from src/f32-bilinear/gen/psimd-c8.c
rename to src/f32-ibilinear/gen/psimd-c8.c
index 84dfbfc..a6492bd 100644
--- a/src/f32-bilinear/gen/psimd-c8.c
+++ b/src/f32-ibilinear/gen/psimd-c8.c
@@ -1,5 +1,5 @@
// Auto-generated file. Do not edit!
-// Template: src/f32-bilinear/psimd.c.in
+// Template: src/f32-ibilinear/psimd.c.in
// Generator: tools/xngen
//
// Copyright 2019 Google LLC
@@ -11,10 +11,10 @@
#include <psimd.h>
-#include <xnnpack/bilinear.h>
+#include <xnnpack/ibilinear.h>
-void xnn_f32_bilinear_ukernel__psimd_c8(
+void xnn_f32_ibilinear_ukernel__psimd_c8(
size_t output_pixels,
size_t channels,
const float**restrict input,
diff --git a/src/f32-bilinear/gen/scalar-c1.c b/src/f32-ibilinear/gen/scalar-c1.c
similarity index 92%
rename from src/f32-bilinear/gen/scalar-c1.c
rename to src/f32-ibilinear/gen/scalar-c1.c
index 91fcd4d..f76affc 100644
--- a/src/f32-bilinear/gen/scalar-c1.c
+++ b/src/f32-ibilinear/gen/scalar-c1.c
@@ -1,5 +1,5 @@
// Auto-generated file. Do not edit!
-// Template: src/f32-bilinear/scalar.c.in
+// Template: src/f32-ibilinear/scalar.c.in
// Generator: tools/xngen
//
// Copyright 2019 Google LLC
@@ -9,10 +9,10 @@
#include <assert.h>
-#include <xnnpack/bilinear.h>
+#include <xnnpack/ibilinear.h>
-void xnn_f32_bilinear_ukernel__scalar_c1(
+void xnn_f32_ibilinear_ukernel__scalar_c1(
size_t output_pixels,
size_t channels,
const float**restrict input,
diff --git a/src/f32-bilinear/gen/scalar-c2.c b/src/f32-ibilinear/gen/scalar-c2.c
similarity index 95%
rename from src/f32-bilinear/gen/scalar-c2.c
rename to src/f32-ibilinear/gen/scalar-c2.c
index 26822c4..b66caff 100644
--- a/src/f32-bilinear/gen/scalar-c2.c
+++ b/src/f32-ibilinear/gen/scalar-c2.c
@@ -1,5 +1,5 @@
// Auto-generated file. Do not edit!
-// Template: src/f32-bilinear/scalar.c.in
+// Template: src/f32-ibilinear/scalar.c.in
// Generator: tools/xngen
//
// Copyright 2019 Google LLC
@@ -9,10 +9,10 @@
#include <assert.h>
-#include <xnnpack/bilinear.h>
+#include <xnnpack/ibilinear.h>
-void xnn_f32_bilinear_ukernel__scalar_c2(
+void xnn_f32_ibilinear_ukernel__scalar_c2(
size_t output_pixels,
size_t channels,
const float**restrict input,
diff --git a/src/f32-bilinear/gen/scalar-c4.c b/src/f32-ibilinear/gen/scalar-c4.c
similarity index 96%
rename from src/f32-bilinear/gen/scalar-c4.c
rename to src/f32-ibilinear/gen/scalar-c4.c
index 7a1d592..c1e844c 100644
--- a/src/f32-bilinear/gen/scalar-c4.c
+++ b/src/f32-ibilinear/gen/scalar-c4.c
@@ -1,5 +1,5 @@
// Auto-generated file. Do not edit!
-// Template: src/f32-bilinear/scalar.c.in
+// Template: src/f32-ibilinear/scalar.c.in
// Generator: tools/xngen
//
// Copyright 2019 Google LLC
@@ -9,10 +9,10 @@
#include <assert.h>
-#include <xnnpack/bilinear.h>
+#include <xnnpack/ibilinear.h>
-void xnn_f32_bilinear_ukernel__scalar_c4(
+void xnn_f32_ibilinear_ukernel__scalar_c4(
size_t output_pixels,
size_t channels,
const float**restrict input,
diff --git a/src/f32-bilinear/gen/sse-c4.c b/src/f32-ibilinear/gen/sse-c4.c
similarity index 96%
rename from src/f32-bilinear/gen/sse-c4.c
rename to src/f32-ibilinear/gen/sse-c4.c
index 4707cf4..321b6e1 100644
--- a/src/f32-bilinear/gen/sse-c4.c
+++ b/src/f32-ibilinear/gen/sse-c4.c
@@ -1,5 +1,5 @@
// Auto-generated file. Do not edit!
-// Template: src/f32-bilinear/sse.c.in
+// Template: src/f32-ibilinear/sse.c.in
// Generator: tools/xngen
//
// Copyright 2019 Google LLC
@@ -11,10 +11,10 @@
#include <xmmintrin.h>
-#include <xnnpack/bilinear.h>
+#include <xnnpack/ibilinear.h>
-void xnn_f32_bilinear_ukernel__sse_c4(
+void xnn_f32_ibilinear_ukernel__sse_c4(
size_t output_pixels,
size_t channels,
const float**restrict input,
diff --git a/src/f32-bilinear/gen/sse-c8.c b/src/f32-ibilinear/gen/sse-c8.c
similarity index 97%
rename from src/f32-bilinear/gen/sse-c8.c
rename to src/f32-ibilinear/gen/sse-c8.c
index 31c34a2..2bd65f3 100644
--- a/src/f32-bilinear/gen/sse-c8.c
+++ b/src/f32-ibilinear/gen/sse-c8.c
@@ -1,5 +1,5 @@
// Auto-generated file. Do not edit!
-// Template: src/f32-bilinear/sse.c.in
+// Template: src/f32-ibilinear/sse.c.in
// Generator: tools/xngen
//
// Copyright 2019 Google LLC
@@ -11,10 +11,10 @@
#include <xmmintrin.h>
-#include <xnnpack/bilinear.h>
+#include <xnnpack/ibilinear.h>
-void xnn_f32_bilinear_ukernel__sse_c8(
+void xnn_f32_ibilinear_ukernel__sse_c8(
size_t output_pixels,
size_t channels,
const float**restrict input,
diff --git a/src/f32-bilinear/neon.c.in b/src/f32-ibilinear/neon.c.in
similarity index 97%
rename from src/f32-bilinear/neon.c.in
rename to src/f32-ibilinear/neon.c.in
index 4ae2bb9..081ba66 100644
--- a/src/f32-bilinear/neon.c.in
+++ b/src/f32-ibilinear/neon.c.in
@@ -13,10 +13,10 @@
#include <arm_neon.h>
#include <xnnpack/common.h>
-#include <xnnpack/bilinear.h>
+#include <xnnpack/ibilinear.h>
-void xnn_f32_bilinear_ukernel__${"neonfma" if FMA else "neon"}_c${CHANNEL_TILE}${"" if PIXEL_TILE == 1 else "x%d" % PIXEL_TILE}(
+void xnn_f32_ibilinear_ukernel__${"neonfma" if FMA else "neon"}_c${CHANNEL_TILE}${"" if PIXEL_TILE == 1 else "x%d" % PIXEL_TILE}(
size_t output_pixels,
size_t channels,
const float**restrict input,
diff --git a/src/f32-bilinear/psimd.c.in b/src/f32-ibilinear/psimd.c.in
similarity index 96%
rename from src/f32-bilinear/psimd.c.in
rename to src/f32-ibilinear/psimd.c.in
index 84ee83b..c65c6b3 100644
--- a/src/f32-bilinear/psimd.c.in
+++ b/src/f32-ibilinear/psimd.c.in
@@ -11,10 +11,10 @@
#include <psimd.h>
-#include <xnnpack/bilinear.h>
+#include <xnnpack/ibilinear.h>
-void xnn_f32_bilinear_ukernel__psimd_c${CHANNEL_TILE}${"" if PIXEL_TILE == 1 else "x%d" % PIXEL_TILE}(
+void xnn_f32_ibilinear_ukernel__psimd_c${CHANNEL_TILE}${"" if PIXEL_TILE == 1 else "x%d" % PIXEL_TILE}(
size_t output_pixels,
size_t channels,
const float**restrict input,
diff --git a/src/f32-bilinear/scalar.c.in b/src/f32-ibilinear/scalar.c.in
similarity index 95%
rename from src/f32-bilinear/scalar.c.in
rename to src/f32-ibilinear/scalar.c.in
index 43aa085..e29205a 100644
--- a/src/f32-bilinear/scalar.c.in
+++ b/src/f32-ibilinear/scalar.c.in
@@ -8,10 +8,10 @@
$ABC = "0123456789ABCDEFGHIJKLMN"
#include <assert.h>
-#include <xnnpack/bilinear.h>
+#include <xnnpack/ibilinear.h>
-void xnn_f32_bilinear_ukernel__scalar_c${CHANNEL_TILE}${"" if PIXEL_TILE == 1 else "x%d" % PIXEL_TILE}(
+void xnn_f32_ibilinear_ukernel__scalar_c${CHANNEL_TILE}${"" if PIXEL_TILE == 1 else "x%d" % PIXEL_TILE}(
size_t output_pixels,
size_t channels,
const float**restrict input,
diff --git a/src/f32-bilinear/sse.c.in b/src/f32-ibilinear/sse.c.in
similarity index 97%
rename from src/f32-bilinear/sse.c.in
rename to src/f32-ibilinear/sse.c.in
index a0a1cd0..c1e7fd3 100644
--- a/src/f32-bilinear/sse.c.in
+++ b/src/f32-ibilinear/sse.c.in
@@ -11,10 +11,10 @@
#include <xmmintrin.h>
-#include <xnnpack/bilinear.h>
+#include <xnnpack/ibilinear.h>
-void xnn_f32_bilinear_ukernel__sse_c${CHANNEL_TILE}${"" if PIXEL_TILE == 1 else "x%d" % PIXEL_TILE}(
+void xnn_f32_ibilinear_ukernel__sse_c${CHANNEL_TILE}${"" if PIXEL_TILE == 1 else "x%d" % PIXEL_TILE}(
size_t output_pixels,
size_t channels,
const float**restrict input,
diff --git a/src/init.c b/src/init.c
index 51eeebf..8263459 100644
--- a/src/init.c
+++ b/src/init.c
@@ -20,7 +20,6 @@
#include <xnnpack.h>
#include <xnnpack/argmaxpool.h>
#include <xnnpack/avgpool.h>
-#include <xnnpack/bilinear.h>
#include <xnnpack/clamp.h>
#include <xnnpack/common.h>
#include <xnnpack/conv.h>
@@ -28,6 +27,7 @@
#include <xnnpack/gavgpool.h>
#include <xnnpack/gemm.h>
#include <xnnpack/hswish.h>
+#include <xnnpack/ibilinear.h>
#include <xnnpack/igemm.h>
#include <xnnpack/log.h>
#include <xnnpack/lut.h>
@@ -249,8 +249,8 @@
.mr = 9,
.qr = 8,
};
- xnn_params.f32.bilinear = (struct bilinear_parameters) {
- .ukernel = (xnn_bilinear_ukernel_function) xnn_f32_bilinear_ukernel__neon_c8,
+ xnn_params.f32.ibilinear = (struct ibilinear_parameters) {
+ .ukernel = (xnn_ibilinear_ukernel_function) xnn_f32_ibilinear_ukernel__neon_c8,
.pixel_tile = 1,
.channel_tile = 8,
};
@@ -556,8 +556,8 @@
.mr = 9,
.qr = 8,
};
- xnn_params.f32.bilinear = (struct bilinear_parameters) {
- .ukernel = (xnn_bilinear_ukernel_function) xnn_f32_bilinear_ukernel__neonfma_c8,
+ xnn_params.f32.ibilinear = (struct ibilinear_parameters) {
+ .ukernel = (xnn_ibilinear_ukernel_function) xnn_f32_ibilinear_ukernel__neonfma_c8,
.pixel_tile = 1,
.channel_tile = 8,
};
@@ -899,8 +899,8 @@
.mr = 9,
.qr = 8,
};
- xnn_params.f32.bilinear = (struct bilinear_parameters) {
- .ukernel = (xnn_bilinear_ukernel_function) xnn_f32_bilinear_ukernel__sse_c8,
+ xnn_params.f32.ibilinear = (struct ibilinear_parameters) {
+ .ukernel = (xnn_ibilinear_ukernel_function) xnn_f32_ibilinear_ukernel__sse_c8,
.pixel_tile = 1,
.channel_tile = 8,
};
@@ -1226,8 +1226,8 @@
.mr = 9,
.qr = 8,
};
- xnn_params.f32.bilinear = (struct bilinear_parameters) {
- .ukernel = (xnn_bilinear_ukernel_function) xnn_f32_bilinear_ukernel__psimd_c8,
+ xnn_params.f32.ibilinear = (struct ibilinear_parameters) {
+ .ukernel = (xnn_ibilinear_ukernel_function) xnn_f32_ibilinear_ukernel__psimd_c8,
.pixel_tile = 1,
.channel_tile = 8,
};
@@ -1434,8 +1434,8 @@
.mr = 9,
.qr = 8,
};
- xnn_params.f32.bilinear = (struct bilinear_parameters) {
- .ukernel = (xnn_bilinear_ukernel_function) xnn_f32_bilinear_ukernel__scalar_c2,
+ xnn_params.f32.ibilinear = (struct ibilinear_parameters) {
+ .ukernel = (xnn_ibilinear_ukernel_function) xnn_f32_ibilinear_ukernel__scalar_c2,
.pixel_tile = 1,
.channel_tile = 2,
};
diff --git a/src/resize-bilinear-nhwc.c b/src/resize-bilinear-nhwc.c
index f8ff604..73e1ba7 100644
--- a/src/resize-bilinear-nhwc.c
+++ b/src/resize-bilinear-nhwc.c
@@ -195,7 +195,7 @@
.output_pixel_stride = output_pixel_stride_in_bytes,
.output_batch_stride = output_pixel_stride_in_bytes * output_height * output_width,
.log2_wsize = 3 /* log2(2 * sizeof(float)) */,
- .ukernel = xnn_params.f32.bilinear.ukernel,
+ .ukernel = xnn_params.f32.ibilinear.ukernel,
};
const size_t output_size = output_height * output_width;
@@ -205,7 +205,7 @@
const size_t target_tiles_per_thread = 5;
const size_t max_output_size_tile = divide_round_up(output_size, num_threads * target_tiles_per_thread);
if (max_output_size_tile < output_size_tile) {
- const uint32_t output_size_subtile = xnn_params.f32.bilinear.pixel_tile;
+ const uint32_t output_size_subtile = xnn_params.f32.ibilinear.pixel_tile;
output_size_tile =
min(output_size_tile,
divide_round_up(output_size_tile, max_output_size_tile * output_size_subtile) * output_size_subtile);
diff --git a/src/xnnpack/bilinear.h b/src/xnnpack/bilinear.h
deleted file mode 100644
index 570cbe9..0000000
--- a/src/xnnpack/bilinear.h
+++ /dev/null
@@ -1,48 +0,0 @@
-// Copyright 2019 Google LLC
-//
-// This source code is licensed under the BSD-style license found in the
-// LICENSE file in the root directory of this source tree.
-
-#pragma once
-
-#include <stddef.h>
-#include <stdint.h>
-
-#include <xnnpack/params.h>
-#include <xnnpack/common.h>
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-
-
-#define DECLARE_F32_BILINEAR_UKERNEL_FUNCTION(fn_name) \
- XNN_INTERNAL void fn_name( \
- size_t output_pixels, \
- size_t channels, \
- const float** input, \
- size_t input_offset, \
- const float* weights, \
- float* output, \
- size_t output_increment);
-
-DECLARE_F32_BILINEAR_UKERNEL_FUNCTION(xnn_f32_bilinear_ukernel__scalar_c1)
-DECLARE_F32_BILINEAR_UKERNEL_FUNCTION(xnn_f32_bilinear_ukernel__scalar_c2)
-DECLARE_F32_BILINEAR_UKERNEL_FUNCTION(xnn_f32_bilinear_ukernel__scalar_c4)
-
-DECLARE_F32_BILINEAR_UKERNEL_FUNCTION(xnn_f32_bilinear_ukernel__neon_c4)
-DECLARE_F32_BILINEAR_UKERNEL_FUNCTION(xnn_f32_bilinear_ukernel__neon_c8)
-
-DECLARE_F32_BILINEAR_UKERNEL_FUNCTION(xnn_f32_bilinear_ukernel__neonfma_c4)
-DECLARE_F32_BILINEAR_UKERNEL_FUNCTION(xnn_f32_bilinear_ukernel__neonfma_c8)
-
-DECLARE_F32_BILINEAR_UKERNEL_FUNCTION(xnn_f32_bilinear_ukernel__sse_c4)
-DECLARE_F32_BILINEAR_UKERNEL_FUNCTION(xnn_f32_bilinear_ukernel__sse_c8)
-
-DECLARE_F32_BILINEAR_UKERNEL_FUNCTION(xnn_f32_bilinear_ukernel__psimd_c4)
-DECLARE_F32_BILINEAR_UKERNEL_FUNCTION(xnn_f32_bilinear_ukernel__psimd_c8)
-
-
-#ifdef __cplusplus
-} // extern "C"
-#endif
diff --git a/src/xnnpack/compute.h b/src/xnnpack/compute.h
index 1d205bf..dd16210 100644
--- a/src/xnnpack/compute.h
+++ b/src/xnnpack/compute.h
@@ -567,7 +567,7 @@
// log2(sizeof(weight element)).
uint32_t log2_wsize;
// Pointer to BILINEAR micro-kernel function.
- xnn_bilinear_ukernel_function ukernel;
+ xnn_ibilinear_ukernel_function ukernel;
};
#ifndef __cplusplus
diff --git a/src/xnnpack/ibilinear.h b/src/xnnpack/ibilinear.h
new file mode 100644
index 0000000..173a796
--- /dev/null
+++ b/src/xnnpack/ibilinear.h
@@ -0,0 +1,48 @@
+// Copyright 2019 Google LLC
+//
+// This source code is licensed under the BSD-style license found in the
+// LICENSE file in the root directory of this source tree.
+
+#pragma once
+
+#include <stddef.h>
+#include <stdint.h>
+
+#include <xnnpack/params.h>
+#include <xnnpack/common.h>
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+
+#define DECLARE_F32_IBILINEAR_UKERNEL_FUNCTION(fn_name) \
+ XNN_INTERNAL void fn_name( \
+ size_t output_pixels, \
+ size_t channels, \
+ const float** input, \
+ size_t input_offset, \
+ const float* weights, \
+ float* output, \
+ size_t output_increment);
+
+DECLARE_F32_IBILINEAR_UKERNEL_FUNCTION(xnn_f32_ibilinear_ukernel__scalar_c1)
+DECLARE_F32_IBILINEAR_UKERNEL_FUNCTION(xnn_f32_ibilinear_ukernel__scalar_c2)
+DECLARE_F32_IBILINEAR_UKERNEL_FUNCTION(xnn_f32_ibilinear_ukernel__scalar_c4)
+
+DECLARE_F32_IBILINEAR_UKERNEL_FUNCTION(xnn_f32_ibilinear_ukernel__neon_c4)
+DECLARE_F32_IBILINEAR_UKERNEL_FUNCTION(xnn_f32_ibilinear_ukernel__neon_c8)
+
+DECLARE_F32_IBILINEAR_UKERNEL_FUNCTION(xnn_f32_ibilinear_ukernel__neonfma_c4)
+DECLARE_F32_IBILINEAR_UKERNEL_FUNCTION(xnn_f32_ibilinear_ukernel__neonfma_c8)
+
+DECLARE_F32_IBILINEAR_UKERNEL_FUNCTION(xnn_f32_ibilinear_ukernel__sse_c4)
+DECLARE_F32_IBILINEAR_UKERNEL_FUNCTION(xnn_f32_ibilinear_ukernel__sse_c8)
+
+DECLARE_F32_IBILINEAR_UKERNEL_FUNCTION(xnn_f32_ibilinear_ukernel__psimd_c4)
+DECLARE_F32_IBILINEAR_UKERNEL_FUNCTION(xnn_f32_ibilinear_ukernel__psimd_c8)
+
+
+#ifdef __cplusplus
+} // extern "C"
+#endif
diff --git a/src/xnnpack/params.h b/src/xnnpack/params.h
index d0e9131..28222f1 100644
--- a/src/xnnpack/params.h
+++ b/src/xnnpack/params.h
@@ -727,7 +727,7 @@
size_t output_increment,
const void* params);
-typedef void (*xnn_f32_bilinear_ukernel_function)(
+typedef void (*xnn_f32_ibilinear_ukernel_function)(
size_t output_pixels,
size_t channels,
const float** input,
@@ -736,7 +736,7 @@
float* output,
size_t output_increment);
-typedef void (*xnn_bilinear_ukernel_function)(
+typedef void (*xnn_ibilinear_ukernel_function)(
size_t output_pixels,
size_t channels,
const void** input,
@@ -1307,8 +1307,8 @@
uint8_t qr;
};
-struct bilinear_parameters {
- xnn_bilinear_ukernel_function ukernel;
+struct ibilinear_parameters {
+ xnn_ibilinear_ukernel_function ukernel;
// Number of output pixels in a tile.
// For best efficiency, micro-kernel must produce a multiple of this number of pixels in each call.
uint8_t pixel_tile;
@@ -1375,7 +1375,7 @@
struct maxpool_parameters maxpool;
struct argmaxpool_parameters argmaxpool[XNN_MAX_F32_ARGMAXPOOL_UKERNELS];
// Bilinear interpolation (2D).
- struct bilinear_parameters bilinear;
+ struct ibilinear_parameters ibilinear;
xnn_univector_ukernel_function clamp;
xnn_univector_ukernel_function hswish;
xnn_univector_ukernel_function sigmoid;
diff --git a/test/f32-bilinear.cc b/test/f32-bilinear.cc
deleted file mode 100644
index a85cede..0000000
--- a/test/f32-bilinear.cc
+++ /dev/null
@@ -1,827 +0,0 @@
-// Copyright 2019 Google LLC
-//
-// This source code is licensed under the BSD-style license found in the
-// LICENSE file in the root directory of this source tree.
-//
-// Auto-generated file. Do not edit!
-// Specification: test/f32-bilinear.yaml
-// Generator: tools/generate-bilinear-test.py
-
-
-#include <gtest/gtest.h>
-
-#include <xnnpack/common.h>
-#include <xnnpack/isa-checks.h>
-
-#include <xnnpack/bilinear.h>
-#include "bilinear-microkernel-tester.h"
-
-
-TEST(F32_BILINEAR__SCALAR_C1, channels_eq_1) {
- BilinearMicrokernelTester()
- .pixels(1)
- .channels(1)
- .Test(xnn_f32_bilinear_ukernel__scalar_c1);
-}
-
-TEST(F32_BILINEAR__SCALAR_C1, channels_gt_1) {
- for (size_t channels = 2; channels < 10; channels++) {
- BilinearMicrokernelTester()
- .pixels(1)
- .channels(channels)
- .Test(xnn_f32_bilinear_ukernel__scalar_c1);
- }
-}
-
-TEST(F32_BILINEAR__SCALAR_C1, pixels_gt_1) {
- for (size_t pixels = 2; pixels < 3; pixels++) {
- for (size_t channels = 1; channels <= 5; channels += 1) {
- BilinearMicrokernelTester()
- .pixels(pixels)
- .channels(channels)
- .Test(xnn_f32_bilinear_ukernel__scalar_c1);
- }
- }
-}
-
-TEST(F32_BILINEAR__SCALAR_C1, input_offset) {
- for (size_t pixels = 1; pixels < 5; pixels += 1) {
- for (size_t channels = 1; channels <= 5; channels += 1) {
- BilinearMicrokernelTester()
- .pixels(pixels)
- .channels(channels)
- .input_offset(7)
- .Test(xnn_f32_bilinear_ukernel__scalar_c1);
- }
- }
-}
-TEST(F32_BILINEAR__SCALAR_C1, output_stride) {
- for (size_t pixels = 1; pixels < 5; pixels += 1) {
- for (size_t channels = 1; channels <= 5; channels += 1) {
- BilinearMicrokernelTester()
- .pixels(pixels)
- .channels(channels)
- .output_stride(7)
- .Test(xnn_f32_bilinear_ukernel__scalar_c1);
- }
- }
-}
-
-TEST(F32_BILINEAR__SCALAR_C2, channels_eq_2) {
- BilinearMicrokernelTester()
- .pixels(1)
- .channels(2)
- .Test(xnn_f32_bilinear_ukernel__scalar_c2);
-}
-
-TEST(F32_BILINEAR__SCALAR_C2, channels_div_2) {
- for (size_t channels = 4; channels < 20; channels += 2) {
- BilinearMicrokernelTester()
- .pixels(1)
- .channels(channels)
- .Test(xnn_f32_bilinear_ukernel__scalar_c2);
- }
-}
-
-TEST(F32_BILINEAR__SCALAR_C2, channels_lt_2) {
- for (size_t channels = 1; channels < 2; channels++) {
- BilinearMicrokernelTester()
- .pixels(1)
- .channels(channels)
- .Test(xnn_f32_bilinear_ukernel__scalar_c2);
- }
-}
-
-TEST(F32_BILINEAR__SCALAR_C2, channels_gt_2) {
- for (size_t channels = 3; channels < 4; channels++) {
- BilinearMicrokernelTester()
- .pixels(1)
- .channels(channels)
- .Test(xnn_f32_bilinear_ukernel__scalar_c2);
- }
-}
-
-TEST(F32_BILINEAR__SCALAR_C2, pixels_gt_1) {
- for (size_t pixels = 2; pixels < 3; pixels++) {
- for (size_t channels = 1; channels <= 10; channels += 1) {
- BilinearMicrokernelTester()
- .pixels(pixels)
- .channels(channels)
- .Test(xnn_f32_bilinear_ukernel__scalar_c2);
- }
- }
-}
-
-TEST(F32_BILINEAR__SCALAR_C2, input_offset) {
- for (size_t pixels = 1; pixels < 5; pixels += 1) {
- for (size_t channels = 1; channels <= 10; channels += 1) {
- BilinearMicrokernelTester()
- .pixels(pixels)
- .channels(channels)
- .input_offset(13)
- .Test(xnn_f32_bilinear_ukernel__scalar_c2);
- }
- }
-}
-TEST(F32_BILINEAR__SCALAR_C2, output_stride) {
- for (size_t pixels = 1; pixels < 5; pixels += 1) {
- for (size_t channels = 1; channels <= 10; channels += 1) {
- BilinearMicrokernelTester()
- .pixels(pixels)
- .channels(channels)
- .output_stride(13)
- .Test(xnn_f32_bilinear_ukernel__scalar_c2);
- }
- }
-}
-
-TEST(F32_BILINEAR__SCALAR_C4, channels_eq_4) {
- BilinearMicrokernelTester()
- .pixels(1)
- .channels(4)
- .Test(xnn_f32_bilinear_ukernel__scalar_c4);
-}
-
-TEST(F32_BILINEAR__SCALAR_C4, channels_div_4) {
- for (size_t channels = 8; channels < 40; channels += 4) {
- BilinearMicrokernelTester()
- .pixels(1)
- .channels(channels)
- .Test(xnn_f32_bilinear_ukernel__scalar_c4);
- }
-}
-
-TEST(F32_BILINEAR__SCALAR_C4, channels_lt_4) {
- for (size_t channels = 1; channels < 4; channels++) {
- BilinearMicrokernelTester()
- .pixels(1)
- .channels(channels)
- .Test(xnn_f32_bilinear_ukernel__scalar_c4);
- }
-}
-
-TEST(F32_BILINEAR__SCALAR_C4, channels_gt_4) {
- for (size_t channels = 5; channels < 8; channels++) {
- BilinearMicrokernelTester()
- .pixels(1)
- .channels(channels)
- .Test(xnn_f32_bilinear_ukernel__scalar_c4);
- }
-}
-
-TEST(F32_BILINEAR__SCALAR_C4, pixels_gt_1) {
- for (size_t pixels = 2; pixels < 3; pixels++) {
- for (size_t channels = 1; channels <= 20; channels += 3) {
- BilinearMicrokernelTester()
- .pixels(pixels)
- .channels(channels)
- .Test(xnn_f32_bilinear_ukernel__scalar_c4);
- }
- }
-}
-
-TEST(F32_BILINEAR__SCALAR_C4, input_offset) {
- for (size_t pixels = 1; pixels < 5; pixels += 1) {
- for (size_t channels = 1; channels <= 20; channels += 3) {
- BilinearMicrokernelTester()
- .pixels(pixels)
- .channels(channels)
- .input_offset(23)
- .Test(xnn_f32_bilinear_ukernel__scalar_c4);
- }
- }
-}
-TEST(F32_BILINEAR__SCALAR_C4, output_stride) {
- for (size_t pixels = 1; pixels < 5; pixels += 1) {
- for (size_t channels = 1; channels <= 20; channels += 3) {
- BilinearMicrokernelTester()
- .pixels(pixels)
- .channels(channels)
- .output_stride(23)
- .Test(xnn_f32_bilinear_ukernel__scalar_c4);
- }
- }
-}
-
-#if XNN_ARCH_ARM || XNN_ARCH_ARM64
- TEST(F32_BILINEAR__NEON_C4, channels_eq_4) {
- TEST_REQUIRES_ARM_NEON;
- BilinearMicrokernelTester()
- .pixels(1)
- .channels(4)
- .Test(xnn_f32_bilinear_ukernel__neon_c4);
- }
-
- TEST(F32_BILINEAR__NEON_C4, channels_div_4) {
- TEST_REQUIRES_ARM_NEON;
- for (size_t channels = 8; channels < 40; channels += 4) {
- BilinearMicrokernelTester()
- .pixels(1)
- .channels(channels)
- .Test(xnn_f32_bilinear_ukernel__neon_c4);
- }
- }
-
- TEST(F32_BILINEAR__NEON_C4, channels_lt_4) {
- TEST_REQUIRES_ARM_NEON;
- for (size_t channels = 1; channels < 4; channels++) {
- BilinearMicrokernelTester()
- .pixels(1)
- .channels(channels)
- .Test(xnn_f32_bilinear_ukernel__neon_c4);
- }
- }
-
- TEST(F32_BILINEAR__NEON_C4, channels_gt_4) {
- TEST_REQUIRES_ARM_NEON;
- for (size_t channels = 5; channels < 8; channels++) {
- BilinearMicrokernelTester()
- .pixels(1)
- .channels(channels)
- .Test(xnn_f32_bilinear_ukernel__neon_c4);
- }
- }
-
- TEST(F32_BILINEAR__NEON_C4, pixels_gt_1) {
- TEST_REQUIRES_ARM_NEON;
- for (size_t pixels = 2; pixels < 3; pixels++) {
- for (size_t channels = 1; channels <= 20; channels += 3) {
- BilinearMicrokernelTester()
- .pixels(pixels)
- .channels(channels)
- .Test(xnn_f32_bilinear_ukernel__neon_c4);
- }
- }
- }
-
- TEST(F32_BILINEAR__NEON_C4, input_offset) {
- TEST_REQUIRES_ARM_NEON;
- for (size_t pixels = 1; pixels < 5; pixels += 1) {
- for (size_t channels = 1; channels <= 20; channels += 3) {
- BilinearMicrokernelTester()
- .pixels(pixels)
- .channels(channels)
- .input_offset(23)
- .Test(xnn_f32_bilinear_ukernel__neon_c4);
- }
- }
- }
- TEST(F32_BILINEAR__NEON_C4, output_stride) {
- TEST_REQUIRES_ARM_NEON;
- for (size_t pixels = 1; pixels < 5; pixels += 1) {
- for (size_t channels = 1; channels <= 20; channels += 3) {
- BilinearMicrokernelTester()
- .pixels(pixels)
- .channels(channels)
- .output_stride(23)
- .Test(xnn_f32_bilinear_ukernel__neon_c4);
- }
- }
- }
-#endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
-
-
-#if XNN_ARCH_ARM || XNN_ARCH_ARM64
- TEST(F32_BILINEAR__NEON_C8, channels_eq_8) {
- TEST_REQUIRES_ARM_NEON;
- BilinearMicrokernelTester()
- .pixels(1)
- .channels(8)
- .Test(xnn_f32_bilinear_ukernel__neon_c8);
- }
-
- TEST(F32_BILINEAR__NEON_C8, channels_div_8) {
- TEST_REQUIRES_ARM_NEON;
- for (size_t channels = 16; channels < 80; channels += 8) {
- BilinearMicrokernelTester()
- .pixels(1)
- .channels(channels)
- .Test(xnn_f32_bilinear_ukernel__neon_c8);
- }
- }
-
- TEST(F32_BILINEAR__NEON_C8, channels_lt_8) {
- TEST_REQUIRES_ARM_NEON;
- for (size_t channels = 1; channels < 8; channels++) {
- BilinearMicrokernelTester()
- .pixels(1)
- .channels(channels)
- .Test(xnn_f32_bilinear_ukernel__neon_c8);
- }
- }
-
- TEST(F32_BILINEAR__NEON_C8, channels_gt_8) {
- TEST_REQUIRES_ARM_NEON;
- for (size_t channels = 9; channels < 16; channels++) {
- BilinearMicrokernelTester()
- .pixels(1)
- .channels(channels)
- .Test(xnn_f32_bilinear_ukernel__neon_c8);
- }
- }
-
- TEST(F32_BILINEAR__NEON_C8, pixels_gt_1) {
- TEST_REQUIRES_ARM_NEON;
- for (size_t pixels = 2; pixels < 3; pixels++) {
- for (size_t channels = 1; channels <= 40; channels += 7) {
- BilinearMicrokernelTester()
- .pixels(pixels)
- .channels(channels)
- .Test(xnn_f32_bilinear_ukernel__neon_c8);
- }
- }
- }
-
- TEST(F32_BILINEAR__NEON_C8, input_offset) {
- TEST_REQUIRES_ARM_NEON;
- for (size_t pixels = 1; pixels < 5; pixels += 1) {
- for (size_t channels = 1; channels <= 40; channels += 7) {
- BilinearMicrokernelTester()
- .pixels(pixels)
- .channels(channels)
- .input_offset(43)
- .Test(xnn_f32_bilinear_ukernel__neon_c8);
- }
- }
- }
- TEST(F32_BILINEAR__NEON_C8, output_stride) {
- TEST_REQUIRES_ARM_NEON;
- for (size_t pixels = 1; pixels < 5; pixels += 1) {
- for (size_t channels = 1; channels <= 40; channels += 7) {
- BilinearMicrokernelTester()
- .pixels(pixels)
- .channels(channels)
- .output_stride(43)
- .Test(xnn_f32_bilinear_ukernel__neon_c8);
- }
- }
- }
-#endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
-
-
-#if XNN_ARCH_ARM || XNN_ARCH_ARM64
- TEST(F32_BILINEAR__NEONFMA_C4, channels_eq_4) {
- TEST_REQUIRES_ARM_NEON_FMA;
- BilinearMicrokernelTester()
- .pixels(1)
- .channels(4)
- .Test(xnn_f32_bilinear_ukernel__neonfma_c4);
- }
-
- TEST(F32_BILINEAR__NEONFMA_C4, channels_div_4) {
- TEST_REQUIRES_ARM_NEON_FMA;
- for (size_t channels = 8; channels < 40; channels += 4) {
- BilinearMicrokernelTester()
- .pixels(1)
- .channels(channels)
- .Test(xnn_f32_bilinear_ukernel__neonfma_c4);
- }
- }
-
- TEST(F32_BILINEAR__NEONFMA_C4, channels_lt_4) {
- TEST_REQUIRES_ARM_NEON_FMA;
- for (size_t channels = 1; channels < 4; channels++) {
- BilinearMicrokernelTester()
- .pixels(1)
- .channels(channels)
- .Test(xnn_f32_bilinear_ukernel__neonfma_c4);
- }
- }
-
- TEST(F32_BILINEAR__NEONFMA_C4, channels_gt_4) {
- TEST_REQUIRES_ARM_NEON_FMA;
- for (size_t channels = 5; channels < 8; channels++) {
- BilinearMicrokernelTester()
- .pixels(1)
- .channels(channels)
- .Test(xnn_f32_bilinear_ukernel__neonfma_c4);
- }
- }
-
- TEST(F32_BILINEAR__NEONFMA_C4, pixels_gt_1) {
- TEST_REQUIRES_ARM_NEON_FMA;
- for (size_t pixels = 2; pixels < 3; pixels++) {
- for (size_t channels = 1; channels <= 20; channels += 3) {
- BilinearMicrokernelTester()
- .pixels(pixels)
- .channels(channels)
- .Test(xnn_f32_bilinear_ukernel__neonfma_c4);
- }
- }
- }
-
- TEST(F32_BILINEAR__NEONFMA_C4, input_offset) {
- TEST_REQUIRES_ARM_NEON_FMA;
- for (size_t pixels = 1; pixels < 5; pixels += 1) {
- for (size_t channels = 1; channels <= 20; channels += 3) {
- BilinearMicrokernelTester()
- .pixels(pixels)
- .channels(channels)
- .input_offset(23)
- .Test(xnn_f32_bilinear_ukernel__neonfma_c4);
- }
- }
- }
- TEST(F32_BILINEAR__NEONFMA_C4, output_stride) {
- TEST_REQUIRES_ARM_NEON_FMA;
- for (size_t pixels = 1; pixels < 5; pixels += 1) {
- for (size_t channels = 1; channels <= 20; channels += 3) {
- BilinearMicrokernelTester()
- .pixels(pixels)
- .channels(channels)
- .output_stride(23)
- .Test(xnn_f32_bilinear_ukernel__neonfma_c4);
- }
- }
- }
-#endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
-
-
-#if XNN_ARCH_ARM || XNN_ARCH_ARM64
- TEST(F32_BILINEAR__NEONFMA_C8, channels_eq_8) {
- TEST_REQUIRES_ARM_NEON_FMA;
- BilinearMicrokernelTester()
- .pixels(1)
- .channels(8)
- .Test(xnn_f32_bilinear_ukernel__neonfma_c8);
- }
-
- TEST(F32_BILINEAR__NEONFMA_C8, channels_div_8) {
- TEST_REQUIRES_ARM_NEON_FMA;
- for (size_t channels = 16; channels < 80; channels += 8) {
- BilinearMicrokernelTester()
- .pixels(1)
- .channels(channels)
- .Test(xnn_f32_bilinear_ukernel__neonfma_c8);
- }
- }
-
- TEST(F32_BILINEAR__NEONFMA_C8, channels_lt_8) {
- TEST_REQUIRES_ARM_NEON_FMA;
- for (size_t channels = 1; channels < 8; channels++) {
- BilinearMicrokernelTester()
- .pixels(1)
- .channels(channels)
- .Test(xnn_f32_bilinear_ukernel__neonfma_c8);
- }
- }
-
- TEST(F32_BILINEAR__NEONFMA_C8, channels_gt_8) {
- TEST_REQUIRES_ARM_NEON_FMA;
- for (size_t channels = 9; channels < 16; channels++) {
- BilinearMicrokernelTester()
- .pixels(1)
- .channels(channels)
- .Test(xnn_f32_bilinear_ukernel__neonfma_c8);
- }
- }
-
- TEST(F32_BILINEAR__NEONFMA_C8, pixels_gt_1) {
- TEST_REQUIRES_ARM_NEON_FMA;
- for (size_t pixels = 2; pixels < 3; pixels++) {
- for (size_t channels = 1; channels <= 40; channels += 7) {
- BilinearMicrokernelTester()
- .pixels(pixels)
- .channels(channels)
- .Test(xnn_f32_bilinear_ukernel__neonfma_c8);
- }
- }
- }
-
- TEST(F32_BILINEAR__NEONFMA_C8, input_offset) {
- TEST_REQUIRES_ARM_NEON_FMA;
- for (size_t pixels = 1; pixels < 5; pixels += 1) {
- for (size_t channels = 1; channels <= 40; channels += 7) {
- BilinearMicrokernelTester()
- .pixels(pixels)
- .channels(channels)
- .input_offset(43)
- .Test(xnn_f32_bilinear_ukernel__neonfma_c8);
- }
- }
- }
- TEST(F32_BILINEAR__NEONFMA_C8, output_stride) {
- TEST_REQUIRES_ARM_NEON_FMA;
- for (size_t pixels = 1; pixels < 5; pixels += 1) {
- for (size_t channels = 1; channels <= 40; channels += 7) {
- BilinearMicrokernelTester()
- .pixels(pixels)
- .channels(channels)
- .output_stride(43)
- .Test(xnn_f32_bilinear_ukernel__neonfma_c8);
- }
- }
- }
-#endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
-
-
-#if XNN_ARCH_X86 || XNN_ARCH_X86_64
- TEST(F32_BILINEAR__SSE_C4, channels_eq_4) {
- TEST_REQUIRES_X86_SSE;
- BilinearMicrokernelTester()
- .pixels(1)
- .channels(4)
- .Test(xnn_f32_bilinear_ukernel__sse_c4);
- }
-
- TEST(F32_BILINEAR__SSE_C4, channels_div_4) {
- TEST_REQUIRES_X86_SSE;
- for (size_t channels = 8; channels < 40; channels += 4) {
- BilinearMicrokernelTester()
- .pixels(1)
- .channels(channels)
- .Test(xnn_f32_bilinear_ukernel__sse_c4);
- }
- }
-
- TEST(F32_BILINEAR__SSE_C4, channels_lt_4) {
- TEST_REQUIRES_X86_SSE;
- for (size_t channels = 1; channels < 4; channels++) {
- BilinearMicrokernelTester()
- .pixels(1)
- .channels(channels)
- .Test(xnn_f32_bilinear_ukernel__sse_c4);
- }
- }
-
- TEST(F32_BILINEAR__SSE_C4, channels_gt_4) {
- TEST_REQUIRES_X86_SSE;
- for (size_t channels = 5; channels < 8; channels++) {
- BilinearMicrokernelTester()
- .pixels(1)
- .channels(channels)
- .Test(xnn_f32_bilinear_ukernel__sse_c4);
- }
- }
-
- TEST(F32_BILINEAR__SSE_C4, pixels_gt_1) {
- TEST_REQUIRES_X86_SSE;
- for (size_t pixels = 2; pixels < 3; pixels++) {
- for (size_t channels = 1; channels <= 20; channels += 3) {
- BilinearMicrokernelTester()
- .pixels(pixels)
- .channels(channels)
- .Test(xnn_f32_bilinear_ukernel__sse_c4);
- }
- }
- }
-
- TEST(F32_BILINEAR__SSE_C4, input_offset) {
- TEST_REQUIRES_X86_SSE;
- for (size_t pixels = 1; pixels < 5; pixels += 1) {
- for (size_t channels = 1; channels <= 20; channels += 3) {
- BilinearMicrokernelTester()
- .pixels(pixels)
- .channels(channels)
- .input_offset(23)
- .Test(xnn_f32_bilinear_ukernel__sse_c4);
- }
- }
- }
- TEST(F32_BILINEAR__SSE_C4, output_stride) {
- TEST_REQUIRES_X86_SSE;
- for (size_t pixels = 1; pixels < 5; pixels += 1) {
- for (size_t channels = 1; channels <= 20; channels += 3) {
- BilinearMicrokernelTester()
- .pixels(pixels)
- .channels(channels)
- .output_stride(23)
- .Test(xnn_f32_bilinear_ukernel__sse_c4);
- }
- }
- }
-#endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
-
-
-#if XNN_ARCH_X86 || XNN_ARCH_X86_64
- TEST(F32_BILINEAR__SSE_C8, channels_eq_8) {
- TEST_REQUIRES_X86_SSE;
- BilinearMicrokernelTester()
- .pixels(1)
- .channels(8)
- .Test(xnn_f32_bilinear_ukernel__sse_c8);
- }
-
- TEST(F32_BILINEAR__SSE_C8, channels_div_8) {
- TEST_REQUIRES_X86_SSE;
- for (size_t channels = 16; channels < 80; channels += 8) {
- BilinearMicrokernelTester()
- .pixels(1)
- .channels(channels)
- .Test(xnn_f32_bilinear_ukernel__sse_c8);
- }
- }
-
- TEST(F32_BILINEAR__SSE_C8, channels_lt_8) {
- TEST_REQUIRES_X86_SSE;
- for (size_t channels = 1; channels < 8; channels++) {
- BilinearMicrokernelTester()
- .pixels(1)
- .channels(channels)
- .Test(xnn_f32_bilinear_ukernel__sse_c8);
- }
- }
-
- TEST(F32_BILINEAR__SSE_C8, channels_gt_8) {
- TEST_REQUIRES_X86_SSE;
- for (size_t channels = 9; channels < 16; channels++) {
- BilinearMicrokernelTester()
- .pixels(1)
- .channels(channels)
- .Test(xnn_f32_bilinear_ukernel__sse_c8);
- }
- }
-
- TEST(F32_BILINEAR__SSE_C8, pixels_gt_1) {
- TEST_REQUIRES_X86_SSE;
- for (size_t pixels = 2; pixels < 3; pixels++) {
- for (size_t channels = 1; channels <= 40; channels += 7) {
- BilinearMicrokernelTester()
- .pixels(pixels)
- .channels(channels)
- .Test(xnn_f32_bilinear_ukernel__sse_c8);
- }
- }
- }
-
- TEST(F32_BILINEAR__SSE_C8, input_offset) {
- TEST_REQUIRES_X86_SSE;
- for (size_t pixels = 1; pixels < 5; pixels += 1) {
- for (size_t channels = 1; channels <= 40; channels += 7) {
- BilinearMicrokernelTester()
- .pixels(pixels)
- .channels(channels)
- .input_offset(43)
- .Test(xnn_f32_bilinear_ukernel__sse_c8);
- }
- }
- }
- TEST(F32_BILINEAR__SSE_C8, output_stride) {
- TEST_REQUIRES_X86_SSE;
- for (size_t pixels = 1; pixels < 5; pixels += 1) {
- for (size_t channels = 1; channels <= 40; channels += 7) {
- BilinearMicrokernelTester()
- .pixels(pixels)
- .channels(channels)
- .output_stride(43)
- .Test(xnn_f32_bilinear_ukernel__sse_c8);
- }
- }
- }
-#endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
-
-
-#if !XNN_ARCH_ASMJS && !XNN_ARCH_WASM
- TEST(F32_BILINEAR__PSIMD_C4, channels_eq_4) {
- TEST_REQUIRES_PSIMD;
- BilinearMicrokernelTester()
- .pixels(1)
- .channels(4)
- .Test(xnn_f32_bilinear_ukernel__psimd_c4);
- }
-
- TEST(F32_BILINEAR__PSIMD_C4, channels_div_4) {
- TEST_REQUIRES_PSIMD;
- for (size_t channels = 8; channels < 40; channels += 4) {
- BilinearMicrokernelTester()
- .pixels(1)
- .channels(channels)
- .Test(xnn_f32_bilinear_ukernel__psimd_c4);
- }
- }
-
- TEST(F32_BILINEAR__PSIMD_C4, channels_lt_4) {
- TEST_REQUIRES_PSIMD;
- for (size_t channels = 1; channels < 4; channels++) {
- BilinearMicrokernelTester()
- .pixels(1)
- .channels(channels)
- .Test(xnn_f32_bilinear_ukernel__psimd_c4);
- }
- }
-
- TEST(F32_BILINEAR__PSIMD_C4, channels_gt_4) {
- TEST_REQUIRES_PSIMD;
- for (size_t channels = 5; channels < 8; channels++) {
- BilinearMicrokernelTester()
- .pixels(1)
- .channels(channels)
- .Test(xnn_f32_bilinear_ukernel__psimd_c4);
- }
- }
-
- TEST(F32_BILINEAR__PSIMD_C4, pixels_gt_1) {
- TEST_REQUIRES_PSIMD;
- for (size_t pixels = 2; pixels < 3; pixels++) {
- for (size_t channels = 1; channels <= 20; channels += 3) {
- BilinearMicrokernelTester()
- .pixels(pixels)
- .channels(channels)
- .Test(xnn_f32_bilinear_ukernel__psimd_c4);
- }
- }
- }
-
- TEST(F32_BILINEAR__PSIMD_C4, input_offset) {
- TEST_REQUIRES_PSIMD;
- for (size_t pixels = 1; pixels < 5; pixels += 1) {
- for (size_t channels = 1; channels <= 20; channels += 3) {
- BilinearMicrokernelTester()
- .pixels(pixels)
- .channels(channels)
- .input_offset(23)
- .Test(xnn_f32_bilinear_ukernel__psimd_c4);
- }
- }
- }
- TEST(F32_BILINEAR__PSIMD_C4, output_stride) {
- TEST_REQUIRES_PSIMD;
- for (size_t pixels = 1; pixels < 5; pixels += 1) {
- for (size_t channels = 1; channels <= 20; channels += 3) {
- BilinearMicrokernelTester()
- .pixels(pixels)
- .channels(channels)
- .output_stride(23)
- .Test(xnn_f32_bilinear_ukernel__psimd_c4);
- }
- }
- }
-#endif // !XNN_ARCH_ASMJS && !XNN_ARCH_WASM
-
-
-#if !XNN_ARCH_ASMJS && !XNN_ARCH_WASM
- TEST(F32_BILINEAR__PSIMD_C8, channels_eq_8) {
- TEST_REQUIRES_PSIMD;
- BilinearMicrokernelTester()
- .pixels(1)
- .channels(8)
- .Test(xnn_f32_bilinear_ukernel__psimd_c8);
- }
-
- TEST(F32_BILINEAR__PSIMD_C8, channels_div_8) {
- TEST_REQUIRES_PSIMD;
- for (size_t channels = 16; channels < 80; channels += 8) {
- BilinearMicrokernelTester()
- .pixels(1)
- .channels(channels)
- .Test(xnn_f32_bilinear_ukernel__psimd_c8);
- }
- }
-
- TEST(F32_BILINEAR__PSIMD_C8, channels_lt_8) {
- TEST_REQUIRES_PSIMD;
- for (size_t channels = 1; channels < 8; channels++) {
- BilinearMicrokernelTester()
- .pixels(1)
- .channels(channels)
- .Test(xnn_f32_bilinear_ukernel__psimd_c8);
- }
- }
-
- TEST(F32_BILINEAR__PSIMD_C8, channels_gt_8) {
- TEST_REQUIRES_PSIMD;
- for (size_t channels = 9; channels < 16; channels++) {
- BilinearMicrokernelTester()
- .pixels(1)
- .channels(channels)
- .Test(xnn_f32_bilinear_ukernel__psimd_c8);
- }
- }
-
- TEST(F32_BILINEAR__PSIMD_C8, pixels_gt_1) {
- TEST_REQUIRES_PSIMD;
- for (size_t pixels = 2; pixels < 3; pixels++) {
- for (size_t channels = 1; channels <= 40; channels += 7) {
- BilinearMicrokernelTester()
- .pixels(pixels)
- .channels(channels)
- .Test(xnn_f32_bilinear_ukernel__psimd_c8);
- }
- }
- }
-
- TEST(F32_BILINEAR__PSIMD_C8, input_offset) {
- TEST_REQUIRES_PSIMD;
- for (size_t pixels = 1; pixels < 5; pixels += 1) {
- for (size_t channels = 1; channels <= 40; channels += 7) {
- BilinearMicrokernelTester()
- .pixels(pixels)
- .channels(channels)
- .input_offset(43)
- .Test(xnn_f32_bilinear_ukernel__psimd_c8);
- }
- }
- }
- TEST(F32_BILINEAR__PSIMD_C8, output_stride) {
- TEST_REQUIRES_PSIMD;
- for (size_t pixels = 1; pixels < 5; pixels += 1) {
- for (size_t channels = 1; channels <= 40; channels += 7) {
- BilinearMicrokernelTester()
- .pixels(pixels)
- .channels(channels)
- .output_stride(43)
- .Test(xnn_f32_bilinear_ukernel__psimd_c8);
- }
- }
- }
-#endif // !XNN_ARCH_ASMJS && !XNN_ARCH_WASM
diff --git a/test/f32-bilinear.yaml b/test/f32-bilinear.yaml
deleted file mode 100644
index 76cfc84..0000000
--- a/test/f32-bilinear.yaml
+++ /dev/null
@@ -1,15 +0,0 @@
-# Copyright 2019 Google LLC
-#
-# This source code is licensed under the BSD-style license found in the
-# LICENSE file in the root directory of this source tree.
-- name: xnn_f32_bilinear_ukernel__scalar_c1
-- name: xnn_f32_bilinear_ukernel__scalar_c2
-- name: xnn_f32_bilinear_ukernel__scalar_c4
-- name: xnn_f32_bilinear_ukernel__neon_c4
-- name: xnn_f32_bilinear_ukernel__neon_c8
-- name: xnn_f32_bilinear_ukernel__neonfma_c4
-- name: xnn_f32_bilinear_ukernel__neonfma_c8
-- name: xnn_f32_bilinear_ukernel__sse_c4
-- name: xnn_f32_bilinear_ukernel__sse_c8
-- name: xnn_f32_bilinear_ukernel__psimd_c4
-- name: xnn_f32_bilinear_ukernel__psimd_c8
diff --git a/test/f32-ibilinear.cc b/test/f32-ibilinear.cc
new file mode 100644
index 0000000..e5f75de
--- /dev/null
+++ b/test/f32-ibilinear.cc
@@ -0,0 +1,827 @@
+// Copyright 2019 Google LLC
+//
+// This source code is licensed under the BSD-style license found in the
+// LICENSE file in the root directory of this source tree.
+//
+// Auto-generated file. Do not edit!
+// Specification: test/f32-ibilinear.yaml
+// Generator: tools/generate-ibilinear-test.py
+
+
+#include <gtest/gtest.h>
+
+#include <xnnpack/common.h>
+#include <xnnpack/isa-checks.h>
+
+#include <xnnpack/ibilinear.h>
+#include "ibilinear-microkernel-tester.h"
+
+
+TEST(F32_IBILINEAR__SCALAR_C1, channels_eq_1) { // Smallest case: one tester run with 1 pixel and 1 channel.
+ IBilinearMicrokernelTester()
+ .pixels(1)
+ .channels(1)
+ .Test(xnn_f32_ibilinear_ukernel__scalar_c1); // The micro-kernel under test is handed to the tester's Test().
+}
+
+TEST(F32_IBILINEAR__SCALAR_C1, channels_gt_1) { // One pixel, channel counts above 1.
+ for (size_t channels = 2; channels < 10; channels++) { // channels = 2..9
+ IBilinearMicrokernelTester()
+ .pixels(1)
+ .channels(channels)
+ .Test(xnn_f32_ibilinear_ukernel__scalar_c1);
+ }
+}
+
+TEST(F32_IBILINEAR__SCALAR_C1, pixels_gt_1) { // More than one pixel per call, across several channel counts.
+ for (size_t pixels = 2; pixels < 3; pixels++) { // Bounds admit a single value: pixels == 2.
+ for (size_t channels = 1; channels <= 5; channels += 1) { // channels = 1..5
+ IBilinearMicrokernelTester()
+ .pixels(pixels)
+ .channels(channels)
+ .Test(xnn_f32_ibilinear_ukernel__scalar_c1);
+ }
+ }
+}
+
+TEST(F32_IBILINEAR__SCALAR_C1, input_offset) { // Same pixel/channel sweep, with an explicit input offset.
+ for (size_t pixels = 1; pixels < 5; pixels += 1) { // pixels = 1..4
+ for (size_t channels = 1; channels <= 5; channels += 1) { // channels = 1..5
+ IBilinearMicrokernelTester()
+ .pixels(pixels)
+ .channels(channels)
+ .input_offset(7) // 7 exceeds every channels value in this sweep; how the offset is applied is defined by the tester (not shown here).
+ .Test(xnn_f32_ibilinear_ukernel__scalar_c1);
+ }
+ }
+}
+TEST(F32_IBILINEAR__SCALAR_C1, output_stride) { // Same pixel/channel sweep, with an explicit output stride.
+ for (size_t pixels = 1; pixels < 5; pixels += 1) { // pixels = 1..4
+ for (size_t channels = 1; channels <= 5; channels += 1) { // channels = 1..5
+ IBilinearMicrokernelTester()
+ .pixels(pixels)
+ .channels(channels)
+ .output_stride(7) // 7 exceeds every channels value in this sweep; stride semantics are defined by the tester (not shown here).
+ .Test(xnn_f32_ibilinear_ukernel__scalar_c1);
+ }
+ }
+}
+
+TEST(F32_IBILINEAR__SCALAR_C2, channels_eq_2) { // One tester run with 1 pixel and exactly 2 channels.
+ IBilinearMicrokernelTester()
+ .pixels(1)
+ .channels(2)
+ .Test(xnn_f32_ibilinear_ukernel__scalar_c2); // The micro-kernel under test is handed to the tester's Test().
+}
+
+TEST(F32_IBILINEAR__SCALAR_C2, channels_div_2) { // One pixel, channel counts that are multiples of 2 (above 2).
+ for (size_t channels = 4; channels < 20; channels += 2) { // channels = 4, 6, ..., 18
+ IBilinearMicrokernelTester()
+ .pixels(1)
+ .channels(channels)
+ .Test(xnn_f32_ibilinear_ukernel__scalar_c2);
+ }
+}
+
+TEST(F32_IBILINEAR__SCALAR_C2, channels_lt_2) { // One pixel, channel counts below 2.
+ for (size_t channels = 1; channels < 2; channels++) { // Bounds admit a single value: channels == 1.
+ IBilinearMicrokernelTester()
+ .pixels(1)
+ .channels(channels)
+ .Test(xnn_f32_ibilinear_ukernel__scalar_c2);
+ }
+}
+
+TEST(F32_IBILINEAR__SCALAR_C2, channels_gt_2) { // One pixel, channel counts strictly between 2 and 4.
+ for (size_t channels = 3; channels < 4; channels++) { // Bounds admit a single value: channels == 3.
+ IBilinearMicrokernelTester()
+ .pixels(1)
+ .channels(channels)
+ .Test(xnn_f32_ibilinear_ukernel__scalar_c2);
+ }
+}
+
+TEST(F32_IBILINEAR__SCALAR_C2, pixels_gt_1) { // More than one pixel per call, across several channel counts.
+ for (size_t pixels = 2; pixels < 3; pixels++) { // Bounds admit a single value: pixels == 2.
+ for (size_t channels = 1; channels <= 10; channels += 1) { // channels = 1..10
+ IBilinearMicrokernelTester()
+ .pixels(pixels)
+ .channels(channels)
+ .Test(xnn_f32_ibilinear_ukernel__scalar_c2);
+ }
+ }
+}
+
+TEST(F32_IBILINEAR__SCALAR_C2, input_offset) { // Pixel/channel sweep with an explicit input offset.
+ for (size_t pixels = 1; pixels < 5; pixels += 1) { // pixels = 1..4
+ for (size_t channels = 1; channels <= 10; channels += 1) { // channels = 1..10
+ IBilinearMicrokernelTester()
+ .pixels(pixels)
+ .channels(channels)
+ .input_offset(13) // 13 exceeds every channels value in this sweep; how the offset is applied is defined by the tester (not shown here).
+ .Test(xnn_f32_ibilinear_ukernel__scalar_c2);
+ }
+ }
+}
+TEST(F32_IBILINEAR__SCALAR_C2, output_stride) { // Pixel/channel sweep with an explicit output stride.
+ for (size_t pixels = 1; pixels < 5; pixels += 1) { // pixels = 1..4
+ for (size_t channels = 1; channels <= 10; channels += 1) { // channels = 1..10
+ IBilinearMicrokernelTester()
+ .pixels(pixels)
+ .channels(channels)
+ .output_stride(13) // 13 exceeds every channels value in this sweep; stride semantics are defined by the tester (not shown here).
+ .Test(xnn_f32_ibilinear_ukernel__scalar_c2);
+ }
+ }
+}
+
+TEST(F32_IBILINEAR__SCALAR_C4, channels_eq_4) { // One tester run with 1 pixel and exactly 4 channels.
+ IBilinearMicrokernelTester()
+ .pixels(1)
+ .channels(4)
+ .Test(xnn_f32_ibilinear_ukernel__scalar_c4); // The micro-kernel under test is handed to the tester's Test().
+}
+
+TEST(F32_IBILINEAR__SCALAR_C4, channels_div_4) { // One pixel, channel counts that are multiples of 4 (above 4).
+ for (size_t channels = 8; channels < 40; channels += 4) { // channels = 8, 12, ..., 36
+ IBilinearMicrokernelTester()
+ .pixels(1)
+ .channels(channels)
+ .Test(xnn_f32_ibilinear_ukernel__scalar_c4);
+ }
+}
+
+TEST(F32_IBILINEAR__SCALAR_C4, channels_lt_4) { // One pixel, channel counts below 4.
+ for (size_t channels = 1; channels < 4; channels++) { // channels = 1..3
+ IBilinearMicrokernelTester()
+ .pixels(1)
+ .channels(channels)
+ .Test(xnn_f32_ibilinear_ukernel__scalar_c4);
+ }
+}
+
+TEST(F32_IBILINEAR__SCALAR_C4, channels_gt_4) { // One pixel, channel counts strictly between 4 and 8.
+ for (size_t channels = 5; channels < 8; channels++) { // channels = 5..7
+ IBilinearMicrokernelTester()
+ .pixels(1)
+ .channels(channels)
+ .Test(xnn_f32_ibilinear_ukernel__scalar_c4);
+ }
+}
+
+TEST(F32_IBILINEAR__SCALAR_C4, pixels_gt_1) { // More than one pixel per call, across several channel counts.
+ for (size_t pixels = 2; pixels < 3; pixels++) { // Bounds admit a single value: pixels == 2.
+ for (size_t channels = 1; channels <= 20; channels += 3) { // channels = 1, 4, 7, ..., 19
+ IBilinearMicrokernelTester()
+ .pixels(pixels)
+ .channels(channels)
+ .Test(xnn_f32_ibilinear_ukernel__scalar_c4);
+ }
+ }
+}
+
+TEST(F32_IBILINEAR__SCALAR_C4, input_offset) { // Pixel/channel sweep with an explicit input offset.
+ for (size_t pixels = 1; pixels < 5; pixels += 1) { // pixels = 1..4
+ for (size_t channels = 1; channels <= 20; channels += 3) { // channels = 1, 4, 7, ..., 19
+ IBilinearMicrokernelTester()
+ .pixels(pixels)
+ .channels(channels)
+ .input_offset(23) // 23 exceeds every channels value in this sweep; how the offset is applied is defined by the tester (not shown here).
+ .Test(xnn_f32_ibilinear_ukernel__scalar_c4);
+ }
+ }
+}
+TEST(F32_IBILINEAR__SCALAR_C4, output_stride) { // Pixel/channel sweep with an explicit output stride.
+ for (size_t pixels = 1; pixels < 5; pixels += 1) { // pixels = 1..4
+ for (size_t channels = 1; channels <= 20; channels += 3) { // channels = 1, 4, 7, ..., 19
+ IBilinearMicrokernelTester()
+ .pixels(pixels)
+ .channels(channels)
+ .output_stride(23) // 23 exceeds every channels value in this sweep; stride semantics are defined by the tester (not shown here).
+ .Test(xnn_f32_ibilinear_ukernel__scalar_c4);
+ }
+ }
+}
+
+#if XNN_ARCH_ARM || XNN_ARCH_ARM64
+ TEST(F32_IBILINEAR__NEON_C4, channels_eq_4) { // One tester run with 1 pixel and exactly 4 channels.
+ TEST_REQUIRES_ARM_NEON; // ISA guard macro defined outside this file (presumably in xnnpack/isa-checks.h; confirm).
+ IBilinearMicrokernelTester()
+ .pixels(1)
+ .channels(4)
+ .Test(xnn_f32_ibilinear_ukernel__neon_c4);
+ }
+
+ TEST(F32_IBILINEAR__NEON_C4, channels_div_4) { // One pixel, channel counts that are multiples of 4 (above 4).
+ TEST_REQUIRES_ARM_NEON; // ISA guard macro defined outside this file (presumably in xnnpack/isa-checks.h; confirm).
+ for (size_t channels = 8; channels < 40; channels += 4) { // channels = 8, 12, ..., 36
+ IBilinearMicrokernelTester()
+ .pixels(1)
+ .channels(channels)
+ .Test(xnn_f32_ibilinear_ukernel__neon_c4);
+ }
+ }
+
+ TEST(F32_IBILINEAR__NEON_C4, channels_lt_4) { // One pixel, channel counts below 4.
+ TEST_REQUIRES_ARM_NEON; // ISA guard macro defined outside this file (presumably in xnnpack/isa-checks.h; confirm).
+ for (size_t channels = 1; channels < 4; channels++) { // channels = 1..3
+ IBilinearMicrokernelTester()
+ .pixels(1)
+ .channels(channels)
+ .Test(xnn_f32_ibilinear_ukernel__neon_c4);
+ }
+ }
+
+ TEST(F32_IBILINEAR__NEON_C4, channels_gt_4) { // One pixel, channel counts strictly between 4 and 8.
+ TEST_REQUIRES_ARM_NEON; // ISA guard macro defined outside this file (presumably in xnnpack/isa-checks.h; confirm).
+ for (size_t channels = 5; channels < 8; channels++) { // channels = 5..7
+ IBilinearMicrokernelTester()
+ .pixels(1)
+ .channels(channels)
+ .Test(xnn_f32_ibilinear_ukernel__neon_c4);
+ }
+ }
+
+ TEST(F32_IBILINEAR__NEON_C4, pixels_gt_1) { // More than one pixel per call, across several channel counts.
+ TEST_REQUIRES_ARM_NEON; // ISA guard macro defined outside this file (presumably in xnnpack/isa-checks.h; confirm).
+ for (size_t pixels = 2; pixels < 3; pixels++) { // Bounds admit a single value: pixels == 2.
+ for (size_t channels = 1; channels <= 20; channels += 3) { // channels = 1, 4, 7, ..., 19
+ IBilinearMicrokernelTester()
+ .pixels(pixels)
+ .channels(channels)
+ .Test(xnn_f32_ibilinear_ukernel__neon_c4);
+ }
+ }
+ }
+
+ TEST(F32_IBILINEAR__NEON_C4, input_offset) { // Pixel/channel sweep with an explicit input offset.
+ TEST_REQUIRES_ARM_NEON; // ISA guard macro defined outside this file (presumably in xnnpack/isa-checks.h; confirm).
+ for (size_t pixels = 1; pixels < 5; pixels += 1) { // pixels = 1..4
+ for (size_t channels = 1; channels <= 20; channels += 3) { // channels = 1, 4, 7, ..., 19
+ IBilinearMicrokernelTester()
+ .pixels(pixels)
+ .channels(channels)
+ .input_offset(23) // 23 exceeds every channels value in this sweep; how the offset is applied is defined by the tester (not shown here).
+ .Test(xnn_f32_ibilinear_ukernel__neon_c4);
+ }
+ }
+ }
+ TEST(F32_IBILINEAR__NEON_C4, output_stride) { // Pixel/channel sweep with an explicit output stride.
+ TEST_REQUIRES_ARM_NEON; // ISA guard macro defined outside this file (presumably in xnnpack/isa-checks.h; confirm).
+ for (size_t pixels = 1; pixels < 5; pixels += 1) { // pixels = 1..4
+ for (size_t channels = 1; channels <= 20; channels += 3) { // channels = 1, 4, 7, ..., 19
+ IBilinearMicrokernelTester()
+ .pixels(pixels)
+ .channels(channels)
+ .output_stride(23) // 23 exceeds every channels value in this sweep; stride semantics are defined by the tester (not shown here).
+ .Test(xnn_f32_ibilinear_ukernel__neon_c4);
+ }
+ }
+ }
+#endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
+
+
+#if XNN_ARCH_ARM || XNN_ARCH_ARM64
+ TEST(F32_IBILINEAR__NEON_C8, channels_eq_8) { // One tester run with 1 pixel and exactly 8 channels.
+ TEST_REQUIRES_ARM_NEON; // ISA guard macro defined outside this file (presumably in xnnpack/isa-checks.h; confirm).
+ IBilinearMicrokernelTester()
+ .pixels(1)
+ .channels(8)
+ .Test(xnn_f32_ibilinear_ukernel__neon_c8);
+ }
+
+ TEST(F32_IBILINEAR__NEON_C8, channels_div_8) { // One pixel, channel counts that are multiples of 8 (above 8).
+ TEST_REQUIRES_ARM_NEON; // ISA guard macro defined outside this file (presumably in xnnpack/isa-checks.h; confirm).
+ for (size_t channels = 16; channels < 80; channels += 8) { // channels = 16, 24, ..., 72
+ IBilinearMicrokernelTester()
+ .pixels(1)
+ .channels(channels)
+ .Test(xnn_f32_ibilinear_ukernel__neon_c8);
+ }
+ }
+
+ TEST(F32_IBILINEAR__NEON_C8, channels_lt_8) { // One pixel, channel counts below 8.
+ TEST_REQUIRES_ARM_NEON; // ISA guard macro defined outside this file (presumably in xnnpack/isa-checks.h; confirm).
+ for (size_t channels = 1; channels < 8; channels++) { // channels = 1..7
+ IBilinearMicrokernelTester()
+ .pixels(1)
+ .channels(channels)
+ .Test(xnn_f32_ibilinear_ukernel__neon_c8);
+ }
+ }
+
+ TEST(F32_IBILINEAR__NEON_C8, channels_gt_8) { // One pixel, channel counts strictly between 8 and 16.
+ TEST_REQUIRES_ARM_NEON; // ISA guard macro defined outside this file (presumably in xnnpack/isa-checks.h; confirm).
+ for (size_t channels = 9; channels < 16; channels++) { // channels = 9..15
+ IBilinearMicrokernelTester()
+ .pixels(1)
+ .channels(channels)
+ .Test(xnn_f32_ibilinear_ukernel__neon_c8);
+ }
+ }
+
+ TEST(F32_IBILINEAR__NEON_C8, pixels_gt_1) { // More than one pixel per call, across several channel counts.
+ TEST_REQUIRES_ARM_NEON; // ISA guard macro defined outside this file (presumably in xnnpack/isa-checks.h; confirm).
+ for (size_t pixels = 2; pixels < 3; pixels++) { // Bounds admit a single value: pixels == 2.
+ for (size_t channels = 1; channels <= 40; channels += 7) { // channels = 1, 8, 15, ..., 36
+ IBilinearMicrokernelTester()
+ .pixels(pixels)
+ .channels(channels)
+ .Test(xnn_f32_ibilinear_ukernel__neon_c8);
+ }
+ }
+ }
+
+ TEST(F32_IBILINEAR__NEON_C8, input_offset) { // Pixel/channel sweep with an explicit input offset.
+ TEST_REQUIRES_ARM_NEON; // ISA guard macro defined outside this file (presumably in xnnpack/isa-checks.h; confirm).
+ for (size_t pixels = 1; pixels < 5; pixels += 1) { // pixels = 1..4
+ for (size_t channels = 1; channels <= 40; channels += 7) { // channels = 1, 8, 15, ..., 36
+ IBilinearMicrokernelTester()
+ .pixels(pixels)
+ .channels(channels)
+ .input_offset(43) // 43 exceeds every channels value in this sweep; how the offset is applied is defined by the tester (not shown here).
+ .Test(xnn_f32_ibilinear_ukernel__neon_c8);
+ }
+ }
+ }
+ TEST(F32_IBILINEAR__NEON_C8, output_stride) { // Pixel/channel sweep with an explicit output stride.
+ TEST_REQUIRES_ARM_NEON; // ISA guard macro defined outside this file (presumably in xnnpack/isa-checks.h; confirm).
+ for (size_t pixels = 1; pixels < 5; pixels += 1) { // pixels = 1..4
+ for (size_t channels = 1; channels <= 40; channels += 7) { // channels = 1, 8, 15, ..., 36
+ IBilinearMicrokernelTester()
+ .pixels(pixels)
+ .channels(channels)
+ .output_stride(43) // 43 exceeds every channels value in this sweep; stride semantics are defined by the tester (not shown here).
+ .Test(xnn_f32_ibilinear_ukernel__neon_c8);
+ }
+ }
+ }
+#endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
+
+
+#if XNN_ARCH_ARM || XNN_ARCH_ARM64
+ TEST(F32_IBILINEAR__NEONFMA_C4, channels_eq_4) { // One tester run with 1 pixel and exactly 4 channels.
+ TEST_REQUIRES_ARM_NEON_FMA; // ISA guard macro defined outside this file (presumably in xnnpack/isa-checks.h; confirm).
+ IBilinearMicrokernelTester()
+ .pixels(1)
+ .channels(4)
+ .Test(xnn_f32_ibilinear_ukernel__neonfma_c4);
+ }
+
+ TEST(F32_IBILINEAR__NEONFMA_C4, channels_div_4) { // One pixel, channel counts that are multiples of 4 (above 4).
+ TEST_REQUIRES_ARM_NEON_FMA; // ISA guard macro defined outside this file (presumably in xnnpack/isa-checks.h; confirm).
+ for (size_t channels = 8; channels < 40; channels += 4) { // channels = 8, 12, ..., 36
+ IBilinearMicrokernelTester()
+ .pixels(1)
+ .channels(channels)
+ .Test(xnn_f32_ibilinear_ukernel__neonfma_c4);
+ }
+ }
+
+ TEST(F32_IBILINEAR__NEONFMA_C4, channels_lt_4) {
+ TEST_REQUIRES_ARM_NEON_FMA;
+ for (size_t channels = 1; channels < 4; channels++) {
+ IBilinearMicrokernelTester()
+ .pixels(1)
+ .channels(channels)
+ .Test(xnn_f32_ibilinear_ukernel__neonfma_c4);
+ }
+ }
+
+ TEST(F32_IBILINEAR__NEONFMA_C4, channels_gt_4) {
+ TEST_REQUIRES_ARM_NEON_FMA;
+ for (size_t channels = 5; channels < 8; channels++) {
+ IBilinearMicrokernelTester()
+ .pixels(1)
+ .channels(channels)
+ .Test(xnn_f32_ibilinear_ukernel__neonfma_c4);
+ }
+ }
+
+ TEST(F32_IBILINEAR__NEONFMA_C4, pixels_gt_1) {
+ TEST_REQUIRES_ARM_NEON_FMA;
+ for (size_t pixels = 2; pixels < 3; pixels++) {
+ for (size_t channels = 1; channels <= 20; channels += 3) {
+ IBilinearMicrokernelTester()
+ .pixels(pixels)
+ .channels(channels)
+ .Test(xnn_f32_ibilinear_ukernel__neonfma_c4);
+ }
+ }
+ }
+
+ TEST(F32_IBILINEAR__NEONFMA_C4, input_offset) {
+ TEST_REQUIRES_ARM_NEON_FMA;
+ for (size_t pixels = 1; pixels < 5; pixels += 1) {
+ for (size_t channels = 1; channels <= 20; channels += 3) {
+ IBilinearMicrokernelTester()
+ .pixels(pixels)
+ .channels(channels)
+ .input_offset(23)
+ .Test(xnn_f32_ibilinear_ukernel__neonfma_c4);
+ }
+ }
+ }
+ TEST(F32_IBILINEAR__NEONFMA_C4, output_stride) {
+ TEST_REQUIRES_ARM_NEON_FMA;
+ for (size_t pixels = 1; pixels < 5; pixels += 1) {
+ for (size_t channels = 1; channels <= 20; channels += 3) {
+ IBilinearMicrokernelTester()
+ .pixels(pixels)
+ .channels(channels)
+ .output_stride(23)
+ .Test(xnn_f32_ibilinear_ukernel__neonfma_c4);
+ }
+ }
+ }
+#endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
+
+
+#if XNN_ARCH_ARM || XNN_ARCH_ARM64
+ TEST(F32_IBILINEAR__NEONFMA_C8, channels_eq_8) { // Exactly 8 channels, the tile the generator parses from the _c8 suffix.
+ TEST_REQUIRES_ARM_NEON_FMA; // ISA guard macro defined outside this chunk; presumably skips when NEON FMA is unavailable - confirm in isa-checks.h.
+ IBilinearMicrokernelTester()
+ .pixels(1) // one pixel per kernel call
+ .channels(8)
+ .Test(xnn_f32_ibilinear_ukernel__neonfma_c8);
+ }
+
+ TEST(F32_IBILINEAR__NEONFMA_C8, channels_div_8) { // Channel counts that are multiples of 8.
+ TEST_REQUIRES_ARM_NEON_FMA;
+ for (size_t channels = 16; channels < 80; channels += 8) { // channels = 16, 24, ..., 72
+ IBilinearMicrokernelTester()
+ .pixels(1)
+ .channels(channels)
+ .Test(xnn_f32_ibilinear_ukernel__neonfma_c8);
+ }
+ }
+
+ TEST(F32_IBILINEAR__NEONFMA_C8, channels_lt_8) { // Channel counts below 8.
+ TEST_REQUIRES_ARM_NEON_FMA;
+ for (size_t channels = 1; channels < 8; channels++) { // channels = 1..7
+ IBilinearMicrokernelTester()
+ .pixels(1)
+ .channels(channels)
+ .Test(xnn_f32_ibilinear_ukernel__neonfma_c8);
+ }
+ }
+
+ TEST(F32_IBILINEAR__NEONFMA_C8, channels_gt_8) { // Channel counts strictly between 8 and 16.
+ TEST_REQUIRES_ARM_NEON_FMA;
+ for (size_t channels = 9; channels < 16; channels++) { // channels = 9..15
+ IBilinearMicrokernelTester()
+ .pixels(1)
+ .channels(channels)
+ .Test(xnn_f32_ibilinear_ukernel__neonfma_c8);
+ }
+ }
+
+ TEST(F32_IBILINEAR__NEONFMA_C8, pixels_gt_1) { // More than one pixel per kernel call.
+ TEST_REQUIRES_ARM_NEON_FMA;
+ for (size_t pixels = 2; pixels < 3; pixels++) { // runs once with pixels == 2; bounds come from the generator template with PIXEL_TILE = 1
+ for (size_t channels = 1; channels <= 40; channels += 7) { // channels = 1, 8, 15, 22, 29, 36
+ IBilinearMicrokernelTester()
+ .pixels(pixels)
+ .channels(channels)
+ .Test(xnn_f32_ibilinear_ukernel__neonfma_c8);
+ }
+ }
+ }
+
+ TEST(F32_IBILINEAR__NEONFMA_C8, input_offset) { // Sweeps pixels x channels with a non-zero input offset.
+ TEST_REQUIRES_ARM_NEON_FMA;
+ for (size_t pixels = 1; pixels < 5; pixels += 1) { // pixels = 1..4
+ for (size_t channels = 1; channels <= 40; channels += 7) { // channels = 1, 8, 15, 22, 29, 36
+ IBilinearMicrokernelTester()
+ .pixels(pixels)
+ .channels(channels)
+ .input_offset(43) // in floats; the tester multiplies by sizeof(float) before calling the kernel
+ .Test(xnn_f32_ibilinear_ukernel__neonfma_c8);
+ }
+ }
+ }
+ TEST(F32_IBILINEAR__NEONFMA_C8, output_stride) { // Sweeps pixels x channels with an explicit output stride.
+ TEST_REQUIRES_ARM_NEON_FMA;
+ for (size_t pixels = 1; pixels < 5; pixels += 1) { // pixels = 1..4
+ for (size_t channels = 1; channels <= 40; channels += 7) { // channels = 1, 8, 15, 22, 29, 36
+ IBilinearMicrokernelTester()
+ .pixels(pixels)
+ .channels(channels)
+ .output_stride(43) // exceeds the largest channel count tested (36); how the tester applies it is not visible in this chunk
+ .Test(xnn_f32_ibilinear_ukernel__neonfma_c8);
+ }
+ }
+ }
+#endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
+
+
+#if XNN_ARCH_X86 || XNN_ARCH_X86_64
+ TEST(F32_IBILINEAR__SSE_C4, channels_eq_4) { // Exactly 4 channels, the tile the generator parses from the _c4 suffix.
+ TEST_REQUIRES_X86_SSE; // ISA guard macro defined outside this chunk; presumably skips when SSE is unavailable - confirm in isa-checks.h.
+ IBilinearMicrokernelTester()
+ .pixels(1) // one pixel per kernel call
+ .channels(4)
+ .Test(xnn_f32_ibilinear_ukernel__sse_c4);
+ }
+
+ TEST(F32_IBILINEAR__SSE_C4, channels_div_4) { // Channel counts that are multiples of 4.
+ TEST_REQUIRES_X86_SSE;
+ for (size_t channels = 8; channels < 40; channels += 4) { // channels = 8, 12, ..., 36
+ IBilinearMicrokernelTester()
+ .pixels(1)
+ .channels(channels)
+ .Test(xnn_f32_ibilinear_ukernel__sse_c4);
+ }
+ }
+
+ TEST(F32_IBILINEAR__SSE_C4, channels_lt_4) { // Channel counts below 4.
+ TEST_REQUIRES_X86_SSE;
+ for (size_t channels = 1; channels < 4; channels++) { // channels = 1..3
+ IBilinearMicrokernelTester()
+ .pixels(1)
+ .channels(channels)
+ .Test(xnn_f32_ibilinear_ukernel__sse_c4);
+ }
+ }
+
+ TEST(F32_IBILINEAR__SSE_C4, channels_gt_4) { // Channel counts strictly between 4 and 8.
+ TEST_REQUIRES_X86_SSE;
+ for (size_t channels = 5; channels < 8; channels++) { // channels = 5..7
+ IBilinearMicrokernelTester()
+ .pixels(1)
+ .channels(channels)
+ .Test(xnn_f32_ibilinear_ukernel__sse_c4);
+ }
+ }
+
+ TEST(F32_IBILINEAR__SSE_C4, pixels_gt_1) { // More than one pixel per kernel call.
+ TEST_REQUIRES_X86_SSE;
+ for (size_t pixels = 2; pixels < 3; pixels++) { // runs once with pixels == 2; bounds come from the generator template with PIXEL_TILE = 1
+ for (size_t channels = 1; channels <= 20; channels += 3) { // channels = 1, 4, 7, ..., 19
+ IBilinearMicrokernelTester()
+ .pixels(pixels)
+ .channels(channels)
+ .Test(xnn_f32_ibilinear_ukernel__sse_c4);
+ }
+ }
+ }
+
+ TEST(F32_IBILINEAR__SSE_C4, input_offset) { // Sweeps pixels x channels with a non-zero input offset.
+ TEST_REQUIRES_X86_SSE;
+ for (size_t pixels = 1; pixels < 5; pixels += 1) { // pixels = 1..4
+ for (size_t channels = 1; channels <= 20; channels += 3) { // channels = 1, 4, 7, ..., 19
+ IBilinearMicrokernelTester()
+ .pixels(pixels)
+ .channels(channels)
+ .input_offset(23) // in floats; the tester multiplies by sizeof(float) before calling the kernel
+ .Test(xnn_f32_ibilinear_ukernel__sse_c4);
+ }
+ }
+ }
+ TEST(F32_IBILINEAR__SSE_C4, output_stride) { // Sweeps pixels x channels with an explicit output stride.
+ TEST_REQUIRES_X86_SSE;
+ for (size_t pixels = 1; pixels < 5; pixels += 1) { // pixels = 1..4
+ for (size_t channels = 1; channels <= 20; channels += 3) { // channels = 1, 4, 7, ..., 19
+ IBilinearMicrokernelTester()
+ .pixels(pixels)
+ .channels(channels)
+ .output_stride(23) // exceeds the largest channel count tested (19); how the tester applies it is not visible in this chunk
+ .Test(xnn_f32_ibilinear_ukernel__sse_c4);
+ }
+ }
+ }
+#endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
+
+
+#if XNN_ARCH_X86 || XNN_ARCH_X86_64
+ TEST(F32_IBILINEAR__SSE_C8, channels_eq_8) { // Exactly 8 channels, the tile the generator parses from the _c8 suffix.
+ TEST_REQUIRES_X86_SSE; // ISA guard macro defined outside this chunk; presumably skips when SSE is unavailable - confirm in isa-checks.h.
+ IBilinearMicrokernelTester()
+ .pixels(1) // one pixel per kernel call
+ .channels(8)
+ .Test(xnn_f32_ibilinear_ukernel__sse_c8);
+ }
+
+ TEST(F32_IBILINEAR__SSE_C8, channels_div_8) { // Channel counts that are multiples of 8.
+ TEST_REQUIRES_X86_SSE;
+ for (size_t channels = 16; channels < 80; channels += 8) { // channels = 16, 24, ..., 72
+ IBilinearMicrokernelTester()
+ .pixels(1)
+ .channels(channels)
+ .Test(xnn_f32_ibilinear_ukernel__sse_c8);
+ }
+ }
+
+ TEST(F32_IBILINEAR__SSE_C8, channels_lt_8) { // Channel counts below 8.
+ TEST_REQUIRES_X86_SSE;
+ for (size_t channels = 1; channels < 8; channels++) { // channels = 1..7
+ IBilinearMicrokernelTester()
+ .pixels(1)
+ .channels(channels)
+ .Test(xnn_f32_ibilinear_ukernel__sse_c8);
+ }
+ }
+
+ TEST(F32_IBILINEAR__SSE_C8, channels_gt_8) { // Channel counts strictly between 8 and 16.
+ TEST_REQUIRES_X86_SSE;
+ for (size_t channels = 9; channels < 16; channels++) { // channels = 9..15
+ IBilinearMicrokernelTester()
+ .pixels(1)
+ .channels(channels)
+ .Test(xnn_f32_ibilinear_ukernel__sse_c8);
+ }
+ }
+
+ TEST(F32_IBILINEAR__SSE_C8, pixels_gt_1) { // More than one pixel per kernel call.
+ TEST_REQUIRES_X86_SSE;
+ for (size_t pixels = 2; pixels < 3; pixels++) { // runs once with pixels == 2; bounds come from the generator template with PIXEL_TILE = 1
+ for (size_t channels = 1; channels <= 40; channels += 7) { // channels = 1, 8, 15, 22, 29, 36
+ IBilinearMicrokernelTester()
+ .pixels(pixels)
+ .channels(channels)
+ .Test(xnn_f32_ibilinear_ukernel__sse_c8);
+ }
+ }
+ }
+
+ TEST(F32_IBILINEAR__SSE_C8, input_offset) { // Sweeps pixels x channels with a non-zero input offset.
+ TEST_REQUIRES_X86_SSE;
+ for (size_t pixels = 1; pixels < 5; pixels += 1) { // pixels = 1..4
+ for (size_t channels = 1; channels <= 40; channels += 7) { // channels = 1, 8, 15, 22, 29, 36
+ IBilinearMicrokernelTester()
+ .pixels(pixels)
+ .channels(channels)
+ .input_offset(43) // in floats; the tester multiplies by sizeof(float) before calling the kernel
+ .Test(xnn_f32_ibilinear_ukernel__sse_c8);
+ }
+ }
+ }
+ TEST(F32_IBILINEAR__SSE_C8, output_stride) { // Sweeps pixels x channels with an explicit output stride.
+ TEST_REQUIRES_X86_SSE;
+ for (size_t pixels = 1; pixels < 5; pixels += 1) { // pixels = 1..4
+ for (size_t channels = 1; channels <= 40; channels += 7) { // channels = 1, 8, 15, 22, 29, 36
+ IBilinearMicrokernelTester()
+ .pixels(pixels)
+ .channels(channels)
+ .output_stride(43) // exceeds the largest channel count tested (36); how the tester applies it is not visible in this chunk
+ .Test(xnn_f32_ibilinear_ukernel__sse_c8);
+ }
+ }
+ }
+#endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
+
+
+#if !XNN_ARCH_ASMJS && !XNN_ARCH_WASM
+ TEST(F32_IBILINEAR__PSIMD_C4, channels_eq_4) { // Exactly 4 channels, the tile the generator parses from the _c4 suffix.
+ TEST_REQUIRES_PSIMD; // ISA guard macro defined outside this chunk; presumably skips when PSIMD is unsupported - confirm in isa-checks.h.
+ IBilinearMicrokernelTester()
+ .pixels(1) // one pixel per kernel call
+ .channels(4)
+ .Test(xnn_f32_ibilinear_ukernel__psimd_c4);
+ }
+
+ TEST(F32_IBILINEAR__PSIMD_C4, channels_div_4) { // Channel counts that are multiples of 4.
+ TEST_REQUIRES_PSIMD;
+ for (size_t channels = 8; channels < 40; channels += 4) { // channels = 8, 12, ..., 36
+ IBilinearMicrokernelTester()
+ .pixels(1)
+ .channels(channels)
+ .Test(xnn_f32_ibilinear_ukernel__psimd_c4);
+ }
+ }
+
+ TEST(F32_IBILINEAR__PSIMD_C4, channels_lt_4) { // Channel counts below 4.
+ TEST_REQUIRES_PSIMD;
+ for (size_t channels = 1; channels < 4; channels++) { // channels = 1..3
+ IBilinearMicrokernelTester()
+ .pixels(1)
+ .channels(channels)
+ .Test(xnn_f32_ibilinear_ukernel__psimd_c4);
+ }
+ }
+
+ TEST(F32_IBILINEAR__PSIMD_C4, channels_gt_4) { // Channel counts strictly between 4 and 8.
+ TEST_REQUIRES_PSIMD;
+ for (size_t channels = 5; channels < 8; channels++) { // channels = 5..7
+ IBilinearMicrokernelTester()
+ .pixels(1)
+ .channels(channels)
+ .Test(xnn_f32_ibilinear_ukernel__psimd_c4);
+ }
+ }
+
+ TEST(F32_IBILINEAR__PSIMD_C4, pixels_gt_1) { // More than one pixel per kernel call.
+ TEST_REQUIRES_PSIMD;
+ for (size_t pixels = 2; pixels < 3; pixels++) { // runs once with pixels == 2; bounds come from the generator template with PIXEL_TILE = 1
+ for (size_t channels = 1; channels <= 20; channels += 3) { // channels = 1, 4, 7, ..., 19
+ IBilinearMicrokernelTester()
+ .pixels(pixels)
+ .channels(channels)
+ .Test(xnn_f32_ibilinear_ukernel__psimd_c4);
+ }
+ }
+ }
+
+ TEST(F32_IBILINEAR__PSIMD_C4, input_offset) { // Sweeps pixels x channels with a non-zero input offset.
+ TEST_REQUIRES_PSIMD;
+ for (size_t pixels = 1; pixels < 5; pixels += 1) { // pixels = 1..4
+ for (size_t channels = 1; channels <= 20; channels += 3) { // channels = 1, 4, 7, ..., 19
+ IBilinearMicrokernelTester()
+ .pixels(pixels)
+ .channels(channels)
+ .input_offset(23) // in floats; the tester multiplies by sizeof(float) before calling the kernel
+ .Test(xnn_f32_ibilinear_ukernel__psimd_c4);
+ }
+ }
+ }
+ TEST(F32_IBILINEAR__PSIMD_C4, output_stride) { // Sweeps pixels x channels with an explicit output stride.
+ TEST_REQUIRES_PSIMD;
+ for (size_t pixels = 1; pixels < 5; pixels += 1) { // pixels = 1..4
+ for (size_t channels = 1; channels <= 20; channels += 3) { // channels = 1, 4, 7, ..., 19
+ IBilinearMicrokernelTester()
+ .pixels(pixels)
+ .channels(channels)
+ .output_stride(23) // exceeds the largest channel count tested (19); how the tester applies it is not visible in this chunk
+ .Test(xnn_f32_ibilinear_ukernel__psimd_c4);
+ }
+ }
+ }
+#endif // !XNN_ARCH_ASMJS && !XNN_ARCH_WASM
+
+
+#if !XNN_ARCH_ASMJS && !XNN_ARCH_WASM
+ TEST(F32_IBILINEAR__PSIMD_C8, channels_eq_8) { // Exactly 8 channels, the tile the generator parses from the _c8 suffix.
+ TEST_REQUIRES_PSIMD; // ISA guard macro defined outside this chunk; presumably skips when PSIMD is unsupported - confirm in isa-checks.h.
+ IBilinearMicrokernelTester()
+ .pixels(1) // one pixel per kernel call
+ .channels(8)
+ .Test(xnn_f32_ibilinear_ukernel__psimd_c8);
+ }
+
+ TEST(F32_IBILINEAR__PSIMD_C8, channels_div_8) { // Channel counts that are multiples of 8.
+ TEST_REQUIRES_PSIMD;
+ for (size_t channels = 16; channels < 80; channels += 8) { // channels = 16, 24, ..., 72
+ IBilinearMicrokernelTester()
+ .pixels(1)
+ .channels(channels)
+ .Test(xnn_f32_ibilinear_ukernel__psimd_c8);
+ }
+ }
+
+ TEST(F32_IBILINEAR__PSIMD_C8, channels_lt_8) { // Channel counts below 8.
+ TEST_REQUIRES_PSIMD;
+ for (size_t channels = 1; channels < 8; channels++) { // channels = 1..7
+ IBilinearMicrokernelTester()
+ .pixels(1)
+ .channels(channels)
+ .Test(xnn_f32_ibilinear_ukernel__psimd_c8);
+ }
+ }
+
+ TEST(F32_IBILINEAR__PSIMD_C8, channels_gt_8) { // Channel counts strictly between 8 and 16.
+ TEST_REQUIRES_PSIMD;
+ for (size_t channels = 9; channels < 16; channels++) { // channels = 9..15
+ IBilinearMicrokernelTester()
+ .pixels(1)
+ .channels(channels)
+ .Test(xnn_f32_ibilinear_ukernel__psimd_c8);
+ }
+ }
+
+ TEST(F32_IBILINEAR__PSIMD_C8, pixels_gt_1) { // More than one pixel per kernel call.
+ TEST_REQUIRES_PSIMD;
+ for (size_t pixels = 2; pixels < 3; pixels++) { // runs once with pixels == 2; bounds come from the generator template with PIXEL_TILE = 1
+ for (size_t channels = 1; channels <= 40; channels += 7) { // channels = 1, 8, 15, 22, 29, 36
+ IBilinearMicrokernelTester()
+ .pixels(pixels)
+ .channels(channels)
+ .Test(xnn_f32_ibilinear_ukernel__psimd_c8);
+ }
+ }
+ }
+
+ TEST(F32_IBILINEAR__PSIMD_C8, input_offset) { // Sweeps pixels x channels with a non-zero input offset.
+ TEST_REQUIRES_PSIMD;
+ for (size_t pixels = 1; pixels < 5; pixels += 1) { // pixels = 1..4
+ for (size_t channels = 1; channels <= 40; channels += 7) { // channels = 1, 8, 15, 22, 29, 36
+ IBilinearMicrokernelTester()
+ .pixels(pixels)
+ .channels(channels)
+ .input_offset(43) // in floats; the tester multiplies by sizeof(float) before calling the kernel
+ .Test(xnn_f32_ibilinear_ukernel__psimd_c8);
+ }
+ }
+ }
+ TEST(F32_IBILINEAR__PSIMD_C8, output_stride) { // Sweeps pixels x channels with an explicit output stride.
+ TEST_REQUIRES_PSIMD;
+ for (size_t pixels = 1; pixels < 5; pixels += 1) { // pixels = 1..4
+ for (size_t channels = 1; channels <= 40; channels += 7) { // channels = 1, 8, 15, 22, 29, 36
+ IBilinearMicrokernelTester()
+ .pixels(pixels)
+ .channels(channels)
+ .output_stride(43) // exceeds the largest channel count tested (36); how the tester applies it is not visible in this chunk
+ .Test(xnn_f32_ibilinear_ukernel__psimd_c8);
+ }
+ }
+ }
+#endif // !XNN_ARCH_ASMJS && !XNN_ARCH_WASM
diff --git a/test/f32-ibilinear.yaml b/test/f32-ibilinear.yaml
new file mode 100644
index 0000000..0fc4eab
--- /dev/null
+++ b/test/f32-ibilinear.yaml
@@ -0,0 +1,15 @@
+# Copyright 2019 Google LLC
+#
+# This source code is licensed under the BSD-style license found in the
+# LICENSE file in the root directory of this source tree.
+- name: xnn_f32_ibilinear_ukernel__scalar_c1 # scalar kernels; the test generator parses the _cN suffix as the channel tile
+- name: xnn_f32_ibilinear_ukernel__scalar_c2
+- name: xnn_f32_ibilinear_ukernel__scalar_c4
+- name: xnn_f32_ibilinear_ukernel__neon_c4 # NEON kernels
+- name: xnn_f32_ibilinear_ukernel__neon_c8
+- name: xnn_f32_ibilinear_ukernel__neonfma_c4 # NEON FMA kernels
+- name: xnn_f32_ibilinear_ukernel__neonfma_c8
+- name: xnn_f32_ibilinear_ukernel__sse_c4 # SSE kernels
+- name: xnn_f32_ibilinear_ukernel__sse_c8
+- name: xnn_f32_ibilinear_ukernel__psimd_c4 # PSIMD kernels
+- name: xnn_f32_ibilinear_ukernel__psimd_c8
diff --git a/test/bilinear-microkernel-tester.h b/test/ibilinear-microkernel-tester.h
similarity index 89%
rename from test/bilinear-microkernel-tester.h
rename to test/ibilinear-microkernel-tester.h
index abfc4aa..7ec159d 100644
--- a/test/bilinear-microkernel-tester.h
+++ b/test/ibilinear-microkernel-tester.h
@@ -21,9 +21,9 @@
#include <xnnpack/params.h>
-class BilinearMicrokernelTester {
+class IBilinearMicrokernelTester {
public:
- inline BilinearMicrokernelTester& pixels(uint32_t pixels) {
+ inline IBilinearMicrokernelTester& pixels(uint32_t pixels) {
assert(pixels >= 1);
this->pixels_ = pixels;
return *this;
@@ -33,7 +33,7 @@
return this->pixels_;
}
- inline BilinearMicrokernelTester& channels(uint32_t channels) {
+ inline IBilinearMicrokernelTester& channels(uint32_t channels) {
assert(channels >= 1);
this->channels_ = channels;
return *this;
@@ -43,7 +43,7 @@
return this->channels_;
}
- inline BilinearMicrokernelTester& input_offset(uint32_t input_offset) {
+ inline IBilinearMicrokernelTester& input_offset(uint32_t input_offset) {
this->input_offset_ = input_offset;
return *this;
}
@@ -52,7 +52,7 @@
return this->input_offset_;
}
- inline BilinearMicrokernelTester& output_stride(uint32_t output_stride) {
+ inline IBilinearMicrokernelTester& output_stride(uint32_t output_stride) {
assert(output_stride != 0);
this->output_stride_ = output_stride;
return *this;
@@ -67,7 +67,7 @@
}
}
- inline BilinearMicrokernelTester& iterations(size_t iterations) {
+ inline IBilinearMicrokernelTester& iterations(size_t iterations) {
this->iterations_ = iterations;
return *this;
}
@@ -76,7 +76,7 @@
return this->iterations_;
}
- void Test(xnn_f32_bilinear_ukernel_function bilinear) const {
+ void Test(xnn_f32_ibilinear_ukernel_function ibilinear) const {
std::random_device random_device;
auto rng = std::mt19937(random_device());
auto f32rng = std::bind(std::uniform_real_distribution<float>(0.0f, 1.0f), rng);
@@ -111,7 +111,7 @@
}
// Call optimized micro-kernel.
- bilinear(
+ ibilinear(
pixels(), channels() * sizeof(float),
indirection.data(), input_offset() * sizeof(float),
packed_weights.data(), output.data(),
diff --git a/tools/generate-bilinear-test.py b/tools/generate-ibilinear-test.py
similarity index 91%
rename from tools/generate-bilinear-test.py
rename to tools/generate-ibilinear-test.py
index 6b90820..5bb4869 100755
--- a/tools/generate-bilinear-test.py
+++ b/tools/generate-ibilinear-test.py
@@ -18,7 +18,7 @@
parser = argparse.ArgumentParser(
- description='Bilinear microkernel test generator')
+ description='IBILINEAR microkernel test generator')
parser.add_argument("-s", "--spec", metavar="FILE", required=True,
help="Specification (YAML) file")
parser.add_argument("-o", "--output", metavar="FILE", required=True,
@@ -27,7 +27,7 @@
def split_ukernel_name(name):
- match = re.match(r"^xnn_(f16|f32)_bilinear_ukernel__(.+)_c(\d+)$", name)
+ match = re.match(r"^xnn_(f16|f32)_ibilinear_ukernel__(.+)_c(\d+)$", name)
assert match is not None
channel_tile = int(match.group(3))
pixel_tile = 1
@@ -36,11 +36,11 @@
return channel_tile, pixel_tile, arch, isa
-BILINEAR_TEST_TEMPLATE = """\
+IBILINEAR_TEST_TEMPLATE = """\
TEST(${TEST_NAME}, channels_eq_${CHANNEL_TILE}) {
$if ISA_CHECK:
${ISA_CHECK};
- BilinearMicrokernelTester()
+ IBilinearMicrokernelTester()
.pixels(${PIXEL_TILE})
.channels(${CHANNEL_TILE})
.Test(${TEST_FUNC});
@@ -51,7 +51,7 @@
$if ISA_CHECK:
${ISA_CHECK};
for (size_t channels = ${CHANNEL_TILE*2}; channels < ${CHANNEL_TILE*10}; channels += ${CHANNEL_TILE}) {
- BilinearMicrokernelTester()
+ IBilinearMicrokernelTester()
.pixels(${PIXEL_TILE})
.channels(channels)
.Test(${TEST_FUNC});
@@ -62,7 +62,7 @@
$if ISA_CHECK:
${ISA_CHECK};
for (size_t channels = 1; channels < ${CHANNEL_TILE}; channels++) {
- BilinearMicrokernelTester()
+ IBilinearMicrokernelTester()
.pixels(${PIXEL_TILE})
.channels(channels)
.Test(${TEST_FUNC});
@@ -73,7 +73,7 @@
$if ISA_CHECK:
${ISA_CHECK};
for (size_t channels = ${CHANNEL_TILE+1}; channels < ${10 if CHANNEL_TILE == 1 else CHANNEL_TILE*2}; channels++) {
- BilinearMicrokernelTester()
+ IBilinearMicrokernelTester()
.pixels(${PIXEL_TILE})
.channels(channels)
.Test(${TEST_FUNC});
@@ -86,7 +86,7 @@
${ISA_CHECK};
for (size_t pixels = ${PIXEL_TILE*2}; pixels < ${PIXEL_TILE*10}; pixels += ${PIXEL_TILE}) {
for (size_t channels = 1; channels <= ${CHANNEL_TILE * 5}; channels += ${max(1, CHANNEL_TILE - 1)}) {
- BilinearMicrokernelTester()
+ IBilinearMicrokernelTester()
.pixels(pixels)
.channels(channels)
.Test(${TEST_FUNC});
@@ -99,7 +99,7 @@
${ISA_CHECK};
for (size_t pixels = 1; pixels < ${PIXEL_TILE}; pixels++) {
for (size_t channels = 1; channels <= ${CHANNEL_TILE * 5}; channels += ${max(1, CHANNEL_TILE - 1)}) {
- BilinearMicrokernelTester()
+ IBilinearMicrokernelTester()
.pixels(pixels)
.channels(channels)
.Test(${TEST_FUNC});
@@ -112,7 +112,7 @@
${ISA_CHECK};
for (size_t pixels = ${PIXEL_TILE+1}; pixels < ${max(PIXEL_TILE*2, 3)}; pixels++) {
for (size_t channels = 1; channels <= ${CHANNEL_TILE * 5}; channels += ${max(1, CHANNEL_TILE - 1)}) {
- BilinearMicrokernelTester()
+ IBilinearMicrokernelTester()
.pixels(pixels)
.channels(channels)
.Test(${TEST_FUNC});
@@ -125,7 +125,7 @@
${ISA_CHECK};
for (size_t pixels = 1; pixels < ${PIXEL_TILE * 5}; pixels += ${max(1, PIXEL_TILE - 1)}) {
for (size_t channels = 1; channels <= ${CHANNEL_TILE * 5}; channels += ${max(1, CHANNEL_TILE - 1)}) {
- BilinearMicrokernelTester()
+ IBilinearMicrokernelTester()
.pixels(pixels)
.channels(channels)
.input_offset(${next_prime(CHANNEL_TILE * 5 + 1)})
@@ -138,7 +138,7 @@
${ISA_CHECK};
for (size_t pixels = 1; pixels < ${PIXEL_TILE * 5}; pixels += ${max(1, PIXEL_TILE - 1)}) {
for (size_t channels = 1; channels <= ${CHANNEL_TILE * 5}; channels += ${max(1, CHANNEL_TILE - 1)}) {
- BilinearMicrokernelTester()
+ IBilinearMicrokernelTester()
.pixels(pixels)
.channels(channels)
.output_stride(${next_prime(CHANNEL_TILE * 5 + 1)})
@@ -167,7 +167,7 @@
_, test_name = ukernel.split("_", 1)
_, datatype, ukernel_type, _ = ukernel.split("_", 3)
test_args = [ukernel]
- return xngen.preprocess(BILINEAR_TEST_TEMPLATE, {
+ return xngen.preprocess(IBILINEAR_TEST_TEMPLATE, {
"TEST_NAME": test_name.upper().replace("UKERNEL_", ""),
"TEST_FUNC": ukernel,
"UKERNEL_TYPE": ukernel_type.upper(),
@@ -203,8 +203,8 @@
#include <xnnpack/common.h>
#include <xnnpack/isa-checks.h>
-#include <xnnpack/bilinear.h>
-#include "bilinear-microkernel-tester.h"
+#include <xnnpack/ibilinear.h>
+#include "ibilinear-microkernel-tester.h"
""".format(specification=options.spec, generator=sys.argv[0])
for ukernel_spec in spec_yaml: