// Copyright 2019 Google LLC
//
// This source code is licensed under the BSD-style license found in the
// LICENSE file in the root directory of this source tree.
5
6#include <cpuinfo.h>
7#include <gtest/gtest.h>
8
9#include <xnnpack/isa-checks.h>
10#include <xnnpack/clamp.h>
11
12#include "clamp-microkernel-tester.h"
13
14
15#if !CPUINFO_ARCH_WASM && !CPUINFO_ARCH_ASMJS
16 TEST(F32_CLAMP__PSIMD, n_eq_4) {
17 TEST_REQUIRES_PSIMD;
18 ClampMicrokernelTester()
19 .n(4)
20 .Test(xnn_f32_clamp_ukernel__psimd, ClampMicrokernelTester::Variant::Scalar);
21 }
22
23 TEST(F32_CLAMP__PSIMD, n_div_4) {
24 TEST_REQUIRES_PSIMD;
25 for (size_t n = 4; n < 256; n += 4) {
26 ClampMicrokernelTester()
27 .n(n)
28 .Test(xnn_f32_clamp_ukernel__psimd, ClampMicrokernelTester::Variant::Scalar);
29 }
30 }
31
32 TEST(F32_CLAMP__PSIMD, n_gt_4) {
33 TEST_REQUIRES_PSIMD;
34 for (size_t n = 5; n < 8; n++) {
35 ClampMicrokernelTester()
36 .n(n)
37 .Test(xnn_f32_clamp_ukernel__psimd, ClampMicrokernelTester::Variant::Scalar);
38 }
39 }
40
41 TEST(F32_CLAMP__PSIMD, n_lt_4) {
42 TEST_REQUIRES_PSIMD;
43 for (size_t n = 1; n < 4; n++) {
44 ClampMicrokernelTester()
45 .n(n)
46 .Test(xnn_f32_clamp_ukernel__psimd, ClampMicrokernelTester::Variant::Scalar);
47 }
48 }
49
50 TEST(F32_CLAMP__PSIMD, inplace) {
51 TEST_REQUIRES_PSIMD;
52 for (size_t n = 1; n < 64; n += 3) {
53 ClampMicrokernelTester()
54 .iterations(1)
55 .n(n)
56 .inplace(true)
57 .Test(xnn_f32_clamp_ukernel__psimd, ClampMicrokernelTester::Variant::Scalar);
58 }
59 }
60
61 TEST(F32_CLAMP__PSIMD, qmin) {
62 TEST_REQUIRES_PSIMD;
63 for (size_t n = 1; n < 64; n += 5) {
64 for (uint8_t qmin = 1; qmin < 255; qmin++) {
65 ClampMicrokernelTester()
66 .iterations(1)
67 .n(n)
68 .qmin(qmin)
69 .qmax(255)
70 .Test(xnn_f32_clamp_ukernel__psimd, ClampMicrokernelTester::Variant::Scalar);
71 }
72 }
73 }
74
75 TEST(F32_CLAMP__PSIMD, qmax) {
76 TEST_REQUIRES_PSIMD;
77 for (size_t n = 1; n < 64; n += 5) {
78 for (uint8_t qmax = 1; qmax < 255; qmax++) {
79 ClampMicrokernelTester()
80 .iterations(1)
81 .n(n)
82 .qmin(0)
83 .qmax(qmax)
84 .Test(xnn_f32_clamp_ukernel__psimd, ClampMicrokernelTester::Variant::Scalar);
85 }
86 }
87 }
88#endif // !CPUINFO_ARCH_WASM && !CPUINFO_ARCH_ASMJS
89
90
91TEST(F32_CLAMP__SCALAR, n_eq_2) {
92 ClampMicrokernelTester()
93 .n(2)
94 .Test(xnn_f32_clamp_ukernel__scalar, ClampMicrokernelTester::Variant::Scalar);
95}
96
97TEST(F32_CLAMP__SCALAR, n_div_2) {
98 for (size_t n = 4; n < 128; n += 2) {
99 ClampMicrokernelTester()
100 .n(n)
101 .Test(xnn_f32_clamp_ukernel__scalar, ClampMicrokernelTester::Variant::Scalar);
102 }
103}
104
105TEST(F32_CLAMP__SCALAR, n_gt_2) {
106 for (size_t n = 3; n < 4; n++) {
107 ClampMicrokernelTester()
108 .n(n)
109 .Test(xnn_f32_clamp_ukernel__scalar, ClampMicrokernelTester::Variant::Scalar);
110 }
111}
112
113TEST(F32_CLAMP__SCALAR, n_lt_2) {
114 for (size_t n = 1; n < 2; n++) {
115 ClampMicrokernelTester()
116 .n(n)
117 .Test(xnn_f32_clamp_ukernel__scalar, ClampMicrokernelTester::Variant::Scalar);
118 }
119}
120
121TEST(F32_CLAMP__SCALAR, inplace) {
122 for (size_t n = 1; n < 32; n += 3) {
123 ClampMicrokernelTester()
124 .iterations(1)
125 .n(n)
126 .inplace(true)
127 .Test(xnn_f32_clamp_ukernel__scalar, ClampMicrokernelTester::Variant::Scalar);
128 }
129}
130
131TEST(F32_CLAMP__SCALAR, qmin) {
132 for (size_t n = 1; n < 32; n += 3) {
133 for (uint8_t qmin = 1; qmin < 255; qmin++) {
134 ClampMicrokernelTester()
135 .iterations(1)
136 .n(n)
137 .qmin(qmin)
138 .qmax(255)
139 .Test(xnn_f32_clamp_ukernel__scalar, ClampMicrokernelTester::Variant::Scalar);
140 }
141 }
142}
143
144TEST(F32_CLAMP__SCALAR, qmax) {
145 for (size_t n = 1; n < 32; n += 3) {
146 for (uint8_t qmax = 1; qmax < 255; qmax++) {
147 ClampMicrokernelTester()
148 .iterations(1)
149 .n(n)
150 .qmin(0)
151 .qmax(qmax)
152 .Test(xnn_f32_clamp_ukernel__scalar, ClampMicrokernelTester::Variant::Scalar);
153 }
154 }
155}
156
157#if CPUINFO_ARCH_ARM || CPUINFO_ARCH_ARM64
158 TEST(F32_CLAMP__NEON, n_eq_4) {
159 TEST_REQUIRES_ARM_NEON;
160 ClampMicrokernelTester()
161 .n(4)
162 .Test(xnn_f32_clamp_ukernel__neon);
163 }
164
165 TEST(F32_CLAMP__NEON, n_div_4) {
166 TEST_REQUIRES_ARM_NEON;
167 for (size_t n = 4; n < 256; n += 4) {
168 ClampMicrokernelTester()
169 .n(n)
170 .Test(xnn_f32_clamp_ukernel__neon);
171 }
172 }
173
174 TEST(F32_CLAMP__NEON, n_gt_4) {
175 TEST_REQUIRES_ARM_NEON;
176 for (size_t n = 5; n < 8; n++) {
177 ClampMicrokernelTester()
178 .n(n)
179 .Test(xnn_f32_clamp_ukernel__neon);
180 }
181 }
182
183 TEST(F32_CLAMP__NEON, n_lt_4) {
184 TEST_REQUIRES_ARM_NEON;
185 for (size_t n = 1; n < 4; n++) {
186 ClampMicrokernelTester()
187 .n(n)
188 .Test(xnn_f32_clamp_ukernel__neon);
189 }
190 }
191
192 TEST(F32_CLAMP__NEON, inplace) {
193 TEST_REQUIRES_ARM_NEON;
194 for (size_t n = 1; n < 64; n += 3) {
195 ClampMicrokernelTester()
196 .iterations(1)
197 .n(n)
198 .inplace(true)
199 .Test(xnn_f32_clamp_ukernel__neon);
200 }
201 }
202
203 TEST(F32_CLAMP__NEON, qmin) {
204 TEST_REQUIRES_ARM_NEON;
205 for (size_t n = 1; n < 64; n += 5) {
206 for (uint8_t qmin = 1; qmin < 255; qmin++) {
207 ClampMicrokernelTester()
208 .iterations(1)
209 .n(n)
210 .qmin(qmin)
211 .qmax(255)
212 .Test(xnn_f32_clamp_ukernel__neon);
213 }
214 }
215 }
216
217 TEST(F32_CLAMP__NEON, qmax) {
218 TEST_REQUIRES_ARM_NEON;
219 for (size_t n = 1; n < 64; n += 5) {
220 for (uint8_t qmax = 1; qmax < 255; qmax++) {
221 ClampMicrokernelTester()
222 .iterations(1)
223 .n(n)
224 .qmin(0)
225 .qmax(qmax)
226 .Test(xnn_f32_clamp_ukernel__neon);
227 }
228 }
229 }
230#endif // CPUINFO_ARCH_ARM || CPUINFO_ARCH_ARM64
231
232#if CPUINFO_ARCH_X86 || CPUINFO_ARCH_X86_64
233 TEST(F32_CLAMP__SSE, n_eq_4) {
234 TEST_REQUIRES_X86_SSE2;
235 ClampMicrokernelTester()
236 .n(4)
237 .Test(xnn_f32_clamp_ukernel__sse);
238 }
239
240 TEST(F32_CLAMP__SSE, n_div_4) {
241 TEST_REQUIRES_X86_SSE2;
242 for (size_t n = 4; n < 256; n += 4) {
243 ClampMicrokernelTester()
244 .n(n)
245 .Test(xnn_f32_clamp_ukernel__sse);
246 }
247 }
248
249 TEST(F32_CLAMP__SSE, n_gt_4) {
250 TEST_REQUIRES_X86_SSE2;
251 for (size_t n = 5; n < 8; n++) {
252 ClampMicrokernelTester()
253 .n(n)
254 .Test(xnn_f32_clamp_ukernel__sse);
255 }
256 }
257
258 TEST(F32_CLAMP__SSE, n_lt_4) {
259 TEST_REQUIRES_X86_SSE2;
260 for (size_t n = 1; n < 4; n++) {
261 ClampMicrokernelTester()
262 .n(n)
263 .Test(xnn_f32_clamp_ukernel__sse);
264 }
265 }
266
267 TEST(F32_CLAMP__SSE, inplace) {
268 TEST_REQUIRES_X86_SSE2;
269 for (size_t n = 1; n < 64; n += 3) {
270 ClampMicrokernelTester()
271 .iterations(1)
272 .n(n)
273 .inplace(true)
274 .Test(xnn_f32_clamp_ukernel__sse);
275 }
276 }
277
278 TEST(F32_CLAMP__SSE, qmin) {
279 TEST_REQUIRES_X86_SSE2;
280 for (size_t n = 1; n < 64; n += 5) {
281 for (uint8_t qmin = 1; qmin < 255; qmin++) {
282 ClampMicrokernelTester()
283 .iterations(1)
284 .n(n)
285 .qmin(qmin)
286 .qmax(255)
287 .Test(xnn_f32_clamp_ukernel__sse);
288 }
289 }
290 }
291
292 TEST(F32_CLAMP__SSE, qmax) {
293 TEST_REQUIRES_X86_SSE2;
294 for (size_t n = 1; n < 64; n += 5) {
295 for (uint8_t qmax = 1; qmax < 255; qmax++) {
296 ClampMicrokernelTester()
297 .iterations(1)
298 .n(n)
299 .qmin(0)
300 .qmax(qmax)
301 .Test(xnn_f32_clamp_ukernel__sse);
302 }
303 }
304 }
305#endif // CPUINFO_ARCH_X86 || CPUINFO_ARCH_X86_64