blob: a8e4305e3f20a903bbd5a39bb6487a5d905788e1 [file] [log] [blame]
// Copyright 2020 Google LLC
//
// This source code is licensed under the BSD-style license found in the
// LICENSE file in the root directory of this source tree.
5
6#include <gtest/gtest.h>
7
8#include <xnnpack/common.h>
9#include <xnnpack/isa-checks.h>
10
11#include <xnnpack/fill.h>
12#include "fill-microkernel-tester.h"
13
14
15#if XNN_ARCH_ARM || XNN_ARCH_ARM64
16 TEST(XX_FILL__NEON_X64, channels_eq_1) {
17 TEST_REQUIRES_ARM_NEON;
18 FillMicrokernelTester()
19 .channels(1)
20 .Test(xnn_xx_fill_ukernel__neon_x64);
21 }
22
23 TEST(XX_FILL__NEON_X64, channels_eq_2) {
24 TEST_REQUIRES_ARM_NEON;
25 FillMicrokernelTester()
26 .channels(2)
27 .Test(xnn_xx_fill_ukernel__neon_x64);
28 }
29
30 TEST(XX_FILL__NEON_X64, channels_eq_4) {
31 TEST_REQUIRES_ARM_NEON;
32 FillMicrokernelTester()
33 .channels(4)
34 .Test(xnn_xx_fill_ukernel__neon_x64);
35 }
36
37 TEST(XX_FILL__NEON_X64, channels_eq_64) {
38 TEST_REQUIRES_ARM_NEON;
39 FillMicrokernelTester()
40 .channels(64)
41 .Test(xnn_xx_fill_ukernel__neon_x64);
42 }
43
44 TEST(XX_FILL__NEON_X64, channels_div_64) {
45 TEST_REQUIRES_ARM_NEON;
46 for (size_t channels = 128; channels <= 192; channels += 64) {
47 FillMicrokernelTester()
48 .channels(channels)
49 .Test(xnn_xx_fill_ukernel__neon_x64);
50 }
51 }
52
53 TEST(XX_FILL__NEON_X64, channels_lt_64) {
54 TEST_REQUIRES_ARM_NEON;
55 for (size_t channels = 1; channels < 64; channels++) {
56 FillMicrokernelTester()
57 .channels(channels)
58 .Test(xnn_xx_fill_ukernel__neon_x64);
59 }
60 }
61
62 TEST(XX_FILL__NEON_X64, channels_gt_64) {
63 TEST_REQUIRES_ARM_NEON;
64 for (size_t channels = 65; channels < 128; channels++) {
65 FillMicrokernelTester()
66 .channels(channels)
67 .Test(xnn_xx_fill_ukernel__neon_x64);
68 }
69 }
70
71 TEST(XX_FILL__NEON_X64, multiple_rows) {
72 TEST_REQUIRES_ARM_NEON;
73 for (size_t rows = 2; rows < 5; rows++) {
74 for (size_t channels = 1; channels < 192; channels += 15) {
75 FillMicrokernelTester()
76 .channels(channels)
77 .rows(rows)
78 .Test(xnn_xx_fill_ukernel__neon_x64);
79 }
80 }
81 }
82
83 TEST(XX_FILL__NEON_X64, multiple_rows_with_output_stride) {
84 TEST_REQUIRES_ARM_NEON;
85 for (size_t rows = 2; rows < 5; rows++) {
86 for (size_t channels = 1; channels < 192; channels += 15) {
87 FillMicrokernelTester()
88 .channels(channels)
89 .rows(rows)
90 .output_stride(193)
91 .Test(xnn_xx_fill_ukernel__neon_x64);
92 }
93 }
94 }
95#endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
96
97
98#if XNN_ARCH_X86 || XNN_ARCH_X86_64
99 TEST(XX_FILL__SSE2_X64, channels_eq_1) {
100 TEST_REQUIRES_X86_SSE2;
101 FillMicrokernelTester()
102 .channels(1)
103 .Test(xnn_xx_fill_ukernel__sse2_x64);
104 }
105
106 TEST(XX_FILL__SSE2_X64, channels_eq_2) {
107 TEST_REQUIRES_X86_SSE2;
108 FillMicrokernelTester()
109 .channels(2)
110 .Test(xnn_xx_fill_ukernel__sse2_x64);
111 }
112
113 TEST(XX_FILL__SSE2_X64, channels_eq_4) {
114 TEST_REQUIRES_X86_SSE2;
115 FillMicrokernelTester()
116 .channels(4)
117 .Test(xnn_xx_fill_ukernel__sse2_x64);
118 }
119
120 TEST(XX_FILL__SSE2_X64, channels_eq_64) {
121 TEST_REQUIRES_X86_SSE2;
122 FillMicrokernelTester()
123 .channels(64)
124 .Test(xnn_xx_fill_ukernel__sse2_x64);
125 }
126
127 TEST(XX_FILL__SSE2_X64, channels_div_64) {
128 TEST_REQUIRES_X86_SSE2;
129 for (size_t channels = 128; channels <= 192; channels += 64) {
130 FillMicrokernelTester()
131 .channels(channels)
132 .Test(xnn_xx_fill_ukernel__sse2_x64);
133 }
134 }
135
136 TEST(XX_FILL__SSE2_X64, channels_lt_64) {
137 TEST_REQUIRES_X86_SSE2;
138 for (size_t channels = 1; channels < 64; channels++) {
139 FillMicrokernelTester()
140 .channels(channels)
141 .Test(xnn_xx_fill_ukernel__sse2_x64);
142 }
143 }
144
145 TEST(XX_FILL__SSE2_X64, channels_gt_64) {
146 TEST_REQUIRES_X86_SSE2;
147 for (size_t channels = 65; channels < 128; channels++) {
148 FillMicrokernelTester()
149 .channels(channels)
150 .Test(xnn_xx_fill_ukernel__sse2_x64);
151 }
152 }
153
154 TEST(XX_FILL__SSE2_X64, multiple_rows) {
155 TEST_REQUIRES_X86_SSE2;
156 for (size_t rows = 2; rows < 5; rows++) {
157 for (size_t channels = 1; channels < 192; channels += 15) {
158 FillMicrokernelTester()
159 .channels(channels)
160 .rows(rows)
161 .Test(xnn_xx_fill_ukernel__sse2_x64);
162 }
163 }
164 }
165
166 TEST(XX_FILL__SSE2_X64, multiple_rows_with_output_stride) {
167 TEST_REQUIRES_X86_SSE2;
168 for (size_t rows = 2; rows < 5; rows++) {
169 for (size_t channels = 1; channels < 192; channels += 15) {
170 FillMicrokernelTester()
171 .channels(channels)
172 .rows(rows)
173 .output_stride(193)
174 .Test(xnn_xx_fill_ukernel__sse2_x64);
175 }
176 }
177 }
178#endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
179
180
181#if XNN_ARCH_WASMSIMD
182 TEST(XX_FILL__WASMSIMD_X64, channels_eq_1) {
183 FillMicrokernelTester()
184 .channels(1)
185 .Test(xnn_xx_fill_ukernel__wasmsimd_x64);
186 }
187
188 TEST(XX_FILL__WASMSIMD_X64, channels_eq_2) {
189 FillMicrokernelTester()
190 .channels(2)
191 .Test(xnn_xx_fill_ukernel__wasmsimd_x64);
192 }
193
194 TEST(XX_FILL__WASMSIMD_X64, channels_eq_4) {
195 FillMicrokernelTester()
196 .channels(4)
197 .Test(xnn_xx_fill_ukernel__wasmsimd_x64);
198 }
199
200 TEST(XX_FILL__WASMSIMD_X64, channels_eq_64) {
201 FillMicrokernelTester()
202 .channels(64)
203 .Test(xnn_xx_fill_ukernel__wasmsimd_x64);
204 }
205
206 TEST(XX_FILL__WASMSIMD_X64, channels_div_64) {
207 for (size_t channels = 128; channels <= 192; channels += 64) {
208 FillMicrokernelTester()
209 .channels(channels)
210 .Test(xnn_xx_fill_ukernel__wasmsimd_x64);
211 }
212 }
213
214 TEST(XX_FILL__WASMSIMD_X64, channels_lt_64) {
215 for (size_t channels = 1; channels < 64; channels++) {
216 FillMicrokernelTester()
217 .channels(channels)
218 .Test(xnn_xx_fill_ukernel__wasmsimd_x64);
219 }
220 }
221
222 TEST(XX_FILL__WASMSIMD_X64, channels_gt_64) {
223 for (size_t channels = 65; channels < 128; channels++) {
224 FillMicrokernelTester()
225 .channels(channels)
226 .Test(xnn_xx_fill_ukernel__wasmsimd_x64);
227 }
228 }
229
230 TEST(XX_FILL__WASMSIMD_X64, multiple_rows) {
231 for (size_t rows = 2; rows < 5; rows++) {
232 for (size_t channels = 1; channels < 192; channels += 15) {
233 FillMicrokernelTester()
234 .channels(channels)
235 .rows(rows)
236 .Test(xnn_xx_fill_ukernel__wasmsimd_x64);
237 }
238 }
239 }
240
241 TEST(XX_FILL__WASMSIMD_X64, multiple_rows_with_output_stride) {
242 for (size_t rows = 2; rows < 5; rows++) {
243 for (size_t channels = 1; channels < 192; channels += 15) {
244 FillMicrokernelTester()
245 .channels(channels)
246 .rows(rows)
247 .output_stride(193)
248 .Test(xnn_xx_fill_ukernel__wasmsimd_x64);
249 }
250 }
251 }
252#endif // XNN_ARCH_WASMSIMD
253
254
255TEST(XX_FILL__SCALAR_X16, channels_eq_1) {
256 FillMicrokernelTester()
257 .channels(1)
258 .Test(xnn_xx_fill_ukernel__scalar_x16);
259}
260
261TEST(XX_FILL__SCALAR_X16, channels_eq_2) {
262 FillMicrokernelTester()
263 .channels(2)
264 .Test(xnn_xx_fill_ukernel__scalar_x16);
265}
266
267TEST(XX_FILL__SCALAR_X16, channels_eq_4) {
268 FillMicrokernelTester()
269 .channels(4)
270 .Test(xnn_xx_fill_ukernel__scalar_x16);
271}
272
273TEST(XX_FILL__SCALAR_X16, channels_eq_16) {
274 FillMicrokernelTester()
275 .channels(16)
276 .Test(xnn_xx_fill_ukernel__scalar_x16);
277}
278
279TEST(XX_FILL__SCALAR_X16, channels_div_16) {
280 for (size_t channels = 32; channels <= 48; channels += 48) {
281 FillMicrokernelTester()
282 .channels(channels)
283 .Test(xnn_xx_fill_ukernel__scalar_x16);
284 }
285}
286
287TEST(XX_FILL__SCALAR_X16, channels_lt_16) {
288 for (size_t channels = 1; channels < 16; channels++) {
289 FillMicrokernelTester()
290 .channels(channels)
291 .Test(xnn_xx_fill_ukernel__scalar_x16);
292 }
293}
294
295TEST(XX_FILL__SCALAR_X16, channels_gt_16) {
296 for (size_t channels = 17; channels < 32; channels++) {
297 FillMicrokernelTester()
298 .channels(channels)
299 .Test(xnn_xx_fill_ukernel__scalar_x16);
300 }
301}
302
303TEST(XX_FILL__SCALAR_X16, multiple_rows) {
304 for (size_t rows = 2; rows < 5; rows++) {
305 for (size_t channels = 1; channels < 48; channels += 3) {
306 FillMicrokernelTester()
307 .channels(channels)
308 .rows(rows)
309 .Test(xnn_xx_fill_ukernel__scalar_x16);
310 }
311 }
312}
313
314TEST(XX_FILL__SCALAR_X16, multiple_rows_with_output_stride) {
315 for (size_t rows = 2; rows < 5; rows++) {
316 for (size_t channels = 1; channels < 48; channels += 3) {
317 FillMicrokernelTester()
318 .channels(channels)
319 .rows(rows)
320 .output_stride(53)
321 .Test(xnn_xx_fill_ukernel__scalar_x16);
322 }
323 }
324}