// Copyright 2020 Google LLC
//
// This source code is licensed under the BSD-style license found in the
// LICENSE file in the root directory of this source tree.

#include <algorithm>
#include <cmath>
#include <cstddef>
#include <cstdint>
#include <cstdlib>
#include <iomanip>
#include <ios>
#include <limits>
#include <vector>

#include <gtest/gtest.h>

#include <fp16.h>

#include <xnnpack/AlignedAllocator.h>
#include <xnnpack/common.h>
#include <xnnpack/math-stubs.h>


constexpr int kBlockSize = 1024;

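// The tests below sweep IEEE-754 single-precision bit patterns in blocks of
// kBlockSize values and compare each round-toward-zero kernel against std::trunc.
// Bit-pattern boundaries used by the test ranges: 0x4B800000 is 2**24, at and
// above which every finite float is already an integer; 0x7F800000 is +infinity;
// 0x7F800001-0x7FBFFFFF are signaling NaNs; 0x7FC00000-0x7FFFFFFF are quiet NaNs.
// The same ranges with the sign bit (0x80000000) set cover the negative inputs.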
#if XNN_ARCH_X86 || XNN_ARCH_X86_64
  TEST(ROUNDZ__SSE_ADDSUB, positive_normal) {
    std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
    std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
    for (uint32_t n = UINT32_C(0x00000000); n < UINT32_C(0x4B800000); n += kBlockSize) {
      for (uint32_t i = 0; i < kBlockSize; i++) {
        inputs[i] = fp32_from_bits(n + i);
      }
      xnn_math_f32_roundz__sse_addsub(kBlockSize * sizeof(float), inputs.data(), outputs.data());
      for (uint32_t i = 0; i < kBlockSize; i++) {
        const uint32_t reference_output = fp32_to_bits(std::trunc(inputs[i]));
        ASSERT_EQ(reference_output, fp32_to_bits(outputs[i]))
          << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(inputs[i])
          << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
          << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(outputs[i]);
      }
    }
  }

  TEST(ROUNDZ__SSE_ADDSUB, negative_normal) {
    std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
    std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
    for (uint32_t n = UINT32_C(0x80000000); n < UINT32_C(0xCB800000); n += kBlockSize) {
      for (uint32_t i = 0; i < kBlockSize; i++) {
        inputs[i] = fp32_from_bits(n + i);
      }
      xnn_math_f32_roundz__sse_addsub(kBlockSize * sizeof(float), inputs.data(), outputs.data());
      for (uint32_t i = 0; i < kBlockSize; i++) {
        const uint32_t reference_output = fp32_to_bits(std::trunc(inputs[i]));
        ASSERT_EQ(reference_output, fp32_to_bits(outputs[i]))
          << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(inputs[i])
          << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
          << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(outputs[i]);
      }
    }
  }

  TEST(ROUNDZ__SSE_ADDSUB, positive_integral) {
    std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
    std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
    for (uint32_t n = UINT32_C(0x4B800000); n < UINT32_C(0x7F800000); n += kBlockSize) {
      for (uint32_t i = 0; i < kBlockSize; i++) {
        inputs[i] = fp32_from_bits(n + i);
      }
      xnn_math_f32_roundz__sse_addsub(kBlockSize * sizeof(float), inputs.data(), outputs.data());
      for (uint32_t i = 0; i < kBlockSize; i++) {
        const uint32_t reference_output = fp32_to_bits(std::trunc(inputs[i]));
        ASSERT_EQ(reference_output, fp32_to_bits(outputs[i]))
          << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(inputs[i])
          << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
          << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(outputs[i]);
      }
    }
  }

  TEST(ROUNDZ__SSE_ADDSUB, negative_integral) {
    std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
    std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
    for (uint32_t n = UINT32_C(0xCB800000); n < UINT32_C(0xFF800000); n += kBlockSize) {
      for (uint32_t i = 0; i < kBlockSize; i++) {
        inputs[i] = fp32_from_bits(n + i);
      }
      xnn_math_f32_roundz__sse_addsub(kBlockSize * sizeof(float), inputs.data(), outputs.data());
      for (uint32_t i = 0; i < kBlockSize; i++) {
        const uint32_t reference_output = fp32_to_bits(std::trunc(inputs[i]));
        ASSERT_EQ(reference_output, fp32_to_bits(outputs[i]))
          << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(inputs[i])
          << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
          << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(outputs[i]);
      }
    }
  }

  TEST(ROUNDZ__SSE_ADDSUB, positive_infinity) {
    std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
    std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
    std::fill(inputs.begin(), inputs.end(), +std::numeric_limits<float>::infinity());
    xnn_math_f32_roundz__sse_addsub(kBlockSize * sizeof(float), inputs.data(), outputs.data());
    const uint32_t reference_output = fp32_to_bits(std::trunc(inputs[0]));
    ASSERT_EQ(reference_output, fp32_to_bits(outputs[0]))
      << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(inputs[0])
      << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
      << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(outputs[0]);
  }

  TEST(ROUNDZ__SSE_ADDSUB, negative_infinity) {
    std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
    std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
    std::fill(inputs.begin(), inputs.end(), -std::numeric_limits<float>::infinity());
    xnn_math_f32_roundz__sse_addsub(kBlockSize * sizeof(float), inputs.data(), outputs.data());
    const uint32_t reference_output = fp32_to_bits(std::trunc(inputs[0]));
    ASSERT_EQ(reference_output, fp32_to_bits(outputs[0]))
      << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(inputs[0])
      << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
      << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(outputs[0]);
  }

  TEST(ROUNDZ__SSE_ADDSUB, positive_qnan) {
    std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
    std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
    for (uint32_t n = UINT32_C(0x7FC00000); n < UINT32_C(0x80000000); n += kBlockSize) {
      for (uint32_t i = 0; i < kBlockSize; i++) {
        inputs[i] = fp32_from_bits(n + i);
      }
      xnn_math_f32_roundz__sse_addsub(kBlockSize * sizeof(float), inputs.data(), outputs.data());
      for (uint32_t i = 0; i < kBlockSize; i++) {
        const uint32_t reference_output = fp32_to_bits(std::trunc(inputs[i]));
        ASSERT_EQ(reference_output, fp32_to_bits(outputs[i]))
          << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(inputs[i])
          << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
          << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(outputs[i]);
      }
    }
  }

  TEST(ROUNDZ__SSE_ADDSUB, negative_qnan) {
    std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
    std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
    for (uint32_t n = UINT32_C(0x7FC00000); n < UINT32_C(0x80000000); n += kBlockSize) {
      for (uint32_t i = 0; i < kBlockSize; i++) {
        inputs[i] = fp32_from_bits(UINT32_C(0x80000000) | (n + i));
      }
      xnn_math_f32_roundz__sse_addsub(kBlockSize * sizeof(float), inputs.data(), outputs.data());
      for (uint32_t i = 0; i < kBlockSize; i++) {
        const uint32_t reference_output = fp32_to_bits(std::trunc(inputs[i]));
        ASSERT_EQ(reference_output, fp32_to_bits(outputs[i]))
          << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(inputs[i])
          << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
          << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(outputs[i]);
      }
    }
  }

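  // The signaling-NaN tests below compare results with the quiet bit (0x00400000)
  // masked off, since a kernel may return either the original sNaN pattern or its
  // quieted counterpart; the separate *_snan_to_qnan tests instead require an exact
  // match with the std::trunc reference.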
159 TEST(ROUNDZ__SSE_ADDSUB, positive_snan) {
160 std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
161 std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
162 for (uint32_t n = UINT32_C(0x7F800000); n < UINT32_C(0x7FC00000); n += kBlockSize) {
163 for (uint32_t i = 0; i < kBlockSize; i++) {
164 inputs[i] = fp32_from_bits(std::max<uint32_t>(n + i, UINT32_C(0x7F800001)));
165 }
166 xnn_math_f32_roundz__sse_addsub(kBlockSize * sizeof(float), inputs.data(), outputs.data());
167 for (uint32_t i = 0; i < kBlockSize; i++) {
168 const uint32_t reference_output = fp32_to_bits(std::trunc(inputs[i]));
169 ASSERT_EQ(reference_output & UINT32_C(0xFFBFFFFF), fp32_to_bits(outputs[i]) & UINT32_C(0xFFBFFFFF))
170 << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(inputs[i])
171 << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
172 << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(outputs[i]);
173 }
174 }
175 }
176
177 TEST(ROUNDZ__SSE_ADDSUB, negative_snan) {
178 std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
179 std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
180 for (uint32_t n = UINT32_C(0x7F800000); n < UINT32_C(0x7FC00000); n += kBlockSize) {
181 for (uint32_t i = 0; i < kBlockSize; i++) {
182 inputs[i] = fp32_from_bits(UINT32_C(0x80000000) | std::max<uint32_t>(n + i, UINT32_C(0x7F800001)));
183 }
184 xnn_math_f32_roundz__sse_addsub(kBlockSize * sizeof(float), inputs.data(), outputs.data());
185 for (uint32_t i = 0; i < kBlockSize; i++) {
186 const uint32_t reference_output = fp32_to_bits(std::trunc(inputs[i]));
187 ASSERT_EQ(reference_output & UINT32_C(0xFFBFFFFF), fp32_to_bits(outputs[i]) & UINT32_C(0xFFBFFFFF))
188 << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(inputs[i])
189 << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
190 << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(outputs[i]);
191 }
192 }
193 }
194
195 TEST(ROUNDZ__SSE_ADDSUB, positive_snan_to_qnan) {
196 std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
197 std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
198 for (uint32_t n = UINT32_C(0x7F800000); n < UINT32_C(0x7FC00000); n += kBlockSize) {
199 for (uint32_t i = 0; i < kBlockSize; i++) {
200 inputs[i] = fp32_from_bits(std::max<uint32_t>(n + i, UINT32_C(0x7F800001)));
201 }
202 xnn_math_f32_roundz__sse_addsub(kBlockSize * sizeof(float), inputs.data(), outputs.data());
203 for (uint32_t i = 0; i < kBlockSize; i++) {
204 const uint32_t reference_output = fp32_to_bits(std::trunc(inputs[i]));
205 ASSERT_EQ(reference_output, fp32_to_bits(outputs[i]))
206 << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(inputs[i])
207 << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
208 << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(outputs[i]);
209 }
210 }
211 }
212
213 TEST(ROUNDZ__SSE_ADDSUB, negative_snan_to_qnan) {
214 std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
215 std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
216 for (uint32_t n = UINT32_C(0x7F800000); n < UINT32_C(0x7FC00000); n += kBlockSize) {
217 for (uint32_t i = 0; i < kBlockSize; i++) {
218 inputs[i] = fp32_from_bits(UINT32_C(0x80000000) | std::max<uint32_t>(n + i, UINT32_C(0x7F800001)));
219 }
220 xnn_math_f32_roundz__sse_addsub(kBlockSize * sizeof(float), inputs.data(), outputs.data());
221 for (uint32_t i = 0; i < kBlockSize; i++) {
222 const uint32_t reference_output = fp32_to_bits(std::trunc(inputs[i]));
223 ASSERT_EQ(reference_output, fp32_to_bits(outputs[i]))
224 << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(inputs[i])
225 << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
226 << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(outputs[i]);
227 }
228 }
229 }
230#endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
231
232#if XNN_ARCH_X86 || XNN_ARCH_X86_64
233 TEST(ROUNDZ__SSE2_CVT, positive_normal) {
234 std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
235 std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
236 for (uint32_t n = UINT32_C(0x00000000); n < UINT32_C(0x4B800000); n += kBlockSize) {
237 for (uint32_t i = 0; i < kBlockSize; i++) {
238 inputs[i] = fp32_from_bits(n + i);
239 }
240 xnn_math_f32_roundz__sse2_cvt(kBlockSize * sizeof(float), inputs.data(), outputs.data());
241 for (uint32_t i = 0; i < kBlockSize; i++) {
242 const uint32_t reference_output = fp32_to_bits(std::trunc(inputs[i]));
243 ASSERT_EQ(reference_output, fp32_to_bits(outputs[i]))
244 << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(inputs[i])
245 << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
246 << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(outputs[i]);
247 }
248 }
249 }
250
251 TEST(ROUNDZ__SSE2_CVT, negative_normal) {
252 std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
253 std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
254 for (uint32_t n = UINT32_C(0x80000000); n < UINT32_C(0xCB800000); n += kBlockSize) {
255 for (uint32_t i = 0; i < kBlockSize; i++) {
256 inputs[i] = fp32_from_bits(n + i);
257 }
258 xnn_math_f32_roundz__sse2_cvt(kBlockSize * sizeof(float), inputs.data(), outputs.data());
259 for (uint32_t i = 0; i < kBlockSize; i++) {
260 const uint32_t reference_output = fp32_to_bits(std::trunc(inputs[i]));
261 ASSERT_EQ(reference_output, fp32_to_bits(outputs[i]))
262 << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(inputs[i])
263 << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
264 << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(outputs[i]);
265 }
266 }
267 }
268
269 TEST(ROUNDZ__SSE2_CVT, positive_integral) {
270 std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
271 std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
272 for (uint32_t n = UINT32_C(0x4B800000); n < UINT32_C(0x7F800000); n += kBlockSize) {
273 for (uint32_t i = 0; i < kBlockSize; i++) {
274 inputs[i] = fp32_from_bits(n + i);
275 }
276 xnn_math_f32_roundz__sse2_cvt(kBlockSize * sizeof(float), inputs.data(), outputs.data());
277 for (uint32_t i = 0; i < kBlockSize; i++) {
278 const uint32_t reference_output = fp32_to_bits(std::trunc(inputs[i]));
279 ASSERT_EQ(reference_output, fp32_to_bits(outputs[i]))
280 << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(inputs[i])
281 << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
282 << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(outputs[i]);
283 }
284 }
285 }
286
287 TEST(ROUNDZ__SSE2_CVT, negative_integral) {
288 std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
289 std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
290 for (uint32_t n = UINT32_C(0xCB800000); n < UINT32_C(0xFF800000); n += kBlockSize) {
291 for (uint32_t i = 0; i < kBlockSize; i++) {
292 inputs[i] = fp32_from_bits(n + i);
293 }
294 xnn_math_f32_roundz__sse2_cvt(kBlockSize * sizeof(float), inputs.data(), outputs.data());
295 for (uint32_t i = 0; i < kBlockSize; i++) {
296 const uint32_t reference_output = fp32_to_bits(std::trunc(inputs[i]));
297 ASSERT_EQ(reference_output, fp32_to_bits(outputs[i]))
298 << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(inputs[i])
299 << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
300 << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(outputs[i]);
301 }
302 }
303 }
304
305 TEST(ROUNDZ__SSE2_CVT, positive_infinity) {
306 std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
307 std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
Marat Dukhand3102142020-06-08 01:24:01 -0700308 std::fill(inputs.begin(), inputs.end(), +std::numeric_limits<float>::infinity());
Marat Dukhan2dbb9442020-05-12 20:43:43 -0700309 xnn_math_f32_roundz__sse2_cvt(kBlockSize * sizeof(float), inputs.data(), outputs.data());
310 const uint32_t reference_output = fp32_to_bits(std::trunc(inputs[0]));
311 ASSERT_EQ(reference_output, fp32_to_bits(outputs[0]))
312 << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(inputs[0])
313 << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
314 << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(outputs[0]);
315 }
316
317 TEST(ROUNDZ__SSE2_CVT, negative_infinity) {
318 std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
319 std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
Marat Dukhand3102142020-06-08 01:24:01 -0700320 std::fill(inputs.begin(), inputs.end(), -std::numeric_limits<float>::infinity());
Marat Dukhan2dbb9442020-05-12 20:43:43 -0700321 xnn_math_f32_roundz__sse2_cvt(kBlockSize * sizeof(float), inputs.data(), outputs.data());
322 const uint32_t reference_output = fp32_to_bits(std::trunc(inputs[0]));
323 ASSERT_EQ(reference_output, fp32_to_bits(outputs[0]))
324 << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(inputs[0])
325 << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
326 << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(outputs[0]);
327 }
328
329 TEST(ROUNDZ__SSE2_CVT, positive_qnan) {
330 std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
331 std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
332 for (uint32_t n = UINT32_C(0x7FC00000); n < UINT32_C(0x80000000); n += kBlockSize) {
333 for (uint32_t i = 0; i < kBlockSize; i++) {
334 inputs[i] = fp32_from_bits(n + i);
335 }
336 xnn_math_f32_roundz__sse2_cvt(kBlockSize * sizeof(float), inputs.data(), outputs.data());
337 for (uint32_t i = 0; i < kBlockSize; i++) {
338 const uint32_t reference_output = fp32_to_bits(std::trunc(inputs[i]));
339 ASSERT_EQ(reference_output, fp32_to_bits(outputs[i]))
340 << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(inputs[i])
341 << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
342 << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(outputs[i]);
343 }
344 }
345 }
346
347 TEST(ROUNDZ__SSE2_CVT, negative_qnan) {
348 std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
349 std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
350 for (uint32_t n = UINT32_C(0x7FC00000); n < UINT32_C(0x80000000); n += kBlockSize) {
351 for (uint32_t i = 0; i < kBlockSize; i++) {
352 inputs[i] = fp32_from_bits(UINT32_C(0x80000000) | (n + i));
353 }
354 xnn_math_f32_roundz__sse2_cvt(kBlockSize * sizeof(float), inputs.data(), outputs.data());
355 for (uint32_t i = 0; i < kBlockSize; i++) {
356 const uint32_t reference_output = fp32_to_bits(std::trunc(inputs[i]));
357 ASSERT_EQ(reference_output, fp32_to_bits(outputs[i]))
358 << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(inputs[i])
359 << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
360 << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(outputs[i]);
361 }
362 }
363 }
364
365 TEST(ROUNDZ__SSE2_CVT, positive_snan) {
366 std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
367 std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
368 for (uint32_t n = UINT32_C(0x7F800000); n < UINT32_C(0x7FC00000); n += kBlockSize) {
369 for (uint32_t i = 0; i < kBlockSize; i++) {
370 inputs[i] = fp32_from_bits(std::max<uint32_t>(n + i, UINT32_C(0x7F800001)));
371 }
372 xnn_math_f32_roundz__sse2_cvt(kBlockSize * sizeof(float), inputs.data(), outputs.data());
373 for (uint32_t i = 0; i < kBlockSize; i++) {
374 const uint32_t reference_output = fp32_to_bits(std::trunc(inputs[i]));
375 ASSERT_EQ(reference_output & UINT32_C(0xFFBFFFFF), fp32_to_bits(outputs[i]) & UINT32_C(0xFFBFFFFF))
376 << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(inputs[i])
377 << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
378 << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(outputs[i]);
379 }
380 }
381 }
382
383 TEST(ROUNDZ__SSE2_CVT, negative_snan) {
384 std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
385 std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
386 for (uint32_t n = UINT32_C(0x7F800000); n < UINT32_C(0x7FC00000); n += kBlockSize) {
387 for (uint32_t i = 0; i < kBlockSize; i++) {
388 inputs[i] = fp32_from_bits(UINT32_C(0x80000000) | std::max<uint32_t>(n + i, UINT32_C(0x7F800001)));
389 }
390 xnn_math_f32_roundz__sse2_cvt(kBlockSize * sizeof(float), inputs.data(), outputs.data());
391 for (uint32_t i = 0; i < kBlockSize; i++) {
392 const uint32_t reference_output = fp32_to_bits(std::trunc(inputs[i]));
393 ASSERT_EQ(reference_output & UINT32_C(0xFFBFFFFF), fp32_to_bits(outputs[i]) & UINT32_C(0xFFBFFFFF))
394 << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(inputs[i])
395 << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
396 << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(outputs[i]);
397 }
398 }
399 }
400
401 TEST(ROUNDZ__SSE2_CVT, DISABLED_positive_snan_to_qnan) {
402 std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
403 std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
404 for (uint32_t n = UINT32_C(0x7F800000); n < UINT32_C(0x7FC00000); n += kBlockSize) {
405 for (uint32_t i = 0; i < kBlockSize; i++) {
406 inputs[i] = fp32_from_bits(std::max<uint32_t>(n + i, UINT32_C(0x7F800001)));
407 }
408 xnn_math_f32_roundz__sse2_cvt(kBlockSize * sizeof(float), inputs.data(), outputs.data());
409 for (uint32_t i = 0; i < kBlockSize; i++) {
410 const uint32_t reference_output = fp32_to_bits(std::trunc(inputs[i]));
411 ASSERT_EQ(reference_output, fp32_to_bits(outputs[i]))
412 << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(inputs[i])
413 << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
414 << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(outputs[i]);
415 }
416 }
417 }
418
419 TEST(ROUNDZ__SSE2_CVT, DISABLED_negative_snan_to_qnan) {
420 std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
421 std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
422 for (uint32_t n = UINT32_C(0x7F800000); n < UINT32_C(0x7FC00000); n += kBlockSize) {
423 for (uint32_t i = 0; i < kBlockSize; i++) {
424 inputs[i] = fp32_from_bits(UINT32_C(0x80000000) | std::max<uint32_t>(n + i, UINT32_C(0x7F800001)));
425 }
426 xnn_math_f32_roundz__sse2_cvt(kBlockSize * sizeof(float), inputs.data(), outputs.data());
427 for (uint32_t i = 0; i < kBlockSize; i++) {
428 const uint32_t reference_output = fp32_to_bits(std::trunc(inputs[i]));
429 ASSERT_EQ(reference_output, fp32_to_bits(outputs[i]))
430 << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(inputs[i])
431 << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
432 << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(outputs[i]);
433 }
434 }
435 }
436#endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
437
438#if XNN_ARCH_X86 || XNN_ARCH_X86_64
439 TEST(ROUNDZ__SSE41, positive_normal) {
440 std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
441 std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
442 for (uint32_t n = UINT32_C(0x00000000); n < UINT32_C(0x4B800000); n += kBlockSize) {
443 for (uint32_t i = 0; i < kBlockSize; i++) {
444 inputs[i] = fp32_from_bits(n + i);
445 }
446 xnn_math_f32_roundz__sse41(kBlockSize * sizeof(float), inputs.data(), outputs.data());
447 for (uint32_t i = 0; i < kBlockSize; i++) {
448 const uint32_t reference_output = fp32_to_bits(std::trunc(inputs[i]));
449 ASSERT_EQ(reference_output, fp32_to_bits(outputs[i]))
450 << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(inputs[i])
451 << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
452 << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(outputs[i]);
453 }
454 }
455 }
456
457 TEST(ROUNDZ__SSE41, negative_normal) {
458 std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
459 std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
460 for (uint32_t n = UINT32_C(0x80000000); n < UINT32_C(0xCB800000); n += kBlockSize) {
461 for (uint32_t i = 0; i < kBlockSize; i++) {
462 inputs[i] = fp32_from_bits(n + i);
463 }
464 xnn_math_f32_roundz__sse41(kBlockSize * sizeof(float), inputs.data(), outputs.data());
465 for (uint32_t i = 0; i < kBlockSize; i++) {
466 const uint32_t reference_output = fp32_to_bits(std::trunc(inputs[i]));
467 ASSERT_EQ(reference_output, fp32_to_bits(outputs[i]))
468 << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(inputs[i])
469 << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
470 << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(outputs[i]);
471 }
472 }
473 }
474
475 TEST(ROUNDZ__SSE41, positive_integral) {
476 std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
477 std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
478 for (uint32_t n = UINT32_C(0x4B800000); n < UINT32_C(0x7F800000); n += kBlockSize) {
479 for (uint32_t i = 0; i < kBlockSize; i++) {
480 inputs[i] = fp32_from_bits(n + i);
481 }
482 xnn_math_f32_roundz__sse41(kBlockSize * sizeof(float), inputs.data(), outputs.data());
483 for (uint32_t i = 0; i < kBlockSize; i++) {
484 const uint32_t reference_output = fp32_to_bits(std::trunc(inputs[i]));
485 ASSERT_EQ(reference_output, fp32_to_bits(outputs[i]))
486 << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(inputs[i])
487 << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
488 << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(outputs[i]);
489 }
490 }
491 }
492
493 TEST(ROUNDZ__SSE41, negative_integral) {
494 std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
495 std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
496 for (uint32_t n = UINT32_C(0xCB800000); n < UINT32_C(0xFF800000); n += kBlockSize) {
497 for (uint32_t i = 0; i < kBlockSize; i++) {
498 inputs[i] = fp32_from_bits(n + i);
499 }
500 xnn_math_f32_roundz__sse41(kBlockSize * sizeof(float), inputs.data(), outputs.data());
501 for (uint32_t i = 0; i < kBlockSize; i++) {
502 const uint32_t reference_output = fp32_to_bits(std::trunc(inputs[i]));
503 ASSERT_EQ(reference_output, fp32_to_bits(outputs[i]))
504 << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(inputs[i])
505 << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
506 << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(outputs[i]);
507 }
508 }
509 }
510
511 TEST(ROUNDZ__SSE41, positive_infinity) {
512 std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
513 std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
Marat Dukhand3102142020-06-08 01:24:01 -0700514 std::fill(inputs.begin(), inputs.end(), +std::numeric_limits<float>::infinity());
Marat Dukhan2dbb9442020-05-12 20:43:43 -0700515 xnn_math_f32_roundz__sse41(kBlockSize * sizeof(float), inputs.data(), outputs.data());
516 const uint32_t reference_output = fp32_to_bits(std::trunc(inputs[0]));
517 ASSERT_EQ(reference_output, fp32_to_bits(outputs[0]))
518 << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(inputs[0])
519 << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
520 << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(outputs[0]);
521 }
522
523 TEST(ROUNDZ__SSE41, negative_infinity) {
524 std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
525 std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
Marat Dukhand3102142020-06-08 01:24:01 -0700526 std::fill(inputs.begin(), inputs.end(), -std::numeric_limits<float>::infinity());
Marat Dukhan2dbb9442020-05-12 20:43:43 -0700527 xnn_math_f32_roundz__sse41(kBlockSize * sizeof(float), inputs.data(), outputs.data());
528 const uint32_t reference_output = fp32_to_bits(std::trunc(inputs[0]));
529 ASSERT_EQ(reference_output, fp32_to_bits(outputs[0]))
530 << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(inputs[0])
531 << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
532 << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(outputs[0]);
533 }
534
535 TEST(ROUNDZ__SSE41, positive_qnan) {
536 std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
537 std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
538 for (uint32_t n = UINT32_C(0x7FC00000); n < UINT32_C(0x80000000); n += kBlockSize) {
539 for (uint32_t i = 0; i < kBlockSize; i++) {
540 inputs[i] = fp32_from_bits(n + i);
541 }
542 xnn_math_f32_roundz__sse41(kBlockSize * sizeof(float), inputs.data(), outputs.data());
543 for (uint32_t i = 0; i < kBlockSize; i++) {
544 const uint32_t reference_output = fp32_to_bits(std::trunc(inputs[i]));
545 ASSERT_EQ(reference_output, fp32_to_bits(outputs[i]))
546 << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(inputs[i])
547 << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
548 << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(outputs[i]);
549 }
550 }
551 }
552
553 TEST(ROUNDZ__SSE41, negative_qnan) {
554 std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
555 std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
556 for (uint32_t n = UINT32_C(0x7FC00000); n < UINT32_C(0x80000000); n += kBlockSize) {
557 for (uint32_t i = 0; i < kBlockSize; i++) {
558 inputs[i] = fp32_from_bits(UINT32_C(0x80000000) | (n + i));
559 }
560 xnn_math_f32_roundz__sse41(kBlockSize * sizeof(float), inputs.data(), outputs.data());
561 for (uint32_t i = 0; i < kBlockSize; i++) {
562 const uint32_t reference_output = fp32_to_bits(std::trunc(inputs[i]));
563 ASSERT_EQ(reference_output, fp32_to_bits(outputs[i]))
564 << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(inputs[i])
565 << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
566 << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(outputs[i]);
567 }
568 }
569 }
570
571 TEST(ROUNDZ__SSE41, positive_snan) {
572 std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
573 std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
574 for (uint32_t n = UINT32_C(0x7F800000); n < UINT32_C(0x7FC00000); n += kBlockSize) {
575 for (uint32_t i = 0; i < kBlockSize; i++) {
576 inputs[i] = fp32_from_bits(std::max<uint32_t>(n + i, UINT32_C(0x7F800001)));
577 }
578 xnn_math_f32_roundz__sse41(kBlockSize * sizeof(float), inputs.data(), outputs.data());
579 for (uint32_t i = 0; i < kBlockSize; i++) {
580 const uint32_t reference_output = fp32_to_bits(std::trunc(inputs[i]));
581 ASSERT_EQ(reference_output & UINT32_C(0xFFBFFFFF), fp32_to_bits(outputs[i]) & UINT32_C(0xFFBFFFFF))
582 << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(inputs[i])
583 << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
584 << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(outputs[i]);
585 }
586 }
587 }
588
589 TEST(ROUNDZ__SSE41, negative_snan) {
590 std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
591 std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
592 for (uint32_t n = UINT32_C(0x7F800000); n < UINT32_C(0x7FC00000); n += kBlockSize) {
593 for (uint32_t i = 0; i < kBlockSize; i++) {
594 inputs[i] = fp32_from_bits(UINT32_C(0x80000000) | std::max<uint32_t>(n + i, UINT32_C(0x7F800001)));
595 }
596 xnn_math_f32_roundz__sse41(kBlockSize * sizeof(float), inputs.data(), outputs.data());
597 for (uint32_t i = 0; i < kBlockSize; i++) {
598 const uint32_t reference_output = fp32_to_bits(std::trunc(inputs[i]));
599 ASSERT_EQ(reference_output & UINT32_C(0xFFBFFFFF), fp32_to_bits(outputs[i]) & UINT32_C(0xFFBFFFFF))
600 << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(inputs[i])
601 << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
602 << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(outputs[i]);
603 }
604 }
605 }
606
607 TEST(ROUNDZ__SSE41, positive_snan_to_qnan) {
608 std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
609 std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
610 for (uint32_t n = UINT32_C(0x7F800000); n < UINT32_C(0x7FC00000); n += kBlockSize) {
611 for (uint32_t i = 0; i < kBlockSize; i++) {
612 inputs[i] = fp32_from_bits(std::max<uint32_t>(n + i, UINT32_C(0x7F800001)));
613 }
614 xnn_math_f32_roundz__sse41(kBlockSize * sizeof(float), inputs.data(), outputs.data());
615 for (uint32_t i = 0; i < kBlockSize; i++) {
616 const uint32_t reference_output = fp32_to_bits(std::trunc(inputs[i]));
617 ASSERT_EQ(reference_output, fp32_to_bits(outputs[i]))
618 << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(inputs[i])
619 << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
620 << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(outputs[i]);
621 }
622 }
623 }
624
625 TEST(ROUNDZ__SSE41, negative_snan_to_qnan) {
626 std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
627 std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
628 for (uint32_t n = UINT32_C(0x7F800000); n < UINT32_C(0x7FC00000); n += kBlockSize) {
629 for (uint32_t i = 0; i < kBlockSize; i++) {
630 inputs[i] = fp32_from_bits(UINT32_C(0x80000000) | std::max<uint32_t>(n + i, UINT32_C(0x7F800001)));
631 }
632 xnn_math_f32_roundz__sse41(kBlockSize * sizeof(float), inputs.data(), outputs.data());
633 for (uint32_t i = 0; i < kBlockSize; i++) {
634 const uint32_t reference_output = fp32_to_bits(std::trunc(inputs[i]));
635 ASSERT_EQ(reference_output, fp32_to_bits(outputs[i]))
636 << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(inputs[i])
637 << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
638 << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(outputs[i]);
639 }
640 }
641 }
642#endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
643
644#if XNN_ARCH_ARM || XNN_ARCH_ARM64
645 TEST(ROUNDZ__NEON_ADDSUB, positive_normal) {
646 std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
647 std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
648 for (uint32_t n = UINT32_C(0x00000000); n < UINT32_C(0x4B800000); n += kBlockSize) {
649 for (uint32_t i = 0; i < kBlockSize; i++) {
650 inputs[i] = fp32_from_bits(n + i);
651 }
652 xnn_math_f32_roundz__neon_addsub(kBlockSize * sizeof(float), inputs.data(), outputs.data());
653 for (uint32_t i = 0; i < kBlockSize; i++) {
654 const uint32_t reference_output = fp32_to_bits(std::trunc(inputs[i]));
655 ASSERT_EQ(reference_output, fp32_to_bits(outputs[i]))
656 << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(inputs[i])
657 << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
658 << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(outputs[i]);
659 }
660 }
661 }
662
663 TEST(ROUNDZ__NEON_ADDSUB, negative_normal) {
664 std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
665 std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
666 for (uint32_t n = UINT32_C(0x80000000); n < UINT32_C(0xCB800000); n += kBlockSize) {
667 for (uint32_t i = 0; i < kBlockSize; i++) {
668 inputs[i] = fp32_from_bits(n + i);
669 }
670 xnn_math_f32_roundz__neon_addsub(kBlockSize * sizeof(float), inputs.data(), outputs.data());
671 for (uint32_t i = 0; i < kBlockSize; i++) {
672 const uint32_t reference_output = fp32_to_bits(std::trunc(inputs[i]));
673 ASSERT_EQ(reference_output, fp32_to_bits(outputs[i]))
674 << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(inputs[i])
675 << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
676 << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(outputs[i]);
677 }
678 }
679 }
680
681 TEST(ROUNDZ__NEON_ADDSUB, positive_integral) {
682 std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
683 std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
684 for (uint32_t n = UINT32_C(0x4B800000); n < UINT32_C(0x7F800000); n += kBlockSize) {
685 for (uint32_t i = 0; i < kBlockSize; i++) {
686 inputs[i] = fp32_from_bits(n + i);
687 }
688 xnn_math_f32_roundz__neon_addsub(kBlockSize * sizeof(float), inputs.data(), outputs.data());
689 for (uint32_t i = 0; i < kBlockSize; i++) {
690 const uint32_t reference_output = fp32_to_bits(std::trunc(inputs[i]));
691 ASSERT_EQ(reference_output, fp32_to_bits(outputs[i]))
692 << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(inputs[i])
693 << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
694 << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(outputs[i]);
695 }
696 }
697 }
698
699 TEST(ROUNDZ__NEON_ADDSUB, negative_integral) {
700 std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
701 std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
702 for (uint32_t n = UINT32_C(0xCB800000); n < UINT32_C(0xFF800000); n += kBlockSize) {
703 for (uint32_t i = 0; i < kBlockSize; i++) {
704 inputs[i] = fp32_from_bits(n + i);
705 }
706 xnn_math_f32_roundz__neon_addsub(kBlockSize * sizeof(float), inputs.data(), outputs.data());
707 for (uint32_t i = 0; i < kBlockSize; i++) {
708 const uint32_t reference_output = fp32_to_bits(std::trunc(inputs[i]));
709 ASSERT_EQ(reference_output, fp32_to_bits(outputs[i]))
710 << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(inputs[i])
711 << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
712 << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(outputs[i]);
713 }
714 }
715 }
716
717 TEST(ROUNDZ__NEON_ADDSUB, positive_infinity) {
718 std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
719 std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
Marat Dukhand3102142020-06-08 01:24:01 -0700720 std::fill(inputs.begin(), inputs.end(), +std::numeric_limits<float>::infinity());
Marat Dukhan2dbb9442020-05-12 20:43:43 -0700721 xnn_math_f32_roundz__neon_addsub(kBlockSize * sizeof(float), inputs.data(), outputs.data());
722 const uint32_t reference_output = fp32_to_bits(std::trunc(inputs[0]));
723 ASSERT_EQ(reference_output, fp32_to_bits(outputs[0]))
724 << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(inputs[0])
725 << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
726 << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(outputs[0]);
727 }
728
729 TEST(ROUNDZ__NEON_ADDSUB, negative_infinity) {
730 std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
731 std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
Marat Dukhand3102142020-06-08 01:24:01 -0700732 std::fill(inputs.begin(), inputs.end(), -std::numeric_limits<float>::infinity());
Marat Dukhan2dbb9442020-05-12 20:43:43 -0700733 xnn_math_f32_roundz__neon_addsub(kBlockSize * sizeof(float), inputs.data(), outputs.data());
734 const uint32_t reference_output = fp32_to_bits(std::trunc(inputs[0]));
735 ASSERT_EQ(reference_output, fp32_to_bits(outputs[0]))
736 << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(inputs[0])
737 << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
738 << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(outputs[0]);
739 }
740
741 TEST(ROUNDZ__NEON_ADDSUB, positive_qnan) {
742 std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
743 std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
744 for (uint32_t n = UINT32_C(0x7FC00000); n < UINT32_C(0x80000000); n += kBlockSize) {
745 for (uint32_t i = 0; i < kBlockSize; i++) {
746 inputs[i] = fp32_from_bits(n + i);
747 }
748 xnn_math_f32_roundz__neon_addsub(kBlockSize * sizeof(float), inputs.data(), outputs.data());
749 for (uint32_t i = 0; i < kBlockSize; i++) {
750 const uint32_t reference_output = fp32_to_bits(std::trunc(inputs[i]));
751 ASSERT_EQ(reference_output, fp32_to_bits(outputs[i]))
752 << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(inputs[i])
753 << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
754 << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(outputs[i]);
755 }
756 }
757 }
758
759 TEST(ROUNDZ__NEON_ADDSUB, negative_qnan) {
760 std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
761 std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
762 for (uint32_t n = UINT32_C(0x7FC00000); n < UINT32_C(0x80000000); n += kBlockSize) {
763 for (uint32_t i = 0; i < kBlockSize; i++) {
764 inputs[i] = fp32_from_bits(UINT32_C(0x80000000) | (n + i));
765 }
766 xnn_math_f32_roundz__neon_addsub(kBlockSize * sizeof(float), inputs.data(), outputs.data());
767 for (uint32_t i = 0; i < kBlockSize; i++) {
768 const uint32_t reference_output = fp32_to_bits(std::trunc(inputs[i]));
769 ASSERT_EQ(reference_output, fp32_to_bits(outputs[i]))
770 << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(inputs[i])
771 << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
772 << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(outputs[i]);
773 }
774 }
775 }
776
777 TEST(ROUNDZ__NEON_ADDSUB, positive_snan) {
778 std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
779 std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
780 for (uint32_t n = UINT32_C(0x7F800000); n < UINT32_C(0x7FC00000); n += kBlockSize) {
781 for (uint32_t i = 0; i < kBlockSize; i++) {
782 inputs[i] = fp32_from_bits(std::max<uint32_t>(n + i, UINT32_C(0x7F800001)));
783 }
784 xnn_math_f32_roundz__neon_addsub(kBlockSize * sizeof(float), inputs.data(), outputs.data());
785 for (uint32_t i = 0; i < kBlockSize; i++) {
786 const uint32_t reference_output = fp32_to_bits(std::trunc(inputs[i]));
787 ASSERT_EQ(reference_output & UINT32_C(0xFFBFFFFF), fp32_to_bits(outputs[i]) & UINT32_C(0xFFBFFFFF))
788 << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(inputs[i])
789 << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
790 << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(outputs[i]);
791 }
792 }
793 }
794
795 TEST(ROUNDZ__NEON_ADDSUB, negative_snan) {
796 std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
797 std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
798 for (uint32_t n = UINT32_C(0x7F800000); n < UINT32_C(0x7FC00000); n += kBlockSize) {
799 for (uint32_t i = 0; i < kBlockSize; i++) {
800 inputs[i] = fp32_from_bits(UINT32_C(0x80000000) | std::max<uint32_t>(n + i, UINT32_C(0x7F800001)));
801 }
802 xnn_math_f32_roundz__neon_addsub(kBlockSize * sizeof(float), inputs.data(), outputs.data());
803 for (uint32_t i = 0; i < kBlockSize; i++) {
804 const uint32_t reference_output = fp32_to_bits(std::trunc(inputs[i]));
805 ASSERT_EQ(reference_output & UINT32_C(0xFFBFFFFF), fp32_to_bits(outputs[i]) & UINT32_C(0xFFBFFFFF))
806 << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(inputs[i])
807 << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
808 << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(outputs[i]);
809 }
810 }
811 }
812
813 TEST(ROUNDZ__NEON_ADDSUB, positive_snan_to_qnan) {
814 std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
815 std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
816 for (uint32_t n = UINT32_C(0x7F800000); n < UINT32_C(0x7FC00000); n += kBlockSize) {
817 for (uint32_t i = 0; i < kBlockSize; i++) {
818 inputs[i] = fp32_from_bits(std::max<uint32_t>(n + i, UINT32_C(0x7F800001)));
819 }
820 xnn_math_f32_roundz__neon_addsub(kBlockSize * sizeof(float), inputs.data(), outputs.data());
821 for (uint32_t i = 0; i < kBlockSize; i++) {
822 const uint32_t reference_output = fp32_to_bits(std::trunc(inputs[i]));
823 ASSERT_EQ(reference_output, fp32_to_bits(outputs[i]))
824 << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(inputs[i])
825 << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
826 << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(outputs[i]);
827 }
828 }
829 }
830
831 TEST(ROUNDZ__NEON_ADDSUB, negative_snan_to_qnan) {
832 std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
833 std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
834 for (uint32_t n = UINT32_C(0x7F800000); n < UINT32_C(0x7FC00000); n += kBlockSize) {
835 for (uint32_t i = 0; i < kBlockSize; i++) {
836 inputs[i] = fp32_from_bits(UINT32_C(0x80000000) | std::max<uint32_t>(n + i, UINT32_C(0x7F800001)));
837 }
838 xnn_math_f32_roundz__neon_addsub(kBlockSize * sizeof(float), inputs.data(), outputs.data());
839 for (uint32_t i = 0; i < kBlockSize; i++) {
840 const uint32_t reference_output = fp32_to_bits(std::trunc(inputs[i]));
841 ASSERT_EQ(reference_output, fp32_to_bits(outputs[i]))
842 << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(inputs[i])
843 << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
844 << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(outputs[i]);
845 }
846 }
847 }
848#endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
849
850#if XNN_ARCH_ARM || XNN_ARCH_ARM64
851 TEST(ROUNDZ__NEON_CVT, positive_normal) {
852 std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
853 std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
854 for (uint32_t n = UINT32_C(0x00000000); n < UINT32_C(0x4B800000); n += kBlockSize) {
855 for (uint32_t i = 0; i < kBlockSize; i++) {
856 inputs[i] = fp32_from_bits(n + i);
857 }
858 xnn_math_f32_roundz__neon_cvt(kBlockSize * sizeof(float), inputs.data(), outputs.data());
859 for (uint32_t i = 0; i < kBlockSize; i++) {
860 const uint32_t reference_output = fp32_to_bits(std::trunc(inputs[i]));
861 ASSERT_EQ(reference_output, fp32_to_bits(outputs[i]))
862 << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(inputs[i])
863 << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
864 << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(outputs[i]);
865 }
866 }
867 }
868
869 TEST(ROUNDZ__NEON_CVT, negative_normal) {
870 std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
871 std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
872 for (uint32_t n = UINT32_C(0x80000000); n < UINT32_C(0xCB800000); n += kBlockSize) {
873 for (uint32_t i = 0; i < kBlockSize; i++) {
874 inputs[i] = fp32_from_bits(n + i);
875 }
876 xnn_math_f32_roundz__neon_cvt(kBlockSize * sizeof(float), inputs.data(), outputs.data());
877 for (uint32_t i = 0; i < kBlockSize; i++) {
878 const uint32_t reference_output = fp32_to_bits(std::trunc(inputs[i]));
879 ASSERT_EQ(reference_output, fp32_to_bits(outputs[i]))
880 << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(inputs[i])
881 << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
882 << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(outputs[i]);
883 }
884 }
885 }
886
887 TEST(ROUNDZ__NEON_CVT, positive_integral) {
888 std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
889 std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
890 for (uint32_t n = UINT32_C(0x4B800000); n < UINT32_C(0x7F800000); n += kBlockSize) {
891 for (uint32_t i = 0; i < kBlockSize; i++) {
892 inputs[i] = fp32_from_bits(n + i);
893 }
894 xnn_math_f32_roundz__neon_cvt(kBlockSize * sizeof(float), inputs.data(), outputs.data());
895 for (uint32_t i = 0; i < kBlockSize; i++) {
896 const uint32_t reference_output = fp32_to_bits(std::trunc(inputs[i]));
897 ASSERT_EQ(reference_output, fp32_to_bits(outputs[i]))
898 << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(inputs[i])
899 << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
900 << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(outputs[i]);
901 }
902 }
903 }
904
905 TEST(ROUNDZ__NEON_CVT, negative_integral) {
906 std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
907 std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
908 for (uint32_t n = UINT32_C(0xCB800000); n < UINT32_C(0xFF800000); n += kBlockSize) {
909 for (uint32_t i = 0; i < kBlockSize; i++) {
910 inputs[i] = fp32_from_bits(n + i);
911 }
912 xnn_math_f32_roundz__neon_cvt(kBlockSize * sizeof(float), inputs.data(), outputs.data());
913 for (uint32_t i = 0; i < kBlockSize; i++) {
914 const uint32_t reference_output = fp32_to_bits(std::trunc(inputs[i]));
915 ASSERT_EQ(reference_output, fp32_to_bits(outputs[i]))
916 << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(inputs[i])
917 << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
918 << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(outputs[i]);
919 }
920 }
921 }
922
923 TEST(ROUNDZ__NEON_CVT, positive_infinity) {
924 std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
925 std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
Marat Dukhand3102142020-06-08 01:24:01 -0700926 std::fill(inputs.begin(), inputs.end(), +std::numeric_limits<float>::infinity());
Marat Dukhan2dbb9442020-05-12 20:43:43 -0700927 xnn_math_f32_roundz__neon_cvt(kBlockSize * sizeof(float), inputs.data(), outputs.data());
928 const uint32_t reference_output = fp32_to_bits(std::trunc(inputs[0]));
929 ASSERT_EQ(reference_output, fp32_to_bits(outputs[0]))
930 << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(inputs[0])
931 << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
932 << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(outputs[0]);
933 }
934
935 TEST(ROUNDZ__NEON_CVT, negative_infinity) {
936 std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
937 std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
Marat Dukhand3102142020-06-08 01:24:01 -0700938 std::fill(inputs.begin(), inputs.end(), -std::numeric_limits<float>::infinity());
Marat Dukhan2dbb9442020-05-12 20:43:43 -0700939 xnn_math_f32_roundz__neon_cvt(kBlockSize * sizeof(float), inputs.data(), outputs.data());
940 const uint32_t reference_output = fp32_to_bits(std::trunc(inputs[0]));
941 ASSERT_EQ(reference_output, fp32_to_bits(outputs[0]))
942 << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(inputs[0])
943 << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
944 << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(outputs[0]);
945 }
946
947 TEST(ROUNDZ__NEON_CVT, positive_qnan) {
948 std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
949 std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
950 for (uint32_t n = UINT32_C(0x7FC00000); n < UINT32_C(0x80000000); n += kBlockSize) {
951 for (uint32_t i = 0; i < kBlockSize; i++) {
952 inputs[i] = fp32_from_bits(n + i);
953 }
954 xnn_math_f32_roundz__neon_cvt(kBlockSize * sizeof(float), inputs.data(), outputs.data());
955 for (uint32_t i = 0; i < kBlockSize; i++) {
956 const uint32_t reference_output = fp32_to_bits(std::trunc(inputs[i]));
957 ASSERT_EQ(reference_output, fp32_to_bits(outputs[i]))
958 << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(inputs[i])
959 << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
960 << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(outputs[i]);
961 }
962 }
963 }
964
965 TEST(ROUNDZ__NEON_CVT, negative_qnan) {
966 std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
967 std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
968 for (uint32_t n = UINT32_C(0x7FC00000); n < UINT32_C(0x80000000); n += kBlockSize) {
969 for (uint32_t i = 0; i < kBlockSize; i++) {
970 inputs[i] = fp32_from_bits(UINT32_C(0x80000000) | (n + i));
971 }
972 xnn_math_f32_roundz__neon_cvt(kBlockSize * sizeof(float), inputs.data(), outputs.data());
973 for (uint32_t i = 0; i < kBlockSize; i++) {
974 const uint32_t reference_output = fp32_to_bits(std::trunc(inputs[i]));
975 ASSERT_EQ(reference_output, fp32_to_bits(outputs[i]))
976 << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(inputs[i])
977 << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
978 << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(outputs[i]);
979 }
980 }
981 }
982
983 TEST(ROUNDZ__NEON_CVT, positive_snan) {
984 std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
985 std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
986 for (uint32_t n = UINT32_C(0x7F800000); n < UINT32_C(0x7FC00000); n += kBlockSize) {
987 for (uint32_t i = 0; i < kBlockSize; i++) {
988 inputs[i] = fp32_from_bits(std::max<uint32_t>(n + i, UINT32_C(0x7F800001)));
989 }
990 xnn_math_f32_roundz__neon_cvt(kBlockSize * sizeof(float), inputs.data(), outputs.data());
991 for (uint32_t i = 0; i < kBlockSize; i++) {
992 const uint32_t reference_output = fp32_to_bits(std::trunc(inputs[i]));
993 ASSERT_EQ(reference_output & UINT32_C(0xFFBFFFFF), fp32_to_bits(outputs[i]) & UINT32_C(0xFFBFFFFF))
994 << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(inputs[i])
995 << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
996 << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(outputs[i]);
997 }
998 }
999 }
1000
1001 TEST(ROUNDZ__NEON_CVT, negative_snan) {
1002 std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
1003 std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
1004 for (uint32_t n = UINT32_C(0x7F800000); n < UINT32_C(0x7FC00000); n += kBlockSize) {
1005 for (uint32_t i = 0; i < kBlockSize; i++) {
1006 inputs[i] = fp32_from_bits(UINT32_C(0x80000000) | std::max<uint32_t>(n + i, UINT32_C(0x7F800001)));
1007 }
1008 xnn_math_f32_roundz__neon_cvt(kBlockSize * sizeof(float), inputs.data(), outputs.data());
1009 for (uint32_t i = 0; i < kBlockSize; i++) {
1010 const uint32_t reference_output = fp32_to_bits(std::trunc(inputs[i]));
1011 ASSERT_EQ(reference_output & UINT32_C(0xFFBFFFFF), fp32_to_bits(outputs[i]) & UINT32_C(0xFFBFFFFF))
1012 << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(inputs[i])
1013 << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
1014 << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(outputs[i]);
1015 }
1016 }
1017 }
1018
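  // The snan_to_qnan variants below additionally require bit-exact agreement with
  // std::trunc on signaling-NaN inputs (i.e. the quiet bit must be set the same way).
  // They carry the DISABLED_ prefix for this kernel, so GoogleTest skips them by default
  // (run with --gtest_also_run_disabled_tests to include them), presumably because the
  // CVT-based kernel does not guarantee this behavior.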
1019 TEST(ROUNDZ__NEON_CVT, DISABLED_positive_snan_to_qnan) {
1020 std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
1021 std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
1022 for (uint32_t n = UINT32_C(0x7F800000); n < UINT32_C(0x7FC00000); n += kBlockSize) {
1023 for (uint32_t i = 0; i < kBlockSize; i++) {
1024 inputs[i] = fp32_from_bits(std::max<uint32_t>(n + i, UINT32_C(0x7F800001)));
1025 }
1026 xnn_math_f32_roundz__neon_cvt(kBlockSize * sizeof(float), inputs.data(), outputs.data());
1027 for (uint32_t i = 0; i < kBlockSize; i++) {
1028 const uint32_t reference_output = fp32_to_bits(std::trunc(inputs[i]));
1029 ASSERT_EQ(reference_output, fp32_to_bits(outputs[i]))
1030 << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(inputs[i])
1031 << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
1032 << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(outputs[i]);
1033 }
1034 }
1035 }
1036
1037 TEST(ROUNDZ__NEON_CVT, DISABLED_negative_snan_to_qnan) {
1038 std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
1039 std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
1040 for (uint32_t n = UINT32_C(0x7F800000); n < UINT32_C(0x7FC00000); n += kBlockSize) {
1041 for (uint32_t i = 0; i < kBlockSize; i++) {
1042 inputs[i] = fp32_from_bits(UINT32_C(0x80000000) | std::max<uint32_t>(n + i, UINT32_C(0x7F800001)));
1043 }
1044 xnn_math_f32_roundz__neon_cvt(kBlockSize * sizeof(float), inputs.data(), outputs.data());
1045 for (uint32_t i = 0; i < kBlockSize; i++) {
1046 const uint32_t reference_output = fp32_to_bits(std::trunc(inputs[i]));
1047 ASSERT_EQ(reference_output, fp32_to_bits(outputs[i]))
1048 << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(inputs[i])
1049 << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
1050 << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(outputs[i]);
1051 }
1052 }
1053 }
1054#endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
1055
1056#if XNN_ARCH_ARM || XNN_ARCH_ARM64
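  // ROUNDZ__NEONV8: round-toward-zero kernel, presumably backed by the ARMv8 FRINTZ
  // (vrndq_f32) instruction. As with the other kernels, the *_normal tests sweep every
  // finite bit pattern below 2^24 (0x4B800000) and the *_integral tests sweep the
  // remaining finite patterns, all of which already represent whole numbers, comparing
  // each result against std::trunc.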
1057 TEST(ROUNDZ__NEONV8, positive_normal) {
1058 std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
1059 std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
1060 for (uint32_t n = UINT32_C(0x00000000); n < UINT32_C(0x4B800000); n += kBlockSize) {
1061 for (uint32_t i = 0; i < kBlockSize; i++) {
1062 inputs[i] = fp32_from_bits(n + i);
1063 }
1064 xnn_math_f32_roundz__neonv8(kBlockSize * sizeof(float), inputs.data(), outputs.data());
1065 for (uint32_t i = 0; i < kBlockSize; i++) {
1066 const uint32_t reference_output = fp32_to_bits(std::trunc(inputs[i]));
1067 ASSERT_EQ(reference_output, fp32_to_bits(outputs[i]))
1068 << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(inputs[i])
1069 << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
1070 << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(outputs[i]);
1071 }
1072 }
1073 }
1074
1075 TEST(ROUNDZ__NEONV8, negative_normal) {
1076 std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
1077 std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
1078 for (uint32_t n = UINT32_C(0x80000000); n < UINT32_C(0xCB800000); n += kBlockSize) {
1079 for (uint32_t i = 0; i < kBlockSize; i++) {
1080 inputs[i] = fp32_from_bits(n + i);
1081 }
1082 xnn_math_f32_roundz__neonv8(kBlockSize * sizeof(float), inputs.data(), outputs.data());
1083 for (uint32_t i = 0; i < kBlockSize; i++) {
1084 const uint32_t reference_output = fp32_to_bits(std::trunc(inputs[i]));
1085 ASSERT_EQ(reference_output, fp32_to_bits(outputs[i]))
1086 << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(inputs[i])
1087 << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
1088 << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(outputs[i]);
1089 }
1090 }
1091 }
1092
1093 TEST(ROUNDZ__NEONV8, positive_integral) {
1094 std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
1095 std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
1096 for (uint32_t n = UINT32_C(0x4B800000); n < UINT32_C(0x7F800000); n += kBlockSize) {
1097 for (uint32_t i = 0; i < kBlockSize; i++) {
1098 inputs[i] = fp32_from_bits(n + i);
1099 }
1100 xnn_math_f32_roundz__neonv8(kBlockSize * sizeof(float), inputs.data(), outputs.data());
1101 for (uint32_t i = 0; i < kBlockSize; i++) {
1102 const uint32_t reference_output = fp32_to_bits(std::trunc(inputs[i]));
1103 ASSERT_EQ(reference_output, fp32_to_bits(outputs[i]))
1104 << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(inputs[i])
1105 << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
1106 << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(outputs[i]);
1107 }
1108 }
1109 }
1110
1111 TEST(ROUNDZ__NEONV8, negative_integral) {
1112 std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
1113 std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
1114 for (uint32_t n = UINT32_C(0xCB800000); n < UINT32_C(0xFF800000); n += kBlockSize) {
1115 for (uint32_t i = 0; i < kBlockSize; i++) {
1116 inputs[i] = fp32_from_bits(n + i);
1117 }
1118 xnn_math_f32_roundz__neonv8(kBlockSize * sizeof(float), inputs.data(), outputs.data());
1119 for (uint32_t i = 0; i < kBlockSize; i++) {
1120 const uint32_t reference_output = fp32_to_bits(std::trunc(inputs[i]));
1121 ASSERT_EQ(reference_output, fp32_to_bits(outputs[i]))
1122 << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(inputs[i])
1123 << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
1124 << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(outputs[i]);
1125 }
1126 }
1127 }
1128
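  // Infinities must pass through unchanged: std::trunc(+/-inf) == +/-inf.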
1129 TEST(ROUNDZ__NEONV8, positive_infinity) {
1130 std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
1131 std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
1132 std::fill(inputs.begin(), inputs.end(), +std::numeric_limits<float>::infinity());
1133 xnn_math_f32_roundz__neonv8(kBlockSize * sizeof(float), inputs.data(), outputs.data());
1134 const uint32_t reference_output = fp32_to_bits(std::trunc(inputs[0]));
1135 ASSERT_EQ(reference_output, fp32_to_bits(outputs[0]))
1136 << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(inputs[0])
1137 << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
1138 << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(outputs[0]);
1139 }
1140
1141 TEST(ROUNDZ__NEONV8, negative_infinity) {
1142 std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
1143 std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
1144 std::fill(inputs.begin(), inputs.end(), -std::numeric_limits<float>::infinity());
1145 xnn_math_f32_roundz__neonv8(kBlockSize * sizeof(float), inputs.data(), outputs.data());
1146 const uint32_t reference_output = fp32_to_bits(std::trunc(inputs[0]));
1147 ASSERT_EQ(reference_output, fp32_to_bits(outputs[0]))
1148 << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(inputs[0])
1149 << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
1150 << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(outputs[0]);
1151 }
1152
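  // Quiet NaNs occupy bit patterns 0x7FC00000-0x7FFFFFFF (with the sign bit set for the
  // negative variants); the kernel must reproduce std::trunc's output bit-for-bit here.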
1153 TEST(ROUNDZ__NEONV8, positive_qnan) {
1154 std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
1155 std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
1156 for (uint32_t n = UINT32_C(0x7FC00000); n < UINT32_C(0x80000000); n += kBlockSize) {
1157 for (uint32_t i = 0; i < kBlockSize; i++) {
1158 inputs[i] = fp32_from_bits(n + i);
1159 }
1160 xnn_math_f32_roundz__neonv8(kBlockSize * sizeof(float), inputs.data(), outputs.data());
1161 for (uint32_t i = 0; i < kBlockSize; i++) {
1162 const uint32_t reference_output = fp32_to_bits(std::trunc(inputs[i]));
1163 ASSERT_EQ(reference_output, fp32_to_bits(outputs[i]))
1164 << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(inputs[i])
1165 << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
1166 << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(outputs[i]);
1167 }
1168 }
1169 }
1170
1171 TEST(ROUNDZ__NEONV8, negative_qnan) {
1172 std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
1173 std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
1174 for (uint32_t n = UINT32_C(0x7FC00000); n < UINT32_C(0x80000000); n += kBlockSize) {
1175 for (uint32_t i = 0; i < kBlockSize; i++) {
1176 inputs[i] = fp32_from_bits(UINT32_C(0x80000000) | (n + i));
1177 }
1178 xnn_math_f32_roundz__neonv8(kBlockSize * sizeof(float), inputs.data(), outputs.data());
1179 for (uint32_t i = 0; i < kBlockSize; i++) {
1180 const uint32_t reference_output = fp32_to_bits(std::trunc(inputs[i]));
1181 ASSERT_EQ(reference_output, fp32_to_bits(outputs[i]))
1182 << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(inputs[i])
1183 << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
1184 << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(outputs[i]);
1185 }
1186 }
1187 }
1188
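  // Signaling NaNs occupy 0x7F800001-0x7FBFFFFF. The comparison masks out bit 22 with
  // 0xFFBFFFFF, so the kernel may quiet a signaling NaN as long as the sign and the
  // remaining payload bits match the reference.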
1189 TEST(ROUNDZ__NEONV8, positive_snan) {
1190 std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
1191 std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
1192 for (uint32_t n = UINT32_C(0x7F800000); n < UINT32_C(0x7FC00000); n += kBlockSize) {
1193 for (uint32_t i = 0; i < kBlockSize; i++) {
1194 inputs[i] = fp32_from_bits(std::max<uint32_t>(n + i, UINT32_C(0x7F800001)));
1195 }
1196 xnn_math_f32_roundz__neonv8(kBlockSize * sizeof(float), inputs.data(), outputs.data());
1197 for (uint32_t i = 0; i < kBlockSize; i++) {
1198 const uint32_t reference_output = fp32_to_bits(std::trunc(inputs[i]));
1199 ASSERT_EQ(reference_output & UINT32_C(0xFFBFFFFF), fp32_to_bits(outputs[i]) & UINT32_C(0xFFBFFFFF))
1200 << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(inputs[i])
1201 << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
1202 << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(outputs[i]);
1203 }
1204 }
1205 }
1206
1207 TEST(ROUNDZ__NEONV8, negative_snan) {
1208 std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
1209 std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
1210 for (uint32_t n = UINT32_C(0x7F800000); n < UINT32_C(0x7FC00000); n += kBlockSize) {
1211 for (uint32_t i = 0; i < kBlockSize; i++) {
1212 inputs[i] = fp32_from_bits(UINT32_C(0x80000000) | std::max<uint32_t>(n + i, UINT32_C(0x7F800001)));
1213 }
1214 xnn_math_f32_roundz__neonv8(kBlockSize * sizeof(float), inputs.data(), outputs.data());
1215 for (uint32_t i = 0; i < kBlockSize; i++) {
1216 const uint32_t reference_output = fp32_to_bits(std::trunc(inputs[i]));
1217 ASSERT_EQ(reference_output & UINT32_C(0xFFBFFFFF), fp32_to_bits(outputs[i]) & UINT32_C(0xFFBFFFFF))
1218 << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(inputs[i])
1219 << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
1220 << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(outputs[i]);
1221 }
1222 }
1223 }
1224
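  // Unlike the *_snan tests above, these require exact equality with std::trunc's
  // output, i.e. signaling NaNs must be quieted exactly as the reference does.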
1225 TEST(ROUNDZ__NEONV8, positive_snan_to_qnan) {
1226 std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
1227 std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
1228 for (uint32_t n = UINT32_C(0x7F800000); n < UINT32_C(0x7FC00000); n += kBlockSize) {
1229 for (uint32_t i = 0; i < kBlockSize; i++) {
1230 inputs[i] = fp32_from_bits(std::max<uint32_t>(n + i, UINT32_C(0x7F800001)));
1231 }
1232 xnn_math_f32_roundz__neonv8(kBlockSize * sizeof(float), inputs.data(), outputs.data());
1233 for (uint32_t i = 0; i < kBlockSize; i++) {
1234 const uint32_t reference_output = fp32_to_bits(std::trunc(inputs[i]));
1235 ASSERT_EQ(reference_output, fp32_to_bits(outputs[i]))
1236 << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(inputs[i])
1237 << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
1238 << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(outputs[i]);
1239 }
1240 }
1241 }
1242
1243 TEST(ROUNDZ__NEONV8, negative_snan_to_qnan) {
1244 std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
1245 std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
1246 for (uint32_t n = UINT32_C(0x7F800000); n < UINT32_C(0x7FC00000); n += kBlockSize) {
1247 for (uint32_t i = 0; i < kBlockSize; i++) {
1248 inputs[i] = fp32_from_bits(UINT32_C(0x80000000) | std::max<uint32_t>(n + i, UINT32_C(0x7F800001)));
1249 }
1250 xnn_math_f32_roundz__neonv8(kBlockSize * sizeof(float), inputs.data(), outputs.data());
1251 for (uint32_t i = 0; i < kBlockSize; i++) {
1252 const uint32_t reference_output = fp32_to_bits(std::trunc(inputs[i]));
1253 ASSERT_EQ(reference_output, fp32_to_bits(outputs[i]))
1254 << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(inputs[i])
1255 << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
1256 << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(outputs[i]);
1257 }
1258 }
1259 }
1260#endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
1261
1262#if XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
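  // ROUNDZ__WASMSIMD_ADDSUB: WAsm SIMD kernel, presumably implemented with the classic
  // add-then-subtract-2^23 magic-constant trick; it is exercised over the same
  // exhaustive bit-pattern ranges as the kernels above.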
1263 TEST(ROUNDZ__WASMSIMD_ADDSUB, positive_normal) {
1264 std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
1265 std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
1266 for (uint32_t n = UINT32_C(0x00000000); n < UINT32_C(0x4B800000); n += kBlockSize) {
1267 for (uint32_t i = 0; i < kBlockSize; i++) {
1268 inputs[i] = fp32_from_bits(n + i);
1269 }
1270 xnn_math_f32_roundz__wasmsimd_addsub(kBlockSize * sizeof(float), inputs.data(), outputs.data());
1271 for (uint32_t i = 0; i < kBlockSize; i++) {
1272 const uint32_t reference_output = fp32_to_bits(std::trunc(inputs[i]));
1273 ASSERT_EQ(reference_output, fp32_to_bits(outputs[i]))
1274 << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(inputs[i])
1275 << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
1276 << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(outputs[i]);
1277 }
1278 }
1279 }
1280
1281 TEST(ROUNDZ__WASMSIMD_ADDSUB, negative_normal) {
1282 std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
1283 std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
1284 for (uint32_t n = UINT32_C(0x80000000); n < UINT32_C(0xCB800000); n += kBlockSize) {
1285 for (uint32_t i = 0; i < kBlockSize; i++) {
1286 inputs[i] = fp32_from_bits(n + i);
1287 }
1288 xnn_math_f32_roundz__wasmsimd_addsub(kBlockSize * sizeof(float), inputs.data(), outputs.data());
1289 for (uint32_t i = 0; i < kBlockSize; i++) {
1290 const uint32_t reference_output = fp32_to_bits(std::trunc(inputs[i]));
1291 ASSERT_EQ(reference_output, fp32_to_bits(outputs[i]))
1292 << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(inputs[i])
1293 << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
1294 << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(outputs[i]);
1295 }
1296 }
1297 }
1298
1299 TEST(ROUNDZ__WASMSIMD_ADDSUB, positive_integral) {
1300 std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
1301 std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
1302 for (uint32_t n = UINT32_C(0x4B800000); n < UINT32_C(0x7F800000); n += kBlockSize) {
1303 for (uint32_t i = 0; i < kBlockSize; i++) {
1304 inputs[i] = fp32_from_bits(n + i);
1305 }
1306 xnn_math_f32_roundz__wasmsimd_addsub(kBlockSize * sizeof(float), inputs.data(), outputs.data());
1307 for (uint32_t i = 0; i < kBlockSize; i++) {
1308 const uint32_t reference_output = fp32_to_bits(std::trunc(inputs[i]));
1309 ASSERT_EQ(reference_output, fp32_to_bits(outputs[i]))
1310 << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(inputs[i])
1311 << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
1312 << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(outputs[i]);
1313 }
1314 }
1315 }
1316
1317 TEST(ROUNDZ__WASMSIMD_ADDSUB, negative_integral) {
1318 std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
1319 std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
1320 for (uint32_t n = UINT32_C(0xCB800000); n < UINT32_C(0xFF800000); n += kBlockSize) {
1321 for (uint32_t i = 0; i < kBlockSize; i++) {
1322 inputs[i] = fp32_from_bits(n + i);
1323 }
1324 xnn_math_f32_roundz__wasmsimd_addsub(kBlockSize * sizeof(float), inputs.data(), outputs.data());
1325 for (uint32_t i = 0; i < kBlockSize; i++) {
1326 const uint32_t reference_output = fp32_to_bits(std::trunc(inputs[i]));
1327 ASSERT_EQ(reference_output, fp32_to_bits(outputs[i]))
1328 << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(inputs[i])
1329 << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
1330 << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(outputs[i]);
1331 }
1332 }
1333 }
1334
1335 TEST(ROUNDZ__WASMSIMD_ADDSUB, positive_infinity) {
1336 std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
1337 std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
1338 std::fill(inputs.begin(), inputs.end(), +std::numeric_limits<float>::infinity());
1339 xnn_math_f32_roundz__wasmsimd_addsub(kBlockSize * sizeof(float), inputs.data(), outputs.data());
1340 const uint32_t reference_output = fp32_to_bits(std::trunc(inputs[0]));
1341 ASSERT_EQ(reference_output, fp32_to_bits(outputs[0]))
1342 << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(inputs[0])
1343 << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
1344 << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(outputs[0]);
1345 }
1346
1347 TEST(ROUNDZ__WASMSIMD_ADDSUB, negative_infinity) {
1348 std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
1349 std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
1350 std::fill(inputs.begin(), inputs.end(), -std::numeric_limits<float>::infinity());
1351 xnn_math_f32_roundz__wasmsimd_addsub(kBlockSize * sizeof(float), inputs.data(), outputs.data());
1352 const uint32_t reference_output = fp32_to_bits(std::trunc(inputs[0]));
1353 ASSERT_EQ(reference_output, fp32_to_bits(outputs[0]))
1354 << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(inputs[0])
1355 << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
1356 << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(outputs[0]);
1357 }
1358
1359 TEST(ROUNDZ__WASMSIMD_ADDSUB, positive_qnan) {
1360 std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
1361 std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
1362 for (uint32_t n = UINT32_C(0x7FC00000); n < UINT32_C(0x80000000); n += kBlockSize) {
1363 for (uint32_t i = 0; i < kBlockSize; i++) {
1364 inputs[i] = fp32_from_bits(n + i);
1365 }
1366 xnn_math_f32_roundz__wasmsimd_addsub(kBlockSize * sizeof(float), inputs.data(), outputs.data());
1367 for (uint32_t i = 0; i < kBlockSize; i++) {
1368 const uint32_t reference_output = fp32_to_bits(std::trunc(inputs[i]));
1369 ASSERT_EQ(reference_output, fp32_to_bits(outputs[i]))
1370 << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(inputs[i])
1371 << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
1372 << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(outputs[i]);
1373 }
1374 }
1375 }
1376
1377 TEST(ROUNDZ__WASMSIMD_ADDSUB, negative_qnan) {
1378 std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
1379 std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
1380 for (uint32_t n = UINT32_C(0x7FC00000); n < UINT32_C(0x80000000); n += kBlockSize) {
1381 for (uint32_t i = 0; i < kBlockSize; i++) {
1382 inputs[i] = fp32_from_bits(UINT32_C(0x80000000) | (n + i));
1383 }
1384 xnn_math_f32_roundz__wasmsimd_addsub(kBlockSize * sizeof(float), inputs.data(), outputs.data());
1385 for (uint32_t i = 0; i < kBlockSize; i++) {
1386 const uint32_t reference_output = fp32_to_bits(std::trunc(inputs[i]));
1387 ASSERT_EQ(reference_output, fp32_to_bits(outputs[i]))
1388 << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(inputs[i])
1389 << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
1390 << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(outputs[i]);
1391 }
1392 }
1393 }
1394
1395 TEST(ROUNDZ__WASMSIMD_ADDSUB, positive_snan) {
1396 std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
1397 std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
1398 for (uint32_t n = UINT32_C(0x7F800000); n < UINT32_C(0x7FC00000); n += kBlockSize) {
1399 for (uint32_t i = 0; i < kBlockSize; i++) {
1400 inputs[i] = fp32_from_bits(std::max<uint32_t>(n + i, UINT32_C(0x7F800001)));
1401 }
1402 xnn_math_f32_roundz__wasmsimd_addsub(kBlockSize * sizeof(float), inputs.data(), outputs.data());
1403 for (uint32_t i = 0; i < kBlockSize; i++) {
1404 const uint32_t reference_output = fp32_to_bits(std::trunc(inputs[i]));
1405 ASSERT_EQ(reference_output & UINT32_C(0xFFBFFFFF), fp32_to_bits(outputs[i]) & UINT32_C(0xFFBFFFFF))
1406 << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(inputs[i])
1407 << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
1408 << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(outputs[i]);
1409 }
1410 }
1411 }
1412
1413 TEST(ROUNDZ__WASMSIMD_ADDSUB, negative_snan) {
1414 std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
1415 std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
1416 for (uint32_t n = UINT32_C(0x7F800000); n < UINT32_C(0x7FC00000); n += kBlockSize) {
1417 for (uint32_t i = 0; i < kBlockSize; i++) {
1418 inputs[i] = fp32_from_bits(UINT32_C(0x80000000) | std::max<uint32_t>(n + i, UINT32_C(0x7F800001)));
1419 }
1420 xnn_math_f32_roundz__wasmsimd_addsub(kBlockSize * sizeof(float), inputs.data(), outputs.data());
1421 for (uint32_t i = 0; i < kBlockSize; i++) {
1422 const uint32_t reference_output = fp32_to_bits(std::trunc(inputs[i]));
1423 ASSERT_EQ(reference_output & UINT32_C(0xFFBFFFFF), fp32_to_bits(outputs[i]) & UINT32_C(0xFFBFFFFF))
1424 << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(inputs[i])
1425 << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
1426 << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(outputs[i]);
1427 }
1428 }
1429 }
1430
1431 TEST(ROUNDZ__WASMSIMD_ADDSUB, positive_snan_to_qnan) {
1432 std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
1433 std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
1434 for (uint32_t n = UINT32_C(0x7F800000); n < UINT32_C(0x7FC00000); n += kBlockSize) {
1435 for (uint32_t i = 0; i < kBlockSize; i++) {
1436 inputs[i] = fp32_from_bits(std::max<uint32_t>(n + i, UINT32_C(0x7F800001)));
1437 }
1438 xnn_math_f32_roundz__wasmsimd_addsub(kBlockSize * sizeof(float), inputs.data(), outputs.data());
1439 for (uint32_t i = 0; i < kBlockSize; i++) {
1440 const uint32_t reference_output = fp32_to_bits(std::trunc(inputs[i]));
1441 ASSERT_EQ(reference_output, fp32_to_bits(outputs[i]))
1442 << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(inputs[i])
1443 << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
1444 << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(outputs[i]);
1445 }
1446 }
1447 }
1448
1449 TEST(ROUNDZ__WASMSIMD_ADDSUB, negative_snan_to_qnan) {
1450 std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
1451 std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
1452 for (uint32_t n = UINT32_C(0x7F800000); n < UINT32_C(0x7FC00000); n += kBlockSize) {
1453 for (uint32_t i = 0; i < kBlockSize; i++) {
1454 inputs[i] = fp32_from_bits(UINT32_C(0x80000000) | std::max<uint32_t>(n + i, UINT32_C(0x7F800001)));
1455 }
1456 xnn_math_f32_roundz__wasmsimd_addsub(kBlockSize * sizeof(float), inputs.data(), outputs.data());
1457 for (uint32_t i = 0; i < kBlockSize; i++) {
1458 const uint32_t reference_output = fp32_to_bits(std::trunc(inputs[i]));
1459 ASSERT_EQ(reference_output, fp32_to_bits(outputs[i]))
1460 << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(inputs[i])
1461 << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
1462 << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(outputs[i]);
1463 }
1464 }
1465 }
1466#endif // XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
1467
1468#if XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
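  // ROUNDZ__WASMSIMD_CVT: a conversion-based kernel (per its _cvt suffix). Its
  // snan_to_qnan tests further below are DISABLED, presumably because exact quiet-bit
  // handling of signaling NaNs is not guaranteed by this implementation.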
1469 TEST(ROUNDZ__WASMSIMD_CVT, positive_normal) {
1470 std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
1471 std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
1472 for (uint32_t n = UINT32_C(0x00000000); n < UINT32_C(0x4B800000); n += kBlockSize) {
1473 for (uint32_t i = 0; i < kBlockSize; i++) {
1474 inputs[i] = fp32_from_bits(n + i);
1475 }
1476 xnn_math_f32_roundz__wasmsimd_cvt(kBlockSize * sizeof(float), inputs.data(), outputs.data());
1477 for (uint32_t i = 0; i < kBlockSize; i++) {
1478 const uint32_t reference_output = fp32_to_bits(std::trunc(inputs[i]));
1479 ASSERT_EQ(reference_output, fp32_to_bits(outputs[i]))
1480 << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(inputs[i])
1481 << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
1482 << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(outputs[i]);
1483 }
1484 }
1485 }
1486
1487 TEST(ROUNDZ__WASMSIMD_CVT, negative_normal) {
1488 std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
1489 std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
1490 for (uint32_t n = UINT32_C(0x80000000); n < UINT32_C(0xCB800000); n += kBlockSize) {
1491 for (uint32_t i = 0; i < kBlockSize; i++) {
1492 inputs[i] = fp32_from_bits(n + i);
1493 }
1494 xnn_math_f32_roundz__wasmsimd_cvt(kBlockSize * sizeof(float), inputs.data(), outputs.data());
1495 for (uint32_t i = 0; i < kBlockSize; i++) {
1496 const uint32_t reference_output = fp32_to_bits(std::trunc(inputs[i]));
1497 ASSERT_EQ(reference_output, fp32_to_bits(outputs[i]))
1498 << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(inputs[i])
1499 << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
1500 << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(outputs[i]);
1501 }
1502 }
1503 }
1504
1505 TEST(ROUNDZ__WASMSIMD_CVT, positive_integral) {
1506 std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
1507 std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
1508 for (uint32_t n = UINT32_C(0x4B800000); n < UINT32_C(0x7F800000); n += kBlockSize) {
1509 for (uint32_t i = 0; i < kBlockSize; i++) {
1510 inputs[i] = fp32_from_bits(n + i);
1511 }
1512 xnn_math_f32_roundz__wasmsimd_cvt(kBlockSize * sizeof(float), inputs.data(), outputs.data());
1513 for (uint32_t i = 0; i < kBlockSize; i++) {
1514 const uint32_t reference_output = fp32_to_bits(std::trunc(inputs[i]));
1515 ASSERT_EQ(reference_output, fp32_to_bits(outputs[i]))
1516 << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(inputs[i])
1517 << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
1518 << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(outputs[i]);
1519 }
1520 }
1521 }
1522
1523 TEST(ROUNDZ__WASMSIMD_CVT, negative_integral) {
1524 std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
1525 std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
1526 for (uint32_t n = UINT32_C(0xCB800000); n < UINT32_C(0xFF800000); n += kBlockSize) {
1527 for (uint32_t i = 0; i < kBlockSize; i++) {
1528 inputs[i] = fp32_from_bits(n + i);
1529 }
1530 xnn_math_f32_roundz__wasmsimd_cvt(kBlockSize * sizeof(float), inputs.data(), outputs.data());
1531 for (uint32_t i = 0; i < kBlockSize; i++) {
1532 const uint32_t reference_output = fp32_to_bits(std::trunc(inputs[i]));
1533 ASSERT_EQ(reference_output, fp32_to_bits(outputs[i]))
1534 << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(inputs[i])
1535 << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
1536 << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(outputs[i]);
1537 }
1538 }
1539 }
1540
1541 TEST(ROUNDZ__WASMSIMD_CVT, positive_infinity) {
1542 std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
1543 std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
1544 std::fill(inputs.begin(), inputs.end(), +std::numeric_limits<float>::infinity());
1545 xnn_math_f32_roundz__wasmsimd_cvt(kBlockSize * sizeof(float), inputs.data(), outputs.data());
1546 const uint32_t reference_output = fp32_to_bits(std::trunc(inputs[0]));
1547 ASSERT_EQ(reference_output, fp32_to_bits(outputs[0]))
1548 << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(inputs[0])
1549 << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
1550 << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(outputs[0]);
1551 }
1552
1553 TEST(ROUNDZ__WASMSIMD_CVT, negative_infinity) {
1554 std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
1555 std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
1556 std::fill(inputs.begin(), inputs.end(), -std::numeric_limits<float>::infinity());
1557 xnn_math_f32_roundz__wasmsimd_cvt(kBlockSize * sizeof(float), inputs.data(), outputs.data());
1558 const uint32_t reference_output = fp32_to_bits(std::trunc(inputs[0]));
1559 ASSERT_EQ(reference_output, fp32_to_bits(outputs[0]))
1560 << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(inputs[0])
1561 << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
1562 << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(outputs[0]);
1563 }
1564
1565 TEST(ROUNDZ__WASMSIMD_CVT, positive_qnan) {
1566 std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
1567 std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
1568 for (uint32_t n = UINT32_C(0x7FC00000); n < UINT32_C(0x80000000); n += kBlockSize) {
1569 for (uint32_t i = 0; i < kBlockSize; i++) {
1570 inputs[i] = fp32_from_bits(n + i);
1571 }
1572 xnn_math_f32_roundz__wasmsimd_cvt(kBlockSize * sizeof(float), inputs.data(), outputs.data());
1573 for (uint32_t i = 0; i < kBlockSize; i++) {
1574 const uint32_t reference_output = fp32_to_bits(std::trunc(inputs[i]));
1575 ASSERT_EQ(reference_output, fp32_to_bits(outputs[i]))
1576 << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(inputs[i])
1577 << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
1578 << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(outputs[i]);
1579 }
1580 }
1581 }
1582
1583 TEST(ROUNDZ__WASMSIMD_CVT, negative_qnan) {
1584 std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
1585 std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
1586 for (uint32_t n = UINT32_C(0x7FC00000); n < UINT32_C(0x80000000); n += kBlockSize) {
1587 for (uint32_t i = 0; i < kBlockSize; i++) {
1588 inputs[i] = fp32_from_bits(UINT32_C(0x80000000) | (n + i));
1589 }
1590 xnn_math_f32_roundz__wasmsimd_cvt(kBlockSize * sizeof(float), inputs.data(), outputs.data());
1591 for (uint32_t i = 0; i < kBlockSize; i++) {
1592 const uint32_t reference_output = fp32_to_bits(std::trunc(inputs[i]));
1593 ASSERT_EQ(reference_output, fp32_to_bits(outputs[i]))
1594 << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(inputs[i])
1595 << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
1596 << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(outputs[i]);
1597 }
1598 }
1599 }
1600
1601 TEST(ROUNDZ__WASMSIMD_CVT, positive_snan) {
1602 std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
1603 std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
1604 for (uint32_t n = UINT32_C(0x7F800000); n < UINT32_C(0x7FC00000); n += kBlockSize) {
1605 for (uint32_t i = 0; i < kBlockSize; i++) {
1606 inputs[i] = fp32_from_bits(std::max<uint32_t>(n + i, UINT32_C(0x7F800001)));
1607 }
1608 xnn_math_f32_roundz__wasmsimd_cvt(kBlockSize * sizeof(float), inputs.data(), outputs.data());
1609 for (uint32_t i = 0; i < kBlockSize; i++) {
1610 const uint32_t reference_output = fp32_to_bits(std::trunc(inputs[i]));
1611 ASSERT_EQ(reference_output & UINT32_C(0xFFBFFFFF), fp32_to_bits(outputs[i]) & UINT32_C(0xFFBFFFFF))
1612 << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(inputs[i])
1613 << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
1614 << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(outputs[i]);
1615 }
1616 }
1617 }
1618
1619 TEST(ROUNDZ__WASMSIMD_CVT, negative_snan) {
1620 std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
1621 std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
1622 for (uint32_t n = UINT32_C(0x7F800000); n < UINT32_C(0x7FC00000); n += kBlockSize) {
1623 for (uint32_t i = 0; i < kBlockSize; i++) {
1624 inputs[i] = fp32_from_bits(UINT32_C(0x80000000) | std::max<uint32_t>(n + i, UINT32_C(0x7F800001)));
1625 }
1626 xnn_math_f32_roundz__wasmsimd_cvt(kBlockSize * sizeof(float), inputs.data(), outputs.data());
1627 for (uint32_t i = 0; i < kBlockSize; i++) {
1628 const uint32_t reference_output = fp32_to_bits(std::trunc(inputs[i]));
1629 ASSERT_EQ(reference_output & UINT32_C(0xFFBFFFFF), fp32_to_bits(outputs[i]) & UINT32_C(0xFFBFFFFF))
1630 << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(inputs[i])
1631 << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
1632 << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(outputs[i]);
1633 }
1634 }
1635 }
1636
1637 TEST(ROUNDZ__WASMSIMD_CVT, DISABLED_positive_snan_to_qnan) {
1638 std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
1639 std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
1640 for (uint32_t n = UINT32_C(0x7F800000); n < UINT32_C(0x7FC00000); n += kBlockSize) {
1641 for (uint32_t i = 0; i < kBlockSize; i++) {
1642 inputs[i] = fp32_from_bits(std::max<uint32_t>(n + i, UINT32_C(0x7F800001)));
1643 }
1644 xnn_math_f32_roundz__wasmsimd_cvt(kBlockSize * sizeof(float), inputs.data(), outputs.data());
1645 for (uint32_t i = 0; i < kBlockSize; i++) {
1646 const uint32_t reference_output = fp32_to_bits(std::trunc(inputs[i]));
1647 ASSERT_EQ(reference_output, fp32_to_bits(outputs[i]))
1648 << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(inputs[i])
1649 << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
1650 << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(outputs[i]);
1651 }
1652 }
1653 }
1654
1655 TEST(ROUNDZ__WASMSIMD_CVT, DISABLED_negative_snan_to_qnan) {
1656 std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
1657 std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
1658 for (uint32_t n = UINT32_C(0x7F800000); n < UINT32_C(0x7FC00000); n += kBlockSize) {
1659 for (uint32_t i = 0; i < kBlockSize; i++) {
1660 inputs[i] = fp32_from_bits(UINT32_C(0x80000000) | std::max<uint32_t>(n + i, UINT32_C(0x7F800001)));
1661 }
1662 xnn_math_f32_roundz__wasmsimd_cvt(kBlockSize * sizeof(float), inputs.data(), outputs.data());
1663 for (uint32_t i = 0; i < kBlockSize; i++) {
1664 const uint32_t reference_output = fp32_to_bits(std::trunc(inputs[i]));
1665 ASSERT_EQ(reference_output, fp32_to_bits(outputs[i]))
1666 << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(inputs[i])
1667 << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
1668 << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(outputs[i]);
1669 }
1670 }
1671 }
1672#endif // XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
1673
1674#if XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
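  // ROUNDZ__WASMSIMD_NATIVE: kernel presumably using the native f32x4.trunc instruction;
  // none of its tests are disabled, so it is expected to match std::trunc bit-for-bit,
  // including the quieting of signaling NaNs.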
1675 TEST(ROUNDZ__WASMSIMD_NATIVE, positive_normal) {
1676 std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
1677 std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
1678 for (uint32_t n = UINT32_C(0x00000000); n < UINT32_C(0x4B800000); n += kBlockSize) {
1679 for (uint32_t i = 0; i < kBlockSize; i++) {
1680 inputs[i] = fp32_from_bits(n + i);
1681 }
1682 xnn_math_f32_roundz__wasmsimd_native(kBlockSize * sizeof(float), inputs.data(), outputs.data());
1683 for (uint32_t i = 0; i < kBlockSize; i++) {
1684 const uint32_t reference_output = fp32_to_bits(std::trunc(inputs[i]));
1685 ASSERT_EQ(reference_output, fp32_to_bits(outputs[i]))
1686 << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(inputs[i])
1687 << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
1688 << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(outputs[i]);
1689 }
1690 }
1691 }
1692
1693 TEST(ROUNDZ__WASMSIMD_NATIVE, negative_normal) {
1694 std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
1695 std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
1696 for (uint32_t n = UINT32_C(0x80000000); n < UINT32_C(0xCB800000); n += kBlockSize) {
1697 for (uint32_t i = 0; i < kBlockSize; i++) {
1698 inputs[i] = fp32_from_bits(n + i);
1699 }
1700 xnn_math_f32_roundz__wasmsimd_native(kBlockSize * sizeof(float), inputs.data(), outputs.data());
1701 for (uint32_t i = 0; i < kBlockSize; i++) {
1702 const uint32_t reference_output = fp32_to_bits(std::trunc(inputs[i]));
1703 ASSERT_EQ(reference_output, fp32_to_bits(outputs[i]))
1704 << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(inputs[i])
1705 << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
1706 << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(outputs[i]);
1707 }
1708 }
1709 }
1710
1711 TEST(ROUNDZ__WASMSIMD_NATIVE, positive_integral) {
1712 std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
1713 std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
1714 for (uint32_t n = UINT32_C(0x4B800000); n < UINT32_C(0x7F800000); n += kBlockSize) {
1715 for (uint32_t i = 0; i < kBlockSize; i++) {
1716 inputs[i] = fp32_from_bits(n + i);
1717 }
1718 xnn_math_f32_roundz__wasmsimd_native(kBlockSize * sizeof(float), inputs.data(), outputs.data());
1719 for (uint32_t i = 0; i < kBlockSize; i++) {
1720 const uint32_t reference_output = fp32_to_bits(std::trunc(inputs[i]));
1721 ASSERT_EQ(reference_output, fp32_to_bits(outputs[i]))
1722 << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(inputs[i])
1723 << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
1724 << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(outputs[i]);
1725 }
1726 }
1727 }
1728
1729 TEST(ROUNDZ__WASMSIMD_NATIVE, negative_integral) {
1730 std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
1731 std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
1732 for (uint32_t n = UINT32_C(0xCB800000); n < UINT32_C(0xFF800000); n += kBlockSize) {
1733 for (uint32_t i = 0; i < kBlockSize; i++) {
1734 inputs[i] = fp32_from_bits(n + i);
1735 }
1736 xnn_math_f32_roundz__wasmsimd_native(kBlockSize * sizeof(float), inputs.data(), outputs.data());
1737 for (uint32_t i = 0; i < kBlockSize; i++) {
1738 const uint32_t reference_output = fp32_to_bits(std::trunc(inputs[i]));
1739 ASSERT_EQ(reference_output, fp32_to_bits(outputs[i]))
1740 << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(inputs[i])
1741 << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
1742 << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(outputs[i]);
1743 }
1744 }
1745 }
1746
1747 TEST(ROUNDZ__WASMSIMD_NATIVE, positive_infinity) {
1748 std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
1749 std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
1750 std::fill(inputs.begin(), inputs.end(), +std::numeric_limits<float>::infinity());
1751 xnn_math_f32_roundz__wasmsimd_native(kBlockSize * sizeof(float), inputs.data(), outputs.data());
1752 const uint32_t reference_output = fp32_to_bits(std::trunc(inputs[0]));
1753 ASSERT_EQ(reference_output, fp32_to_bits(outputs[0]))
1754 << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(inputs[0])
1755 << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
1756 << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(outputs[0]);
1757 }
1758
1759 TEST(ROUNDZ__WASMSIMD_NATIVE, negative_infinity) {
1760 std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
1761 std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
1762 std::fill(inputs.begin(), inputs.end(), -std::numeric_limits<float>::infinity());
1763 xnn_math_f32_roundz__wasmsimd_native(kBlockSize * sizeof(float), inputs.data(), outputs.data());
1764 const uint32_t reference_output = fp32_to_bits(std::trunc(inputs[0]));
1765 ASSERT_EQ(reference_output, fp32_to_bits(outputs[0]))
1766 << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(inputs[0])
1767 << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
1768 << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(outputs[0]);
1769 }
1770
1771 TEST(ROUNDZ__WASMSIMD_NATIVE, positive_qnan) {
1772 std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
1773 std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
1774 for (uint32_t n = UINT32_C(0x7FC00000); n < UINT32_C(0x80000000); n += kBlockSize) {
1775 for (uint32_t i = 0; i < kBlockSize; i++) {
1776 inputs[i] = fp32_from_bits(n + i);
1777 }
1778 xnn_math_f32_roundz__wasmsimd_native(kBlockSize * sizeof(float), inputs.data(), outputs.data());
1779 for (uint32_t i = 0; i < kBlockSize; i++) {
1780 const uint32_t reference_output = fp32_to_bits(std::trunc(inputs[i]));
1781 ASSERT_EQ(reference_output, fp32_to_bits(outputs[i]))
1782 << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(inputs[i])
1783 << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
1784 << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(outputs[i]);
1785 }
1786 }
1787 }
1788
1789 TEST(ROUNDZ__WASMSIMD_NATIVE, negative_qnan) {
1790 std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
1791 std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
1792 for (uint32_t n = UINT32_C(0x7FC00000); n < UINT32_C(0x80000000); n += kBlockSize) {
1793 for (uint32_t i = 0; i < kBlockSize; i++) {
1794 inputs[i] = fp32_from_bits(UINT32_C(0x80000000) | (n + i));
1795 }
1796 xnn_math_f32_roundz__wasmsimd_native(kBlockSize * sizeof(float), inputs.data(), outputs.data());
1797 for (uint32_t i = 0; i < kBlockSize; i++) {
1798 const uint32_t reference_output = fp32_to_bits(std::trunc(inputs[i]));
1799 ASSERT_EQ(reference_output, fp32_to_bits(outputs[i]))
1800 << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(inputs[i])
1801 << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
1802 << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(outputs[i]);
1803 }
1804 }
1805 }
1806
1807 TEST(ROUNDZ__WASMSIMD_NATIVE, positive_snan) {
1808 std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
1809 std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
1810 for (uint32_t n = UINT32_C(0x7F800000); n < UINT32_C(0x7FC00000); n += kBlockSize) {
1811 for (uint32_t i = 0; i < kBlockSize; i++) {
1812 inputs[i] = fp32_from_bits(std::max<uint32_t>(n + i, UINT32_C(0x7F800001)));
1813 }
1814 xnn_math_f32_roundz__wasmsimd_native(kBlockSize * sizeof(float), inputs.data(), outputs.data());
1815 for (uint32_t i = 0; i < kBlockSize; i++) {
1816 const uint32_t reference_output = fp32_to_bits(std::trunc(inputs[i]));
1817 ASSERT_EQ(reference_output & UINT32_C(0xFFBFFFFF), fp32_to_bits(outputs[i]) & UINT32_C(0xFFBFFFFF))
1818 << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(inputs[i])
1819 << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
1820 << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(outputs[i]);
1821 }
1822 }
1823 }
1824
1825 TEST(ROUNDZ__WASMSIMD_NATIVE, negative_snan) {
1826 std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
1827 std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
1828 for (uint32_t n = UINT32_C(0x7F800000); n < UINT32_C(0x7FC00000); n += kBlockSize) {
1829 for (uint32_t i = 0; i < kBlockSize; i++) {
1830 inputs[i] = fp32_from_bits(UINT32_C(0x80000000) | std::max<uint32_t>(n + i, UINT32_C(0x7F800001)));
1831 }
1832 xnn_math_f32_roundz__wasmsimd_native(kBlockSize * sizeof(float), inputs.data(), outputs.data());
1833 for (uint32_t i = 0; i < kBlockSize; i++) {
1834 const uint32_t reference_output = fp32_to_bits(std::trunc(inputs[i]));
1835 ASSERT_EQ(reference_output & UINT32_C(0xFFBFFFFF), fp32_to_bits(outputs[i]) & UINT32_C(0xFFBFFFFF))
1836 << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(inputs[i])
1837 << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
1838 << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(outputs[i]);
1839 }
1840 }
1841 }
1842
1843 TEST(ROUNDZ__WASMSIMD_NATIVE, positive_snan_to_qnan) {
1844 std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
1845 std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
1846 for (uint32_t n = UINT32_C(0x7F800000); n < UINT32_C(0x7FC00000); n += kBlockSize) {
1847 for (uint32_t i = 0; i < kBlockSize; i++) {
1848 inputs[i] = fp32_from_bits(std::max<uint32_t>(n + i, UINT32_C(0x7F800001)));
1849 }
1850 xnn_math_f32_roundz__wasmsimd_native(kBlockSize * sizeof(float), inputs.data(), outputs.data());
1851 for (uint32_t i = 0; i < kBlockSize; i++) {
1852 const uint32_t reference_output = fp32_to_bits(std::trunc(inputs[i]));
1853 ASSERT_EQ(reference_output, fp32_to_bits(outputs[i]))
1854 << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(inputs[i])
1855 << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
1856 << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(outputs[i]);
1857 }
1858 }
1859 }
1860
1861 TEST(ROUNDZ__WASMSIMD_NATIVE, negative_snan_to_qnan) {
1862 std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
1863 std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
1864 for (uint32_t n = UINT32_C(0x7F800000); n < UINT32_C(0x7FC00000); n += kBlockSize) {
1865 for (uint32_t i = 0; i < kBlockSize; i++) {
1866 inputs[i] = fp32_from_bits(UINT32_C(0x80000000) | std::max<uint32_t>(n + i, UINT32_C(0x7F800001)));
1867 }
1868 xnn_math_f32_roundz__wasmsimd_native(kBlockSize * sizeof(float), inputs.data(), outputs.data());
1869 for (uint32_t i = 0; i < kBlockSize; i++) {
1870 const uint32_t reference_output = fp32_to_bits(std::trunc(inputs[i]));
1871 ASSERT_EQ(reference_output, fp32_to_bits(outputs[i]))
1872 << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(inputs[i])
1873 << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
1874 << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(outputs[i]);
1875 }
1876 }
1877 }
1878#endif // XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
1879
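// ROUNDZ__SCALAR_ADDSUB: portable scalar kernel; note it is not wrapped in an
// architecture #if guard, so these tests run on every platform.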
1880TEST(ROUNDZ__SCALAR_ADDSUB, positive_normal) {
1881 std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
1882 std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
1883 for (uint32_t n = UINT32_C(0x00000000); n < UINT32_C(0x4B800000); n += kBlockSize) {
1884 for (uint32_t i = 0; i < kBlockSize; i++) {
1885 inputs[i] = fp32_from_bits(n + i);
1886 }
1887 xnn_math_f32_roundz__scalar_addsub(kBlockSize * sizeof(float), inputs.data(), outputs.data());
1888 for (uint32_t i = 0; i < kBlockSize; i++) {
1889 const uint32_t reference_output = fp32_to_bits(std::trunc(inputs[i]));
1890 ASSERT_EQ(reference_output, fp32_to_bits(outputs[i]))
1891 << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(inputs[i])
1892 << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
1893 << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(outputs[i]);
1894 }
1895 }
1896}
1897
1898TEST(ROUNDZ__SCALAR_ADDSUB, negative_normal) {
1899 std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
1900 std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
1901 for (uint32_t n = UINT32_C(0x80000000); n < UINT32_C(0xCB800000); n += kBlockSize) {
1902 for (uint32_t i = 0; i < kBlockSize; i++) {
1903 inputs[i] = fp32_from_bits(n + i);
1904 }
1905 xnn_math_f32_roundz__scalar_addsub(kBlockSize * sizeof(float), inputs.data(), outputs.data());
1906 for (uint32_t i = 0; i < kBlockSize; i++) {
1907 const uint32_t reference_output = fp32_to_bits(std::trunc(inputs[i]));
1908 ASSERT_EQ(reference_output, fp32_to_bits(outputs[i]))
1909 << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(inputs[i])
1910 << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
1911 << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(outputs[i]);
1912 }
1913 }
1914}
1915
1916TEST(ROUNDZ__SCALAR_ADDSUB, positive_integral) {
1917 std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
1918 std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
1919 for (uint32_t n = UINT32_C(0x4B800000); n < UINT32_C(0x7F800000); n += kBlockSize) {
1920 for (uint32_t i = 0; i < kBlockSize; i++) {
1921 inputs[i] = fp32_from_bits(n + i);
1922 }
1923 xnn_math_f32_roundz__scalar_addsub(kBlockSize * sizeof(float), inputs.data(), outputs.data());
1924 for (uint32_t i = 0; i < kBlockSize; i++) {
1925 const uint32_t reference_output = fp32_to_bits(std::trunc(inputs[i]));
1926 ASSERT_EQ(reference_output, fp32_to_bits(outputs[i]))
1927 << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(inputs[i])
1928 << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
1929 << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(outputs[i]);
1930 }
1931 }
1932}
1933
1934TEST(ROUNDZ__SCALAR_ADDSUB, negative_integral) {
1935 std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
1936 std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
1937 for (uint32_t n = UINT32_C(0xCB800000); n < UINT32_C(0xFF800000); n += kBlockSize) {
1938 for (uint32_t i = 0; i < kBlockSize; i++) {
1939 inputs[i] = fp32_from_bits(n + i);
1940 }
1941 xnn_math_f32_roundz__scalar_addsub(kBlockSize * sizeof(float), inputs.data(), outputs.data());
1942 for (uint32_t i = 0; i < kBlockSize; i++) {
1943 const uint32_t reference_output = fp32_to_bits(std::trunc(inputs[i]));
1944 ASSERT_EQ(reference_output, fp32_to_bits(outputs[i]))
1945 << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(inputs[i])
1946 << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
1947 << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(outputs[i]);
1948 }
1949 }
1950}
1951
1952TEST(ROUNDZ__SCALAR_ADDSUB, positive_infinity) {
1953 std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
1954 std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
1955 std::fill(inputs.begin(), inputs.end(), +std::numeric_limits<float>::infinity());
1956 xnn_math_f32_roundz__scalar_addsub(kBlockSize * sizeof(float), inputs.data(), outputs.data());
1957 const uint32_t reference_output = fp32_to_bits(std::trunc(inputs[0]));
1958 ASSERT_EQ(reference_output, fp32_to_bits(outputs[0]))
1959 << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(inputs[0])
1960 << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
1961 << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(outputs[0]);
1962}
1963
1964TEST(ROUNDZ__SCALAR_ADDSUB, negative_infinity) {
1965 std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
1966 std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
Marat Dukhand3102142020-06-08 01:24:01 -07001967 std::fill(inputs.begin(), inputs.end(), -std::numeric_limits<float>::infinity());
Marat Dukhan2dbb9442020-05-12 20:43:43 -07001968 xnn_math_f32_roundz__scalar_addsub(kBlockSize * sizeof(float), inputs.data(), outputs.data());
1969 const uint32_t reference_output = fp32_to_bits(std::trunc(inputs[0]));
1970 ASSERT_EQ(reference_output, fp32_to_bits(outputs[0]))
1971 << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(inputs[0])
1972 << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
1973 << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(outputs[0]);
1974}
1975
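// Quiet NaNs occupy bit patterns in [0x7FC00000, 0x80000000) (plus the sign bit in the negative
// case). The comparison against std::trunc is bit-exact, so quiet NaNs are expected to propagate
// unchanged.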
TEST(ROUNDZ__SCALAR_ADDSUB, positive_qnan) {
  std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
  std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
  for (uint32_t n = UINT32_C(0x7FC00000); n < UINT32_C(0x80000000); n += kBlockSize) {
    for (uint32_t i = 0; i < kBlockSize; i++) {
      inputs[i] = fp32_from_bits(n + i);
    }
    xnn_math_f32_roundz__scalar_addsub(kBlockSize * sizeof(float), inputs.data(), outputs.data());
    for (uint32_t i = 0; i < kBlockSize; i++) {
      const uint32_t reference_output = fp32_to_bits(std::trunc(inputs[i]));
      ASSERT_EQ(reference_output, fp32_to_bits(outputs[i]))
        << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(inputs[i])
        << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
        << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(outputs[i]);
    }
  }
}

TEST(ROUNDZ__SCALAR_ADDSUB, negative_qnan) {
  std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
  std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
  for (uint32_t n = UINT32_C(0x7FC00000); n < UINT32_C(0x80000000); n += kBlockSize) {
    for (uint32_t i = 0; i < kBlockSize; i++) {
      inputs[i] = fp32_from_bits(UINT32_C(0x80000000) | (n + i));
    }
    xnn_math_f32_roundz__scalar_addsub(kBlockSize * sizeof(float), inputs.data(), outputs.data());
    for (uint32_t i = 0; i < kBlockSize; i++) {
      const uint32_t reference_output = fp32_to_bits(std::trunc(inputs[i]));
      ASSERT_EQ(reference_output, fp32_to_bits(outputs[i]))
        << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(inputs[i])
        << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
        << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(outputs[i]);
    }
  }
}

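// Signaling NaNs occupy bit patterns in [0x7F800001, 0x7FC00000). The mask 0xFFBFFFFF below
// clears bit 22 (the quiet bit) before comparison, so the check tolerates kernels that quiet
// the NaN while still requiring the rest of the payload and the sign to match.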
TEST(ROUNDZ__SCALAR_ADDSUB, positive_snan) {
  std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
  std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
  for (uint32_t n = UINT32_C(0x7F800000); n < UINT32_C(0x7FC00000); n += kBlockSize) {
    for (uint32_t i = 0; i < kBlockSize; i++) {
      inputs[i] = fp32_from_bits(std::max<uint32_t>(n + i, UINT32_C(0x7F800001)));
    }
    xnn_math_f32_roundz__scalar_addsub(kBlockSize * sizeof(float), inputs.data(), outputs.data());
    for (uint32_t i = 0; i < kBlockSize; i++) {
      const uint32_t reference_output = fp32_to_bits(std::trunc(inputs[i]));
      ASSERT_EQ(reference_output & UINT32_C(0xFFBFFFFF), fp32_to_bits(outputs[i]) & UINT32_C(0xFFBFFFFF))
        << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(inputs[i])
        << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
        << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(outputs[i]);
    }
  }
}

TEST(ROUNDZ__SCALAR_ADDSUB, negative_snan) {
  std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
  std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
  for (uint32_t n = UINT32_C(0x7F800000); n < UINT32_C(0x7FC00000); n += kBlockSize) {
    for (uint32_t i = 0; i < kBlockSize; i++) {
      inputs[i] = fp32_from_bits(UINT32_C(0x80000000) | std::max<uint32_t>(n + i, UINT32_C(0x7F800001)));
    }
    xnn_math_f32_roundz__scalar_addsub(kBlockSize * sizeof(float), inputs.data(), outputs.data());
    for (uint32_t i = 0; i < kBlockSize; i++) {
      const uint32_t reference_output = fp32_to_bits(std::trunc(inputs[i]));
      ASSERT_EQ(reference_output & UINT32_C(0xFFBFFFFF), fp32_to_bits(outputs[i]) & UINT32_C(0xFFBFFFFF))
        << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(inputs[i])
        << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
        << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(outputs[i]);
    }
  }
}

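// Unlike the *_snan tests above, the *_snan_to_qnan tests compare all bits, i.e. the kernel is
// additionally expected to quiet signaling NaNs exactly the way std::trunc does.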
TEST(ROUNDZ__SCALAR_ADDSUB, positive_snan_to_qnan) {
  std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
  std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
  for (uint32_t n = UINT32_C(0x7F800000); n < UINT32_C(0x7FC00000); n += kBlockSize) {
    for (uint32_t i = 0; i < kBlockSize; i++) {
      inputs[i] = fp32_from_bits(std::max<uint32_t>(n + i, UINT32_C(0x7F800001)));
    }
    xnn_math_f32_roundz__scalar_addsub(kBlockSize * sizeof(float), inputs.data(), outputs.data());
    for (uint32_t i = 0; i < kBlockSize; i++) {
      const uint32_t reference_output = fp32_to_bits(std::trunc(inputs[i]));
      ASSERT_EQ(reference_output, fp32_to_bits(outputs[i]))
        << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(inputs[i])
        << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
        << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(outputs[i]);
    }
  }
}

TEST(ROUNDZ__SCALAR_ADDSUB, negative_snan_to_qnan) {
  std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
  std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
  for (uint32_t n = UINT32_C(0x7F800000); n < UINT32_C(0x7FC00000); n += kBlockSize) {
    for (uint32_t i = 0; i < kBlockSize; i++) {
      inputs[i] = fp32_from_bits(UINT32_C(0x80000000) | std::max<uint32_t>(n + i, UINT32_C(0x7F800001)));
    }
    xnn_math_f32_roundz__scalar_addsub(kBlockSize * sizeof(float), inputs.data(), outputs.data());
    for (uint32_t i = 0; i < kBlockSize; i++) {
      const uint32_t reference_output = fp32_to_bits(std::trunc(inputs[i]));
      ASSERT_EQ(reference_output, fp32_to_bits(outputs[i]))
        << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(inputs[i])
        << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
        << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(outputs[i]);
    }
  }
}

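// The same input partitions are repeated below for the xnn_math_f32_roundz__scalar_cvt kernel,
// the second scalar variant under test (presumably truncation via integer conversion, as the
// _cvt suffix suggests).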
TEST(ROUNDZ__SCALAR_CVT, positive_normal) {
  std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
  std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
  for (uint32_t n = UINT32_C(0x00000000); n < UINT32_C(0x4B800000); n += kBlockSize) {
    for (uint32_t i = 0; i < kBlockSize; i++) {
      inputs[i] = fp32_from_bits(n + i);
    }
    xnn_math_f32_roundz__scalar_cvt(kBlockSize * sizeof(float), inputs.data(), outputs.data());
    for (uint32_t i = 0; i < kBlockSize; i++) {
      const uint32_t reference_output = fp32_to_bits(std::trunc(inputs[i]));
      ASSERT_EQ(reference_output, fp32_to_bits(outputs[i]))
        << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(inputs[i])
        << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
        << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(outputs[i]);
    }
  }
}

TEST(ROUNDZ__SCALAR_CVT, negative_normal) {
  std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
  std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
  for (uint32_t n = UINT32_C(0x80000000); n < UINT32_C(0xCB800000); n += kBlockSize) {
    for (uint32_t i = 0; i < kBlockSize; i++) {
      inputs[i] = fp32_from_bits(n + i);
    }
    xnn_math_f32_roundz__scalar_cvt(kBlockSize * sizeof(float), inputs.data(), outputs.data());
    for (uint32_t i = 0; i < kBlockSize; i++) {
      const uint32_t reference_output = fp32_to_bits(std::trunc(inputs[i]));
      ASSERT_EQ(reference_output, fp32_to_bits(outputs[i]))
        << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(inputs[i])
        << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
        << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(outputs[i]);
    }
  }
}

TEST(ROUNDZ__SCALAR_CVT, positive_integral) {
  std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
  std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
  for (uint32_t n = UINT32_C(0x4B800000); n < UINT32_C(0x7F800000); n += kBlockSize) {
    for (uint32_t i = 0; i < kBlockSize; i++) {
      inputs[i] = fp32_from_bits(n + i);
    }
    xnn_math_f32_roundz__scalar_cvt(kBlockSize * sizeof(float), inputs.data(), outputs.data());
    for (uint32_t i = 0; i < kBlockSize; i++) {
      const uint32_t reference_output = fp32_to_bits(std::trunc(inputs[i]));
      ASSERT_EQ(reference_output, fp32_to_bits(outputs[i]))
        << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(inputs[i])
        << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
        << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(outputs[i]);
    }
  }
}

TEST(ROUNDZ__SCALAR_CVT, negative_integral) {
  std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
  std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
  for (uint32_t n = UINT32_C(0xCB800000); n < UINT32_C(0xFF800000); n += kBlockSize) {
    for (uint32_t i = 0; i < kBlockSize; i++) {
      inputs[i] = fp32_from_bits(n + i);
    }
    xnn_math_f32_roundz__scalar_cvt(kBlockSize * sizeof(float), inputs.data(), outputs.data());
    for (uint32_t i = 0; i < kBlockSize; i++) {
      const uint32_t reference_output = fp32_to_bits(std::trunc(inputs[i]));
      ASSERT_EQ(reference_output, fp32_to_bits(outputs[i]))
        << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(inputs[i])
        << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
        << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(outputs[i]);
    }
  }
}

TEST(ROUNDZ__SCALAR_CVT, positive_infinity) {
  std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
  std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
  std::fill(inputs.begin(), inputs.end(), +std::numeric_limits<float>::infinity());
  xnn_math_f32_roundz__scalar_cvt(kBlockSize * sizeof(float), inputs.data(), outputs.data());
  const uint32_t reference_output = fp32_to_bits(std::trunc(inputs[0]));
  ASSERT_EQ(reference_output, fp32_to_bits(outputs[0]))
    << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(inputs[0])
    << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
    << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(outputs[0]);
}

TEST(ROUNDZ__SCALAR_CVT, negative_infinity) {
  std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
  std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
  std::fill(inputs.begin(), inputs.end(), -std::numeric_limits<float>::infinity());
  xnn_math_f32_roundz__scalar_cvt(kBlockSize * sizeof(float), inputs.data(), outputs.data());
  const uint32_t reference_output = fp32_to_bits(std::trunc(inputs[0]));
  ASSERT_EQ(reference_output, fp32_to_bits(outputs[0]))
    << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(inputs[0])
    << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
    << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(outputs[0]);
}

TEST(ROUNDZ__SCALAR_CVT, positive_qnan) {
  std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
  std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
  for (uint32_t n = UINT32_C(0x7FC00000); n < UINT32_C(0x80000000); n += kBlockSize) {
    for (uint32_t i = 0; i < kBlockSize; i++) {
      inputs[i] = fp32_from_bits(n + i);
    }
    xnn_math_f32_roundz__scalar_cvt(kBlockSize * sizeof(float), inputs.data(), outputs.data());
    for (uint32_t i = 0; i < kBlockSize; i++) {
      const uint32_t reference_output = fp32_to_bits(std::trunc(inputs[i]));
      ASSERT_EQ(reference_output, fp32_to_bits(outputs[i]))
        << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(inputs[i])
        << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
        << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(outputs[i]);
    }
  }
}

TEST(ROUNDZ__SCALAR_CVT, negative_qnan) {
  std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
  std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
  for (uint32_t n = UINT32_C(0x7FC00000); n < UINT32_C(0x80000000); n += kBlockSize) {
    for (uint32_t i = 0; i < kBlockSize; i++) {
      inputs[i] = fp32_from_bits(UINT32_C(0x80000000) | (n + i));
    }
    xnn_math_f32_roundz__scalar_cvt(kBlockSize * sizeof(float), inputs.data(), outputs.data());
    for (uint32_t i = 0; i < kBlockSize; i++) {
      const uint32_t reference_output = fp32_to_bits(std::trunc(inputs[i]));
      ASSERT_EQ(reference_output, fp32_to_bits(outputs[i]))
        << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(inputs[i])
        << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
        << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(outputs[i]);
    }
  }
}

TEST(ROUNDZ__SCALAR_CVT, positive_snan) {
  std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
  std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
  for (uint32_t n = UINT32_C(0x7F800000); n < UINT32_C(0x7FC00000); n += kBlockSize) {
    for (uint32_t i = 0; i < kBlockSize; i++) {
      inputs[i] = fp32_from_bits(std::max<uint32_t>(n + i, UINT32_C(0x7F800001)));
    }
    xnn_math_f32_roundz__scalar_cvt(kBlockSize * sizeof(float), inputs.data(), outputs.data());
    for (uint32_t i = 0; i < kBlockSize; i++) {
      const uint32_t reference_output = fp32_to_bits(std::trunc(inputs[i]));
      ASSERT_EQ(reference_output & UINT32_C(0xFFBFFFFF), fp32_to_bits(outputs[i]) & UINT32_C(0xFFBFFFFF))
        << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(inputs[i])
        << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
        << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(outputs[i]);
    }
  }
}

TEST(ROUNDZ__SCALAR_CVT, negative_snan) {
  std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
  std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
  for (uint32_t n = UINT32_C(0x7F800000); n < UINT32_C(0x7FC00000); n += kBlockSize) {
    for (uint32_t i = 0; i < kBlockSize; i++) {
      inputs[i] = fp32_from_bits(UINT32_C(0x80000000) | std::max<uint32_t>(n + i, UINT32_C(0x7F800001)));
    }
    xnn_math_f32_roundz__scalar_cvt(kBlockSize * sizeof(float), inputs.data(), outputs.data());
    for (uint32_t i = 0; i < kBlockSize; i++) {
      const uint32_t reference_output = fp32_to_bits(std::trunc(inputs[i]));
      ASSERT_EQ(reference_output & UINT32_C(0xFFBFFFFF), fp32_to_bits(outputs[i]) & UINT32_C(0xFFBFFFFF))
        << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(inputs[i])
        << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
        << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(outputs[i]);
    }
  }
}

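// The snan_to_qnan checks are compiled but skipped (DISABLED_ prefix) for the cvt variant,
// presumably because conversion-based truncation is not guaranteed to quiet signaling NaNs.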
TEST(ROUNDZ__SCALAR_CVT, DISABLED_positive_snan_to_qnan) {
  std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
  std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
  for (uint32_t n = UINT32_C(0x7F800000); n < UINT32_C(0x7FC00000); n += kBlockSize) {
    for (uint32_t i = 0; i < kBlockSize; i++) {
      inputs[i] = fp32_from_bits(std::max<uint32_t>(n + i, UINT32_C(0x7F800001)));
    }
    xnn_math_f32_roundz__scalar_cvt(kBlockSize * sizeof(float), inputs.data(), outputs.data());
    for (uint32_t i = 0; i < kBlockSize; i++) {
      const uint32_t reference_output = fp32_to_bits(std::trunc(inputs[i]));
      ASSERT_EQ(reference_output, fp32_to_bits(outputs[i]))
        << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(inputs[i])
        << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
        << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(outputs[i]);
    }
  }
}

TEST(ROUNDZ__SCALAR_CVT, DISABLED_negative_snan_to_qnan) {
  std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
  std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
  for (uint32_t n = UINT32_C(0x7F800000); n < UINT32_C(0x7FC00000); n += kBlockSize) {
    for (uint32_t i = 0; i < kBlockSize; i++) {
      inputs[i] = fp32_from_bits(UINT32_C(0x80000000) | std::max<uint32_t>(n + i, UINT32_C(0x7F800001)));
    }
    xnn_math_f32_roundz__scalar_cvt(kBlockSize * sizeof(float), inputs.data(), outputs.data());
    for (uint32_t i = 0; i < kBlockSize; i++) {
      const uint32_t reference_output = fp32_to_bits(std::trunc(inputs[i]));
      ASSERT_EQ(reference_output, fp32_to_bits(outputs[i]))
        << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(inputs[i])
        << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
        << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(outputs[i]);
    }
  }
}