// Copyright 2020 Google LLC
//
// This source code is licensed under the BSD-style license found in the
// LICENSE file in the root directory of this source tree.

#include <algorithm>
#include <cmath>
#include <cstddef>
#include <cstdint>
#include <cstdlib>
#include <iomanip>
#include <ios>
#include <limits>
#include <vector>

#include <gtest/gtest.h>

#include <fp16.h>

#include <xnnpack/AlignedAllocator.h>
#include <xnnpack/common.h>
#include <xnnpack/math-stubs.h>


constexpr int kBlockSize = 1024;
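
// Each test below sweeps one class of IEEE 754 binary32 inputs (signed
// zeroes, subnormals, normals, already-integral values, infinities, and
// NaNs) in blocks of kBlockSize elements, runs the round-down kernel under
// test, and compares each output against std::floor -- bit-for-bit, except
// where noted for signaling NaNs.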

#if XNN_ARCH_X86 || XNN_ARCH_X86_64
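  // xnn_math_f32_roundd__sse_addsub computes floor() without the SSE4.1
  // ROUNDPS instruction; per the kernel name it presumably adds and then
  // subtracts a large magic constant (2**23) and fixes up the rounding
  // direction. The tests treat it as a black box and only require bit-exact
  // agreement with std::floor.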
  TEST(ROUNDD__SSE_ADDSUB, positive_zero) {
    std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
    std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
    std::fill(inputs.begin(), inputs.end(), fp32_from_bits(UINT32_C(0x00000000)));  // +0.0f
    xnn_math_f32_roundd__sse_addsub(kBlockSize * sizeof(float), inputs.data(), outputs.data());
    const uint32_t reference_output = fp32_to_bits(std::floor(inputs[0]));
    ASSERT_EQ(reference_output, fp32_to_bits(outputs[0]))
      << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(inputs[0])
      << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
      << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(outputs[0]);
  }

  TEST(ROUNDD__SSE_ADDSUB, negative_zero) {
    std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
    std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
    std::fill(inputs.begin(), inputs.end(), fp32_from_bits(UINT32_C(0x80000000)));  // -0.0f
    xnn_math_f32_roundd__sse_addsub(kBlockSize * sizeof(float), inputs.data(), outputs.data());
    const uint32_t reference_output = fp32_to_bits(std::floor(inputs[0]));
    ASSERT_EQ(reference_output, fp32_to_bits(outputs[0]))
      << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(inputs[0])
      << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
      << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(outputs[0]);
  }

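  // Subnormal inputs sweep the bit patterns 0x00000001-0x007FFFFF (the
  // std::max clamp only excludes the zero pattern of the matching sign);
  // std::floor maps every positive subnormal to +0.0f and every negative
  // subnormal to -1.0f.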
  TEST(ROUNDD__SSE_ADDSUB, positive_subnormal) {
    std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
    std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
    for (uint32_t n = UINT32_C(0x00000000); n < UINT32_C(0x00800000); n += kBlockSize) {
      for (uint32_t i = 0; i < kBlockSize; i++) {
        inputs[i] = fp32_from_bits(std::max<uint32_t>(n + i, UINT32_C(0x00000001)));
      }
      xnn_math_f32_roundd__sse_addsub(kBlockSize * sizeof(float), inputs.data(), outputs.data());
      for (uint32_t i = 0; i < kBlockSize; i++) {
        const uint32_t reference_output = fp32_to_bits(std::floor(inputs[i]));
        ASSERT_EQ(reference_output, fp32_to_bits(outputs[i]))
          << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(inputs[i])
          << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
          << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(outputs[i]);
      }
    }
  }

  TEST(ROUNDD__SSE_ADDSUB, negative_subnormal) {
    std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
    std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
    for (uint32_t n = UINT32_C(0x80000000); n < UINT32_C(0x80800000); n += kBlockSize) {
      for (uint32_t i = 0; i < kBlockSize; i++) {
        inputs[i] = fp32_from_bits(std::max<uint32_t>(n + i, UINT32_C(0x80000001)));
      }
      xnn_math_f32_roundd__sse_addsub(kBlockSize * sizeof(float), inputs.data(), outputs.data());
      for (uint32_t i = 0; i < kBlockSize; i++) {
        const uint32_t reference_output = fp32_to_bits(std::floor(inputs[i]));
        ASSERT_EQ(reference_output, fp32_to_bits(outputs[i]))
          << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(inputs[i])
          << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
          << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(outputs[i]);
      }
    }
  }

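  // Normal inputs sweep bit patterns from 0x00800000 (2**-126, the smallest
  // positive normal) up to 0x4B800000 (2**24); values of magnitude 2**24 and
  // above are always integral and are covered by the *_integral tests below.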
  TEST(ROUNDD__SSE_ADDSUB, positive_normal) {
    std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
    std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
    for (uint32_t n = UINT32_C(0x00800000); n < UINT32_C(0x4B800000); n += kBlockSize) {
      for (uint32_t i = 0; i < kBlockSize; i++) {
        inputs[i] = fp32_from_bits(n + i);
      }
      xnn_math_f32_roundd__sse_addsub(kBlockSize * sizeof(float), inputs.data(), outputs.data());
      for (uint32_t i = 0; i < kBlockSize; i++) {
        const uint32_t reference_output = fp32_to_bits(std::floor(inputs[i]));
        ASSERT_EQ(reference_output, fp32_to_bits(outputs[i]))
          << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(inputs[i])
          << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
          << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(outputs[i]);
      }
    }
  }

  TEST(ROUNDD__SSE_ADDSUB, negative_normal) {
    std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
    std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
    for (uint32_t n = UINT32_C(0x80800000); n < UINT32_C(0xCB800000); n += kBlockSize) {
      for (uint32_t i = 0; i < kBlockSize; i++) {
        inputs[i] = fp32_from_bits(n + i);
      }
      xnn_math_f32_roundd__sse_addsub(kBlockSize * sizeof(float), inputs.data(), outputs.data());
      for (uint32_t i = 0; i < kBlockSize; i++) {
        const uint32_t reference_output = fp32_to_bits(std::floor(inputs[i]));
        ASSERT_EQ(reference_output, fp32_to_bits(outputs[i]))
          << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(inputs[i])
          << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
          << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(outputs[i]);
      }
    }
  }

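  // From 0x4B800000 (2**24) up to 0x7F800000 (infinity) every binary32 value
  // is an integer, so floor() must return its input unchanged.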
  TEST(ROUNDD__SSE_ADDSUB, positive_integral) {
    std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
    std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
    for (uint32_t n = UINT32_C(0x4B800000); n < UINT32_C(0x7F800000); n += kBlockSize) {
      for (uint32_t i = 0; i < kBlockSize; i++) {
        inputs[i] = fp32_from_bits(n + i);
      }
      xnn_math_f32_roundd__sse_addsub(kBlockSize * sizeof(float), inputs.data(), outputs.data());
      for (uint32_t i = 0; i < kBlockSize; i++) {
        const uint32_t reference_output = fp32_to_bits(std::floor(inputs[i]));
        ASSERT_EQ(reference_output, fp32_to_bits(outputs[i]))
          << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(inputs[i])
          << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
          << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(outputs[i]);
      }
    }
  }

  TEST(ROUNDD__SSE_ADDSUB, negative_integral) {
    std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
    std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
    for (uint32_t n = UINT32_C(0xCB800000); n < UINT32_C(0xFF800000); n += kBlockSize) {
      for (uint32_t i = 0; i < kBlockSize; i++) {
        inputs[i] = fp32_from_bits(n + i);
      }
      xnn_math_f32_roundd__sse_addsub(kBlockSize * sizeof(float), inputs.data(), outputs.data());
      for (uint32_t i = 0; i < kBlockSize; i++) {
        const uint32_t reference_output = fp32_to_bits(std::floor(inputs[i]));
        ASSERT_EQ(reference_output, fp32_to_bits(outputs[i]))
          << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(inputs[i])
          << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
          << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(outputs[i]);
      }
    }
  }

  TEST(ROUNDD__SSE_ADDSUB, positive_infinity) {
    std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
    std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
    std::fill(inputs.begin(), inputs.end(), +std::numeric_limits<float>::infinity());
    xnn_math_f32_roundd__sse_addsub(kBlockSize * sizeof(float), inputs.data(), outputs.data());
    const uint32_t reference_output = fp32_to_bits(std::floor(inputs[0]));
    ASSERT_EQ(reference_output, fp32_to_bits(outputs[0]))
      << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(inputs[0])
      << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
      << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(outputs[0]);
  }

  TEST(ROUNDD__SSE_ADDSUB, negative_infinity) {
    std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
    std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
    std::fill(inputs.begin(), inputs.end(), -std::numeric_limits<float>::infinity());
    xnn_math_f32_roundd__sse_addsub(kBlockSize * sizeof(float), inputs.data(), outputs.data());
    const uint32_t reference_output = fp32_to_bits(std::floor(inputs[0]));
    ASSERT_EQ(reference_output, fp32_to_bits(outputs[0]))
      << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(inputs[0])
      << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
      << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(outputs[0]);
  }

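  // Quiet NaNs (bit patterns 0x7FC00000-0x7FFFFFFF, plus their negations)
  // must be propagated with exactly the bits std::floor produces for them.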
  TEST(ROUNDD__SSE_ADDSUB, positive_qnan) {
    std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
    std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
    for (uint32_t n = UINT32_C(0x7FC00000); n < UINT32_C(0x80000000); n += kBlockSize) {
      for (uint32_t i = 0; i < kBlockSize; i++) {
        inputs[i] = fp32_from_bits(n + i);
      }
      xnn_math_f32_roundd__sse_addsub(kBlockSize * sizeof(float), inputs.data(), outputs.data());
      for (uint32_t i = 0; i < kBlockSize; i++) {
        const uint32_t reference_output = fp32_to_bits(std::floor(inputs[i]));
        ASSERT_EQ(reference_output, fp32_to_bits(outputs[i]))
          << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(inputs[i])
          << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
          << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(outputs[i]);
      }
    }
  }

  TEST(ROUNDD__SSE_ADDSUB, negative_qnan) {
    std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
    std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
    for (uint32_t n = UINT32_C(0x7FC00000); n < UINT32_C(0x80000000); n += kBlockSize) {
      for (uint32_t i = 0; i < kBlockSize; i++) {
        inputs[i] = fp32_from_bits(UINT32_C(0x80000000) | (n + i));
      }
      xnn_math_f32_roundd__sse_addsub(kBlockSize * sizeof(float), inputs.data(), outputs.data());
      for (uint32_t i = 0; i < kBlockSize; i++) {
        const uint32_t reference_output = fp32_to_bits(std::floor(inputs[i]));
        ASSERT_EQ(reference_output, fp32_to_bits(outputs[i]))
          << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(inputs[i])
          << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
          << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(outputs[i]);
      }
    }
  }

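  // Signaling NaNs sweep 0x7F800001-0x7FBFFFFF (the std::max clamp skips the
  // infinity pattern). The mask 0xFFBFFFFF clears bit 22, the quiet bit, on
  // both sides, so these checks pass whether or not the kernel quiets the
  // sNaN, provided sign and payload survive.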
  TEST(ROUNDD__SSE_ADDSUB, positive_snan) {
    std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
    std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
    for (uint32_t n = UINT32_C(0x7F800000); n < UINT32_C(0x7FC00000); n += kBlockSize) {
      for (uint32_t i = 0; i < kBlockSize; i++) {
        inputs[i] = fp32_from_bits(std::max<uint32_t>(n + i, UINT32_C(0x7F800001)));
      }
      xnn_math_f32_roundd__sse_addsub(kBlockSize * sizeof(float), inputs.data(), outputs.data());
      for (uint32_t i = 0; i < kBlockSize; i++) {
        const uint32_t reference_output = fp32_to_bits(std::floor(inputs[i]));
        ASSERT_EQ(reference_output & UINT32_C(0xFFBFFFFF), fp32_to_bits(outputs[i]) & UINT32_C(0xFFBFFFFF))
          << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(inputs[i])
          << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
          << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(outputs[i]);
      }
    }
  }

  TEST(ROUNDD__SSE_ADDSUB, negative_snan) {
    std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
    std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
    for (uint32_t n = UINT32_C(0x7F800000); n < UINT32_C(0x7FC00000); n += kBlockSize) {
      for (uint32_t i = 0; i < kBlockSize; i++) {
        inputs[i] = fp32_from_bits(UINT32_C(0x80000000) | std::max<uint32_t>(n + i, UINT32_C(0x7F800001)));
      }
      xnn_math_f32_roundd__sse_addsub(kBlockSize * sizeof(float), inputs.data(), outputs.data());
      for (uint32_t i = 0; i < kBlockSize; i++) {
        const uint32_t reference_output = fp32_to_bits(std::floor(inputs[i]));
        ASSERT_EQ(reference_output & UINT32_C(0xFFBFFFFF), fp32_to_bits(outputs[i]) & UINT32_C(0xFFBFFFFF))
          << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(inputs[i])
          << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
          << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(outputs[i]);
      }
    }
  }

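  // Unlike the *_snan tests, the *_snan_to_qnan tests compare all 32 bits,
  // so the kernel must quiet a signaling NaN exactly the way std::floor does
  // on this platform (setting the quiet bit while preserving sign and
  // payload).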
  TEST(ROUNDD__SSE_ADDSUB, positive_snan_to_qnan) {
    std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
    std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
    for (uint32_t n = UINT32_C(0x7F800000); n < UINT32_C(0x7FC00000); n += kBlockSize) {
      for (uint32_t i = 0; i < kBlockSize; i++) {
        inputs[i] = fp32_from_bits(std::max<uint32_t>(n + i, UINT32_C(0x7F800001)));
      }
      xnn_math_f32_roundd__sse_addsub(kBlockSize * sizeof(float), inputs.data(), outputs.data());
      for (uint32_t i = 0; i < kBlockSize; i++) {
        const uint32_t reference_output = fp32_to_bits(std::floor(inputs[i]));
        ASSERT_EQ(reference_output, fp32_to_bits(outputs[i]))
          << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(inputs[i])
          << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
          << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(outputs[i]);
      }
    }
  }

  TEST(ROUNDD__SSE_ADDSUB, negative_snan_to_qnan) {
    std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
    std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
    for (uint32_t n = UINT32_C(0x7F800000); n < UINT32_C(0x7FC00000); n += kBlockSize) {
      for (uint32_t i = 0; i < kBlockSize; i++) {
        inputs[i] = fp32_from_bits(UINT32_C(0x80000000) | std::max<uint32_t>(n + i, UINT32_C(0x7F800001)));
      }
      xnn_math_f32_roundd__sse_addsub(kBlockSize * sizeof(float), inputs.data(), outputs.data());
      for (uint32_t i = 0; i < kBlockSize; i++) {
        const uint32_t reference_output = fp32_to_bits(std::floor(inputs[i]));
        ASSERT_EQ(reference_output, fp32_to_bits(outputs[i]))
          << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(inputs[i])
          << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
          << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(outputs[i]);
      }
    }
  }
#endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64

#if XNN_ARCH_X86 || XNN_ARCH_X86_64
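  // xnn_math_f32_roundd__sse2_cvt presumably rounds by converting to int32
  // and back (CVTPS2DQ/CVTDQ2PS, per the kernel name), adjusting downward
  // for negative non-integral inputs and passing through magnitudes too
  // large for int32, which are already integral. As above, the tests only
  // assume bit-exact agreement with std::floor.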
  TEST(ROUNDD__SSE2_CVT, positive_zero) {
    std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
    std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
    std::fill(inputs.begin(), inputs.end(), fp32_from_bits(UINT32_C(0x00000000)));  // +0.0f
    xnn_math_f32_roundd__sse2_cvt(kBlockSize * sizeof(float), inputs.data(), outputs.data());
    const uint32_t reference_output = fp32_to_bits(std::floor(inputs[0]));
    ASSERT_EQ(reference_output, fp32_to_bits(outputs[0]))
      << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(inputs[0])
      << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
      << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(outputs[0]);
  }

  TEST(ROUNDD__SSE2_CVT, negative_zero) {
    std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
    std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
    std::fill(inputs.begin(), inputs.end(), fp32_from_bits(UINT32_C(0x80000000)));  // -0.0f
    xnn_math_f32_roundd__sse2_cvt(kBlockSize * sizeof(float), inputs.data(), outputs.data());
    const uint32_t reference_output = fp32_to_bits(std::floor(inputs[0]));
    ASSERT_EQ(reference_output, fp32_to_bits(outputs[0]))
      << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(inputs[0])
      << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
      << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(outputs[0]);
  }

  TEST(ROUNDD__SSE2_CVT, positive_subnormal) {
    std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
    std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
    for (uint32_t n = UINT32_C(0x00000000); n < UINT32_C(0x00800000); n += kBlockSize) {
      for (uint32_t i = 0; i < kBlockSize; i++) {
        inputs[i] = fp32_from_bits(std::max<uint32_t>(n + i, UINT32_C(0x00000001)));
      }
      xnn_math_f32_roundd__sse2_cvt(kBlockSize * sizeof(float), inputs.data(), outputs.data());
      for (uint32_t i = 0; i < kBlockSize; i++) {
        const uint32_t reference_output = fp32_to_bits(std::floor(inputs[i]));
        ASSERT_EQ(reference_output, fp32_to_bits(outputs[i]))
          << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(inputs[i])
          << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
          << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(outputs[i]);
      }
    }
  }

  TEST(ROUNDD__SSE2_CVT, negative_subnormal) {
    std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
    std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
    for (uint32_t n = UINT32_C(0x80000000); n < UINT32_C(0x80800000); n += kBlockSize) {
      for (uint32_t i = 0; i < kBlockSize; i++) {
        inputs[i] = fp32_from_bits(std::max<uint32_t>(n + i, UINT32_C(0x80000001)));
      }
      xnn_math_f32_roundd__sse2_cvt(kBlockSize * sizeof(float), inputs.data(), outputs.data());
      for (uint32_t i = 0; i < kBlockSize; i++) {
        const uint32_t reference_output = fp32_to_bits(std::floor(inputs[i]));
        ASSERT_EQ(reference_output, fp32_to_bits(outputs[i]))
          << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(inputs[i])
          << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
          << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(outputs[i]);
      }
    }
  }

  TEST(ROUNDD__SSE2_CVT, positive_normal) {
    std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
    std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
    for (uint32_t n = UINT32_C(0x00800000); n < UINT32_C(0x4B800000); n += kBlockSize) {
      for (uint32_t i = 0; i < kBlockSize; i++) {
        inputs[i] = fp32_from_bits(n + i);
      }
      xnn_math_f32_roundd__sse2_cvt(kBlockSize * sizeof(float), inputs.data(), outputs.data());
      for (uint32_t i = 0; i < kBlockSize; i++) {
        const uint32_t reference_output = fp32_to_bits(std::floor(inputs[i]));
        ASSERT_EQ(reference_output, fp32_to_bits(outputs[i]))
          << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(inputs[i])
          << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
          << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(outputs[i]);
      }
    }
  }

  TEST(ROUNDD__SSE2_CVT, negative_normal) {
    std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
    std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
    for (uint32_t n = UINT32_C(0x80800000); n < UINT32_C(0xCB800000); n += kBlockSize) {
      for (uint32_t i = 0; i < kBlockSize; i++) {
        inputs[i] = fp32_from_bits(n + i);
      }
      xnn_math_f32_roundd__sse2_cvt(kBlockSize * sizeof(float), inputs.data(), outputs.data());
      for (uint32_t i = 0; i < kBlockSize; i++) {
        const uint32_t reference_output = fp32_to_bits(std::floor(inputs[i]));
        ASSERT_EQ(reference_output, fp32_to_bits(outputs[i]))
          << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(inputs[i])
          << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
          << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(outputs[i]);
      }
    }
  }

  TEST(ROUNDD__SSE2_CVT, positive_integral) {
    std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
    std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
    for (uint32_t n = UINT32_C(0x4B800000); n < UINT32_C(0x7F800000); n += kBlockSize) {
      for (uint32_t i = 0; i < kBlockSize; i++) {
        inputs[i] = fp32_from_bits(n + i);
      }
      xnn_math_f32_roundd__sse2_cvt(kBlockSize * sizeof(float), inputs.data(), outputs.data());
      for (uint32_t i = 0; i < kBlockSize; i++) {
        const uint32_t reference_output = fp32_to_bits(std::floor(inputs[i]));
        ASSERT_EQ(reference_output, fp32_to_bits(outputs[i]))
          << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(inputs[i])
          << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
          << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(outputs[i]);
      }
    }
  }

  TEST(ROUNDD__SSE2_CVT, negative_integral) {
    std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
    std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
    for (uint32_t n = UINT32_C(0xCB800000); n < UINT32_C(0xFF800000); n += kBlockSize) {
      for (uint32_t i = 0; i < kBlockSize; i++) {
        inputs[i] = fp32_from_bits(n + i);
      }
      xnn_math_f32_roundd__sse2_cvt(kBlockSize * sizeof(float), inputs.data(), outputs.data());
      for (uint32_t i = 0; i < kBlockSize; i++) {
        const uint32_t reference_output = fp32_to_bits(std::floor(inputs[i]));
        ASSERT_EQ(reference_output, fp32_to_bits(outputs[i]))
          << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(inputs[i])
          << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
          << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(outputs[i]);
      }
    }
  }

  TEST(ROUNDD__SSE2_CVT, positive_infinity) {
    std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
    std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
    std::fill(inputs.begin(), inputs.end(), +std::numeric_limits<float>::infinity());
    xnn_math_f32_roundd__sse2_cvt(kBlockSize * sizeof(float), inputs.data(), outputs.data());
    const uint32_t reference_output = fp32_to_bits(std::floor(inputs[0]));
    ASSERT_EQ(reference_output, fp32_to_bits(outputs[0]))
      << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(inputs[0])
      << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
      << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(outputs[0]);
  }

  TEST(ROUNDD__SSE2_CVT, negative_infinity) {
    std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
    std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
    std::fill(inputs.begin(), inputs.end(), -std::numeric_limits<float>::infinity());
    xnn_math_f32_roundd__sse2_cvt(kBlockSize * sizeof(float), inputs.data(), outputs.data());
    const uint32_t reference_output = fp32_to_bits(std::floor(inputs[0]));
    ASSERT_EQ(reference_output, fp32_to_bits(outputs[0]))
      << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(inputs[0])
      << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
      << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(outputs[0]);
  }

  TEST(ROUNDD__SSE2_CVT, positive_qnan) {
    std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
    std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
    for (uint32_t n = UINT32_C(0x7FC00000); n < UINT32_C(0x80000000); n += kBlockSize) {
      for (uint32_t i = 0; i < kBlockSize; i++) {
        inputs[i] = fp32_from_bits(n + i);
      }
      xnn_math_f32_roundd__sse2_cvt(kBlockSize * sizeof(float), inputs.data(), outputs.data());
      for (uint32_t i = 0; i < kBlockSize; i++) {
        const uint32_t reference_output = fp32_to_bits(std::floor(inputs[i]));
        ASSERT_EQ(reference_output, fp32_to_bits(outputs[i]))
          << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(inputs[i])
          << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
          << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(outputs[i]);
      }
    }
  }

  TEST(ROUNDD__SSE2_CVT, negative_qnan) {
    std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
    std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
    for (uint32_t n = UINT32_C(0x7FC00000); n < UINT32_C(0x80000000); n += kBlockSize) {
      for (uint32_t i = 0; i < kBlockSize; i++) {
        inputs[i] = fp32_from_bits(UINT32_C(0x80000000) | (n + i));
      }
      xnn_math_f32_roundd__sse2_cvt(kBlockSize * sizeof(float), inputs.data(), outputs.data());
      for (uint32_t i = 0; i < kBlockSize; i++) {
        const uint32_t reference_output = fp32_to_bits(std::floor(inputs[i]));
        ASSERT_EQ(reference_output, fp32_to_bits(outputs[i]))
          << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(inputs[i])
          << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
          << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(outputs[i]);
      }
    }
  }

  TEST(ROUNDD__SSE2_CVT, positive_snan) {
    std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
    std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
    for (uint32_t n = UINT32_C(0x7F800000); n < UINT32_C(0x7FC00000); n += kBlockSize) {
      for (uint32_t i = 0; i < kBlockSize; i++) {
        inputs[i] = fp32_from_bits(std::max<uint32_t>(n + i, UINT32_C(0x7F800001)));
      }
      xnn_math_f32_roundd__sse2_cvt(kBlockSize * sizeof(float), inputs.data(), outputs.data());
      for (uint32_t i = 0; i < kBlockSize; i++) {
        const uint32_t reference_output = fp32_to_bits(std::floor(inputs[i]));
        ASSERT_EQ(reference_output & UINT32_C(0xFFBFFFFF), fp32_to_bits(outputs[i]) & UINT32_C(0xFFBFFFFF))
          << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(inputs[i])
          << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
          << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(outputs[i]);
      }
    }
  }

  TEST(ROUNDD__SSE2_CVT, negative_snan) {
    std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
    std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
    for (uint32_t n = UINT32_C(0x7F800000); n < UINT32_C(0x7FC00000); n += kBlockSize) {
      for (uint32_t i = 0; i < kBlockSize; i++) {
        inputs[i] = fp32_from_bits(UINT32_C(0x80000000) | std::max<uint32_t>(n + i, UINT32_C(0x7F800001)));
      }
      xnn_math_f32_roundd__sse2_cvt(kBlockSize * sizeof(float), inputs.data(), outputs.data());
      for (uint32_t i = 0; i < kBlockSize; i++) {
        const uint32_t reference_output = fp32_to_bits(std::floor(inputs[i]));
        ASSERT_EQ(reference_output & UINT32_C(0xFFBFFFFF), fp32_to_bits(outputs[i]) & UINT32_C(0xFFBFFFFF))
          << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(inputs[i])
          << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
          << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(outputs[i]);
      }
    }
  }

  TEST(ROUNDD__SSE2_CVT, positive_snan_to_qnan) {
    std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
    std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
    for (uint32_t n = UINT32_C(0x7F800000); n < UINT32_C(0x7FC00000); n += kBlockSize) {
      for (uint32_t i = 0; i < kBlockSize; i++) {
        inputs[i] = fp32_from_bits(std::max<uint32_t>(n + i, UINT32_C(0x7F800001)));
      }
      xnn_math_f32_roundd__sse2_cvt(kBlockSize * sizeof(float), inputs.data(), outputs.data());
      for (uint32_t i = 0; i < kBlockSize; i++) {
        const uint32_t reference_output = fp32_to_bits(std::floor(inputs[i]));
        ASSERT_EQ(reference_output, fp32_to_bits(outputs[i]))
          << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(inputs[i])
          << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
          << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(outputs[i]);
      }
    }
  }

  TEST(ROUNDD__SSE2_CVT, negative_snan_to_qnan) {
    std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
    std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
    for (uint32_t n = UINT32_C(0x7F800000); n < UINT32_C(0x7FC00000); n += kBlockSize) {
      for (uint32_t i = 0; i < kBlockSize; i++) {
        inputs[i] = fp32_from_bits(UINT32_C(0x80000000) | std::max<uint32_t>(n + i, UINT32_C(0x7F800001)));
      }
      xnn_math_f32_roundd__sse2_cvt(kBlockSize * sizeof(float), inputs.data(), outputs.data());
      for (uint32_t i = 0; i < kBlockSize; i++) {
        const uint32_t reference_output = fp32_to_bits(std::floor(inputs[i]));
        ASSERT_EQ(reference_output, fp32_to_bits(outputs[i]))
          << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(inputs[i])
          << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
          << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(outputs[i]);
      }
    }
  }
#endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64

#if XNN_ARCH_X86 || XNN_ARCH_X86_64
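  // xnn_math_f32_roundd__sse41 can use the SSE4.1 ROUNDPS instruction
  // (_mm_floor_ps) directly, which already handles zeroes, infinities, and
  // NaNs in hardware; the same exhaustive sweep is repeated to verify that.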
  TEST(ROUNDD__SSE41, positive_zero) {
    std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
    std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
    std::fill(inputs.begin(), inputs.end(), fp32_from_bits(UINT32_C(0x00000000)));  // +0.0f
    xnn_math_f32_roundd__sse41(kBlockSize * sizeof(float), inputs.data(), outputs.data());
    const uint32_t reference_output = fp32_to_bits(std::floor(inputs[0]));
    ASSERT_EQ(reference_output, fp32_to_bits(outputs[0]))
      << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(inputs[0])
      << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
      << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(outputs[0]);
  }

  TEST(ROUNDD__SSE41, negative_zero) {
    std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
    std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
    std::fill(inputs.begin(), inputs.end(), fp32_from_bits(UINT32_C(0x80000000)));  // -0.0f
    xnn_math_f32_roundd__sse41(kBlockSize * sizeof(float), inputs.data(), outputs.data());
    const uint32_t reference_output = fp32_to_bits(std::floor(inputs[0]));
    ASSERT_EQ(reference_output, fp32_to_bits(outputs[0]))
      << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(inputs[0])
      << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
      << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(outputs[0]);
  }

  TEST(ROUNDD__SSE41, positive_subnormal) {
    std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
    std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
    for (uint32_t n = UINT32_C(0x00000000); n < UINT32_C(0x00800000); n += kBlockSize) {
      for (uint32_t i = 0; i < kBlockSize; i++) {
        inputs[i] = fp32_from_bits(std::max<uint32_t>(n + i, UINT32_C(0x00000001)));
      }
      xnn_math_f32_roundd__sse41(kBlockSize * sizeof(float), inputs.data(), outputs.data());
      for (uint32_t i = 0; i < kBlockSize; i++) {
        const uint32_t reference_output = fp32_to_bits(std::floor(inputs[i]));
        ASSERT_EQ(reference_output, fp32_to_bits(outputs[i]))
          << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(inputs[i])
          << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
          << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(outputs[i]);
      }
    }
  }

  TEST(ROUNDD__SSE41, negative_subnormal) {
    std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
    std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
    for (uint32_t n = UINT32_C(0x80000000); n < UINT32_C(0x80800000); n += kBlockSize) {
      for (uint32_t i = 0; i < kBlockSize; i++) {
        inputs[i] = fp32_from_bits(std::max<uint32_t>(n + i, UINT32_C(0x80000001)));
      }
      xnn_math_f32_roundd__sse41(kBlockSize * sizeof(float), inputs.data(), outputs.data());
      for (uint32_t i = 0; i < kBlockSize; i++) {
        const uint32_t reference_output = fp32_to_bits(std::floor(inputs[i]));
        ASSERT_EQ(reference_output, fp32_to_bits(outputs[i]))
          << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(inputs[i])
          << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
          << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(outputs[i]);
      }
    }
  }

  TEST(ROUNDD__SSE41, positive_normal) {
    std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
    std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
    for (uint32_t n = UINT32_C(0x00800000); n < UINT32_C(0x4B800000); n += kBlockSize) {
      for (uint32_t i = 0; i < kBlockSize; i++) {
        inputs[i] = fp32_from_bits(n + i);
      }
      xnn_math_f32_roundd__sse41(kBlockSize * sizeof(float), inputs.data(), outputs.data());
      for (uint32_t i = 0; i < kBlockSize; i++) {
        const uint32_t reference_output = fp32_to_bits(std::floor(inputs[i]));
        ASSERT_EQ(reference_output, fp32_to_bits(outputs[i]))
          << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(inputs[i])
          << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
          << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(outputs[i]);
      }
    }
  }

  TEST(ROUNDD__SSE41, negative_normal) {
    std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
    std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
    for (uint32_t n = UINT32_C(0x80800000); n < UINT32_C(0xCB800000); n += kBlockSize) {
      for (uint32_t i = 0; i < kBlockSize; i++) {
        inputs[i] = fp32_from_bits(n + i);
      }
      xnn_math_f32_roundd__sse41(kBlockSize * sizeof(float), inputs.data(), outputs.data());
      for (uint32_t i = 0; i < kBlockSize; i++) {
        const uint32_t reference_output = fp32_to_bits(std::floor(inputs[i]));
        ASSERT_EQ(reference_output, fp32_to_bits(outputs[i]))
          << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(inputs[i])
          << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
          << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(outputs[i]);
      }
    }
  }

  TEST(ROUNDD__SSE41, positive_integral) {
    std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
    std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
    for (uint32_t n = UINT32_C(0x4B800000); n < UINT32_C(0x7F800000); n += kBlockSize) {
      for (uint32_t i = 0; i < kBlockSize; i++) {
        inputs[i] = fp32_from_bits(n + i);
      }
      xnn_math_f32_roundd__sse41(kBlockSize * sizeof(float), inputs.data(), outputs.data());
      for (uint32_t i = 0; i < kBlockSize; i++) {
        const uint32_t reference_output = fp32_to_bits(std::floor(inputs[i]));
        ASSERT_EQ(reference_output, fp32_to_bits(outputs[i]))
          << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(inputs[i])
          << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
          << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(outputs[i]);
      }
    }
  }

  TEST(ROUNDD__SSE41, negative_integral) {
    std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
    std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
    for (uint32_t n = UINT32_C(0xCB800000); n < UINT32_C(0xFF800000); n += kBlockSize) {
      for (uint32_t i = 0; i < kBlockSize; i++) {
        inputs[i] = fp32_from_bits(n + i);
      }
      xnn_math_f32_roundd__sse41(kBlockSize * sizeof(float), inputs.data(), outputs.data());
      for (uint32_t i = 0; i < kBlockSize; i++) {
        const uint32_t reference_output = fp32_to_bits(std::floor(inputs[i]));
        ASSERT_EQ(reference_output, fp32_to_bits(outputs[i]))
          << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(inputs[i])
          << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
          << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(outputs[i]);
      }
    }
  }

  TEST(ROUNDD__SSE41, positive_infinity) {
    std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
    std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
    std::fill(inputs.begin(), inputs.end(), +std::numeric_limits<float>::infinity());
    xnn_math_f32_roundd__sse41(kBlockSize * sizeof(float), inputs.data(), outputs.data());
    const uint32_t reference_output = fp32_to_bits(std::floor(inputs[0]));
    ASSERT_EQ(reference_output, fp32_to_bits(outputs[0]))
      << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(inputs[0])
      << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
      << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(outputs[0]);
  }

  TEST(ROUNDD__SSE41, negative_infinity) {
    std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
    std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
    std::fill(inputs.begin(), inputs.end(), -std::numeric_limits<float>::infinity());
    xnn_math_f32_roundd__sse41(kBlockSize * sizeof(float), inputs.data(), outputs.data());
    const uint32_t reference_output = fp32_to_bits(std::floor(inputs[0]));
    ASSERT_EQ(reference_output, fp32_to_bits(outputs[0]))
      << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(inputs[0])
      << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
      << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(outputs[0]);
  }

  TEST(ROUNDD__SSE41, positive_qnan) {
    std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
    std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
    for (uint32_t n = UINT32_C(0x7FC00000); n < UINT32_C(0x80000000); n += kBlockSize) {
      for (uint32_t i = 0; i < kBlockSize; i++) {
        inputs[i] = fp32_from_bits(n + i);
      }
      xnn_math_f32_roundd__sse41(kBlockSize * sizeof(float), inputs.data(), outputs.data());
      for (uint32_t i = 0; i < kBlockSize; i++) {
        const uint32_t reference_output = fp32_to_bits(std::floor(inputs[i]));
        ASSERT_EQ(reference_output, fp32_to_bits(outputs[i]))
          << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(inputs[i])
          << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
          << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(outputs[i]);
      }
    }
  }

  TEST(ROUNDD__SSE41, negative_qnan) {
    std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
    std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
    for (uint32_t n = UINT32_C(0x7FC00000); n < UINT32_C(0x80000000); n += kBlockSize) {
      for (uint32_t i = 0; i < kBlockSize; i++) {
        inputs[i] = fp32_from_bits(UINT32_C(0x80000000) | (n + i));
      }
      xnn_math_f32_roundd__sse41(kBlockSize * sizeof(float), inputs.data(), outputs.data());
      for (uint32_t i = 0; i < kBlockSize; i++) {
        const uint32_t reference_output = fp32_to_bits(std::floor(inputs[i]));
        ASSERT_EQ(reference_output, fp32_to_bits(outputs[i]))
          << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(inputs[i])
          << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
          << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(outputs[i]);
      }
    }
  }

  TEST(ROUNDD__SSE41, positive_snan) {
    std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
    std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
    for (uint32_t n = UINT32_C(0x7F800000); n < UINT32_C(0x7FC00000); n += kBlockSize) {
      for (uint32_t i = 0; i < kBlockSize; i++) {
        inputs[i] = fp32_from_bits(std::max<uint32_t>(n + i, UINT32_C(0x7F800001)));
      }
      xnn_math_f32_roundd__sse41(kBlockSize * sizeof(float), inputs.data(), outputs.data());
      for (uint32_t i = 0; i < kBlockSize; i++) {
        const uint32_t reference_output = fp32_to_bits(std::floor(inputs[i]));
        ASSERT_EQ(reference_output & UINT32_C(0xFFBFFFFF), fp32_to_bits(outputs[i]) & UINT32_C(0xFFBFFFFF))
          << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(inputs[i])
          << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
          << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(outputs[i]);
      }
    }
  }

  TEST(ROUNDD__SSE41, negative_snan) {
    std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
    std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
    for (uint32_t n = UINT32_C(0x7F800000); n < UINT32_C(0x7FC00000); n += kBlockSize) {
      for (uint32_t i = 0; i < kBlockSize; i++) {
        inputs[i] = fp32_from_bits(UINT32_C(0x80000000) | std::max<uint32_t>(n + i, UINT32_C(0x7F800001)));
      }
      xnn_math_f32_roundd__sse41(kBlockSize * sizeof(float), inputs.data(), outputs.data());
      for (uint32_t i = 0; i < kBlockSize; i++) {
        const uint32_t reference_output = fp32_to_bits(std::floor(inputs[i]));
        ASSERT_EQ(reference_output & UINT32_C(0xFFBFFFFF), fp32_to_bits(outputs[i]) & UINT32_C(0xFFBFFFFF))
          << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(inputs[i])
          << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
          << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(outputs[i]);
      }
    }
  }

  TEST(ROUNDD__SSE41, positive_snan_to_qnan) {
    std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
    std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
    for (uint32_t n = UINT32_C(0x7F800000); n < UINT32_C(0x7FC00000); n += kBlockSize) {
      for (uint32_t i = 0; i < kBlockSize; i++) {
        inputs[i] = fp32_from_bits(std::max<uint32_t>(n + i, UINT32_C(0x7F800001)));
      }
      xnn_math_f32_roundd__sse41(kBlockSize * sizeof(float), inputs.data(), outputs.data());
      for (uint32_t i = 0; i < kBlockSize; i++) {
        const uint32_t reference_output = fp32_to_bits(std::floor(inputs[i]));
        ASSERT_EQ(reference_output, fp32_to_bits(outputs[i]))
          << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(inputs[i])
          << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
          << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(outputs[i]);
      }
    }
  }

  TEST(ROUNDD__SSE41, negative_snan_to_qnan) {
    std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
    std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
    for (uint32_t n = UINT32_C(0x7F800000); n < UINT32_C(0x7FC00000); n += kBlockSize) {
      for (uint32_t i = 0; i < kBlockSize; i++) {
        inputs[i] = fp32_from_bits(UINT32_C(0x80000000) | std::max<uint32_t>(n + i, UINT32_C(0x7F800001)));
      }
      xnn_math_f32_roundd__sse41(kBlockSize * sizeof(float), inputs.data(), outputs.data());
      for (uint32_t i = 0; i < kBlockSize; i++) {
        const uint32_t reference_output = fp32_to_bits(std::floor(inputs[i]));
        ASSERT_EQ(reference_output, fp32_to_bits(outputs[i]))
          << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(inputs[i])
          << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
          << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(outputs[i]);
      }
    }
  }
#endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64

#if XNN_ARCH_ARM || XNN_ARCH_ARM64
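  // xnn_math_f32_roundd__neon_addsub is the NEON counterpart of the SSE
  // add/subtract-magic-constant kernel above, for ARM targets without a
  // dedicated vector round-toward-negative-infinity instruction.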
  TEST(ROUNDD__NEON_ADDSUB, positive_zero) {
    std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
    std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
    std::fill(inputs.begin(), inputs.end(), fp32_from_bits(UINT32_C(0x00000000)));  // +0.0f
    xnn_math_f32_roundd__neon_addsub(kBlockSize * sizeof(float), inputs.data(), outputs.data());
    const uint32_t reference_output = fp32_to_bits(std::floor(inputs[0]));
    ASSERT_EQ(reference_output, fp32_to_bits(outputs[0]))
      << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(inputs[0])
      << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
      << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(outputs[0]);
  }

  TEST(ROUNDD__NEON_ADDSUB, negative_zero) {
    std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
    std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
    std::fill(inputs.begin(), inputs.end(), fp32_from_bits(UINT32_C(0x80000000)));  // -0.0f
    xnn_math_f32_roundd__neon_addsub(kBlockSize * sizeof(float), inputs.data(), outputs.data());
    const uint32_t reference_output = fp32_to_bits(std::floor(inputs[0]));
    ASSERT_EQ(reference_output, fp32_to_bits(outputs[0]))
      << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(inputs[0])
      << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
      << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(outputs[0]);
  }

  TEST(ROUNDD__NEON_ADDSUB, positive_subnormal) {
    std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
    std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
    for (uint32_t n = UINT32_C(0x00000000); n < UINT32_C(0x00800000); n += kBlockSize) {
      for (uint32_t i = 0; i < kBlockSize; i++) {
        inputs[i] = fp32_from_bits(std::max<uint32_t>(n + i, UINT32_C(0x00000001)));
      }
      xnn_math_f32_roundd__neon_addsub(kBlockSize * sizeof(float), inputs.data(), outputs.data());
      for (uint32_t i = 0; i < kBlockSize; i++) {
        const uint32_t reference_output = fp32_to_bits(std::floor(inputs[i]));
        ASSERT_EQ(reference_output, fp32_to_bits(outputs[i]))
          << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(inputs[i])
          << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
          << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(outputs[i]);
      }
    }
  }

  TEST(ROUNDD__NEON_ADDSUB, negative_subnormal) {
    std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
    std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
    for (uint32_t n = UINT32_C(0x80000000); n < UINT32_C(0x80800000); n += kBlockSize) {
      for (uint32_t i = 0; i < kBlockSize; i++) {
        inputs[i] = fp32_from_bits(std::max<uint32_t>(n + i, UINT32_C(0x80000001)));
      }
      xnn_math_f32_roundd__neon_addsub(kBlockSize * sizeof(float), inputs.data(), outputs.data());
      for (uint32_t i = 0; i < kBlockSize; i++) {
        const uint32_t reference_output = fp32_to_bits(std::floor(inputs[i]));
        ASSERT_EQ(reference_output, fp32_to_bits(outputs[i]))
          << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(inputs[i])
          << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
          << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(outputs[i]);
      }
    }
  }

  TEST(ROUNDD__NEON_ADDSUB, positive_normal) {
    std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
    std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
    for (uint32_t n = UINT32_C(0x00800000); n < UINT32_C(0x4B800000); n += kBlockSize) {
      for (uint32_t i = 0; i < kBlockSize; i++) {
        inputs[i] = fp32_from_bits(n + i);
      }
      xnn_math_f32_roundd__neon_addsub(kBlockSize * sizeof(float), inputs.data(), outputs.data());
      for (uint32_t i = 0; i < kBlockSize; i++) {
        const uint32_t reference_output = fp32_to_bits(std::floor(inputs[i]));
        ASSERT_EQ(reference_output, fp32_to_bits(outputs[i]))
          << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(inputs[i])
          << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
          << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(outputs[i]);
      }
    }
  }

  TEST(ROUNDD__NEON_ADDSUB, negative_normal) {
    std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
    std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
    for (uint32_t n = UINT32_C(0x80800000); n < UINT32_C(0xCB800000); n += kBlockSize) {
      for (uint32_t i = 0; i < kBlockSize; i++) {
        inputs[i] = fp32_from_bits(n + i);
      }
      xnn_math_f32_roundd__neon_addsub(kBlockSize * sizeof(float), inputs.data(), outputs.data());
      for (uint32_t i = 0; i < kBlockSize; i++) {
        const uint32_t reference_output = fp32_to_bits(std::floor(inputs[i]));
        ASSERT_EQ(reference_output, fp32_to_bits(outputs[i]))
          << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(inputs[i])
          << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
          << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(outputs[i]);
      }
    }
  }

  TEST(ROUNDD__NEON_ADDSUB, positive_integral) {
    std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
    std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
    for (uint32_t n = UINT32_C(0x4B800000); n < UINT32_C(0x7F800000); n += kBlockSize) {
      for (uint32_t i = 0; i < kBlockSize; i++) {
        inputs[i] = fp32_from_bits(n + i);
      }
      xnn_math_f32_roundd__neon_addsub(kBlockSize * sizeof(float), inputs.data(), outputs.data());
      for (uint32_t i = 0; i < kBlockSize; i++) {
        const uint32_t reference_output = fp32_to_bits(std::floor(inputs[i]));
        ASSERT_EQ(reference_output, fp32_to_bits(outputs[i]))
          << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(inputs[i])
          << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
          << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(outputs[i]);
      }
    }
  }

  TEST(ROUNDD__NEON_ADDSUB, negative_integral) {
    std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
    std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
    for (uint32_t n = UINT32_C(0xCB800000); n < UINT32_C(0xFF800000); n += kBlockSize) {
      for (uint32_t i = 0; i < kBlockSize; i++) {
        inputs[i] = fp32_from_bits(n + i);
      }
      xnn_math_f32_roundd__neon_addsub(kBlockSize * sizeof(float), inputs.data(), outputs.data());
      for (uint32_t i = 0; i < kBlockSize; i++) {
        const uint32_t reference_output = fp32_to_bits(std::floor(inputs[i]));
        ASSERT_EQ(reference_output, fp32_to_bits(outputs[i]))
          << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(inputs[i])
          << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
          << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(outputs[i]);
      }
    }
  }

  TEST(ROUNDD__NEON_ADDSUB, positive_infinity) {
    std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
    std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
    std::fill(inputs.begin(), inputs.end(), +std::numeric_limits<float>::infinity());
    xnn_math_f32_roundd__neon_addsub(kBlockSize * sizeof(float), inputs.data(), outputs.data());
    const uint32_t reference_output = fp32_to_bits(std::floor(inputs[0]));
    ASSERT_EQ(reference_output, fp32_to_bits(outputs[0]))
      << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(inputs[0])
      << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
      << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(outputs[0]);
  }

  TEST(ROUNDD__NEON_ADDSUB, negative_infinity) {
    std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
    std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
    std::fill(inputs.begin(), inputs.end(), -std::numeric_limits<float>::infinity());
    xnn_math_f32_roundd__neon_addsub(kBlockSize * sizeof(float), inputs.data(), outputs.data());
    const uint32_t reference_output = fp32_to_bits(std::floor(inputs[0]));
    ASSERT_EQ(reference_output, fp32_to_bits(outputs[0]))
      << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(inputs[0])
      << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
      << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(outputs[0]);
  }

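  // Quiet NaN inputs: outputs must match std::floor bit-for-bit, i.e. produce the same NaN
  // the reference produces.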
  TEST(ROUNDD__NEON_ADDSUB, positive_qnan) {
    std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
    std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
    for (uint32_t n = UINT32_C(0x7FC00000); n < UINT32_C(0x80000000); n += kBlockSize) {
      for (uint32_t i = 0; i < kBlockSize; i++) {
        inputs[i] = fp32_from_bits(n + i);
      }
      xnn_math_f32_roundd__neon_addsub(kBlockSize * sizeof(float), inputs.data(), outputs.data());
      for (uint32_t i = 0; i < kBlockSize; i++) {
        const uint32_t reference_output = fp32_to_bits(std::floor(inputs[i]));
        ASSERT_EQ(reference_output, fp32_to_bits(outputs[i]))
          << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(inputs[i])
          << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
          << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(outputs[i]);
      }
    }
  }

  TEST(ROUNDD__NEON_ADDSUB, negative_qnan) {
    std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
    std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
    for (uint32_t n = UINT32_C(0x7FC00000); n < UINT32_C(0x80000000); n += kBlockSize) {
      for (uint32_t i = 0; i < kBlockSize; i++) {
        inputs[i] = fp32_from_bits(UINT32_C(0x80000000) | (n + i));
      }
      xnn_math_f32_roundd__neon_addsub(kBlockSize * sizeof(float), inputs.data(), outputs.data());
      for (uint32_t i = 0; i < kBlockSize; i++) {
        const uint32_t reference_output = fp32_to_bits(std::floor(inputs[i]));
        ASSERT_EQ(reference_output, fp32_to_bits(outputs[i]))
          << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(inputs[i])
          << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
          << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(outputs[i]);
      }
    }
  }

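  // Signaling NaN inputs: the 0xFFBFFFFF mask clears bit 22 (the quiet bit) on both sides, so
  // the kernel may either preserve or quiet the NaN as long as the sign and the remaining
  // payload bits match the reference.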
  TEST(ROUNDD__NEON_ADDSUB, positive_snan) {
    std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
    std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
    for (uint32_t n = UINT32_C(0x7F800000); n < UINT32_C(0x7FC00000); n += kBlockSize) {
      for (uint32_t i = 0; i < kBlockSize; i++) {
        inputs[i] = fp32_from_bits(std::max<uint32_t>(n + i, UINT32_C(0x7F800001)));
      }
      xnn_math_f32_roundd__neon_addsub(kBlockSize * sizeof(float), inputs.data(), outputs.data());
      for (uint32_t i = 0; i < kBlockSize; i++) {
        const uint32_t reference_output = fp32_to_bits(std::floor(inputs[i]));
        ASSERT_EQ(reference_output & UINT32_C(0xFFBFFFFF), fp32_to_bits(outputs[i]) & UINT32_C(0xFFBFFFFF))
          << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(inputs[i])
          << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
          << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(outputs[i]);
      }
    }
  }

  TEST(ROUNDD__NEON_ADDSUB, negative_snan) {
    std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
    std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
    for (uint32_t n = UINT32_C(0x7F800000); n < UINT32_C(0x7FC00000); n += kBlockSize) {
      for (uint32_t i = 0; i < kBlockSize; i++) {
        inputs[i] = fp32_from_bits(UINT32_C(0x80000000) | std::max<uint32_t>(n + i, UINT32_C(0x7F800001)));
      }
      xnn_math_f32_roundd__neon_addsub(kBlockSize * sizeof(float), inputs.data(), outputs.data());
      for (uint32_t i = 0; i < kBlockSize; i++) {
        const uint32_t reference_output = fp32_to_bits(std::floor(inputs[i]));
        ASSERT_EQ(reference_output & UINT32_C(0xFFBFFFFF), fp32_to_bits(outputs[i]) & UINT32_C(0xFFBFFFFF))
          << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(inputs[i])
          << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
          << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(outputs[i]);
      }
    }
  }

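  // Same signaling NaN inputs, but compared exactly: the kernel must quiet (or preserve) the
  // NaN in precisely the same way std::floor does on this host.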
  TEST(ROUNDD__NEON_ADDSUB, positive_snan_to_qnan) {
    std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
    std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
    for (uint32_t n = UINT32_C(0x7F800000); n < UINT32_C(0x7FC00000); n += kBlockSize) {
      for (uint32_t i = 0; i < kBlockSize; i++) {
        inputs[i] = fp32_from_bits(std::max<uint32_t>(n + i, UINT32_C(0x7F800001)));
      }
      xnn_math_f32_roundd__neon_addsub(kBlockSize * sizeof(float), inputs.data(), outputs.data());
      for (uint32_t i = 0; i < kBlockSize; i++) {
        const uint32_t reference_output = fp32_to_bits(std::floor(inputs[i]));
        ASSERT_EQ(reference_output, fp32_to_bits(outputs[i]))
          << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(inputs[i])
          << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
          << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(outputs[i]);
      }
    }
  }

  TEST(ROUNDD__NEON_ADDSUB, negative_snan_to_qnan) {
    std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
    std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
    for (uint32_t n = UINT32_C(0x7F800000); n < UINT32_C(0x7FC00000); n += kBlockSize) {
      for (uint32_t i = 0; i < kBlockSize; i++) {
        inputs[i] = fp32_from_bits(UINT32_C(0x80000000) | std::max<uint32_t>(n + i, UINT32_C(0x7F800001)));
      }
      xnn_math_f32_roundd__neon_addsub(kBlockSize * sizeof(float), inputs.data(), outputs.data());
      for (uint32_t i = 0; i < kBlockSize; i++) {
        const uint32_t reference_output = fp32_to_bits(std::floor(inputs[i]));
        ASSERT_EQ(reference_output, fp32_to_bits(outputs[i]))
          << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(inputs[i])
          << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
          << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(outputs[i]);
      }
    }
  }
#endif  // XNN_ARCH_ARM || XNN_ARCH_ARM64

#if XNN_ARCH_ARM || XNN_ARCH_ARM64
  TEST(ROUNDD__NEON_CVT, positive_zero) {
    std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
    std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
    std::fill(inputs.begin(), inputs.end(), +0.0f);
    xnn_math_f32_roundd__neon_cvt(kBlockSize * sizeof(float), inputs.data(), outputs.data());
    const uint32_t reference_output = fp32_to_bits(std::floor(inputs[0]));
    ASSERT_EQ(reference_output, fp32_to_bits(outputs[0]))
      << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(inputs[0])
      << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
      << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(outputs[0]);
  }

  TEST(ROUNDD__NEON_CVT, negative_zero) {
    std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
    std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
    std::fill(inputs.begin(), inputs.end(), -0.0f);
    xnn_math_f32_roundd__neon_cvt(kBlockSize * sizeof(float), inputs.data(), outputs.data());
    const uint32_t reference_output = fp32_to_bits(std::floor(inputs[0]));
    ASSERT_EQ(reference_output, fp32_to_bits(outputs[0]))
      << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(inputs[0])
      << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
      << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(outputs[0]);
  }

  TEST(ROUNDD__NEON_CVT, positive_subnormal) {
    std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
    std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
    for (uint32_t n = UINT32_C(0x00000000); n < UINT32_C(0x00800000); n += kBlockSize) {
      for (uint32_t i = 0; i < kBlockSize; i++) {
        inputs[i] = fp32_from_bits(std::max<uint32_t>(n + i, UINT32_C(0x00000001)));
      }
      xnn_math_f32_roundd__neon_cvt(kBlockSize * sizeof(float), inputs.data(), outputs.data());
      for (uint32_t i = 0; i < kBlockSize; i++) {
        const uint32_t reference_output = fp32_to_bits(std::floor(inputs[i]));
        ASSERT_EQ(reference_output, fp32_to_bits(outputs[i]))
          << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(inputs[i])
          << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
          << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(outputs[i]);
      }
    }
  }

  TEST(ROUNDD__NEON_CVT, negative_subnormal) {
    std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
    std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
    for (uint32_t n = UINT32_C(0x80000000); n < UINT32_C(0x80800000); n += kBlockSize) {
      for (uint32_t i = 0; i < kBlockSize; i++) {
        inputs[i] = fp32_from_bits(std::max<uint32_t>(n + i, UINT32_C(0x80000001)));
      }
      xnn_math_f32_roundd__neon_cvt(kBlockSize * sizeof(float), inputs.data(), outputs.data());
      for (uint32_t i = 0; i < kBlockSize; i++) {
        const uint32_t reference_output = fp32_to_bits(std::floor(inputs[i]));
        ASSERT_EQ(reference_output, fp32_to_bits(outputs[i]))
          << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(inputs[i])
          << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
          << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(outputs[i]);
      }
    }
  }

  TEST(ROUNDD__NEON_CVT, positive_normal) {
    std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
    std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
    for (uint32_t n = UINT32_C(0x00800000); n < UINT32_C(0x4B800000); n += kBlockSize) {
      for (uint32_t i = 0; i < kBlockSize; i++) {
        inputs[i] = fp32_from_bits(n + i);
      }
      xnn_math_f32_roundd__neon_cvt(kBlockSize * sizeof(float), inputs.data(), outputs.data());
      for (uint32_t i = 0; i < kBlockSize; i++) {
        const uint32_t reference_output = fp32_to_bits(std::floor(inputs[i]));
        ASSERT_EQ(reference_output, fp32_to_bits(outputs[i]))
          << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(inputs[i])
          << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
          << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(outputs[i]);
      }
    }
  }

  TEST(ROUNDD__NEON_CVT, negative_normal) {
    std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
    std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
    for (uint32_t n = UINT32_C(0x80800000); n < UINT32_C(0xCB800000); n += kBlockSize) {
      for (uint32_t i = 0; i < kBlockSize; i++) {
        inputs[i] = fp32_from_bits(n + i);
      }
      xnn_math_f32_roundd__neon_cvt(kBlockSize * sizeof(float), inputs.data(), outputs.data());
      for (uint32_t i = 0; i < kBlockSize; i++) {
        const uint32_t reference_output = fp32_to_bits(std::floor(inputs[i]));
        ASSERT_EQ(reference_output, fp32_to_bits(outputs[i]))
          << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(inputs[i])
          << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
          << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(outputs[i]);
      }
    }
  }

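  // Inputs of magnitude 2**24 and above are already integral; the largest of them overflow a
  // signed 32-bit conversion, so a conversion-based kernel presumably passes this range
  // through unchanged rather than round-tripping it through int32.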
  TEST(ROUNDD__NEON_CVT, positive_integral) {
    std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
    std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
    for (uint32_t n = UINT32_C(0x4B800000); n < UINT32_C(0x7F800000); n += kBlockSize) {
      for (uint32_t i = 0; i < kBlockSize; i++) {
        inputs[i] = fp32_from_bits(n + i);
      }
      xnn_math_f32_roundd__neon_cvt(kBlockSize * sizeof(float), inputs.data(), outputs.data());
      for (uint32_t i = 0; i < kBlockSize; i++) {
        const uint32_t reference_output = fp32_to_bits(std::floor(inputs[i]));
        ASSERT_EQ(reference_output, fp32_to_bits(outputs[i]))
          << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(inputs[i])
          << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
          << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(outputs[i]);
      }
    }
  }

  TEST(ROUNDD__NEON_CVT, negative_integral) {
    std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
    std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
    for (uint32_t n = UINT32_C(0xCB800000); n < UINT32_C(0xFF800000); n += kBlockSize) {
      for (uint32_t i = 0; i < kBlockSize; i++) {
        inputs[i] = fp32_from_bits(n + i);
      }
      xnn_math_f32_roundd__neon_cvt(kBlockSize * sizeof(float), inputs.data(), outputs.data());
      for (uint32_t i = 0; i < kBlockSize; i++) {
        const uint32_t reference_output = fp32_to_bits(std::floor(inputs[i]));
        ASSERT_EQ(reference_output, fp32_to_bits(outputs[i]))
          << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(inputs[i])
          << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
          << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(outputs[i]);
      }
    }
  }

  TEST(ROUNDD__NEON_CVT, positive_infinity) {
    std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
    std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
    std::fill(inputs.begin(), inputs.end(), +std::numeric_limits<float>::infinity());
    xnn_math_f32_roundd__neon_cvt(kBlockSize * sizeof(float), inputs.data(), outputs.data());
    const uint32_t reference_output = fp32_to_bits(std::floor(inputs[0]));
    ASSERT_EQ(reference_output, fp32_to_bits(outputs[0]))
      << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(inputs[0])
      << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
      << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(outputs[0]);
  }

  TEST(ROUNDD__NEON_CVT, negative_infinity) {
    std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
    std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
    std::fill(inputs.begin(), inputs.end(), -std::numeric_limits<float>::infinity());
    xnn_math_f32_roundd__neon_cvt(kBlockSize * sizeof(float), inputs.data(), outputs.data());
    const uint32_t reference_output = fp32_to_bits(std::floor(inputs[0]));
    ASSERT_EQ(reference_output, fp32_to_bits(outputs[0]))
      << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(inputs[0])
      << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
      << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(outputs[0]);
  }

  TEST(ROUNDD__NEON_CVT, positive_qnan) {
    std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
    std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
    for (uint32_t n = UINT32_C(0x7FC00000); n < UINT32_C(0x80000000); n += kBlockSize) {
      for (uint32_t i = 0; i < kBlockSize; i++) {
        inputs[i] = fp32_from_bits(n + i);
      }
      xnn_math_f32_roundd__neon_cvt(kBlockSize * sizeof(float), inputs.data(), outputs.data());
      for (uint32_t i = 0; i < kBlockSize; i++) {
        const uint32_t reference_output = fp32_to_bits(std::floor(inputs[i]));
        ASSERT_EQ(reference_output, fp32_to_bits(outputs[i]))
          << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(inputs[i])
          << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
          << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(outputs[i]);
      }
    }
  }

  TEST(ROUNDD__NEON_CVT, negative_qnan) {
    std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
    std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
    for (uint32_t n = UINT32_C(0x7FC00000); n < UINT32_C(0x80000000); n += kBlockSize) {
      for (uint32_t i = 0; i < kBlockSize; i++) {
        inputs[i] = fp32_from_bits(UINT32_C(0x80000000) | (n + i));
      }
      xnn_math_f32_roundd__neon_cvt(kBlockSize * sizeof(float), inputs.data(), outputs.data());
      for (uint32_t i = 0; i < kBlockSize; i++) {
        const uint32_t reference_output = fp32_to_bits(std::floor(inputs[i]));
        ASSERT_EQ(reference_output, fp32_to_bits(outputs[i]))
          << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(inputs[i])
          << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
          << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(outputs[i]);
      }
    }
  }

  TEST(ROUNDD__NEON_CVT, positive_snan) {
    std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
    std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
    for (uint32_t n = UINT32_C(0x7F800000); n < UINT32_C(0x7FC00000); n += kBlockSize) {
      for (uint32_t i = 0; i < kBlockSize; i++) {
        inputs[i] = fp32_from_bits(std::max<uint32_t>(n + i, UINT32_C(0x7F800001)));
      }
      xnn_math_f32_roundd__neon_cvt(kBlockSize * sizeof(float), inputs.data(), outputs.data());
      for (uint32_t i = 0; i < kBlockSize; i++) {
        const uint32_t reference_output = fp32_to_bits(std::floor(inputs[i]));
        ASSERT_EQ(reference_output & UINT32_C(0xFFBFFFFF), fp32_to_bits(outputs[i]) & UINT32_C(0xFFBFFFFF))
          << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(inputs[i])
          << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
          << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(outputs[i]);
      }
    }
  }

  TEST(ROUNDD__NEON_CVT, negative_snan) {
    std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
    std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
    for (uint32_t n = UINT32_C(0x7F800000); n < UINT32_C(0x7FC00000); n += kBlockSize) {
      for (uint32_t i = 0; i < kBlockSize; i++) {
        inputs[i] = fp32_from_bits(UINT32_C(0x80000000) | std::max<uint32_t>(n + i, UINT32_C(0x7F800001)));
      }
      xnn_math_f32_roundd__neon_cvt(kBlockSize * sizeof(float), inputs.data(), outputs.data());
      for (uint32_t i = 0; i < kBlockSize; i++) {
        const uint32_t reference_output = fp32_to_bits(std::floor(inputs[i]));
        ASSERT_EQ(reference_output & UINT32_C(0xFFBFFFFF), fp32_to_bits(outputs[i]) & UINT32_C(0xFFBFFFFF))
          << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(inputs[i])
          << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
          << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(outputs[i]);
      }
    }
  }

  TEST(ROUNDD__NEON_CVT, positive_snan_to_qnan) {
    std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
    std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
    for (uint32_t n = UINT32_C(0x7F800000); n < UINT32_C(0x7FC00000); n += kBlockSize) {
      for (uint32_t i = 0; i < kBlockSize; i++) {
        inputs[i] = fp32_from_bits(std::max<uint32_t>(n + i, UINT32_C(0x7F800001)));
      }
      xnn_math_f32_roundd__neon_cvt(kBlockSize * sizeof(float), inputs.data(), outputs.data());
      for (uint32_t i = 0; i < kBlockSize; i++) {
        const uint32_t reference_output = fp32_to_bits(std::floor(inputs[i]));
        ASSERT_EQ(reference_output, fp32_to_bits(outputs[i]))
          << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(inputs[i])
          << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
          << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(outputs[i]);
      }
    }
  }

  TEST(ROUNDD__NEON_CVT, negative_snan_to_qnan) {
    std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
    std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
    for (uint32_t n = UINT32_C(0x7F800000); n < UINT32_C(0x7FC00000); n += kBlockSize) {
      for (uint32_t i = 0; i < kBlockSize; i++) {
        inputs[i] = fp32_from_bits(UINT32_C(0x80000000) | std::max<uint32_t>(n + i, UINT32_C(0x7F800001)));
      }
      xnn_math_f32_roundd__neon_cvt(kBlockSize * sizeof(float), inputs.data(), outputs.data());
      for (uint32_t i = 0; i < kBlockSize; i++) {
        const uint32_t reference_output = fp32_to_bits(std::floor(inputs[i]));
        ASSERT_EQ(reference_output, fp32_to_bits(outputs[i]))
          << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(inputs[i])
          << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
          << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(outputs[i]);
      }
    }
  }
#endif  // XNN_ARCH_ARM || XNN_ARCH_ARM64

#if XNN_ARCH_ARM || XNN_ARCH_ARM64
  TEST(ROUNDD__NEONV8, positive_zero) {
    std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
    std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
    std::fill(inputs.begin(), inputs.end(), +0.0f);
    xnn_math_f32_roundd__neonv8(kBlockSize * sizeof(float), inputs.data(), outputs.data());
    const uint32_t reference_output = fp32_to_bits(std::floor(inputs[0]));
    ASSERT_EQ(reference_output, fp32_to_bits(outputs[0]))
      << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(inputs[0])
      << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
      << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(outputs[0]);
  }

  TEST(ROUNDD__NEONV8, negative_zero) {
    std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
    std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
    std::fill(inputs.begin(), inputs.end(), -0.0f);
    xnn_math_f32_roundd__neonv8(kBlockSize * sizeof(float), inputs.data(), outputs.data());
    const uint32_t reference_output = fp32_to_bits(std::floor(inputs[0]));
    ASSERT_EQ(reference_output, fp32_to_bits(outputs[0]))
      << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(inputs[0])
      << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
      << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(outputs[0]);
  }

  TEST(ROUNDD__NEONV8, positive_subnormal) {
    std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
    std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
    for (uint32_t n = UINT32_C(0x00000000); n < UINT32_C(0x00800000); n += kBlockSize) {
      for (uint32_t i = 0; i < kBlockSize; i++) {
        inputs[i] = fp32_from_bits(std::max<uint32_t>(n + i, UINT32_C(0x00000001)));
      }
      xnn_math_f32_roundd__neonv8(kBlockSize * sizeof(float), inputs.data(), outputs.data());
      for (uint32_t i = 0; i < kBlockSize; i++) {
        const uint32_t reference_output = fp32_to_bits(std::floor(inputs[i]));
        ASSERT_EQ(reference_output, fp32_to_bits(outputs[i]))
          << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(inputs[i])
          << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
          << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(outputs[i]);
      }
    }
  }

  TEST(ROUNDD__NEONV8, negative_subnormal) {
    std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
    std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
    for (uint32_t n = UINT32_C(0x80000000); n < UINT32_C(0x80800000); n += kBlockSize) {
      for (uint32_t i = 0; i < kBlockSize; i++) {
        inputs[i] = fp32_from_bits(std::max<uint32_t>(n + i, UINT32_C(0x80000001)));
      }
      xnn_math_f32_roundd__neonv8(kBlockSize * sizeof(float), inputs.data(), outputs.data());
      for (uint32_t i = 0; i < kBlockSize; i++) {
        const uint32_t reference_output = fp32_to_bits(std::floor(inputs[i]));
        ASSERT_EQ(reference_output, fp32_to_bits(outputs[i]))
          << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(inputs[i])
          << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
          << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(outputs[i]);
      }
    }
  }

  TEST(ROUNDD__NEONV8, positive_normal) {
    std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
    std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
    for (uint32_t n = UINT32_C(0x00800000); n < UINT32_C(0x4B800000); n += kBlockSize) {
      for (uint32_t i = 0; i < kBlockSize; i++) {
        inputs[i] = fp32_from_bits(n + i);
      }
      xnn_math_f32_roundd__neonv8(kBlockSize * sizeof(float), inputs.data(), outputs.data());
      for (uint32_t i = 0; i < kBlockSize; i++) {
        const uint32_t reference_output = fp32_to_bits(std::floor(inputs[i]));
        ASSERT_EQ(reference_output, fp32_to_bits(outputs[i]))
          << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(inputs[i])
          << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
          << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(outputs[i]);
      }
    }
  }

  TEST(ROUNDD__NEONV8, negative_normal) {
    std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
    std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
    for (uint32_t n = UINT32_C(0x80800000); n < UINT32_C(0xCB800000); n += kBlockSize) {
      for (uint32_t i = 0; i < kBlockSize; i++) {
        inputs[i] = fp32_from_bits(n + i);
      }
      xnn_math_f32_roundd__neonv8(kBlockSize * sizeof(float), inputs.data(), outputs.data());
      for (uint32_t i = 0; i < kBlockSize; i++) {
        const uint32_t reference_output = fp32_to_bits(std::floor(inputs[i]));
        ASSERT_EQ(reference_output, fp32_to_bits(outputs[i]))
          << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(inputs[i])
          << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
          << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(outputs[i]);
      }
    }
  }

  TEST(ROUNDD__NEONV8, positive_integral) {
    std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
    std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
    for (uint32_t n = UINT32_C(0x4B800000); n < UINT32_C(0x7F800000); n += kBlockSize) {
      for (uint32_t i = 0; i < kBlockSize; i++) {
        inputs[i] = fp32_from_bits(n + i);
      }
      xnn_math_f32_roundd__neonv8(kBlockSize * sizeof(float), inputs.data(), outputs.data());
      for (uint32_t i = 0; i < kBlockSize; i++) {
        const uint32_t reference_output = fp32_to_bits(std::floor(inputs[i]));
        ASSERT_EQ(reference_output, fp32_to_bits(outputs[i]))
          << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(inputs[i])
          << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
          << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(outputs[i]);
      }
    }
  }

  TEST(ROUNDD__NEONV8, negative_integral) {
    std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
    std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
    for (uint32_t n = UINT32_C(0xCB800000); n < UINT32_C(0xFF800000); n += kBlockSize) {
      for (uint32_t i = 0; i < kBlockSize; i++) {
        inputs[i] = fp32_from_bits(n + i);
      }
      xnn_math_f32_roundd__neonv8(kBlockSize * sizeof(float), inputs.data(), outputs.data());
      for (uint32_t i = 0; i < kBlockSize; i++) {
        const uint32_t reference_output = fp32_to_bits(std::floor(inputs[i]));
        ASSERT_EQ(reference_output, fp32_to_bits(outputs[i]))
          << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(inputs[i])
          << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
          << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(outputs[i]);
      }
    }
  }

  TEST(ROUNDD__NEONV8, positive_infinity) {
    std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
    std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
    std::fill(inputs.begin(), inputs.end(), +std::numeric_limits<float>::infinity());
    xnn_math_f32_roundd__neonv8(kBlockSize * sizeof(float), inputs.data(), outputs.data());
    const uint32_t reference_output = fp32_to_bits(std::floor(inputs[0]));
    ASSERT_EQ(reference_output, fp32_to_bits(outputs[0]))
      << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(inputs[0])
      << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
      << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(outputs[0]);
  }

  TEST(ROUNDD__NEONV8, negative_infinity) {
    std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
    std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
    std::fill(inputs.begin(), inputs.end(), -std::numeric_limits<float>::infinity());
    xnn_math_f32_roundd__neonv8(kBlockSize * sizeof(float), inputs.data(), outputs.data());
    const uint32_t reference_output = fp32_to_bits(std::floor(inputs[0]));
    ASSERT_EQ(reference_output, fp32_to_bits(outputs[0]))
      << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(inputs[0])
      << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
      << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(outputs[0]);
  }

  TEST(ROUNDD__NEONV8, positive_qnan) {
    std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
    std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
    for (uint32_t n = UINT32_C(0x7FC00000); n < UINT32_C(0x80000000); n += kBlockSize) {
      for (uint32_t i = 0; i < kBlockSize; i++) {
        inputs[i] = fp32_from_bits(n + i);
      }
      xnn_math_f32_roundd__neonv8(kBlockSize * sizeof(float), inputs.data(), outputs.data());
      for (uint32_t i = 0; i < kBlockSize; i++) {
        const uint32_t reference_output = fp32_to_bits(std::floor(inputs[i]));
        ASSERT_EQ(reference_output, fp32_to_bits(outputs[i]))
          << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(inputs[i])
          << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
          << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(outputs[i]);
      }
    }
  }

  TEST(ROUNDD__NEONV8, negative_qnan) {
    std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
    std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
    for (uint32_t n = UINT32_C(0x7FC00000); n < UINT32_C(0x80000000); n += kBlockSize) {
      for (uint32_t i = 0; i < kBlockSize; i++) {
        inputs[i] = fp32_from_bits(UINT32_C(0x80000000) | (n + i));
      }
      xnn_math_f32_roundd__neonv8(kBlockSize * sizeof(float), inputs.data(), outputs.data());
      for (uint32_t i = 0; i < kBlockSize; i++) {
        const uint32_t reference_output = fp32_to_bits(std::floor(inputs[i]));
        ASSERT_EQ(reference_output, fp32_to_bits(outputs[i]))
          << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(inputs[i])
          << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
          << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(outputs[i]);
      }
    }
  }

  TEST(ROUNDD__NEONV8, positive_snan) {
    std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
    std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
    for (uint32_t n = UINT32_C(0x7F800000); n < UINT32_C(0x7FC00000); n += kBlockSize) {
      for (uint32_t i = 0; i < kBlockSize; i++) {
        inputs[i] = fp32_from_bits(std::max<uint32_t>(n + i, UINT32_C(0x7F800001)));
      }
      xnn_math_f32_roundd__neonv8(kBlockSize * sizeof(float), inputs.data(), outputs.data());
      for (uint32_t i = 0; i < kBlockSize; i++) {
        const uint32_t reference_output = fp32_to_bits(std::floor(inputs[i]));
        ASSERT_EQ(reference_output & UINT32_C(0xFFBFFFFF), fp32_to_bits(outputs[i]) & UINT32_C(0xFFBFFFFF))
          << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(inputs[i])
          << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
          << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(outputs[i]);
      }
    }
  }

  TEST(ROUNDD__NEONV8, negative_snan) {
    std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
    std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
    for (uint32_t n = UINT32_C(0x7F800000); n < UINT32_C(0x7FC00000); n += kBlockSize) {
      for (uint32_t i = 0; i < kBlockSize; i++) {
        inputs[i] = fp32_from_bits(UINT32_C(0x80000000) | std::max<uint32_t>(n + i, UINT32_C(0x7F800001)));
      }
      xnn_math_f32_roundd__neonv8(kBlockSize * sizeof(float), inputs.data(), outputs.data());
      for (uint32_t i = 0; i < kBlockSize; i++) {
        const uint32_t reference_output = fp32_to_bits(std::floor(inputs[i]));
        ASSERT_EQ(reference_output & UINT32_C(0xFFBFFFFF), fp32_to_bits(outputs[i]) & UINT32_C(0xFFBFFFFF))
          << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(inputs[i])
          << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
          << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(outputs[i]);
      }
    }
  }

  TEST(ROUNDD__NEONV8, positive_snan_to_qnan) {
    std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
    std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
    for (uint32_t n = UINT32_C(0x7F800000); n < UINT32_C(0x7FC00000); n += kBlockSize) {
      for (uint32_t i = 0; i < kBlockSize; i++) {
        inputs[i] = fp32_from_bits(std::max<uint32_t>(n + i, UINT32_C(0x7F800001)));
      }
      xnn_math_f32_roundd__neonv8(kBlockSize * sizeof(float), inputs.data(), outputs.data());
      for (uint32_t i = 0; i < kBlockSize; i++) {
        const uint32_t reference_output = fp32_to_bits(std::floor(inputs[i]));
        ASSERT_EQ(reference_output, fp32_to_bits(outputs[i]))
          << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(inputs[i])
          << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
          << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(outputs[i]);
      }
    }
  }

  TEST(ROUNDD__NEONV8, negative_snan_to_qnan) {
    std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
    std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
    for (uint32_t n = UINT32_C(0x7F800000); n < UINT32_C(0x7FC00000); n += kBlockSize) {
      for (uint32_t i = 0; i < kBlockSize; i++) {
        inputs[i] = fp32_from_bits(UINT32_C(0x80000000) | std::max<uint32_t>(n + i, UINT32_C(0x7F800001)));
      }
      xnn_math_f32_roundd__neonv8(kBlockSize * sizeof(float), inputs.data(), outputs.data());
      for (uint32_t i = 0; i < kBlockSize; i++) {
        const uint32_t reference_output = fp32_to_bits(std::floor(inputs[i]));
        ASSERT_EQ(reference_output, fp32_to_bits(outputs[i]))
          << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(inputs[i])
          << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
          << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(outputs[i]);
      }
    }
  }
#endif  // XNN_ARCH_ARM || XNN_ARCH_ARM64

#if XNN_ARCH_WASMSIMD
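  // WAsm SIMD kernel using the add-subtract trick: presumably adding and subtracting the
  // magic constant 2**23 rounds to an integer, with a correction step for round-down.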
  TEST(ROUNDD__WASMSIMD_ADDSUB, positive_zero) {
    std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
    std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
    std::fill(inputs.begin(), inputs.end(), +0.0f);
    xnn_math_f32_roundd__wasmsimd_addsub(kBlockSize * sizeof(float), inputs.data(), outputs.data());
    const uint32_t reference_output = fp32_to_bits(std::floor(inputs[0]));
    ASSERT_EQ(reference_output, fp32_to_bits(outputs[0]))
      << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(inputs[0])
      << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
      << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(outputs[0]);
  }

  TEST(ROUNDD__WASMSIMD_ADDSUB, negative_zero) {
    std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
    std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
    std::fill(inputs.begin(), inputs.end(), -0.0f);
    xnn_math_f32_roundd__wasmsimd_addsub(kBlockSize * sizeof(float), inputs.data(), outputs.data());
    const uint32_t reference_output = fp32_to_bits(std::floor(inputs[0]));
    ASSERT_EQ(reference_output, fp32_to_bits(outputs[0]))
      << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(inputs[0])
      << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
      << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(outputs[0]);
  }

  TEST(ROUNDD__WASMSIMD_ADDSUB, positive_subnormal) {
    std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
    std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
    for (uint32_t n = UINT32_C(0x00000000); n < UINT32_C(0x00800000); n += kBlockSize) {
      for (uint32_t i = 0; i < kBlockSize; i++) {
        inputs[i] = fp32_from_bits(std::max<uint32_t>(n + i, UINT32_C(0x00000001)));
      }
      xnn_math_f32_roundd__wasmsimd_addsub(kBlockSize * sizeof(float), inputs.data(), outputs.data());
      for (uint32_t i = 0; i < kBlockSize; i++) {
        const uint32_t reference_output = fp32_to_bits(std::floor(inputs[i]));
        ASSERT_EQ(reference_output, fp32_to_bits(outputs[i]))
          << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(inputs[i])
          << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
          << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(outputs[i]);
      }
    }
  }

  TEST(ROUNDD__WASMSIMD_ADDSUB, negative_subnormal) {
    std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
    std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
    for (uint32_t n = UINT32_C(0x80000000); n < UINT32_C(0x80800000); n += kBlockSize) {
      for (uint32_t i = 0; i < kBlockSize; i++) {
        inputs[i] = fp32_from_bits(std::max<uint32_t>(n + i, UINT32_C(0x80000001)));
      }
      xnn_math_f32_roundd__wasmsimd_addsub(kBlockSize * sizeof(float), inputs.data(), outputs.data());
      for (uint32_t i = 0; i < kBlockSize; i++) {
        const uint32_t reference_output = fp32_to_bits(std::floor(inputs[i]));
        ASSERT_EQ(reference_output, fp32_to_bits(outputs[i]))
          << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(inputs[i])
          << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
          << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(outputs[i]);
      }
    }
  }

  TEST(ROUNDD__WASMSIMD_ADDSUB, positive_normal) {
    std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
    std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
    for (uint32_t n = UINT32_C(0x00800000); n < UINT32_C(0x4B800000); n += kBlockSize) {
      for (uint32_t i = 0; i < kBlockSize; i++) {
        inputs[i] = fp32_from_bits(n + i);
      }
      xnn_math_f32_roundd__wasmsimd_addsub(kBlockSize * sizeof(float), inputs.data(), outputs.data());
      for (uint32_t i = 0; i < kBlockSize; i++) {
        const uint32_t reference_output = fp32_to_bits(std::floor(inputs[i]));
        ASSERT_EQ(reference_output, fp32_to_bits(outputs[i]))
          << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(inputs[i])
          << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
          << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(outputs[i]);
      }
    }
  }

  TEST(ROUNDD__WASMSIMD_ADDSUB, negative_normal) {
    std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
    std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
    for (uint32_t n = UINT32_C(0x80800000); n < UINT32_C(0xCB800000); n += kBlockSize) {
      for (uint32_t i = 0; i < kBlockSize; i++) {
        inputs[i] = fp32_from_bits(n + i);
      }
      xnn_math_f32_roundd__wasmsimd_addsub(kBlockSize * sizeof(float), inputs.data(), outputs.data());
      for (uint32_t i = 0; i < kBlockSize; i++) {
        const uint32_t reference_output = fp32_to_bits(std::floor(inputs[i]));
        ASSERT_EQ(reference_output, fp32_to_bits(outputs[i]))
          << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(inputs[i])
          << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
          << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(outputs[i]);
      }
    }
  }

  TEST(ROUNDD__WASMSIMD_ADDSUB, positive_integral) {
    std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
    std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
    for (uint32_t n = UINT32_C(0x4B800000); n < UINT32_C(0x7F800000); n += kBlockSize) {
      for (uint32_t i = 0; i < kBlockSize; i++) {
        inputs[i] = fp32_from_bits(n + i);
      }
      xnn_math_f32_roundd__wasmsimd_addsub(kBlockSize * sizeof(float), inputs.data(), outputs.data());
      for (uint32_t i = 0; i < kBlockSize; i++) {
        const uint32_t reference_output = fp32_to_bits(std::floor(inputs[i]));
        ASSERT_EQ(reference_output, fp32_to_bits(outputs[i]))
          << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(inputs[i])
          << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
          << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(outputs[i]);
      }
    }
  }

  TEST(ROUNDD__WASMSIMD_ADDSUB, negative_integral) {
    std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
    std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
    for (uint32_t n = UINT32_C(0xCB800000); n < UINT32_C(0xFF800000); n += kBlockSize) {
      for (uint32_t i = 0; i < kBlockSize; i++) {
        inputs[i] = fp32_from_bits(n + i);
      }
      xnn_math_f32_roundd__wasmsimd_addsub(kBlockSize * sizeof(float), inputs.data(), outputs.data());
      for (uint32_t i = 0; i < kBlockSize; i++) {
        const uint32_t reference_output = fp32_to_bits(std::floor(inputs[i]));
        ASSERT_EQ(reference_output, fp32_to_bits(outputs[i]))
          << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(inputs[i])
          << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
          << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(outputs[i]);
      }
    }
  }

  TEST(ROUNDD__WASMSIMD_ADDSUB, positive_infinity) {
    std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
    std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
    std::fill(inputs.begin(), inputs.end(), +std::numeric_limits<float>::infinity());
    xnn_math_f32_roundd__wasmsimd_addsub(kBlockSize * sizeof(float), inputs.data(), outputs.data());
    const uint32_t reference_output = fp32_to_bits(std::floor(inputs[0]));
    ASSERT_EQ(reference_output, fp32_to_bits(outputs[0]))
      << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(inputs[0])
      << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
      << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(outputs[0]);
  }

  TEST(ROUNDD__WASMSIMD_ADDSUB, negative_infinity) {
    std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
    std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
    std::fill(inputs.begin(), inputs.end(), -std::numeric_limits<float>::infinity());
    xnn_math_f32_roundd__wasmsimd_addsub(kBlockSize * sizeof(float), inputs.data(), outputs.data());
    const uint32_t reference_output = fp32_to_bits(std::floor(inputs[0]));
    ASSERT_EQ(reference_output, fp32_to_bits(outputs[0]))
      << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(inputs[0])
      << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
      << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(outputs[0]);
  }

  TEST(ROUNDD__WASMSIMD_ADDSUB, positive_qnan) {
    std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
    std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
    for (uint32_t n = UINT32_C(0x7FC00000); n < UINT32_C(0x80000000); n += kBlockSize) {
      for (uint32_t i = 0; i < kBlockSize; i++) {
        inputs[i] = fp32_from_bits(n + i);
      }
      xnn_math_f32_roundd__wasmsimd_addsub(kBlockSize * sizeof(float), inputs.data(), outputs.data());
      for (uint32_t i = 0; i < kBlockSize; i++) {
        const uint32_t reference_output = fp32_to_bits(std::floor(inputs[i]));
        ASSERT_EQ(reference_output, fp32_to_bits(outputs[i]))
          << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(inputs[i])
          << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
          << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(outputs[i]);
      }
    }
  }

  TEST(ROUNDD__WASMSIMD_ADDSUB, negative_qnan) {
    std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
    std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
    for (uint32_t n = UINT32_C(0x7FC00000); n < UINT32_C(0x80000000); n += kBlockSize) {
      for (uint32_t i = 0; i < kBlockSize; i++) {
        inputs[i] = fp32_from_bits(UINT32_C(0x80000000) | (n + i));
      }
      xnn_math_f32_roundd__wasmsimd_addsub(kBlockSize * sizeof(float), inputs.data(), outputs.data());
      for (uint32_t i = 0; i < kBlockSize; i++) {
        const uint32_t reference_output = fp32_to_bits(std::floor(inputs[i]));
        ASSERT_EQ(reference_output, fp32_to_bits(outputs[i]))
          << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(inputs[i])
          << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
          << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(outputs[i]);
      }
    }
  }

  TEST(ROUNDD__WASMSIMD_ADDSUB, positive_snan) {
    std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
    std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
    for (uint32_t n = UINT32_C(0x7F800000); n < UINT32_C(0x7FC00000); n += kBlockSize) {
      for (uint32_t i = 0; i < kBlockSize; i++) {
        inputs[i] = fp32_from_bits(std::max<uint32_t>(n + i, UINT32_C(0x7F800001)));
      }
      xnn_math_f32_roundd__wasmsimd_addsub(kBlockSize * sizeof(float), inputs.data(), outputs.data());
      for (uint32_t i = 0; i < kBlockSize; i++) {
        const uint32_t reference_output = fp32_to_bits(std::floor(inputs[i]));
        ASSERT_EQ(reference_output & UINT32_C(0xFFBFFFFF), fp32_to_bits(outputs[i]) & UINT32_C(0xFFBFFFFF))
          << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(inputs[i])
          << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
          << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(outputs[i]);
      }
    }
  }

  TEST(ROUNDD__WASMSIMD_ADDSUB, negative_snan) {
    std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
    std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
    for (uint32_t n = UINT32_C(0x7F800000); n < UINT32_C(0x7FC00000); n += kBlockSize) {
      for (uint32_t i = 0; i < kBlockSize; i++) {
        inputs[i] = fp32_from_bits(UINT32_C(0x80000000) | std::max<uint32_t>(n + i, UINT32_C(0x7F800001)));
      }
      xnn_math_f32_roundd__wasmsimd_addsub(kBlockSize * sizeof(float), inputs.data(), outputs.data());
      for (uint32_t i = 0; i < kBlockSize; i++) {
        const uint32_t reference_output = fp32_to_bits(std::floor(inputs[i]));
        ASSERT_EQ(reference_output & UINT32_C(0xFFBFFFFF), fp32_to_bits(outputs[i]) & UINT32_C(0xFFBFFFFF))
          << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(inputs[i])
          << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
          << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(outputs[i]);
      }
    }
  }

  TEST(ROUNDD__WASMSIMD_ADDSUB, positive_snan_to_qnan) {
    std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
    std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
    for (uint32_t n = UINT32_C(0x7F800000); n < UINT32_C(0x7FC00000); n += kBlockSize) {
      for (uint32_t i = 0; i < kBlockSize; i++) {
        inputs[i] = fp32_from_bits(std::max<uint32_t>(n + i, UINT32_C(0x7F800001)));
      }
      xnn_math_f32_roundd__wasmsimd_addsub(kBlockSize * sizeof(float), inputs.data(), outputs.data());
      for (uint32_t i = 0; i < kBlockSize; i++) {
        const uint32_t reference_output = fp32_to_bits(std::floor(inputs[i]));
        ASSERT_EQ(reference_output, fp32_to_bits(outputs[i]))
          << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(inputs[i])
          << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
          << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(outputs[i]);
      }
    }
  }

  TEST(ROUNDD__WASMSIMD_ADDSUB, negative_snan_to_qnan) {
    std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
    std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
    for (uint32_t n = UINT32_C(0x7F800000); n < UINT32_C(0x7FC00000); n += kBlockSize) {
      for (uint32_t i = 0; i < kBlockSize; i++) {
        inputs[i] = fp32_from_bits(UINT32_C(0x80000000) | std::max<uint32_t>(n + i, UINT32_C(0x7F800001)));
      }
      xnn_math_f32_roundd__wasmsimd_addsub(kBlockSize * sizeof(float), inputs.data(), outputs.data());
      for (uint32_t i = 0; i < kBlockSize; i++) {
        const uint32_t reference_output = fp32_to_bits(std::floor(inputs[i]));
        ASSERT_EQ(reference_output, fp32_to_bits(outputs[i]))
          << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(inputs[i])
          << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
          << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(outputs[i]);
      }
    }
  }
#endif  // XNN_ARCH_WASMSIMD

#if XNN_ARCH_WASMSIMD
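  // WAsm SIMD kernel based on float-to-integer conversion, exercised over the same input
  // partitions as the NEON_CVT and ADDSUB suites above.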
  TEST(ROUNDD__WASMSIMD_CVT, positive_zero) {
    std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
    std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
    std::fill(inputs.begin(), inputs.end(), +0.0f);
    xnn_math_f32_roundd__wasmsimd_cvt(kBlockSize * sizeof(float), inputs.data(), outputs.data());
    const uint32_t reference_output = fp32_to_bits(std::floor(inputs[0]));
    ASSERT_EQ(reference_output, fp32_to_bits(outputs[0]))
      << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(inputs[0])
      << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
      << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(outputs[0]);
  }

  TEST(ROUNDD__WASMSIMD_CVT, negative_zero) {
    std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
    std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
    std::fill(inputs.begin(), inputs.end(), -0.0f);
    xnn_math_f32_roundd__wasmsimd_cvt(kBlockSize * sizeof(float), inputs.data(), outputs.data());
    const uint32_t reference_output = fp32_to_bits(std::floor(inputs[0]));
    ASSERT_EQ(reference_output, fp32_to_bits(outputs[0]))
      << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(inputs[0])
      << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
      << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(outputs[0]);
  }

  TEST(ROUNDD__WASMSIMD_CVT, positive_subnormal) {
    std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
    std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
    for (uint32_t n = UINT32_C(0x00000000); n < UINT32_C(0x00800000); n += kBlockSize) {
      for (uint32_t i = 0; i < kBlockSize; i++) {
        inputs[i] = fp32_from_bits(std::max<uint32_t>(n + i, UINT32_C(0x00000001)));
      }
      xnn_math_f32_roundd__wasmsimd_cvt(kBlockSize * sizeof(float), inputs.data(), outputs.data());
      for (uint32_t i = 0; i < kBlockSize; i++) {
        const uint32_t reference_output = fp32_to_bits(std::floor(inputs[i]));
        ASSERT_EQ(reference_output, fp32_to_bits(outputs[i]))
          << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(inputs[i])
          << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
          << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(outputs[i]);
      }
    }
  }

  TEST(ROUNDD__WASMSIMD_CVT, negative_subnormal) {
    std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
    std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
    for (uint32_t n = UINT32_C(0x80000000); n < UINT32_C(0x80800000); n += kBlockSize) {
      for (uint32_t i = 0; i < kBlockSize; i++) {
        inputs[i] = fp32_from_bits(std::max<uint32_t>(n + i, UINT32_C(0x80000001)));
      }
      xnn_math_f32_roundd__wasmsimd_cvt(kBlockSize * sizeof(float), inputs.data(), outputs.data());
      for (uint32_t i = 0; i < kBlockSize; i++) {
        const uint32_t reference_output = fp32_to_bits(std::floor(inputs[i]));
        ASSERT_EQ(reference_output, fp32_to_bits(outputs[i]))
          << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(inputs[i])
          << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
          << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(outputs[i]);
      }
    }
  }

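  // Normal sweeps stop at 0x4B800000 (2**24): at and above that magnitude
  // every finite float is already an integer, so those bit patterns are
  // exercised by the *_integral tests instead.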
  TEST(ROUNDD__WASMSIMD_CVT, positive_normal) {
    std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
    std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
    for (uint32_t n = UINT32_C(0x00800000); n < UINT32_C(0x4B800000); n += kBlockSize) {
      for (uint32_t i = 0; i < kBlockSize; i++) {
        inputs[i] = fp32_from_bits(n + i);
      }
      xnn_math_f32_roundd__wasmsimd_cvt(kBlockSize * sizeof(float), inputs.data(), outputs.data());
      for (uint32_t i = 0; i < kBlockSize; i++) {
        const uint32_t reference_output = fp32_to_bits(std::floor(inputs[i]));
        ASSERT_EQ(reference_output, fp32_to_bits(outputs[i]))
          << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(inputs[i])
          << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
          << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(outputs[i]);
      }
    }
  }

  TEST(ROUNDD__WASMSIMD_CVT, negative_normal) {
    std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
    std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
    for (uint32_t n = UINT32_C(0x80800000); n < UINT32_C(0xCB800000); n += kBlockSize) {
      for (uint32_t i = 0; i < kBlockSize; i++) {
        inputs[i] = fp32_from_bits(n + i);
      }
      xnn_math_f32_roundd__wasmsimd_cvt(kBlockSize * sizeof(float), inputs.data(), outputs.data());
      for (uint32_t i = 0; i < kBlockSize; i++) {
        const uint32_t reference_output = fp32_to_bits(std::floor(inputs[i]));
        ASSERT_EQ(reference_output, fp32_to_bits(outputs[i]))
          << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(inputs[i])
          << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
          << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(outputs[i]);
      }
    }
  }

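  // Integral sweeps: 0x4B800000-0x7F7FFFFF (and their negative counterparts)
  // are the finite floats of magnitude >= 2**24; floor must return them
  // unchanged, and the same holds for the infinities tested after them.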
  TEST(ROUNDD__WASMSIMD_CVT, positive_integral) {
    std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
    std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
    for (uint32_t n = UINT32_C(0x4B800000); n < UINT32_C(0x7F800000); n += kBlockSize) {
      for (uint32_t i = 0; i < kBlockSize; i++) {
        inputs[i] = fp32_from_bits(n + i);
      }
      xnn_math_f32_roundd__wasmsimd_cvt(kBlockSize * sizeof(float), inputs.data(), outputs.data());
      for (uint32_t i = 0; i < kBlockSize; i++) {
        const uint32_t reference_output = fp32_to_bits(std::floor(inputs[i]));
        ASSERT_EQ(reference_output, fp32_to_bits(outputs[i]))
          << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(inputs[i])
          << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
          << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(outputs[i]);
      }
    }
  }

  TEST(ROUNDD__WASMSIMD_CVT, negative_integral) {
    std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
    std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
    for (uint32_t n = UINT32_C(0xCB800000); n < UINT32_C(0xFF800000); n += kBlockSize) {
      for (uint32_t i = 0; i < kBlockSize; i++) {
        inputs[i] = fp32_from_bits(n + i);
      }
      xnn_math_f32_roundd__wasmsimd_cvt(kBlockSize * sizeof(float), inputs.data(), outputs.data());
      for (uint32_t i = 0; i < kBlockSize; i++) {
        const uint32_t reference_output = fp32_to_bits(std::floor(inputs[i]));
        ASSERT_EQ(reference_output, fp32_to_bits(outputs[i]))
          << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(inputs[i])
          << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
          << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(outputs[i]);
      }
    }
  }

  TEST(ROUNDD__WASMSIMD_CVT, positive_infinity) {
    std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
    std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
    std::fill(inputs.begin(), inputs.end(), +std::numeric_limits<float>::infinity());
    xnn_math_f32_roundd__wasmsimd_cvt(kBlockSize * sizeof(float), inputs.data(), outputs.data());
    const uint32_t reference_output = fp32_to_bits(std::floor(inputs[0]));
    ASSERT_EQ(reference_output, fp32_to_bits(outputs[0]))
      << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(inputs[0])
      << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
      << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(outputs[0]);
  }

  TEST(ROUNDD__WASMSIMD_CVT, negative_infinity) {
    std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
    std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
    std::fill(inputs.begin(), inputs.end(), -std::numeric_limits<float>::infinity());
    xnn_math_f32_roundd__wasmsimd_cvt(kBlockSize * sizeof(float), inputs.data(), outputs.data());
    const uint32_t reference_output = fp32_to_bits(std::floor(inputs[0]));
    ASSERT_EQ(reference_output, fp32_to_bits(outputs[0]))
      << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(inputs[0])
      << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
      << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(outputs[0]);
  }

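  // NaN sweeps: with an all-ones exponent, a set mantissa MSB (bit patterns
  // 0x7FC00000-0x7FFFFFFF) encodes a quiet NaN, while a clear mantissa MSB
  // with a nonzero mantissa (0x7F800001-0x7FBFFFFF) encodes a signaling NaN.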
  TEST(ROUNDD__WASMSIMD_CVT, positive_qnan) {
    std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
    std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
    for (uint32_t n = UINT32_C(0x7FC00000); n < UINT32_C(0x80000000); n += kBlockSize) {
      for (uint32_t i = 0; i < kBlockSize; i++) {
        inputs[i] = fp32_from_bits(n + i);
      }
      xnn_math_f32_roundd__wasmsimd_cvt(kBlockSize * sizeof(float), inputs.data(), outputs.data());
      for (uint32_t i = 0; i < kBlockSize; i++) {
        const uint32_t reference_output = fp32_to_bits(std::floor(inputs[i]));
        ASSERT_EQ(reference_output, fp32_to_bits(outputs[i]))
          << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(inputs[i])
          << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
          << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(outputs[i]);
      }
    }
  }

  TEST(ROUNDD__WASMSIMD_CVT, negative_qnan) {
    std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
    std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
    for (uint32_t n = UINT32_C(0x7FC00000); n < UINT32_C(0x80000000); n += kBlockSize) {
      for (uint32_t i = 0; i < kBlockSize; i++) {
        inputs[i] = fp32_from_bits(UINT32_C(0x80000000) | (n + i));
      }
      xnn_math_f32_roundd__wasmsimd_cvt(kBlockSize * sizeof(float), inputs.data(), outputs.data());
      for (uint32_t i = 0; i < kBlockSize; i++) {
        const uint32_t reference_output = fp32_to_bits(std::floor(inputs[i]));
        ASSERT_EQ(reference_output, fp32_to_bits(outputs[i]))
          << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(inputs[i])
          << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
          << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(outputs[i]);
      }
    }
  }

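  // For signaling-NaN inputs the assertion masks out bit 22 (the quiet bit,
  // 0x00400000) on both sides: the kernel must produce a NaN with the same
  // sign and payload, whether or not it quietens it along the way.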
  TEST(ROUNDD__WASMSIMD_CVT, positive_snan) {
    std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
    std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
    for (uint32_t n = UINT32_C(0x7F800000); n < UINT32_C(0x7FC00000); n += kBlockSize) {
      for (uint32_t i = 0; i < kBlockSize; i++) {
        inputs[i] = fp32_from_bits(std::max<uint32_t>(n + i, UINT32_C(0x7F800001)));
      }
      xnn_math_f32_roundd__wasmsimd_cvt(kBlockSize * sizeof(float), inputs.data(), outputs.data());
      for (uint32_t i = 0; i < kBlockSize; i++) {
        const uint32_t reference_output = fp32_to_bits(std::floor(inputs[i]));
        ASSERT_EQ(reference_output & UINT32_C(0xFFBFFFFF), fp32_to_bits(outputs[i]) & UINT32_C(0xFFBFFFFF))
          << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(inputs[i])
          << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
          << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(outputs[i]);
      }
    }
  }

  TEST(ROUNDD__WASMSIMD_CVT, negative_snan) {
    std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
    std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
    for (uint32_t n = UINT32_C(0x7F800000); n < UINT32_C(0x7FC00000); n += kBlockSize) {
      for (uint32_t i = 0; i < kBlockSize; i++) {
        inputs[i] = fp32_from_bits(UINT32_C(0x80000000) | std::max<uint32_t>(n + i, UINT32_C(0x7F800001)));
      }
      xnn_math_f32_roundd__wasmsimd_cvt(kBlockSize * sizeof(float), inputs.data(), outputs.data());
      for (uint32_t i = 0; i < kBlockSize; i++) {
        const uint32_t reference_output = fp32_to_bits(std::floor(inputs[i]));
        ASSERT_EQ(reference_output & UINT32_C(0xFFBFFFFF), fp32_to_bits(outputs[i]) & UINT32_C(0xFFBFFFFF))
          << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(inputs[i])
          << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
          << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(outputs[i]);
      }
    }
  }

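  // The *_snan_to_qnan tests compare bit-exactly instead: the reference is
  // whatever std::floor returns for the signaling NaN, which on typical hosts
  // is the same NaN with the quiet bit set, and the kernel is expected to
  // match it.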
  TEST(ROUNDD__WASMSIMD_CVT, positive_snan_to_qnan) {
    std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
    std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
    for (uint32_t n = UINT32_C(0x7F800000); n < UINT32_C(0x7FC00000); n += kBlockSize) {
      for (uint32_t i = 0; i < kBlockSize; i++) {
        inputs[i] = fp32_from_bits(std::max<uint32_t>(n + i, UINT32_C(0x7F800001)));
      }
      xnn_math_f32_roundd__wasmsimd_cvt(kBlockSize * sizeof(float), inputs.data(), outputs.data());
      for (uint32_t i = 0; i < kBlockSize; i++) {
        const uint32_t reference_output = fp32_to_bits(std::floor(inputs[i]));
        ASSERT_EQ(reference_output, fp32_to_bits(outputs[i]))
          << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(inputs[i])
          << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
          << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(outputs[i]);
      }
    }
  }

  TEST(ROUNDD__WASMSIMD_CVT, negative_snan_to_qnan) {
    std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
    std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
    for (uint32_t n = UINT32_C(0x7F800000); n < UINT32_C(0x7FC00000); n += kBlockSize) {
      for (uint32_t i = 0; i < kBlockSize; i++) {
        inputs[i] = fp32_from_bits(UINT32_C(0x80000000) | std::max<uint32_t>(n + i, UINT32_C(0x7F800001)));
      }
      xnn_math_f32_roundd__wasmsimd_cvt(kBlockSize * sizeof(float), inputs.data(), outputs.data());
      for (uint32_t i = 0; i < kBlockSize; i++) {
        const uint32_t reference_output = fp32_to_bits(std::floor(inputs[i]));
        ASSERT_EQ(reference_output, fp32_to_bits(outputs[i]))
          << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(inputs[i])
          << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
          << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(outputs[i]);
      }
    }
  }
#endif  // XNN_ARCH_WASMSIMD

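// The "native" variant presumably lowers to the f32x4.floor instruction of
// the WAsm SIMD instruction set, so no rounding emulation is involved; the
// same exhaustive sweeps apply.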
#if XNN_ARCH_WASMSIMD
  TEST(ROUNDD__WASMSIMD_NATIVE, positive_zero) {
    std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
    std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
    std::fill(inputs.begin(), inputs.end(), fp32_from_bits(UINT32_C(0x00000000)));
    xnn_math_f32_roundd__wasmsimd_native(kBlockSize * sizeof(float), inputs.data(), outputs.data());
    const uint32_t reference_output = fp32_to_bits(std::floor(inputs[0]));
    ASSERT_EQ(reference_output, fp32_to_bits(outputs[0]))
      << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(inputs[0])
      << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
      << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(outputs[0]);
  }

  TEST(ROUNDD__WASMSIMD_NATIVE, negative_zero) {
    std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
    std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
    std::fill(inputs.begin(), inputs.end(), fp32_from_bits(UINT32_C(0x80000000)));
    xnn_math_f32_roundd__wasmsimd_native(kBlockSize * sizeof(float), inputs.data(), outputs.data());
    const uint32_t reference_output = fp32_to_bits(std::floor(inputs[0]));
    ASSERT_EQ(reference_output, fp32_to_bits(outputs[0]))
      << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(inputs[0])
      << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
      << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(outputs[0]);
  }

  TEST(ROUNDD__WASMSIMD_NATIVE, positive_subnormal) {
    std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
    std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
    for (uint32_t n = UINT32_C(0x00000000); n < UINT32_C(0x00800000); n += kBlockSize) {
      for (uint32_t i = 0; i < kBlockSize; i++) {
        inputs[i] = fp32_from_bits(std::max<uint32_t>(n + i, UINT32_C(0x00000001)));
      }
      xnn_math_f32_roundd__wasmsimd_native(kBlockSize * sizeof(float), inputs.data(), outputs.data());
      for (uint32_t i = 0; i < kBlockSize; i++) {
        const uint32_t reference_output = fp32_to_bits(std::floor(inputs[i]));
        ASSERT_EQ(reference_output, fp32_to_bits(outputs[i]))
          << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(inputs[i])
          << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
          << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(outputs[i]);
      }
    }
  }

  TEST(ROUNDD__WASMSIMD_NATIVE, negative_subnormal) {
    std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
    std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
    for (uint32_t n = UINT32_C(0x80000000); n < UINT32_C(0x80800000); n += kBlockSize) {
      for (uint32_t i = 0; i < kBlockSize; i++) {
        inputs[i] = fp32_from_bits(std::max<uint32_t>(n + i, UINT32_C(0x80000001)));
      }
      xnn_math_f32_roundd__wasmsimd_native(kBlockSize * sizeof(float), inputs.data(), outputs.data());
      for (uint32_t i = 0; i < kBlockSize; i++) {
        const uint32_t reference_output = fp32_to_bits(std::floor(inputs[i]));
        ASSERT_EQ(reference_output, fp32_to_bits(outputs[i]))
          << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(inputs[i])
          << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
          << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(outputs[i]);
      }
    }
  }

  TEST(ROUNDD__WASMSIMD_NATIVE, positive_normal) {
    std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
    std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
    for (uint32_t n = UINT32_C(0x00800000); n < UINT32_C(0x4B800000); n += kBlockSize) {
      for (uint32_t i = 0; i < kBlockSize; i++) {
        inputs[i] = fp32_from_bits(n + i);
      }
      xnn_math_f32_roundd__wasmsimd_native(kBlockSize * sizeof(float), inputs.data(), outputs.data());
      for (uint32_t i = 0; i < kBlockSize; i++) {
        const uint32_t reference_output = fp32_to_bits(std::floor(inputs[i]));
        ASSERT_EQ(reference_output, fp32_to_bits(outputs[i]))
          << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(inputs[i])
          << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
          << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(outputs[i]);
      }
    }
  }

  TEST(ROUNDD__WASMSIMD_NATIVE, negative_normal) {
    std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
    std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
    for (uint32_t n = UINT32_C(0x80800000); n < UINT32_C(0xCB800000); n += kBlockSize) {
      for (uint32_t i = 0; i < kBlockSize; i++) {
        inputs[i] = fp32_from_bits(n + i);
      }
      xnn_math_f32_roundd__wasmsimd_native(kBlockSize * sizeof(float), inputs.data(), outputs.data());
      for (uint32_t i = 0; i < kBlockSize; i++) {
        const uint32_t reference_output = fp32_to_bits(std::floor(inputs[i]));
        ASSERT_EQ(reference_output, fp32_to_bits(outputs[i]))
          << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(inputs[i])
          << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
          << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(outputs[i]);
      }
    }
  }

  TEST(ROUNDD__WASMSIMD_NATIVE, positive_integral) {
    std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
    std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
    for (uint32_t n = UINT32_C(0x4B800000); n < UINT32_C(0x7F800000); n += kBlockSize) {
      for (uint32_t i = 0; i < kBlockSize; i++) {
        inputs[i] = fp32_from_bits(n + i);
      }
      xnn_math_f32_roundd__wasmsimd_native(kBlockSize * sizeof(float), inputs.data(), outputs.data());
      for (uint32_t i = 0; i < kBlockSize; i++) {
        const uint32_t reference_output = fp32_to_bits(std::floor(inputs[i]));
        ASSERT_EQ(reference_output, fp32_to_bits(outputs[i]))
          << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(inputs[i])
          << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
          << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(outputs[i]);
      }
    }
  }

  TEST(ROUNDD__WASMSIMD_NATIVE, negative_integral) {
    std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
    std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
    for (uint32_t n = UINT32_C(0xCB800000); n < UINT32_C(0xFF800000); n += kBlockSize) {
      for (uint32_t i = 0; i < kBlockSize; i++) {
        inputs[i] = fp32_from_bits(n + i);
      }
      xnn_math_f32_roundd__wasmsimd_native(kBlockSize * sizeof(float), inputs.data(), outputs.data());
      for (uint32_t i = 0; i < kBlockSize; i++) {
        const uint32_t reference_output = fp32_to_bits(std::floor(inputs[i]));
        ASSERT_EQ(reference_output, fp32_to_bits(outputs[i]))
          << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(inputs[i])
          << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
          << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(outputs[i]);
      }
    }
  }

  TEST(ROUNDD__WASMSIMD_NATIVE, positive_infinity) {
    std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
    std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
    std::fill(inputs.begin(), inputs.end(), +std::numeric_limits<float>::infinity());
    xnn_math_f32_roundd__wasmsimd_native(kBlockSize * sizeof(float), inputs.data(), outputs.data());
    const uint32_t reference_output = fp32_to_bits(std::floor(inputs[0]));
    ASSERT_EQ(reference_output, fp32_to_bits(outputs[0]))
      << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(inputs[0])
      << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
      << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(outputs[0]);
  }

  TEST(ROUNDD__WASMSIMD_NATIVE, negative_infinity) {
    std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
    std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
    std::fill(inputs.begin(), inputs.end(), -std::numeric_limits<float>::infinity());
    xnn_math_f32_roundd__wasmsimd_native(kBlockSize * sizeof(float), inputs.data(), outputs.data());
    const uint32_t reference_output = fp32_to_bits(std::floor(inputs[0]));
    ASSERT_EQ(reference_output, fp32_to_bits(outputs[0]))
      << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(inputs[0])
      << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
      << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(outputs[0]);
  }

  TEST(ROUNDD__WASMSIMD_NATIVE, positive_qnan) {
    std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
    std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
    for (uint32_t n = UINT32_C(0x7FC00000); n < UINT32_C(0x80000000); n += kBlockSize) {
      for (uint32_t i = 0; i < kBlockSize; i++) {
        inputs[i] = fp32_from_bits(n + i);
      }
      xnn_math_f32_roundd__wasmsimd_native(kBlockSize * sizeof(float), inputs.data(), outputs.data());
      for (uint32_t i = 0; i < kBlockSize; i++) {
        const uint32_t reference_output = fp32_to_bits(std::floor(inputs[i]));
        ASSERT_EQ(reference_output, fp32_to_bits(outputs[i]))
          << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(inputs[i])
          << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
          << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(outputs[i]);
      }
    }
  }

  TEST(ROUNDD__WASMSIMD_NATIVE, negative_qnan) {
    std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
    std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
    for (uint32_t n = UINT32_C(0x7FC00000); n < UINT32_C(0x80000000); n += kBlockSize) {
      for (uint32_t i = 0; i < kBlockSize; i++) {
        inputs[i] = fp32_from_bits(UINT32_C(0x80000000) | (n + i));
      }
      xnn_math_f32_roundd__wasmsimd_native(kBlockSize * sizeof(float), inputs.data(), outputs.data());
      for (uint32_t i = 0; i < kBlockSize; i++) {
        const uint32_t reference_output = fp32_to_bits(std::floor(inputs[i]));
        ASSERT_EQ(reference_output, fp32_to_bits(outputs[i]))
          << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(inputs[i])
          << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
          << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(outputs[i]);
      }
    }
  }

  TEST(ROUNDD__WASMSIMD_NATIVE, positive_snan) {
    std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
    std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
    for (uint32_t n = UINT32_C(0x7F800000); n < UINT32_C(0x7FC00000); n += kBlockSize) {
      for (uint32_t i = 0; i < kBlockSize; i++) {
        inputs[i] = fp32_from_bits(std::max<uint32_t>(n + i, UINT32_C(0x7F800001)));
      }
      xnn_math_f32_roundd__wasmsimd_native(kBlockSize * sizeof(float), inputs.data(), outputs.data());
      for (uint32_t i = 0; i < kBlockSize; i++) {
        const uint32_t reference_output = fp32_to_bits(std::floor(inputs[i]));
        ASSERT_EQ(reference_output & UINT32_C(0xFFBFFFFF), fp32_to_bits(outputs[i]) & UINT32_C(0xFFBFFFFF))
          << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(inputs[i])
          << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
          << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(outputs[i]);
      }
    }
  }

  TEST(ROUNDD__WASMSIMD_NATIVE, negative_snan) {
    std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
    std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
    for (uint32_t n = UINT32_C(0x7F800000); n < UINT32_C(0x7FC00000); n += kBlockSize) {
      for (uint32_t i = 0; i < kBlockSize; i++) {
        inputs[i] = fp32_from_bits(UINT32_C(0x80000000) | std::max<uint32_t>(n + i, UINT32_C(0x7F800001)));
      }
      xnn_math_f32_roundd__wasmsimd_native(kBlockSize * sizeof(float), inputs.data(), outputs.data());
      for (uint32_t i = 0; i < kBlockSize; i++) {
        const uint32_t reference_output = fp32_to_bits(std::floor(inputs[i]));
        ASSERT_EQ(reference_output & UINT32_C(0xFFBFFFFF), fp32_to_bits(outputs[i]) & UINT32_C(0xFFBFFFFF))
          << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(inputs[i])
          << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
          << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(outputs[i]);
      }
    }
  }

  TEST(ROUNDD__WASMSIMD_NATIVE, positive_snan_to_qnan) {
    std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
    std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
    for (uint32_t n = UINT32_C(0x7F800000); n < UINT32_C(0x7FC00000); n += kBlockSize) {
      for (uint32_t i = 0; i < kBlockSize; i++) {
        inputs[i] = fp32_from_bits(std::max<uint32_t>(n + i, UINT32_C(0x7F800001)));
      }
      xnn_math_f32_roundd__wasmsimd_native(kBlockSize * sizeof(float), inputs.data(), outputs.data());
      for (uint32_t i = 0; i < kBlockSize; i++) {
        const uint32_t reference_output = fp32_to_bits(std::floor(inputs[i]));
        ASSERT_EQ(reference_output, fp32_to_bits(outputs[i]))
          << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(inputs[i])
          << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
          << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(outputs[i]);
      }
    }
  }

  TEST(ROUNDD__WASMSIMD_NATIVE, negative_snan_to_qnan) {
    std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
    std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
    for (uint32_t n = UINT32_C(0x7F800000); n < UINT32_C(0x7FC00000); n += kBlockSize) {
      for (uint32_t i = 0; i < kBlockSize; i++) {
        inputs[i] = fp32_from_bits(UINT32_C(0x80000000) | std::max<uint32_t>(n + i, UINT32_C(0x7F800001)));
      }
      xnn_math_f32_roundd__wasmsimd_native(kBlockSize * sizeof(float), inputs.data(), outputs.data());
      for (uint32_t i = 0; i < kBlockSize; i++) {
        const uint32_t reference_output = fp32_to_bits(std::floor(inputs[i]));
        ASSERT_EQ(reference_output, fp32_to_bits(outputs[i]))
          << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(inputs[i])
          << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
          << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(outputs[i]);
      }
    }
  }
#endif  // XNN_ARCH_WASMSIMD

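// The scalar add-subtract variant emulates floor without a dedicated rounding
// instruction. A minimal sketch of the idiom (an assumption for illustration,
// not the kernel source; it relies on the default round-to-nearest mode):
//
//   float roundd_addsub(float x) {
//     const float magic = 0x1.0p+23f;      // 2**23: float spacing reaches 1.0
//     if (!(fabsf(x) < magic)) return x;   // already integral, or NaN
//     const float rounded = copysignf((fabsf(x) + magic) - magic, x);
//     return rounded > x ? rounded - 1.0f : rounded;  // to-nearest -> floor
//   }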
TEST(ROUNDD__SCALAR_ADDSUB, positive_zero) {
  std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
  std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
  std::fill(inputs.begin(), inputs.end(), fp32_from_bits(UINT32_C(0x00000000)));
  xnn_math_f32_roundd__scalar_addsub(kBlockSize * sizeof(float), inputs.data(), outputs.data());
  const uint32_t reference_output = fp32_to_bits(std::floor(inputs[0]));
  ASSERT_EQ(reference_output, fp32_to_bits(outputs[0]))
    << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(inputs[0])
    << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
    << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(outputs[0]);
}

TEST(ROUNDD__SCALAR_ADDSUB, negative_zero) {
  std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
  std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
  std::fill(inputs.begin(), inputs.end(), fp32_from_bits(UINT32_C(0x80000000)));
  xnn_math_f32_roundd__scalar_addsub(kBlockSize * sizeof(float), inputs.data(), outputs.data());
  const uint32_t reference_output = fp32_to_bits(std::floor(inputs[0]));
  ASSERT_EQ(reference_output, fp32_to_bits(outputs[0]))
    << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(inputs[0])
    << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
    << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(outputs[0]);
}

TEST(ROUNDD__SCALAR_ADDSUB, positive_subnormal) {
  std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
  std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
  for (uint32_t n = UINT32_C(0x00000000); n < UINT32_C(0x00800000); n += kBlockSize) {
    for (uint32_t i = 0; i < kBlockSize; i++) {
      inputs[i] = fp32_from_bits(std::max<uint32_t>(n + i, UINT32_C(0x00000001)));
    }
    xnn_math_f32_roundd__scalar_addsub(kBlockSize * sizeof(float), inputs.data(), outputs.data());
    for (uint32_t i = 0; i < kBlockSize; i++) {
      const uint32_t reference_output = fp32_to_bits(std::floor(inputs[i]));
      ASSERT_EQ(reference_output, fp32_to_bits(outputs[i]))
        << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(inputs[i])
        << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
        << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(outputs[i]);
    }
  }
}

TEST(ROUNDD__SCALAR_ADDSUB, negative_subnormal) {
  std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
  std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
  for (uint32_t n = UINT32_C(0x80000000); n < UINT32_C(0x80800000); n += kBlockSize) {
    for (uint32_t i = 0; i < kBlockSize; i++) {
      inputs[i] = fp32_from_bits(std::max<uint32_t>(n + i, UINT32_C(0x80000001)));
    }
    xnn_math_f32_roundd__scalar_addsub(kBlockSize * sizeof(float), inputs.data(), outputs.data());
    for (uint32_t i = 0; i < kBlockSize; i++) {
      const uint32_t reference_output = fp32_to_bits(std::floor(inputs[i]));
      ASSERT_EQ(reference_output, fp32_to_bits(outputs[i]))
        << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(inputs[i])
        << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
        << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(outputs[i]);
    }
  }
}

TEST(ROUNDD__SCALAR_ADDSUB, positive_normal) {
  std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
  std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
  for (uint32_t n = UINT32_C(0x00800000); n < UINT32_C(0x4B800000); n += kBlockSize) {
    for (uint32_t i = 0; i < kBlockSize; i++) {
      inputs[i] = fp32_from_bits(n + i);
    }
    xnn_math_f32_roundd__scalar_addsub(kBlockSize * sizeof(float), inputs.data(), outputs.data());
    for (uint32_t i = 0; i < kBlockSize; i++) {
      const uint32_t reference_output = fp32_to_bits(std::floor(inputs[i]));
      ASSERT_EQ(reference_output, fp32_to_bits(outputs[i]))
        << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(inputs[i])
        << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
        << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(outputs[i]);
    }
  }
}

TEST(ROUNDD__SCALAR_ADDSUB, negative_normal) {
  std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
  std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
  for (uint32_t n = UINT32_C(0x80800000); n < UINT32_C(0xCB800000); n += kBlockSize) {
    for (uint32_t i = 0; i < kBlockSize; i++) {
      inputs[i] = fp32_from_bits(n + i);
    }
    xnn_math_f32_roundd__scalar_addsub(kBlockSize * sizeof(float), inputs.data(), outputs.data());
    for (uint32_t i = 0; i < kBlockSize; i++) {
      const uint32_t reference_output = fp32_to_bits(std::floor(inputs[i]));
      ASSERT_EQ(reference_output, fp32_to_bits(outputs[i]))
        << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(inputs[i])
        << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
        << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(outputs[i]);
    }
  }
}

TEST(ROUNDD__SCALAR_ADDSUB, positive_integral) {
  std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
  std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
  for (uint32_t n = UINT32_C(0x4B800000); n < UINT32_C(0x7F800000); n += kBlockSize) {
    for (uint32_t i = 0; i < kBlockSize; i++) {
      inputs[i] = fp32_from_bits(n + i);
    }
    xnn_math_f32_roundd__scalar_addsub(kBlockSize * sizeof(float), inputs.data(), outputs.data());
    for (uint32_t i = 0; i < kBlockSize; i++) {
      const uint32_t reference_output = fp32_to_bits(std::floor(inputs[i]));
      ASSERT_EQ(reference_output, fp32_to_bits(outputs[i]))
        << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(inputs[i])
        << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
        << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(outputs[i]);
    }
  }
}

TEST(ROUNDD__SCALAR_ADDSUB, negative_integral) {
  std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
  std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
  for (uint32_t n = UINT32_C(0xCB800000); n < UINT32_C(0xFF800000); n += kBlockSize) {
    for (uint32_t i = 0; i < kBlockSize; i++) {
      inputs[i] = fp32_from_bits(n + i);
    }
    xnn_math_f32_roundd__scalar_addsub(kBlockSize * sizeof(float), inputs.data(), outputs.data());
    for (uint32_t i = 0; i < kBlockSize; i++) {
      const uint32_t reference_output = fp32_to_bits(std::floor(inputs[i]));
      ASSERT_EQ(reference_output, fp32_to_bits(outputs[i]))
        << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(inputs[i])
        << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
        << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(outputs[i]);
    }
  }
}

TEST(ROUNDD__SCALAR_ADDSUB, positive_infinity) {
  std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
  std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
  std::fill(inputs.begin(), inputs.end(), +std::numeric_limits<float>::infinity());
  xnn_math_f32_roundd__scalar_addsub(kBlockSize * sizeof(float), inputs.data(), outputs.data());
  const uint32_t reference_output = fp32_to_bits(std::floor(inputs[0]));
  ASSERT_EQ(reference_output, fp32_to_bits(outputs[0]))
    << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(inputs[0])
    << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
    << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(outputs[0]);
}

TEST(ROUNDD__SCALAR_ADDSUB, negative_infinity) {
  std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
  std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
  std::fill(inputs.begin(), inputs.end(), -std::numeric_limits<float>::infinity());
  xnn_math_f32_roundd__scalar_addsub(kBlockSize * sizeof(float), inputs.data(), outputs.data());
  const uint32_t reference_output = fp32_to_bits(std::floor(inputs[0]));
  ASSERT_EQ(reference_output, fp32_to_bits(outputs[0]))
    << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(inputs[0])
    << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
    << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(outputs[0]);
}

TEST(ROUNDD__SCALAR_ADDSUB, positive_qnan) {
  std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
  std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
  for (uint32_t n = UINT32_C(0x7FC00000); n < UINT32_C(0x80000000); n += kBlockSize) {
    for (uint32_t i = 0; i < kBlockSize; i++) {
      inputs[i] = fp32_from_bits(n + i);
    }
    xnn_math_f32_roundd__scalar_addsub(kBlockSize * sizeof(float), inputs.data(), outputs.data());
    for (uint32_t i = 0; i < kBlockSize; i++) {
      const uint32_t reference_output = fp32_to_bits(std::floor(inputs[i]));
      ASSERT_EQ(reference_output, fp32_to_bits(outputs[i]))
        << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(inputs[i])
        << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
        << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(outputs[i]);
    }
  }
}

TEST(ROUNDD__SCALAR_ADDSUB, negative_qnan) {
  std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
  std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
  for (uint32_t n = UINT32_C(0x7FC00000); n < UINT32_C(0x80000000); n += kBlockSize) {
    for (uint32_t i = 0; i < kBlockSize; i++) {
      inputs[i] = fp32_from_bits(UINT32_C(0x80000000) | (n + i));
    }
    xnn_math_f32_roundd__scalar_addsub(kBlockSize * sizeof(float), inputs.data(), outputs.data());
    for (uint32_t i = 0; i < kBlockSize; i++) {
      const uint32_t reference_output = fp32_to_bits(std::floor(inputs[i]));
      ASSERT_EQ(reference_output, fp32_to_bits(outputs[i]))
        << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(inputs[i])
        << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
        << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(outputs[i]);
    }
  }
}

TEST(ROUNDD__SCALAR_ADDSUB, positive_snan) {
  std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
  std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
  for (uint32_t n = UINT32_C(0x7F800000); n < UINT32_C(0x7FC00000); n += kBlockSize) {
    for (uint32_t i = 0; i < kBlockSize; i++) {
      inputs[i] = fp32_from_bits(std::max<uint32_t>(n + i, UINT32_C(0x7F800001)));
    }
    xnn_math_f32_roundd__scalar_addsub(kBlockSize * sizeof(float), inputs.data(), outputs.data());
    for (uint32_t i = 0; i < kBlockSize; i++) {
      const uint32_t reference_output = fp32_to_bits(std::floor(inputs[i]));
      ASSERT_EQ(reference_output & UINT32_C(0xFFBFFFFF), fp32_to_bits(outputs[i]) & UINT32_C(0xFFBFFFFF))
        << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(inputs[i])
        << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
        << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(outputs[i]);
    }
  }
}

TEST(ROUNDD__SCALAR_ADDSUB, negative_snan) {
  std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
  std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
  for (uint32_t n = UINT32_C(0x7F800000); n < UINT32_C(0x7FC00000); n += kBlockSize) {
    for (uint32_t i = 0; i < kBlockSize; i++) {
      inputs[i] = fp32_from_bits(UINT32_C(0x80000000) | std::max<uint32_t>(n + i, UINT32_C(0x7F800001)));
    }
    xnn_math_f32_roundd__scalar_addsub(kBlockSize * sizeof(float), inputs.data(), outputs.data());
    for (uint32_t i = 0; i < kBlockSize; i++) {
      const uint32_t reference_output = fp32_to_bits(std::floor(inputs[i]));
      ASSERT_EQ(reference_output & UINT32_C(0xFFBFFFFF), fp32_to_bits(outputs[i]) & UINT32_C(0xFFBFFFFF))
        << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(inputs[i])
        << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
        << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(outputs[i]);
    }
  }
}

TEST(ROUNDD__SCALAR_ADDSUB, positive_snan_to_qnan) {
  std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
  std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
  for (uint32_t n = UINT32_C(0x7F800000); n < UINT32_C(0x7FC00000); n += kBlockSize) {
    for (uint32_t i = 0; i < kBlockSize; i++) {
      inputs[i] = fp32_from_bits(std::max<uint32_t>(n + i, UINT32_C(0x7F800001)));
    }
    xnn_math_f32_roundd__scalar_addsub(kBlockSize * sizeof(float), inputs.data(), outputs.data());
    for (uint32_t i = 0; i < kBlockSize; i++) {
      const uint32_t reference_output = fp32_to_bits(std::floor(inputs[i]));
      ASSERT_EQ(reference_output, fp32_to_bits(outputs[i]))
        << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(inputs[i])
        << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
        << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(outputs[i]);
    }
  }
}

TEST(ROUNDD__SCALAR_ADDSUB, negative_snan_to_qnan) {
  std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
  std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
  for (uint32_t n = UINT32_C(0x7F800000); n < UINT32_C(0x7FC00000); n += kBlockSize) {
    for (uint32_t i = 0; i < kBlockSize; i++) {
      inputs[i] = fp32_from_bits(UINT32_C(0x80000000) | std::max<uint32_t>(n + i, UINT32_C(0x7F800001)));
    }
    xnn_math_f32_roundd__scalar_addsub(kBlockSize * sizeof(float), inputs.data(), outputs.data());
    for (uint32_t i = 0; i < kBlockSize; i++) {
      const uint32_t reference_output = fp32_to_bits(std::floor(inputs[i]));
      ASSERT_EQ(reference_output, fp32_to_bits(outputs[i]))
        << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(inputs[i])
        << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
        << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(outputs[i]);
    }
  }
}

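// The scalar convert variant mirrors the WAsm SIMD cvt tests above: the same
// int-conversion idiom, one element at a time.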
TEST(ROUNDD__SCALAR_CVT, positive_zero) {
  std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
  std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
  std::fill(inputs.begin(), inputs.end(), fp32_from_bits(UINT32_C(0x00000000)));
  xnn_math_f32_roundd__scalar_cvt(kBlockSize * sizeof(float), inputs.data(), outputs.data());
  const uint32_t reference_output = fp32_to_bits(std::floor(inputs[0]));
  ASSERT_EQ(reference_output, fp32_to_bits(outputs[0]))
    << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(inputs[0])
    << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
    << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(outputs[0]);
}

TEST(ROUNDD__SCALAR_CVT, negative_zero) {
  std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
  std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
  std::fill(inputs.begin(), inputs.end(), fp32_from_bits(UINT32_C(0x80000000)));
  xnn_math_f32_roundd__scalar_cvt(kBlockSize * sizeof(float), inputs.data(), outputs.data());
  const uint32_t reference_output = fp32_to_bits(std::floor(inputs[0]));
  ASSERT_EQ(reference_output, fp32_to_bits(outputs[0]))
    << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(inputs[0])
    << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
    << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(outputs[0]);
}

2708TEST(ROUNDD__SCALAR_CVT, positive_subnormal) {
2709 std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
2710 std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
2711 for (uint32_t n = UINT32_C(0x00000000); n < UINT32_C(0x00800000); n += kBlockSize) {
2712 for (uint32_t i = 0; i < kBlockSize; i++) {
2713 inputs[i] = fp32_from_bits(std::max<uint32_t>(n + i, UINT32_C(0x00000001)));
2714 }
2715 xnn_math_f32_roundd__scalar_cvt(kBlockSize * sizeof(float), inputs.data(), outputs.data());
2716 for (uint32_t i = 0; i < kBlockSize; i++) {
2717 const uint32_t reference_output = fp32_to_bits(std::floor(inputs[i]));
2718 ASSERT_EQ(reference_output, fp32_to_bits(outputs[i]))
2719 << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(inputs[i])
2720 << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
2721 << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(outputs[i]);
2722 }
2723 }
2724}
2725
2726TEST(ROUNDD__SCALAR_CVT, negative_subnormal) {
2727 std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
2728 std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
2729 for (uint32_t n = UINT32_C(0x80000000); n < UINT32_C(0x80800000); n += kBlockSize) {
2730 for (uint32_t i = 0; i < kBlockSize; i++) {
2731 inputs[i] = fp32_from_bits(std::max<uint32_t>(n + i, UINT32_C(0x80000001)));
2732 }
2733 xnn_math_f32_roundd__scalar_cvt(kBlockSize * sizeof(float), inputs.data(), outputs.data());
2734 for (uint32_t i = 0; i < kBlockSize; i++) {
2735 const uint32_t reference_output = fp32_to_bits(std::floor(inputs[i]));
2736 ASSERT_EQ(reference_output, fp32_to_bits(outputs[i]))
2737 << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(inputs[i])
2738 << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
2739 << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(outputs[i]);
2740 }
2741 }
2742}
2743
Marat Dukhanc9852ba2020-05-13 17:21:29 -07002744TEST(ROUNDD__SCALAR_CVT, positive_normal) {
2745 std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
2746 std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
Marat Dukhanea575d92020-05-31 23:49:00 -07002747 for (uint32_t n = UINT32_C(0x00800000); n < UINT32_C(0x4B800000); n += kBlockSize) {
Marat Dukhanc9852ba2020-05-13 17:21:29 -07002748 for (uint32_t i = 0; i < kBlockSize; i++) {
2749 inputs[i] = fp32_from_bits(n + i);
2750 }
2751 xnn_math_f32_roundd__scalar_cvt(kBlockSize * sizeof(float), inputs.data(), outputs.data());
2752 for (uint32_t i = 0; i < kBlockSize; i++) {
2753 const uint32_t reference_output = fp32_to_bits(std::floor(inputs[i]));
2754 ASSERT_EQ(reference_output, fp32_to_bits(outputs[i]))
2755 << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(inputs[i])
2756 << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
2757 << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(outputs[i]);
2758 }
2759 }
2760}
2761
2762TEST(ROUNDD__SCALAR_CVT, negative_normal) {
2763 std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
2764 std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
Marat Dukhanea575d92020-05-31 23:49:00 -07002765 for (uint32_t n = UINT32_C(0x80800000); n < UINT32_C(0xCB800000); n += kBlockSize) {
Marat Dukhanc9852ba2020-05-13 17:21:29 -07002766 for (uint32_t i = 0; i < kBlockSize; i++) {
2767 inputs[i] = fp32_from_bits(n + i);
2768 }
2769 xnn_math_f32_roundd__scalar_cvt(kBlockSize * sizeof(float), inputs.data(), outputs.data());
2770 for (uint32_t i = 0; i < kBlockSize; i++) {
2771 const uint32_t reference_output = fp32_to_bits(std::floor(inputs[i]));
2772 ASSERT_EQ(reference_output, fp32_to_bits(outputs[i]))
2773 << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(inputs[i])
2774 << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
2775 << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(outputs[i]);
2776 }
2777 }
2778}
2779
2780TEST(ROUNDD__SCALAR_CVT, positive_integral) {
2781 std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
2782 std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
2783 for (uint32_t n = UINT32_C(0x4B800000); n < UINT32_C(0x7F800000); n += kBlockSize) {
2784 for (uint32_t i = 0; i < kBlockSize; i++) {
2785 inputs[i] = fp32_from_bits(n + i);
2786 }
2787 xnn_math_f32_roundd__scalar_cvt(kBlockSize * sizeof(float), inputs.data(), outputs.data());
2788 for (uint32_t i = 0; i < kBlockSize; i++) {
2789 const uint32_t reference_output = fp32_to_bits(std::floor(inputs[i]));
2790 ASSERT_EQ(reference_output, fp32_to_bits(outputs[i]))
2791 << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(inputs[i])
2792 << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
2793 << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(outputs[i]);
2794 }
2795 }
2796}
2797
2798TEST(ROUNDD__SCALAR_CVT, negative_integral) {
2799 std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
2800 std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
2801 for (uint32_t n = UINT32_C(0xCB800000); n < UINT32_C(0xFF800000); n += kBlockSize) {
2802 for (uint32_t i = 0; i < kBlockSize; i++) {
2803 inputs[i] = fp32_from_bits(n + i);
2804 }
2805 xnn_math_f32_roundd__scalar_cvt(kBlockSize * sizeof(float), inputs.data(), outputs.data());
2806 for (uint32_t i = 0; i < kBlockSize; i++) {
2807 const uint32_t reference_output = fp32_to_bits(std::floor(inputs[i]));
2808 ASSERT_EQ(reference_output, fp32_to_bits(outputs[i]))
2809 << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(inputs[i])
2810 << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
2811 << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(outputs[i]);
2812 }
2813 }
2814}
2815
2816TEST(ROUNDD__SCALAR_CVT, positive_infinity) {
2817 std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
2818 std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
Marat Dukhand3102142020-06-08 01:24:01 -07002819 std::fill(inputs.begin(), inputs.end(), +std::numeric_limits<float>::infinity());
Marat Dukhanc9852ba2020-05-13 17:21:29 -07002820 xnn_math_f32_roundd__scalar_cvt(kBlockSize * sizeof(float), inputs.data(), outputs.data());
2821 const uint32_t reference_output = fp32_to_bits(std::floor(inputs[0]));
2822 ASSERT_EQ(reference_output, fp32_to_bits(outputs[0]))
2823 << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(inputs[0])
2824 << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
2825 << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(outputs[0]);
2826}
2827
2828TEST(ROUNDD__SCALAR_CVT, negative_infinity) {
2829 std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
2830 std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
Marat Dukhand3102142020-06-08 01:24:01 -07002831 std::fill(inputs.begin(), inputs.end(), -std::numeric_limits<float>::infinity());
Marat Dukhanc9852ba2020-05-13 17:21:29 -07002832 xnn_math_f32_roundd__scalar_cvt(kBlockSize * sizeof(float), inputs.data(), outputs.data());
2833 const uint32_t reference_output = fp32_to_bits(std::floor(inputs[0]));
2834 ASSERT_EQ(reference_output, fp32_to_bits(outputs[0]))
2835 << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(inputs[0])
2836 << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
2837 << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(outputs[0]);
2838}
2839
2840TEST(ROUNDD__SCALAR_CVT, positive_qnan) {
2841 std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
2842 std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
2843 for (uint32_t n = UINT32_C(0x7FC00000); n < UINT32_C(0x80000000); n += kBlockSize) {
2844 for (uint32_t i = 0; i < kBlockSize; i++) {
2845 inputs[i] = fp32_from_bits(n + i);
2846 }
2847 xnn_math_f32_roundd__scalar_cvt(kBlockSize * sizeof(float), inputs.data(), outputs.data());
2848 for (uint32_t i = 0; i < kBlockSize; i++) {
2849 const uint32_t reference_output = fp32_to_bits(std::floor(inputs[i]));
2850 ASSERT_EQ(reference_output, fp32_to_bits(outputs[i]))
2851 << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(inputs[i])
2852 << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
2853 << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(outputs[i]);
2854 }
2855 }
2856}
2857
2858TEST(ROUNDD__SCALAR_CVT, negative_qnan) {
2859 std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
2860 std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
2861 for (uint32_t n = UINT32_C(0x7FC00000); n < UINT32_C(0x80000000); n += kBlockSize) {
2862 for (uint32_t i = 0; i < kBlockSize; i++) {
2863 inputs[i] = fp32_from_bits(UINT32_C(0x80000000) | (n + i));
2864 }
2865 xnn_math_f32_roundd__scalar_cvt(kBlockSize * sizeof(float), inputs.data(), outputs.data());
2866 for (uint32_t i = 0; i < kBlockSize; i++) {
2867 const uint32_t reference_output = fp32_to_bits(std::floor(inputs[i]));
2868 ASSERT_EQ(reference_output, fp32_to_bits(outputs[i]))
2869 << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(inputs[i])
2870 << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
2871 << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(outputs[i]);
2872 }
2873 }
2874}
2875
TEST(ROUNDD__SCALAR_CVT, positive_snan) {
  std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
  std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
  for (uint32_t n = UINT32_C(0x7F800000); n < UINT32_C(0x7FC00000); n += kBlockSize) {
    for (uint32_t i = 0; i < kBlockSize; i++) {
      inputs[i] = fp32_from_bits(std::max<uint32_t>(n + i, UINT32_C(0x7F800001)));
    }
    xnn_math_f32_roundd__scalar_cvt(kBlockSize * sizeof(float), inputs.data(), outputs.data());
    for (uint32_t i = 0; i < kBlockSize; i++) {
      const uint32_t reference_output = fp32_to_bits(std::floor(inputs[i]));
      ASSERT_EQ(reference_output & UINT32_C(0xFFBFFFFF), fp32_to_bits(outputs[i]) & UINT32_C(0xFFBFFFFF))
        << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(inputs[i])
        << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
        << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(outputs[i]);
    }
  }
}

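// Same sweep as positive_snan, but with the sign bit set on every input.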
TEST(ROUNDD__SCALAR_CVT, negative_snan) {
  std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
  std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
  for (uint32_t n = UINT32_C(0x7F800000); n < UINT32_C(0x7FC00000); n += kBlockSize) {
    for (uint32_t i = 0; i < kBlockSize; i++) {
      inputs[i] = fp32_from_bits(UINT32_C(0x80000000) | std::max<uint32_t>(n + i, UINT32_C(0x7F800001)));
    }
    xnn_math_f32_roundd__scalar_cvt(kBlockSize * sizeof(float), inputs.data(), outputs.data());
    for (uint32_t i = 0; i < kBlockSize; i++) {
      const uint32_t reference_output = fp32_to_bits(std::floor(inputs[i]));
      ASSERT_EQ(reference_output & UINT32_C(0xFFBFFFFF), fp32_to_bits(outputs[i]) & UINT32_C(0xFFBFFFFF))
        << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(inputs[i])
        << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
        << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(outputs[i]);
    }
  }
}

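// Same inputs as positive_snan, but compared without masking the quiet bit:
// the implementation must quiet each signaling NaN exactly as std::floor does.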
TEST(ROUNDD__SCALAR_CVT, positive_snan_to_qnan) {
  std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
  std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
  for (uint32_t n = UINT32_C(0x7F800000); n < UINT32_C(0x7FC00000); n += kBlockSize) {
    for (uint32_t i = 0; i < kBlockSize; i++) {
      inputs[i] = fp32_from_bits(std::max<uint32_t>(n + i, UINT32_C(0x7F800001)));
    }
    xnn_math_f32_roundd__scalar_cvt(kBlockSize * sizeof(float), inputs.data(), outputs.data());
    for (uint32_t i = 0; i < kBlockSize; i++) {
      const uint32_t reference_output = fp32_to_bits(std::floor(inputs[i]));
      ASSERT_EQ(reference_output, fp32_to_bits(outputs[i]))
        << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(inputs[i])
        << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
        << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(outputs[i]);
    }
  }
}

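// Same unmasked check as positive_snan_to_qnan, with the sign bit set on every
// input.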
TEST(ROUNDD__SCALAR_CVT, negative_snan_to_qnan) {
  std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
  std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
  for (uint32_t n = UINT32_C(0x7F800000); n < UINT32_C(0x7FC00000); n += kBlockSize) {
    for (uint32_t i = 0; i < kBlockSize; i++) {
      inputs[i] = fp32_from_bits(UINT32_C(0x80000000) | std::max<uint32_t>(n + i, UINT32_C(0x7F800001)));
    }
    xnn_math_f32_roundd__scalar_cvt(kBlockSize * sizeof(float), inputs.data(), outputs.data());
    for (uint32_t i = 0; i < kBlockSize; i++) {
      const uint32_t reference_output = fp32_to_bits(std::floor(inputs[i]));
      ASSERT_EQ(reference_output, fp32_to_bits(outputs[i]))
        << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(inputs[i])
        << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
        << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(outputs[i]);
    }
  }
}