// Copyright 2021 Google LLC
//
// This source code is licensed under the BSD-style license found in the
// LICENSE file in the root directory of this source tree.
5
#include <algorithm>
#include <cmath>
#include <cstddef>
#include <cstdint>
#include <cstdlib>
#include <iomanip>
#include <ios>
#include <limits>
#include <vector>

#include <gtest/gtest.h>

#include <fp16.h>

#include <xnnpack/AlignedAllocator.h>
#include <xnnpack/common.h>
#include <xnnpack/isa-checks.h>
#include <xnnpack/math-stubs.h>
23
24
25constexpr int kBlockSize = 1024;
26
27#if XNN_ARCH_X86 || XNN_ARCH_X86_64
Marat Dukhan056f49d2021-11-08 17:44:42 -080028 TEST(CVT__SSE2, positive_normal) {
29 std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
30 std::vector<uint16_t, AlignedAllocator<uint16_t, 64>> outputs(kBlockSize);
31 for (uint32_t n = UINT32_C(0x387FE000); n < UINT32_C(0x477FF000); n += kBlockSize) {
32 for (uint32_t i = 0; i < kBlockSize; i++) {
33 inputs[i] = fp32_from_bits(n + i);
34 }
35 xnn_math_f32_f16_cvt__sse2(kBlockSize * sizeof(uint16_t), inputs.data(), outputs.data());
36 for (uint32_t i = 0; i < kBlockSize; i++) {
37 const uint16_t reference_output = fp16_ieee_from_fp32_value(inputs[i]);
38 ASSERT_EQ(reference_output, outputs[i])
39 << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(inputs[i])
40 << ", reference = 0x" << std::hex << std::setw(4) << std::setfill('0') << reference_output
41 << ", optimized = 0x" << std::hex << std::setw(4) << std::setfill('0') << outputs[i];
42 }
43 }
44 }
45
46 TEST(CVT__SSE2, negative_normal) {
47 std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
48 std::vector<uint16_t, AlignedAllocator<uint16_t, 64>> outputs(kBlockSize);
49 for (uint32_t n = UINT32_C(0xB87FE000); n < UINT32_C(0xC77FF000); n += kBlockSize) {
50 for (uint32_t i = 0; i < kBlockSize; i++) {
51 inputs[i] = fp32_from_bits(n + i);
52 }
53 xnn_math_f32_f16_cvt__sse2(kBlockSize * sizeof(uint16_t), inputs.data(), outputs.data());
54 for (uint32_t i = 0; i < kBlockSize; i++) {
55 const uint16_t reference_output = fp16_ieee_from_fp32_value(inputs[i]);
56 ASSERT_EQ(reference_output, outputs[i])
57 << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(inputs[i])
58 << ", reference = 0x" << std::hex << std::setw(4) << std::setfill('0') << reference_output
59 << ", optimized = 0x" << std::hex << std::setw(4) << std::setfill('0') << outputs[i];
60 }
61 }
62 }
63
64 TEST(CVT__SSE2, positive_subnormal) {
65 std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
66 std::vector<uint16_t, AlignedAllocator<uint16_t, 64>> outputs(kBlockSize);
67 for (uint32_t n = UINT32_C(0x33000001); n < UINT32_C(0x387FE000); n += kBlockSize) {
68 for (uint32_t i = 0; i < kBlockSize; i++) {
69 inputs[i] = fp32_from_bits(std::min<uint32_t>(n + i, UINT32_C(0x387FDFFF)));
70 }
71 xnn_math_f32_f16_cvt__sse2(kBlockSize * sizeof(uint16_t), inputs.data(), outputs.data());
72 for (uint32_t i = 0; i < kBlockSize; i++) {
73 const uint16_t reference_output = fp16_ieee_from_fp32_value(inputs[i]);
74 ASSERT_EQ(reference_output, outputs[i])
75 << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(inputs[i])
76 << ", reference = 0x" << std::hex << std::setw(4) << std::setfill('0') << reference_output
77 << ", optimized = 0x" << std::hex << std::setw(4) << std::setfill('0') << outputs[i];
78 }
79 }
80 }
81
82 TEST(CVT__SSE2, negative_subnormal) {
83 std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
84 std::vector<uint16_t, AlignedAllocator<uint16_t, 64>> outputs(kBlockSize);
85 for (uint32_t n = UINT32_C(0xB3000001); n < UINT32_C(0xB87FE000); n += kBlockSize) {
86 for (uint32_t i = 0; i < kBlockSize; i++) {
87 inputs[i] = fp32_from_bits(std::min<uint32_t>(n + i, UINT32_C(0xB87FDFFF)));
88 }
89 xnn_math_f32_f16_cvt__sse2(kBlockSize * sizeof(uint16_t), inputs.data(), outputs.data());
90 for (uint32_t i = 0; i < kBlockSize; i++) {
91 const uint16_t reference_output = fp16_ieee_from_fp32_value(inputs[i]);
92 ASSERT_EQ(reference_output, outputs[i])
93 << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(inputs[i])
94 << ", reference = 0x" << std::hex << std::setw(4) << std::setfill('0') << reference_output
95 << ", optimized = 0x" << std::hex << std::setw(4) << std::setfill('0') << outputs[i];
96 }
97 }
98 }
99
100 TEST(CVT__SSE2, positive_underflow) {
101 std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
102 std::vector<uint16_t, AlignedAllocator<uint16_t, 64>> outputs(kBlockSize);
103 for (uint32_t n = UINT32_C(0x00000001); n < UINT32_C(0x33000001); n += kBlockSize) {
104 for (uint32_t i = 0; i < kBlockSize; i++) {
105 inputs[i] = fp32_from_bits(n + i);
106 }
107 xnn_math_f32_f16_cvt__sse2(kBlockSize * sizeof(uint16_t), inputs.data(), outputs.data());
108 for (uint32_t i = 0; i < kBlockSize; i++) {
109 const uint16_t reference_output = UINT16_C(0x0000);
110 ASSERT_EQ(reference_output, outputs[i])
111 << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(inputs[i])
112 << ", reference = 0x" << std::hex << std::setw(4) << std::setfill('0') << reference_output
113 << ", optimized = 0x" << std::hex << std::setw(4) << std::setfill('0') << outputs[i];
114 }
115 }
116 }
117
118 TEST(CVT__SSE2, negative_underflow) {
119 std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
120 std::vector<uint16_t, AlignedAllocator<uint16_t, 64>> outputs(kBlockSize);
121 for (uint32_t n = UINT32_C(0x80000001); n < UINT32_C(0xB3000001); n += kBlockSize) {
122 for (uint32_t i = 0; i < kBlockSize; i++) {
123 inputs[i] = fp32_from_bits(n + i);
124 }
125 xnn_math_f32_f16_cvt__sse2(kBlockSize * sizeof(uint16_t), inputs.data(), outputs.data());
126 for (uint32_t i = 0; i < kBlockSize; i++) {
127 const uint16_t reference_output = UINT16_C(0x8000);
128 ASSERT_EQ(reference_output, outputs[i])
129 << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(inputs[i])
130 << ", reference = 0x" << std::hex << std::setw(4) << std::setfill('0') << reference_output
131 << ", optimized = 0x" << std::hex << std::setw(4) << std::setfill('0') << outputs[i];
132 }
133 }
134 }
135
136 TEST(CVT__SSE2, positive_zero) {
137 std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
138 std::vector<uint16_t, AlignedAllocator<uint16_t, 64>> outputs(kBlockSize);
139 std::fill(inputs.begin(), inputs.end(), +0.0f);
140 xnn_math_f32_f16_cvt__sse2(kBlockSize * sizeof(uint16_t), inputs.data(), outputs.data());
141 const uint16_t reference_output = UINT16_C(0x0000);
142 ASSERT_EQ(reference_output, outputs[0])
143 << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(inputs[0])
144 << ", reference = 0x" << std::hex << std::setw(4) << std::setfill('0') << reference_output
145 << ", optimized = 0x" << std::hex << std::setw(4) << std::setfill('0') << outputs[0];
146 }
147
148 TEST(CVT__SSE2, negative_zero) {
149 std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
150 std::vector<uint16_t, AlignedAllocator<uint16_t, 64>> outputs(kBlockSize);
151 std::fill(inputs.begin(), inputs.end(), -0.0f);
152 xnn_math_f32_f16_cvt__sse2(kBlockSize * sizeof(uint16_t), inputs.data(), outputs.data());
153 const uint16_t reference_output = UINT16_C(0x8000);
154 ASSERT_EQ(reference_output, outputs[0])
155 << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(inputs[0])
156 << ", reference = 0x" << std::hex << std::setw(4) << std::setfill('0') << reference_output
157 << ", optimized = 0x" << std::hex << std::setw(4) << std::setfill('0') << outputs[0];
158 }
159
160 TEST(CVT__SSE2, positive_overflow) {
161 std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
162 std::vector<uint16_t, AlignedAllocator<uint16_t, 64>> outputs(kBlockSize);
163 for (uint32_t n = UINT32_C(0x477FF000); n < UINT32_C(0x7F800000); n += kBlockSize) {
164 for (uint32_t i = 0; i < kBlockSize; i++) {
165 inputs[i] = fp32_from_bits(n + i);
166 }
167 xnn_math_f32_f16_cvt__sse2(kBlockSize * sizeof(uint16_t), inputs.data(), outputs.data());
168 for (uint32_t i = 0; i < kBlockSize; i++) {
169 const uint16_t reference_output = UINT16_C(0x7C00);
170 ASSERT_EQ(reference_output, outputs[i])
171 << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(inputs[i])
172 << ", reference = 0x" << std::hex << std::setw(4) << std::setfill('0') << reference_output
173 << ", optimized = 0x" << std::hex << std::setw(4) << std::setfill('0') << outputs[i];
174 }
175 }
176 }
177
178 TEST(CVT__SSE2, negative_overflow) {
179 std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
180 std::vector<uint16_t, AlignedAllocator<uint16_t, 64>> outputs(kBlockSize);
181 for (uint32_t n = UINT32_C(0xC77FF000); n < UINT32_C(0xFF800000); n += kBlockSize) {
182 for (uint32_t i = 0; i < kBlockSize; i++) {
183 inputs[i] = fp32_from_bits(n + i);
184 }
185 xnn_math_f32_f16_cvt__sse2(kBlockSize * sizeof(uint16_t), inputs.data(), outputs.data());
186 for (uint32_t i = 0; i < kBlockSize; i++) {
187 const uint16_t reference_output = UINT16_C(0xFC00);
188 ASSERT_EQ(reference_output, outputs[i])
189 << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(inputs[i])
190 << ", reference = 0x" << std::hex << std::setw(4) << std::setfill('0') << reference_output
191 << ", optimized = 0x" << std::hex << std::setw(4) << std::setfill('0') << outputs[i];
192 }
193 }
194 }
195
196 TEST(CVT__SSE2, positive_infinity) {
197 std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
198 std::vector<uint16_t, AlignedAllocator<uint16_t, 64>> outputs(kBlockSize);
199 std::fill(inputs.begin(), inputs.end(), +std::numeric_limits<float>::infinity());
200 xnn_math_f32_f16_cvt__sse2(kBlockSize * sizeof(uint16_t), inputs.data(), outputs.data());
201 const uint16_t reference_output = UINT16_C(0x7C00);
202 ASSERT_EQ(reference_output, outputs[0])
203 << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(inputs[0])
204 << ", reference = 0x" << std::hex << std::setw(4) << std::setfill('0') << reference_output
205 << ", optimized = 0x" << std::hex << std::setw(4) << std::setfill('0') << outputs[0];
206 }
207
208 TEST(CVT__SSE2, negative_infinity) {
209 std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
210 std::vector<uint16_t, AlignedAllocator<uint16_t, 64>> outputs(kBlockSize);
211 std::fill(inputs.begin(), inputs.end(), -std::numeric_limits<float>::infinity());
212 xnn_math_f32_f16_cvt__sse2(kBlockSize * sizeof(uint16_t), inputs.data(), outputs.data());
213 const uint16_t reference_output = UINT16_C(0xFC00);
214 ASSERT_EQ(reference_output, outputs[0])
215 << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(inputs[0])
216 << ", reference = 0x" << std::hex << std::setw(4) << std::setfill('0') << reference_output
217 << ", optimized = 0x" << std::hex << std::setw(4) << std::setfill('0') << outputs[0];
218 }
219
220 TEST(CVT__SSE2, positive_nan) {
221 std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
222 std::vector<uint16_t, AlignedAllocator<uint16_t, 64>> outputs(kBlockSize);
223 for (uint32_t n = UINT32_C(0x7F800001); n < UINT32_C(0x80000000); n += kBlockSize) {
224 for (uint32_t i = 0; i < kBlockSize; i++) {
225 inputs[i] = fp32_from_bits(std::min<uint32_t>(n + i, UINT32_C(0x7FFFFFFF)));
226 }
227 xnn_math_f32_f16_cvt__sse2(kBlockSize * sizeof(uint16_t), inputs.data(), outputs.data());
228 for (uint32_t i = 0; i < kBlockSize; i++) {
229 ASSERT_GT(outputs[i], UINT16_C(0x7C00))
230 << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(inputs[i])
231 << ", optimized = 0x" << std::hex << std::setw(4) << std::setfill('0') << outputs[i];
232 ASSERT_LT(outputs[i], UINT16_C(0x8000))
233 << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(inputs[i])
234 << ", optimized = 0x" << std::hex << std::setw(4) << std::setfill('0') << outputs[i];
235 }
236 }
237 }
238
239 TEST(CVT__SSE2, negative_nan) {
240 std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
241 std::vector<uint16_t, AlignedAllocator<uint16_t, 64>> outputs(kBlockSize);
242 for (uint32_t n = UINT32_C(0x7F800001); n < UINT32_C(0x80000000); n += kBlockSize) {
243 for (uint32_t i = 0; i < kBlockSize; i++) {
244 inputs[i] = fp32_from_bits(UINT32_C(0x80000000) | std::min<uint32_t>(n + i, UINT32_C(0x7FFFFFFF)));
245 }
246 xnn_math_f32_f16_cvt__sse2(kBlockSize * sizeof(uint16_t), inputs.data(), outputs.data());
247 for (uint32_t i = 0; i < kBlockSize; i++) {
248 ASSERT_GT(outputs[i], UINT16_C(0xFC00))
249 << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(inputs[i])
250 << ", optimized = 0x" << std::hex << std::setw(4) << std::setfill('0') << outputs[i];
251 }
252 }
253 }
254#endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
255
256#if XNN_ARCH_X86 || XNN_ARCH_X86_64
257 TEST(CVT__SSE41, positive_normal) {
258 TEST_REQUIRES_X86_SSE41;
259
260 std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
261 std::vector<uint16_t, AlignedAllocator<uint16_t, 64>> outputs(kBlockSize);
262 for (uint32_t n = UINT32_C(0x387FE000); n < UINT32_C(0x477FF000); n += kBlockSize) {
263 for (uint32_t i = 0; i < kBlockSize; i++) {
264 inputs[i] = fp32_from_bits(n + i);
265 }
266 xnn_math_f32_f16_cvt__sse41(kBlockSize * sizeof(uint16_t), inputs.data(), outputs.data());
267 for (uint32_t i = 0; i < kBlockSize; i++) {
268 const uint16_t reference_output = fp16_ieee_from_fp32_value(inputs[i]);
269 ASSERT_EQ(reference_output, outputs[i])
270 << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(inputs[i])
271 << ", reference = 0x" << std::hex << std::setw(4) << std::setfill('0') << reference_output
272 << ", optimized = 0x" << std::hex << std::setw(4) << std::setfill('0') << outputs[i];
273 }
274 }
275 }
276
277 TEST(CVT__SSE41, negative_normal) {
278 TEST_REQUIRES_X86_SSE41;
279
280 std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
281 std::vector<uint16_t, AlignedAllocator<uint16_t, 64>> outputs(kBlockSize);
282 for (uint32_t n = UINT32_C(0xB87FE000); n < UINT32_C(0xC77FF000); n += kBlockSize) {
283 for (uint32_t i = 0; i < kBlockSize; i++) {
284 inputs[i] = fp32_from_bits(n + i);
285 }
286 xnn_math_f32_f16_cvt__sse41(kBlockSize * sizeof(uint16_t), inputs.data(), outputs.data());
287 for (uint32_t i = 0; i < kBlockSize; i++) {
288 const uint16_t reference_output = fp16_ieee_from_fp32_value(inputs[i]);
289 ASSERT_EQ(reference_output, outputs[i])
290 << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(inputs[i])
291 << ", reference = 0x" << std::hex << std::setw(4) << std::setfill('0') << reference_output
292 << ", optimized = 0x" << std::hex << std::setw(4) << std::setfill('0') << outputs[i];
293 }
294 }
295 }
296
297 TEST(CVT__SSE41, positive_subnormal) {
298 TEST_REQUIRES_X86_SSE41;
299
300 std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
301 std::vector<uint16_t, AlignedAllocator<uint16_t, 64>> outputs(kBlockSize);
302 for (uint32_t n = UINT32_C(0x33000001); n < UINT32_C(0x387FE000); n += kBlockSize) {
303 for (uint32_t i = 0; i < kBlockSize; i++) {
304 inputs[i] = fp32_from_bits(std::min<uint32_t>(n + i, UINT32_C(0x387FDFFF)));
305 }
306 xnn_math_f32_f16_cvt__sse41(kBlockSize * sizeof(uint16_t), inputs.data(), outputs.data());
307 for (uint32_t i = 0; i < kBlockSize; i++) {
308 const uint16_t reference_output = fp16_ieee_from_fp32_value(inputs[i]);
309 ASSERT_EQ(reference_output, outputs[i])
310 << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(inputs[i])
311 << ", reference = 0x" << std::hex << std::setw(4) << std::setfill('0') << reference_output
312 << ", optimized = 0x" << std::hex << std::setw(4) << std::setfill('0') << outputs[i];
313 }
314 }
315 }
316
317 TEST(CVT__SSE41, negative_subnormal) {
318 TEST_REQUIRES_X86_SSE41;
319
320 std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
321 std::vector<uint16_t, AlignedAllocator<uint16_t, 64>> outputs(kBlockSize);
322 for (uint32_t n = UINT32_C(0xB3000001); n < UINT32_C(0xB87FE000); n += kBlockSize) {
323 for (uint32_t i = 0; i < kBlockSize; i++) {
324 inputs[i] = fp32_from_bits(std::min<uint32_t>(n + i, UINT32_C(0xB87FDFFF)));
325 }
326 xnn_math_f32_f16_cvt__sse41(kBlockSize * sizeof(uint16_t), inputs.data(), outputs.data());
327 for (uint32_t i = 0; i < kBlockSize; i++) {
328 const uint16_t reference_output = fp16_ieee_from_fp32_value(inputs[i]);
329 ASSERT_EQ(reference_output, outputs[i])
330 << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(inputs[i])
331 << ", reference = 0x" << std::hex << std::setw(4) << std::setfill('0') << reference_output
332 << ", optimized = 0x" << std::hex << std::setw(4) << std::setfill('0') << outputs[i];
333 }
334 }
335 }
336
337 TEST(CVT__SSE41, positive_underflow) {
338 TEST_REQUIRES_X86_SSE41;
339
340 std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
341 std::vector<uint16_t, AlignedAllocator<uint16_t, 64>> outputs(kBlockSize);
342 for (uint32_t n = UINT32_C(0x00000001); n < UINT32_C(0x33000001); n += kBlockSize) {
343 for (uint32_t i = 0; i < kBlockSize; i++) {
344 inputs[i] = fp32_from_bits(n + i);
345 }
346 xnn_math_f32_f16_cvt__sse41(kBlockSize * sizeof(uint16_t), inputs.data(), outputs.data());
347 for (uint32_t i = 0; i < kBlockSize; i++) {
348 const uint16_t reference_output = UINT16_C(0x0000);
349 ASSERT_EQ(reference_output, outputs[i])
350 << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(inputs[i])
351 << ", reference = 0x" << std::hex << std::setw(4) << std::setfill('0') << reference_output
352 << ", optimized = 0x" << std::hex << std::setw(4) << std::setfill('0') << outputs[i];
353 }
354 }
355 }
356
357 TEST(CVT__SSE41, negative_underflow) {
358 TEST_REQUIRES_X86_SSE41;
359
360 std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
361 std::vector<uint16_t, AlignedAllocator<uint16_t, 64>> outputs(kBlockSize);
362 for (uint32_t n = UINT32_C(0x80000001); n < UINT32_C(0xB3000001); n += kBlockSize) {
363 for (uint32_t i = 0; i < kBlockSize; i++) {
364 inputs[i] = fp32_from_bits(n + i);
365 }
366 xnn_math_f32_f16_cvt__sse41(kBlockSize * sizeof(uint16_t), inputs.data(), outputs.data());
367 for (uint32_t i = 0; i < kBlockSize; i++) {
368 const uint16_t reference_output = UINT16_C(0x8000);
369 ASSERT_EQ(reference_output, outputs[i])
370 << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(inputs[i])
371 << ", reference = 0x" << std::hex << std::setw(4) << std::setfill('0') << reference_output
372 << ", optimized = 0x" << std::hex << std::setw(4) << std::setfill('0') << outputs[i];
373 }
374 }
375 }
376
377 TEST(CVT__SSE41, positive_zero) {
378 TEST_REQUIRES_X86_SSE41;
379
380 std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
381 std::vector<uint16_t, AlignedAllocator<uint16_t, 64>> outputs(kBlockSize);
382 std::fill(inputs.begin(), inputs.end(), +0.0f);
383 xnn_math_f32_f16_cvt__sse41(kBlockSize * sizeof(uint16_t), inputs.data(), outputs.data());
384 const uint16_t reference_output = UINT16_C(0x0000);
385 ASSERT_EQ(reference_output, outputs[0])
386 << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(inputs[0])
387 << ", reference = 0x" << std::hex << std::setw(4) << std::setfill('0') << reference_output
388 << ", optimized = 0x" << std::hex << std::setw(4) << std::setfill('0') << outputs[0];
389 }
390
391 TEST(CVT__SSE41, negative_zero) {
392 TEST_REQUIRES_X86_SSE41;
393
394 std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
395 std::vector<uint16_t, AlignedAllocator<uint16_t, 64>> outputs(kBlockSize);
396 std::fill(inputs.begin(), inputs.end(), -0.0f);
397 xnn_math_f32_f16_cvt__sse41(kBlockSize * sizeof(uint16_t), inputs.data(), outputs.data());
398 const uint16_t reference_output = UINT16_C(0x8000);
399 ASSERT_EQ(reference_output, outputs[0])
400 << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(inputs[0])
401 << ", reference = 0x" << std::hex << std::setw(4) << std::setfill('0') << reference_output
402 << ", optimized = 0x" << std::hex << std::setw(4) << std::setfill('0') << outputs[0];
403 }
404
405 TEST(CVT__SSE41, positive_overflow) {
406 TEST_REQUIRES_X86_SSE41;
407
408 std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
409 std::vector<uint16_t, AlignedAllocator<uint16_t, 64>> outputs(kBlockSize);
410 for (uint32_t n = UINT32_C(0x477FF000); n < UINT32_C(0x7F800000); n += kBlockSize) {
411 for (uint32_t i = 0; i < kBlockSize; i++) {
412 inputs[i] = fp32_from_bits(n + i);
413 }
414 xnn_math_f32_f16_cvt__sse41(kBlockSize * sizeof(uint16_t), inputs.data(), outputs.data());
415 for (uint32_t i = 0; i < kBlockSize; i++) {
416 const uint16_t reference_output = UINT16_C(0x7C00);
417 ASSERT_EQ(reference_output, outputs[i])
418 << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(inputs[i])
419 << ", reference = 0x" << std::hex << std::setw(4) << std::setfill('0') << reference_output
420 << ", optimized = 0x" << std::hex << std::setw(4) << std::setfill('0') << outputs[i];
421 }
422 }
423 }
424
425 TEST(CVT__SSE41, negative_overflow) {
426 TEST_REQUIRES_X86_SSE41;
427
428 std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
429 std::vector<uint16_t, AlignedAllocator<uint16_t, 64>> outputs(kBlockSize);
430 for (uint32_t n = UINT32_C(0xC77FF000); n < UINT32_C(0xFF800000); n += kBlockSize) {
431 for (uint32_t i = 0; i < kBlockSize; i++) {
432 inputs[i] = fp32_from_bits(n + i);
433 }
434 xnn_math_f32_f16_cvt__sse41(kBlockSize * sizeof(uint16_t), inputs.data(), outputs.data());
435 for (uint32_t i = 0; i < kBlockSize; i++) {
436 const uint16_t reference_output = UINT16_C(0xFC00);
437 ASSERT_EQ(reference_output, outputs[i])
438 << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(inputs[i])
439 << ", reference = 0x" << std::hex << std::setw(4) << std::setfill('0') << reference_output
440 << ", optimized = 0x" << std::hex << std::setw(4) << std::setfill('0') << outputs[i];
441 }
442 }
443 }
444
445 TEST(CVT__SSE41, positive_infinity) {
446 TEST_REQUIRES_X86_SSE41;
447
448 std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
449 std::vector<uint16_t, AlignedAllocator<uint16_t, 64>> outputs(kBlockSize);
450 std::fill(inputs.begin(), inputs.end(), +std::numeric_limits<float>::infinity());
451 xnn_math_f32_f16_cvt__sse41(kBlockSize * sizeof(uint16_t), inputs.data(), outputs.data());
452 const uint16_t reference_output = UINT16_C(0x7C00);
453 ASSERT_EQ(reference_output, outputs[0])
454 << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(inputs[0])
455 << ", reference = 0x" << std::hex << std::setw(4) << std::setfill('0') << reference_output
456 << ", optimized = 0x" << std::hex << std::setw(4) << std::setfill('0') << outputs[0];
457 }
458
459 TEST(CVT__SSE41, negative_infinity) {
460 TEST_REQUIRES_X86_SSE41;
461
462 std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
463 std::vector<uint16_t, AlignedAllocator<uint16_t, 64>> outputs(kBlockSize);
464 std::fill(inputs.begin(), inputs.end(), -std::numeric_limits<float>::infinity());
465 xnn_math_f32_f16_cvt__sse41(kBlockSize * sizeof(uint16_t), inputs.data(), outputs.data());
466 const uint16_t reference_output = UINT16_C(0xFC00);
467 ASSERT_EQ(reference_output, outputs[0])
468 << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(inputs[0])
469 << ", reference = 0x" << std::hex << std::setw(4) << std::setfill('0') << reference_output
470 << ", optimized = 0x" << std::hex << std::setw(4) << std::setfill('0') << outputs[0];
471 }
472
473 TEST(CVT__SSE41, positive_nan) {
474 TEST_REQUIRES_X86_SSE41;
475
476 std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
477 std::vector<uint16_t, AlignedAllocator<uint16_t, 64>> outputs(kBlockSize);
478 for (uint32_t n = UINT32_C(0x7F800001); n < UINT32_C(0x80000000); n += kBlockSize) {
479 for (uint32_t i = 0; i < kBlockSize; i++) {
480 inputs[i] = fp32_from_bits(std::min<uint32_t>(n + i, UINT32_C(0x7FFFFFFF)));
481 }
482 xnn_math_f32_f16_cvt__sse41(kBlockSize * sizeof(uint16_t), inputs.data(), outputs.data());
483 for (uint32_t i = 0; i < kBlockSize; i++) {
484 ASSERT_GT(outputs[i], UINT16_C(0x7C00))
485 << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(inputs[i])
486 << ", optimized = 0x" << std::hex << std::setw(4) << std::setfill('0') << outputs[i];
487 ASSERT_LT(outputs[i], UINT16_C(0x8000))
488 << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(inputs[i])
489 << ", optimized = 0x" << std::hex << std::setw(4) << std::setfill('0') << outputs[i];
490 }
491 }
492 }
493
494 TEST(CVT__SSE41, negative_nan) {
495 TEST_REQUIRES_X86_SSE41;
496
497 std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
498 std::vector<uint16_t, AlignedAllocator<uint16_t, 64>> outputs(kBlockSize);
499 for (uint32_t n = UINT32_C(0x7F800001); n < UINT32_C(0x80000000); n += kBlockSize) {
500 for (uint32_t i = 0; i < kBlockSize; i++) {
501 inputs[i] = fp32_from_bits(UINT32_C(0x80000000) | std::min<uint32_t>(n + i, UINT32_C(0x7FFFFFFF)));
502 }
503 xnn_math_f32_f16_cvt__sse41(kBlockSize * sizeof(uint16_t), inputs.data(), outputs.data());
504 for (uint32_t i = 0; i < kBlockSize; i++) {
505 ASSERT_GT(outputs[i], UINT16_C(0xFC00))
506 << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(inputs[i])
507 << ", optimized = 0x" << std::hex << std::setw(4) << std::setfill('0') << outputs[i];
508 }
509 }
510 }
511#endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
512
513#if XNN_ARCH_X86 || XNN_ARCH_X86_64
Marat Dukhan582e1842021-10-25 17:18:36 -0700514 TEST(CVT__F16C, positive_normal) {
515 TEST_REQUIRES_X86_F16C;
516
517 std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
518 std::vector<uint16_t, AlignedAllocator<uint16_t, 64>> outputs(kBlockSize);
519 for (uint32_t n = UINT32_C(0x387FE000); n < UINT32_C(0x477FF000); n += kBlockSize) {
520 for (uint32_t i = 0; i < kBlockSize; i++) {
521 inputs[i] = fp32_from_bits(n + i);
522 }
523 xnn_math_f32_f16_cvt__f16c(kBlockSize * sizeof(uint16_t), inputs.data(), outputs.data());
524 for (uint32_t i = 0; i < kBlockSize; i++) {
525 const uint16_t reference_output = fp16_ieee_from_fp32_value(inputs[i]);
526 ASSERT_EQ(reference_output, outputs[i])
527 << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(inputs[i])
528 << ", reference = 0x" << std::hex << std::setw(4) << std::setfill('0') << reference_output
529 << ", optimized = 0x" << std::hex << std::setw(4) << std::setfill('0') << outputs[i];
530 }
531 }
532 }
533
534 TEST(CVT__F16C, negative_normal) {
535 TEST_REQUIRES_X86_F16C;
536
537 std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
538 std::vector<uint16_t, AlignedAllocator<uint16_t, 64>> outputs(kBlockSize);
539 for (uint32_t n = UINT32_C(0xB87FE000); n < UINT32_C(0xC77FF000); n += kBlockSize) {
540 for (uint32_t i = 0; i < kBlockSize; i++) {
541 inputs[i] = fp32_from_bits(n + i);
542 }
543 xnn_math_f32_f16_cvt__f16c(kBlockSize * sizeof(uint16_t), inputs.data(), outputs.data());
544 for (uint32_t i = 0; i < kBlockSize; i++) {
545 const uint16_t reference_output = fp16_ieee_from_fp32_value(inputs[i]);
546 ASSERT_EQ(reference_output, outputs[i])
547 << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(inputs[i])
548 << ", reference = 0x" << std::hex << std::setw(4) << std::setfill('0') << reference_output
549 << ", optimized = 0x" << std::hex << std::setw(4) << std::setfill('0') << outputs[i];
550 }
551 }
552 }
553
554 TEST(CVT__F16C, positive_subnormal) {
555 TEST_REQUIRES_X86_F16C;
556
557 std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
558 std::vector<uint16_t, AlignedAllocator<uint16_t, 64>> outputs(kBlockSize);
559 for (uint32_t n = UINT32_C(0x33000001); n < UINT32_C(0x387FE000); n += kBlockSize) {
560 for (uint32_t i = 0; i < kBlockSize; i++) {
561 inputs[i] = fp32_from_bits(std::min<uint32_t>(n + i, UINT32_C(0x387FDFFF)));
562 }
563 xnn_math_f32_f16_cvt__f16c(kBlockSize * sizeof(uint16_t), inputs.data(), outputs.data());
564 for (uint32_t i = 0; i < kBlockSize; i++) {
565 const uint16_t reference_output = fp16_ieee_from_fp32_value(inputs[i]);
566 ASSERT_EQ(reference_output, outputs[i])
567 << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(inputs[i])
568 << ", reference = 0x" << std::hex << std::setw(4) << std::setfill('0') << reference_output
569 << ", optimized = 0x" << std::hex << std::setw(4) << std::setfill('0') << outputs[i];
570 }
571 }
572 }
573
574 TEST(CVT__F16C, negative_subnormal) {
575 TEST_REQUIRES_X86_F16C;
576
577 std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
578 std::vector<uint16_t, AlignedAllocator<uint16_t, 64>> outputs(kBlockSize);
579 for (uint32_t n = UINT32_C(0xB3000001); n < UINT32_C(0xB87FE000); n += kBlockSize) {
580 for (uint32_t i = 0; i < kBlockSize; i++) {
581 inputs[i] = fp32_from_bits(std::min<uint32_t>(n + i, UINT32_C(0xB87FDFFF)));
582 }
583 xnn_math_f32_f16_cvt__f16c(kBlockSize * sizeof(uint16_t), inputs.data(), outputs.data());
584 for (uint32_t i = 0; i < kBlockSize; i++) {
585 const uint16_t reference_output = fp16_ieee_from_fp32_value(inputs[i]);
586 ASSERT_EQ(reference_output, outputs[i])
587 << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(inputs[i])
588 << ", reference = 0x" << std::hex << std::setw(4) << std::setfill('0') << reference_output
589 << ", optimized = 0x" << std::hex << std::setw(4) << std::setfill('0') << outputs[i];
590 }
591 }
592 }
593
594 TEST(CVT__F16C, positive_underflow) {
595 TEST_REQUIRES_X86_F16C;
596
597 std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
598 std::vector<uint16_t, AlignedAllocator<uint16_t, 64>> outputs(kBlockSize);
599 for (uint32_t n = UINT32_C(0x00000001); n < UINT32_C(0x33000001); n += kBlockSize) {
600 for (uint32_t i = 0; i < kBlockSize; i++) {
601 inputs[i] = fp32_from_bits(n + i);
602 }
603 xnn_math_f32_f16_cvt__f16c(kBlockSize * sizeof(uint16_t), inputs.data(), outputs.data());
604 for (uint32_t i = 0; i < kBlockSize; i++) {
605 const uint16_t reference_output = UINT16_C(0x0000);
606 ASSERT_EQ(reference_output, outputs[i])
607 << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(inputs[i])
608 << ", reference = 0x" << std::hex << std::setw(4) << std::setfill('0') << reference_output
609 << ", optimized = 0x" << std::hex << std::setw(4) << std::setfill('0') << outputs[i];
610 }
611 }
612 }
613
614 TEST(CVT__F16C, negative_underflow) {
615 TEST_REQUIRES_X86_F16C;
616
617 std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
618 std::vector<uint16_t, AlignedAllocator<uint16_t, 64>> outputs(kBlockSize);
619 for (uint32_t n = UINT32_C(0x80000001); n < UINT32_C(0xB3000001); n += kBlockSize) {
620 for (uint32_t i = 0; i < kBlockSize; i++) {
621 inputs[i] = fp32_from_bits(n + i);
622 }
623 xnn_math_f32_f16_cvt__f16c(kBlockSize * sizeof(uint16_t), inputs.data(), outputs.data());
624 for (uint32_t i = 0; i < kBlockSize; i++) {
625 const uint16_t reference_output = UINT16_C(0x8000);
626 ASSERT_EQ(reference_output, outputs[i])
627 << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(inputs[i])
628 << ", reference = 0x" << std::hex << std::setw(4) << std::setfill('0') << reference_output
629 << ", optimized = 0x" << std::hex << std::setw(4) << std::setfill('0') << outputs[i];
630 }
631 }
632 }
633
634 TEST(CVT__F16C, positive_zero) {
635 TEST_REQUIRES_X86_F16C;
636
637 std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
638 std::vector<uint16_t, AlignedAllocator<uint16_t, 64>> outputs(kBlockSize);
639 std::fill(inputs.begin(), inputs.end(), +0.0f);
640 xnn_math_f32_f16_cvt__f16c(kBlockSize * sizeof(uint16_t), inputs.data(), outputs.data());
641 const uint16_t reference_output = UINT16_C(0x0000);
642 ASSERT_EQ(reference_output, outputs[0])
643 << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(inputs[0])
644 << ", reference = 0x" << std::hex << std::setw(4) << std::setfill('0') << reference_output
645 << ", optimized = 0x" << std::hex << std::setw(4) << std::setfill('0') << outputs[0];
646 }
647
648 TEST(CVT__F16C, negative_zero) {
649 TEST_REQUIRES_X86_F16C;
650
651 std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
652 std::vector<uint16_t, AlignedAllocator<uint16_t, 64>> outputs(kBlockSize);
653 std::fill(inputs.begin(), inputs.end(), -0.0f);
654 xnn_math_f32_f16_cvt__f16c(kBlockSize * sizeof(uint16_t), inputs.data(), outputs.data());
655 const uint16_t reference_output = UINT16_C(0x8000);
656 ASSERT_EQ(reference_output, outputs[0])
657 << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(inputs[0])
658 << ", reference = 0x" << std::hex << std::setw(4) << std::setfill('0') << reference_output
659 << ", optimized = 0x" << std::hex << std::setw(4) << std::setfill('0') << outputs[0];
660 }
661
662 TEST(CVT__F16C, positive_overflow) {
663 TEST_REQUIRES_X86_F16C;
664
665 std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
666 std::vector<uint16_t, AlignedAllocator<uint16_t, 64>> outputs(kBlockSize);
667 for (uint32_t n = UINT32_C(0x477FF000); n < UINT32_C(0x7F800000); n += kBlockSize) {
668 for (uint32_t i = 0; i < kBlockSize; i++) {
669 inputs[i] = fp32_from_bits(n + i);
670 }
671 xnn_math_f32_f16_cvt__f16c(kBlockSize * sizeof(uint16_t), inputs.data(), outputs.data());
672 for (uint32_t i = 0; i < kBlockSize; i++) {
673 const uint16_t reference_output = UINT16_C(0x7C00);
674 ASSERT_EQ(reference_output, outputs[i])
675 << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(inputs[i])
676 << ", reference = 0x" << std::hex << std::setw(4) << std::setfill('0') << reference_output
677 << ", optimized = 0x" << std::hex << std::setw(4) << std::setfill('0') << outputs[i];
678 }
679 }
680 }
681
682 TEST(CVT__F16C, negative_overflow) {
683 TEST_REQUIRES_X86_F16C;
684
685 std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
686 std::vector<uint16_t, AlignedAllocator<uint16_t, 64>> outputs(kBlockSize);
687 for (uint32_t n = UINT32_C(0xC77FF000); n < UINT32_C(0xFF800000); n += kBlockSize) {
688 for (uint32_t i = 0; i < kBlockSize; i++) {
689 inputs[i] = fp32_from_bits(n + i);
690 }
691 xnn_math_f32_f16_cvt__f16c(kBlockSize * sizeof(uint16_t), inputs.data(), outputs.data());
692 for (uint32_t i = 0; i < kBlockSize; i++) {
693 const uint16_t reference_output = UINT16_C(0xFC00);
694 ASSERT_EQ(reference_output, outputs[i])
695 << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(inputs[i])
696 << ", reference = 0x" << std::hex << std::setw(4) << std::setfill('0') << reference_output
697 << ", optimized = 0x" << std::hex << std::setw(4) << std::setfill('0') << outputs[i];
698 }
699 }
700 }
701
702 TEST(CVT__F16C, positive_infinity) {
703 TEST_REQUIRES_X86_F16C;
704
705 std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
706 std::vector<uint16_t, AlignedAllocator<uint16_t, 64>> outputs(kBlockSize);
707 std::fill(inputs.begin(), inputs.end(), +std::numeric_limits<float>::infinity());
708 xnn_math_f32_f16_cvt__f16c(kBlockSize * sizeof(uint16_t), inputs.data(), outputs.data());
709 const uint16_t reference_output = UINT16_C(0x7C00);
710 ASSERT_EQ(reference_output, outputs[0])
711 << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(inputs[0])
712 << ", reference = 0x" << std::hex << std::setw(4) << std::setfill('0') << reference_output
713 << ", optimized = 0x" << std::hex << std::setw(4) << std::setfill('0') << outputs[0];
714 }
715
716 TEST(CVT__F16C, negative_infinity) {
717 TEST_REQUIRES_X86_F16C;
718
719 std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
720 std::vector<uint16_t, AlignedAllocator<uint16_t, 64>> outputs(kBlockSize);
721 std::fill(inputs.begin(), inputs.end(), -std::numeric_limits<float>::infinity());
722 xnn_math_f32_f16_cvt__f16c(kBlockSize * sizeof(uint16_t), inputs.data(), outputs.data());
723 const uint16_t reference_output = UINT16_C(0xFC00);
724 ASSERT_EQ(reference_output, outputs[0])
725 << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(inputs[0])
726 << ", reference = 0x" << std::hex << std::setw(4) << std::setfill('0') << reference_output
727 << ", optimized = 0x" << std::hex << std::setw(4) << std::setfill('0') << outputs[0];
728 }
729
730 TEST(CVT__F16C, positive_nan) {
731 TEST_REQUIRES_X86_F16C;
732
733 std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
734 std::vector<uint16_t, AlignedAllocator<uint16_t, 64>> outputs(kBlockSize);
735 for (uint32_t n = UINT32_C(0x7F800001); n < UINT32_C(0x80000000); n += kBlockSize) {
736 for (uint32_t i = 0; i < kBlockSize; i++) {
737 inputs[i] = fp32_from_bits(std::min<uint32_t>(n + i, UINT32_C(0x7FFFFFFF)));
738 }
739 xnn_math_f32_f16_cvt__f16c(kBlockSize * sizeof(uint16_t), inputs.data(), outputs.data());
740 for (uint32_t i = 0; i < kBlockSize; i++) {
741 ASSERT_GT(outputs[i], UINT16_C(0x7C00))
742 << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(inputs[i])
743 << ", optimized = 0x" << std::hex << std::setw(4) << std::setfill('0') << outputs[i];
744 ASSERT_LT(outputs[i], UINT16_C(0x8000))
745 << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(inputs[i])
746 << ", optimized = 0x" << std::hex << std::setw(4) << std::setfill('0') << outputs[i];
747 }
748 }
749 }
750
751 TEST(CVT__F16C, negative_nan) {
752 TEST_REQUIRES_X86_F16C;
753
754 std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
755 std::vector<uint16_t, AlignedAllocator<uint16_t, 64>> outputs(kBlockSize);
756 for (uint32_t n = UINT32_C(0x7F800001); n < UINT32_C(0x80000000); n += kBlockSize) {
757 for (uint32_t i = 0; i < kBlockSize; i++) {
758 inputs[i] = fp32_from_bits(UINT32_C(0x80000000) | std::min<uint32_t>(n + i, UINT32_C(0x7FFFFFFF)));
759 }
760 xnn_math_f32_f16_cvt__f16c(kBlockSize * sizeof(uint16_t), inputs.data(), outputs.data());
761 for (uint32_t i = 0; i < kBlockSize; i++) {
762 ASSERT_GT(outputs[i], UINT16_C(0xFC00))
763 << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(inputs[i])
764 << ", optimized = 0x" << std::hex << std::setw(4) << std::setfill('0') << outputs[i];
765 }
766 }
767 }
768#endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
769
770#if XNN_ARCH_ARM || XNN_ARCH_ARM64
Marat Dukhana6eb1e52021-11-06 18:29:36 -0700771 TEST(CVT__NEON, positive_normal) {
772 TEST_REQUIRES_ARM_NEON;
773
774 std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
775 std::vector<uint16_t, AlignedAllocator<uint16_t, 64>> outputs(kBlockSize);
776 for (uint32_t n = UINT32_C(0x387FE000); n < UINT32_C(0x477FF000); n += kBlockSize) {
777 for (uint32_t i = 0; i < kBlockSize; i++) {
778 inputs[i] = fp32_from_bits(n + i);
779 }
780 xnn_math_f32_f16_cvt__neon(kBlockSize * sizeof(uint16_t), inputs.data(), outputs.data());
781 for (uint32_t i = 0; i < kBlockSize; i++) {
782 const uint16_t reference_output = fp16_ieee_from_fp32_value(inputs[i]);
783 ASSERT_EQ(reference_output, outputs[i])
784 << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(inputs[i])
785 << ", reference = 0x" << std::hex << std::setw(4) << std::setfill('0') << reference_output
786 << ", optimized = 0x" << std::hex << std::setw(4) << std::setfill('0') << outputs[i];
787 }
788 }
789 }
790
791 TEST(CVT__NEON, negative_normal) {
792 TEST_REQUIRES_ARM_NEON;
793
794 std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
795 std::vector<uint16_t, AlignedAllocator<uint16_t, 64>> outputs(kBlockSize);
796 for (uint32_t n = UINT32_C(0xB87FE000); n < UINT32_C(0xC77FF000); n += kBlockSize) {
797 for (uint32_t i = 0; i < kBlockSize; i++) {
798 inputs[i] = fp32_from_bits(n + i);
799 }
800 xnn_math_f32_f16_cvt__neon(kBlockSize * sizeof(uint16_t), inputs.data(), outputs.data());
801 for (uint32_t i = 0; i < kBlockSize; i++) {
802 const uint16_t reference_output = fp16_ieee_from_fp32_value(inputs[i]);
803 ASSERT_EQ(reference_output, outputs[i])
804 << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(inputs[i])
805 << ", reference = 0x" << std::hex << std::setw(4) << std::setfill('0') << reference_output
806 << ", optimized = 0x" << std::hex << std::setw(4) << std::setfill('0') << outputs[i];
807 }
808 }
809 }
810
811 TEST(CVT__NEON, positive_subnormal) {
812 TEST_REQUIRES_ARM_NEON;
813
814 std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
815 std::vector<uint16_t, AlignedAllocator<uint16_t, 64>> outputs(kBlockSize);
816 for (uint32_t n = UINT32_C(0x33000001); n < UINT32_C(0x387FE000); n += kBlockSize) {
817 for (uint32_t i = 0; i < kBlockSize; i++) {
818 inputs[i] = fp32_from_bits(std::min<uint32_t>(n + i, UINT32_C(0x387FDFFF)));
819 }
820 xnn_math_f32_f16_cvt__neon(kBlockSize * sizeof(uint16_t), inputs.data(), outputs.data());
821 for (uint32_t i = 0; i < kBlockSize; i++) {
822 const uint16_t reference_output = fp16_ieee_from_fp32_value(inputs[i]);
823 ASSERT_EQ(reference_output, outputs[i])
824 << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(inputs[i])
825 << ", reference = 0x" << std::hex << std::setw(4) << std::setfill('0') << reference_output
826 << ", optimized = 0x" << std::hex << std::setw(4) << std::setfill('0') << outputs[i];
827 }
828 }
829 }
830
831 TEST(CVT__NEON, negative_subnormal) {
832 TEST_REQUIRES_ARM_NEON;
833
834 std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
835 std::vector<uint16_t, AlignedAllocator<uint16_t, 64>> outputs(kBlockSize);
836 for (uint32_t n = UINT32_C(0xB3000001); n < UINT32_C(0xB87FE000); n += kBlockSize) {
837 for (uint32_t i = 0; i < kBlockSize; i++) {
838 inputs[i] = fp32_from_bits(std::min<uint32_t>(n + i, UINT32_C(0xB87FDFFF)));
839 }
840 xnn_math_f32_f16_cvt__neon(kBlockSize * sizeof(uint16_t), inputs.data(), outputs.data());
841 for (uint32_t i = 0; i < kBlockSize; i++) {
842 const uint16_t reference_output = fp16_ieee_from_fp32_value(inputs[i]);
843 ASSERT_EQ(reference_output, outputs[i])
844 << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(inputs[i])
845 << ", reference = 0x" << std::hex << std::setw(4) << std::setfill('0') << reference_output
846 << ", optimized = 0x" << std::hex << std::setw(4) << std::setfill('0') << outputs[i];
847 }
848 }
849 }
850
851 TEST(CVT__NEON, positive_underflow) {
852 TEST_REQUIRES_ARM_NEON;
853
854 std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
855 std::vector<uint16_t, AlignedAllocator<uint16_t, 64>> outputs(kBlockSize);
856 for (uint32_t n = UINT32_C(0x00000001); n < UINT32_C(0x33000001); n += kBlockSize) {
857 for (uint32_t i = 0; i < kBlockSize; i++) {
858 inputs[i] = fp32_from_bits(n + i);
859 }
860 xnn_math_f32_f16_cvt__neon(kBlockSize * sizeof(uint16_t), inputs.data(), outputs.data());
861 for (uint32_t i = 0; i < kBlockSize; i++) {
862 const uint16_t reference_output = UINT16_C(0x0000);
863 ASSERT_EQ(reference_output, outputs[i])
864 << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(inputs[i])
865 << ", reference = 0x" << std::hex << std::setw(4) << std::setfill('0') << reference_output
866 << ", optimized = 0x" << std::hex << std::setw(4) << std::setfill('0') << outputs[i];
867 }
868 }
869 }
870
871 TEST(CVT__NEON, negative_underflow) {
872 TEST_REQUIRES_ARM_NEON;
873
874 std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
875 std::vector<uint16_t, AlignedAllocator<uint16_t, 64>> outputs(kBlockSize);
876 for (uint32_t n = UINT32_C(0x80000001); n < UINT32_C(0xB3000001); n += kBlockSize) {
877 for (uint32_t i = 0; i < kBlockSize; i++) {
878 inputs[i] = fp32_from_bits(n + i);
879 }
880 xnn_math_f32_f16_cvt__neon(kBlockSize * sizeof(uint16_t), inputs.data(), outputs.data());
881 for (uint32_t i = 0; i < kBlockSize; i++) {
882 const uint16_t reference_output = UINT16_C(0x8000);
883 ASSERT_EQ(reference_output, outputs[i])
884 << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(inputs[i])
885 << ", reference = 0x" << std::hex << std::setw(4) << std::setfill('0') << reference_output
886 << ", optimized = 0x" << std::hex << std::setw(4) << std::setfill('0') << outputs[i];
887 }
888 }
889 }
890
891 TEST(CVT__NEON, positive_zero) {
892 TEST_REQUIRES_ARM_NEON;
893
894 std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
895 std::vector<uint16_t, AlignedAllocator<uint16_t, 64>> outputs(kBlockSize);
896 std::fill(inputs.begin(), inputs.end(), +0.0f);
897 xnn_math_f32_f16_cvt__neon(kBlockSize * sizeof(uint16_t), inputs.data(), outputs.data());
898 const uint16_t reference_output = UINT16_C(0x0000);
899 ASSERT_EQ(reference_output, outputs[0])
900 << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(inputs[0])
901 << ", reference = 0x" << std::hex << std::setw(4) << std::setfill('0') << reference_output
902 << ", optimized = 0x" << std::hex << std::setw(4) << std::setfill('0') << outputs[0];
903 }
904
905 TEST(CVT__NEON, negative_zero) {
906 TEST_REQUIRES_ARM_NEON;
907
908 std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
909 std::vector<uint16_t, AlignedAllocator<uint16_t, 64>> outputs(kBlockSize);
910 std::fill(inputs.begin(), inputs.end(), -0.0f);
911 xnn_math_f32_f16_cvt__neon(kBlockSize * sizeof(uint16_t), inputs.data(), outputs.data());
912 const uint16_t reference_output = UINT16_C(0x8000);
913 ASSERT_EQ(reference_output, outputs[0])
914 << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(inputs[0])
915 << ", reference = 0x" << std::hex << std::setw(4) << std::setfill('0') << reference_output
916 << ", optimized = 0x" << std::hex << std::setw(4) << std::setfill('0') << outputs[0];
917 }
918
919 TEST(CVT__NEON, positive_overflow) {
920 TEST_REQUIRES_ARM_NEON;
921
922 std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
923 std::vector<uint16_t, AlignedAllocator<uint16_t, 64>> outputs(kBlockSize);
924 for (uint32_t n = UINT32_C(0x477FF000); n < UINT32_C(0x7F800000); n += kBlockSize) {
925 for (uint32_t i = 0; i < kBlockSize; i++) {
926 inputs[i] = fp32_from_bits(n + i);
927 }
928 xnn_math_f32_f16_cvt__neon(kBlockSize * sizeof(uint16_t), inputs.data(), outputs.data());
929 for (uint32_t i = 0; i < kBlockSize; i++) {
930 const uint16_t reference_output = UINT16_C(0x7C00);
931 ASSERT_EQ(reference_output, outputs[i])
932 << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(inputs[i])
933 << ", reference = 0x" << std::hex << std::setw(4) << std::setfill('0') << reference_output
934 << ", optimized = 0x" << std::hex << std::setw(4) << std::setfill('0') << outputs[i];
935 }
936 }
937 }
938
939 TEST(CVT__NEON, negative_overflow) {
940 TEST_REQUIRES_ARM_NEON;
941
942 std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
943 std::vector<uint16_t, AlignedAllocator<uint16_t, 64>> outputs(kBlockSize);
944 for (uint32_t n = UINT32_C(0xC77FF000); n < UINT32_C(0xFF800000); n += kBlockSize) {
945 for (uint32_t i = 0; i < kBlockSize; i++) {
946 inputs[i] = fp32_from_bits(n + i);
947 }
948 xnn_math_f32_f16_cvt__neon(kBlockSize * sizeof(uint16_t), inputs.data(), outputs.data());
949 for (uint32_t i = 0; i < kBlockSize; i++) {
950 const uint16_t reference_output = UINT16_C(0xFC00);
951 ASSERT_EQ(reference_output, outputs[i])
952 << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(inputs[i])
953 << ", reference = 0x" << std::hex << std::setw(4) << std::setfill('0') << reference_output
954 << ", optimized = 0x" << std::hex << std::setw(4) << std::setfill('0') << outputs[i];
955 }
956 }
957 }
958
959 TEST(CVT__NEON, positive_infinity) {
960 TEST_REQUIRES_ARM_NEON;
961
962 std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
963 std::vector<uint16_t, AlignedAllocator<uint16_t, 64>> outputs(kBlockSize);
964 std::fill(inputs.begin(), inputs.end(), +std::numeric_limits<float>::infinity());
965 xnn_math_f32_f16_cvt__neon(kBlockSize * sizeof(uint16_t), inputs.data(), outputs.data());
966 const uint16_t reference_output = UINT16_C(0x7C00);
967 ASSERT_EQ(reference_output, outputs[0])
968 << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(inputs[0])
969 << ", reference = 0x" << std::hex << std::setw(4) << std::setfill('0') << reference_output
970 << ", optimized = 0x" << std::hex << std::setw(4) << std::setfill('0') << outputs[0];
971 }
972
973 TEST(CVT__NEON, negative_infinity) {
974 TEST_REQUIRES_ARM_NEON;
975
976 std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
977 std::vector<uint16_t, AlignedAllocator<uint16_t, 64>> outputs(kBlockSize);
978 std::fill(inputs.begin(), inputs.end(), -std::numeric_limits<float>::infinity());
979 xnn_math_f32_f16_cvt__neon(kBlockSize * sizeof(uint16_t), inputs.data(), outputs.data());
980 const uint16_t reference_output = UINT16_C(0xFC00);
981 ASSERT_EQ(reference_output, outputs[0])
982 << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(inputs[0])
983 << ", reference = 0x" << std::hex << std::setw(4) << std::setfill('0') << reference_output
984 << ", optimized = 0x" << std::hex << std::setw(4) << std::setfill('0') << outputs[0];
985 }
986
987 TEST(CVT__NEON, positive_nan) {
988 TEST_REQUIRES_ARM_NEON;
989
990 std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
991 std::vector<uint16_t, AlignedAllocator<uint16_t, 64>> outputs(kBlockSize);
992 for (uint32_t n = UINT32_C(0x7F800001); n < UINT32_C(0x80000000); n += kBlockSize) {
993 for (uint32_t i = 0; i < kBlockSize; i++) {
994 inputs[i] = fp32_from_bits(std::min<uint32_t>(n + i, UINT32_C(0x7FFFFFFF)));
995 }
996 xnn_math_f32_f16_cvt__neon(kBlockSize * sizeof(uint16_t), inputs.data(), outputs.data());
997 for (uint32_t i = 0; i < kBlockSize; i++) {
998 ASSERT_GT(outputs[i], UINT16_C(0x7C00))
999 << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(inputs[i])
1000 << ", optimized = 0x" << std::hex << std::setw(4) << std::setfill('0') << outputs[i];
1001 ASSERT_LT(outputs[i], UINT16_C(0x8000))
1002 << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(inputs[i])
1003 << ", optimized = 0x" << std::hex << std::setw(4) << std::setfill('0') << outputs[i];
1004 }
1005 }
1006 }
1007
1008 TEST(CVT__NEON, negative_nan) {
1009 TEST_REQUIRES_ARM_NEON;
1010
1011 std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
1012 std::vector<uint16_t, AlignedAllocator<uint16_t, 64>> outputs(kBlockSize);
1013 for (uint32_t n = UINT32_C(0x7F800001); n < UINT32_C(0x80000000); n += kBlockSize) {
1014 for (uint32_t i = 0; i < kBlockSize; i++) {
1015 inputs[i] = fp32_from_bits(UINT32_C(0x80000000) | std::min<uint32_t>(n + i, UINT32_C(0x7FFFFFFF)));
1016 }
1017 xnn_math_f32_f16_cvt__neon(kBlockSize * sizeof(uint16_t), inputs.data(), outputs.data());
1018 for (uint32_t i = 0; i < kBlockSize; i++) {
1019 ASSERT_GT(outputs[i], UINT16_C(0xFC00))
1020 << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(inputs[i])
1021 << ", optimized = 0x" << std::hex << std::setw(4) << std::setfill('0') << outputs[i];
1022 }
1023 }
1024 }
1025#endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
1026
1027#if XNN_ARCH_ARM || XNN_ARCH_ARM64
1028 TEST(CVT__NEONFP16, positive_normal) {
Marat Dukhan582e1842021-10-25 17:18:36 -07001029 TEST_REQUIRES_ARM_NEON_FP16;
1030
1031 std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
1032 std::vector<uint16_t, AlignedAllocator<uint16_t, 64>> outputs(kBlockSize);
1033 for (uint32_t n = UINT32_C(0x387FE000); n < UINT32_C(0x477FF000); n += kBlockSize) {
1034 for (uint32_t i = 0; i < kBlockSize; i++) {
1035 inputs[i] = fp32_from_bits(n + i);
1036 }
1037 xnn_math_f32_f16_cvt__neonfp16(kBlockSize * sizeof(uint16_t), inputs.data(), outputs.data());
1038 for (uint32_t i = 0; i < kBlockSize; i++) {
1039 const uint16_t reference_output = fp16_ieee_from_fp32_value(inputs[i]);
1040 ASSERT_EQ(reference_output, outputs[i])
1041 << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(inputs[i])
1042 << ", reference = 0x" << std::hex << std::setw(4) << std::setfill('0') << reference_output
1043 << ", optimized = 0x" << std::hex << std::setw(4) << std::setfill('0') << outputs[i];
1044 }
1045 }
1046 }
1047
Marat Dukhana6eb1e52021-11-06 18:29:36 -07001048 TEST(CVT__NEONFP16, negative_normal) {
Marat Dukhan582e1842021-10-25 17:18:36 -07001049 TEST_REQUIRES_ARM_NEON_FP16;
1050
1051 std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
1052 std::vector<uint16_t, AlignedAllocator<uint16_t, 64>> outputs(kBlockSize);
1053 for (uint32_t n = UINT32_C(0xB87FE000); n < UINT32_C(0xC77FF000); n += kBlockSize) {
1054 for (uint32_t i = 0; i < kBlockSize; i++) {
1055 inputs[i] = fp32_from_bits(n + i);
1056 }
1057 xnn_math_f32_f16_cvt__neonfp16(kBlockSize * sizeof(uint16_t), inputs.data(), outputs.data());
1058 for (uint32_t i = 0; i < kBlockSize; i++) {
1059 const uint16_t reference_output = fp16_ieee_from_fp32_value(inputs[i]);
1060 ASSERT_EQ(reference_output, outputs[i])
1061 << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(inputs[i])
1062 << ", reference = 0x" << std::hex << std::setw(4) << std::setfill('0') << reference_output
1063 << ", optimized = 0x" << std::hex << std::setw(4) << std::setfill('0') << outputs[i];
1064 }
1065 }
1066 }
1067
Marat Dukhana6eb1e52021-11-06 18:29:36 -07001068 TEST(CVT__NEONFP16, positive_subnormal) {
Marat Dukhan582e1842021-10-25 17:18:36 -07001069 TEST_REQUIRES_ARM_NEON_FP16;
1070
1071 std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
1072 std::vector<uint16_t, AlignedAllocator<uint16_t, 64>> outputs(kBlockSize);
1073 for (uint32_t n = UINT32_C(0x33000001); n < UINT32_C(0x387FE000); n += kBlockSize) {
1074 for (uint32_t i = 0; i < kBlockSize; i++) {
1075 inputs[i] = fp32_from_bits(std::min<uint32_t>(n + i, UINT32_C(0x387FDFFF)));
1076 }
1077 xnn_math_f32_f16_cvt__neonfp16(kBlockSize * sizeof(uint16_t), inputs.data(), outputs.data());
1078 for (uint32_t i = 0; i < kBlockSize; i++) {
1079 const uint16_t reference_output = fp16_ieee_from_fp32_value(inputs[i]);
1080 ASSERT_EQ(reference_output, outputs[i])
1081 << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(inputs[i])
1082 << ", reference = 0x" << std::hex << std::setw(4) << std::setfill('0') << reference_output
1083 << ", optimized = 0x" << std::hex << std::setw(4) << std::setfill('0') << outputs[i];
1084 }
1085 }
1086 }
1087
Marat Dukhana6eb1e52021-11-06 18:29:36 -07001088 TEST(CVT__NEONFP16, negative_subnormal) {
Marat Dukhan582e1842021-10-25 17:18:36 -07001089 TEST_REQUIRES_ARM_NEON_FP16;
1090
1091 std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
1092 std::vector<uint16_t, AlignedAllocator<uint16_t, 64>> outputs(kBlockSize);
1093 for (uint32_t n = UINT32_C(0xB3000001); n < UINT32_C(0xB87FE000); n += kBlockSize) {
1094 for (uint32_t i = 0; i < kBlockSize; i++) {
1095 inputs[i] = fp32_from_bits(std::min<uint32_t>(n + i, UINT32_C(0xB87FDFFF)));
1096 }
1097 xnn_math_f32_f16_cvt__neonfp16(kBlockSize * sizeof(uint16_t), inputs.data(), outputs.data());
1098 for (uint32_t i = 0; i < kBlockSize; i++) {
1099 const uint16_t reference_output = fp16_ieee_from_fp32_value(inputs[i]);
1100 ASSERT_EQ(reference_output, outputs[i])
1101 << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(inputs[i])
1102 << ", reference = 0x" << std::hex << std::setw(4) << std::setfill('0') << reference_output
1103 << ", optimized = 0x" << std::hex << std::setw(4) << std::setfill('0') << outputs[i];
1104 }
1105 }
1106 }
1107
Marat Dukhana6eb1e52021-11-06 18:29:36 -07001108 TEST(CVT__NEONFP16, positive_underflow) {
Marat Dukhan582e1842021-10-25 17:18:36 -07001109 TEST_REQUIRES_ARM_NEON_FP16;
1110
1111 std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
1112 std::vector<uint16_t, AlignedAllocator<uint16_t, 64>> outputs(kBlockSize);
1113 for (uint32_t n = UINT32_C(0x00000001); n < UINT32_C(0x33000001); n += kBlockSize) {
1114 for (uint32_t i = 0; i < kBlockSize; i++) {
1115 inputs[i] = fp32_from_bits(n + i);
1116 }
1117 xnn_math_f32_f16_cvt__neonfp16(kBlockSize * sizeof(uint16_t), inputs.data(), outputs.data());
1118 for (uint32_t i = 0; i < kBlockSize; i++) {
1119 const uint16_t reference_output = UINT16_C(0x0000);
1120 ASSERT_EQ(reference_output, outputs[i])
1121 << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(inputs[i])
1122 << ", reference = 0x" << std::hex << std::setw(4) << std::setfill('0') << reference_output
1123 << ", optimized = 0x" << std::hex << std::setw(4) << std::setfill('0') << outputs[i];
1124 }
1125 }
1126 }
1127
Marat Dukhana6eb1e52021-11-06 18:29:36 -07001128 TEST(CVT__NEONFP16, negative_underflow) {
Marat Dukhan582e1842021-10-25 17:18:36 -07001129 TEST_REQUIRES_ARM_NEON_FP16;
1130
1131 std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
1132 std::vector<uint16_t, AlignedAllocator<uint16_t, 64>> outputs(kBlockSize);
1133 for (uint32_t n = UINT32_C(0x80000001); n < UINT32_C(0xB3000001); n += kBlockSize) {
1134 for (uint32_t i = 0; i < kBlockSize; i++) {
1135 inputs[i] = fp32_from_bits(n + i);
1136 }
1137 xnn_math_f32_f16_cvt__neonfp16(kBlockSize * sizeof(uint16_t), inputs.data(), outputs.data());
1138 for (uint32_t i = 0; i < kBlockSize; i++) {
1139 const uint16_t reference_output = UINT16_C(0x8000);
1140 ASSERT_EQ(reference_output, outputs[i])
1141 << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(inputs[i])
1142 << ", reference = 0x" << std::hex << std::setw(4) << std::setfill('0') << reference_output
1143 << ", optimized = 0x" << std::hex << std::setw(4) << std::setfill('0') << outputs[i];
1144 }
1145 }
1146 }
1147
Marat Dukhana6eb1e52021-11-06 18:29:36 -07001148 TEST(CVT__NEONFP16, positive_zero) {
Marat Dukhan582e1842021-10-25 17:18:36 -07001149 TEST_REQUIRES_ARM_NEON_FP16;
1150
1151 std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
1152 std::vector<uint16_t, AlignedAllocator<uint16_t, 64>> outputs(kBlockSize);
1153 std::fill(inputs.begin(), inputs.end(), +0.0f);
1154 xnn_math_f32_f16_cvt__neonfp16(kBlockSize * sizeof(uint16_t), inputs.data(), outputs.data());
1155 const uint16_t reference_output = UINT16_C(0x0000);
1156 ASSERT_EQ(reference_output, outputs[0])
1157 << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(inputs[0])
1158 << ", reference = 0x" << std::hex << std::setw(4) << std::setfill('0') << reference_output
1159 << ", optimized = 0x" << std::hex << std::setw(4) << std::setfill('0') << outputs[0];
1160 }
1161
Marat Dukhana6eb1e52021-11-06 18:29:36 -07001162 TEST(CVT__NEONFP16, negative_zero) {
Marat Dukhan582e1842021-10-25 17:18:36 -07001163 TEST_REQUIRES_ARM_NEON_FP16;
1164
1165 std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
1166 std::vector<uint16_t, AlignedAllocator<uint16_t, 64>> outputs(kBlockSize);
1167 std::fill(inputs.begin(), inputs.end(), -0.0f);
1168 xnn_math_f32_f16_cvt__neonfp16(kBlockSize * sizeof(uint16_t), inputs.data(), outputs.data());
1169 const uint16_t reference_output = UINT16_C(0x8000);
1170 ASSERT_EQ(reference_output, outputs[0])
1171 << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(inputs[0])
1172 << ", reference = 0x" << std::hex << std::setw(4) << std::setfill('0') << reference_output
1173 << ", optimized = 0x" << std::hex << std::setw(4) << std::setfill('0') << outputs[0];
1174 }
1175
Marat Dukhana6eb1e52021-11-06 18:29:36 -07001176 TEST(CVT__NEONFP16, positive_overflow) {
Marat Dukhan582e1842021-10-25 17:18:36 -07001177 TEST_REQUIRES_ARM_NEON_FP16;
1178
1179 std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
1180 std::vector<uint16_t, AlignedAllocator<uint16_t, 64>> outputs(kBlockSize);
1181 for (uint32_t n = UINT32_C(0x477FF000); n < UINT32_C(0x7F800000); n += kBlockSize) {
1182 for (uint32_t i = 0; i < kBlockSize; i++) {
1183 inputs[i] = fp32_from_bits(n + i);
1184 }
1185 xnn_math_f32_f16_cvt__neonfp16(kBlockSize * sizeof(uint16_t), inputs.data(), outputs.data());
1186 for (uint32_t i = 0; i < kBlockSize; i++) {
1187 const uint16_t reference_output = UINT16_C(0x7C00);
1188 ASSERT_EQ(reference_output, outputs[i])
1189 << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(inputs[i])
1190 << ", reference = 0x" << std::hex << std::setw(4) << std::setfill('0') << reference_output
1191 << ", optimized = 0x" << std::hex << std::setw(4) << std::setfill('0') << outputs[i];
1192 }
1193 }
1194 }
1195
Marat Dukhana6eb1e52021-11-06 18:29:36 -07001196 TEST(CVT__NEONFP16, negative_overflow) {
Marat Dukhan582e1842021-10-25 17:18:36 -07001197 TEST_REQUIRES_ARM_NEON_FP16;
1198
1199 std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
1200 std::vector<uint16_t, AlignedAllocator<uint16_t, 64>> outputs(kBlockSize);
1201 for (uint32_t n = UINT32_C(0xC77FF000); n < UINT32_C(0xFF800000); n += kBlockSize) {
1202 for (uint32_t i = 0; i < kBlockSize; i++) {
1203 inputs[i] = fp32_from_bits(n + i);
1204 }
1205 xnn_math_f32_f16_cvt__neonfp16(kBlockSize * sizeof(uint16_t), inputs.data(), outputs.data());
1206 for (uint32_t i = 0; i < kBlockSize; i++) {
1207 const uint16_t reference_output = UINT16_C(0xFC00);
1208 ASSERT_EQ(reference_output, outputs[i])
1209 << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(inputs[i])
1210 << ", reference = 0x" << std::hex << std::setw(4) << std::setfill('0') << reference_output
1211 << ", optimized = 0x" << std::hex << std::setw(4) << std::setfill('0') << outputs[i];
1212 }
1213 }
1214 }
1215
Marat Dukhana6eb1e52021-11-06 18:29:36 -07001216 TEST(CVT__NEONFP16, positive_infinity) {
Marat Dukhan582e1842021-10-25 17:18:36 -07001217 TEST_REQUIRES_ARM_NEON_FP16;
1218
1219 std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
1220 std::vector<uint16_t, AlignedAllocator<uint16_t, 64>> outputs(kBlockSize);
1221 std::fill(inputs.begin(), inputs.end(), +std::numeric_limits<float>::infinity());
1222 xnn_math_f32_f16_cvt__neonfp16(kBlockSize * sizeof(uint16_t), inputs.data(), outputs.data());
1223 const uint16_t reference_output = UINT16_C(0x7C00);
1224 ASSERT_EQ(reference_output, outputs[0])
1225 << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(inputs[0])
1226 << ", reference = 0x" << std::hex << std::setw(4) << std::setfill('0') << reference_output
1227 << ", optimized = 0x" << std::hex << std::setw(4) << std::setfill('0') << outputs[0];
1228 }
1229
Marat Dukhana6eb1e52021-11-06 18:29:36 -07001230 TEST(CVT__NEONFP16, negative_infinity) {
Marat Dukhan582e1842021-10-25 17:18:36 -07001231 TEST_REQUIRES_ARM_NEON_FP16;
1232
1233 std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
1234 std::vector<uint16_t, AlignedAllocator<uint16_t, 64>> outputs(kBlockSize);
1235 std::fill(inputs.begin(), inputs.end(), -std::numeric_limits<float>::infinity());
1236 xnn_math_f32_f16_cvt__neonfp16(kBlockSize * sizeof(uint16_t), inputs.data(), outputs.data());
1237 const uint16_t reference_output = UINT16_C(0xFC00);
1238 ASSERT_EQ(reference_output, outputs[0])
1239 << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(inputs[0])
1240 << ", reference = 0x" << std::hex << std::setw(4) << std::setfill('0') << reference_output
1241 << ", optimized = 0x" << std::hex << std::setw(4) << std::setfill('0') << outputs[0];
1242 }
1243
Marat Dukhana6eb1e52021-11-06 18:29:36 -07001244 TEST(CVT__NEONFP16, positive_nan) {
Marat Dukhan582e1842021-10-25 17:18:36 -07001245 TEST_REQUIRES_ARM_NEON_FP16;
1246
1247 std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
1248 std::vector<uint16_t, AlignedAllocator<uint16_t, 64>> outputs(kBlockSize);
1249 for (uint32_t n = UINT32_C(0x7F800001); n < UINT32_C(0x80000000); n += kBlockSize) {
1250 for (uint32_t i = 0; i < kBlockSize; i++) {
1251 inputs[i] = fp32_from_bits(std::min<uint32_t>(n + i, UINT32_C(0x7FFFFFFF)));
1252 }
1253 xnn_math_f32_f16_cvt__neonfp16(kBlockSize * sizeof(uint16_t), inputs.data(), outputs.data());
1254 for (uint32_t i = 0; i < kBlockSize; i++) {
1255 ASSERT_GT(outputs[i], UINT16_C(0x7C00))
1256 << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(inputs[i])
1257 << ", optimized = 0x" << std::hex << std::setw(4) << std::setfill('0') << outputs[i];
1258 ASSERT_LT(outputs[i], UINT16_C(0x8000))
1259 << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(inputs[i])
1260 << ", optimized = 0x" << std::hex << std::setw(4) << std::setfill('0') << outputs[i];
1261 }
1262 }
1263 }
1264
Marat Dukhana6eb1e52021-11-06 18:29:36 -07001265 TEST(CVT__NEONFP16, negative_nan) {
Marat Dukhan582e1842021-10-25 17:18:36 -07001266 TEST_REQUIRES_ARM_NEON_FP16;
1267
1268 std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
1269 std::vector<uint16_t, AlignedAllocator<uint16_t, 64>> outputs(kBlockSize);
1270 for (uint32_t n = UINT32_C(0x7F800001); n < UINT32_C(0x80000000); n += kBlockSize) {
1271 for (uint32_t i = 0; i < kBlockSize; i++) {
1272 inputs[i] = fp32_from_bits(UINT32_C(0x80000000) | std::min<uint32_t>(n + i, UINT32_C(0x7FFFFFFF)));
1273 }
1274 xnn_math_f32_f16_cvt__neonfp16(kBlockSize * sizeof(uint16_t), inputs.data(), outputs.data());
1275 for (uint32_t i = 0; i < kBlockSize; i++) {
1276 ASSERT_GT(outputs[i], UINT16_C(0xFC00))
1277 << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(inputs[i])
1278 << ", optimized = 0x" << std::hex << std::setw(4) << std::setfill('0') << outputs[i];
1279 }
1280 }
1281 }
1282#endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
Marat Dukhan46cc1e12021-11-04 21:16:49 -07001283
#if XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
  // Exhaustively covers all positive FP32 values that map to normal FP16 values;
  // each block of inputs is checked against the fp16 reference conversion.
  TEST(CVT__WASMSIMD, positive_normal) {
    std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
    std::vector<uint16_t, AlignedAllocator<uint16_t, 64>> outputs(kBlockSize);
    for (uint32_t n = UINT32_C(0x387FE000); n < UINT32_C(0x477FF000); n += kBlockSize) {
      for (uint32_t i = 0; i < kBlockSize; i++) {
        inputs[i] = fp32_from_bits(n + i);
      }
      xnn_math_f32_f16_cvt__wasmsimd(kBlockSize * sizeof(uint16_t), inputs.data(), outputs.data());
      for (uint32_t i = 0; i < kBlockSize; i++) {
        const uint16_t reference_output = fp16_ieee_from_fp32_value(inputs[i]);
        ASSERT_EQ(reference_output, outputs[i])
          << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(inputs[i])
          << ", reference = 0x" << std::hex << std::setw(4) << std::setfill('0') << reference_output
          << ", optimized = 0x" << std::hex << std::setw(4) << std::setfill('0') << outputs[i];
      }
    }
  }

  // Negative counterpart of positive_normal (same magnitudes, sign bit set).
  TEST(CVT__WASMSIMD, negative_normal) {
    std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
    std::vector<uint16_t, AlignedAllocator<uint16_t, 64>> outputs(kBlockSize);
    for (uint32_t n = UINT32_C(0xB87FE000); n < UINT32_C(0xC77FF000); n += kBlockSize) {
      for (uint32_t i = 0; i < kBlockSize; i++) {
        inputs[i] = fp32_from_bits(n + i);
      }
      xnn_math_f32_f16_cvt__wasmsimd(kBlockSize * sizeof(uint16_t), inputs.data(), outputs.data());
      for (uint32_t i = 0; i < kBlockSize; i++) {
        const uint16_t reference_output = fp16_ieee_from_fp32_value(inputs[i]);
        ASSERT_EQ(reference_output, outputs[i])
          << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(inputs[i])
          << ", reference = 0x" << std::hex << std::setw(4) << std::setfill('0') << reference_output
          << ", optimized = 0x" << std::hex << std::setw(4) << std::setfill('0') << outputs[i];
      }
    }
  }

  // Positive FP32 values that round to subnormal FP16 values; the last block is
  // clamped so inputs never leave the subnormal-producing range.
  TEST(CVT__WASMSIMD, positive_subnormal) {
    std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
    std::vector<uint16_t, AlignedAllocator<uint16_t, 64>> outputs(kBlockSize);
    for (uint32_t n = UINT32_C(0x33000001); n < UINT32_C(0x387FE000); n += kBlockSize) {
      for (uint32_t i = 0; i < kBlockSize; i++) {
        inputs[i] = fp32_from_bits(std::min<uint32_t>(n + i, UINT32_C(0x387FDFFF)));
      }
      xnn_math_f32_f16_cvt__wasmsimd(kBlockSize * sizeof(uint16_t), inputs.data(), outputs.data());
      for (uint32_t i = 0; i < kBlockSize; i++) {
        const uint16_t reference_output = fp16_ieee_from_fp32_value(inputs[i]);
        ASSERT_EQ(reference_output, outputs[i])
          << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(inputs[i])
          << ", reference = 0x" << std::hex << std::setw(4) << std::setfill('0') << reference_output
          << ", optimized = 0x" << std::hex << std::setw(4) << std::setfill('0') << outputs[i];
      }
    }
  }

  // Negative counterpart of positive_subnormal.
  TEST(CVT__WASMSIMD, negative_subnormal) {
    std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
    std::vector<uint16_t, AlignedAllocator<uint16_t, 64>> outputs(kBlockSize);
    for (uint32_t n = UINT32_C(0xB3000001); n < UINT32_C(0xB87FE000); n += kBlockSize) {
      for (uint32_t i = 0; i < kBlockSize; i++) {
        inputs[i] = fp32_from_bits(std::min<uint32_t>(n + i, UINT32_C(0xB87FDFFF)));
      }
      xnn_math_f32_f16_cvt__wasmsimd(kBlockSize * sizeof(uint16_t), inputs.data(), outputs.data());
      for (uint32_t i = 0; i < kBlockSize; i++) {
        const uint16_t reference_output = fp16_ieee_from_fp32_value(inputs[i]);
        ASSERT_EQ(reference_output, outputs[i])
          << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(inputs[i])
          << ", reference = 0x" << std::hex << std::setw(4) << std::setfill('0') << reference_output
          << ", optimized = 0x" << std::hex << std::setw(4) << std::setfill('0') << outputs[i];
      }
    }
  }

  // Positive FP32 values too small for even a subnormal FP16: must flush to +0 (0x0000).
  TEST(CVT__WASMSIMD, positive_underflow) {
    std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
    std::vector<uint16_t, AlignedAllocator<uint16_t, 64>> outputs(kBlockSize);
    for (uint32_t n = UINT32_C(0x00000001); n < UINT32_C(0x33000001); n += kBlockSize) {
      for (uint32_t i = 0; i < kBlockSize; i++) {
        inputs[i] = fp32_from_bits(n + i);
      }
      xnn_math_f32_f16_cvt__wasmsimd(kBlockSize * sizeof(uint16_t), inputs.data(), outputs.data());
      for (uint32_t i = 0; i < kBlockSize; i++) {
        const uint16_t reference_output = UINT16_C(0x0000);
        ASSERT_EQ(reference_output, outputs[i])
          << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(inputs[i])
          << ", reference = 0x" << std::hex << std::setw(4) << std::setfill('0') << reference_output
          << ", optimized = 0x" << std::hex << std::setw(4) << std::setfill('0') << outputs[i];
      }
    }
  }

  // Negative underflow range must flush to -0 (0x8000), preserving the sign.
  TEST(CVT__WASMSIMD, negative_underflow) {
    std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
    std::vector<uint16_t, AlignedAllocator<uint16_t, 64>> outputs(kBlockSize);
    for (uint32_t n = UINT32_C(0x80000001); n < UINT32_C(0xB3000001); n += kBlockSize) {
      for (uint32_t i = 0; i < kBlockSize; i++) {
        inputs[i] = fp32_from_bits(n + i);
      }
      xnn_math_f32_f16_cvt__wasmsimd(kBlockSize * sizeof(uint16_t), inputs.data(), outputs.data());
      for (uint32_t i = 0; i < kBlockSize; i++) {
        const uint16_t reference_output = UINT16_C(0x8000);
        ASSERT_EQ(reference_output, outputs[i])
          << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(inputs[i])
          << ", reference = 0x" << std::hex << std::setw(4) << std::setfill('0') << reference_output
          << ", optimized = 0x" << std::hex << std::setw(4) << std::setfill('0') << outputs[i];
      }
    }
  }

  // +0.0f must convert to 0x0000.
  TEST(CVT__WASMSIMD, positive_zero) {
    std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
    std::vector<uint16_t, AlignedAllocator<uint16_t, 64>> outputs(kBlockSize);
    std::fill(inputs.begin(), inputs.end(), +0.0f);
    xnn_math_f32_f16_cvt__wasmsimd(kBlockSize * sizeof(uint16_t), inputs.data(), outputs.data());
    const uint16_t reference_output = UINT16_C(0x0000);
    // Check every element, not just outputs[0]: a vector kernel could mishandle later lanes.
    for (uint32_t i = 0; i < kBlockSize; i++) {
      ASSERT_EQ(reference_output, outputs[i])
        << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(inputs[i])
        << ", reference = 0x" << std::hex << std::setw(4) << std::setfill('0') << reference_output
        << ", optimized = 0x" << std::hex << std::setw(4) << std::setfill('0') << outputs[i];
    }
  }

  // -0.0f must convert to 0x8000 (sign preserved).
  TEST(CVT__WASMSIMD, negative_zero) {
    std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
    std::vector<uint16_t, AlignedAllocator<uint16_t, 64>> outputs(kBlockSize);
    std::fill(inputs.begin(), inputs.end(), -0.0f);
    xnn_math_f32_f16_cvt__wasmsimd(kBlockSize * sizeof(uint16_t), inputs.data(), outputs.data());
    const uint16_t reference_output = UINT16_C(0x8000);
    // Check every element, not just outputs[0]: a vector kernel could mishandle later lanes.
    for (uint32_t i = 0; i < kBlockSize; i++) {
      ASSERT_EQ(reference_output, outputs[i])
        << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(inputs[i])
        << ", reference = 0x" << std::hex << std::setw(4) << std::setfill('0') << reference_output
        << ", optimized = 0x" << std::hex << std::setw(4) << std::setfill('0') << outputs[i];
    }
  }

  // Finite positive FP32 values above the FP16 maximum must saturate to +inf (0x7C00).
  TEST(CVT__WASMSIMD, positive_overflow) {
    std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
    std::vector<uint16_t, AlignedAllocator<uint16_t, 64>> outputs(kBlockSize);
    for (uint32_t n = UINT32_C(0x477FF000); n < UINT32_C(0x7F800000); n += kBlockSize) {
      for (uint32_t i = 0; i < kBlockSize; i++) {
        inputs[i] = fp32_from_bits(n + i);
      }
      xnn_math_f32_f16_cvt__wasmsimd(kBlockSize * sizeof(uint16_t), inputs.data(), outputs.data());
      for (uint32_t i = 0; i < kBlockSize; i++) {
        const uint16_t reference_output = UINT16_C(0x7C00);
        ASSERT_EQ(reference_output, outputs[i])
          << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(inputs[i])
          << ", reference = 0x" << std::hex << std::setw(4) << std::setfill('0') << reference_output
          << ", optimized = 0x" << std::hex << std::setw(4) << std::setfill('0') << outputs[i];
      }
    }
  }

  // Finite negative FP32 values below the FP16 minimum must saturate to -inf (0xFC00).
  TEST(CVT__WASMSIMD, negative_overflow) {
    std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
    std::vector<uint16_t, AlignedAllocator<uint16_t, 64>> outputs(kBlockSize);
    for (uint32_t n = UINT32_C(0xC77FF000); n < UINT32_C(0xFF800000); n += kBlockSize) {
      for (uint32_t i = 0; i < kBlockSize; i++) {
        inputs[i] = fp32_from_bits(n + i);
      }
      xnn_math_f32_f16_cvt__wasmsimd(kBlockSize * sizeof(uint16_t), inputs.data(), outputs.data());
      for (uint32_t i = 0; i < kBlockSize; i++) {
        const uint16_t reference_output = UINT16_C(0xFC00);
        ASSERT_EQ(reference_output, outputs[i])
          << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(inputs[i])
          << ", reference = 0x" << std::hex << std::setw(4) << std::setfill('0') << reference_output
          << ", optimized = 0x" << std::hex << std::setw(4) << std::setfill('0') << outputs[i];
      }
    }
  }

  // +inf must convert to the FP16 +inf bit pattern (0x7C00).
  TEST(CVT__WASMSIMD, positive_infinity) {
    std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
    std::vector<uint16_t, AlignedAllocator<uint16_t, 64>> outputs(kBlockSize);
    // NOTE(review): std::numeric_limits requires <limits>; confirm it is included at the top of the file.
    std::fill(inputs.begin(), inputs.end(), +std::numeric_limits<float>::infinity());
    xnn_math_f32_f16_cvt__wasmsimd(kBlockSize * sizeof(uint16_t), inputs.data(), outputs.data());
    const uint16_t reference_output = UINT16_C(0x7C00);
    // Check every element, not just outputs[0]: a vector kernel could mishandle later lanes.
    for (uint32_t i = 0; i < kBlockSize; i++) {
      ASSERT_EQ(reference_output, outputs[i])
        << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(inputs[i])
        << ", reference = 0x" << std::hex << std::setw(4) << std::setfill('0') << reference_output
        << ", optimized = 0x" << std::hex << std::setw(4) << std::setfill('0') << outputs[i];
    }
  }

  // -inf must convert to the FP16 -inf bit pattern (0xFC00).
  TEST(CVT__WASMSIMD, negative_infinity) {
    std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
    std::vector<uint16_t, AlignedAllocator<uint16_t, 64>> outputs(kBlockSize);
    std::fill(inputs.begin(), inputs.end(), -std::numeric_limits<float>::infinity());
    xnn_math_f32_f16_cvt__wasmsimd(kBlockSize * sizeof(uint16_t), inputs.data(), outputs.data());
    const uint16_t reference_output = UINT16_C(0xFC00);
    // Check every element, not just outputs[0]: a vector kernel could mishandle later lanes.
    for (uint32_t i = 0; i < kBlockSize; i++) {
      ASSERT_EQ(reference_output, outputs[i])
        << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(inputs[i])
        << ", reference = 0x" << std::hex << std::setw(4) << std::setfill('0') << reference_output
        << ", optimized = 0x" << std::hex << std::setw(4) << std::setfill('0') << outputs[i];
    }
  }

  // Every positive FP32 NaN must convert to a positive FP16 NaN:
  // strictly between 0x7C00 (+inf) and 0x8000 (-0).
  TEST(CVT__WASMSIMD, positive_nan) {
    std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
    std::vector<uint16_t, AlignedAllocator<uint16_t, 64>> outputs(kBlockSize);
    for (uint32_t n = UINT32_C(0x7F800001); n < UINT32_C(0x80000000); n += kBlockSize) {
      for (uint32_t i = 0; i < kBlockSize; i++) {
        // Clamp so the final (partial) block does not run past the NaN range.
        inputs[i] = fp32_from_bits(std::min<uint32_t>(n + i, UINT32_C(0x7FFFFFFF)));
      }
      xnn_math_f32_f16_cvt__wasmsimd(kBlockSize * sizeof(uint16_t), inputs.data(), outputs.data());
      for (uint32_t i = 0; i < kBlockSize; i++) {
        ASSERT_GT(outputs[i], UINT16_C(0x7C00))
          << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(inputs[i])
          << ", optimized = 0x" << std::hex << std::setw(4) << std::setfill('0') << outputs[i];
        ASSERT_LT(outputs[i], UINT16_C(0x8000))
          << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(inputs[i])
          << ", optimized = 0x" << std::hex << std::setw(4) << std::setfill('0') << outputs[i];
      }
    }
  }

  // Every negative FP32 NaN must convert to a negative FP16 NaN: strictly above
  // 0xFC00 (-inf); no upper bound needed since uint16_t caps at 0xFFFF.
  TEST(CVT__WASMSIMD, negative_nan) {
    std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
    std::vector<uint16_t, AlignedAllocator<uint16_t, 64>> outputs(kBlockSize);
    for (uint32_t n = UINT32_C(0x7F800001); n < UINT32_C(0x80000000); n += kBlockSize) {
      for (uint32_t i = 0; i < kBlockSize; i++) {
        // Set the sign bit on a clamped positive-NaN pattern.
        inputs[i] = fp32_from_bits(UINT32_C(0x80000000) | std::min<uint32_t>(n + i, UINT32_C(0x7FFFFFFF)));
      }
      xnn_math_f32_f16_cvt__wasmsimd(kBlockSize * sizeof(uint16_t), inputs.data(), outputs.data());
      for (uint32_t i = 0; i < kBlockSize; i++) {
        ASSERT_GT(outputs[i], UINT16_C(0xFC00))
          << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(inputs[i])
          << ", optimized = 0x" << std::hex << std::setw(4) << std::setfill('0') << outputs[i];
      }
    }
  }
#endif  // XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
Marat Dukhan79c78b22021-11-08 20:44:27 -08001512
Marat Dukhan78f039d2021-11-09 16:42:27 -08001513TEST(CVT__SCALAR_BITCAST, positive_normal) {
Marat Dukhan46cc1e12021-11-04 21:16:49 -07001514 std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
1515 std::vector<uint16_t, AlignedAllocator<uint16_t, 64>> outputs(kBlockSize);
1516 for (uint32_t n = UINT32_C(0x387FE000); n < UINT32_C(0x477FF000); n += kBlockSize) {
1517 for (uint32_t i = 0; i < kBlockSize; i++) {
1518 inputs[i] = fp32_from_bits(n + i);
1519 }
Marat Dukhan78f039d2021-11-09 16:42:27 -08001520 xnn_math_f32_f16_cvt__scalar_bitcast(kBlockSize * sizeof(uint16_t), inputs.data(), outputs.data());
Marat Dukhan46cc1e12021-11-04 21:16:49 -07001521 for (uint32_t i = 0; i < kBlockSize; i++) {
1522 const uint16_t reference_output = fp16_ieee_from_fp32_value(inputs[i]);
1523 ASSERT_EQ(reference_output, outputs[i])
1524 << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(inputs[i])
1525 << ", reference = 0x" << std::hex << std::setw(4) << std::setfill('0') << reference_output
1526 << ", optimized = 0x" << std::hex << std::setw(4) << std::setfill('0') << outputs[i];
1527 }
1528 }
1529}
1530
Marat Dukhan78f039d2021-11-09 16:42:27 -08001531TEST(CVT__SCALAR_BITCAST, negative_normal) {
Marat Dukhan46cc1e12021-11-04 21:16:49 -07001532 std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
1533 std::vector<uint16_t, AlignedAllocator<uint16_t, 64>> outputs(kBlockSize);
1534 for (uint32_t n = UINT32_C(0xB87FE000); n < UINT32_C(0xC77FF000); n += kBlockSize) {
1535 for (uint32_t i = 0; i < kBlockSize; i++) {
1536 inputs[i] = fp32_from_bits(n + i);
1537 }
Marat Dukhan78f039d2021-11-09 16:42:27 -08001538 xnn_math_f32_f16_cvt__scalar_bitcast(kBlockSize * sizeof(uint16_t), inputs.data(), outputs.data());
Marat Dukhan46cc1e12021-11-04 21:16:49 -07001539 for (uint32_t i = 0; i < kBlockSize; i++) {
1540 const uint16_t reference_output = fp16_ieee_from_fp32_value(inputs[i]);
1541 ASSERT_EQ(reference_output, outputs[i])
1542 << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(inputs[i])
1543 << ", reference = 0x" << std::hex << std::setw(4) << std::setfill('0') << reference_output
1544 << ", optimized = 0x" << std::hex << std::setw(4) << std::setfill('0') << outputs[i];
1545 }
1546 }
1547}
1548
Marat Dukhan78f039d2021-11-09 16:42:27 -08001549TEST(CVT__SCALAR_BITCAST, positive_subnormal) {
Marat Dukhan46cc1e12021-11-04 21:16:49 -07001550 std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
1551 std::vector<uint16_t, AlignedAllocator<uint16_t, 64>> outputs(kBlockSize);
1552 for (uint32_t n = UINT32_C(0x33000001); n < UINT32_C(0x387FE000); n += kBlockSize) {
1553 for (uint32_t i = 0; i < kBlockSize; i++) {
1554 inputs[i] = fp32_from_bits(std::min<uint32_t>(n + i, UINT32_C(0x387FDFFF)));
1555 }
Marat Dukhan78f039d2021-11-09 16:42:27 -08001556 xnn_math_f32_f16_cvt__scalar_bitcast(kBlockSize * sizeof(uint16_t), inputs.data(), outputs.data());
Marat Dukhan46cc1e12021-11-04 21:16:49 -07001557 for (uint32_t i = 0; i < kBlockSize; i++) {
1558 const uint16_t reference_output = fp16_ieee_from_fp32_value(inputs[i]);
1559 ASSERT_EQ(reference_output, outputs[i])
1560 << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(inputs[i])
1561 << ", reference = 0x" << std::hex << std::setw(4) << std::setfill('0') << reference_output
1562 << ", optimized = 0x" << std::hex << std::setw(4) << std::setfill('0') << outputs[i];
1563 }
1564 }
1565}
1566
Marat Dukhan78f039d2021-11-09 16:42:27 -08001567TEST(CVT__SCALAR_BITCAST, negative_subnormal) {
Marat Dukhan46cc1e12021-11-04 21:16:49 -07001568 std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
1569 std::vector<uint16_t, AlignedAllocator<uint16_t, 64>> outputs(kBlockSize);
1570 for (uint32_t n = UINT32_C(0xB3000001); n < UINT32_C(0xB87FE000); n += kBlockSize) {
1571 for (uint32_t i = 0; i < kBlockSize; i++) {
1572 inputs[i] = fp32_from_bits(std::min<uint32_t>(n + i, UINT32_C(0xB87FDFFF)));
1573 }
Marat Dukhan78f039d2021-11-09 16:42:27 -08001574 xnn_math_f32_f16_cvt__scalar_bitcast(kBlockSize * sizeof(uint16_t), inputs.data(), outputs.data());
Marat Dukhan46cc1e12021-11-04 21:16:49 -07001575 for (uint32_t i = 0; i < kBlockSize; i++) {
1576 const uint16_t reference_output = fp16_ieee_from_fp32_value(inputs[i]);
1577 ASSERT_EQ(reference_output, outputs[i])
1578 << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(inputs[i])
1579 << ", reference = 0x" << std::hex << std::setw(4) << std::setfill('0') << reference_output
1580 << ", optimized = 0x" << std::hex << std::setw(4) << std::setfill('0') << outputs[i];
1581 }
1582 }
1583}
1584
Marat Dukhan78f039d2021-11-09 16:42:27 -08001585TEST(CVT__SCALAR_BITCAST, positive_underflow) {
Marat Dukhan46cc1e12021-11-04 21:16:49 -07001586 std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
1587 std::vector<uint16_t, AlignedAllocator<uint16_t, 64>> outputs(kBlockSize);
1588 for (uint32_t n = UINT32_C(0x00000001); n < UINT32_C(0x33000001); n += kBlockSize) {
1589 for (uint32_t i = 0; i < kBlockSize; i++) {
1590 inputs[i] = fp32_from_bits(n + i);
1591 }
Marat Dukhan78f039d2021-11-09 16:42:27 -08001592 xnn_math_f32_f16_cvt__scalar_bitcast(kBlockSize * sizeof(uint16_t), inputs.data(), outputs.data());
Marat Dukhan46cc1e12021-11-04 21:16:49 -07001593 for (uint32_t i = 0; i < kBlockSize; i++) {
1594 const uint16_t reference_output = UINT16_C(0x0000);
1595 ASSERT_EQ(reference_output, outputs[i])
1596 << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(inputs[i])
1597 << ", reference = 0x" << std::hex << std::setw(4) << std::setfill('0') << reference_output
1598 << ", optimized = 0x" << std::hex << std::setw(4) << std::setfill('0') << outputs[i];
1599 }
1600 }
1601}
1602
Marat Dukhan78f039d2021-11-09 16:42:27 -08001603TEST(CVT__SCALAR_BITCAST, negative_underflow) {
Marat Dukhan46cc1e12021-11-04 21:16:49 -07001604 std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
1605 std::vector<uint16_t, AlignedAllocator<uint16_t, 64>> outputs(kBlockSize);
1606 for (uint32_t n = UINT32_C(0x80000001); n < UINT32_C(0xB3000001); n += kBlockSize) {
1607 for (uint32_t i = 0; i < kBlockSize; i++) {
1608 inputs[i] = fp32_from_bits(n + i);
1609 }
Marat Dukhan78f039d2021-11-09 16:42:27 -08001610 xnn_math_f32_f16_cvt__scalar_bitcast(kBlockSize * sizeof(uint16_t), inputs.data(), outputs.data());
Marat Dukhan46cc1e12021-11-04 21:16:49 -07001611 for (uint32_t i = 0; i < kBlockSize; i++) {
1612 const uint16_t reference_output = UINT16_C(0x8000);
1613 ASSERT_EQ(reference_output, outputs[i])
1614 << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(inputs[i])
1615 << ", reference = 0x" << std::hex << std::setw(4) << std::setfill('0') << reference_output
1616 << ", optimized = 0x" << std::hex << std::setw(4) << std::setfill('0') << outputs[i];
1617 }
1618 }
1619}
1620
Marat Dukhan78f039d2021-11-09 16:42:27 -08001621TEST(CVT__SCALAR_BITCAST, positive_zero) {
Marat Dukhan46cc1e12021-11-04 21:16:49 -07001622 std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
1623 std::vector<uint16_t, AlignedAllocator<uint16_t, 64>> outputs(kBlockSize);
1624 std::fill(inputs.begin(), inputs.end(), +0.0f);
Marat Dukhan78f039d2021-11-09 16:42:27 -08001625 xnn_math_f32_f16_cvt__scalar_bitcast(kBlockSize * sizeof(uint16_t), inputs.data(), outputs.data());
Marat Dukhan46cc1e12021-11-04 21:16:49 -07001626 const uint16_t reference_output = UINT16_C(0x0000);
1627 ASSERT_EQ(reference_output, outputs[0])
1628 << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(inputs[0])
1629 << ", reference = 0x" << std::hex << std::setw(4) << std::setfill('0') << reference_output
1630 << ", optimized = 0x" << std::hex << std::setw(4) << std::setfill('0') << outputs[0];
1631}
1632
Marat Dukhan78f039d2021-11-09 16:42:27 -08001633TEST(CVT__SCALAR_BITCAST, negative_zero) {
Marat Dukhan46cc1e12021-11-04 21:16:49 -07001634 std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
1635 std::vector<uint16_t, AlignedAllocator<uint16_t, 64>> outputs(kBlockSize);
1636 std::fill(inputs.begin(), inputs.end(), -0.0f);
Marat Dukhan78f039d2021-11-09 16:42:27 -08001637 xnn_math_f32_f16_cvt__scalar_bitcast(kBlockSize * sizeof(uint16_t), inputs.data(), outputs.data());
Marat Dukhan46cc1e12021-11-04 21:16:49 -07001638 const uint16_t reference_output = UINT16_C(0x8000);
1639 ASSERT_EQ(reference_output, outputs[0])
1640 << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(inputs[0])
1641 << ", reference = 0x" << std::hex << std::setw(4) << std::setfill('0') << reference_output
1642 << ", optimized = 0x" << std::hex << std::setw(4) << std::setfill('0') << outputs[0];
1643}
1644
Marat Dukhan78f039d2021-11-09 16:42:27 -08001645TEST(CVT__SCALAR_BITCAST, positive_overflow) {
Marat Dukhan46cc1e12021-11-04 21:16:49 -07001646 std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
1647 std::vector<uint16_t, AlignedAllocator<uint16_t, 64>> outputs(kBlockSize);
1648 for (uint32_t n = UINT32_C(0x477FF000); n < UINT32_C(0x7F800000); n += kBlockSize) {
1649 for (uint32_t i = 0; i < kBlockSize; i++) {
1650 inputs[i] = fp32_from_bits(n + i);
1651 }
Marat Dukhan78f039d2021-11-09 16:42:27 -08001652 xnn_math_f32_f16_cvt__scalar_bitcast(kBlockSize * sizeof(uint16_t), inputs.data(), outputs.data());
Marat Dukhan46cc1e12021-11-04 21:16:49 -07001653 for (uint32_t i = 0; i < kBlockSize; i++) {
1654 const uint16_t reference_output = UINT16_C(0x7C00);
1655 ASSERT_EQ(reference_output, outputs[i])
1656 << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(inputs[i])
1657 << ", reference = 0x" << std::hex << std::setw(4) << std::setfill('0') << reference_output
1658 << ", optimized = 0x" << std::hex << std::setw(4) << std::setfill('0') << outputs[i];
1659 }
1660 }
1661}
1662
Marat Dukhan78f039d2021-11-09 16:42:27 -08001663TEST(CVT__SCALAR_BITCAST, negative_overflow) {
Marat Dukhan46cc1e12021-11-04 21:16:49 -07001664 std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
1665 std::vector<uint16_t, AlignedAllocator<uint16_t, 64>> outputs(kBlockSize);
1666 for (uint32_t n = UINT32_C(0xC77FF000); n < UINT32_C(0xFF800000); n += kBlockSize) {
1667 for (uint32_t i = 0; i < kBlockSize; i++) {
1668 inputs[i] = fp32_from_bits(n + i);
1669 }
Marat Dukhan78f039d2021-11-09 16:42:27 -08001670 xnn_math_f32_f16_cvt__scalar_bitcast(kBlockSize * sizeof(uint16_t), inputs.data(), outputs.data());
Marat Dukhan46cc1e12021-11-04 21:16:49 -07001671 for (uint32_t i = 0; i < kBlockSize; i++) {
1672 const uint16_t reference_output = UINT16_C(0xFC00);
1673 ASSERT_EQ(reference_output, outputs[i])
1674 << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(inputs[i])
1675 << ", reference = 0x" << std::hex << std::setw(4) << std::setfill('0') << reference_output
1676 << ", optimized = 0x" << std::hex << std::setw(4) << std::setfill('0') << outputs[i];
1677 }
1678 }
1679}
1680
Marat Dukhan78f039d2021-11-09 16:42:27 -08001681TEST(CVT__SCALAR_BITCAST, positive_infinity) {
Marat Dukhan46cc1e12021-11-04 21:16:49 -07001682 std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
1683 std::vector<uint16_t, AlignedAllocator<uint16_t, 64>> outputs(kBlockSize);
1684 std::fill(inputs.begin(), inputs.end(), +std::numeric_limits<float>::infinity());
Marat Dukhan78f039d2021-11-09 16:42:27 -08001685 xnn_math_f32_f16_cvt__scalar_bitcast(kBlockSize * sizeof(uint16_t), inputs.data(), outputs.data());
Marat Dukhan46cc1e12021-11-04 21:16:49 -07001686 const uint16_t reference_output = UINT16_C(0x7C00);
1687 ASSERT_EQ(reference_output, outputs[0])
1688 << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(inputs[0])
1689 << ", reference = 0x" << std::hex << std::setw(4) << std::setfill('0') << reference_output
1690 << ", optimized = 0x" << std::hex << std::setw(4) << std::setfill('0') << outputs[0];
1691}
1692
Marat Dukhan78f039d2021-11-09 16:42:27 -08001693TEST(CVT__SCALAR_BITCAST, negative_infinity) {
Marat Dukhan46cc1e12021-11-04 21:16:49 -07001694 std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
1695 std::vector<uint16_t, AlignedAllocator<uint16_t, 64>> outputs(kBlockSize);
1696 std::fill(inputs.begin(), inputs.end(), -std::numeric_limits<float>::infinity());
Marat Dukhan78f039d2021-11-09 16:42:27 -08001697 xnn_math_f32_f16_cvt__scalar_bitcast(kBlockSize * sizeof(uint16_t), inputs.data(), outputs.data());
Marat Dukhan46cc1e12021-11-04 21:16:49 -07001698 const uint16_t reference_output = UINT16_C(0xFC00);
1699 ASSERT_EQ(reference_output, outputs[0])
1700 << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(inputs[0])
1701 << ", reference = 0x" << std::hex << std::setw(4) << std::setfill('0') << reference_output
1702 << ", optimized = 0x" << std::hex << std::setw(4) << std::setfill('0') << outputs[0];
1703}
1704
Marat Dukhan78f039d2021-11-09 16:42:27 -08001705TEST(CVT__SCALAR_BITCAST, positive_nan) {
Marat Dukhan46cc1e12021-11-04 21:16:49 -07001706 std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
1707 std::vector<uint16_t, AlignedAllocator<uint16_t, 64>> outputs(kBlockSize);
1708 for (uint32_t n = UINT32_C(0x7F800001); n < UINT32_C(0x80000000); n += kBlockSize) {
1709 for (uint32_t i = 0; i < kBlockSize; i++) {
1710 inputs[i] = fp32_from_bits(std::min<uint32_t>(n + i, UINT32_C(0x7FFFFFFF)));
1711 }
Marat Dukhan78f039d2021-11-09 16:42:27 -08001712 xnn_math_f32_f16_cvt__scalar_bitcast(kBlockSize * sizeof(uint16_t), inputs.data(), outputs.data());
Marat Dukhan46cc1e12021-11-04 21:16:49 -07001713 for (uint32_t i = 0; i < kBlockSize; i++) {
1714 ASSERT_GT(outputs[i], UINT16_C(0x7C00))
1715 << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(inputs[i])
1716 << ", optimized = 0x" << std::hex << std::setw(4) << std::setfill('0') << outputs[i];
1717 ASSERT_LT(outputs[i], UINT16_C(0x8000))
1718 << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(inputs[i])
1719 << ", optimized = 0x" << std::hex << std::setw(4) << std::setfill('0') << outputs[i];
1720 }
1721 }
1722}
1723
Marat Dukhan78f039d2021-11-09 16:42:27 -08001724TEST(CVT__SCALAR_BITCAST, negative_nan) {
Marat Dukhan46cc1e12021-11-04 21:16:49 -07001725 std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
1726 std::vector<uint16_t, AlignedAllocator<uint16_t, 64>> outputs(kBlockSize);
1727 for (uint32_t n = UINT32_C(0x7F800001); n < UINT32_C(0x80000000); n += kBlockSize) {
1728 for (uint32_t i = 0; i < kBlockSize; i++) {
1729 inputs[i] = fp32_from_bits(UINT32_C(0x80000000) | std::min<uint32_t>(n + i, UINT32_C(0x7FFFFFFF)));
1730 }
Marat Dukhan78f039d2021-11-09 16:42:27 -08001731 xnn_math_f32_f16_cvt__scalar_bitcast(kBlockSize * sizeof(uint16_t), inputs.data(), outputs.data());
1732 for (uint32_t i = 0; i < kBlockSize; i++) {
1733 ASSERT_GT(outputs[i], UINT16_C(0xFC00))
1734 << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(inputs[i])
1735 << ", optimized = 0x" << std::hex << std::setw(4) << std::setfill('0') << outputs[i];
1736 }
1737 }
1738}
1739
1740TEST(CVT__SCALAR_FABSF, positive_normal) {
1741 std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
1742 std::vector<uint16_t, AlignedAllocator<uint16_t, 64>> outputs(kBlockSize);
1743 for (uint32_t n = UINT32_C(0x387FE000); n < UINT32_C(0x477FF000); n += kBlockSize) {
1744 for (uint32_t i = 0; i < kBlockSize; i++) {
1745 inputs[i] = fp32_from_bits(n + i);
1746 }
1747 xnn_math_f32_f16_cvt__scalar_fabsf(kBlockSize * sizeof(uint16_t), inputs.data(), outputs.data());
1748 for (uint32_t i = 0; i < kBlockSize; i++) {
1749 const uint16_t reference_output = fp16_ieee_from_fp32_value(inputs[i]);
1750 ASSERT_EQ(reference_output, outputs[i])
1751 << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(inputs[i])
1752 << ", reference = 0x" << std::hex << std::setw(4) << std::setfill('0') << reference_output
1753 << ", optimized = 0x" << std::hex << std::setw(4) << std::setfill('0') << outputs[i];
1754 }
1755 }
1756}
1757
1758TEST(CVT__SCALAR_FABSF, negative_normal) {
1759 std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
1760 std::vector<uint16_t, AlignedAllocator<uint16_t, 64>> outputs(kBlockSize);
1761 for (uint32_t n = UINT32_C(0xB87FE000); n < UINT32_C(0xC77FF000); n += kBlockSize) {
1762 for (uint32_t i = 0; i < kBlockSize; i++) {
1763 inputs[i] = fp32_from_bits(n + i);
1764 }
1765 xnn_math_f32_f16_cvt__scalar_fabsf(kBlockSize * sizeof(uint16_t), inputs.data(), outputs.data());
1766 for (uint32_t i = 0; i < kBlockSize; i++) {
1767 const uint16_t reference_output = fp16_ieee_from_fp32_value(inputs[i]);
1768 ASSERT_EQ(reference_output, outputs[i])
1769 << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(inputs[i])
1770 << ", reference = 0x" << std::hex << std::setw(4) << std::setfill('0') << reference_output
1771 << ", optimized = 0x" << std::hex << std::setw(4) << std::setfill('0') << outputs[i];
1772 }
1773 }
1774}
1775
1776TEST(CVT__SCALAR_FABSF, positive_subnormal) {
1777 std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
1778 std::vector<uint16_t, AlignedAllocator<uint16_t, 64>> outputs(kBlockSize);
1779 for (uint32_t n = UINT32_C(0x33000001); n < UINT32_C(0x387FE000); n += kBlockSize) {
1780 for (uint32_t i = 0; i < kBlockSize; i++) {
1781 inputs[i] = fp32_from_bits(std::min<uint32_t>(n + i, UINT32_C(0x387FDFFF)));
1782 }
1783 xnn_math_f32_f16_cvt__scalar_fabsf(kBlockSize * sizeof(uint16_t), inputs.data(), outputs.data());
1784 for (uint32_t i = 0; i < kBlockSize; i++) {
1785 const uint16_t reference_output = fp16_ieee_from_fp32_value(inputs[i]);
1786 ASSERT_EQ(reference_output, outputs[i])
1787 << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(inputs[i])
1788 << ", reference = 0x" << std::hex << std::setw(4) << std::setfill('0') << reference_output
1789 << ", optimized = 0x" << std::hex << std::setw(4) << std::setfill('0') << outputs[i];
1790 }
1791 }
1792}
1793
1794TEST(CVT__SCALAR_FABSF, negative_subnormal) {
1795 std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
1796 std::vector<uint16_t, AlignedAllocator<uint16_t, 64>> outputs(kBlockSize);
1797 for (uint32_t n = UINT32_C(0xB3000001); n < UINT32_C(0xB87FE000); n += kBlockSize) {
1798 for (uint32_t i = 0; i < kBlockSize; i++) {
1799 inputs[i] = fp32_from_bits(std::min<uint32_t>(n + i, UINT32_C(0xB87FDFFF)));
1800 }
1801 xnn_math_f32_f16_cvt__scalar_fabsf(kBlockSize * sizeof(uint16_t), inputs.data(), outputs.data());
1802 for (uint32_t i = 0; i < kBlockSize; i++) {
1803 const uint16_t reference_output = fp16_ieee_from_fp32_value(inputs[i]);
1804 ASSERT_EQ(reference_output, outputs[i])
1805 << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(inputs[i])
1806 << ", reference = 0x" << std::hex << std::setw(4) << std::setfill('0') << reference_output
1807 << ", optimized = 0x" << std::hex << std::setw(4) << std::setfill('0') << outputs[i];
1808 }
1809 }
1810}
1811
1812TEST(CVT__SCALAR_FABSF, positive_underflow) {
1813 std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
1814 std::vector<uint16_t, AlignedAllocator<uint16_t, 64>> outputs(kBlockSize);
1815 for (uint32_t n = UINT32_C(0x00000001); n < UINT32_C(0x33000001); n += kBlockSize) {
1816 for (uint32_t i = 0; i < kBlockSize; i++) {
1817 inputs[i] = fp32_from_bits(n + i);
1818 }
1819 xnn_math_f32_f16_cvt__scalar_fabsf(kBlockSize * sizeof(uint16_t), inputs.data(), outputs.data());
1820 for (uint32_t i = 0; i < kBlockSize; i++) {
1821 const uint16_t reference_output = UINT16_C(0x0000);
1822 ASSERT_EQ(reference_output, outputs[i])
1823 << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(inputs[i])
1824 << ", reference = 0x" << std::hex << std::setw(4) << std::setfill('0') << reference_output
1825 << ", optimized = 0x" << std::hex << std::setw(4) << std::setfill('0') << outputs[i];
1826 }
1827 }
1828}
1829
1830TEST(CVT__SCALAR_FABSF, negative_underflow) {
1831 std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
1832 std::vector<uint16_t, AlignedAllocator<uint16_t, 64>> outputs(kBlockSize);
1833 for (uint32_t n = UINT32_C(0x80000001); n < UINT32_C(0xB3000001); n += kBlockSize) {
1834 for (uint32_t i = 0; i < kBlockSize; i++) {
1835 inputs[i] = fp32_from_bits(n + i);
1836 }
1837 xnn_math_f32_f16_cvt__scalar_fabsf(kBlockSize * sizeof(uint16_t), inputs.data(), outputs.data());
1838 for (uint32_t i = 0; i < kBlockSize; i++) {
1839 const uint16_t reference_output = UINT16_C(0x8000);
1840 ASSERT_EQ(reference_output, outputs[i])
1841 << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(inputs[i])
1842 << ", reference = 0x" << std::hex << std::setw(4) << std::setfill('0') << reference_output
1843 << ", optimized = 0x" << std::hex << std::setw(4) << std::setfill('0') << outputs[i];
1844 }
1845 }
1846}
1847
1848TEST(CVT__SCALAR_FABSF, positive_zero) {
1849 std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
1850 std::vector<uint16_t, AlignedAllocator<uint16_t, 64>> outputs(kBlockSize);
1851 std::fill(inputs.begin(), inputs.end(), +0.0f);
1852 xnn_math_f32_f16_cvt__scalar_fabsf(kBlockSize * sizeof(uint16_t), inputs.data(), outputs.data());
1853 const uint16_t reference_output = UINT16_C(0x0000);
1854 ASSERT_EQ(reference_output, outputs[0])
1855 << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(inputs[0])
1856 << ", reference = 0x" << std::hex << std::setw(4) << std::setfill('0') << reference_output
1857 << ", optimized = 0x" << std::hex << std::setw(4) << std::setfill('0') << outputs[0];
1858}
1859
1860TEST(CVT__SCALAR_FABSF, negative_zero) {
1861 std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
1862 std::vector<uint16_t, AlignedAllocator<uint16_t, 64>> outputs(kBlockSize);
1863 std::fill(inputs.begin(), inputs.end(), -0.0f);
1864 xnn_math_f32_f16_cvt__scalar_fabsf(kBlockSize * sizeof(uint16_t), inputs.data(), outputs.data());
1865 const uint16_t reference_output = UINT16_C(0x8000);
1866 ASSERT_EQ(reference_output, outputs[0])
1867 << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(inputs[0])
1868 << ", reference = 0x" << std::hex << std::setw(4) << std::setfill('0') << reference_output
1869 << ", optimized = 0x" << std::hex << std::setw(4) << std::setfill('0') << outputs[0];
1870}
1871
1872TEST(CVT__SCALAR_FABSF, positive_overflow) {
1873 std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
1874 std::vector<uint16_t, AlignedAllocator<uint16_t, 64>> outputs(kBlockSize);
1875 for (uint32_t n = UINT32_C(0x477FF000); n < UINT32_C(0x7F800000); n += kBlockSize) {
1876 for (uint32_t i = 0; i < kBlockSize; i++) {
1877 inputs[i] = fp32_from_bits(n + i);
1878 }
1879 xnn_math_f32_f16_cvt__scalar_fabsf(kBlockSize * sizeof(uint16_t), inputs.data(), outputs.data());
1880 for (uint32_t i = 0; i < kBlockSize; i++) {
1881 const uint16_t reference_output = UINT16_C(0x7C00);
1882 ASSERT_EQ(reference_output, outputs[i])
1883 << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(inputs[i])
1884 << ", reference = 0x" << std::hex << std::setw(4) << std::setfill('0') << reference_output
1885 << ", optimized = 0x" << std::hex << std::setw(4) << std::setfill('0') << outputs[i];
1886 }
1887 }
1888}
1889
1890TEST(CVT__SCALAR_FABSF, negative_overflow) {
1891 std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
1892 std::vector<uint16_t, AlignedAllocator<uint16_t, 64>> outputs(kBlockSize);
1893 for (uint32_t n = UINT32_C(0xC77FF000); n < UINT32_C(0xFF800000); n += kBlockSize) {
1894 for (uint32_t i = 0; i < kBlockSize; i++) {
1895 inputs[i] = fp32_from_bits(n + i);
1896 }
1897 xnn_math_f32_f16_cvt__scalar_fabsf(kBlockSize * sizeof(uint16_t), inputs.data(), outputs.data());
1898 for (uint32_t i = 0; i < kBlockSize; i++) {
1899 const uint16_t reference_output = UINT16_C(0xFC00);
1900 ASSERT_EQ(reference_output, outputs[i])
1901 << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(inputs[i])
1902 << ", reference = 0x" << std::hex << std::setw(4) << std::setfill('0') << reference_output
1903 << ", optimized = 0x" << std::hex << std::setw(4) << std::setfill('0') << outputs[i];
1904 }
1905 }
1906}
1907
1908TEST(CVT__SCALAR_FABSF, positive_infinity) {
1909 std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
1910 std::vector<uint16_t, AlignedAllocator<uint16_t, 64>> outputs(kBlockSize);
1911 std::fill(inputs.begin(), inputs.end(), +std::numeric_limits<float>::infinity());
1912 xnn_math_f32_f16_cvt__scalar_fabsf(kBlockSize * sizeof(uint16_t), inputs.data(), outputs.data());
1913 const uint16_t reference_output = UINT16_C(0x7C00);
1914 ASSERT_EQ(reference_output, outputs[0])
1915 << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(inputs[0])
1916 << ", reference = 0x" << std::hex << std::setw(4) << std::setfill('0') << reference_output
1917 << ", optimized = 0x" << std::hex << std::setw(4) << std::setfill('0') << outputs[0];
1918}
1919
1920TEST(CVT__SCALAR_FABSF, negative_infinity) {
1921 std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
1922 std::vector<uint16_t, AlignedAllocator<uint16_t, 64>> outputs(kBlockSize);
1923 std::fill(inputs.begin(), inputs.end(), -std::numeric_limits<float>::infinity());
1924 xnn_math_f32_f16_cvt__scalar_fabsf(kBlockSize * sizeof(uint16_t), inputs.data(), outputs.data());
1925 const uint16_t reference_output = UINT16_C(0xFC00);
1926 ASSERT_EQ(reference_output, outputs[0])
1927 << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(inputs[0])
1928 << ", reference = 0x" << std::hex << std::setw(4) << std::setfill('0') << reference_output
1929 << ", optimized = 0x" << std::hex << std::setw(4) << std::setfill('0') << outputs[0];
1930}
1931
1932TEST(CVT__SCALAR_FABSF, positive_nan) {
1933 std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
1934 std::vector<uint16_t, AlignedAllocator<uint16_t, 64>> outputs(kBlockSize);
1935 for (uint32_t n = UINT32_C(0x7F800001); n < UINT32_C(0x80000000); n += kBlockSize) {
1936 for (uint32_t i = 0; i < kBlockSize; i++) {
1937 inputs[i] = fp32_from_bits(std::min<uint32_t>(n + i, UINT32_C(0x7FFFFFFF)));
1938 }
1939 xnn_math_f32_f16_cvt__scalar_fabsf(kBlockSize * sizeof(uint16_t), inputs.data(), outputs.data());
1940 for (uint32_t i = 0; i < kBlockSize; i++) {
1941 ASSERT_GT(outputs[i], UINT16_C(0x7C00))
1942 << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(inputs[i])
1943 << ", optimized = 0x" << std::hex << std::setw(4) << std::setfill('0') << outputs[i];
1944 ASSERT_LT(outputs[i], UINT16_C(0x8000))
1945 << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(inputs[i])
1946 << ", optimized = 0x" << std::hex << std::setw(4) << std::setfill('0') << outputs[i];
1947 }
1948 }
1949}
1950
1951TEST(CVT__SCALAR_FABSF, negative_nan) {
1952 std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
1953 std::vector<uint16_t, AlignedAllocator<uint16_t, 64>> outputs(kBlockSize);
1954 for (uint32_t n = UINT32_C(0x7F800001); n < UINT32_C(0x80000000); n += kBlockSize) {
1955 for (uint32_t i = 0; i < kBlockSize; i++) {
1956 inputs[i] = fp32_from_bits(UINT32_C(0x80000000) | std::min<uint32_t>(n + i, UINT32_C(0x7FFFFFFF)));
1957 }
1958 xnn_math_f32_f16_cvt__scalar_fabsf(kBlockSize * sizeof(uint16_t), inputs.data(), outputs.data());
Marat Dukhan46cc1e12021-11-04 21:16:49 -07001959 for (uint32_t i = 0; i < kBlockSize; i++) {
1960 ASSERT_GT(outputs[i], UINT16_C(0xFC00))
1961 << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(inputs[i])
1962 << ", optimized = 0x" << std::hex << std::setw(4) << std::setfill('0') << outputs[i];
1963 }
1964 }
1965}