// Copyright 2020 Google LLC
//
// This source code is licensed under the BSD-style license found in the
// LICENSE file in the root directory of this source tree.

#include <algorithm>
#include <cmath>
#include <cstddef>
#include <cstdint>
#include <cstdlib>
#include <iomanip>
#include <ios>
#include <vector>

#include <gtest/gtest.h>

#include <fp16.h>

#include <xnnpack/AlignedAllocator.h>
#include <xnnpack/common.h>
#include <xnnpack/isa-checks.h>
#include <xnnpack/math-stubs.h>

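// Number of elements processed per call to a math stub; the stubs take the buffer size in
// bytes, hence the kBlockSize * sizeof(float) at every call site below.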
constexpr int kBlockSize = 1024;

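// Every kernel variant below is exercised with the same four edge-case tests:
//  * negative_zero: expm1f(-0.0f) must be +0.0f.
//  * negative_saturation: inputs from -0x1.154246p+4f (bits 0xC18AA123, ~-17.329, the point
//    where expm1f(x) rounds to -1.0f in single precision) down to -infinity (bits 0xFF800000)
//    must produce exactly -1.0f.
//  * positive_nan / negative_nan: NaN inputs must propagate to NaN outputs.
// The kernel-name suffixes appear to follow XNNPACK's usual convention: rrN = N-step
// (Cody-Waite) range reduction, lutK = K-entry lookup table, pM = degree-M polynomial.

// NEON kernel: two-step range reduction, 16-entry LUT, degree-3 polynomial correction.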
#if XNN_ARCH_ARM || XNN_ARCH_ARM64
  TEST(EXPM1MINUS__NEON_RR2_LUT16_P3, negative_zero) {
    TEST_REQUIRES_ARM_NEON;

    std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
    std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
    std::fill(inputs.begin(), inputs.end(), -0.0f);
    xnn_math_f32_expm1minus__neon_rr2_lut16_p3(kBlockSize * sizeof(float), inputs.data(), outputs.data());
    const float reference_output = 0.0f;
    ASSERT_EQ(reference_output, outputs[0])
      << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(inputs[0])
      << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(reference_output)
      << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(outputs[0]);
  }

  TEST(EXPM1MINUS__NEON_RR2_LUT16_P3, negative_saturation) {
    TEST_REQUIRES_ARM_NEON;

    std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
    std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
    for (uint32_t n = UINT32_C(0xC18AA123); n <= UINT32_C(0xFF800000); n += kBlockSize) {
      for (uint32_t i = 0; i < kBlockSize; i++) {
        inputs[i] = fp32_from_bits(std::min(n + i, UINT32_C(0xFF800000)));
      }
      xnn_math_f32_expm1minus__neon_rr2_lut16_p3(kBlockSize * sizeof(float), inputs.data(), outputs.data());
      for (uint32_t i = 0; i < kBlockSize; i++) {
        const float reference_output = -1.0f;
        ASSERT_EQ(reference_output, outputs[i])
          << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(inputs[i])
          << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(reference_output)
          << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(outputs[i]);
      }
    }
  }

  TEST(EXPM1MINUS__NEON_RR2_LUT16_P3, positive_nan) {
    TEST_REQUIRES_ARM_NEON;

    std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
    std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
    for (uint32_t n = UINT32_C(0x7F800001); n < UINT32_C(0x80000000); n += kBlockSize) {
      for (uint32_t i = 0; i < kBlockSize; i++) {
        inputs[i] = fp32_from_bits(std::min(UINT32_C(0x7FFFFFFF), n + i));
      }
      xnn_math_f32_expm1minus__neon_rr2_lut16_p3(kBlockSize * sizeof(float), inputs.data(), outputs.data());
      for (uint32_t i = 0; i < kBlockSize; i++) {
        ASSERT_TRUE(std::isnan(outputs[i]))
          << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(inputs[i])
          << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(outputs[i]);
      }
    }
  }

  TEST(EXPM1MINUS__NEON_RR2_LUT16_P3, negative_nan) {
    TEST_REQUIRES_ARM_NEON;

    std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
    std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
    for (uint32_t n = UINT32_C(0x7F800001); n < UINT32_C(0x80000000); n += kBlockSize) {
      for (uint32_t i = 0; i < kBlockSize; i++) {
        inputs[i] = fp32_from_bits(std::min(UINT32_C(0x7FFFFFFF), UINT32_C(0x80000000) | (n + i)));
      }
      xnn_math_f32_expm1minus__neon_rr2_lut16_p3(kBlockSize * sizeof(float), inputs.data(), outputs.data());
      for (uint32_t i = 0; i < kBlockSize; i++) {
        ASSERT_TRUE(std::isnan(outputs[i]))
          << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(inputs[i])
          << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(outputs[i]);
      }
    }
  }
#endif  // XNN_ARCH_ARM || XNN_ARCH_ARM64

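// NEON kernel: two-step range reduction, degree-6 polynomial (no lookup table).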
#if XNN_ARCH_ARM || XNN_ARCH_ARM64
  TEST(EXPM1MINUS__NEON_RR2_P6, negative_zero) {
    TEST_REQUIRES_ARM_NEON;

    std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
    std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
    std::fill(inputs.begin(), inputs.end(), -0.0f);
    xnn_math_f32_expm1minus__neon_rr2_p6(kBlockSize * sizeof(float), inputs.data(), outputs.data());
    const float reference_output = 0.0f;
    ASSERT_EQ(reference_output, outputs[0])
      << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(inputs[0])
      << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(reference_output)
      << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(outputs[0]);
  }

  TEST(EXPM1MINUS__NEON_RR2_P6, negative_saturation) {
    TEST_REQUIRES_ARM_NEON;

    std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
    std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
    for (uint32_t n = UINT32_C(0xC18AA123); n <= UINT32_C(0xFF800000); n += kBlockSize) {
      for (uint32_t i = 0; i < kBlockSize; i++) {
        inputs[i] = fp32_from_bits(std::min(n + i, UINT32_C(0xFF800000)));
      }
      xnn_math_f32_expm1minus__neon_rr2_p6(kBlockSize * sizeof(float), inputs.data(), outputs.data());
      for (uint32_t i = 0; i < kBlockSize; i++) {
        const float reference_output = -1.0f;
        ASSERT_EQ(reference_output, outputs[i])
          << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(inputs[i])
          << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(reference_output)
          << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(outputs[i]);
      }
    }
  }

  TEST(EXPM1MINUS__NEON_RR2_P6, positive_nan) {
    TEST_REQUIRES_ARM_NEON;

    std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
    std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
    for (uint32_t n = UINT32_C(0x7F800001); n < UINT32_C(0x80000000); n += kBlockSize) {
      for (uint32_t i = 0; i < kBlockSize; i++) {
        inputs[i] = fp32_from_bits(std::min(UINT32_C(0x7FFFFFFF), n + i));
      }
      xnn_math_f32_expm1minus__neon_rr2_p6(kBlockSize * sizeof(float), inputs.data(), outputs.data());
      for (uint32_t i = 0; i < kBlockSize; i++) {
        ASSERT_TRUE(std::isnan(outputs[i]))
          << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(inputs[i])
          << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(outputs[i]);
      }
    }
  }

  TEST(EXPM1MINUS__NEON_RR2_P6, negative_nan) {
    TEST_REQUIRES_ARM_NEON;

    std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
    std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
    for (uint32_t n = UINT32_C(0x7F800001); n < UINT32_C(0x80000000); n += kBlockSize) {
      for (uint32_t i = 0; i < kBlockSize; i++) {
        inputs[i] = fp32_from_bits(std::min(UINT32_C(0x7FFFFFFF), UINT32_C(0x80000000) | (n + i)));
      }
      xnn_math_f32_expm1minus__neon_rr2_p6(kBlockSize * sizeof(float), inputs.data(), outputs.data());
      for (uint32_t i = 0; i < kBlockSize; i++) {
        ASSERT_TRUE(std::isnan(outputs[i]))
          << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(inputs[i])
          << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(outputs[i]);
      }
    }
  }
#endif  // XNN_ARCH_ARM || XNN_ARCH_ARM64

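// NEON+FMA kernel: fused multiply-add makes a one-step range reduction sufficient;
// 16-entry LUT, degree-3 polynomial.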
#if XNN_ARCH_ARM || XNN_ARCH_ARM64
  TEST(EXPM1MINUS__NEONFMA_RR1_LUT16_P3, negative_zero) {
    TEST_REQUIRES_ARM_NEON_FMA;

    std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
    std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
    std::fill(inputs.begin(), inputs.end(), -0.0f);
    xnn_math_f32_expm1minus__neonfma_rr1_lut16_p3(kBlockSize * sizeof(float), inputs.data(), outputs.data());
    const float reference_output = 0.0f;
    ASSERT_EQ(reference_output, outputs[0])
      << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(inputs[0])
      << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(reference_output)
      << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(outputs[0]);
  }

  TEST(EXPM1MINUS__NEONFMA_RR1_LUT16_P3, negative_saturation) {
    TEST_REQUIRES_ARM_NEON_FMA;

    std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
    std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
    for (uint32_t n = UINT32_C(0xC18AA123); n <= UINT32_C(0xFF800000); n += kBlockSize) {
      for (uint32_t i = 0; i < kBlockSize; i++) {
        inputs[i] = fp32_from_bits(std::min(n + i, UINT32_C(0xFF800000)));
      }
      xnn_math_f32_expm1minus__neonfma_rr1_lut16_p3(kBlockSize * sizeof(float), inputs.data(), outputs.data());
      for (uint32_t i = 0; i < kBlockSize; i++) {
        const float reference_output = -1.0f;
        ASSERT_EQ(reference_output, outputs[i])
          << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(inputs[i])
          << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(reference_output)
          << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(outputs[i]);
      }
    }
  }

  TEST(EXPM1MINUS__NEONFMA_RR1_LUT16_P3, positive_nan) {
    TEST_REQUIRES_ARM_NEON_FMA;

    std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
    std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
    for (uint32_t n = UINT32_C(0x7F800001); n < UINT32_C(0x80000000); n += kBlockSize) {
      for (uint32_t i = 0; i < kBlockSize; i++) {
        inputs[i] = fp32_from_bits(std::min(UINT32_C(0x7FFFFFFF), n + i));
      }
      xnn_math_f32_expm1minus__neonfma_rr1_lut16_p3(kBlockSize * sizeof(float), inputs.data(), outputs.data());
      for (uint32_t i = 0; i < kBlockSize; i++) {
        ASSERT_TRUE(std::isnan(outputs[i]))
          << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(inputs[i])
          << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(outputs[i]);
      }
    }
  }

  TEST(EXPM1MINUS__NEONFMA_RR1_LUT16_P3, negative_nan) {
    TEST_REQUIRES_ARM_NEON_FMA;

    std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
    std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
    for (uint32_t n = UINT32_C(0x7F800001); n < UINT32_C(0x80000000); n += kBlockSize) {
      for (uint32_t i = 0; i < kBlockSize; i++) {
        inputs[i] = fp32_from_bits(std::min(UINT32_C(0x7FFFFFFF), UINT32_C(0x80000000) | (n + i)));
      }
      xnn_math_f32_expm1minus__neonfma_rr1_lut16_p3(kBlockSize * sizeof(float), inputs.data(), outputs.data());
      for (uint32_t i = 0; i < kBlockSize; i++) {
        ASSERT_TRUE(std::isnan(outputs[i]))
          << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(inputs[i])
          << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(outputs[i]);
      }
    }
  }
#endif  // XNN_ARCH_ARM || XNN_ARCH_ARM64

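// NEON+FMA kernel: one-step range reduction, degree-6 polynomial.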
#if XNN_ARCH_ARM || XNN_ARCH_ARM64
  TEST(EXPM1MINUS__NEONFMA_RR1_P6, negative_zero) {
    TEST_REQUIRES_ARM_NEON_FMA;

    std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
    std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
    std::fill(inputs.begin(), inputs.end(), -0.0f);
    xnn_math_f32_expm1minus__neonfma_rr1_p6(kBlockSize * sizeof(float), inputs.data(), outputs.data());
    const float reference_output = 0.0f;
    ASSERT_EQ(reference_output, outputs[0])
      << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(inputs[0])
      << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(reference_output)
      << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(outputs[0]);
  }

  TEST(EXPM1MINUS__NEONFMA_RR1_P6, negative_saturation) {
    TEST_REQUIRES_ARM_NEON_FMA;

    std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
    std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
    for (uint32_t n = UINT32_C(0xC18AA123); n <= UINT32_C(0xFF800000); n += kBlockSize) {
      for (uint32_t i = 0; i < kBlockSize; i++) {
        inputs[i] = fp32_from_bits(std::min(n + i, UINT32_C(0xFF800000)));
      }
      xnn_math_f32_expm1minus__neonfma_rr1_p6(kBlockSize * sizeof(float), inputs.data(), outputs.data());
      for (uint32_t i = 0; i < kBlockSize; i++) {
        const float reference_output = -1.0f;
        ASSERT_EQ(reference_output, outputs[i])
          << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(inputs[i])
          << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(reference_output)
          << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(outputs[i]);
      }
    }
  }

  TEST(EXPM1MINUS__NEONFMA_RR1_P6, positive_nan) {
    TEST_REQUIRES_ARM_NEON_FMA;

    std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
    std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
    for (uint32_t n = UINT32_C(0x7F800001); n < UINT32_C(0x80000000); n += kBlockSize) {
      for (uint32_t i = 0; i < kBlockSize; i++) {
        inputs[i] = fp32_from_bits(std::min(UINT32_C(0x7FFFFFFF), n + i));
      }
      xnn_math_f32_expm1minus__neonfma_rr1_p6(kBlockSize * sizeof(float), inputs.data(), outputs.data());
      for (uint32_t i = 0; i < kBlockSize; i++) {
        ASSERT_TRUE(std::isnan(outputs[i]))
          << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(inputs[i])
          << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(outputs[i]);
      }
    }
  }

  TEST(EXPM1MINUS__NEONFMA_RR1_P6, negative_nan) {
    TEST_REQUIRES_ARM_NEON_FMA;

    std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
    std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
    for (uint32_t n = UINT32_C(0x7F800001); n < UINT32_C(0x80000000); n += kBlockSize) {
      for (uint32_t i = 0; i < kBlockSize; i++) {
        inputs[i] = fp32_from_bits(std::min(UINT32_C(0x7FFFFFFF), UINT32_C(0x80000000) | (n + i)));
      }
      xnn_math_f32_expm1minus__neonfma_rr1_p6(kBlockSize * sizeof(float), inputs.data(), outputs.data());
      for (uint32_t i = 0; i < kBlockSize; i++) {
        ASSERT_TRUE(std::isnan(outputs[i]))
          << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(inputs[i])
          << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(outputs[i]);
      }
    }
  }
#endif  // XNN_ARCH_ARM || XNN_ARCH_ARM64

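// AVX-512F kernel: one-step range reduction; per the _perm suffix, the 16-entry LUT is
// held in a register and indexed with a vector permute.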
#if XNN_ARCH_X86 || XNN_ARCH_X86_64
  TEST(EXPM1MINUS__AVX512F_RR1_LUT16_P3_PERM, negative_zero) {
    TEST_REQUIRES_X86_AVX512F;

    std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
    std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
    std::fill(inputs.begin(), inputs.end(), -0.0f);
    xnn_math_f32_expm1minus__avx512f_rr1_lut16_p3_perm(kBlockSize * sizeof(float), inputs.data(), outputs.data());
    const float reference_output = 0.0f;
    ASSERT_EQ(reference_output, outputs[0])
      << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(inputs[0])
      << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(reference_output)
      << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(outputs[0]);
  }

  TEST(EXPM1MINUS__AVX512F_RR1_LUT16_P3_PERM, negative_saturation) {
    TEST_REQUIRES_X86_AVX512F;

    std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
    std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
    for (uint32_t n = UINT32_C(0xC18AA123); n <= UINT32_C(0xFF800000); n += kBlockSize) {
      for (uint32_t i = 0; i < kBlockSize; i++) {
        inputs[i] = fp32_from_bits(std::min(n + i, UINT32_C(0xFF800000)));
      }
      xnn_math_f32_expm1minus__avx512f_rr1_lut16_p3_perm(kBlockSize * sizeof(float), inputs.data(), outputs.data());
      for (uint32_t i = 0; i < kBlockSize; i++) {
        const float reference_output = -1.0f;
        ASSERT_EQ(reference_output, outputs[i])
          << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(inputs[i])
          << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(reference_output)
          << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(outputs[i]);
      }
    }
  }

  TEST(EXPM1MINUS__AVX512F_RR1_LUT16_P3_PERM, positive_nan) {
    TEST_REQUIRES_X86_AVX512F;

    std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
    std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
    for (uint32_t n = UINT32_C(0x7F800001); n < UINT32_C(0x80000000); n += kBlockSize) {
      for (uint32_t i = 0; i < kBlockSize; i++) {
        inputs[i] = fp32_from_bits(std::min(UINT32_C(0x7FFFFFFF), n + i));
      }
      xnn_math_f32_expm1minus__avx512f_rr1_lut16_p3_perm(kBlockSize * sizeof(float), inputs.data(), outputs.data());
      for (uint32_t i = 0; i < kBlockSize; i++) {
        ASSERT_TRUE(std::isnan(outputs[i]))
          << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(inputs[i])
          << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(outputs[i]);
      }
    }
  }

  TEST(EXPM1MINUS__AVX512F_RR1_LUT16_P3_PERM, negative_nan) {
    TEST_REQUIRES_X86_AVX512F;

    std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
    std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
    for (uint32_t n = UINT32_C(0x7F800001); n < UINT32_C(0x80000000); n += kBlockSize) {
      for (uint32_t i = 0; i < kBlockSize; i++) {
        inputs[i] = fp32_from_bits(std::min(UINT32_C(0x7FFFFFFF), UINT32_C(0x80000000) | (n + i)));
      }
      xnn_math_f32_expm1minus__avx512f_rr1_lut16_p3_perm(kBlockSize * sizeof(float), inputs.data(), outputs.data());
      for (uint32_t i = 0; i < kBlockSize; i++) {
        ASSERT_TRUE(std::isnan(outputs[i]))
          << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(inputs[i])
          << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(outputs[i]);
      }
    }
  }
#endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64

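// AVX-512F kernel: one-step range reduction, degree-6 polynomial, no lookup table.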
#if XNN_ARCH_X86 || XNN_ARCH_X86_64
  TEST(EXPM1MINUS__AVX512F_RR1_P6, negative_zero) {
    TEST_REQUIRES_X86_AVX512F;

    std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
    std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
    std::fill(inputs.begin(), inputs.end(), -0.0f);
    xnn_math_f32_expm1minus__avx512f_rr1_p6(kBlockSize * sizeof(float), inputs.data(), outputs.data());
    const float reference_output = 0.0f;
    ASSERT_EQ(reference_output, outputs[0])
      << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(inputs[0])
      << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(reference_output)
      << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(outputs[0]);
  }

  TEST(EXPM1MINUS__AVX512F_RR1_P6, negative_saturation) {
    TEST_REQUIRES_X86_AVX512F;

    std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
    std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
    for (uint32_t n = UINT32_C(0xC18AA123); n <= UINT32_C(0xFF800000); n += kBlockSize) {
      for (uint32_t i = 0; i < kBlockSize; i++) {
        inputs[i] = fp32_from_bits(std::min(n + i, UINT32_C(0xFF800000)));
      }
      xnn_math_f32_expm1minus__avx512f_rr1_p6(kBlockSize * sizeof(float), inputs.data(), outputs.data());
      for (uint32_t i = 0; i < kBlockSize; i++) {
        const float reference_output = -1.0f;
        ASSERT_EQ(reference_output, outputs[i])
          << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(inputs[i])
          << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(reference_output)
          << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(outputs[i]);
      }
    }
  }

  TEST(EXPM1MINUS__AVX512F_RR1_P6, positive_nan) {
    TEST_REQUIRES_X86_AVX512F;

    std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
    std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
    for (uint32_t n = UINT32_C(0x7F800001); n < UINT32_C(0x80000000); n += kBlockSize) {
      for (uint32_t i = 0; i < kBlockSize; i++) {
        inputs[i] = fp32_from_bits(std::min(UINT32_C(0x7FFFFFFF), n + i));
      }
      xnn_math_f32_expm1minus__avx512f_rr1_p6(kBlockSize * sizeof(float), inputs.data(), outputs.data());
      for (uint32_t i = 0; i < kBlockSize; i++) {
        ASSERT_TRUE(std::isnan(outputs[i]))
          << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(inputs[i])
          << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(outputs[i]);
      }
    }
  }

  TEST(EXPM1MINUS__AVX512F_RR1_P6, negative_nan) {
    TEST_REQUIRES_X86_AVX512F;

    std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
    std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
    for (uint32_t n = UINT32_C(0x7F800001); n < UINT32_C(0x80000000); n += kBlockSize) {
      for (uint32_t i = 0; i < kBlockSize; i++) {
        inputs[i] = fp32_from_bits(std::min(UINT32_C(0x7FFFFFFF), UINT32_C(0x80000000) | (n + i)));
      }
      xnn_math_f32_expm1minus__avx512f_rr1_p6(kBlockSize * sizeof(float), inputs.data(), outputs.data());
      for (uint32_t i = 0; i < kBlockSize; i++) {
        ASSERT_TRUE(std::isnan(outputs[i]))
          << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(inputs[i])
          << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(outputs[i]);
      }
    }
  }
#endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64

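// AVX2 kernel: 4-entry in-register LUT (permute), degree-4 polynomial.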
#if XNN_ARCH_X86 || XNN_ARCH_X86_64
  TEST(EXPM1MINUS__AVX2_RR1_LUT4_P4_PERM, negative_zero) {
    TEST_REQUIRES_X86_AVX2;

    std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
    std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
    std::fill(inputs.begin(), inputs.end(), -0.0f);
    xnn_math_f32_expm1minus__avx2_rr1_lut4_p4_perm(kBlockSize * sizeof(float), inputs.data(), outputs.data());
    const float reference_output = 0.0f;
    ASSERT_EQ(reference_output, outputs[0])
      << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(inputs[0])
      << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(reference_output)
      << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(outputs[0]);
  }

  TEST(EXPM1MINUS__AVX2_RR1_LUT4_P4_PERM, negative_saturation) {
    TEST_REQUIRES_X86_AVX2;

    std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
    std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
    for (uint32_t n = UINT32_C(0xC18AA123); n <= UINT32_C(0xFF800000); n += kBlockSize) {
      for (uint32_t i = 0; i < kBlockSize; i++) {
        inputs[i] = fp32_from_bits(std::min(n + i, UINT32_C(0xFF800000)));
      }
      xnn_math_f32_expm1minus__avx2_rr1_lut4_p4_perm(kBlockSize * sizeof(float), inputs.data(), outputs.data());
      for (uint32_t i = 0; i < kBlockSize; i++) {
        const float reference_output = -1.0f;
        ASSERT_EQ(reference_output, outputs[i])
          << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(inputs[i])
          << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(reference_output)
          << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(outputs[i]);
      }
    }
  }

  TEST(EXPM1MINUS__AVX2_RR1_LUT4_P4_PERM, positive_nan) {
    TEST_REQUIRES_X86_AVX2;

    std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
    std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
    for (uint32_t n = UINT32_C(0x7F800001); n < UINT32_C(0x80000000); n += kBlockSize) {
      for (uint32_t i = 0; i < kBlockSize; i++) {
        inputs[i] = fp32_from_bits(std::min(UINT32_C(0x7FFFFFFF), n + i));
      }
      xnn_math_f32_expm1minus__avx2_rr1_lut4_p4_perm(kBlockSize * sizeof(float), inputs.data(), outputs.data());
      for (uint32_t i = 0; i < kBlockSize; i++) {
        ASSERT_TRUE(std::isnan(outputs[i]))
          << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(inputs[i])
          << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(outputs[i]);
      }
    }
  }

  TEST(EXPM1MINUS__AVX2_RR1_LUT4_P4_PERM, negative_nan) {
    TEST_REQUIRES_X86_AVX2;

    std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
    std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
    for (uint32_t n = UINT32_C(0x7F800001); n < UINT32_C(0x80000000); n += kBlockSize) {
      for (uint32_t i = 0; i < kBlockSize; i++) {
        inputs[i] = fp32_from_bits(std::min(UINT32_C(0x7FFFFFFF), UINT32_C(0x80000000) | (n + i)));
      }
      xnn_math_f32_expm1minus__avx2_rr1_lut4_p4_perm(kBlockSize * sizeof(float), inputs.data(), outputs.data());
      for (uint32_t i = 0; i < kBlockSize; i++) {
        ASSERT_TRUE(std::isnan(outputs[i]))
          << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(inputs[i])
          << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(outputs[i]);
      }
    }
  }
#endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64

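// AVX2 kernel: 8-entry in-register LUT (permute), degree-4 polynomial.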
#if XNN_ARCH_X86 || XNN_ARCH_X86_64
  TEST(EXPM1MINUS__AVX2_RR1_LUT8_P4_PERM, negative_zero) {
    TEST_REQUIRES_X86_AVX2;

    std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
    std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
    std::fill(inputs.begin(), inputs.end(), -0.0f);
    xnn_math_f32_expm1minus__avx2_rr1_lut8_p4_perm(kBlockSize * sizeof(float), inputs.data(), outputs.data());
    const float reference_output = 0.0f;
    ASSERT_EQ(reference_output, outputs[0])
      << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(inputs[0])
      << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(reference_output)
      << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(outputs[0]);
  }

  TEST(EXPM1MINUS__AVX2_RR1_LUT8_P4_PERM, negative_saturation) {
    TEST_REQUIRES_X86_AVX2;

    std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
    std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
    for (uint32_t n = UINT32_C(0xC18AA123); n <= UINT32_C(0xFF800000); n += kBlockSize) {
      for (uint32_t i = 0; i < kBlockSize; i++) {
        inputs[i] = fp32_from_bits(std::min(n + i, UINT32_C(0xFF800000)));
      }
      xnn_math_f32_expm1minus__avx2_rr1_lut8_p4_perm(kBlockSize * sizeof(float), inputs.data(), outputs.data());
      for (uint32_t i = 0; i < kBlockSize; i++) {
        const float reference_output = -1.0f;
        ASSERT_EQ(reference_output, outputs[i])
          << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(inputs[i])
          << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(reference_output)
          << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(outputs[i]);
      }
    }
  }

  TEST(EXPM1MINUS__AVX2_RR1_LUT8_P4_PERM, positive_nan) {
    TEST_REQUIRES_X86_AVX2;

    std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
    std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
    for (uint32_t n = UINT32_C(0x7F800001); n < UINT32_C(0x80000000); n += kBlockSize) {
      for (uint32_t i = 0; i < kBlockSize; i++) {
        inputs[i] = fp32_from_bits(std::min(UINT32_C(0x7FFFFFFF), n + i));
      }
      xnn_math_f32_expm1minus__avx2_rr1_lut8_p4_perm(kBlockSize * sizeof(float), inputs.data(), outputs.data());
      for (uint32_t i = 0; i < kBlockSize; i++) {
        ASSERT_TRUE(std::isnan(outputs[i]))
          << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(inputs[i])
          << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(outputs[i]);
      }
    }
  }

  TEST(EXPM1MINUS__AVX2_RR1_LUT8_P4_PERM, negative_nan) {
    TEST_REQUIRES_X86_AVX2;

    std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
    std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
    for (uint32_t n = UINT32_C(0x7F800001); n < UINT32_C(0x80000000); n += kBlockSize) {
      for (uint32_t i = 0; i < kBlockSize; i++) {
        inputs[i] = fp32_from_bits(std::min(UINT32_C(0x7FFFFFFF), UINT32_C(0x80000000) | (n + i)));
      }
      xnn_math_f32_expm1minus__avx2_rr1_lut8_p4_perm(kBlockSize * sizeof(float), inputs.data(), outputs.data());
      for (uint32_t i = 0; i < kBlockSize; i++) {
        ASSERT_TRUE(std::isnan(outputs[i]))
          << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(inputs[i])
          << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(outputs[i]);
      }
    }
  }
#endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64

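// AVX2 kernel: 16-entry in-memory LUT read with a gather instruction, degree-3 polynomial.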
#if XNN_ARCH_X86 || XNN_ARCH_X86_64
  TEST(EXPM1MINUS__AVX2_RR1_LUT16_P3_GATHER, negative_zero) {
    TEST_REQUIRES_X86_AVX2;

    std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
    std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
    std::fill(inputs.begin(), inputs.end(), -0.0f);
    xnn_math_f32_expm1minus__avx2_rr1_lut16_p3_gather(kBlockSize * sizeof(float), inputs.data(), outputs.data());
    const float reference_output = 0.0f;
    ASSERT_EQ(reference_output, outputs[0])
      << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(inputs[0])
      << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(reference_output)
      << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(outputs[0]);
  }

  TEST(EXPM1MINUS__AVX2_RR1_LUT16_P3_GATHER, negative_saturation) {
    TEST_REQUIRES_X86_AVX2;

    std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
    std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
    for (uint32_t n = UINT32_C(0xC18AA123); n <= UINT32_C(0xFF800000); n += kBlockSize) {
      for (uint32_t i = 0; i < kBlockSize; i++) {
        inputs[i] = fp32_from_bits(std::min(n + i, UINT32_C(0xFF800000)));
      }
      xnn_math_f32_expm1minus__avx2_rr1_lut16_p3_gather(kBlockSize * sizeof(float), inputs.data(), outputs.data());
      for (uint32_t i = 0; i < kBlockSize; i++) {
        const float reference_output = -1.0f;
        ASSERT_EQ(reference_output, outputs[i])
          << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(inputs[i])
          << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(reference_output)
          << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(outputs[i]);
      }
    }
  }

  TEST(EXPM1MINUS__AVX2_RR1_LUT16_P3_GATHER, positive_nan) {
    TEST_REQUIRES_X86_AVX2;

    std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
    std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
    for (uint32_t n = UINT32_C(0x7F800001); n < UINT32_C(0x80000000); n += kBlockSize) {
      for (uint32_t i = 0; i < kBlockSize; i++) {
        inputs[i] = fp32_from_bits(std::min(UINT32_C(0x7FFFFFFF), n + i));
      }
      xnn_math_f32_expm1minus__avx2_rr1_lut16_p3_gather(kBlockSize * sizeof(float), inputs.data(), outputs.data());
      for (uint32_t i = 0; i < kBlockSize; i++) {
        ASSERT_TRUE(std::isnan(outputs[i]))
          << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(inputs[i])
          << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(outputs[i]);
      }
    }
  }

  TEST(EXPM1MINUS__AVX2_RR1_LUT16_P3_GATHER, negative_nan) {
    TEST_REQUIRES_X86_AVX2;

    std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
    std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
    for (uint32_t n = UINT32_C(0x7F800001); n < UINT32_C(0x80000000); n += kBlockSize) {
      for (uint32_t i = 0; i < kBlockSize; i++) {
        inputs[i] = fp32_from_bits(std::min(UINT32_C(0x7FFFFFFF), UINT32_C(0x80000000) | (n + i)));
      }
      xnn_math_f32_expm1minus__avx2_rr1_lut16_p3_gather(kBlockSize * sizeof(float), inputs.data(), outputs.data());
      for (uint32_t i = 0; i < kBlockSize; i++) {
        ASSERT_TRUE(std::isnan(outputs[i]))
          << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(inputs[i])
          << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(outputs[i]);
      }
    }
  }
#endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64

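// AVX2 kernel: degree-6 polynomial, no lookup table.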
#if XNN_ARCH_X86 || XNN_ARCH_X86_64
  TEST(EXPM1MINUS__AVX2_RR1_P6, negative_zero) {
    TEST_REQUIRES_X86_AVX2;

    std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
    std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
    std::fill(inputs.begin(), inputs.end(), -0.0f);
    xnn_math_f32_expm1minus__avx2_rr1_p6(kBlockSize * sizeof(float), inputs.data(), outputs.data());
    const float reference_output = 0.0f;
    ASSERT_EQ(reference_output, outputs[0])
      << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(inputs[0])
      << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(reference_output)
      << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(outputs[0]);
  }

  TEST(EXPM1MINUS__AVX2_RR1_P6, negative_saturation) {
    TEST_REQUIRES_X86_AVX2;

    std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
    std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
    for (uint32_t n = UINT32_C(0xC18AA123); n <= UINT32_C(0xFF800000); n += kBlockSize) {
      for (uint32_t i = 0; i < kBlockSize; i++) {
        inputs[i] = fp32_from_bits(std::min(n + i, UINT32_C(0xFF800000)));
      }
      xnn_math_f32_expm1minus__avx2_rr1_p6(kBlockSize * sizeof(float), inputs.data(), outputs.data());
      for (uint32_t i = 0; i < kBlockSize; i++) {
        const float reference_output = -1.0f;
        ASSERT_EQ(reference_output, outputs[i])
          << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(inputs[i])
          << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(reference_output)
          << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(outputs[i]);
      }
    }
  }

  TEST(EXPM1MINUS__AVX2_RR1_P6, positive_nan) {
    TEST_REQUIRES_X86_AVX2;

    std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
    std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
    for (uint32_t n = UINT32_C(0x7F800001); n < UINT32_C(0x80000000); n += kBlockSize) {
      for (uint32_t i = 0; i < kBlockSize; i++) {
        inputs[i] = fp32_from_bits(std::min(UINT32_C(0x7FFFFFFF), n + i));
      }
      xnn_math_f32_expm1minus__avx2_rr1_p6(kBlockSize * sizeof(float), inputs.data(), outputs.data());
      for (uint32_t i = 0; i < kBlockSize; i++) {
        ASSERT_TRUE(std::isnan(outputs[i]))
          << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(inputs[i])
          << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(outputs[i]);
      }
    }
  }

  TEST(EXPM1MINUS__AVX2_RR1_P6, negative_nan) {
    TEST_REQUIRES_X86_AVX2;

    std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
    std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
    for (uint32_t n = UINT32_C(0x7F800001); n < UINT32_C(0x80000000); n += kBlockSize) {
      for (uint32_t i = 0; i < kBlockSize; i++) {
        inputs[i] = fp32_from_bits(std::min(UINT32_C(0x7FFFFFFF), UINT32_C(0x80000000) | (n + i)));
      }
      xnn_math_f32_expm1minus__avx2_rr1_p6(kBlockSize * sizeof(float), inputs.data(), outputs.data());
      for (uint32_t i = 0; i < kBlockSize; i++) {
        ASSERT_TRUE(std::isnan(outputs[i]))
          << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(inputs[i])
          << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(outputs[i]);
      }
    }
  }
#endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64

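// AVX kernel: no FMA, hence two-step range reduction; 4-entry LUT (permute), degree-4 polynomial.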
#if XNN_ARCH_X86 || XNN_ARCH_X86_64
  TEST(EXPM1MINUS__AVX_RR2_LUT4_P4_PERM, negative_zero) {
    TEST_REQUIRES_X86_AVX;

    std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
    std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
    std::fill(inputs.begin(), inputs.end(), -0.0f);
    xnn_math_f32_expm1minus__avx_rr2_lut4_p4_perm(kBlockSize * sizeof(float), inputs.data(), outputs.data());
    const float reference_output = 0.0f;
    ASSERT_EQ(reference_output, outputs[0])
      << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(inputs[0])
      << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(reference_output)
      << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(outputs[0]);
  }

  TEST(EXPM1MINUS__AVX_RR2_LUT4_P4_PERM, negative_saturation) {
    TEST_REQUIRES_X86_AVX;

    std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
    std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
    for (uint32_t n = UINT32_C(0xC18AA123); n <= UINT32_C(0xFF800000); n += kBlockSize) {
      for (uint32_t i = 0; i < kBlockSize; i++) {
        inputs[i] = fp32_from_bits(std::min(n + i, UINT32_C(0xFF800000)));
      }
      xnn_math_f32_expm1minus__avx_rr2_lut4_p4_perm(kBlockSize * sizeof(float), inputs.data(), outputs.data());
      for (uint32_t i = 0; i < kBlockSize; i++) {
        const float reference_output = -1.0f;
        ASSERT_EQ(reference_output, outputs[i])
          << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(inputs[i])
          << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(reference_output)
          << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(outputs[i]);
      }
    }
  }

  TEST(EXPM1MINUS__AVX_RR2_LUT4_P4_PERM, positive_nan) {
    TEST_REQUIRES_X86_AVX;

    std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
    std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
    for (uint32_t n = UINT32_C(0x7F800001); n < UINT32_C(0x80000000); n += kBlockSize) {
      for (uint32_t i = 0; i < kBlockSize; i++) {
        inputs[i] = fp32_from_bits(std::min(UINT32_C(0x7FFFFFFF), n + i));
      }
      xnn_math_f32_expm1minus__avx_rr2_lut4_p4_perm(kBlockSize * sizeof(float), inputs.data(), outputs.data());
      for (uint32_t i = 0; i < kBlockSize; i++) {
        ASSERT_TRUE(std::isnan(outputs[i]))
          << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(inputs[i])
          << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(outputs[i]);
      }
    }
  }

  TEST(EXPM1MINUS__AVX_RR2_LUT4_P4_PERM, negative_nan) {
    TEST_REQUIRES_X86_AVX;

    std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
    std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
    for (uint32_t n = UINT32_C(0x7F800001); n < UINT32_C(0x80000000); n += kBlockSize) {
      for (uint32_t i = 0; i < kBlockSize; i++) {
        inputs[i] = fp32_from_bits(std::min(UINT32_C(0x7FFFFFFF), UINT32_C(0x80000000) | (n + i)));
      }
      xnn_math_f32_expm1minus__avx_rr2_lut4_p4_perm(kBlockSize * sizeof(float), inputs.data(), outputs.data());
      for (uint32_t i = 0; i < kBlockSize; i++) {
        ASSERT_TRUE(std::isnan(outputs[i]))
          << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(inputs[i])
          << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(outputs[i]);
      }
    }
  }
#endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64

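// AVX kernel: two-step range reduction, 16-entry LUT, degree-3 polynomial.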
#if XNN_ARCH_X86 || XNN_ARCH_X86_64
  TEST(EXPM1MINUS__AVX_RR2_LUT16_P3, negative_zero) {
    TEST_REQUIRES_X86_AVX;

    std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
    std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
    std::fill(inputs.begin(), inputs.end(), -0.0f);
    xnn_math_f32_expm1minus__avx_rr2_lut16_p3(kBlockSize * sizeof(float), inputs.data(), outputs.data());
    const float reference_output = 0.0f;
    ASSERT_EQ(reference_output, outputs[0])
      << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(inputs[0])
      << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(reference_output)
      << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(outputs[0]);
  }

  TEST(EXPM1MINUS__AVX_RR2_LUT16_P3, negative_saturation) {
    TEST_REQUIRES_X86_AVX;

    std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
    std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
    for (uint32_t n = UINT32_C(0xC18AA123); n <= UINT32_C(0xFF800000); n += kBlockSize) {
      for (uint32_t i = 0; i < kBlockSize; i++) {
        inputs[i] = fp32_from_bits(std::min(n + i, UINT32_C(0xFF800000)));
      }
      xnn_math_f32_expm1minus__avx_rr2_lut16_p3(kBlockSize * sizeof(float), inputs.data(), outputs.data());
      for (uint32_t i = 0; i < kBlockSize; i++) {
        const float reference_output = -1.0f;
        ASSERT_EQ(reference_output, outputs[i])
          << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(inputs[i])
          << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(reference_output)
          << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(outputs[i]);
      }
    }
  }

  TEST(EXPM1MINUS__AVX_RR2_LUT16_P3, positive_nan) {
    TEST_REQUIRES_X86_AVX;

    std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
    std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
    for (uint32_t n = UINT32_C(0x7F800001); n < UINT32_C(0x80000000); n += kBlockSize) {
      for (uint32_t i = 0; i < kBlockSize; i++) {
        inputs[i] = fp32_from_bits(std::min(UINT32_C(0x7FFFFFFF), n + i));
      }
      xnn_math_f32_expm1minus__avx_rr2_lut16_p3(kBlockSize * sizeof(float), inputs.data(), outputs.data());
      for (uint32_t i = 0; i < kBlockSize; i++) {
        ASSERT_TRUE(std::isnan(outputs[i]))
          << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(inputs[i])
          << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(outputs[i]);
      }
    }
  }

  TEST(EXPM1MINUS__AVX_RR2_LUT16_P3, negative_nan) {
    TEST_REQUIRES_X86_AVX;

    std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
    std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
    for (uint32_t n = UINT32_C(0x7F800001); n < UINT32_C(0x80000000); n += kBlockSize) {
      for (uint32_t i = 0; i < kBlockSize; i++) {
        inputs[i] = fp32_from_bits(std::min(UINT32_C(0x7FFFFFFF), UINT32_C(0x80000000) | (n + i)));
      }
      xnn_math_f32_expm1minus__avx_rr2_lut16_p3(kBlockSize * sizeof(float), inputs.data(), outputs.data());
      for (uint32_t i = 0; i < kBlockSize; i++) {
        ASSERT_TRUE(std::isnan(outputs[i]))
          << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(inputs[i])
          << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(outputs[i]);
      }
    }
  }
#endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64

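// AVX kernel: two-step range reduction, degree-6 polynomial.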
#if XNN_ARCH_X86 || XNN_ARCH_X86_64
  TEST(EXPM1MINUS__AVX_RR2_P6, negative_zero) {
    TEST_REQUIRES_X86_AVX;

    std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
    std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
    std::fill(inputs.begin(), inputs.end(), -0.0f);
    xnn_math_f32_expm1minus__avx_rr2_p6(kBlockSize * sizeof(float), inputs.data(), outputs.data());
    const float reference_output = 0.0f;
    ASSERT_EQ(reference_output, outputs[0])
      << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(inputs[0])
      << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(reference_output)
      << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(outputs[0]);
  }

  TEST(EXPM1MINUS__AVX_RR2_P6, negative_saturation) {
    TEST_REQUIRES_X86_AVX;

    std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
    std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
    for (uint32_t n = UINT32_C(0xC18AA123); n <= UINT32_C(0xFF800000); n += kBlockSize) {
      for (uint32_t i = 0; i < kBlockSize; i++) {
        inputs[i] = fp32_from_bits(std::min(n + i, UINT32_C(0xFF800000)));
      }
      xnn_math_f32_expm1minus__avx_rr2_p6(kBlockSize * sizeof(float), inputs.data(), outputs.data());
      for (uint32_t i = 0; i < kBlockSize; i++) {
        const float reference_output = -1.0f;
        ASSERT_EQ(reference_output, outputs[i])
          << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(inputs[i])
          << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(reference_output)
          << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(outputs[i]);
      }
    }
  }

  TEST(EXPM1MINUS__AVX_RR2_P6, positive_nan) {
    TEST_REQUIRES_X86_AVX;

    std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
    std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
    for (uint32_t n = UINT32_C(0x7F800001); n < UINT32_C(0x80000000); n += kBlockSize) {
      for (uint32_t i = 0; i < kBlockSize; i++) {
        inputs[i] = fp32_from_bits(std::min(UINT32_C(0x7FFFFFFF), n + i));
      }
      xnn_math_f32_expm1minus__avx_rr2_p6(kBlockSize * sizeof(float), inputs.data(), outputs.data());
      for (uint32_t i = 0; i < kBlockSize; i++) {
        ASSERT_TRUE(std::isnan(outputs[i]))
          << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(inputs[i])
          << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(outputs[i]);
      }
    }
  }

  TEST(EXPM1MINUS__AVX_RR2_P6, negative_nan) {
    TEST_REQUIRES_X86_AVX;

    std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
    std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
    for (uint32_t n = UINT32_C(0x7F800001); n < UINT32_C(0x80000000); n += kBlockSize) {
      for (uint32_t i = 0; i < kBlockSize; i++) {
        inputs[i] = fp32_from_bits(std::min(UINT32_C(0x7FFFFFFF), UINT32_C(0x80000000) | (n + i)));
      }
      xnn_math_f32_expm1minus__avx_rr2_p6(kBlockSize * sizeof(float), inputs.data(), outputs.data());
      for (uint32_t i = 0; i < kBlockSize; i++) {
        ASSERT_TRUE(std::isnan(outputs[i]))
          << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(inputs[i])
          << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(outputs[i]);
      }
    }
  }
#endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64

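// SSE2 kernel: 16-entry LUT, degree-3 polynomial. SSE2 is the x86 baseline here,
// so no TEST_REQUIRES_* runtime check is needed.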
#if XNN_ARCH_X86 || XNN_ARCH_X86_64
  TEST(EXPM1MINUS__SSE2_RR2_LUT16_P3, negative_zero) {
    std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
    std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
    std::fill(inputs.begin(), inputs.end(), -0.0f);
    xnn_math_f32_expm1minus__sse2_rr2_lut16_p3(kBlockSize * sizeof(float), inputs.data(), outputs.data());
    const float reference_output = 0.0f;
    ASSERT_EQ(reference_output, outputs[0])
      << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(inputs[0])
      << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(reference_output)
      << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(outputs[0]);
  }

  TEST(EXPM1MINUS__SSE2_RR2_LUT16_P3, negative_saturation) {
    std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
    std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
    for (uint32_t n = UINT32_C(0xC18AA123); n <= UINT32_C(0xFF800000); n += kBlockSize) {
      for (uint32_t i = 0; i < kBlockSize; i++) {
        inputs[i] = fp32_from_bits(std::min(n + i, UINT32_C(0xFF800000)));
      }
      xnn_math_f32_expm1minus__sse2_rr2_lut16_p3(kBlockSize * sizeof(float), inputs.data(), outputs.data());
      for (uint32_t i = 0; i < kBlockSize; i++) {
        const float reference_output = -1.0f;
        ASSERT_EQ(reference_output, outputs[i])
          << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(inputs[i])
          << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(reference_output)
          << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(outputs[i]);
      }
    }
  }

  TEST(EXPM1MINUS__SSE2_RR2_LUT16_P3, positive_nan) {
    std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
    std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
    for (uint32_t n = UINT32_C(0x7F800001); n < UINT32_C(0x80000000); n += kBlockSize) {
      for (uint32_t i = 0; i < kBlockSize; i++) {
        inputs[i] = fp32_from_bits(std::min(UINT32_C(0x7FFFFFFF), n + i));
      }
      xnn_math_f32_expm1minus__sse2_rr2_lut16_p3(kBlockSize * sizeof(float), inputs.data(), outputs.data());
      for (uint32_t i = 0; i < kBlockSize; i++) {
        ASSERT_TRUE(std::isnan(outputs[i]))
          << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(inputs[i])
          << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(outputs[i]);
      }
    }
  }

  TEST(EXPM1MINUS__SSE2_RR2_LUT16_P3, negative_nan) {
    std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
    std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
    for (uint32_t n = UINT32_C(0x7F800001); n < UINT32_C(0x80000000); n += kBlockSize) {
      for (uint32_t i = 0; i < kBlockSize; i++) {
        inputs[i] = fp32_from_bits(std::min(UINT32_C(0x7FFFFFFF), UINT32_C(0x80000000) | (n + i)));
      }
      xnn_math_f32_expm1minus__sse2_rr2_lut16_p3(kBlockSize * sizeof(float), inputs.data(), outputs.data());
      for (uint32_t i = 0; i < kBlockSize; i++) {
        ASSERT_TRUE(std::isnan(outputs[i]))
          << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(inputs[i])
          << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(outputs[i]);
      }
    }
  }
#endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64

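// SSE2 kernel: two-step range reduction, degree-6 polynomial.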
#if XNN_ARCH_X86 || XNN_ARCH_X86_64
  TEST(EXPM1MINUS__SSE2_RR2_P6, negative_zero) {
    std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
    std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
    std::fill(inputs.begin(), inputs.end(), -0.0f);
    xnn_math_f32_expm1minus__sse2_rr2_p6(kBlockSize * sizeof(float), inputs.data(), outputs.data());
    const float reference_output = 0.0f;
    ASSERT_EQ(reference_output, outputs[0])
      << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(inputs[0])
      << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(reference_output)
      << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(outputs[0]);
  }

  TEST(EXPM1MINUS__SSE2_RR2_P6, negative_saturation) {
    std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
    std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
    for (uint32_t n = UINT32_C(0xC18AA123); n <= UINT32_C(0xFF800000); n += kBlockSize) {
      for (uint32_t i = 0; i < kBlockSize; i++) {
        inputs[i] = fp32_from_bits(std::min(n + i, UINT32_C(0xFF800000)));
      }
      xnn_math_f32_expm1minus__sse2_rr2_p6(kBlockSize * sizeof(float), inputs.data(), outputs.data());
      for (uint32_t i = 0; i < kBlockSize; i++) {
        const float reference_output = -1.0f;
        ASSERT_EQ(reference_output, outputs[i])
          << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(inputs[i])
          << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(reference_output)
          << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(outputs[i]);
      }
    }
  }

  TEST(EXPM1MINUS__SSE2_RR2_P6, positive_nan) {
    std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
    std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
    for (uint32_t n = UINT32_C(0x7F800001); n < UINT32_C(0x80000000); n += kBlockSize) {
      for (uint32_t i = 0; i < kBlockSize; i++) {
        inputs[i] = fp32_from_bits(std::min(UINT32_C(0x7FFFFFFF), n + i));
      }
      xnn_math_f32_expm1minus__sse2_rr2_p6(kBlockSize * sizeof(float), inputs.data(), outputs.data());
      for (uint32_t i = 0; i < kBlockSize; i++) {
        ASSERT_TRUE(std::isnan(outputs[i]))
          << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(inputs[i])
          << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(outputs[i]);
      }
    }
  }

  TEST(EXPM1MINUS__SSE2_RR2_P6, negative_nan) {
    std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
    std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
    for (uint32_t n = UINT32_C(0x7F800001); n < UINT32_C(0x80000000); n += kBlockSize) {
      for (uint32_t i = 0; i < kBlockSize; i++) {
        inputs[i] = fp32_from_bits(std::min(UINT32_C(0x7FFFFFFF), UINT32_C(0x80000000) | (n + i)));
      }
      xnn_math_f32_expm1minus__sse2_rr2_p6(kBlockSize * sizeof(float), inputs.data(), outputs.data());
      for (uint32_t i = 0; i < kBlockSize; i++) {
        ASSERT_TRUE(std::isnan(outputs[i]))
          << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(inputs[i])
          << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(outputs[i]);
      }
    }
  }
#endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64

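// WAsm SIMD kernel: 16-entry LUT, degree-3 polynomial; the _andnot suffix presumably names
// the instruction used to mask out-of-range (saturated) lanes.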
#if XNN_ARCH_WASMSIMD
  TEST(EXPM1MINUS__WASMSIMD_RR2_LUT16_P3_ANDNOT, negative_zero) {
    std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
    std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
    std::fill(inputs.begin(), inputs.end(), -0.0f);
    xnn_math_f32_expm1minus__wasmsimd_rr2_lut16_p3_andnot(kBlockSize * sizeof(float), inputs.data(), outputs.data());
    const float reference_output = 0.0f;
    ASSERT_EQ(reference_output, outputs[0])
      << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(inputs[0])
      << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(reference_output)
      << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(outputs[0]);
  }

  TEST(EXPM1MINUS__WASMSIMD_RR2_LUT16_P3_ANDNOT, negative_saturation) {
    std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
    std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
    for (uint32_t n = UINT32_C(0xC18AA123); n <= UINT32_C(0xFF800000); n += kBlockSize) {
      for (uint32_t i = 0; i < kBlockSize; i++) {
        inputs[i] = fp32_from_bits(std::min(n + i, UINT32_C(0xFF800000)));
      }
      xnn_math_f32_expm1minus__wasmsimd_rr2_lut16_p3_andnot(kBlockSize * sizeof(float), inputs.data(), outputs.data());
      for (uint32_t i = 0; i < kBlockSize; i++) {
        const float reference_output = -1.0f;
        ASSERT_EQ(reference_output, outputs[i])
          << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(inputs[i])
          << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(reference_output)
          << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(outputs[i]);
      }
    }
  }

  TEST(EXPM1MINUS__WASMSIMD_RR2_LUT16_P3_ANDNOT, positive_nan) {
    std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
    std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
    for (uint32_t n = UINT32_C(0x7F800001); n < UINT32_C(0x80000000); n += kBlockSize) {
      for (uint32_t i = 0; i < kBlockSize; i++) {
        inputs[i] = fp32_from_bits(std::min(UINT32_C(0x7FFFFFFF), n + i));
      }
      xnn_math_f32_expm1minus__wasmsimd_rr2_lut16_p3_andnot(kBlockSize * sizeof(float), inputs.data(), outputs.data());
      for (uint32_t i = 0; i < kBlockSize; i++) {
        ASSERT_TRUE(std::isnan(outputs[i]))
          << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(inputs[i])
          << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(outputs[i]);
      }
    }
  }

  TEST(EXPM1MINUS__WASMSIMD_RR2_LUT16_P3_ANDNOT, negative_nan) {
    std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
    std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
    for (uint32_t n = UINT32_C(0x7F800001); n < UINT32_C(0x80000000); n += kBlockSize) {
      for (uint32_t i = 0; i < kBlockSize; i++) {
        inputs[i] = fp32_from_bits(std::min(UINT32_C(0x7FFFFFFF), UINT32_C(0x80000000) | (n + i)));
      }
      xnn_math_f32_expm1minus__wasmsimd_rr2_lut16_p3_andnot(kBlockSize * sizeof(float), inputs.data(), outputs.data());
      for (uint32_t i = 0; i < kBlockSize; i++) {
        ASSERT_TRUE(std::isnan(outputs[i]))
          << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(inputs[i])
          << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(outputs[i]);
      }
    }
  }
#endif  // XNN_ARCH_WASMSIMD

1172#if XNN_ARCH_WASMSIMD
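  // Same edge-case suite (-0.0f, negative saturation to -1.0f, NaN
  // propagation) for xnn_math_f32_expm1minus__wasmsimd_rr2_lut16_p3_max.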
  TEST(EXPM1MINUS__WASMSIMD_RR2_LUT16_P3_MAX, negative_zero) {
    std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
    std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
    std::fill(inputs.begin(), inputs.end(), -0.0f);
    xnn_math_f32_expm1minus__wasmsimd_rr2_lut16_p3_max(kBlockSize * sizeof(float), inputs.data(), outputs.data());
    const float reference_output = 0.0f;
    ASSERT_EQ(reference_output, outputs[0])
      << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(inputs[0])
      << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(reference_output)
      << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(outputs[0]);
  }

  TEST(EXPM1MINUS__WASMSIMD_RR2_LUT16_P3_MAX, negative_saturation) {
    std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
    std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
    for (uint32_t n = UINT32_C(0xC18AA123); n <= UINT32_C(0xFF800000); n += kBlockSize) {
      for (uint32_t i = 0; i < kBlockSize; i++) {
        inputs[i] = fp32_from_bits(std::min(n + i, UINT32_C(0xFF800000)));
      }
      xnn_math_f32_expm1minus__wasmsimd_rr2_lut16_p3_max(kBlockSize * sizeof(float), inputs.data(), outputs.data());
      for (uint32_t i = 0; i < kBlockSize; i++) {
        const float reference_output = -1.0f;
        ASSERT_EQ(reference_output, outputs[i])
          << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(inputs[i])
          << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(reference_output)
          << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(outputs[i]);
      }
    }
  }

  TEST(EXPM1MINUS__WASMSIMD_RR2_LUT16_P3_MAX, positive_nan) {
    std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
    std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
    for (uint32_t n = UINT32_C(0x7F800001); n < UINT32_C(0x80000000); n += kBlockSize) {
      for (uint32_t i = 0; i < kBlockSize; i++) {
        inputs[i] = fp32_from_bits(std::min(UINT32_C(0x7FFFFFFF), n + i));
      }
      xnn_math_f32_expm1minus__wasmsimd_rr2_lut16_p3_max(kBlockSize * sizeof(float), inputs.data(), outputs.data());
      for (uint32_t i = 0; i < kBlockSize; i++) {
        ASSERT_TRUE(std::isnan(outputs[i]))
          << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(inputs[i])
          << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(outputs[i]);
      }
    }
  }

  TEST(EXPM1MINUS__WASMSIMD_RR2_LUT16_P3_MAX, negative_nan) {
    std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
    std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
    for (uint32_t n = UINT32_C(0x7F800001); n < UINT32_C(0x80000000); n += kBlockSize) {
      for (uint32_t i = 0; i < kBlockSize; i++) {
        inputs[i] = fp32_from_bits(std::min(UINT32_C(0x7FFFFFFF), UINT32_C(0x80000000) | (n + i)));
      }
      xnn_math_f32_expm1minus__wasmsimd_rr2_lut16_p3_max(kBlockSize * sizeof(float), inputs.data(), outputs.data());
      for (uint32_t i = 0; i < kBlockSize; i++) {
        ASSERT_TRUE(std::isnan(outputs[i]))
          << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(inputs[i])
          << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(outputs[i]);
      }
    }
  }
#endif  // XNN_ARCH_WASMSIMD


#if XNN_ARCH_WASMSIMD
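  // Same edge-case suite for the degree-6 polynomial variant,
  // xnn_math_f32_expm1minus__wasmsimd_rr2_p6_andnot.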
  TEST(EXPM1MINUS__WASMSIMD_RR2_P6_ANDNOT, negative_zero) {
    std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
    std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
    std::fill(inputs.begin(), inputs.end(), -0.0f);
    xnn_math_f32_expm1minus__wasmsimd_rr2_p6_andnot(kBlockSize * sizeof(float), inputs.data(), outputs.data());
    const float reference_output = 0.0f;
    ASSERT_EQ(reference_output, outputs[0])
      << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(inputs[0])
      << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(reference_output)
      << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(outputs[0]);
  }

  TEST(EXPM1MINUS__WASMSIMD_RR2_P6_ANDNOT, negative_saturation) {
    std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
    std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
    for (uint32_t n = UINT32_C(0xC18AA123); n <= UINT32_C(0xFF800000); n += kBlockSize) {
      for (uint32_t i = 0; i < kBlockSize; i++) {
        inputs[i] = fp32_from_bits(std::min(n + i, UINT32_C(0xFF800000)));
      }
      xnn_math_f32_expm1minus__wasmsimd_rr2_p6_andnot(kBlockSize * sizeof(float), inputs.data(), outputs.data());
      for (uint32_t i = 0; i < kBlockSize; i++) {
        const float reference_output = -1.0f;
        ASSERT_EQ(reference_output, outputs[i])
          << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(inputs[i])
          << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(reference_output)
          << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(outputs[i]);
      }
    }
  }

  TEST(EXPM1MINUS__WASMSIMD_RR2_P6_ANDNOT, positive_nan) {
    std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
    std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
    for (uint32_t n = UINT32_C(0x7F800001); n < UINT32_C(0x80000000); n += kBlockSize) {
      for (uint32_t i = 0; i < kBlockSize; i++) {
        inputs[i] = fp32_from_bits(std::min(UINT32_C(0x7FFFFFFF), n + i));
      }
      xnn_math_f32_expm1minus__wasmsimd_rr2_p6_andnot(kBlockSize * sizeof(float), inputs.data(), outputs.data());
      for (uint32_t i = 0; i < kBlockSize; i++) {
        ASSERT_TRUE(std::isnan(outputs[i]))
          << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(inputs[i])
          << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(outputs[i]);
      }
    }
  }

  TEST(EXPM1MINUS__WASMSIMD_RR2_P6_ANDNOT, negative_nan) {
    std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
    std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
    for (uint32_t n = UINT32_C(0x7F800001); n < UINT32_C(0x80000000); n += kBlockSize) {
      for (uint32_t i = 0; i < kBlockSize; i++) {
        inputs[i] = fp32_from_bits(std::min(UINT32_C(0x7FFFFFFF), UINT32_C(0x80000000) | (n + i)));
      }
      xnn_math_f32_expm1minus__wasmsimd_rr2_p6_andnot(kBlockSize * sizeof(float), inputs.data(), outputs.data());
      for (uint32_t i = 0; i < kBlockSize; i++) {
        ASSERT_TRUE(std::isnan(outputs[i]))
          << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(inputs[i])
          << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(outputs[i]);
      }
    }
  }
#endif  // XNN_ARCH_WASMSIMD


#if XNN_ARCH_WASMSIMD
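  // Same edge-case suite for xnn_math_f32_expm1minus__wasmsimd_rr2_p6_max.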
  TEST(EXPM1MINUS__WASMSIMD_RR2_P6_MAX, negative_zero) {
    std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
    std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
    std::fill(inputs.begin(), inputs.end(), -0.0f);
    xnn_math_f32_expm1minus__wasmsimd_rr2_p6_max(kBlockSize * sizeof(float), inputs.data(), outputs.data());
    const float reference_output = 0.0f;
    ASSERT_EQ(reference_output, outputs[0])
      << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(inputs[0])
      << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(reference_output)
      << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(outputs[0]);
  }

  TEST(EXPM1MINUS__WASMSIMD_RR2_P6_MAX, negative_saturation) {
    std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
    std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
    for (uint32_t n = UINT32_C(0xC18AA123); n <= UINT32_C(0xFF800000); n += kBlockSize) {
      for (uint32_t i = 0; i < kBlockSize; i++) {
        inputs[i] = fp32_from_bits(std::min(n + i, UINT32_C(0xFF800000)));
      }
      xnn_math_f32_expm1minus__wasmsimd_rr2_p6_max(kBlockSize * sizeof(float), inputs.data(), outputs.data());
      for (uint32_t i = 0; i < kBlockSize; i++) {
        const float reference_output = -1.0f;
        ASSERT_EQ(reference_output, outputs[i])
          << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(inputs[i])
          << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(reference_output)
          << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(outputs[i]);
      }
    }
  }

  TEST(EXPM1MINUS__WASMSIMD_RR2_P6_MAX, positive_nan) {
    std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
    std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
    for (uint32_t n = UINT32_C(0x7F800001); n < UINT32_C(0x80000000); n += kBlockSize) {
      for (uint32_t i = 0; i < kBlockSize; i++) {
        inputs[i] = fp32_from_bits(std::min(UINT32_C(0x7FFFFFFF), n + i));
      }
      xnn_math_f32_expm1minus__wasmsimd_rr2_p6_max(kBlockSize * sizeof(float), inputs.data(), outputs.data());
      for (uint32_t i = 0; i < kBlockSize; i++) {
        ASSERT_TRUE(std::isnan(outputs[i]))
          << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(inputs[i])
          << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(outputs[i]);
      }
    }
  }

  TEST(EXPM1MINUS__WASMSIMD_RR2_P6_MAX, negative_nan) {
    std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
    std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
    for (uint32_t n = UINT32_C(0x7F800001); n < UINT32_C(0x80000000); n += kBlockSize) {
      for (uint32_t i = 0; i < kBlockSize; i++) {
        inputs[i] = fp32_from_bits(std::min(UINT32_C(0x7FFFFFFF), UINT32_C(0x80000000) | (n + i)));
      }
      xnn_math_f32_expm1minus__wasmsimd_rr2_p6_max(kBlockSize * sizeof(float), inputs.data(), outputs.data());
      for (uint32_t i = 0; i < kBlockSize; i++) {
        ASSERT_TRUE(std::isnan(outputs[i]))
          << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(inputs[i])
          << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(outputs[i]);
      }
    }
  }
#endif  // XNN_ARCH_WASMSIMD


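// Edge-case coverage for the scalar kernels, starting with
// xnn_math_f32_expm1minus__scalar_rr2_lut4_p4: a -0.0f input must map to zero,
// inputs below the negative cutoff must saturate to exactly -1.0f, and NaN
// inputs of either sign must propagate as NaN.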
TEST(EXPM1MINUS__SCALAR_RR2_LUT4_P4, negative_zero) {
  std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
  std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
  std::fill(inputs.begin(), inputs.end(), -0.0f);
  xnn_math_f32_expm1minus__scalar_rr2_lut4_p4(kBlockSize * sizeof(float), inputs.data(), outputs.data());
  const float reference_output = 0.0f;
  ASSERT_EQ(reference_output, outputs[0])
    << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(inputs[0])
    << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(reference_output)
    << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(outputs[0]);
}

TEST(EXPM1MINUS__SCALAR_RR2_LUT4_P4, negative_saturation) {
  std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
  std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
  for (uint32_t n = UINT32_C(0xC18AA123); n <= UINT32_C(0xFF800000); n += kBlockSize) {
    for (uint32_t i = 0; i < kBlockSize; i++) {
      inputs[i] = fp32_from_bits(std::min(n + i, UINT32_C(0xFF800000)));
    }
    xnn_math_f32_expm1minus__scalar_rr2_lut4_p4(kBlockSize * sizeof(float), inputs.data(), outputs.data());
    for (uint32_t i = 0; i < kBlockSize; i++) {
      const float reference_output = -1.0f;
      ASSERT_EQ(reference_output, outputs[i])
        << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(inputs[i])
        << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(reference_output)
        << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(outputs[i]);
    }
  }
}

TEST(EXPM1MINUS__SCALAR_RR2_LUT4_P4, positive_nan) {
  std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
  std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
  for (uint32_t n = UINT32_C(0x7F800001); n < UINT32_C(0x80000000); n += kBlockSize) {
    for (uint32_t i = 0; i < kBlockSize; i++) {
      inputs[i] = fp32_from_bits(std::min(UINT32_C(0x7FFFFFFF), n + i));
    }
    xnn_math_f32_expm1minus__scalar_rr2_lut4_p4(kBlockSize * sizeof(float), inputs.data(), outputs.data());
    for (uint32_t i = 0; i < kBlockSize; i++) {
      ASSERT_TRUE(std::isnan(outputs[i]))
        << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(inputs[i])
        << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(outputs[i]);
    }
  }
}

TEST(EXPM1MINUS__SCALAR_RR2_LUT4_P4, negative_nan) {
  std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
  std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
  for (uint32_t n = UINT32_C(0x7F800001); n < UINT32_C(0x80000000); n += kBlockSize) {
    for (uint32_t i = 0; i < kBlockSize; i++) {
      inputs[i] = fp32_from_bits(std::min(UINT32_C(0x7FFFFFFF), UINT32_C(0x80000000) | (n + i)));
    }
    xnn_math_f32_expm1minus__scalar_rr2_lut4_p4(kBlockSize * sizeof(float), inputs.data(), outputs.data());
    for (uint32_t i = 0; i < kBlockSize; i++) {
      ASSERT_TRUE(std::isnan(outputs[i]))
        << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(inputs[i])
        << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(outputs[i]);
    }
  }
}


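// Same edge-case suite for xnn_math_f32_expm1minus__scalar_rr2_lut8_p3.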
TEST(EXPM1MINUS__SCALAR_RR2_LUT8_P3, negative_zero) {
  std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
  std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
  std::fill(inputs.begin(), inputs.end(), -0.0f);
  xnn_math_f32_expm1minus__scalar_rr2_lut8_p3(kBlockSize * sizeof(float), inputs.data(), outputs.data());
  const float reference_output = 0.0f;
  ASSERT_EQ(reference_output, outputs[0])
    << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(inputs[0])
    << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(reference_output)
    << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(outputs[0]);
}

TEST(EXPM1MINUS__SCALAR_RR2_LUT8_P3, negative_saturation) {
  std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
  std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
  for (uint32_t n = UINT32_C(0xC18AA123); n <= UINT32_C(0xFF800000); n += kBlockSize) {
    for (uint32_t i = 0; i < kBlockSize; i++) {
      inputs[i] = fp32_from_bits(std::min(n + i, UINT32_C(0xFF800000)));
    }
    xnn_math_f32_expm1minus__scalar_rr2_lut8_p3(kBlockSize * sizeof(float), inputs.data(), outputs.data());
    for (uint32_t i = 0; i < kBlockSize; i++) {
      const float reference_output = -1.0f;
      ASSERT_EQ(reference_output, outputs[i])
        << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(inputs[i])
        << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(reference_output)
        << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(outputs[i]);
    }
  }
}

TEST(EXPM1MINUS__SCALAR_RR2_LUT8_P3, positive_nan) {
  std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
  std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
  for (uint32_t n = UINT32_C(0x7F800001); n < UINT32_C(0x80000000); n += kBlockSize) {
    for (uint32_t i = 0; i < kBlockSize; i++) {
      inputs[i] = fp32_from_bits(std::min(UINT32_C(0x7FFFFFFF), n + i));
    }
    xnn_math_f32_expm1minus__scalar_rr2_lut8_p3(kBlockSize * sizeof(float), inputs.data(), outputs.data());
    for (uint32_t i = 0; i < kBlockSize; i++) {
      ASSERT_TRUE(std::isnan(outputs[i]))
        << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(inputs[i])
        << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(outputs[i]);
    }
  }
}

TEST(EXPM1MINUS__SCALAR_RR2_LUT8_P3, negative_nan) {
  std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
  std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
  for (uint32_t n = UINT32_C(0x7F800001); n < UINT32_C(0x80000000); n += kBlockSize) {
    for (uint32_t i = 0; i < kBlockSize; i++) {
      inputs[i] = fp32_from_bits(std::min(UINT32_C(0x7FFFFFFF), UINT32_C(0x80000000) | (n + i)));
    }
    xnn_math_f32_expm1minus__scalar_rr2_lut8_p3(kBlockSize * sizeof(float), inputs.data(), outputs.data());
    for (uint32_t i = 0; i < kBlockSize; i++) {
      ASSERT_TRUE(std::isnan(outputs[i]))
        << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(inputs[i])
        << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(outputs[i]);
    }
  }
}


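// Same edge-case suite for xnn_math_f32_expm1minus__scalar_rr2_lut8_p4.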
TEST(EXPM1MINUS__SCALAR_RR2_LUT8_P4, negative_zero) {
  std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
  std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
  std::fill(inputs.begin(), inputs.end(), -0.0f);
  xnn_math_f32_expm1minus__scalar_rr2_lut8_p4(kBlockSize * sizeof(float), inputs.data(), outputs.data());
  const float reference_output = 0.0f;
  ASSERT_EQ(reference_output, outputs[0])
    << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(inputs[0])
    << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(reference_output)
    << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(outputs[0]);
}

TEST(EXPM1MINUS__SCALAR_RR2_LUT8_P4, negative_saturation) {
  std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
  std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
  for (uint32_t n = UINT32_C(0xC18AA123); n <= UINT32_C(0xFF800000); n += kBlockSize) {
    for (uint32_t i = 0; i < kBlockSize; i++) {
      inputs[i] = fp32_from_bits(std::min(n + i, UINT32_C(0xFF800000)));
    }
    xnn_math_f32_expm1minus__scalar_rr2_lut8_p4(kBlockSize * sizeof(float), inputs.data(), outputs.data());
    for (uint32_t i = 0; i < kBlockSize; i++) {
      const float reference_output = -1.0f;
      ASSERT_EQ(reference_output, outputs[i])
        << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(inputs[i])
        << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(reference_output)
        << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(outputs[i]);
    }
  }
}

TEST(EXPM1MINUS__SCALAR_RR2_LUT8_P4, positive_nan) {
  std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
  std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
  for (uint32_t n = UINT32_C(0x7F800001); n < UINT32_C(0x80000000); n += kBlockSize) {
    for (uint32_t i = 0; i < kBlockSize; i++) {
      inputs[i] = fp32_from_bits(std::min(UINT32_C(0x7FFFFFFF), n + i));
    }
    xnn_math_f32_expm1minus__scalar_rr2_lut8_p4(kBlockSize * sizeof(float), inputs.data(), outputs.data());
    for (uint32_t i = 0; i < kBlockSize; i++) {
      ASSERT_TRUE(std::isnan(outputs[i]))
        << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(inputs[i])
        << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(outputs[i]);
    }
  }
}

TEST(EXPM1MINUS__SCALAR_RR2_LUT8_P4, negative_nan) {
  std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
  std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
  for (uint32_t n = UINT32_C(0x7F800001); n < UINT32_C(0x80000000); n += kBlockSize) {
    for (uint32_t i = 0; i < kBlockSize; i++) {
      inputs[i] = fp32_from_bits(std::min(UINT32_C(0x7FFFFFFF), UINT32_C(0x80000000) | (n + i)));
    }
    xnn_math_f32_expm1minus__scalar_rr2_lut8_p4(kBlockSize * sizeof(float), inputs.data(), outputs.data());
    for (uint32_t i = 0; i < kBlockSize; i++) {
      ASSERT_TRUE(std::isnan(outputs[i]))
        << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(inputs[i])
        << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(outputs[i]);
    }
  }
}


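// Same edge-case suite for xnn_math_f32_expm1minus__scalar_rr2_lut16_p3.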
TEST(EXPM1MINUS__SCALAR_RR2_LUT16_P3, negative_zero) {
  std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
  std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
  std::fill(inputs.begin(), inputs.end(), -0.0f);
  xnn_math_f32_expm1minus__scalar_rr2_lut16_p3(kBlockSize * sizeof(float), inputs.data(), outputs.data());
  const float reference_output = 0.0f;
  ASSERT_EQ(reference_output, outputs[0])
    << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(inputs[0])
    << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(reference_output)
    << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(outputs[0]);
}

TEST(EXPM1MINUS__SCALAR_RR2_LUT16_P3, negative_saturation) {
  std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
  std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
  for (uint32_t n = UINT32_C(0xC18AA123); n <= UINT32_C(0xFF800000); n += kBlockSize) {
    for (uint32_t i = 0; i < kBlockSize; i++) {
      inputs[i] = fp32_from_bits(std::min(n + i, UINT32_C(0xFF800000)));
    }
    xnn_math_f32_expm1minus__scalar_rr2_lut16_p3(kBlockSize * sizeof(float), inputs.data(), outputs.data());
    for (uint32_t i = 0; i < kBlockSize; i++) {
      const float reference_output = -1.0f;
      ASSERT_EQ(reference_output, outputs[i])
        << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(inputs[i])
        << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(reference_output)
        << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(outputs[i]);
    }
  }
}

TEST(EXPM1MINUS__SCALAR_RR2_LUT16_P3, positive_nan) {
  std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
  std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
  for (uint32_t n = UINT32_C(0x7F800001); n < UINT32_C(0x80000000); n += kBlockSize) {
    for (uint32_t i = 0; i < kBlockSize; i++) {
      inputs[i] = fp32_from_bits(std::min(UINT32_C(0x7FFFFFFF), n + i));
    }
    xnn_math_f32_expm1minus__scalar_rr2_lut16_p3(kBlockSize * sizeof(float), inputs.data(), outputs.data());
    for (uint32_t i = 0; i < kBlockSize; i++) {
      ASSERT_TRUE(std::isnan(outputs[i]))
        << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(inputs[i])
        << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(outputs[i]);
    }
  }
}

TEST(EXPM1MINUS__SCALAR_RR2_LUT16_P3, negative_nan) {
  std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
  std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
  for (uint32_t n = UINT32_C(0x7F800001); n < UINT32_C(0x80000000); n += kBlockSize) {
    for (uint32_t i = 0; i < kBlockSize; i++) {
      inputs[i] = fp32_from_bits(std::min(UINT32_C(0x7FFFFFFF), UINT32_C(0x80000000) | (n + i)));
    }
    xnn_math_f32_expm1minus__scalar_rr2_lut16_p3(kBlockSize * sizeof(float), inputs.data(), outputs.data());
    for (uint32_t i = 0; i < kBlockSize; i++) {
      ASSERT_TRUE(std::isnan(outputs[i]))
        << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(inputs[i])
        << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(outputs[i]);
    }
  }
}


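// Same edge-case suite for xnn_math_f32_expm1minus__scalar_rr2_lut16_p4.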
TEST(EXPM1MINUS__SCALAR_RR2_LUT16_P4, negative_zero) {
  std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
  std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
  std::fill(inputs.begin(), inputs.end(), -0.0f);
  xnn_math_f32_expm1minus__scalar_rr2_lut16_p4(kBlockSize * sizeof(float), inputs.data(), outputs.data());
  const float reference_output = 0.0f;
  ASSERT_EQ(reference_output, outputs[0])
    << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(inputs[0])
    << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(reference_output)
    << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(outputs[0]);
}

TEST(EXPM1MINUS__SCALAR_RR2_LUT16_P4, negative_saturation) {
  std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
  std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
  for (uint32_t n = UINT32_C(0xC18AA123); n <= UINT32_C(0xFF800000); n += kBlockSize) {
    for (uint32_t i = 0; i < kBlockSize; i++) {
      inputs[i] = fp32_from_bits(std::min(n + i, UINT32_C(0xFF800000)));
    }
    xnn_math_f32_expm1minus__scalar_rr2_lut16_p4(kBlockSize * sizeof(float), inputs.data(), outputs.data());
    for (uint32_t i = 0; i < kBlockSize; i++) {
      const float reference_output = -1.0f;
      ASSERT_EQ(reference_output, outputs[i])
        << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(inputs[i])
        << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(reference_output)
        << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(outputs[i]);
    }
  }
}

TEST(EXPM1MINUS__SCALAR_RR2_LUT16_P4, positive_nan) {
  std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
  std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
  for (uint32_t n = UINT32_C(0x7F800001); n < UINT32_C(0x80000000); n += kBlockSize) {
    for (uint32_t i = 0; i < kBlockSize; i++) {
      inputs[i] = fp32_from_bits(std::min(UINT32_C(0x7FFFFFFF), n + i));
    }
    xnn_math_f32_expm1minus__scalar_rr2_lut16_p4(kBlockSize * sizeof(float), inputs.data(), outputs.data());
    for (uint32_t i = 0; i < kBlockSize; i++) {
      ASSERT_TRUE(std::isnan(outputs[i]))
        << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(inputs[i])
        << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(outputs[i]);
    }
  }
}

TEST(EXPM1MINUS__SCALAR_RR2_LUT16_P4, negative_nan) {
  std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
  std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
  for (uint32_t n = UINT32_C(0x7F800001); n < UINT32_C(0x80000000); n += kBlockSize) {
    for (uint32_t i = 0; i < kBlockSize; i++) {
      inputs[i] = fp32_from_bits(std::min(UINT32_C(0x7FFFFFFF), UINT32_C(0x80000000) | (n + i)));
    }
    xnn_math_f32_expm1minus__scalar_rr2_lut16_p4(kBlockSize * sizeof(float), inputs.data(), outputs.data());
    for (uint32_t i = 0; i < kBlockSize; i++) {
      ASSERT_TRUE(std::isnan(outputs[i]))
        << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(inputs[i])
        << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(outputs[i]);
    }
  }
}


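// Same edge-case suite for the LUT-free degree-5 polynomial kernel,
// xnn_math_f32_expm1minus__scalar_rr2_p5.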
TEST(EXPM1MINUS__SCALAR_RR2_P5, negative_zero) {
  std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
  std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
  std::fill(inputs.begin(), inputs.end(), -0.0f);
  xnn_math_f32_expm1minus__scalar_rr2_p5(kBlockSize * sizeof(float), inputs.data(), outputs.data());
  const float reference_output = 0.0f;
  ASSERT_EQ(reference_output, outputs[0])
    << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(inputs[0])
    << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(reference_output)
    << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(outputs[0]);
}

TEST(EXPM1MINUS__SCALAR_RR2_P5, negative_saturation) {
  std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
  std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
  for (uint32_t n = UINT32_C(0xC18AA123); n <= UINT32_C(0xFF800000); n += kBlockSize) {
    for (uint32_t i = 0; i < kBlockSize; i++) {
      inputs[i] = fp32_from_bits(std::min(n + i, UINT32_C(0xFF800000)));
    }
    xnn_math_f32_expm1minus__scalar_rr2_p5(kBlockSize * sizeof(float), inputs.data(), outputs.data());
    for (uint32_t i = 0; i < kBlockSize; i++) {
      const float reference_output = -1.0f;
      ASSERT_EQ(reference_output, outputs[i])
        << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(inputs[i])
        << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(reference_output)
        << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(outputs[i]);
    }
  }
}

TEST(EXPM1MINUS__SCALAR_RR2_P5, positive_nan) {
  std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
  std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
  for (uint32_t n = UINT32_C(0x7F800001); n < UINT32_C(0x80000000); n += kBlockSize) {
    for (uint32_t i = 0; i < kBlockSize; i++) {
      inputs[i] = fp32_from_bits(std::min(UINT32_C(0x7FFFFFFF), n + i));
    }
    xnn_math_f32_expm1minus__scalar_rr2_p5(kBlockSize * sizeof(float), inputs.data(), outputs.data());
    for (uint32_t i = 0; i < kBlockSize; i++) {
      ASSERT_TRUE(std::isnan(outputs[i]))
        << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(inputs[i])
        << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(outputs[i]);
    }
  }
}

TEST(EXPM1MINUS__SCALAR_RR2_P5, negative_nan) {
  std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
  std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
  for (uint32_t n = UINT32_C(0x7F800001); n < UINT32_C(0x80000000); n += kBlockSize) {
    for (uint32_t i = 0; i < kBlockSize; i++) {
      inputs[i] = fp32_from_bits(std::min(UINT32_C(0x7FFFFFFF), UINT32_C(0x80000000) | (n + i)));
    }
    xnn_math_f32_expm1minus__scalar_rr2_p5(kBlockSize * sizeof(float), inputs.data(), outputs.data());
    for (uint32_t i = 0; i < kBlockSize; i++) {
      ASSERT_TRUE(std::isnan(outputs[i]))
        << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(inputs[i])
        << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(outputs[i]);
    }
  }
}


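// Same edge-case suite for the degree-6 polynomial kernel,
// xnn_math_f32_expm1minus__scalar_rr2_p6.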
TEST(EXPM1MINUS__SCALAR_RR2_P6, negative_zero) {
  std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
  std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
  std::fill(inputs.begin(), inputs.end(), -0.0f);
  xnn_math_f32_expm1minus__scalar_rr2_p6(kBlockSize * sizeof(float), inputs.data(), outputs.data());
  const float reference_output = 0.0f;
  ASSERT_EQ(reference_output, outputs[0])
    << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(inputs[0])
    << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(reference_output)
    << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(outputs[0]);
}

TEST(EXPM1MINUS__SCALAR_RR2_P6, negative_saturation) {
  std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
  std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
  for (uint32_t n = UINT32_C(0xC18AA123); n <= UINT32_C(0xFF800000); n += kBlockSize) {
    for (uint32_t i = 0; i < kBlockSize; i++) {
      inputs[i] = fp32_from_bits(std::min(n + i, UINT32_C(0xFF800000)));
    }
    xnn_math_f32_expm1minus__scalar_rr2_p6(kBlockSize * sizeof(float), inputs.data(), outputs.data());
    for (uint32_t i = 0; i < kBlockSize; i++) {
      const float reference_output = -1.0f;
      ASSERT_EQ(reference_output, outputs[i])
        << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(inputs[i])
        << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(reference_output)
        << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(outputs[i]);
    }
  }
}

TEST(EXPM1MINUS__SCALAR_RR2_P6, positive_nan) {
  std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
  std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
  for (uint32_t n = UINT32_C(0x7F800001); n < UINT32_C(0x80000000); n += kBlockSize) {
    for (uint32_t i = 0; i < kBlockSize; i++) {
      inputs[i] = fp32_from_bits(std::min(UINT32_C(0x7FFFFFFF), n + i));
    }
    xnn_math_f32_expm1minus__scalar_rr2_p6(kBlockSize * sizeof(float), inputs.data(), outputs.data());
    for (uint32_t i = 0; i < kBlockSize; i++) {
      ASSERT_TRUE(std::isnan(outputs[i]))
        << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(inputs[i])
        << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(outputs[i]);
    }
  }
}

TEST(EXPM1MINUS__SCALAR_RR2_P6, negative_nan) {
  std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
  std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
  for (uint32_t n = UINT32_C(0x7F800001); n < UINT32_C(0x80000000); n += kBlockSize) {
    for (uint32_t i = 0; i < kBlockSize; i++) {
      inputs[i] = fp32_from_bits(std::min(UINT32_C(0x7FFFFFFF), UINT32_C(0x80000000) | (n + i)));
    }
    xnn_math_f32_expm1minus__scalar_rr2_p6(kBlockSize * sizeof(float), inputs.data(), outputs.data());
    for (uint32_t i = 0; i < kBlockSize; i++) {
      ASSERT_TRUE(std::isnan(outputs[i]))
        << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(inputs[i])
        << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(outputs[i]);
    }
  }
}