// Copyright 2020 Google LLC
//
// This source code is licensed under the BSD-style license found in the
// LICENSE file in the root directory of this source tree.
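//
// Special-value tests for the f32 expminus (exp(x) on non-positive inputs)
// scalar and SIMD implementations: zeros of either sign must produce exactly
// 1.0f, large-magnitude negative inputs must saturate to +0.0f, and NaN
// inputs must propagate to NaN outputs.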

#include <algorithm>
#include <cmath>
#include <cstddef>
#include <cstdint>
#include <cstdlib>
#include <iomanip>
#include <ios>
#include <vector>

#include <gtest/gtest.h>

#include <fp16.h>

#include <xnnpack/AlignedAllocator.h>
#include <xnnpack/common.h>
#include <xnnpack/isa-checks.h>
#include <xnnpack/math-stubs.h>

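// Number of floats processed per call into the kernel under test. The math
// stubs take the buffer size in bytes, hence kBlockSize * sizeof(float) at
// each call site.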
constexpr int kBlockSize = 1024;


#if XNN_ARCH_ARM || XNN_ARCH_ARM64
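  // exp(-0.0f) and exp(+0.0f) must both evaluate to exactly 1.0f.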
  TEST(EXPMINUS__NEONFMA_RR2_LUT64_P2, negative_zero) {
    TEST_REQUIRES_ARM_NEON_FMA;

    std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
    std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
    std::fill(inputs.begin(), inputs.end(), -0.0f);
    xnn_math_f32_expminus__neonfma_rr2_lut64_p2(kBlockSize * sizeof(float), inputs.data(), outputs.data());
    const float reference_output = 1.0f;
    ASSERT_EQ(reference_output, outputs[0])
      << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(inputs[0])
      << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(reference_output)
      << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(outputs[0]);
  }

  TEST(EXPMINUS__NEONFMA_RR2_LUT64_P2, positive_zero) {
    TEST_REQUIRES_ARM_NEON_FMA;

    std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
    std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
    std::fill(inputs.begin(), inputs.end(), +0.0f);
    xnn_math_f32_expminus__neonfma_rr2_lut64_p2(kBlockSize * sizeof(float), inputs.data(), outputs.data());
    const float reference_output = 1.0f;
    ASSERT_EQ(reference_output, outputs[0])
      << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(inputs[0])
      << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(reference_output)
      << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(outputs[0]);
  }

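  // Inputs from -0x1.5D589Ep+6f (bit pattern 0xC2AEAC50, approximately
  // -87.336, near ln(FLT_MIN)) down to -infinity make exp(x) underflow;
  // the kernel must flush such results to exactly +0.0f.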
  TEST(EXPMINUS__NEONFMA_RR2_LUT64_P2, negative_saturation) {
    TEST_REQUIRES_ARM_NEON_FMA;

    std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
    std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
    for (uint32_t n = UINT32_C(0xC2AEAC50); n <= UINT32_C(0xFF800000); n += kBlockSize) {
      for (uint32_t i = 0; i < kBlockSize; i++) {
        inputs[i] = fp32_from_bits(std::min(n + i, UINT32_C(0xFF800000)));
      }
      xnn_math_f32_expminus__neonfma_rr2_lut64_p2(kBlockSize * sizeof(float), inputs.data(), outputs.data());
      for (uint32_t i = 0; i < kBlockSize; i++) {
        const uint32_t reference_output = UINT32_C(0x00000000);
        ASSERT_EQ(reference_output, fp32_to_bits(outputs[i]))
          << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(inputs[i])
          << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
          << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(outputs[i]);
      }
    }
  }

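  // Bit patterns 0x7F800001 through 0x7FFFFFFF enumerate every positive NaN
  // encoding; all of them must produce NaN outputs.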
  TEST(EXPMINUS__NEONFMA_RR2_LUT64_P2, positive_nan) {
    TEST_REQUIRES_ARM_NEON_FMA;

    std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
    std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
    for (uint32_t n = UINT32_C(0x7F800001); n < UINT32_C(0x80000000); n += kBlockSize) {
      for (uint32_t i = 0; i < kBlockSize; i++) {
        inputs[i] = fp32_from_bits(std::min(UINT32_C(0x7FFFFFFF), n + i));
      }
      xnn_math_f32_expminus__neonfma_rr2_lut64_p2(kBlockSize * sizeof(float), inputs.data(), outputs.data());
      for (uint32_t i = 0; i < kBlockSize; i++) {
        ASSERT_TRUE(std::isnan(outputs[i]))
          << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(inputs[i])
          << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(outputs[i]);
      }
    }
  }

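  // The same NaN payloads with the sign bit set must also produce NaN outputs.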
  TEST(EXPMINUS__NEONFMA_RR2_LUT64_P2, negative_nan) {
    TEST_REQUIRES_ARM_NEON_FMA;

    std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
    std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
    for (uint32_t n = UINT32_C(0x7F800001); n < UINT32_C(0x80000000); n += kBlockSize) {
      for (uint32_t i = 0; i < kBlockSize; i++) {
        inputs[i] = fp32_from_bits(std::min(UINT32_C(0x7FFFFFFF), UINT32_C(0x80000000) | (n + i)));
      }
      xnn_math_f32_expminus__neonfma_rr2_lut64_p2(kBlockSize * sizeof(float), inputs.data(), outputs.data());
      for (uint32_t i = 0; i < kBlockSize; i++) {
        ASSERT_TRUE(std::isnan(outputs[i]))
          << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(inputs[i])
          << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(outputs[i]);
      }
    }
  }
#endif  // XNN_ARCH_ARM || XNN_ARCH_ARM64


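// The same special-case suite for the NEON+FMA variant with a 2048-entry
// table and a degree-1 polynomial (per the kernel name).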
#if XNN_ARCH_ARM || XNN_ARCH_ARM64
  TEST(EXPMINUS__NEONFMA_RR2_LUT2048_P1, negative_zero) {
    TEST_REQUIRES_ARM_NEON_FMA;

    std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
    std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
    std::fill(inputs.begin(), inputs.end(), -0.0f);
    xnn_math_f32_expminus__neonfma_rr2_lut2048_p1(kBlockSize * sizeof(float), inputs.data(), outputs.data());
    const float reference_output = 1.0f;
    ASSERT_EQ(reference_output, outputs[0])
      << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(inputs[0])
      << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(reference_output)
      << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(outputs[0]);
  }

  TEST(EXPMINUS__NEONFMA_RR2_LUT2048_P1, positive_zero) {
    TEST_REQUIRES_ARM_NEON_FMA;

    std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
    std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
    std::fill(inputs.begin(), inputs.end(), +0.0f);
    xnn_math_f32_expminus__neonfma_rr2_lut2048_p1(kBlockSize * sizeof(float), inputs.data(), outputs.data());
    const float reference_output = 1.0f;
    ASSERT_EQ(reference_output, outputs[0])
      << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(inputs[0])
      << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(reference_output)
      << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(outputs[0]);
  }

  TEST(EXPMINUS__NEONFMA_RR2_LUT2048_P1, negative_saturation) {
    TEST_REQUIRES_ARM_NEON_FMA;

    std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
    std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
    for (uint32_t n = UINT32_C(0xC2AEAC50); n <= UINT32_C(0xFF800000); n += kBlockSize) {
      for (uint32_t i = 0; i < kBlockSize; i++) {
        inputs[i] = fp32_from_bits(std::min(n + i, UINT32_C(0xFF800000)));
      }
      xnn_math_f32_expminus__neonfma_rr2_lut2048_p1(kBlockSize * sizeof(float), inputs.data(), outputs.data());
      for (uint32_t i = 0; i < kBlockSize; i++) {
        const uint32_t reference_output = UINT32_C(0x00000000);
        ASSERT_EQ(reference_output, fp32_to_bits(outputs[i]))
          << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(inputs[i])
          << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
          << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(outputs[i]);
      }
    }
  }

  TEST(EXPMINUS__NEONFMA_RR2_LUT2048_P1, positive_nan) {
    TEST_REQUIRES_ARM_NEON_FMA;

    std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
    std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
    for (uint32_t n = UINT32_C(0x7F800001); n < UINT32_C(0x80000000); n += kBlockSize) {
      for (uint32_t i = 0; i < kBlockSize; i++) {
        inputs[i] = fp32_from_bits(std::min(UINT32_C(0x7FFFFFFF), n + i));
      }
      xnn_math_f32_expminus__neonfma_rr2_lut2048_p1(kBlockSize * sizeof(float), inputs.data(), outputs.data());
      for (uint32_t i = 0; i < kBlockSize; i++) {
        ASSERT_TRUE(std::isnan(outputs[i]))
          << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(inputs[i])
          << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(outputs[i]);
      }
    }
  }

  TEST(EXPMINUS__NEONFMA_RR2_LUT2048_P1, negative_nan) {
    TEST_REQUIRES_ARM_NEON_FMA;

    std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
    std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
    for (uint32_t n = UINT32_C(0x7F800001); n < UINT32_C(0x80000000); n += kBlockSize) {
      for (uint32_t i = 0; i < kBlockSize; i++) {
        inputs[i] = fp32_from_bits(std::min(UINT32_C(0x7FFFFFFF), UINT32_C(0x80000000) | (n + i)));
      }
      xnn_math_f32_expminus__neonfma_rr2_lut2048_p1(kBlockSize * sizeof(float), inputs.data(), outputs.data());
      for (uint32_t i = 0; i < kBlockSize; i++) {
        ASSERT_TRUE(std::isnan(outputs[i]))
          << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(inputs[i])
          << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(outputs[i]);
      }
    }
  }
#endif  // XNN_ARCH_ARM || XNN_ARCH_ARM64


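// The same special-case suite for the NEON+FMA variant with a degree-5
// polynomial and no lookup table.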
#if XNN_ARCH_ARM || XNN_ARCH_ARM64
  TEST(EXPMINUS__NEONFMA_RR2_P5, negative_zero) {
    TEST_REQUIRES_ARM_NEON_FMA;

    std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
    std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
    std::fill(inputs.begin(), inputs.end(), -0.0f);
    xnn_math_f32_expminus__neonfma_rr2_p5(kBlockSize * sizeof(float), inputs.data(), outputs.data());
    const float reference_output = 1.0f;
    ASSERT_EQ(reference_output, outputs[0])
      << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(inputs[0])
      << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(reference_output)
      << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(outputs[0]);
  }

  TEST(EXPMINUS__NEONFMA_RR2_P5, positive_zero) {
    TEST_REQUIRES_ARM_NEON_FMA;

    std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
    std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
    std::fill(inputs.begin(), inputs.end(), +0.0f);
    xnn_math_f32_expminus__neonfma_rr2_p5(kBlockSize * sizeof(float), inputs.data(), outputs.data());
    const float reference_output = 1.0f;
    ASSERT_EQ(reference_output, outputs[0])
      << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(inputs[0])
      << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(reference_output)
      << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(outputs[0]);
  }

  TEST(EXPMINUS__NEONFMA_RR2_P5, negative_saturation) {
    TEST_REQUIRES_ARM_NEON_FMA;

    std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
    std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
    for (uint32_t n = UINT32_C(0xC2AEAC50); n <= UINT32_C(0xFF800000); n += kBlockSize) {
      for (uint32_t i = 0; i < kBlockSize; i++) {
        inputs[i] = fp32_from_bits(std::min(n + i, UINT32_C(0xFF800000)));
      }
      xnn_math_f32_expminus__neonfma_rr2_p5(kBlockSize * sizeof(float), inputs.data(), outputs.data());
      for (uint32_t i = 0; i < kBlockSize; i++) {
        const uint32_t reference_output = UINT32_C(0x00000000);
        ASSERT_EQ(reference_output, fp32_to_bits(outputs[i]))
          << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(inputs[i])
          << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
          << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(outputs[i]);
      }
    }
  }

  TEST(EXPMINUS__NEONFMA_RR2_P5, positive_nan) {
    TEST_REQUIRES_ARM_NEON_FMA;

    std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
    std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
    for (uint32_t n = UINT32_C(0x7F800001); n < UINT32_C(0x80000000); n += kBlockSize) {
      for (uint32_t i = 0; i < kBlockSize; i++) {
        inputs[i] = fp32_from_bits(std::min(UINT32_C(0x7FFFFFFF), n + i));
      }
      xnn_math_f32_expminus__neonfma_rr2_p5(kBlockSize * sizeof(float), inputs.data(), outputs.data());
      for (uint32_t i = 0; i < kBlockSize; i++) {
        ASSERT_TRUE(std::isnan(outputs[i]))
          << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(inputs[i])
          << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(outputs[i]);
      }
    }
  }

  TEST(EXPMINUS__NEONFMA_RR2_P5, negative_nan) {
    TEST_REQUIRES_ARM_NEON_FMA;

    std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
    std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
    for (uint32_t n = UINT32_C(0x7F800001); n < UINT32_C(0x80000000); n += kBlockSize) {
      for (uint32_t i = 0; i < kBlockSize; i++) {
        inputs[i] = fp32_from_bits(std::min(UINT32_C(0x7FFFFFFF), UINT32_C(0x80000000) | (n + i)));
      }
      xnn_math_f32_expminus__neonfma_rr2_p5(kBlockSize * sizeof(float), inputs.data(), outputs.data());
      for (uint32_t i = 0; i < kBlockSize; i++) {
        ASSERT_TRUE(std::isnan(outputs[i]))
          << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(inputs[i])
          << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(outputs[i]);
      }
    }
  }
#endif  // XNN_ARCH_ARM || XNN_ARCH_ARM64


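// The same special-case suite for the AVX2 degree-5 polynomial variant.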
#if XNN_ARCH_X86 || XNN_ARCH_X86_64
  TEST(EXPMINUS__AVX2_RR2_P5, negative_zero) {
    TEST_REQUIRES_X86_AVX2;

    std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
    std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
    std::fill(inputs.begin(), inputs.end(), -0.0f);
    xnn_math_f32_expminus__avx2_rr2_p5(kBlockSize * sizeof(float), inputs.data(), outputs.data());
    const float reference_output = 1.0f;
    ASSERT_EQ(reference_output, outputs[0])
      << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(inputs[0])
      << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(reference_output)
      << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(outputs[0]);
  }

  TEST(EXPMINUS__AVX2_RR2_P5, positive_zero) {
    TEST_REQUIRES_X86_AVX2;

    std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
    std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
    std::fill(inputs.begin(), inputs.end(), +0.0f);
    xnn_math_f32_expminus__avx2_rr2_p5(kBlockSize * sizeof(float), inputs.data(), outputs.data());
    const float reference_output = 1.0f;
    ASSERT_EQ(reference_output, outputs[0])
      << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(inputs[0])
      << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(reference_output)
      << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(outputs[0]);
  }

  TEST(EXPMINUS__AVX2_RR2_P5, negative_saturation) {
    TEST_REQUIRES_X86_AVX2;

    std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
    std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
    for (uint32_t n = UINT32_C(0xC2AEAC50); n <= UINT32_C(0xFF800000); n += kBlockSize) {
      for (uint32_t i = 0; i < kBlockSize; i++) {
        inputs[i] = fp32_from_bits(std::min(n + i, UINT32_C(0xFF800000)));
      }
      xnn_math_f32_expminus__avx2_rr2_p5(kBlockSize * sizeof(float), inputs.data(), outputs.data());
      for (uint32_t i = 0; i < kBlockSize; i++) {
        const uint32_t reference_output = UINT32_C(0x00000000);
        ASSERT_EQ(reference_output, fp32_to_bits(outputs[i]))
          << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(inputs[i])
          << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
          << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(outputs[i]);
      }
    }
  }

  TEST(EXPMINUS__AVX2_RR2_P5, positive_nan) {
    TEST_REQUIRES_X86_AVX2;

    std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
    std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
    for (uint32_t n = UINT32_C(0x7F800001); n < UINT32_C(0x80000000); n += kBlockSize) {
      for (uint32_t i = 0; i < kBlockSize; i++) {
        inputs[i] = fp32_from_bits(std::min(UINT32_C(0x7FFFFFFF), n + i));
      }
      xnn_math_f32_expminus__avx2_rr2_p5(kBlockSize * sizeof(float), inputs.data(), outputs.data());
      for (uint32_t i = 0; i < kBlockSize; i++) {
        ASSERT_TRUE(std::isnan(outputs[i]))
          << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(inputs[i])
          << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(outputs[i]);
      }
    }
  }

  TEST(EXPMINUS__AVX2_RR2_P5, negative_nan) {
    TEST_REQUIRES_X86_AVX2;

    std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
    std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
    for (uint32_t n = UINT32_C(0x7F800001); n < UINT32_C(0x80000000); n += kBlockSize) {
      for (uint32_t i = 0; i < kBlockSize; i++) {
        inputs[i] = fp32_from_bits(std::min(UINT32_C(0x7FFFFFFF), UINT32_C(0x80000000) | (n + i)));
      }
      xnn_math_f32_expminus__avx2_rr2_p5(kBlockSize * sizeof(float), inputs.data(), outputs.data());
      for (uint32_t i = 0; i < kBlockSize; i++) {
        ASSERT_TRUE(std::isnan(outputs[i]))
          << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(inputs[i])
          << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(outputs[i]);
      }
    }
  }
#endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64


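// The same special-case suite for the SSE2 variant. SSE2 is part of the
// baseline x86 feature set assumed by XNNPACK, so no TEST_REQUIRES check is
// needed.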
#if XNN_ARCH_X86 || XNN_ARCH_X86_64
  TEST(EXPMINUS__SSE2_RR2_P5, negative_zero) {
    std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
    std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
    std::fill(inputs.begin(), inputs.end(), -0.0f);
    xnn_math_f32_expminus__sse2_rr2_p5(kBlockSize * sizeof(float), inputs.data(), outputs.data());
    const float reference_output = 1.0f;
    ASSERT_EQ(reference_output, outputs[0])
      << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(inputs[0])
      << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(reference_output)
      << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(outputs[0]);
  }

  TEST(EXPMINUS__SSE2_RR2_P5, positive_zero) {
    std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
    std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
    std::fill(inputs.begin(), inputs.end(), +0.0f);
    xnn_math_f32_expminus__sse2_rr2_p5(kBlockSize * sizeof(float), inputs.data(), outputs.data());
    const float reference_output = 1.0f;
    ASSERT_EQ(reference_output, outputs[0])
      << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(inputs[0])
      << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(reference_output)
      << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(outputs[0]);
  }

  TEST(EXPMINUS__SSE2_RR2_P5, negative_saturation) {
    std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
    std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
    for (uint32_t n = UINT32_C(0xC2AEAC50); n <= UINT32_C(0xFF800000); n += kBlockSize) {
      for (uint32_t i = 0; i < kBlockSize; i++) {
        inputs[i] = fp32_from_bits(std::min(n + i, UINT32_C(0xFF800000)));
      }
      xnn_math_f32_expminus__sse2_rr2_p5(kBlockSize * sizeof(float), inputs.data(), outputs.data());
      for (uint32_t i = 0; i < kBlockSize; i++) {
        const uint32_t reference_output = UINT32_C(0x00000000);
        ASSERT_EQ(reference_output, fp32_to_bits(outputs[i]))
          << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(inputs[i])
          << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
          << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(outputs[i]);
      }
    }
  }

  TEST(EXPMINUS__SSE2_RR2_P5, positive_nan) {
    std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
    std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
    for (uint32_t n = UINT32_C(0x7F800001); n < UINT32_C(0x80000000); n += kBlockSize) {
      for (uint32_t i = 0; i < kBlockSize; i++) {
        inputs[i] = fp32_from_bits(std::min(UINT32_C(0x7FFFFFFF), n + i));
      }
      xnn_math_f32_expminus__sse2_rr2_p5(kBlockSize * sizeof(float), inputs.data(), outputs.data());
      for (uint32_t i = 0; i < kBlockSize; i++) {
        ASSERT_TRUE(std::isnan(outputs[i]))
          << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(inputs[i])
          << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(outputs[i]);
      }
    }
  }

  TEST(EXPMINUS__SSE2_RR2_P5, negative_nan) {
    std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
    std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
    for (uint32_t n = UINT32_C(0x7F800001); n < UINT32_C(0x80000000); n += kBlockSize) {
      for (uint32_t i = 0; i < kBlockSize; i++) {
        inputs[i] = fp32_from_bits(std::min(UINT32_C(0x7FFFFFFF), UINT32_C(0x80000000) | (n + i)));
      }
      xnn_math_f32_expminus__sse2_rr2_p5(kBlockSize * sizeof(float), inputs.data(), outputs.data());
      for (uint32_t i = 0; i < kBlockSize; i++) {
        ASSERT_TRUE(std::isnan(outputs[i]))
          << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(inputs[i])
          << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(outputs[i]);
      }
    }
  }
#endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64


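// Portable scalar variants, runnable on any architecture (no #if guard or
// ISA check needed): first the 64-entry table with a degree-2 polynomial.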
TEST(EXPMINUS__SCALAR_RR2_LUT64_P2, negative_zero) {
  std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
  std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
  std::fill(inputs.begin(), inputs.end(), -0.0f);
  xnn_math_f32_expminus__scalar_rr2_lut64_p2(kBlockSize * sizeof(float), inputs.data(), outputs.data());
  const float reference_output = 1.0f;
  ASSERT_EQ(reference_output, outputs[0])
    << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(inputs[0])
    << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(reference_output)
    << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(outputs[0]);
}

TEST(EXPMINUS__SCALAR_RR2_LUT64_P2, positive_zero) {
  std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
  std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
  std::fill(inputs.begin(), inputs.end(), +0.0f);
  xnn_math_f32_expminus__scalar_rr2_lut64_p2(kBlockSize * sizeof(float), inputs.data(), outputs.data());
  const float reference_output = 1.0f;
  ASSERT_EQ(reference_output, outputs[0])
    << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(inputs[0])
    << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(reference_output)
    << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(outputs[0]);
}

TEST(EXPMINUS__SCALAR_RR2_LUT64_P2, negative_saturation) {
  std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
  std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
  for (uint32_t n = UINT32_C(0xC2AEAC50); n <= UINT32_C(0xFF800000); n += kBlockSize) {
    for (uint32_t i = 0; i < kBlockSize; i++) {
      inputs[i] = fp32_from_bits(std::min(n + i, UINT32_C(0xFF800000)));
    }
    xnn_math_f32_expminus__scalar_rr2_lut64_p2(kBlockSize * sizeof(float), inputs.data(), outputs.data());
    for (uint32_t i = 0; i < kBlockSize; i++) {
      const uint32_t reference_output = UINT32_C(0x00000000);
      ASSERT_EQ(reference_output, fp32_to_bits(outputs[i]))
        << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(inputs[i])
        << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
        << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(outputs[i]);
    }
  }
}

TEST(EXPMINUS__SCALAR_RR2_LUT64_P2, positive_nan) {
  std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
  std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
  for (uint32_t n = UINT32_C(0x7F800001); n < UINT32_C(0x80000000); n += kBlockSize) {
    for (uint32_t i = 0; i < kBlockSize; i++) {
      inputs[i] = fp32_from_bits(std::min(UINT32_C(0x7FFFFFFF), n + i));
    }
    xnn_math_f32_expminus__scalar_rr2_lut64_p2(kBlockSize * sizeof(float), inputs.data(), outputs.data());
    for (uint32_t i = 0; i < kBlockSize; i++) {
      ASSERT_TRUE(std::isnan(outputs[i]))
        << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(inputs[i])
        << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(outputs[i]);
    }
  }
}

TEST(EXPMINUS__SCALAR_RR2_LUT64_P2, negative_nan) {
  std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
  std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
  for (uint32_t n = UINT32_C(0x7F800001); n < UINT32_C(0x80000000); n += kBlockSize) {
    for (uint32_t i = 0; i < kBlockSize; i++) {
      inputs[i] = fp32_from_bits(std::min(UINT32_C(0x7FFFFFFF), UINT32_C(0x80000000) | (n + i)));
    }
    xnn_math_f32_expminus__scalar_rr2_lut64_p2(kBlockSize * sizeof(float), inputs.data(), outputs.data());
    for (uint32_t i = 0; i < kBlockSize; i++) {
      ASSERT_TRUE(std::isnan(outputs[i]))
        << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(inputs[i])
        << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(outputs[i]);
    }
  }
}


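// Scalar variant with a 2048-entry table and a degree-1 polynomial.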
TEST(EXPMINUS__SCALAR_RR2_LUT2048_P1, negative_zero) {
  std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
  std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
  std::fill(inputs.begin(), inputs.end(), -0.0f);
  xnn_math_f32_expminus__scalar_rr2_lut2048_p1(kBlockSize * sizeof(float), inputs.data(), outputs.data());
  const float reference_output = 1.0f;
  ASSERT_EQ(reference_output, outputs[0])
    << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(inputs[0])
    << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(reference_output)
    << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(outputs[0]);
}

TEST(EXPMINUS__SCALAR_RR2_LUT2048_P1, positive_zero) {
  std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
  std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
  std::fill(inputs.begin(), inputs.end(), +0.0f);
  xnn_math_f32_expminus__scalar_rr2_lut2048_p1(kBlockSize * sizeof(float), inputs.data(), outputs.data());
  const float reference_output = 1.0f;
  ASSERT_EQ(reference_output, outputs[0])
    << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(inputs[0])
    << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(reference_output)
    << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(outputs[0]);
}

TEST(EXPMINUS__SCALAR_RR2_LUT2048_P1, negative_saturation) {
  std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
  std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
  for (uint32_t n = UINT32_C(0xC2AEAC50); n <= UINT32_C(0xFF800000); n += kBlockSize) {
    for (uint32_t i = 0; i < kBlockSize; i++) {
      inputs[i] = fp32_from_bits(std::min(n + i, UINT32_C(0xFF800000)));
    }
    xnn_math_f32_expminus__scalar_rr2_lut2048_p1(kBlockSize * sizeof(float), inputs.data(), outputs.data());
    for (uint32_t i = 0; i < kBlockSize; i++) {
      const uint32_t reference_output = UINT32_C(0x00000000);
      ASSERT_EQ(reference_output, fp32_to_bits(outputs[i]))
        << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(inputs[i])
        << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
        << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(outputs[i]);
    }
  }
}

TEST(EXPMINUS__SCALAR_RR2_LUT2048_P1, positive_nan) {
  std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
  std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
  for (uint32_t n = UINT32_C(0x7F800001); n < UINT32_C(0x80000000); n += kBlockSize) {
    for (uint32_t i = 0; i < kBlockSize; i++) {
      inputs[i] = fp32_from_bits(std::min(UINT32_C(0x7FFFFFFF), n + i));
    }
    xnn_math_f32_expminus__scalar_rr2_lut2048_p1(kBlockSize * sizeof(float), inputs.data(), outputs.data());
    for (uint32_t i = 0; i < kBlockSize; i++) {
      ASSERT_TRUE(std::isnan(outputs[i]))
        << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(inputs[i])
        << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(outputs[i]);
    }
  }
}

TEST(EXPMINUS__SCALAR_RR2_LUT2048_P1, negative_nan) {
  std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
  std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
  for (uint32_t n = UINT32_C(0x7F800001); n < UINT32_C(0x80000000); n += kBlockSize) {
    for (uint32_t i = 0; i < kBlockSize; i++) {
      inputs[i] = fp32_from_bits(std::min(UINT32_C(0x7FFFFFFF), UINT32_C(0x80000000) | (n + i)));
    }
    xnn_math_f32_expminus__scalar_rr2_lut2048_p1(kBlockSize * sizeof(float), inputs.data(), outputs.data());
    for (uint32_t i = 0; i < kBlockSize; i++) {
      ASSERT_TRUE(std::isnan(outputs[i]))
        << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(inputs[i])
        << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(outputs[i]);
    }
  }
}


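// Scalar variant with a degree-5 polynomial and no lookup table.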
TEST(EXPMINUS__SCALAR_RR2_P5, negative_zero) {
  std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
  std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
  std::fill(inputs.begin(), inputs.end(), -0.0f);
  xnn_math_f32_expminus__scalar_rr2_p5(kBlockSize * sizeof(float), inputs.data(), outputs.data());
  const float reference_output = 1.0f;
  ASSERT_EQ(reference_output, outputs[0])
    << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(inputs[0])
    << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(reference_output)
    << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(outputs[0]);
}

TEST(EXPMINUS__SCALAR_RR2_P5, positive_zero) {
  std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
  std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
  std::fill(inputs.begin(), inputs.end(), +0.0f);
  xnn_math_f32_expminus__scalar_rr2_p5(kBlockSize * sizeof(float), inputs.data(), outputs.data());
  const float reference_output = 1.0f;
  ASSERT_EQ(reference_output, outputs[0])
    << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(inputs[0])
    << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(reference_output)
    << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(outputs[0]);
}

TEST(EXPMINUS__SCALAR_RR2_P5, negative_saturation) {
  std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
  std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
  for (uint32_t n = UINT32_C(0xC2AEAC50); n <= UINT32_C(0xFF800000); n += kBlockSize) {
    for (uint32_t i = 0; i < kBlockSize; i++) {
      inputs[i] = fp32_from_bits(std::min(n + i, UINT32_C(0xFF800000)));
    }
    xnn_math_f32_expminus__scalar_rr2_p5(kBlockSize * sizeof(float), inputs.data(), outputs.data());
    for (uint32_t i = 0; i < kBlockSize; i++) {
      const uint32_t reference_output = UINT32_C(0x00000000);
      ASSERT_EQ(reference_output, fp32_to_bits(outputs[i]))
        << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(inputs[i])
        << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
        << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(outputs[i]);
    }
  }
}

TEST(EXPMINUS__SCALAR_RR2_P5, positive_nan) {
  std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
  std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
  for (uint32_t n = UINT32_C(0x7F800001); n < UINT32_C(0x80000000); n += kBlockSize) {
    for (uint32_t i = 0; i < kBlockSize; i++) {
      inputs[i] = fp32_from_bits(std::min(UINT32_C(0x7FFFFFFF), n + i));
    }
    xnn_math_f32_expminus__scalar_rr2_p5(kBlockSize * sizeof(float), inputs.data(), outputs.data());
    for (uint32_t i = 0; i < kBlockSize; i++) {
      ASSERT_TRUE(std::isnan(outputs[i]))
        << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(inputs[i])
        << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(outputs[i]);
    }
  }
}

TEST(EXPMINUS__SCALAR_RR2_P5, negative_nan) {
  std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
  std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
  for (uint32_t n = UINT32_C(0x7F800001); n < UINT32_C(0x80000000); n += kBlockSize) {
    for (uint32_t i = 0; i < kBlockSize; i++) {
      inputs[i] = fp32_from_bits(std::min(UINT32_C(0x7FFFFFFF), UINT32_C(0x80000000) | (n + i)));
    }
    xnn_math_f32_expminus__scalar_rr2_p5(kBlockSize * sizeof(float), inputs.data(), outputs.data());
    for (uint32_t i = 0; i < kBlockSize; i++) {
      ASSERT_TRUE(std::isnan(outputs[i]))
        << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(inputs[i])
        << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(outputs[i]);
    }
  }
}