// Copyright 2020 Google LLC
//
// This source code is licensed under the BSD-style license found in the
// LICENSE file in the root directory of this source tree.

#include <algorithm>
#include <cmath>
#include <cstddef>
#include <cstdint>
#include <cstdlib>
#include <iomanip>
#include <ios>
#include <limits>
#include <vector>

#include <gtest/gtest.h>

#include <fp16.h>

#include <xnnpack/AlignedAllocator.h>
#include <xnnpack/common.h>
#include <xnnpack/math-stubs.h>


constexpr int kBlockSize = 1024;
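
// The tests below sweep IEEE-754 binary32 bit patterns in blocks of kBlockSize
// consecutive values and compare each kernel against std::nearbyint(), i.e.
// rounding to the nearest integer with ties to even. Sweep boundaries:
//   0x4B800000 = 2**24; at and above this magnitude every binary32 value is integral
//   0x7F800000 = +infinity; 0x7F800001-0x7FBFFFFF are signaling NaNs
//   0x7FC00000-0x7FFFFFFF are quiet NaNs (OR-ing in 0x80000000 gives the negative counterparts)
// In the *_snan tests the mask 0xFFBFFFFF clears the quiet bit (bit 22), so those
// checks ignore whether the kernel quiets signaling NaNs; the *_snan_to_qnan tests
// compare the full bit pattern and therefore do require quieting.

// SSE variant, assumed (from its name) to round by adding and then subtracting a
// large magic constant rather than using a dedicated rounding instruction.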
#if XNN_ARCH_X86 || XNN_ARCH_X86_64
  TEST(ROUNDNE__SSE_ADDSUB, positive_normal) {
    std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
    std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
    for (uint32_t n = UINT32_C(0x00000000); n < UINT32_C(0x4B800000); n += kBlockSize) {
      for (uint32_t i = 0; i < kBlockSize; i++) {
        inputs[i] = fp32_from_bits(n + i);
      }
      xnn_math_f32_roundne__sse_addsub(kBlockSize * sizeof(float), inputs.data(), outputs.data());
      for (uint32_t i = 0; i < kBlockSize; i++) {
        const uint32_t reference_output = fp32_to_bits(std::nearbyint(inputs[i]));
        ASSERT_EQ(reference_output, fp32_to_bits(outputs[i]))
          << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(inputs[i])
          << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
          << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(outputs[i]);
      }
    }
  }

  TEST(ROUNDNE__SSE_ADDSUB, negative_normal) {
    std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
    std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
    for (uint32_t n = UINT32_C(0x80000000); n < UINT32_C(0xCB800000); n += kBlockSize) {
      for (uint32_t i = 0; i < kBlockSize; i++) {
        inputs[i] = fp32_from_bits(n + i);
      }
      xnn_math_f32_roundne__sse_addsub(kBlockSize * sizeof(float), inputs.data(), outputs.data());
      for (uint32_t i = 0; i < kBlockSize; i++) {
        const uint32_t reference_output = fp32_to_bits(std::nearbyint(inputs[i]));
        ASSERT_EQ(reference_output, fp32_to_bits(outputs[i]))
          << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(inputs[i])
          << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
          << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(outputs[i]);
      }
    }
  }

  TEST(ROUNDNE__SSE_ADDSUB, positive_integral) {
    std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
    std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
    for (uint32_t n = UINT32_C(0x4B800000); n < UINT32_C(0x7F800000); n += kBlockSize) {
      for (uint32_t i = 0; i < kBlockSize; i++) {
        inputs[i] = fp32_from_bits(n + i);
      }
      xnn_math_f32_roundne__sse_addsub(kBlockSize * sizeof(float), inputs.data(), outputs.data());
      for (uint32_t i = 0; i < kBlockSize; i++) {
        const uint32_t reference_output = fp32_to_bits(std::nearbyint(inputs[i]));
        ASSERT_EQ(reference_output, fp32_to_bits(outputs[i]))
          << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(inputs[i])
          << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
          << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(outputs[i]);
      }
    }
  }

  TEST(ROUNDNE__SSE_ADDSUB, negative_integral) {
    std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
    std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
    for (uint32_t n = UINT32_C(0xCB800000); n < UINT32_C(0xFF800000); n += kBlockSize) {
      for (uint32_t i = 0; i < kBlockSize; i++) {
        inputs[i] = fp32_from_bits(n + i);
      }
      xnn_math_f32_roundne__sse_addsub(kBlockSize * sizeof(float), inputs.data(), outputs.data());
      for (uint32_t i = 0; i < kBlockSize; i++) {
        const uint32_t reference_output = fp32_to_bits(std::nearbyint(inputs[i]));
        ASSERT_EQ(reference_output, fp32_to_bits(outputs[i]))
          << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(inputs[i])
          << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
          << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(outputs[i]);
      }
    }
  }

  TEST(ROUNDNE__SSE_ADDSUB, positive_infinity) {
    std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
    std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
    std::fill(inputs.begin(), inputs.end(), +std::numeric_limits<float>::infinity());
    xnn_math_f32_roundne__sse_addsub(kBlockSize * sizeof(float), inputs.data(), outputs.data());
    const uint32_t reference_output = fp32_to_bits(std::nearbyint(inputs[0]));
    ASSERT_EQ(reference_output, fp32_to_bits(outputs[0]))
      << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(inputs[0])
      << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
      << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(outputs[0]);
  }

  TEST(ROUNDNE__SSE_ADDSUB, negative_infinity) {
    std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
    std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
    std::fill(inputs.begin(), inputs.end(), -std::numeric_limits<float>::infinity());
    xnn_math_f32_roundne__sse_addsub(kBlockSize * sizeof(float), inputs.data(), outputs.data());
    const uint32_t reference_output = fp32_to_bits(std::nearbyint(inputs[0]));
    ASSERT_EQ(reference_output, fp32_to_bits(outputs[0]))
      << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(inputs[0])
      << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
      << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(outputs[0]);
  }

  TEST(ROUNDNE__SSE_ADDSUB, positive_qnan) {
    std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
    std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
    for (uint32_t n = UINT32_C(0x7FC00000); n < UINT32_C(0x80000000); n += kBlockSize) {
      for (uint32_t i = 0; i < kBlockSize; i++) {
        inputs[i] = fp32_from_bits(n + i);
      }
      xnn_math_f32_roundne__sse_addsub(kBlockSize * sizeof(float), inputs.data(), outputs.data());
      for (uint32_t i = 0; i < kBlockSize; i++) {
        const uint32_t reference_output = fp32_to_bits(std::nearbyint(inputs[i]));
        ASSERT_EQ(reference_output, fp32_to_bits(outputs[i]))
          << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(inputs[i])
          << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
          << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(outputs[i]);
      }
    }
  }

  TEST(ROUNDNE__SSE_ADDSUB, negative_qnan) {
    std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
    std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
    for (uint32_t n = UINT32_C(0x7FC00000); n < UINT32_C(0x80000000); n += kBlockSize) {
      for (uint32_t i = 0; i < kBlockSize; i++) {
        inputs[i] = fp32_from_bits(UINT32_C(0x80000000) | (n + i));
      }
      xnn_math_f32_roundne__sse_addsub(kBlockSize * sizeof(float), inputs.data(), outputs.data());
      for (uint32_t i = 0; i < kBlockSize; i++) {
        const uint32_t reference_output = fp32_to_bits(std::nearbyint(inputs[i]));
        ASSERT_EQ(reference_output, fp32_to_bits(outputs[i]))
          << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(inputs[i])
          << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
          << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(outputs[i]);
      }
    }
  }

  TEST(ROUNDNE__SSE_ADDSUB, positive_snan) {
    std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
    std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
    for (uint32_t n = UINT32_C(0x7F800000); n < UINT32_C(0x7FC00000); n += kBlockSize) {
      for (uint32_t i = 0; i < kBlockSize; i++) {
        inputs[i] = fp32_from_bits(std::max<uint32_t>(n + i, UINT32_C(0x7F800001)));
      }
      xnn_math_f32_roundne__sse_addsub(kBlockSize * sizeof(float), inputs.data(), outputs.data());
      for (uint32_t i = 0; i < kBlockSize; i++) {
        const uint32_t reference_output = fp32_to_bits(std::nearbyint(inputs[i]));
        ASSERT_EQ(reference_output & UINT32_C(0xFFBFFFFF), fp32_to_bits(outputs[i]) & UINT32_C(0xFFBFFFFF))
          << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(inputs[i])
          << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
          << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(outputs[i]);
      }
    }
  }

  TEST(ROUNDNE__SSE_ADDSUB, negative_snan) {
    std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
    std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
    for (uint32_t n = UINT32_C(0x7F800000); n < UINT32_C(0x7FC00000); n += kBlockSize) {
      for (uint32_t i = 0; i < kBlockSize; i++) {
        inputs[i] = fp32_from_bits(UINT32_C(0x80000000) | std::max<uint32_t>(n + i, UINT32_C(0x7F800001)));
      }
      xnn_math_f32_roundne__sse_addsub(kBlockSize * sizeof(float), inputs.data(), outputs.data());
      for (uint32_t i = 0; i < kBlockSize; i++) {
        const uint32_t reference_output = fp32_to_bits(std::nearbyint(inputs[i]));
        ASSERT_EQ(reference_output & UINT32_C(0xFFBFFFFF), fp32_to_bits(outputs[i]) & UINT32_C(0xFFBFFFFF))
          << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(inputs[i])
          << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
          << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(outputs[i]);
      }
    }
  }

  TEST(ROUNDNE__SSE_ADDSUB, positive_snan_to_qnan) {
    std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
    std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
    for (uint32_t n = UINT32_C(0x7F800000); n < UINT32_C(0x7FC00000); n += kBlockSize) {
      for (uint32_t i = 0; i < kBlockSize; i++) {
        inputs[i] = fp32_from_bits(std::max<uint32_t>(n + i, UINT32_C(0x7F800001)));
      }
      xnn_math_f32_roundne__sse_addsub(kBlockSize * sizeof(float), inputs.data(), outputs.data());
      for (uint32_t i = 0; i < kBlockSize; i++) {
        const uint32_t reference_output = fp32_to_bits(std::nearbyint(inputs[i]));
        ASSERT_EQ(reference_output, fp32_to_bits(outputs[i]))
          << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(inputs[i])
          << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
          << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(outputs[i]);
      }
    }
  }

  TEST(ROUNDNE__SSE_ADDSUB, negative_snan_to_qnan) {
    std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
    std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
    for (uint32_t n = UINT32_C(0x7F800000); n < UINT32_C(0x7FC00000); n += kBlockSize) {
      for (uint32_t i = 0; i < kBlockSize; i++) {
        inputs[i] = fp32_from_bits(UINT32_C(0x80000000) | std::max<uint32_t>(n + i, UINT32_C(0x7F800001)));
      }
      xnn_math_f32_roundne__sse_addsub(kBlockSize * sizeof(float), inputs.data(), outputs.data());
      for (uint32_t i = 0; i < kBlockSize; i++) {
        const uint32_t reference_output = fp32_to_bits(std::nearbyint(inputs[i]));
        ASSERT_EQ(reference_output, fp32_to_bits(outputs[i]))
          << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(inputs[i])
          << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
          << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(outputs[i]);
      }
    }
  }
#endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
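
// SSE2 variant, presumably rounding via conversion to int32 and back
// (CVTPS2DQ/CVTDQ2PS) with a separate path for inputs outside the int32 range.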
232#if XNN_ARCH_X86 || XNN_ARCH_X86_64
Marat Dukhan075088a2020-05-12 19:42:12 -0700233 TEST(ROUNDNE__SSE2_CVT, positive_normal) {
Marat Dukhan8853b822020-05-07 12:19:01 -0700234 std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
235 std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
236 for (uint32_t n = UINT32_C(0x00000000); n < UINT32_C(0x4B800000); n += kBlockSize) {
237 for (uint32_t i = 0; i < kBlockSize; i++) {
238 inputs[i] = fp32_from_bits(n + i);
239 }
Marat Dukhan075088a2020-05-12 19:42:12 -0700240 xnn_math_f32_roundne__sse2_cvt(kBlockSize * sizeof(float), inputs.data(), outputs.data());
Marat Dukhan8853b822020-05-07 12:19:01 -0700241 for (uint32_t i = 0; i < kBlockSize; i++) {
242 const uint32_t reference_output = fp32_to_bits(std::nearbyint(inputs[i]));
243 ASSERT_EQ(reference_output, fp32_to_bits(outputs[i]))
244 << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(inputs[i])
245 << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
246 << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(outputs[i]);
247 }
248 }
249 }
250
Marat Dukhan075088a2020-05-12 19:42:12 -0700251 TEST(ROUNDNE__SSE2_CVT, negative_normal) {
Marat Dukhan8853b822020-05-07 12:19:01 -0700252 std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
253 std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
254 for (uint32_t n = UINT32_C(0x80000000); n < UINT32_C(0xCB800000); n += kBlockSize) {
255 for (uint32_t i = 0; i < kBlockSize; i++) {
256 inputs[i] = fp32_from_bits(n + i);
257 }
Marat Dukhan075088a2020-05-12 19:42:12 -0700258 xnn_math_f32_roundne__sse2_cvt(kBlockSize * sizeof(float), inputs.data(), outputs.data());
Marat Dukhan8853b822020-05-07 12:19:01 -0700259 for (uint32_t i = 0; i < kBlockSize; i++) {
260 const uint32_t reference_output = fp32_to_bits(std::nearbyint(inputs[i]));
261 ASSERT_EQ(reference_output, fp32_to_bits(outputs[i]))
262 << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(inputs[i])
263 << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
264 << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(outputs[i]);
265 }
266 }
267 }
268
Marat Dukhan075088a2020-05-12 19:42:12 -0700269 TEST(ROUNDNE__SSE2_CVT, positive_integral) {
Marat Dukhan8853b822020-05-07 12:19:01 -0700270 std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
271 std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
272 for (uint32_t n = UINT32_C(0x4B800000); n < UINT32_C(0x7F800000); n += kBlockSize) {
273 for (uint32_t i = 0; i < kBlockSize; i++) {
274 inputs[i] = fp32_from_bits(n + i);
275 }
Marat Dukhan075088a2020-05-12 19:42:12 -0700276 xnn_math_f32_roundne__sse2_cvt(kBlockSize * sizeof(float), inputs.data(), outputs.data());
Marat Dukhan8853b822020-05-07 12:19:01 -0700277 for (uint32_t i = 0; i < kBlockSize; i++) {
278 const uint32_t reference_output = fp32_to_bits(std::nearbyint(inputs[i]));
279 ASSERT_EQ(reference_output, fp32_to_bits(outputs[i]))
280 << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(inputs[i])
281 << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
282 << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(outputs[i]);
283 }
284 }
285 }
286
Marat Dukhan075088a2020-05-12 19:42:12 -0700287 TEST(ROUNDNE__SSE2_CVT, negative_integral) {
Marat Dukhan8853b822020-05-07 12:19:01 -0700288 std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
289 std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
290 for (uint32_t n = UINT32_C(0xCB800000); n < UINT32_C(0xFF800000); n += kBlockSize) {
291 for (uint32_t i = 0; i < kBlockSize; i++) {
292 inputs[i] = fp32_from_bits(n + i);
293 }
Marat Dukhan075088a2020-05-12 19:42:12 -0700294 xnn_math_f32_roundne__sse2_cvt(kBlockSize * sizeof(float), inputs.data(), outputs.data());
Marat Dukhan8853b822020-05-07 12:19:01 -0700295 for (uint32_t i = 0; i < kBlockSize; i++) {
296 const uint32_t reference_output = fp32_to_bits(std::nearbyint(inputs[i]));
297 ASSERT_EQ(reference_output, fp32_to_bits(outputs[i]))
298 << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(inputs[i])
299 << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
300 << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(outputs[i]);
301 }
302 }
303 }
304
Marat Dukhan075088a2020-05-12 19:42:12 -0700305 TEST(ROUNDNE__SSE2_CVT, positive_infinity) {
Marat Dukhan8853b822020-05-07 12:19:01 -0700306 std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
307 std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
Marat Dukhand3102142020-06-08 01:24:01 -0700308 std::fill(inputs.begin(), inputs.end(), +std::numeric_limits<float>::infinity());
Marat Dukhan075088a2020-05-12 19:42:12 -0700309 xnn_math_f32_roundne__sse2_cvt(kBlockSize * sizeof(float), inputs.data(), outputs.data());
Marat Dukhan8853b822020-05-07 12:19:01 -0700310 const uint32_t reference_output = fp32_to_bits(std::nearbyint(inputs[0]));
311 ASSERT_EQ(reference_output, fp32_to_bits(outputs[0]))
312 << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(inputs[0])
313 << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
314 << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(outputs[0]);
315 }
316
Marat Dukhan075088a2020-05-12 19:42:12 -0700317 TEST(ROUNDNE__SSE2_CVT, negative_infinity) {
Marat Dukhan8853b822020-05-07 12:19:01 -0700318 std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
319 std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
Marat Dukhand3102142020-06-08 01:24:01 -0700320 std::fill(inputs.begin(), inputs.end(), -std::numeric_limits<float>::infinity());
Marat Dukhan075088a2020-05-12 19:42:12 -0700321 xnn_math_f32_roundne__sse2_cvt(kBlockSize * sizeof(float), inputs.data(), outputs.data());
Marat Dukhan8853b822020-05-07 12:19:01 -0700322 const uint32_t reference_output = fp32_to_bits(std::nearbyint(inputs[0]));
323 ASSERT_EQ(reference_output, fp32_to_bits(outputs[0]))
324 << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(inputs[0])
325 << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
326 << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(outputs[0]);
327 }
328
Marat Dukhan075088a2020-05-12 19:42:12 -0700329 TEST(ROUNDNE__SSE2_CVT, positive_qnan) {
Marat Dukhan8853b822020-05-07 12:19:01 -0700330 std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
331 std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
332 for (uint32_t n = UINT32_C(0x7FC00000); n < UINT32_C(0x80000000); n += kBlockSize) {
333 for (uint32_t i = 0; i < kBlockSize; i++) {
334 inputs[i] = fp32_from_bits(n + i);
335 }
Marat Dukhan075088a2020-05-12 19:42:12 -0700336 xnn_math_f32_roundne__sse2_cvt(kBlockSize * sizeof(float), inputs.data(), outputs.data());
Marat Dukhan8853b822020-05-07 12:19:01 -0700337 for (uint32_t i = 0; i < kBlockSize; i++) {
338 const uint32_t reference_output = fp32_to_bits(std::nearbyint(inputs[i]));
339 ASSERT_EQ(reference_output, fp32_to_bits(outputs[i]))
340 << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(inputs[i])
341 << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
342 << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(outputs[i]);
343 }
344 }
345 }
346
Marat Dukhan075088a2020-05-12 19:42:12 -0700347 TEST(ROUNDNE__SSE2_CVT, negative_qnan) {
Marat Dukhan8853b822020-05-07 12:19:01 -0700348 std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
349 std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
350 for (uint32_t n = UINT32_C(0x7FC00000); n < UINT32_C(0x80000000); n += kBlockSize) {
351 for (uint32_t i = 0; i < kBlockSize; i++) {
352 inputs[i] = fp32_from_bits(UINT32_C(0x80000000) | (n + i));
353 }
Marat Dukhan075088a2020-05-12 19:42:12 -0700354 xnn_math_f32_roundne__sse2_cvt(kBlockSize * sizeof(float), inputs.data(), outputs.data());
Marat Dukhan8853b822020-05-07 12:19:01 -0700355 for (uint32_t i = 0; i < kBlockSize; i++) {
356 const uint32_t reference_output = fp32_to_bits(std::nearbyint(inputs[i]));
357 ASSERT_EQ(reference_output, fp32_to_bits(outputs[i]))
358 << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(inputs[i])
359 << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
360 << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(outputs[i]);
361 }
362 }
363 }
364
Marat Dukhan075088a2020-05-12 19:42:12 -0700365 TEST(ROUNDNE__SSE2_CVT, positive_snan) {
Marat Dukhan8853b822020-05-07 12:19:01 -0700366 std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
367 std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
368 for (uint32_t n = UINT32_C(0x7F800000); n < UINT32_C(0x7FC00000); n += kBlockSize) {
369 for (uint32_t i = 0; i < kBlockSize; i++) {
370 inputs[i] = fp32_from_bits(std::max<uint32_t>(n + i, UINT32_C(0x7F800001)));
371 }
Marat Dukhan075088a2020-05-12 19:42:12 -0700372 xnn_math_f32_roundne__sse2_cvt(kBlockSize * sizeof(float), inputs.data(), outputs.data());
Marat Dukhan8853b822020-05-07 12:19:01 -0700373 for (uint32_t i = 0; i < kBlockSize; i++) {
374 const uint32_t reference_output = fp32_to_bits(std::nearbyint(inputs[i]));
375 ASSERT_EQ(reference_output & UINT32_C(0xFFBFFFFF), fp32_to_bits(outputs[i]) & UINT32_C(0xFFBFFFFF))
376 << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(inputs[i])
377 << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
378 << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(outputs[i]);
379 }
380 }
381 }
382
Marat Dukhan075088a2020-05-12 19:42:12 -0700383 TEST(ROUNDNE__SSE2_CVT, negative_snan) {
Marat Dukhan8853b822020-05-07 12:19:01 -0700384 std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
385 std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
386 for (uint32_t n = UINT32_C(0x7F800000); n < UINT32_C(0x7FC00000); n += kBlockSize) {
387 for (uint32_t i = 0; i < kBlockSize; i++) {
388 inputs[i] = fp32_from_bits(UINT32_C(0x80000000) | std::max<uint32_t>(n + i, UINT32_C(0x7F800001)));
389 }
Marat Dukhan075088a2020-05-12 19:42:12 -0700390 xnn_math_f32_roundne__sse2_cvt(kBlockSize * sizeof(float), inputs.data(), outputs.data());
Marat Dukhan8853b822020-05-07 12:19:01 -0700391 for (uint32_t i = 0; i < kBlockSize; i++) {
392 const uint32_t reference_output = fp32_to_bits(std::nearbyint(inputs[i]));
393 ASSERT_EQ(reference_output & UINT32_C(0xFFBFFFFF), fp32_to_bits(outputs[i]) & UINT32_C(0xFFBFFFFF))
394 << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(inputs[i])
395 << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
396 << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(outputs[i]);
397 }
398 }
399 }
400
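
// The two snan_to_qnan checks below require the kernel to quiet signaling NaNs.
// They carry GoogleTest's DISABLED_ prefix, so they are registered but skipped by
// default, presumably because this variant does not guarantee that behavior.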
  TEST(ROUNDNE__SSE2_CVT, DISABLED_positive_snan_to_qnan) {
    std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
    std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
    for (uint32_t n = UINT32_C(0x7F800000); n < UINT32_C(0x7FC00000); n += kBlockSize) {
      for (uint32_t i = 0; i < kBlockSize; i++) {
        inputs[i] = fp32_from_bits(std::max<uint32_t>(n + i, UINT32_C(0x7F800001)));
      }
      xnn_math_f32_roundne__sse2_cvt(kBlockSize * sizeof(float), inputs.data(), outputs.data());
      for (uint32_t i = 0; i < kBlockSize; i++) {
        const uint32_t reference_output = fp32_to_bits(std::nearbyint(inputs[i]));
        ASSERT_EQ(reference_output, fp32_to_bits(outputs[i]))
          << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(inputs[i])
          << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
          << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(outputs[i]);
      }
    }
  }

  TEST(ROUNDNE__SSE2_CVT, DISABLED_negative_snan_to_qnan) {
    std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
    std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
    for (uint32_t n = UINT32_C(0x7F800000); n < UINT32_C(0x7FC00000); n += kBlockSize) {
      for (uint32_t i = 0; i < kBlockSize; i++) {
        inputs[i] = fp32_from_bits(UINT32_C(0x80000000) | std::max<uint32_t>(n + i, UINT32_C(0x7F800001)));
      }
      xnn_math_f32_roundne__sse2_cvt(kBlockSize * sizeof(float), inputs.data(), outputs.data());
      for (uint32_t i = 0; i < kBlockSize; i++) {
        const uint32_t reference_output = fp32_to_bits(std::nearbyint(inputs[i]));
        ASSERT_EQ(reference_output, fp32_to_bits(outputs[i]))
          << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(inputs[i])
          << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
          << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(outputs[i]);
      }
    }
  }
#endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
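
// SSE4.1 variant, presumably a direct use of the ROUNDPS instruction with the
// round-to-nearest-even rounding mode.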
438#if XNN_ARCH_X86 || XNN_ARCH_X86_64
Marat Dukhan4781dd02020-05-12 15:40:18 -0700439 TEST(ROUNDNE__SSE41, positive_normal) {
Marat Dukhan8853b822020-05-07 12:19:01 -0700440 std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
441 std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
442 for (uint32_t n = UINT32_C(0x00000000); n < UINT32_C(0x4B800000); n += kBlockSize) {
443 for (uint32_t i = 0; i < kBlockSize; i++) {
444 inputs[i] = fp32_from_bits(n + i);
445 }
446 xnn_math_f32_roundne__sse41(kBlockSize * sizeof(float), inputs.data(), outputs.data());
447 for (uint32_t i = 0; i < kBlockSize; i++) {
448 const uint32_t reference_output = fp32_to_bits(std::nearbyint(inputs[i]));
449 ASSERT_EQ(reference_output, fp32_to_bits(outputs[i]))
450 << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(inputs[i])
451 << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
452 << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(outputs[i]);
453 }
454 }
455 }
456
Marat Dukhan4781dd02020-05-12 15:40:18 -0700457 TEST(ROUNDNE__SSE41, negative_normal) {
Marat Dukhan8853b822020-05-07 12:19:01 -0700458 std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
459 std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
460 for (uint32_t n = UINT32_C(0x80000000); n < UINT32_C(0xCB800000); n += kBlockSize) {
461 for (uint32_t i = 0; i < kBlockSize; i++) {
462 inputs[i] = fp32_from_bits(n + i);
463 }
464 xnn_math_f32_roundne__sse41(kBlockSize * sizeof(float), inputs.data(), outputs.data());
465 for (uint32_t i = 0; i < kBlockSize; i++) {
466 const uint32_t reference_output = fp32_to_bits(std::nearbyint(inputs[i]));
467 ASSERT_EQ(reference_output, fp32_to_bits(outputs[i]))
468 << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(inputs[i])
469 << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
470 << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(outputs[i]);
471 }
472 }
473 }
474
Marat Dukhan4781dd02020-05-12 15:40:18 -0700475 TEST(ROUNDNE__SSE41, positive_integral) {
Marat Dukhan8853b822020-05-07 12:19:01 -0700476 std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
477 std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
478 for (uint32_t n = UINT32_C(0x4B800000); n < UINT32_C(0x7F800000); n += kBlockSize) {
479 for (uint32_t i = 0; i < kBlockSize; i++) {
480 inputs[i] = fp32_from_bits(n + i);
481 }
482 xnn_math_f32_roundne__sse41(kBlockSize * sizeof(float), inputs.data(), outputs.data());
483 for (uint32_t i = 0; i < kBlockSize; i++) {
484 const uint32_t reference_output = fp32_to_bits(std::nearbyint(inputs[i]));
485 ASSERT_EQ(reference_output, fp32_to_bits(outputs[i]))
486 << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(inputs[i])
487 << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
488 << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(outputs[i]);
489 }
490 }
491 }
492
Marat Dukhan4781dd02020-05-12 15:40:18 -0700493 TEST(ROUNDNE__SSE41, negative_integral) {
Marat Dukhan8853b822020-05-07 12:19:01 -0700494 std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
495 std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
496 for (uint32_t n = UINT32_C(0xCB800000); n < UINT32_C(0xFF800000); n += kBlockSize) {
497 for (uint32_t i = 0; i < kBlockSize; i++) {
498 inputs[i] = fp32_from_bits(n + i);
499 }
500 xnn_math_f32_roundne__sse41(kBlockSize * sizeof(float), inputs.data(), outputs.data());
501 for (uint32_t i = 0; i < kBlockSize; i++) {
502 const uint32_t reference_output = fp32_to_bits(std::nearbyint(inputs[i]));
503 ASSERT_EQ(reference_output, fp32_to_bits(outputs[i]))
504 << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(inputs[i])
505 << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
506 << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(outputs[i]);
507 }
508 }
509 }
510
Marat Dukhan4781dd02020-05-12 15:40:18 -0700511 TEST(ROUNDNE__SSE41, positive_infinity) {
Marat Dukhan8853b822020-05-07 12:19:01 -0700512 std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
513 std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
Marat Dukhand3102142020-06-08 01:24:01 -0700514 std::fill(inputs.begin(), inputs.end(), +std::numeric_limits<float>::infinity());
Marat Dukhan8853b822020-05-07 12:19:01 -0700515 xnn_math_f32_roundne__sse41(kBlockSize * sizeof(float), inputs.data(), outputs.data());
516 const uint32_t reference_output = fp32_to_bits(std::nearbyint(inputs[0]));
517 ASSERT_EQ(reference_output, fp32_to_bits(outputs[0]))
518 << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(inputs[0])
519 << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
520 << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(outputs[0]);
521 }
522
Marat Dukhan4781dd02020-05-12 15:40:18 -0700523 TEST(ROUNDNE__SSE41, negative_infinity) {
Marat Dukhan8853b822020-05-07 12:19:01 -0700524 std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
525 std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
Marat Dukhand3102142020-06-08 01:24:01 -0700526 std::fill(inputs.begin(), inputs.end(), -std::numeric_limits<float>::infinity());
Marat Dukhan8853b822020-05-07 12:19:01 -0700527 xnn_math_f32_roundne__sse41(kBlockSize * sizeof(float), inputs.data(), outputs.data());
528 const uint32_t reference_output = fp32_to_bits(std::nearbyint(inputs[0]));
529 ASSERT_EQ(reference_output, fp32_to_bits(outputs[0]))
530 << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(inputs[0])
531 << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
532 << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(outputs[0]);
533 }
534
Marat Dukhan4781dd02020-05-12 15:40:18 -0700535 TEST(ROUNDNE__SSE41, positive_qnan) {
Marat Dukhan8853b822020-05-07 12:19:01 -0700536 std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
537 std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
538 for (uint32_t n = UINT32_C(0x7FC00000); n < UINT32_C(0x80000000); n += kBlockSize) {
539 for (uint32_t i = 0; i < kBlockSize; i++) {
540 inputs[i] = fp32_from_bits(n + i);
541 }
542 xnn_math_f32_roundne__sse41(kBlockSize * sizeof(float), inputs.data(), outputs.data());
543 for (uint32_t i = 0; i < kBlockSize; i++) {
544 const uint32_t reference_output = fp32_to_bits(std::nearbyint(inputs[i]));
545 ASSERT_EQ(reference_output, fp32_to_bits(outputs[i]))
546 << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(inputs[i])
547 << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
548 << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(outputs[i]);
549 }
550 }
551 }
552
Marat Dukhan4781dd02020-05-12 15:40:18 -0700553 TEST(ROUNDNE__SSE41, negative_qnan) {
Marat Dukhan8853b822020-05-07 12:19:01 -0700554 std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
555 std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
556 for (uint32_t n = UINT32_C(0x7FC00000); n < UINT32_C(0x80000000); n += kBlockSize) {
557 for (uint32_t i = 0; i < kBlockSize; i++) {
558 inputs[i] = fp32_from_bits(UINT32_C(0x80000000) | (n + i));
559 }
560 xnn_math_f32_roundne__sse41(kBlockSize * sizeof(float), inputs.data(), outputs.data());
561 for (uint32_t i = 0; i < kBlockSize; i++) {
562 const uint32_t reference_output = fp32_to_bits(std::nearbyint(inputs[i]));
563 ASSERT_EQ(reference_output, fp32_to_bits(outputs[i]))
564 << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(inputs[i])
565 << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
566 << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(outputs[i]);
567 }
568 }
569 }
570
Marat Dukhan4781dd02020-05-12 15:40:18 -0700571 TEST(ROUNDNE__SSE41, positive_snan) {
Marat Dukhan8853b822020-05-07 12:19:01 -0700572 std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
573 std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
574 for (uint32_t n = UINT32_C(0x7F800000); n < UINT32_C(0x7FC00000); n += kBlockSize) {
575 for (uint32_t i = 0; i < kBlockSize; i++) {
576 inputs[i] = fp32_from_bits(std::max<uint32_t>(n + i, UINT32_C(0x7F800001)));
577 }
578 xnn_math_f32_roundne__sse41(kBlockSize * sizeof(float), inputs.data(), outputs.data());
579 for (uint32_t i = 0; i < kBlockSize; i++) {
580 const uint32_t reference_output = fp32_to_bits(std::nearbyint(inputs[i]));
581 ASSERT_EQ(reference_output & UINT32_C(0xFFBFFFFF), fp32_to_bits(outputs[i]) & UINT32_C(0xFFBFFFFF))
582 << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(inputs[i])
583 << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
584 << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(outputs[i]);
585 }
586 }
587 }
588
Marat Dukhan4781dd02020-05-12 15:40:18 -0700589 TEST(ROUNDNE__SSE41, negative_snan) {
Marat Dukhan8853b822020-05-07 12:19:01 -0700590 std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
591 std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
592 for (uint32_t n = UINT32_C(0x7F800000); n < UINT32_C(0x7FC00000); n += kBlockSize) {
593 for (uint32_t i = 0; i < kBlockSize; i++) {
594 inputs[i] = fp32_from_bits(UINT32_C(0x80000000) | std::max<uint32_t>(n + i, UINT32_C(0x7F800001)));
595 }
596 xnn_math_f32_roundne__sse41(kBlockSize * sizeof(float), inputs.data(), outputs.data());
597 for (uint32_t i = 0; i < kBlockSize; i++) {
598 const uint32_t reference_output = fp32_to_bits(std::nearbyint(inputs[i]));
599 ASSERT_EQ(reference_output & UINT32_C(0xFFBFFFFF), fp32_to_bits(outputs[i]) & UINT32_C(0xFFBFFFFF))
600 << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(inputs[i])
601 << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
602 << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(outputs[i]);
603 }
604 }
605 }
606
Marat Dukhan4781dd02020-05-12 15:40:18 -0700607 TEST(ROUNDNE__SSE41, positive_snan_to_qnan) {
Marat Dukhan8853b822020-05-07 12:19:01 -0700608 std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
609 std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
610 for (uint32_t n = UINT32_C(0x7F800000); n < UINT32_C(0x7FC00000); n += kBlockSize) {
611 for (uint32_t i = 0; i < kBlockSize; i++) {
612 inputs[i] = fp32_from_bits(std::max<uint32_t>(n + i, UINT32_C(0x7F800001)));
613 }
614 xnn_math_f32_roundne__sse41(kBlockSize * sizeof(float), inputs.data(), outputs.data());
615 for (uint32_t i = 0; i < kBlockSize; i++) {
616 const uint32_t reference_output = fp32_to_bits(std::nearbyint(inputs[i]));
617 ASSERT_EQ(reference_output, fp32_to_bits(outputs[i]))
618 << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(inputs[i])
619 << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
620 << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(outputs[i]);
621 }
622 }
623 }
624
Marat Dukhan4781dd02020-05-12 15:40:18 -0700625 TEST(ROUNDNE__SSE41, negative_snan_to_qnan) {
Marat Dukhan8853b822020-05-07 12:19:01 -0700626 std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
627 std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
628 for (uint32_t n = UINT32_C(0x7F800000); n < UINT32_C(0x7FC00000); n += kBlockSize) {
629 for (uint32_t i = 0; i < kBlockSize; i++) {
630 inputs[i] = fp32_from_bits(UINT32_C(0x80000000) | std::max<uint32_t>(n + i, UINT32_C(0x7F800001)));
631 }
632 xnn_math_f32_roundne__sse41(kBlockSize * sizeof(float), inputs.data(), outputs.data());
633 for (uint32_t i = 0; i < kBlockSize; i++) {
634 const uint32_t reference_output = fp32_to_bits(std::nearbyint(inputs[i]));
635 ASSERT_EQ(reference_output, fp32_to_bits(outputs[i]))
636 << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(inputs[i])
637 << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
638 << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(outputs[i]);
639 }
640 }
641 }
642#endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
643
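
// NEON variant, assumed (from its name) to use the same add/subtract-a-magic-constant
// rounding approach as the SSE addsub variant above.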
644#if XNN_ARCH_ARM || XNN_ARCH_ARM64
Marat Dukhan075088a2020-05-12 19:42:12 -0700645 TEST(ROUNDNE__NEON_ADDSUB, positive_normal) {
Marat Dukhan8853b822020-05-07 12:19:01 -0700646 std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
647 std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
648 for (uint32_t n = UINT32_C(0x00000000); n < UINT32_C(0x4B800000); n += kBlockSize) {
649 for (uint32_t i = 0; i < kBlockSize; i++) {
650 inputs[i] = fp32_from_bits(n + i);
651 }
Marat Dukhan075088a2020-05-12 19:42:12 -0700652 xnn_math_f32_roundne__neon_addsub(kBlockSize * sizeof(float), inputs.data(), outputs.data());
Marat Dukhan8853b822020-05-07 12:19:01 -0700653 for (uint32_t i = 0; i < kBlockSize; i++) {
654 const uint32_t reference_output = fp32_to_bits(std::nearbyint(inputs[i]));
655 ASSERT_EQ(reference_output, fp32_to_bits(outputs[i]))
656 << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(inputs[i])
657 << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
658 << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(outputs[i]);
659 }
660 }
661 }
662
Marat Dukhan075088a2020-05-12 19:42:12 -0700663 TEST(ROUNDNE__NEON_ADDSUB, negative_normal) {
Marat Dukhan8853b822020-05-07 12:19:01 -0700664 std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
665 std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
666 for (uint32_t n = UINT32_C(0x80000000); n < UINT32_C(0xCB800000); n += kBlockSize) {
667 for (uint32_t i = 0; i < kBlockSize; i++) {
668 inputs[i] = fp32_from_bits(n + i);
669 }
Marat Dukhan075088a2020-05-12 19:42:12 -0700670 xnn_math_f32_roundne__neon_addsub(kBlockSize * sizeof(float), inputs.data(), outputs.data());
Marat Dukhan8853b822020-05-07 12:19:01 -0700671 for (uint32_t i = 0; i < kBlockSize; i++) {
672 const uint32_t reference_output = fp32_to_bits(std::nearbyint(inputs[i]));
673 ASSERT_EQ(reference_output, fp32_to_bits(outputs[i]))
674 << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(inputs[i])
675 << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
676 << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(outputs[i]);
677 }
678 }
679 }
680
Marat Dukhan075088a2020-05-12 19:42:12 -0700681 TEST(ROUNDNE__NEON_ADDSUB, positive_integral) {
Marat Dukhan8853b822020-05-07 12:19:01 -0700682 std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
683 std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
684 for (uint32_t n = UINT32_C(0x4B800000); n < UINT32_C(0x7F800000); n += kBlockSize) {
685 for (uint32_t i = 0; i < kBlockSize; i++) {
686 inputs[i] = fp32_from_bits(n + i);
687 }
Marat Dukhan075088a2020-05-12 19:42:12 -0700688 xnn_math_f32_roundne__neon_addsub(kBlockSize * sizeof(float), inputs.data(), outputs.data());
Marat Dukhan8853b822020-05-07 12:19:01 -0700689 for (uint32_t i = 0; i < kBlockSize; i++) {
690 const uint32_t reference_output = fp32_to_bits(std::nearbyint(inputs[i]));
691 ASSERT_EQ(reference_output, fp32_to_bits(outputs[i]))
692 << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(inputs[i])
693 << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
694 << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(outputs[i]);
695 }
696 }
697 }
698
Marat Dukhan075088a2020-05-12 19:42:12 -0700699 TEST(ROUNDNE__NEON_ADDSUB, negative_integral) {
Marat Dukhan8853b822020-05-07 12:19:01 -0700700 std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
701 std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
702 for (uint32_t n = UINT32_C(0xCB800000); n < UINT32_C(0xFF800000); n += kBlockSize) {
703 for (uint32_t i = 0; i < kBlockSize; i++) {
704 inputs[i] = fp32_from_bits(n + i);
705 }
Marat Dukhan075088a2020-05-12 19:42:12 -0700706 xnn_math_f32_roundne__neon_addsub(kBlockSize * sizeof(float), inputs.data(), outputs.data());
Marat Dukhan8853b822020-05-07 12:19:01 -0700707 for (uint32_t i = 0; i < kBlockSize; i++) {
708 const uint32_t reference_output = fp32_to_bits(std::nearbyint(inputs[i]));
709 ASSERT_EQ(reference_output, fp32_to_bits(outputs[i]))
710 << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(inputs[i])
711 << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
712 << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(outputs[i]);
713 }
714 }
715 }
716
Marat Dukhan075088a2020-05-12 19:42:12 -0700717 TEST(ROUNDNE__NEON_ADDSUB, positive_infinity) {
Marat Dukhan8853b822020-05-07 12:19:01 -0700718 std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
719 std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
Marat Dukhand3102142020-06-08 01:24:01 -0700720 std::fill(inputs.begin(), inputs.end(), +std::numeric_limits<float>::infinity());
Marat Dukhan075088a2020-05-12 19:42:12 -0700721 xnn_math_f32_roundne__neon_addsub(kBlockSize * sizeof(float), inputs.data(), outputs.data());
Marat Dukhan8853b822020-05-07 12:19:01 -0700722 const uint32_t reference_output = fp32_to_bits(std::nearbyint(inputs[0]));
723 ASSERT_EQ(reference_output, fp32_to_bits(outputs[0]))
724 << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(inputs[0])
725 << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
726 << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(outputs[0]);
727 }
728
Marat Dukhan075088a2020-05-12 19:42:12 -0700729 TEST(ROUNDNE__NEON_ADDSUB, negative_infinity) {
Marat Dukhan8853b822020-05-07 12:19:01 -0700730 std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
731 std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
Marat Dukhand3102142020-06-08 01:24:01 -0700732 std::fill(inputs.begin(), inputs.end(), -std::numeric_limits<float>::infinity());
Marat Dukhan075088a2020-05-12 19:42:12 -0700733 xnn_math_f32_roundne__neon_addsub(kBlockSize * sizeof(float), inputs.data(), outputs.data());
Marat Dukhan8853b822020-05-07 12:19:01 -0700734 const uint32_t reference_output = fp32_to_bits(std::nearbyint(inputs[0]));
735 ASSERT_EQ(reference_output, fp32_to_bits(outputs[0]))
736 << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(inputs[0])
737 << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
738 << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(outputs[0]);
739 }
740
Marat Dukhan075088a2020-05-12 19:42:12 -0700741 TEST(ROUNDNE__NEON_ADDSUB, positive_qnan) {
Marat Dukhan8853b822020-05-07 12:19:01 -0700742 std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
743 std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
744 for (uint32_t n = UINT32_C(0x7FC00000); n < UINT32_C(0x80000000); n += kBlockSize) {
745 for (uint32_t i = 0; i < kBlockSize; i++) {
746 inputs[i] = fp32_from_bits(n + i);
747 }
Marat Dukhan075088a2020-05-12 19:42:12 -0700748 xnn_math_f32_roundne__neon_addsub(kBlockSize * sizeof(float), inputs.data(), outputs.data());
Marat Dukhan8853b822020-05-07 12:19:01 -0700749 for (uint32_t i = 0; i < kBlockSize; i++) {
750 const uint32_t reference_output = fp32_to_bits(std::nearbyint(inputs[i]));
751 ASSERT_EQ(reference_output, fp32_to_bits(outputs[i]))
752 << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(inputs[i])
753 << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
754 << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(outputs[i]);
755 }
756 }
757 }
758
Marat Dukhan075088a2020-05-12 19:42:12 -0700759 TEST(ROUNDNE__NEON_ADDSUB, negative_qnan) {
Marat Dukhan8853b822020-05-07 12:19:01 -0700760 std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
761 std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
762 for (uint32_t n = UINT32_C(0x7FC00000); n < UINT32_C(0x80000000); n += kBlockSize) {
763 for (uint32_t i = 0; i < kBlockSize; i++) {
764 inputs[i] = fp32_from_bits(UINT32_C(0x80000000) | (n + i));
765 }
Marat Dukhan075088a2020-05-12 19:42:12 -0700766 xnn_math_f32_roundne__neon_addsub(kBlockSize * sizeof(float), inputs.data(), outputs.data());
Marat Dukhan8853b822020-05-07 12:19:01 -0700767 for (uint32_t i = 0; i < kBlockSize; i++) {
768 const uint32_t reference_output = fp32_to_bits(std::nearbyint(inputs[i]));
769 ASSERT_EQ(reference_output, fp32_to_bits(outputs[i]))
770 << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(inputs[i])
771 << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
772 << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(outputs[i]);
773 }
774 }
775 }
776
Marat Dukhan075088a2020-05-12 19:42:12 -0700777 TEST(ROUNDNE__NEON_ADDSUB, positive_snan) {
Marat Dukhan8853b822020-05-07 12:19:01 -0700778 std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
779 std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
780 for (uint32_t n = UINT32_C(0x7F800000); n < UINT32_C(0x7FC00000); n += kBlockSize) {
781 for (uint32_t i = 0; i < kBlockSize; i++) {
782 inputs[i] = fp32_from_bits(std::max<uint32_t>(n + i, UINT32_C(0x7F800001)));
783 }
Marat Dukhan075088a2020-05-12 19:42:12 -0700784 xnn_math_f32_roundne__neon_addsub(kBlockSize * sizeof(float), inputs.data(), outputs.data());
Marat Dukhan8853b822020-05-07 12:19:01 -0700785 for (uint32_t i = 0; i < kBlockSize; i++) {
786 const uint32_t reference_output = fp32_to_bits(std::nearbyint(inputs[i]));
787 ASSERT_EQ(reference_output & UINT32_C(0xFFBFFFFF), fp32_to_bits(outputs[i]) & UINT32_C(0xFFBFFFFF))
788 << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(inputs[i])
789 << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
790 << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(outputs[i]);
791 }
792 }
793 }
794
Marat Dukhan075088a2020-05-12 19:42:12 -0700795 TEST(ROUNDNE__NEON_ADDSUB, negative_snan) {
Marat Dukhan8853b822020-05-07 12:19:01 -0700796 std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
797 std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
798 for (uint32_t n = UINT32_C(0x7F800000); n < UINT32_C(0x7FC00000); n += kBlockSize) {
799 for (uint32_t i = 0; i < kBlockSize; i++) {
800 inputs[i] = fp32_from_bits(UINT32_C(0x80000000) | std::max<uint32_t>(n + i, UINT32_C(0x7F800001)));
801 }
Marat Dukhan075088a2020-05-12 19:42:12 -0700802 xnn_math_f32_roundne__neon_addsub(kBlockSize * sizeof(float), inputs.data(), outputs.data());
Marat Dukhan8853b822020-05-07 12:19:01 -0700803 for (uint32_t i = 0; i < kBlockSize; i++) {
804 const uint32_t reference_output = fp32_to_bits(std::nearbyint(inputs[i]));
805 ASSERT_EQ(reference_output & UINT32_C(0xFFBFFFFF), fp32_to_bits(outputs[i]) & UINT32_C(0xFFBFFFFF))
806 << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(inputs[i])
807 << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
808 << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(outputs[i]);
809 }
810 }
811 }
812
Marat Dukhan075088a2020-05-12 19:42:12 -0700813 TEST(ROUNDNE__NEON_ADDSUB, positive_snan_to_qnan) {
Marat Dukhan8853b822020-05-07 12:19:01 -0700814 std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
815 std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
816 for (uint32_t n = UINT32_C(0x7F800000); n < UINT32_C(0x7FC00000); n += kBlockSize) {
817 for (uint32_t i = 0; i < kBlockSize; i++) {
818 inputs[i] = fp32_from_bits(std::max<uint32_t>(n + i, UINT32_C(0x7F800001)));
819 }
Marat Dukhan075088a2020-05-12 19:42:12 -0700820 xnn_math_f32_roundne__neon_addsub(kBlockSize * sizeof(float), inputs.data(), outputs.data());
Marat Dukhan8853b822020-05-07 12:19:01 -0700821 for (uint32_t i = 0; i < kBlockSize; i++) {
822 const uint32_t reference_output = fp32_to_bits(std::nearbyint(inputs[i]));
823 ASSERT_EQ(reference_output, fp32_to_bits(outputs[i]))
824 << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(inputs[i])
825 << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
826 << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(outputs[i]);
827 }
828 }
829 }
830
Marat Dukhan075088a2020-05-12 19:42:12 -0700831 TEST(ROUNDNE__NEON_ADDSUB, negative_snan_to_qnan) {
Marat Dukhan8853b822020-05-07 12:19:01 -0700832 std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
833 std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
834 for (uint32_t n = UINT32_C(0x7F800000); n < UINT32_C(0x7FC00000); n += kBlockSize) {
835 for (uint32_t i = 0; i < kBlockSize; i++) {
836 inputs[i] = fp32_from_bits(UINT32_C(0x80000000) | std::max<uint32_t>(n + i, UINT32_C(0x7F800001)));
837 }
Marat Dukhan075088a2020-05-12 19:42:12 -0700838 xnn_math_f32_roundne__neon_addsub(kBlockSize * sizeof(float), inputs.data(), outputs.data());
Marat Dukhan8853b822020-05-07 12:19:01 -0700839 for (uint32_t i = 0; i < kBlockSize; i++) {
840 const uint32_t reference_output = fp32_to_bits(std::nearbyint(inputs[i]));
841 ASSERT_EQ(reference_output, fp32_to_bits(outputs[i]))
842 << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(inputs[i])
843 << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
844 << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(outputs[i]);
845 }
846 }
847 }
848#endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
849
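
// ARMv8 NEON variant, presumably using the FRINTN (round to nearest, ties to even)
// instruction introduced in ARMv8.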
850#if XNN_ARCH_ARM || XNN_ARCH_ARM64
Marat Dukhan4781dd02020-05-12 15:40:18 -0700851 TEST(ROUNDNE__NEONV8, positive_normal) {
Marat Dukhan8853b822020-05-07 12:19:01 -0700852 std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
853 std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
854 for (uint32_t n = UINT32_C(0x00000000); n < UINT32_C(0x4B800000); n += kBlockSize) {
855 for (uint32_t i = 0; i < kBlockSize; i++) {
856 inputs[i] = fp32_from_bits(n + i);
857 }
858 xnn_math_f32_roundne__neonv8(kBlockSize * sizeof(float), inputs.data(), outputs.data());
859 for (uint32_t i = 0; i < kBlockSize; i++) {
860 const uint32_t reference_output = fp32_to_bits(std::nearbyint(inputs[i]));
861 ASSERT_EQ(reference_output, fp32_to_bits(outputs[i]))
862 << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(inputs[i])
863 << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
864 << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(outputs[i]);
865 }
866 }
867 }
868
Marat Dukhan4781dd02020-05-12 15:40:18 -0700869 TEST(ROUNDNE__NEONV8, negative_normal) {
Marat Dukhan8853b822020-05-07 12:19:01 -0700870 std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
871 std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
872 for (uint32_t n = UINT32_C(0x80000000); n < UINT32_C(0xCB800000); n += kBlockSize) {
873 for (uint32_t i = 0; i < kBlockSize; i++) {
874 inputs[i] = fp32_from_bits(n + i);
875 }
876 xnn_math_f32_roundne__neonv8(kBlockSize * sizeof(float), inputs.data(), outputs.data());
877 for (uint32_t i = 0; i < kBlockSize; i++) {
878 const uint32_t reference_output = fp32_to_bits(std::nearbyint(inputs[i]));
879 ASSERT_EQ(reference_output, fp32_to_bits(outputs[i]))
880 << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(inputs[i])
881 << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
882 << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(outputs[i]);
883 }
884 }
885 }
886
Marat Dukhan4781dd02020-05-12 15:40:18 -0700887 TEST(ROUNDNE__NEONV8, positive_integral) {
Marat Dukhan8853b822020-05-07 12:19:01 -0700888 std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
889 std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
890 for (uint32_t n = UINT32_C(0x4B800000); n < UINT32_C(0x7F800000); n += kBlockSize) {
891 for (uint32_t i = 0; i < kBlockSize; i++) {
892 inputs[i] = fp32_from_bits(n + i);
893 }
894 xnn_math_f32_roundne__neonv8(kBlockSize * sizeof(float), inputs.data(), outputs.data());
895 for (uint32_t i = 0; i < kBlockSize; i++) {
896 const uint32_t reference_output = fp32_to_bits(std::nearbyint(inputs[i]));
897 ASSERT_EQ(reference_output, fp32_to_bits(outputs[i]))
898 << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(inputs[i])
899 << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
900 << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(outputs[i]);
901 }
902 }
903 }
904
Marat Dukhan4781dd02020-05-12 15:40:18 -0700905 TEST(ROUNDNE__NEONV8, negative_integral) {
Marat Dukhan8853b822020-05-07 12:19:01 -0700906 std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
907 std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
908 for (uint32_t n = UINT32_C(0xCB800000); n < UINT32_C(0xFF800000); n += kBlockSize) {
909 for (uint32_t i = 0; i < kBlockSize; i++) {
910 inputs[i] = fp32_from_bits(n + i);
911 }
912 xnn_math_f32_roundne__neonv8(kBlockSize * sizeof(float), inputs.data(), outputs.data());
913 for (uint32_t i = 0; i < kBlockSize; i++) {
914 const uint32_t reference_output = fp32_to_bits(std::nearbyint(inputs[i]));
915 ASSERT_EQ(reference_output, fp32_to_bits(outputs[i]))
916 << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(inputs[i])
917 << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
918 << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(outputs[i]);
919 }
920 }
921 }
922
Marat Dukhan4781dd02020-05-12 15:40:18 -0700923 TEST(ROUNDNE__NEONV8, positive_infinity) {
Marat Dukhan8853b822020-05-07 12:19:01 -0700924 std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
925 std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
Marat Dukhand3102142020-06-08 01:24:01 -0700926 std::fill(inputs.begin(), inputs.end(), +std::numeric_limits<float>::infinity());
Marat Dukhan8853b822020-05-07 12:19:01 -0700927 xnn_math_f32_roundne__neonv8(kBlockSize * sizeof(float), inputs.data(), outputs.data());
928 const uint32_t reference_output = fp32_to_bits(std::nearbyint(inputs[0]));
929 ASSERT_EQ(reference_output, fp32_to_bits(outputs[0]))
930 << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(inputs[0])
931 << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
932 << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(outputs[0]);
933 }
934
Marat Dukhan4781dd02020-05-12 15:40:18 -0700935 TEST(ROUNDNE__NEONV8, negative_infinity) {
Marat Dukhan8853b822020-05-07 12:19:01 -0700936 std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
937 std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
Marat Dukhand3102142020-06-08 01:24:01 -0700938 std::fill(inputs.begin(), inputs.end(), -std::numeric_limits<float>::infinity());
Marat Dukhan8853b822020-05-07 12:19:01 -0700939 xnn_math_f32_roundne__neonv8(kBlockSize * sizeof(float), inputs.data(), outputs.data());
940 const uint32_t reference_output = fp32_to_bits(std::nearbyint(inputs[0]));
941 ASSERT_EQ(reference_output, fp32_to_bits(outputs[0]))
942 << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(inputs[0])
943 << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
944 << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(outputs[0]);
945 }
946
Marat Dukhan4781dd02020-05-12 15:40:18 -0700947 TEST(ROUNDNE__NEONV8, positive_qnan) {
Marat Dukhan8853b822020-05-07 12:19:01 -0700948 std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
949 std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
950 for (uint32_t n = UINT32_C(0x7FC00000); n < UINT32_C(0x80000000); n += kBlockSize) {
951 for (uint32_t i = 0; i < kBlockSize; i++) {
952 inputs[i] = fp32_from_bits(n + i);
953 }
954 xnn_math_f32_roundne__neonv8(kBlockSize * sizeof(float), inputs.data(), outputs.data());
955 for (uint32_t i = 0; i < kBlockSize; i++) {
956 const uint32_t reference_output = fp32_to_bits(std::nearbyint(inputs[i]));
957 ASSERT_EQ(reference_output, fp32_to_bits(outputs[i]))
958 << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(inputs[i])
959 << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
960 << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(outputs[i]);
961 }
962 }
963 }
964
Marat Dukhan4781dd02020-05-12 15:40:18 -0700965 TEST(ROUNDNE__NEONV8, negative_qnan) {
Marat Dukhan8853b822020-05-07 12:19:01 -0700966 std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
967 std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
968 for (uint32_t n = UINT32_C(0x7FC00000); n < UINT32_C(0x80000000); n += kBlockSize) {
969 for (uint32_t i = 0; i < kBlockSize; i++) {
970 inputs[i] = fp32_from_bits(UINT32_C(0x80000000) | (n + i));
971 }
972 xnn_math_f32_roundne__neonv8(kBlockSize * sizeof(float), inputs.data(), outputs.data());
973 for (uint32_t i = 0; i < kBlockSize; i++) {
974 const uint32_t reference_output = fp32_to_bits(std::nearbyint(inputs[i]));
975 ASSERT_EQ(reference_output, fp32_to_bits(outputs[i]))
976 << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(inputs[i])
977 << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
978 << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(outputs[i]);
979 }
980 }
981 }
982
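  // For signaling-NaN inputs the quiet bit (0x00400000) is masked out on both
  // sides of the comparison: std::nearbyint quiets the NaN, while the kernel
  // may legitimately pass it through unchanged, so only the sign and the
  // remaining payload bits are required to match.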
Marat Dukhan4781dd02020-05-12 15:40:18 -0700983 TEST(ROUNDNE__NEONV8, positive_snan) {
Marat Dukhan8853b822020-05-07 12:19:01 -0700984 std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
985 std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
986 for (uint32_t n = UINT32_C(0x7F800000); n < UINT32_C(0x7FC00000); n += kBlockSize) {
987 for (uint32_t i = 0; i < kBlockSize; i++) {
988 inputs[i] = fp32_from_bits(std::max<uint32_t>(n + i, UINT32_C(0x7F800001)));
989 }
990 xnn_math_f32_roundne__neonv8(kBlockSize * sizeof(float), inputs.data(), outputs.data());
991 for (uint32_t i = 0; i < kBlockSize; i++) {
992 const uint32_t reference_output = fp32_to_bits(std::nearbyint(inputs[i]));
993 ASSERT_EQ(reference_output & UINT32_C(0xFFBFFFFF), fp32_to_bits(outputs[i]) & UINT32_C(0xFFBFFFFF))
994 << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(inputs[i])
995 << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
996 << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(outputs[i]);
997 }
998 }
999 }
1000
Marat Dukhan4781dd02020-05-12 15:40:18 -07001001 TEST(ROUNDNE__NEONV8, negative_snan) {
Marat Dukhan8853b822020-05-07 12:19:01 -07001002 std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
1003 std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
1004 for (uint32_t n = UINT32_C(0x7F800000); n < UINT32_C(0x7FC00000); n += kBlockSize) {
1005 for (uint32_t i = 0; i < kBlockSize; i++) {
1006 inputs[i] = fp32_from_bits(UINT32_C(0x80000000) | std::max<uint32_t>(n + i, UINT32_C(0x7F800001)));
1007 }
1008 xnn_math_f32_roundne__neonv8(kBlockSize * sizeof(float), inputs.data(), outputs.data());
1009 for (uint32_t i = 0; i < kBlockSize; i++) {
1010 const uint32_t reference_output = fp32_to_bits(std::nearbyint(inputs[i]));
1011 ASSERT_EQ(reference_output & UINT32_C(0xFFBFFFFF), fp32_to_bits(outputs[i]) & UINT32_C(0xFFBFFFFF))
1012 << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(inputs[i])
1013 << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
1014 << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(outputs[i]);
1015 }
1016 }
1017 }
1018
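  // The snan_to_qnan tests, by contrast, require bit-exact agreement with
  // std::nearbyint, i.e. the kernel is expected to return the corresponding
  // quiet NaN for every signaling-NaN input.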
Marat Dukhan4781dd02020-05-12 15:40:18 -07001019 TEST(ROUNDNE__NEONV8, positive_snan_to_qnan) {
Marat Dukhan8853b822020-05-07 12:19:01 -07001020 std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
1021 std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
1022 for (uint32_t n = UINT32_C(0x7F800000); n < UINT32_C(0x7FC00000); n += kBlockSize) {
1023 for (uint32_t i = 0; i < kBlockSize; i++) {
1024 inputs[i] = fp32_from_bits(std::max<uint32_t>(n + i, UINT32_C(0x7F800001)));
1025 }
1026 xnn_math_f32_roundne__neonv8(kBlockSize * sizeof(float), inputs.data(), outputs.data());
1027 for (uint32_t i = 0; i < kBlockSize; i++) {
1028 const uint32_t reference_output = fp32_to_bits(std::nearbyint(inputs[i]));
1029 ASSERT_EQ(reference_output, fp32_to_bits(outputs[i]))
1030 << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(inputs[i])
1031 << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
1032 << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(outputs[i]);
1033 }
1034 }
1035 }
1036
Marat Dukhan4781dd02020-05-12 15:40:18 -07001037 TEST(ROUNDNE__NEONV8, negative_snan_to_qnan) {
Marat Dukhan8853b822020-05-07 12:19:01 -07001038 std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
1039 std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
1040 for (uint32_t n = UINT32_C(0x7F800000); n < UINT32_C(0x7FC00000); n += kBlockSize) {
1041 for (uint32_t i = 0; i < kBlockSize; i++) {
1042 inputs[i] = fp32_from_bits(UINT32_C(0x80000000) | std::max<uint32_t>(n + i, UINT32_C(0x7F800001)));
1043 }
1044 xnn_math_f32_roundne__neonv8(kBlockSize * sizeof(float), inputs.data(), outputs.data());
1045 for (uint32_t i = 0; i < kBlockSize; i++) {
1046 const uint32_t reference_output = fp32_to_bits(std::nearbyint(inputs[i]));
1047 ASSERT_EQ(reference_output, fp32_to_bits(outputs[i]))
1048 << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(inputs[i])
1049 << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
1050 << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(outputs[i]);
1051 }
1052 }
1053 }
1054#endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
1055
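// The WAsm SIMD "addsub" variant presumably rounds by adding and then
// subtracting the magic constant 2**23 (see the scalar sketch further below);
// these tests check that the trick still matches std::nearbyint across the
// full range of finite, infinite, and NaN inputs.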
Marat Dukhan4c617792021-12-21 15:47:58 -08001056#if XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
Marat Dukhand3f3d872020-06-24 13:08:25 -07001057 TEST(ROUNDNE__WASMSIMD_ADDSUB, positive_normal) {
1058 std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
1059 std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
1060 for (uint32_t n = UINT32_C(0x00000000); n < UINT32_C(0x4B800000); n += kBlockSize) {
1061 for (uint32_t i = 0; i < kBlockSize; i++) {
1062 inputs[i] = fp32_from_bits(n + i);
1063 }
1064 xnn_math_f32_roundne__wasmsimd_addsub(kBlockSize * sizeof(float), inputs.data(), outputs.data());
1065 for (uint32_t i = 0; i < kBlockSize; i++) {
1066 const uint32_t reference_output = fp32_to_bits(std::nearbyint(inputs[i]));
1067 ASSERT_EQ(reference_output, fp32_to_bits(outputs[i]))
1068 << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(inputs[i])
1069 << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
1070 << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(outputs[i]);
1071 }
1072 }
1073 }
1074
1075 TEST(ROUNDNE__WASMSIMD_ADDSUB, negative_normal) {
1076 std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
1077 std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
1078 for (uint32_t n = UINT32_C(0x80000000); n < UINT32_C(0xCB800000); n += kBlockSize) {
1079 for (uint32_t i = 0; i < kBlockSize; i++) {
1080 inputs[i] = fp32_from_bits(n + i);
1081 }
1082 xnn_math_f32_roundne__wasmsimd_addsub(kBlockSize * sizeof(float), inputs.data(), outputs.data());
1083 for (uint32_t i = 0; i < kBlockSize; i++) {
1084 const uint32_t reference_output = fp32_to_bits(std::nearbyint(inputs[i]));
1085 ASSERT_EQ(reference_output, fp32_to_bits(outputs[i]))
1086 << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(inputs[i])
1087 << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
1088 << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(outputs[i]);
1089 }
1090 }
1091 }
1092
1093 TEST(ROUNDNE__WASMSIMD_ADDSUB, positive_integral) {
1094 std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
1095 std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
1096 for (uint32_t n = UINT32_C(0x4B800000); n < UINT32_C(0x7F800000); n += kBlockSize) {
1097 for (uint32_t i = 0; i < kBlockSize; i++) {
1098 inputs[i] = fp32_from_bits(n + i);
1099 }
1100 xnn_math_f32_roundne__wasmsimd_addsub(kBlockSize * sizeof(float), inputs.data(), outputs.data());
1101 for (uint32_t i = 0; i < kBlockSize; i++) {
1102 const uint32_t reference_output = fp32_to_bits(std::nearbyint(inputs[i]));
1103 ASSERT_EQ(reference_output, fp32_to_bits(outputs[i]))
1104 << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(inputs[i])
1105 << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
1106 << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(outputs[i]);
1107 }
1108 }
1109 }
1110
1111 TEST(ROUNDNE__WASMSIMD_ADDSUB, negative_integral) {
1112 std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
1113 std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
1114 for (uint32_t n = UINT32_C(0xCB800000); n < UINT32_C(0xFF800000); n += kBlockSize) {
1115 for (uint32_t i = 0; i < kBlockSize; i++) {
1116 inputs[i] = fp32_from_bits(n + i);
1117 }
1118 xnn_math_f32_roundne__wasmsimd_addsub(kBlockSize * sizeof(float), inputs.data(), outputs.data());
1119 for (uint32_t i = 0; i < kBlockSize; i++) {
1120 const uint32_t reference_output = fp32_to_bits(std::nearbyint(inputs[i]));
1121 ASSERT_EQ(reference_output, fp32_to_bits(outputs[i]))
1122 << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(inputs[i])
1123 << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
1124 << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(outputs[i]);
1125 }
1126 }
1127 }
1128
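  // Rounding is an identity on non-finite values: infinities round to
  // themselves and NaNs stay NaN, so these tests expect std::nearbyint's
  // result bit-for-bit (sNaN quieting is exercised separately below).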
1129 TEST(ROUNDNE__WASMSIMD_ADDSUB, positive_infinity) {
1130 std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
1131 std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
1132 std::fill(inputs.begin(), inputs.end(), +std::numeric_limits<float>::infinity());
1133 xnn_math_f32_roundne__wasmsimd_addsub(kBlockSize * sizeof(float), inputs.data(), outputs.data());
1134 const uint32_t reference_output = fp32_to_bits(std::nearbyint(inputs[0]));
1135 ASSERT_EQ(reference_output, fp32_to_bits(outputs[0]))
1136 << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(inputs[0])
1137 << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
1138 << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(outputs[0]);
1139 }
1140
1141 TEST(ROUNDNE__WASMSIMD_ADDSUB, negative_infinity) {
1142 std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
1143 std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
1144 std::fill(inputs.begin(), inputs.end(), -std::numeric_limits<float>::infinity());
1145 xnn_math_f32_roundne__wasmsimd_addsub(kBlockSize * sizeof(float), inputs.data(), outputs.data());
1146 const uint32_t reference_output = fp32_to_bits(std::nearbyint(inputs[0]));
1147 ASSERT_EQ(reference_output, fp32_to_bits(outputs[0]))
1148 << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(inputs[0])
1149 << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
1150 << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(outputs[0]);
1151 }
1152
1153 TEST(ROUNDNE__WASMSIMD_ADDSUB, positive_qnan) {
1154 std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
1155 std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
1156 for (uint32_t n = UINT32_C(0x7FC00000); n < UINT32_C(0x80000000); n += kBlockSize) {
1157 for (uint32_t i = 0; i < kBlockSize; i++) {
1158 inputs[i] = fp32_from_bits(n + i);
1159 }
1160 xnn_math_f32_roundne__wasmsimd_addsub(kBlockSize * sizeof(float), inputs.data(), outputs.data());
1161 for (uint32_t i = 0; i < kBlockSize; i++) {
1162 const uint32_t reference_output = fp32_to_bits(std::nearbyint(inputs[i]));
1163 ASSERT_EQ(reference_output, fp32_to_bits(outputs[i]))
1164 << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(inputs[i])
1165 << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
1166 << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(outputs[i]);
1167 }
1168 }
1169 }
1170
1171 TEST(ROUNDNE__WASMSIMD_ADDSUB, negative_qnan) {
1172 std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
1173 std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
1174 for (uint32_t n = UINT32_C(0x7FC00000); n < UINT32_C(0x80000000); n += kBlockSize) {
1175 for (uint32_t i = 0; i < kBlockSize; i++) {
1176 inputs[i] = fp32_from_bits(UINT32_C(0x80000000) | (n + i));
1177 }
1178 xnn_math_f32_roundne__wasmsimd_addsub(kBlockSize * sizeof(float), inputs.data(), outputs.data());
1179 for (uint32_t i = 0; i < kBlockSize; i++) {
1180 const uint32_t reference_output = fp32_to_bits(std::nearbyint(inputs[i]));
1181 ASSERT_EQ(reference_output, fp32_to_bits(outputs[i]))
1182 << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(inputs[i])
1183 << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
1184 << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(outputs[i]);
1185 }
1186 }
1187 }
1188
1189 TEST(ROUNDNE__WASMSIMD_ADDSUB, positive_snan) {
1190 std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
1191 std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
1192 for (uint32_t n = UINT32_C(0x7F800000); n < UINT32_C(0x7FC00000); n += kBlockSize) {
1193 for (uint32_t i = 0; i < kBlockSize; i++) {
1194 inputs[i] = fp32_from_bits(std::max<uint32_t>(n + i, UINT32_C(0x7F800001)));
1195 }
1196 xnn_math_f32_roundne__wasmsimd_addsub(kBlockSize * sizeof(float), inputs.data(), outputs.data());
1197 for (uint32_t i = 0; i < kBlockSize; i++) {
1198 const uint32_t reference_output = fp32_to_bits(std::nearbyint(inputs[i]));
1199 ASSERT_EQ(reference_output & UINT32_C(0xFFBFFFFF), fp32_to_bits(outputs[i]) & UINT32_C(0xFFBFFFFF))
1200 << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(inputs[i])
1201 << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
1202 << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(outputs[i]);
1203 }
1204 }
1205 }
1206
1207 TEST(ROUNDNE__WASMSIMD_ADDSUB, negative_snan) {
1208 std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
1209 std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
1210 for (uint32_t n = UINT32_C(0x7F800000); n < UINT32_C(0x7FC00000); n += kBlockSize) {
1211 for (uint32_t i = 0; i < kBlockSize; i++) {
1212 inputs[i] = fp32_from_bits(UINT32_C(0x80000000) | std::max<uint32_t>(n + i, UINT32_C(0x7F800001)));
1213 }
1214 xnn_math_f32_roundne__wasmsimd_addsub(kBlockSize * sizeof(float), inputs.data(), outputs.data());
1215 for (uint32_t i = 0; i < kBlockSize; i++) {
1216 const uint32_t reference_output = fp32_to_bits(std::nearbyint(inputs[i]));
1217 ASSERT_EQ(reference_output & UINT32_C(0xFFBFFFFF), fp32_to_bits(outputs[i]) & UINT32_C(0xFFBFFFFF))
1218 << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(inputs[i])
1219 << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
1220 << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(outputs[i]);
1221 }
1222 }
1223 }
1224
1225 TEST(ROUNDNE__WASMSIMD_ADDSUB, positive_snan_to_qnan) {
1226 std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
1227 std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
1228 for (uint32_t n = UINT32_C(0x7F800000); n < UINT32_C(0x7FC00000); n += kBlockSize) {
1229 for (uint32_t i = 0; i < kBlockSize; i++) {
1230 inputs[i] = fp32_from_bits(std::max<uint32_t>(n + i, UINT32_C(0x7F800001)));
1231 }
1232 xnn_math_f32_roundne__wasmsimd_addsub(kBlockSize * sizeof(float), inputs.data(), outputs.data());
1233 for (uint32_t i = 0; i < kBlockSize; i++) {
1234 const uint32_t reference_output = fp32_to_bits(std::nearbyint(inputs[i]));
1235 ASSERT_EQ(reference_output, fp32_to_bits(outputs[i]))
1236 << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(inputs[i])
1237 << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
1238 << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(outputs[i]);
1239 }
1240 }
1241 }
1242
1243 TEST(ROUNDNE__WASMSIMD_ADDSUB, negative_snan_to_qnan) {
1244 std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
1245 std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
1246 for (uint32_t n = UINT32_C(0x7F800000); n < UINT32_C(0x7FC00000); n += kBlockSize) {
1247 for (uint32_t i = 0; i < kBlockSize; i++) {
1248 inputs[i] = fp32_from_bits(UINT32_C(0x80000000) | std::max<uint32_t>(n + i, UINT32_C(0x7F800001)));
1249 }
1250 xnn_math_f32_roundne__wasmsimd_addsub(kBlockSize * sizeof(float), inputs.data(), outputs.data());
1251 for (uint32_t i = 0; i < kBlockSize; i++) {
1252 const uint32_t reference_output = fp32_to_bits(std::nearbyint(inputs[i]));
1253 ASSERT_EQ(reference_output, fp32_to_bits(outputs[i]))
1254 << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(inputs[i])
1255 << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
1256 << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(outputs[i]);
1257 }
1258 }
1259 }
Marat Dukhan4c617792021-12-21 15:47:58 -08001260#endif // XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
Marat Dukhand3f3d872020-06-24 13:08:25 -07001261
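// The "native" WAsm SIMD variant presumably maps to the f32x4.nearest
// instruction, which already implements round-to-nearest-even, so the same
// exhaustive sweep is expected to match std::nearbyint exactly.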
Marat Dukhan4c617792021-12-21 15:47:58 -08001262#if XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
Marat Dukhan33b4f752021-09-03 10:53:53 -07001263 TEST(ROUNDNE__WASMSIMD_NATIVE, positive_normal) {
1264 std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
1265 std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
1266 for (uint32_t n = UINT32_C(0x00000000); n < UINT32_C(0x4B800000); n += kBlockSize) {
1267 for (uint32_t i = 0; i < kBlockSize; i++) {
1268 inputs[i] = fp32_from_bits(n + i);
1269 }
1270 xnn_math_f32_roundne__wasmsimd_native(kBlockSize * sizeof(float), inputs.data(), outputs.data());
1271 for (uint32_t i = 0; i < kBlockSize; i++) {
1272 const uint32_t reference_output = fp32_to_bits(std::nearbyint(inputs[i]));
1273 ASSERT_EQ(reference_output, fp32_to_bits(outputs[i]))
1274 << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(inputs[i])
1275 << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
1276 << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(outputs[i]);
1277 }
1278 }
1279 }
1280
1281 TEST(ROUNDNE__WASMSIMD_NATIVE, negative_normal) {
1282 std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
1283 std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
1284 for (uint32_t n = UINT32_C(0x80000000); n < UINT32_C(0xCB800000); n += kBlockSize) {
1285 for (uint32_t i = 0; i < kBlockSize; i++) {
1286 inputs[i] = fp32_from_bits(n + i);
1287 }
1288 xnn_math_f32_roundne__wasmsimd_native(kBlockSize * sizeof(float), inputs.data(), outputs.data());
1289 for (uint32_t i = 0; i < kBlockSize; i++) {
1290 const uint32_t reference_output = fp32_to_bits(std::nearbyint(inputs[i]));
1291 ASSERT_EQ(reference_output, fp32_to_bits(outputs[i]))
1292 << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(inputs[i])
1293 << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
1294 << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(outputs[i]);
1295 }
1296 }
1297 }
1298
1299 TEST(ROUNDNE__WASMSIMD_NATIVE, positive_integral) {
1300 std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
1301 std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
1302 for (uint32_t n = UINT32_C(0x4B800000); n < UINT32_C(0x7F800000); n += kBlockSize) {
1303 for (uint32_t i = 0; i < kBlockSize; i++) {
1304 inputs[i] = fp32_from_bits(n + i);
1305 }
1306 xnn_math_f32_roundne__wasmsimd_native(kBlockSize * sizeof(float), inputs.data(), outputs.data());
1307 for (uint32_t i = 0; i < kBlockSize; i++) {
1308 const uint32_t reference_output = fp32_to_bits(std::nearbyint(inputs[i]));
1309 ASSERT_EQ(reference_output, fp32_to_bits(outputs[i]))
1310 << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(inputs[i])
1311 << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
1312 << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(outputs[i]);
1313 }
1314 }
1315 }
1316
1317 TEST(ROUNDNE__WASMSIMD_NATIVE, negative_integral) {
1318 std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
1319 std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
1320 for (uint32_t n = UINT32_C(0xCB800000); n < UINT32_C(0xFF800000); n += kBlockSize) {
1321 for (uint32_t i = 0; i < kBlockSize; i++) {
1322 inputs[i] = fp32_from_bits(n + i);
1323 }
1324 xnn_math_f32_roundne__wasmsimd_native(kBlockSize * sizeof(float), inputs.data(), outputs.data());
1325 for (uint32_t i = 0; i < kBlockSize; i++) {
1326 const uint32_t reference_output = fp32_to_bits(std::nearbyint(inputs[i]));
1327 ASSERT_EQ(reference_output, fp32_to_bits(outputs[i]))
1328 << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(inputs[i])
1329 << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
1330 << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(outputs[i]);
1331 }
1332 }
1333 }
1334
1335 TEST(ROUNDNE__WASMSIMD_NATIVE, positive_infinity) {
1336 std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
1337 std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
1338 std::fill(inputs.begin(), inputs.end(), +std::numeric_limits<float>::infinity());
1339 xnn_math_f32_roundne__wasmsimd_native(kBlockSize * sizeof(float), inputs.data(), outputs.data());
1340 const uint32_t reference_output = fp32_to_bits(std::nearbyint(inputs[0]));
1341 ASSERT_EQ(reference_output, fp32_to_bits(outputs[0]))
1342 << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(inputs[0])
1343 << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
1344 << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(outputs[0]);
1345 }
1346
1347 TEST(ROUNDNE__WASMSIMD_NATIVE, negative_infinity) {
1348 std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
1349 std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
1350 std::fill(inputs.begin(), inputs.end(), -std::numeric_limits<float>::infinity());
1351 xnn_math_f32_roundne__wasmsimd_native(kBlockSize * sizeof(float), inputs.data(), outputs.data());
1352 const uint32_t reference_output = fp32_to_bits(std::nearbyint(inputs[0]));
1353 ASSERT_EQ(reference_output, fp32_to_bits(outputs[0]))
1354 << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(inputs[0])
1355 << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
1356 << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(outputs[0]);
1357 }
1358
1359 TEST(ROUNDNE__WASMSIMD_NATIVE, positive_qnan) {
1360 std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
1361 std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
1362 for (uint32_t n = UINT32_C(0x7FC00000); n < UINT32_C(0x80000000); n += kBlockSize) {
1363 for (uint32_t i = 0; i < kBlockSize; i++) {
1364 inputs[i] = fp32_from_bits(n + i);
1365 }
1366 xnn_math_f32_roundne__wasmsimd_native(kBlockSize * sizeof(float), inputs.data(), outputs.data());
1367 for (uint32_t i = 0; i < kBlockSize; i++) {
1368 const uint32_t reference_output = fp32_to_bits(std::nearbyint(inputs[i]));
1369 ASSERT_EQ(reference_output, fp32_to_bits(outputs[i]))
1370 << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(inputs[i])
1371 << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
1372 << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(outputs[i]);
1373 }
1374 }
1375 }
1376
1377 TEST(ROUNDNE__WASMSIMD_NATIVE, negative_qnan) {
1378 std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
1379 std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
1380 for (uint32_t n = UINT32_C(0x7FC00000); n < UINT32_C(0x80000000); n += kBlockSize) {
1381 for (uint32_t i = 0; i < kBlockSize; i++) {
1382 inputs[i] = fp32_from_bits(UINT32_C(0x80000000) | (n + i));
1383 }
1384 xnn_math_f32_roundne__wasmsimd_native(kBlockSize * sizeof(float), inputs.data(), outputs.data());
1385 for (uint32_t i = 0; i < kBlockSize; i++) {
1386 const uint32_t reference_output = fp32_to_bits(std::nearbyint(inputs[i]));
1387 ASSERT_EQ(reference_output, fp32_to_bits(outputs[i]))
1388 << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(inputs[i])
1389 << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
1390 << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(outputs[i]);
1391 }
1392 }
1393 }
1394
1395 TEST(ROUNDNE__WASMSIMD_NATIVE, positive_snan) {
1396 std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
1397 std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
1398 for (uint32_t n = UINT32_C(0x7F800000); n < UINT32_C(0x7FC00000); n += kBlockSize) {
1399 for (uint32_t i = 0; i < kBlockSize; i++) {
1400 inputs[i] = fp32_from_bits(std::max<uint32_t>(n + i, UINT32_C(0x7F800001)));
1401 }
1402 xnn_math_f32_roundne__wasmsimd_native(kBlockSize * sizeof(float), inputs.data(), outputs.data());
1403 for (uint32_t i = 0; i < kBlockSize; i++) {
1404 const uint32_t reference_output = fp32_to_bits(std::nearbyint(inputs[i]));
1405 ASSERT_EQ(reference_output & UINT32_C(0xFFBFFFFF), fp32_to_bits(outputs[i]) & UINT32_C(0xFFBFFFFF))
1406 << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(inputs[i])
1407 << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
1408 << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(outputs[i]);
1409 }
1410 }
1411 }
1412
1413 TEST(ROUNDNE__WASMSIMD_NATIVE, negative_snan) {
1414 std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
1415 std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
1416 for (uint32_t n = UINT32_C(0x7F800000); n < UINT32_C(0x7FC00000); n += kBlockSize) {
1417 for (uint32_t i = 0; i < kBlockSize; i++) {
1418 inputs[i] = fp32_from_bits(UINT32_C(0x80000000) | std::max<uint32_t>(n + i, UINT32_C(0x7F800001)));
1419 }
1420 xnn_math_f32_roundne__wasmsimd_native(kBlockSize * sizeof(float), inputs.data(), outputs.data());
1421 for (uint32_t i = 0; i < kBlockSize; i++) {
1422 const uint32_t reference_output = fp32_to_bits(std::nearbyint(inputs[i]));
1423 ASSERT_EQ(reference_output & UINT32_C(0xFFBFFFFF), fp32_to_bits(outputs[i]) & UINT32_C(0xFFBFFFFF))
1424 << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(inputs[i])
1425 << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
1426 << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(outputs[i]);
1427 }
1428 }
1429 }
1430
1431 TEST(ROUNDNE__WASMSIMD_NATIVE, positive_snan_to_qnan) {
1432 std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
1433 std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
1434 for (uint32_t n = UINT32_C(0x7F800000); n < UINT32_C(0x7FC00000); n += kBlockSize) {
1435 for (uint32_t i = 0; i < kBlockSize; i++) {
1436 inputs[i] = fp32_from_bits(std::max<uint32_t>(n + i, UINT32_C(0x7F800001)));
1437 }
1438 xnn_math_f32_roundne__wasmsimd_native(kBlockSize * sizeof(float), inputs.data(), outputs.data());
1439 for (uint32_t i = 0; i < kBlockSize; i++) {
1440 const uint32_t reference_output = fp32_to_bits(std::nearbyint(inputs[i]));
1441 ASSERT_EQ(reference_output, fp32_to_bits(outputs[i]))
1442 << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(inputs[i])
1443 << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
1444 << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(outputs[i]);
1445 }
1446 }
1447 }
1448
1449 TEST(ROUNDNE__WASMSIMD_NATIVE, negative_snan_to_qnan) {
1450 std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
1451 std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
1452 for (uint32_t n = UINT32_C(0x7F800000); n < UINT32_C(0x7FC00000); n += kBlockSize) {
1453 for (uint32_t i = 0; i < kBlockSize; i++) {
1454 inputs[i] = fp32_from_bits(UINT32_C(0x80000000) | std::max<uint32_t>(n + i, UINT32_C(0x7F800001)));
1455 }
1456 xnn_math_f32_roundne__wasmsimd_native(kBlockSize * sizeof(float), inputs.data(), outputs.data());
1457 for (uint32_t i = 0; i < kBlockSize; i++) {
1458 const uint32_t reference_output = fp32_to_bits(std::nearbyint(inputs[i]));
1459 ASSERT_EQ(reference_output, fp32_to_bits(outputs[i]))
1460 << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(inputs[i])
1461 << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
1462 << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(outputs[i]);
1463 }
1464 }
1465 }
Marat Dukhan4c617792021-12-21 15:47:58 -08001466#endif // XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
Marat Dukhan33b4f752021-09-03 10:53:53 -07001467
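// A minimal sketch of the rounding technique that the *_addsub kernels under
// test presumably rely on (the real implementations live elsewhere in the
// tree and are vectorized): under the default round-to-nearest-even mode,
// adding and then subtracting 2**23 snaps any |x| < 2**23 to the nearest
// integer, and larger magnitudes are already integral. The helper below is
// illustrative only and is not called by the tests.
inline float roundne_addsub_sketch(float x) {
  const float magic = 8388608.0f;  // 2**23: the smallest float whose ULP is 1.0
  const float abs_x = std::abs(x);
  if (!(abs_x < magic)) {
    return x;  // already an integer, or infinity/NaN: pass through unchanged
  }
  const float rounded = (abs_x + magic) - magic;  // FPU rounds to nearest-even
  return std::copysign(rounded, x);  // restore the sign, including -0.0f
}
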
Marat Dukhan075088a2020-05-12 19:42:12 -07001468TEST(ROUNDNE__SCALAR_ADDSUB, positive_normal) {
Marat Dukhan8853b822020-05-07 12:19:01 -07001469 std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
1470 std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
1471 for (uint32_t n = UINT32_C(0x00000000); n < UINT32_C(0x4B800000); n += kBlockSize) {
1472 for (uint32_t i = 0; i < kBlockSize; i++) {
1473 inputs[i] = fp32_from_bits(n + i);
1474 }
Marat Dukhan075088a2020-05-12 19:42:12 -07001475 xnn_math_f32_roundne__scalar_addsub(kBlockSize * sizeof(float), inputs.data(), outputs.data());
Marat Dukhan8853b822020-05-07 12:19:01 -07001476 for (uint32_t i = 0; i < kBlockSize; i++) {
1477 const uint32_t reference_output = fp32_to_bits(std::nearbyint(inputs[i]));
1478 ASSERT_EQ(reference_output, fp32_to_bits(outputs[i]))
1479 << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(inputs[i])
1480 << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
1481 << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(outputs[i]);
1482 }
1483 }
1484}
1485
Marat Dukhan075088a2020-05-12 19:42:12 -07001486TEST(ROUNDNE__SCALAR_ADDSUB, negative_normal) {
Marat Dukhan8853b822020-05-07 12:19:01 -07001487 std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
1488 std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
1489 for (uint32_t n = UINT32_C(0x80000000); n < UINT32_C(0xCB800000); n += kBlockSize) {
1490 for (uint32_t i = 0; i < kBlockSize; i++) {
1491 inputs[i] = fp32_from_bits(n + i);
1492 }
Marat Dukhan075088a2020-05-12 19:42:12 -07001493 xnn_math_f32_roundne__scalar_addsub(kBlockSize * sizeof(float), inputs.data(), outputs.data());
Marat Dukhan8853b822020-05-07 12:19:01 -07001494 for (uint32_t i = 0; i < kBlockSize; i++) {
1495 const uint32_t reference_output = fp32_to_bits(std::nearbyint(inputs[i]));
1496 ASSERT_EQ(reference_output, fp32_to_bits(outputs[i]))
1497 << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(inputs[i])
1498 << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
1499 << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(outputs[i]);
1500 }
1501 }
1502}
1503
Marat Dukhan075088a2020-05-12 19:42:12 -07001504TEST(ROUNDNE__SCALAR_ADDSUB, positive_integral) {
Marat Dukhan8853b822020-05-07 12:19:01 -07001505 std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
1506 std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
1507 for (uint32_t n = UINT32_C(0x4B800000); n < UINT32_C(0x7F800000); n += kBlockSize) {
1508 for (uint32_t i = 0; i < kBlockSize; i++) {
1509 inputs[i] = fp32_from_bits(n + i);
1510 }
Marat Dukhan075088a2020-05-12 19:42:12 -07001511 xnn_math_f32_roundne__scalar_addsub(kBlockSize * sizeof(float), inputs.data(), outputs.data());
Marat Dukhan8853b822020-05-07 12:19:01 -07001512 for (uint32_t i = 0; i < kBlockSize; i++) {
1513 const uint32_t reference_output = fp32_to_bits(std::nearbyint(inputs[i]));
1514 ASSERT_EQ(reference_output, fp32_to_bits(outputs[i]))
1515 << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(inputs[i])
1516 << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
1517 << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(outputs[i]);
1518 }
1519 }
1520}
1521
Marat Dukhan075088a2020-05-12 19:42:12 -07001522TEST(ROUNDNE__SCALAR_ADDSUB, negative_integral) {
Marat Dukhan8853b822020-05-07 12:19:01 -07001523 std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
1524 std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
1525 for (uint32_t n = UINT32_C(0xCB800000); n < UINT32_C(0xFF800000); n += kBlockSize) {
1526 for (uint32_t i = 0; i < kBlockSize; i++) {
1527 inputs[i] = fp32_from_bits(n + i);
1528 }
Marat Dukhan075088a2020-05-12 19:42:12 -07001529 xnn_math_f32_roundne__scalar_addsub(kBlockSize * sizeof(float), inputs.data(), outputs.data());
Marat Dukhan8853b822020-05-07 12:19:01 -07001530 for (uint32_t i = 0; i < kBlockSize; i++) {
1531 const uint32_t reference_output = fp32_to_bits(std::nearbyint(inputs[i]));
1532 ASSERT_EQ(reference_output, fp32_to_bits(outputs[i]))
1533 << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(inputs[i])
1534 << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
1535 << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(outputs[i]);
1536 }
1537 }
1538}
1539
Marat Dukhan075088a2020-05-12 19:42:12 -07001540TEST(ROUNDNE__SCALAR_ADDSUB, positive_infinity) {
Marat Dukhan8853b822020-05-07 12:19:01 -07001541 std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
1542 std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
Marat Dukhand3102142020-06-08 01:24:01 -07001543 std::fill(inputs.begin(), inputs.end(), +std::numeric_limits<float>::infinity());
Marat Dukhan075088a2020-05-12 19:42:12 -07001544 xnn_math_f32_roundne__scalar_addsub(kBlockSize * sizeof(float), inputs.data(), outputs.data());
Marat Dukhan8853b822020-05-07 12:19:01 -07001545 const uint32_t reference_output = fp32_to_bits(std::nearbyint(inputs[0]));
1546 ASSERT_EQ(reference_output, fp32_to_bits(outputs[0]))
1547 << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(inputs[0])
1548 << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
1549 << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(outputs[0]);
1550}
1551
Marat Dukhan075088a2020-05-12 19:42:12 -07001552TEST(ROUNDNE__SCALAR_ADDSUB, negative_infinity) {
Marat Dukhan8853b822020-05-07 12:19:01 -07001553 std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
1554 std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
Marat Dukhand3102142020-06-08 01:24:01 -07001555 std::fill(inputs.begin(), inputs.end(), -std::numeric_limits<float>::infinity());
Marat Dukhan075088a2020-05-12 19:42:12 -07001556 xnn_math_f32_roundne__scalar_addsub(kBlockSize * sizeof(float), inputs.data(), outputs.data());
Marat Dukhan8853b822020-05-07 12:19:01 -07001557 const uint32_t reference_output = fp32_to_bits(std::nearbyint(inputs[0]));
1558 ASSERT_EQ(reference_output, fp32_to_bits(outputs[0]))
1559 << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(inputs[0])
1560 << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
1561 << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(outputs[0]);
1562}
1563
Marat Dukhan075088a2020-05-12 19:42:12 -07001564TEST(ROUNDNE__SCALAR_ADDSUB, positive_qnan) {
Marat Dukhan8853b822020-05-07 12:19:01 -07001565 std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
1566 std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
1567 for (uint32_t n = UINT32_C(0x7FC00000); n < UINT32_C(0x80000000); n += kBlockSize) {
1568 for (uint32_t i = 0; i < kBlockSize; i++) {
1569 inputs[i] = fp32_from_bits(n + i);
1570 }
Marat Dukhan075088a2020-05-12 19:42:12 -07001571 xnn_math_f32_roundne__scalar_addsub(kBlockSize * sizeof(float), inputs.data(), outputs.data());
Marat Dukhan8853b822020-05-07 12:19:01 -07001572 for (uint32_t i = 0; i < kBlockSize; i++) {
1573 const uint32_t reference_output = fp32_to_bits(std::nearbyint(inputs[i]));
1574 ASSERT_EQ(reference_output, fp32_to_bits(outputs[i]))
1575 << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(inputs[i])
1576 << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
1577 << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(outputs[i]);
1578 }
1579 }
1580}
1581
Marat Dukhan075088a2020-05-12 19:42:12 -07001582TEST(ROUNDNE__SCALAR_ADDSUB, negative_qnan) {
Marat Dukhan8853b822020-05-07 12:19:01 -07001583 std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
1584 std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
1585 for (uint32_t n = UINT32_C(0x7FC00000); n < UINT32_C(0x80000000); n += kBlockSize) {
1586 for (uint32_t i = 0; i < kBlockSize; i++) {
1587 inputs[i] = fp32_from_bits(UINT32_C(0x80000000) | (n + i));
1588 }
Marat Dukhan075088a2020-05-12 19:42:12 -07001589 xnn_math_f32_roundne__scalar_addsub(kBlockSize * sizeof(float), inputs.data(), outputs.data());
Marat Dukhan8853b822020-05-07 12:19:01 -07001590 for (uint32_t i = 0; i < kBlockSize; i++) {
1591 const uint32_t reference_output = fp32_to_bits(std::nearbyint(inputs[i]));
1592 ASSERT_EQ(reference_output, fp32_to_bits(outputs[i]))
1593 << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(inputs[i])
1594 << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
1595 << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(outputs[i]);
1596 }
1597 }
1598}
1599
Marat Dukhan075088a2020-05-12 19:42:12 -07001600TEST(ROUNDNE__SCALAR_ADDSUB, positive_snan) {
Marat Dukhan8853b822020-05-07 12:19:01 -07001601 std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
1602 std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
1603 for (uint32_t n = UINT32_C(0x7F800000); n < UINT32_C(0x7FC00000); n += kBlockSize) {
1604 for (uint32_t i = 0; i < kBlockSize; i++) {
1605 inputs[i] = fp32_from_bits(std::max<uint32_t>(n + i, UINT32_C(0x7F800001)));
1606 }
Marat Dukhan075088a2020-05-12 19:42:12 -07001607 xnn_math_f32_roundne__scalar_addsub(kBlockSize * sizeof(float), inputs.data(), outputs.data());
Marat Dukhan8853b822020-05-07 12:19:01 -07001608 for (uint32_t i = 0; i < kBlockSize; i++) {
1609 const uint32_t reference_output = fp32_to_bits(std::nearbyint(inputs[i]));
1610 ASSERT_EQ(reference_output & UINT32_C(0xFFBFFFFF), fp32_to_bits(outputs[i]) & UINT32_C(0xFFBFFFFF))
1611 << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(inputs[i])
1612 << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
1613 << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(outputs[i]);
1614 }
1615 }
1616}
1617
Marat Dukhan075088a2020-05-12 19:42:12 -07001618TEST(ROUNDNE__SCALAR_ADDSUB, negative_snan) {
Marat Dukhan8853b822020-05-07 12:19:01 -07001619 std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
1620 std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
1621 for (uint32_t n = UINT32_C(0x7F800000); n < UINT32_C(0x7FC00000); n += kBlockSize) {
1622 for (uint32_t i = 0; i < kBlockSize; i++) {
1623 inputs[i] = fp32_from_bits(UINT32_C(0x80000000) | std::max<uint32_t>(n + i, UINT32_C(0x7F800001)));
1624 }
Marat Dukhan075088a2020-05-12 19:42:12 -07001625 xnn_math_f32_roundne__scalar_addsub(kBlockSize * sizeof(float), inputs.data(), outputs.data());
Marat Dukhan8853b822020-05-07 12:19:01 -07001626 for (uint32_t i = 0; i < kBlockSize; i++) {
1627 const uint32_t reference_output = fp32_to_bits(std::nearbyint(inputs[i]));
1628 ASSERT_EQ(reference_output & UINT32_C(0xFFBFFFFF), fp32_to_bits(outputs[i]) & UINT32_C(0xFFBFFFFF))
1629 << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(inputs[i])
1630 << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
1631 << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(outputs[i]);
1632 }
1633 }
1634}
1635
Marat Dukhan075088a2020-05-12 19:42:12 -07001636TEST(ROUNDNE__SCALAR_ADDSUB, positive_snan_to_qnan) {
Marat Dukhan8853b822020-05-07 12:19:01 -07001637 std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
1638 std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
1639 for (uint32_t n = UINT32_C(0x7F800000); n < UINT32_C(0x7FC00000); n += kBlockSize) {
1640 for (uint32_t i = 0; i < kBlockSize; i++) {
1641 inputs[i] = fp32_from_bits(std::max<uint32_t>(n + i, UINT32_C(0x7F800001)));
1642 }
Marat Dukhan075088a2020-05-12 19:42:12 -07001643 xnn_math_f32_roundne__scalar_addsub(kBlockSize * sizeof(float), inputs.data(), outputs.data());
Marat Dukhan8853b822020-05-07 12:19:01 -07001644 for (uint32_t i = 0; i < kBlockSize; i++) {
1645 const uint32_t reference_output = fp32_to_bits(std::nearbyint(inputs[i]));
1646 ASSERT_EQ(reference_output, fp32_to_bits(outputs[i]))
1647 << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(inputs[i])
1648 << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
1649 << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(outputs[i]);
1650 }
1651 }
1652}
1653
Marat Dukhan075088a2020-05-12 19:42:12 -07001654TEST(ROUNDNE__SCALAR_ADDSUB, negative_snan_to_qnan) {
Marat Dukhan8853b822020-05-07 12:19:01 -07001655 std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
1656 std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
1657 for (uint32_t n = UINT32_C(0x7F800000); n < UINT32_C(0x7FC00000); n += kBlockSize) {
1658 for (uint32_t i = 0; i < kBlockSize; i++) {
1659 inputs[i] = fp32_from_bits(UINT32_C(0x80000000) | std::max<uint32_t>(n + i, UINT32_C(0x7F800001)));
1660 }
Marat Dukhan075088a2020-05-12 19:42:12 -07001661 xnn_math_f32_roundne__scalar_addsub(kBlockSize * sizeof(float), inputs.data(), outputs.data());
Marat Dukhan8853b822020-05-07 12:19:01 -07001662 for (uint32_t i = 0; i < kBlockSize; i++) {
1663 const uint32_t reference_output = fp32_to_bits(std::nearbyint(inputs[i]));
1664 ASSERT_EQ(reference_output, fp32_to_bits(outputs[i]))
1665 << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(inputs[i])
1666 << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
1667 << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(outputs[i]);
1668 }
1669 }
1670}