blob: 2188f2e094589fe7389f9eac7eedbd07704642cd [file] [log] [blame]
Marat Dukhand24301d2021-12-02 00:13:45 -08001// Copyright 2021 Google LLC
2//
3// This source code is licensed under the BSD-style license found in the
4// LICENSE file in the root directory of this source tree.
5
#include <algorithm>
#include <cmath>
#include <cstddef>
#include <cstdint>
#include <cstdlib>
#include <iomanip>
#include <ios>
#include <limits>
#include <vector>

#include <gtest/gtest.h>

#include <fp16.h>

#include <xnnpack/AlignedAllocator.h>
#include <xnnpack/common.h>
#include <xnnpack/isa-checks.h>
#include <xnnpack/math-stubs.h>
23
24
// Number of elements converted per kernel call in the tests below. It sizes
// the input/output buffers and is the step of the bit-pattern sweeps.
// Unsigned so that it matches the uint32_t counters it is compared with and
// added to, avoiding mixed signed/unsigned arithmetic.
constexpr uint32_t kBlockSize = 1024;
26
27#if XNN_ARCH_ARM || XNN_ARCH_ARM64
28 TEST(CVT__NEON, positive_normal) {
29 TEST_REQUIRES_ARM_NEON;
30
31 std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
32 std::vector<int8_t, AlignedAllocator<int8_t, 64>> outputs(kBlockSize);
33 for (int32_t zero_point = std::numeric_limits<int8_t>::min();
34 zero_point <= std::numeric_limits<int8_t>::max();
35 zero_point++)
36 {
37 const uint32_t max_input = fp32_to_bits((float) (std::numeric_limits<int8_t>::max() - zero_point));
38 for (uint32_t n = 0; n < max_input; n += kBlockSize) {
39 for (uint32_t i = 0; i < kBlockSize; i++) {
40 inputs[i] = fp32_from_bits(std::min<uint32_t>(n + i, max_input));
41 }
42 xnn_math_f32_qs8_cvt__neon(kBlockSize * sizeof(int8_t), inputs.data(), outputs.data(), int8_t(zero_point));
43 for (uint32_t i = 0; i < kBlockSize; i++) {
44 long reference_output = std::lrintf(inputs[i]) + long(zero_point);
45 if (inputs[i] >= float(std::numeric_limits<long>::max())) {
46 reference_output = std::numeric_limits<int8_t>::max();
47 } else if (inputs[i] <= float(std::numeric_limits<long>::min())) {
48 reference_output = std::numeric_limits<int8_t>::min();
49 }
50 ASSERT_EQ(reference_output, long(outputs[i]))
51 << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(inputs[i])
52 << ", reference = " << std::dec << reference_output
53 << ", optimized = " << std::dec << int32_t(outputs[i])
54 << ", zero point = " << std::dec << zero_point;
55 }
56 }
57 }
58 }
59
60 TEST(CVT__NEON, negative_normal) {
61 TEST_REQUIRES_ARM_NEON;
62
63 std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
64 std::vector<int8_t, AlignedAllocator<int8_t, 64>> outputs(kBlockSize);
65 for (int32_t zero_point = std::numeric_limits<int8_t>::min();
66 zero_point <= std::numeric_limits<int8_t>::max();
67 zero_point++)
68 {
69 const uint32_t max_input = fp32_to_bits((float) (zero_point - std::numeric_limits<int8_t>::min()));
70 for (uint32_t n = 0; n < max_input; n += kBlockSize) {
71 for (uint32_t i = 0; i < kBlockSize; i++) {
72 inputs[i] = fp32_from_bits(UINT32_C(0x80000000) | std::min<uint32_t>(n + i, max_input));
73 }
74 xnn_math_f32_qs8_cvt__neon(kBlockSize * sizeof(int8_t), inputs.data(), outputs.data(), int8_t(zero_point));
75 for (uint32_t i = 0; i < kBlockSize; i++) {
76 long reference_output = std::lrintf(inputs[i]) + long(zero_point);
77 if (inputs[i] >= float(std::numeric_limits<long>::max())) {
78 reference_output = std::numeric_limits<int8_t>::max();
79 } else if (inputs[i] <= float(std::numeric_limits<long>::min())) {
80 reference_output = std::numeric_limits<int8_t>::min();
81 }
82 ASSERT_EQ(reference_output, long(outputs[i]))
83 << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(inputs[i])
84 << ", reference = " << std::dec << reference_output
85 << ", optimized = " << std::dec << int32_t(outputs[i])
86 << ", zero point = " << std::dec << zero_point;
87 }
88 }
89 }
90 }
91
92 TEST(CVT__NEON, positive_saturation) {
93 TEST_REQUIRES_ARM_NEON;
94
95 std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
96 std::vector<int8_t, AlignedAllocator<int8_t, 64>> outputs(kBlockSize);
97 for (int32_t zero_point = std::numeric_limits<int8_t>::min();
98 zero_point <= std::numeric_limits<int8_t>::max();
99 zero_point++)
100 {
101 const uint32_t min_input = fp32_to_bits((float) (std::numeric_limits<int8_t>::max() - zero_point));
102 const uint32_t max_input = UINT32_C(0x7F800000);
103 for (uint32_t n = min_input; n < max_input; n += kBlockSize) {
104 for (uint32_t i = 0; i < kBlockSize; i++) {
105 inputs[i] = fp32_from_bits(std::min<uint32_t>(n + i, max_input));
106 }
107 xnn_math_f32_qs8_cvt__neon(kBlockSize * sizeof(int8_t), inputs.data(), outputs.data(), int8_t(zero_point));
108 for (uint32_t i = 0; i < kBlockSize; i++) {
109 const int32_t reference_output = std::numeric_limits<int8_t>::max();
110 ASSERT_EQ(reference_output, int32_t(outputs[i]))
111 << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(inputs[i])
112 << ", reference = " << std::dec << reference_output
113 << ", optimized = " << std::dec << int32_t(outputs[i])
114 << ", zero point = " << std::dec << zero_point;
115 }
116 }
117 }
118 }
119
120 TEST(CVT__NEON, negative_saturation) {
121 TEST_REQUIRES_ARM_NEON;
122
123 std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
124 std::vector<int8_t, AlignedAllocator<int8_t, 64>> outputs(kBlockSize);
125 for (int32_t zero_point = std::numeric_limits<int8_t>::min();
126 zero_point <= std::numeric_limits<int8_t>::max();
127 zero_point++)
128 {
129 const uint32_t min_input = fp32_to_bits((float) (zero_point - std::numeric_limits<int8_t>::min()));
130 const uint32_t max_input = UINT32_C(0x7F800000);
131 for (uint32_t n = min_input; n < max_input; n += kBlockSize) {
132 for (uint32_t i = 0; i < kBlockSize; i++) {
133 inputs[i] = fp32_from_bits(UINT32_C(0x80000000) | std::min<uint32_t>(n + i, max_input));
134 }
135 xnn_math_f32_qs8_cvt__neon(kBlockSize * sizeof(int8_t), inputs.data(), outputs.data(), int8_t(zero_point));
136 for (uint32_t i = 0; i < kBlockSize; i++) {
137 const int32_t reference_output = std::numeric_limits<int8_t>::min();
138 ASSERT_EQ(reference_output, int32_t(outputs[i]))
139 << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(inputs[i])
140 << ", reference = " << std::dec << reference_output
141 << ", optimized = " << std::dec << int32_t(outputs[i])
142 << ", zero point = " << std::dec << zero_point;
143 }
144 }
145 }
146 }
147#endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
148
149#if XNN_ARCH_ARM || XNN_ARCH_ARM64
150 TEST(CVT__NEONV8, positive_normal) {
151 TEST_REQUIRES_ARM_NEON_V8;
152
153 std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
154 std::vector<int8_t, AlignedAllocator<int8_t, 64>> outputs(kBlockSize);
155 for (int32_t zero_point = std::numeric_limits<int8_t>::min();
156 zero_point <= std::numeric_limits<int8_t>::max();
157 zero_point++)
158 {
159 const uint32_t max_input = fp32_to_bits((float) (std::numeric_limits<int8_t>::max() - zero_point));
160 for (uint32_t n = 0; n < max_input; n += kBlockSize) {
161 for (uint32_t i = 0; i < kBlockSize; i++) {
162 inputs[i] = fp32_from_bits(std::min<uint32_t>(n + i, max_input));
163 }
164 xnn_math_f32_qs8_cvt__neonv8(kBlockSize * sizeof(int8_t), inputs.data(), outputs.data(), int8_t(zero_point));
165 for (uint32_t i = 0; i < kBlockSize; i++) {
166 long reference_output = std::lrintf(inputs[i]) + long(zero_point);
167 if (inputs[i] >= float(std::numeric_limits<long>::max())) {
168 reference_output = std::numeric_limits<int8_t>::max();
169 } else if (inputs[i] <= float(std::numeric_limits<long>::min())) {
170 reference_output = std::numeric_limits<int8_t>::min();
171 }
172 ASSERT_EQ(reference_output, long(outputs[i]))
173 << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(inputs[i])
174 << ", reference = " << std::dec << reference_output
175 << ", optimized = " << std::dec << int32_t(outputs[i])
176 << ", zero point = " << std::dec << zero_point;
177 }
178 }
179 }
180 }
181
182 TEST(CVT__NEONV8, negative_normal) {
183 TEST_REQUIRES_ARM_NEON_V8;
184
185 std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
186 std::vector<int8_t, AlignedAllocator<int8_t, 64>> outputs(kBlockSize);
187 for (int32_t zero_point = std::numeric_limits<int8_t>::min();
188 zero_point <= std::numeric_limits<int8_t>::max();
189 zero_point++)
190 {
191 const uint32_t max_input = fp32_to_bits((float) (zero_point - std::numeric_limits<int8_t>::min()));
192 for (uint32_t n = 0; n < max_input; n += kBlockSize) {
193 for (uint32_t i = 0; i < kBlockSize; i++) {
194 inputs[i] = fp32_from_bits(UINT32_C(0x80000000) | std::min<uint32_t>(n + i, max_input));
195 }
196 xnn_math_f32_qs8_cvt__neonv8(kBlockSize * sizeof(int8_t), inputs.data(), outputs.data(), int8_t(zero_point));
197 for (uint32_t i = 0; i < kBlockSize; i++) {
198 long reference_output = std::lrintf(inputs[i]) + long(zero_point);
199 if (inputs[i] >= float(std::numeric_limits<long>::max())) {
200 reference_output = std::numeric_limits<int8_t>::max();
201 } else if (inputs[i] <= float(std::numeric_limits<long>::min())) {
202 reference_output = std::numeric_limits<int8_t>::min();
203 }
204 ASSERT_EQ(reference_output, long(outputs[i]))
205 << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(inputs[i])
206 << ", reference = " << std::dec << reference_output
207 << ", optimized = " << std::dec << int32_t(outputs[i])
208 << ", zero point = " << std::dec << zero_point;
209 }
210 }
211 }
212 }
213
214 TEST(CVT__NEONV8, positive_saturation) {
215 TEST_REQUIRES_ARM_NEON_V8;
216
217 std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
218 std::vector<int8_t, AlignedAllocator<int8_t, 64>> outputs(kBlockSize);
219 for (int32_t zero_point = std::numeric_limits<int8_t>::min();
220 zero_point <= std::numeric_limits<int8_t>::max();
221 zero_point++)
222 {
223 const uint32_t min_input = fp32_to_bits((float) (std::numeric_limits<int8_t>::max() - zero_point));
224 const uint32_t max_input = UINT32_C(0x7F800000);
225 for (uint32_t n = min_input; n < max_input; n += kBlockSize) {
226 for (uint32_t i = 0; i < kBlockSize; i++) {
227 inputs[i] = fp32_from_bits(std::min<uint32_t>(n + i, max_input));
228 }
229 xnn_math_f32_qs8_cvt__neonv8(kBlockSize * sizeof(int8_t), inputs.data(), outputs.data(), int8_t(zero_point));
230 for (uint32_t i = 0; i < kBlockSize; i++) {
231 const int32_t reference_output = std::numeric_limits<int8_t>::max();
232 ASSERT_EQ(reference_output, int32_t(outputs[i]))
233 << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(inputs[i])
234 << ", reference = " << std::dec << reference_output
235 << ", optimized = " << std::dec << int32_t(outputs[i])
236 << ", zero point = " << std::dec << zero_point;
237 }
238 }
239 }
240 }
241
242 TEST(CVT__NEONV8, negative_saturation) {
243 TEST_REQUIRES_ARM_NEON_V8;
244
245 std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
246 std::vector<int8_t, AlignedAllocator<int8_t, 64>> outputs(kBlockSize);
247 for (int32_t zero_point = std::numeric_limits<int8_t>::min();
248 zero_point <= std::numeric_limits<int8_t>::max();
249 zero_point++)
250 {
251 const uint32_t min_input = fp32_to_bits((float) (zero_point - std::numeric_limits<int8_t>::min()));
252 const uint32_t max_input = UINT32_C(0x7F800000);
253 for (uint32_t n = min_input; n < max_input; n += kBlockSize) {
254 for (uint32_t i = 0; i < kBlockSize; i++) {
255 inputs[i] = fp32_from_bits(UINT32_C(0x80000000) | std::min<uint32_t>(n + i, max_input));
256 }
257 xnn_math_f32_qs8_cvt__neonv8(kBlockSize * sizeof(int8_t), inputs.data(), outputs.data(), int8_t(zero_point));
258 for (uint32_t i = 0; i < kBlockSize; i++) {
259 const int32_t reference_output = std::numeric_limits<int8_t>::min();
260 ASSERT_EQ(reference_output, int32_t(outputs[i]))
261 << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(inputs[i])
262 << ", reference = " << std::dec << reference_output
263 << ", optimized = " << std::dec << int32_t(outputs[i])
264 << ", zero point = " << std::dec << zero_point;
265 }
266 }
267 }
268 }
269#endif // XNN_ARCH_ARM || XNN_ARCH_ARM64