Marat Dukhan | 7ae046d | 2017-02-16 16:57:21 -0500 | [diff] [blame] | 1 | #include <gtest/gtest.h> |
| 2 | |
| 3 | #include <cstdint> |
| 4 | |
| 5 | #include <fp16.h> |
| 6 | #include <tables.h> |
| 7 | |
| 8 | #if (defined(__i386__) || defined(__x86_64__)) && defined(__F16C__) |
| 9 | #include <x86intrin.h> |
| 10 | #endif |
| 11 | |
Marat Dukhan | c6a15ea | 2017-02-17 12:40:14 -0500 | [diff] [blame^] | 12 | |
Marat Dukhan | 7ae046d | 2017-02-16 16:57:21 -0500 | [diff] [blame] | 13 | TEST(FP16_ALT_FROM_FP32_VALUE, normalized_powers_of_2) { |
| 14 | const uint16_t min_po2_f16 = UINT16_C(0x0400); |
| 15 | const uint16_t eighths_f16 = UINT16_C(0x3000); |
| 16 | const uint16_t quarter_f16 = UINT16_C(0x3400); |
| 17 | const uint16_t half_f16 = UINT16_C(0x3800); |
| 18 | const uint16_t one_f16 = UINT16_C(0x3C00); |
| 19 | const uint16_t two_f16 = UINT16_C(0x4000); |
| 20 | const uint16_t four_f16 = UINT16_C(0x4400); |
| 21 | const uint16_t eight_f16 = UINT16_C(0x4800); |
| 22 | const uint16_t sixteen_f16 = UINT16_C(0x4C00); |
| 23 | const uint16_t thirtytwo_f16 = UINT16_C(0x5000); |
| 24 | const uint16_t sixtyfour_f16 = UINT16_C(0x5400); |
| 25 | const uint16_t max_po2_f16 = UINT16_C(0x7C00); |
| 26 | |
| 27 | const uint32_t min_po2_f32 = UINT32_C(0x38800000); |
| 28 | const uint32_t eighths_f32 = UINT32_C(0x3E000000); |
| 29 | const uint32_t quarter_f32 = UINT32_C(0x3E800000); |
| 30 | const uint32_t half_f32 = UINT32_C(0x3F000000); |
| 31 | const uint32_t one_f32 = UINT32_C(0x3F800000); |
| 32 | const uint32_t two_f32 = UINT32_C(0x40000000); |
| 33 | const uint32_t four_f32 = UINT32_C(0x40800000); |
| 34 | const uint32_t eight_f32 = UINT32_C(0x41000000); |
| 35 | const uint32_t sixteen_f32 = UINT32_C(0x41800000); |
| 36 | const uint32_t thirtytwo_f32 = UINT32_C(0x42000000); |
| 37 | const uint32_t sixtyfour_f32 = UINT32_C(0x42800000); |
| 38 | const uint32_t max_po2_f32 = UINT32_C(0x47800000); |
| 39 | |
| 40 | float min_po2_value; |
| 41 | memcpy(&min_po2_value, &min_po2_f32, sizeof(min_po2_value)); |
| 42 | EXPECT_EQ(min_po2_f16, fp16_alt_from_fp32_value(min_po2_value)) << |
| 43 | std::hex << std::uppercase << std::setfill('0') << |
| 44 | "F32 = 0x" << std::setw(8) << min_po2_f32 << ", " << |
| 45 | "F16(F32) = 0x" << std::setw(4) << fp16_alt_from_fp32_value(min_po2_value) << ", " << |
| 46 | "F16 = 0x" << std::setw(4) << min_po2_f16; |
| 47 | |
| 48 | float eighths_value; |
| 49 | memcpy(&eighths_value, &eighths_f32, sizeof(eighths_value)); |
| 50 | EXPECT_EQ(eighths_f16, fp16_alt_from_fp32_value(eighths_value)) << |
| 51 | std::hex << std::uppercase << std::setfill('0') << |
| 52 | "F32 = 0x" << std::setw(8) << eighths_f32 << ", " << |
| 53 | "F16(F32) = 0x" << std::setw(4) << fp16_alt_from_fp32_value(eighths_value) << ", " << |
| 54 | "F16 = 0x" << std::setw(4) << eighths_f16; |
| 55 | |
| 56 | float quarter_value; |
| 57 | memcpy(&quarter_value, &quarter_f32, sizeof(quarter_value)); |
| 58 | EXPECT_EQ(quarter_f16, fp16_alt_from_fp32_value(quarter_value)) << |
| 59 | std::hex << std::uppercase << std::setfill('0') << |
| 60 | "F32 = 0x" << std::setw(8) << quarter_f32 << ", " << |
| 61 | "F16(F32) = 0x" << std::setw(4) << fp16_alt_from_fp32_value(quarter_value) << ", " << |
| 62 | "F16 = 0x" << std::setw(4) << quarter_f16; |
| 63 | |
| 64 | float half_value; |
| 65 | memcpy(&half_value, &half_f32, sizeof(half_value)); |
| 66 | EXPECT_EQ(half_f16, fp16_alt_from_fp32_value(half_value)) << |
| 67 | std::hex << std::uppercase << std::setfill('0') << |
| 68 | "F32 = 0x" << std::setw(8) << half_f32 << ", " << |
| 69 | "F16(F32) = 0x" << std::setw(4) << fp16_alt_from_fp32_value(half_value) << ", " << |
| 70 | "F16 = 0x" << std::setw(4) << half_f16; |
| 71 | |
| 72 | float one_value; |
| 73 | memcpy(&one_value, &one_f32, sizeof(one_value)); |
| 74 | EXPECT_EQ(one_f16, fp16_alt_from_fp32_value(one_value)) << |
| 75 | std::hex << std::uppercase << std::setfill('0') << |
| 76 | "F32 = 0x" << std::setw(8) << one_f32 << ", " << |
| 77 | "F16(F32) = 0x" << std::setw(4) << fp16_alt_from_fp32_value(one_value) << ", " << |
| 78 | "F16 = 0x" << std::setw(4) << one_f16; |
| 79 | |
| 80 | float two_value; |
| 81 | memcpy(&two_value, &two_f32, sizeof(two_value)); |
| 82 | EXPECT_EQ(two_f16, fp16_alt_from_fp32_value(two_value)) << |
| 83 | std::hex << std::uppercase << std::setfill('0') << |
| 84 | "F32 = 0x" << std::setw(8) << two_f32 << ", " << |
| 85 | "F16(F32) = 0x" << std::setw(4) << fp16_alt_from_fp32_value(two_value) << ", " << |
| 86 | "F16 = 0x" << std::setw(4) << two_f16; |
| 87 | |
| 88 | float four_value; |
| 89 | memcpy(&four_value, &four_f32, sizeof(four_value)); |
| 90 | EXPECT_EQ(four_f16, fp16_alt_from_fp32_value(four_value)) << |
| 91 | std::hex << std::uppercase << std::setfill('0') << |
| 92 | "F32 = 0x" << std::setw(8) << four_f32 << ", " << |
| 93 | "F16(F32) = 0x" << std::setw(4) << fp16_alt_from_fp32_value(four_value) << ", " << |
| 94 | "F16 = 0x" << std::setw(4) << four_f16; |
| 95 | |
| 96 | float eight_value; |
| 97 | memcpy(&eight_value, &eight_f32, sizeof(eight_value)); |
| 98 | EXPECT_EQ(eight_f16, fp16_alt_from_fp32_value(eight_value)) << |
| 99 | std::hex << std::uppercase << std::setfill('0') << |
| 100 | "F32 = 0x" << std::setw(8) << eight_f32 << ", " << |
| 101 | "F16(F32) = 0x" << std::setw(4) << fp16_alt_from_fp32_value(eight_value) << ", " << |
| 102 | "F16 = 0x" << std::setw(4) << eight_f16; |
| 103 | |
| 104 | float sixteen_value; |
| 105 | memcpy(&sixteen_value, &sixteen_f32, sizeof(sixteen_value)); |
| 106 | EXPECT_EQ(sixteen_f16, fp16_alt_from_fp32_value(sixteen_value)) << |
| 107 | std::hex << std::uppercase << std::setfill('0') << |
| 108 | "F32 = 0x" << std::setw(8) << sixteen_f32 << ", " << |
| 109 | "F16(F32) = 0x" << std::setw(4) << fp16_alt_from_fp32_value(sixteen_value) << ", " << |
| 110 | "F16 = 0x" << std::setw(4) << sixteen_f16; |
| 111 | |
| 112 | float thirtytwo_value; |
| 113 | memcpy(&thirtytwo_value, &thirtytwo_f32, sizeof(thirtytwo_value)); |
| 114 | EXPECT_EQ(thirtytwo_f16, fp16_alt_from_fp32_value(thirtytwo_value)) << |
| 115 | std::hex << std::uppercase << std::setfill('0') << |
| 116 | "F32 = 0x" << std::setw(8) << thirtytwo_f32 << ", " << |
| 117 | "F16(F32) = 0x" << std::setw(4) << fp16_alt_from_fp32_value(thirtytwo_value) << ", " << |
| 118 | "F16 = 0x" << std::setw(4) << thirtytwo_f16; |
| 119 | |
| 120 | float sixtyfour_value; |
| 121 | memcpy(&sixtyfour_value, &sixtyfour_f32, sizeof(sixtyfour_value)); |
| 122 | EXPECT_EQ(sixtyfour_f16, fp16_alt_from_fp32_value(sixtyfour_value)) << |
| 123 | std::hex << std::uppercase << std::setfill('0') << |
| 124 | "F32 = 0x" << std::setw(8) << sixtyfour_f32 << ", " << |
| 125 | "F16(F32) = 0x" << std::setw(4) << fp16_alt_from_fp32_value(sixtyfour_value) << ", " << |
| 126 | "F16 = 0x" << std::setw(4) << sixtyfour_f16; |
| 127 | } |
| 128 | |
| 129 | TEST(FP16_ALT_FROM_FP32_VALUE, denormalized_powers_of_2) { |
| 130 | const uint16_t exp2_minus_15_f16 = UINT16_C(0x0200); |
| 131 | const uint16_t exp2_minus_16_f16 = UINT16_C(0x0100); |
| 132 | const uint16_t exp2_minus_17_f16 = UINT16_C(0x0080); |
| 133 | const uint16_t exp2_minus_18_f16 = UINT16_C(0x0040); |
| 134 | const uint16_t exp2_minus_19_f16 = UINT16_C(0x0020); |
| 135 | const uint16_t exp2_minus_20_f16 = UINT16_C(0x0010); |
| 136 | const uint16_t exp2_minus_21_f16 = UINT16_C(0x0008); |
| 137 | const uint16_t exp2_minus_22_f16 = UINT16_C(0x0004); |
| 138 | const uint16_t exp2_minus_23_f16 = UINT16_C(0x0002); |
| 139 | const uint16_t exp2_minus_24_f16 = UINT16_C(0x0001); |
| 140 | const uint16_t exp2_minus_25_f16 = UINT16_C(0x0000); |
| 141 | |
| 142 | const uint32_t exp2_minus_15_f32 = UINT32_C(0x38000000); |
| 143 | const uint32_t exp2_minus_16_f32 = UINT32_C(0x37800000); |
| 144 | const uint32_t exp2_minus_17_f32 = UINT32_C(0x37000000); |
| 145 | const uint32_t exp2_minus_18_f32 = UINT32_C(0x36800000); |
| 146 | const uint32_t exp2_minus_19_f32 = UINT32_C(0x36000000); |
| 147 | const uint32_t exp2_minus_20_f32 = UINT32_C(0x35800000); |
| 148 | const uint32_t exp2_minus_21_f32 = UINT32_C(0x35000000); |
| 149 | const uint32_t exp2_minus_22_f32 = UINT32_C(0x34800000); |
| 150 | const uint32_t exp2_minus_23_f32 = UINT32_C(0x34000000); |
| 151 | const uint32_t exp2_minus_24_f32 = UINT32_C(0x33800000); |
| 152 | const uint32_t exp2_minus_25_f32 = UINT32_C(0x33000000); |
| 153 | |
| 154 | float exp2_minus_15_value; |
| 155 | memcpy(&exp2_minus_15_value, &exp2_minus_15_f32, sizeof(exp2_minus_15_value)); |
| 156 | EXPECT_EQ(exp2_minus_15_f16, fp16_alt_from_fp32_value(exp2_minus_15_value)) << |
| 157 | std::hex << std::uppercase << std::setfill('0') << |
| 158 | "F32 = 0x" << std::setw(8) << exp2_minus_15_f32 << ", " << |
| 159 | "F16(F32) = 0x" << std::setw(4) << fp16_alt_from_fp32_value(exp2_minus_15_value) << ", " << |
| 160 | "F16 = 0x" << std::setw(4) << exp2_minus_15_f16; |
| 161 | |
| 162 | float exp2_minus_16_value; |
| 163 | memcpy(&exp2_minus_16_value, &exp2_minus_16_f32, sizeof(exp2_minus_16_value)); |
| 164 | EXPECT_EQ(exp2_minus_16_f16, fp16_alt_from_fp32_value(exp2_minus_16_value)) << |
| 165 | std::hex << std::uppercase << std::setfill('0') << |
| 166 | "F32 = 0x" << std::setw(8) << exp2_minus_16_f32 << ", " << |
| 167 | "F16(F32) = 0x" << std::setw(4) << fp16_alt_from_fp32_value(exp2_minus_16_value) << ", " << |
| 168 | "F16 = 0x" << std::setw(4) << exp2_minus_16_f16; |
| 169 | |
| 170 | float exp2_minus_17_value; |
| 171 | memcpy(&exp2_minus_17_value, &exp2_minus_17_f32, sizeof(exp2_minus_17_value)); |
| 172 | EXPECT_EQ(exp2_minus_17_f16, fp16_alt_from_fp32_value(exp2_minus_17_value)) << |
| 173 | std::hex << std::uppercase << std::setfill('0') << |
| 174 | "F32 = 0x" << std::setw(8) << exp2_minus_17_f32 << ", " << |
| 175 | "F16(F32) = 0x" << std::setw(4) << fp16_alt_from_fp32_value(exp2_minus_17_value) << ", " << |
| 176 | "F16 = 0x" << std::setw(4) << exp2_minus_17_f16; |
| 177 | |
| 178 | float exp2_minus_18_value; |
| 179 | memcpy(&exp2_minus_18_value, &exp2_minus_18_f32, sizeof(exp2_minus_18_value)); |
| 180 | EXPECT_EQ(exp2_minus_18_f16, fp16_alt_from_fp32_value(exp2_minus_18_value)) << |
| 181 | std::hex << std::uppercase << std::setfill('0') << |
| 182 | "F32 = 0x" << std::setw(8) << exp2_minus_18_f32 << ", " << |
| 183 | "F16(F32) = 0x" << std::setw(4) << fp16_alt_from_fp32_value(exp2_minus_18_value) << ", " << |
| 184 | "F16 = 0x" << std::setw(4) << exp2_minus_18_f16; |
| 185 | |
| 186 | float exp2_minus_19_value; |
| 187 | memcpy(&exp2_minus_19_value, &exp2_minus_19_f32, sizeof(exp2_minus_19_value)); |
| 188 | EXPECT_EQ(exp2_minus_19_f16, fp16_alt_from_fp32_value(exp2_minus_19_value)) << |
| 189 | std::hex << std::uppercase << std::setfill('0') << |
| 190 | "F32 = 0x" << std::setw(8) << exp2_minus_19_f32 << ", " << |
| 191 | "F16(F32) = 0x" << std::setw(4) << fp16_alt_from_fp32_value(exp2_minus_19_value) << ", " << |
| 192 | "F16 = 0x" << std::setw(4) << exp2_minus_19_f16; |
| 193 | |
| 194 | float exp2_minus_20_value; |
| 195 | memcpy(&exp2_minus_20_value, &exp2_minus_20_f32, sizeof(exp2_minus_20_value)); |
| 196 | EXPECT_EQ(exp2_minus_20_f16, fp16_alt_from_fp32_value(exp2_minus_20_value)) << |
| 197 | std::hex << std::uppercase << std::setfill('0') << |
| 198 | "F32 = 0x" << std::setw(8) << exp2_minus_20_f32 << ", " << |
| 199 | "F16(F32) = 0x" << std::setw(4) << fp16_alt_from_fp32_value(exp2_minus_20_value) << ", " << |
| 200 | "F16 = 0x" << std::setw(4) << exp2_minus_20_f16; |
| 201 | |
| 202 | float exp2_minus_21_value; |
| 203 | memcpy(&exp2_minus_21_value, &exp2_minus_21_f32, sizeof(exp2_minus_21_value)); |
| 204 | EXPECT_EQ(exp2_minus_21_f16, fp16_alt_from_fp32_value(exp2_minus_21_value)) << |
| 205 | std::hex << std::uppercase << std::setfill('0') << |
| 206 | "F32 = 0x" << std::setw(8) << exp2_minus_21_f32 << ", " << |
| 207 | "F16(F32) = 0x" << std::setw(4) << fp16_alt_from_fp32_value(exp2_minus_21_value) << ", " << |
| 208 | "F16 = 0x" << std::setw(4) << exp2_minus_21_f16; |
| 209 | |
| 210 | float exp2_minus_22_value; |
| 211 | memcpy(&exp2_minus_22_value, &exp2_minus_22_f32, sizeof(exp2_minus_22_value)); |
| 212 | EXPECT_EQ(exp2_minus_22_f16, fp16_alt_from_fp32_value(exp2_minus_22_value)) << |
| 213 | std::hex << std::uppercase << std::setfill('0') << |
| 214 | "F32 = 0x" << std::setw(8) << exp2_minus_22_f32 << ", " << |
| 215 | "F16(F32) = 0x" << std::setw(4) << fp16_alt_from_fp32_value(exp2_minus_22_value) << ", " << |
| 216 | "F16 = 0x" << std::setw(4) << exp2_minus_22_f16; |
| 217 | |
| 218 | float exp2_minus_23_value; |
| 219 | memcpy(&exp2_minus_23_value, &exp2_minus_23_f32, sizeof(exp2_minus_23_value)); |
| 220 | EXPECT_EQ(exp2_minus_23_f16, fp16_alt_from_fp32_value(exp2_minus_23_value)) << |
| 221 | std::hex << std::uppercase << std::setfill('0') << |
| 222 | "F32 = 0x" << std::setw(8) << exp2_minus_23_f32 << ", " << |
| 223 | "F16(F32) = 0x" << std::setw(4) << fp16_alt_from_fp32_value(exp2_minus_23_value) << ", " << |
| 224 | "F16 = 0x" << std::setw(4) << exp2_minus_23_f16; |
| 225 | |
| 226 | float exp2_minus_24_value; |
| 227 | memcpy(&exp2_minus_24_value, &exp2_minus_24_f32, sizeof(exp2_minus_24_value)); |
| 228 | EXPECT_EQ(exp2_minus_24_f16, fp16_alt_from_fp32_value(exp2_minus_24_value)) << |
| 229 | std::hex << std::uppercase << std::setfill('0') << |
| 230 | "F32 = 0x" << std::setw(8) << exp2_minus_24_f32 << ", " << |
| 231 | "F16(F32) = 0x" << std::setw(4) << fp16_alt_from_fp32_value(exp2_minus_24_value) << ", " << |
| 232 | "F16 = 0x" << std::setw(4) << exp2_minus_24_f16; |
| 233 | |
| 234 | float exp2_minus_25_value; |
| 235 | memcpy(&exp2_minus_25_value, &exp2_minus_25_f32, sizeof(exp2_minus_25_value)); |
| 236 | EXPECT_EQ(exp2_minus_25_f16, fp16_alt_from_fp32_value(exp2_minus_25_value)) << |
| 237 | std::hex << std::uppercase << std::setfill('0') << |
| 238 | "F32 = 0x" << std::setw(8) << exp2_minus_25_f32 << ", " << |
| 239 | "F16(F32) = 0x" << std::setw(4) << fp16_alt_from_fp32_value(exp2_minus_25_value) << ", " << |
| 240 | "F16 = 0x" << std::setw(4) << exp2_minus_25_f16; |
| 241 | } |
| 242 | |
| 243 | TEST(FP16_ALT_FROM_FP32_VALUE, zero) { |
| 244 | const uint16_t positive_zero_f16 = UINT16_C(0x0000); |
| 245 | const uint16_t negative_zero_f16 = UINT16_C(0x8000); |
| 246 | |
| 247 | const uint32_t positive_zero_f32 = UINT32_C(0x00000000); |
| 248 | const uint32_t negative_zero_f32 = UINT32_C(0x80000000); |
| 249 | |
| 250 | float positive_zero_value; |
| 251 | memcpy(&positive_zero_value, &positive_zero_f32, sizeof(positive_zero_value)); |
| 252 | EXPECT_EQ(positive_zero_f16, fp16_alt_from_fp32_value(positive_zero_value)) << |
| 253 | std::hex << std::uppercase << std::setfill('0') << |
| 254 | "F32 = 0x" << std::setw(8) << positive_zero_f32 << ", " << |
| 255 | "F16(F32) = 0x" << std::setw(4) << fp16_alt_from_fp32_value(positive_zero_value) << ", " << |
| 256 | "F16 = 0x" << std::setw(4) << positive_zero_f16; |
| 257 | |
| 258 | float negative_zero_value; |
| 259 | memcpy(&negative_zero_value, &negative_zero_f32, sizeof(negative_zero_value)); |
| 260 | EXPECT_EQ(negative_zero_f16, fp16_alt_from_fp32_value(negative_zero_value)) << |
| 261 | std::hex << std::uppercase << std::setfill('0') << |
| 262 | "F32 = 0x" << std::setw(8) << negative_zero_f32 << ", " << |
| 263 | "F16(F32) = 0x" << std::setw(4) << fp16_alt_from_fp32_value(negative_zero_value) << ", " << |
| 264 | "F16 = 0x" << std::setw(4) << negative_zero_f16; |
| 265 | } |
| 266 | |
| 267 | TEST(FP16_ALT_FROM_FP32_VALUE, infinity) { |
| 268 | const uint16_t max_f16 = UINT16_C(0x7FFF); |
| 269 | const uint16_t min_f16 = UINT16_C(0xFFFF); |
| 270 | |
| 271 | const uint32_t positive_infinity_f32 = UINT32_C(0x7F800000); |
| 272 | const uint32_t negative_infinity_f32 = UINT32_C(0xFF800000); |
| 273 | |
| 274 | float positive_infinity_value; |
| 275 | memcpy(&positive_infinity_value, &positive_infinity_f32, sizeof(positive_infinity_value)); |
| 276 | EXPECT_EQ(max_f16, fp16_alt_from_fp32_value(positive_infinity_value)) << |
| 277 | std::hex << std::uppercase << std::setfill('0') << |
| 278 | "F32 = 0x" << std::setw(8) << positive_infinity_f32 << ", " << |
| 279 | "F16(F32) = 0x" << std::setw(4) << fp16_alt_from_fp32_value(positive_infinity_value) << ", " << |
| 280 | "F16 = 0x" << std::setw(4) << max_f16; |
| 281 | |
| 282 | float negative_infinity_value; |
| 283 | memcpy(&negative_infinity_value, &negative_infinity_f32, sizeof(negative_infinity_value)); |
| 284 | EXPECT_EQ(min_f16, fp16_alt_from_fp32_value(negative_infinity_value)) << |
| 285 | std::hex << std::uppercase << std::setfill('0') << |
| 286 | "F32 = 0x" << std::setw(8) << negative_infinity_f32 << ", " << |
| 287 | "F16(F32) = 0x" << std::setw(4) << fp16_alt_from_fp32_value(negative_infinity_value) << ", " << |
| 288 | "F16 = 0x" << std::setw(4) << min_f16; |
| 289 | } |
| 290 | |
| 291 | TEST(FP16_ALT_FROM_FP32_VALUE, positive_nan) { |
| 292 | for (uint32_t nan_f32 = UINT32_C(0x7FFFFFFF); nan_f32 > UINT32_C(0x7F800000); nan_f32--) { |
| 293 | float nan_value; |
| 294 | memcpy(&nan_value, &nan_f32, sizeof(nan_value)); |
| 295 | const uint16_t nan_f16 = fp16_alt_from_fp32_value(nan_value); |
| 296 | |
| 297 | /* Check sign */ |
| 298 | ASSERT_EQ(nan_f16 & UINT16_C(0x8000), 0) << |
| 299 | std::hex << std::uppercase << std::setfill('0') << |
| 300 | "F32 = 0x" << std::setw(8) << nan_f32 << ", " << |
| 301 | "F16(F32) = 0x" << std::setw(4) << nan_f16; |
| 302 | |
| 303 | /* Check exponent */ |
| 304 | ASSERT_EQ(nan_f16 & UINT16_C(0x7C00), UINT16_C(0x7C00)) << |
| 305 | std::hex << std::uppercase << std::setfill('0') << |
| 306 | "F32 = 0x" << std::setw(8) << nan_f32 << ", " << |
| 307 | "F16(F32) = 0x" << std::setw(4) << nan_f16; |
| 308 | |
| 309 | /* Check mantissa */ |
| 310 | ASSERT_NE(nan_f16 & UINT16_C(0x03FF), 0) << |
| 311 | std::hex << std::uppercase << std::setfill('0') << |
| 312 | "F32 = 0x" << std::setw(8) << nan_f32 << ", " << |
| 313 | "F16(F32) = 0x" << std::setw(4) << nan_f16; |
| 314 | } |
| 315 | } |
| 316 | |
| 317 | TEST(FP16_ALT_FROM_FP32_VALUE, negative_nan) { |
| 318 | for (uint32_t nan_f32 = UINT32_C(0xFFFFFFFF); nan_f32 > UINT32_C(0xFF800000); nan_f32--) { |
| 319 | float nan_value; |
| 320 | memcpy(&nan_value, &nan_f32, sizeof(nan_value)); |
| 321 | const uint16_t nan_f16 = fp16_alt_from_fp32_value(nan_value); |
| 322 | |
| 323 | /* Check sign */ |
| 324 | ASSERT_EQ(nan_f16 & UINT16_C(0x8000), UINT16_C(0x8000)) << |
| 325 | std::hex << std::uppercase << std::setfill('0') << |
| 326 | "F32 = 0x" << std::setw(8) << nan_f32 << ", " << |
| 327 | "F16(F32) = 0x" << std::setw(4) << nan_f16; |
| 328 | |
| 329 | /* Check exponent */ |
| 330 | ASSERT_EQ(nan_f16 & UINT16_C(0x7C00), UINT16_C(0x7C00)) << |
| 331 | std::hex << std::uppercase << std::setfill('0') << |
| 332 | "F32 = 0x" << std::setw(8) << nan_f32 << ", " << |
| 333 | "F16(F32) = 0x" << std::setw(4) << nan_f16; |
| 334 | |
| 335 | /* Check mantissa */ |
| 336 | ASSERT_NE(nan_f16 & UINT16_C(0x03FF), 0) << |
| 337 | std::hex << std::uppercase << std::setfill('0') << |
| 338 | "F32 = 0x" << std::setw(8) << nan_f32 << ", " << |
| 339 | "F16(F32) = 0x" << std::setw(4) << nan_f16; |
| 340 | } |
| 341 | } |
| 342 | |
| 343 | TEST(FP16_ALT_FROM_FP32_VALUE, revertible) { |
| 344 | /* Positive values */ |
| 345 | for (uint16_t f16 = UINT16_C(0x0000); f16 <= UINT16_C(0x7FFF); f16++) { |
| 346 | const float value_f32 = fp16_alt_to_fp32_value(f16); |
| 347 | uint32_t bits_f32; |
| 348 | memcpy(&bits_f32, &value_f32, sizeof(bits_f32)); |
| 349 | |
| 350 | ASSERT_EQ(f16, fp16_alt_from_fp32_value(value_f32)) << |
| 351 | std::hex << std::uppercase << std::setfill('0') << |
| 352 | "F16 = 0x" << std::setw(4) << f16 << ", " << |
| 353 | "F32(F16) = 0x" << std::setw(8) << bits_f32 << ", " << |
| 354 | "F16(F32(F16)) = 0x" << std::setw(4) << fp16_alt_from_fp32_value(value_f32); |
| 355 | } |
| 356 | |
| 357 | /* Negative values */ |
| 358 | for (uint16_t f16 = UINT16_C(0xFFFF); f16 >= UINT16_C(0x8000); f16--) { |
| 359 | const float value_f32 = fp16_alt_to_fp32_value(f16); |
| 360 | uint32_t bits_f32; |
| 361 | memcpy(&bits_f32, &value_f32, sizeof(bits_f32)); |
| 362 | |
| 363 | ASSERT_EQ(f16, fp16_alt_from_fp32_value(value_f32)) << |
| 364 | std::hex << std::uppercase << std::setfill('0') << |
| 365 | "F16 = 0x" << std::setw(4) << f16 << ", " << |
| 366 | "F32(F16) = 0x" << std::setw(8) << bits_f32 << ", " << |
| 367 | "F16(F32(F16)) = 0x" << std::setw(4) << fp16_alt_from_fp32_value(value_f32); |
| 368 | } |
| 369 | } |
| 370 | |
| 371 | TEST(FP16_ALT_FROM_FP32_VALUE, underflow) { |
| 372 | const uint32_t min_nonzero_f32 = UINT32_C(0x33000001); |
| 373 | const uint16_t zero_f16 = UINT16_C(0x0000); |
| 374 | const uint16_t min_f16 = UINT16_C(0x0001); |
| 375 | for (uint32_t bits = UINT32_C(0x00000001); bits < min_nonzero_f32; bits++) { |
| 376 | float value; |
| 377 | memcpy(&value, &bits, sizeof(value)); |
| 378 | ASSERT_EQ(zero_f16, fp16_alt_from_fp32_value(value)) << |
| 379 | std::hex << std::uppercase << std::setfill('0') << |
| 380 | "F32 = 0x" << std::setw(8) << bits << ", " << |
| 381 | "F16(F32) = 0x" << std::setw(4) << fp16_alt_from_fp32_value(value) << ", " << |
| 382 | "F16 = 0x" << std::setw(4) << zero_f16; |
| 383 | } |
| 384 | float min_nonzero_value; |
| 385 | memcpy(&min_nonzero_value, &min_nonzero_f32, sizeof(min_nonzero_value)); |
| 386 | ASSERT_EQ(min_f16, fp16_alt_from_fp32_value(min_nonzero_value)) << |
| 387 | std::hex << std::uppercase << std::setfill('0') << |
| 388 | "F32 = 0x" << std::setw(8) << min_nonzero_f32 << ", " << |
| 389 | "F16(F32) = 0x" << std::setw(4) << fp16_alt_from_fp32_value(min_nonzero_value) << ", " << |
| 390 | "F16 = 0x" << std::setw(4) << min_f16; |
| 391 | } |
| 392 | |
| 393 | TEST(FP16_ALT_FROM_FP32_VALUE, saturation) { |
| 394 | const uint32_t max_f16_f32 = UINT32_C(0x47FFE000); |
| 395 | const uint16_t max_f16 = UINT16_C(0x7FFF); |
| 396 | const uint32_t positive_infinity_f32 = UINT32_C(0x7F800000); |
| 397 | for (uint32_t bits = positive_infinity_f32; bits > max_f16_f32; bits--) { |
| 398 | float value; |
| 399 | memcpy(&value, &bits, sizeof(value)); |
| 400 | ASSERT_EQ(max_f16, fp16_alt_from_fp32_value(value)) << |
| 401 | std::hex << std::uppercase << std::setfill('0') << |
| 402 | "F32 = 0x" << std::setw(8) << bits << ", " << |
| 403 | "F16(F32) = 0x" << std::setw(4) << fp16_alt_from_fp32_value(value) << ", " << |
| 404 | "F16 = 0x" << std::setw(4) << max_f16; |
| 405 | } |
| 406 | } |
| 407 | |
| 408 | TEST(FP16_ALT_FROM_FP32_VALUE, positive_denormalized_values) { |
| 409 | const uint32_t min_nonzero_f32 = UINT32_C(0x33000001); |
| 410 | |
| 411 | uint32_t f32_begin = min_nonzero_f32; |
| 412 | for (uint16_t f16 = 0; f16 < UINT16_C(0x0400); f16++) { |
| 413 | const uint32_t f32_end = fp16::denormalizedRanges[f16]; |
| 414 | for (uint32_t f32 = f32_begin; f32 < f32_end; f32++) { |
| 415 | float value; |
| 416 | memcpy(&value, &f32, sizeof(value)); |
| 417 | ASSERT_EQ(f16, fp16_alt_from_fp32_value(value)) << |
| 418 | std::hex << std::uppercase << std::setfill('0') << |
| 419 | "F32 = 0x" << std::setw(8) << f32 << ", " << |
| 420 | "F16(F32) = 0x" << std::setw(4) << fp16_alt_from_fp32_value(value) << ", " << |
| 421 | "F16 = 0x" << std::setw(4) << f16; |
| 422 | } |
| 423 | f32_begin = f32_end; |
| 424 | } |
| 425 | } |
| 426 | |
| 427 | TEST(FP16_ALT_FROM_FP32_VALUE, negative_denormalized_values) { |
| 428 | const uint32_t min_nonzero_f32 = UINT32_C(0x33000001); |
| 429 | |
| 430 | uint32_t f32_begin = min_nonzero_f32 | UINT32_C(0x80000000); |
| 431 | for (uint16_t f16 = UINT16_C(0x8000); f16 < UINT16_C(0x8400); f16++) { |
| 432 | const uint32_t f32_end = fp16::denormalizedRanges[f16 & UINT16_C(0x7FFF)] | UINT32_C(0x80000000); |
| 433 | for (uint32_t f32 = f32_begin; f32 < f32_end; f32++) { |
| 434 | float value; |
| 435 | memcpy(&value, &f32, sizeof(value)); |
| 436 | ASSERT_EQ(f16, fp16_alt_from_fp32_value(value)) << |
| 437 | std::hex << std::uppercase << std::setfill('0') << |
| 438 | "F32 = 0x" << std::setw(8) << f32 << ", " << |
| 439 | "F16(F32) = 0x" << std::setw(4) << fp16_alt_from_fp32_value(value) << ", " << |
| 440 | "F16 = 0x" << std::setw(4) << f16; |
| 441 | } |
| 442 | f32_begin = f32_end; |
| 443 | } |
| 444 | } |
| 445 | |
| 446 | TEST(FP16_ALT_FROM_FP32_VALUE, positive_normalized_values) { |
| 447 | /* Minimum number that rounds to 1.0h when converted to half-precision */ |
| 448 | const uint32_t min_one_f32 = UINT32_C(0x3F7FF000); |
| 449 | const uint32_t e_bias = 15; |
| 450 | |
| 451 | for (int32_t e = -14; e <= 16; e++) { |
| 452 | uint32_t f32_begin = min_one_f32 + (uint32_t(e) << 23); |
| 453 | for (uint16_t f16 = uint16_t(e + e_bias) << 10; f16 < uint16_t(e + e_bias + 1) << 10; f16++) { |
| 454 | const uint32_t f32_end = fp16::normalizedRanges[f16 & UINT16_C(0x3FF)] + (uint32_t(e) << 23); |
| 455 | for (uint32_t f32 = f32_begin; f32 < f32_end; f32++) { |
| 456 | float value; |
| 457 | memcpy(&value, &f32, sizeof(value)); |
| 458 | ASSERT_EQ(f16, fp16_alt_from_fp32_value(value)) << |
| 459 | std::hex << std::uppercase << std::setfill('0') << |
| 460 | "F32 = 0x" << std::setw(8) << f32 << ", " << |
| 461 | "F16(F32) = 0x" << std::setw(4) << fp16_alt_from_fp32_value(value) << ", " << |
| 462 | "F16 = 0x" << std::setw(4) << f16; |
| 463 | } |
| 464 | f32_begin = f32_end; |
| 465 | } |
| 466 | } |
| 467 | } |
| 468 | |
| 469 | TEST(FP16_ALT_FROM_FP32_VALUE, negative_normalized_values) { |
| 470 | /* Minimum number that rounds to 1.0h when converted to half-precision */ |
| 471 | const uint32_t min_one_f32 = UINT32_C(0x3F7FF000); |
| 472 | const uint32_t e_bias = 15; |
| 473 | |
| 474 | for (int32_t e = -14; e <= 16; e++) { |
| 475 | uint32_t f32_begin = (min_one_f32 | UINT32_C(0x80000000)) + (uint32_t(e) << 23); |
| 476 | for (uint16_t f16 = (UINT16_C(0x8000) | (uint16_t(e + e_bias) << 10)); f16 < (UINT16_C(0x8000) | (uint16_t(e + e_bias + 1) << 10)); f16++) { |
| 477 | const uint32_t f32_end = (fp16::normalizedRanges[f16 & UINT16_C(0x3FF)] | UINT32_C(0x80000000)) + (uint32_t(e) << 23); |
| 478 | for (uint32_t f32 = f32_begin; f32 < f32_end; f32++) { |
| 479 | float value; |
| 480 | memcpy(&value, &f32, sizeof(value)); |
| 481 | ASSERT_EQ(f16, fp16_alt_from_fp32_value(value)) << |
| 482 | std::hex << std::uppercase << std::setfill('0') << |
| 483 | "F32 = 0x" << std::setw(8) << f32 << ", " << |
| 484 | "F16(F32) = 0x" << std::setw(4) << fp16_alt_from_fp32_value(value) << ", " << |
| 485 | "F16 = 0x" << std::setw(4) << f16; |
| 486 | } |
| 487 | f32_begin = f32_end; |
| 488 | } |
| 489 | } |
| 490 | } |