blob: e874012cffa2dacf57b788d1c8ed057c0936f741 [file] [log] [blame]
sewardjcbe8efa2004-09-06 14:57:52 +00001
2#include "../pub/libvex_basictypes.h"
3#include <stdio.h>
4#include <malloc.h>
5#include <stdlib.h>
6#include <string.h>
7
8
9/* Test program for developing code for conversions between
10 x87 64-bit and 80-bit floats.
11
12 80-bit format exists only for x86/x86-64, and so the routines
13 hardwire it as little-endian. The 64-bit format (IEEE double)
14 could exist on any platform, little or big-endian and so we
15 have to take that into account. IOW, these routines have to
16 work correctly when compiled on both big- and little-endian
17 targets, but the 80-bit images only ever have to exist in
18 little-endian format.
19*/
20
21static
22UInt read_bit_array ( UChar* arr, UInt n )
23{
24 UChar c = arr[n >> 3];
25 c >>= (n&7);
26 return c & 1;
27}
28
29
30static void convert_f80le_to_f64le_HW ( /*IN*/UChar* f80, /*OUT*/UChar* f64 )
31{
32 asm volatile ("ffree %%st(7); fldt (%0); fstpl (%1)"
33 :
34 : "r" (&f80[0]), "r" (&f64[0])
35 : "memory" );
36}
37
38static void convert_f64le_to_f80le_HW ( /*IN*/UChar* f64, /*OUT*/UChar* f80 )
39{
40 asm volatile ("ffree %%st(7); fldl (%0); fstpt (%1)"
41 :
42 : "r" (&f64[0]), "r" (&f80[0])
43 : "memory" );
44}
45
46/* 80 and 64-bit floating point formats:
47
48 80-bit:
49
50 S 0 0-------0 zero
51 S 0 0X------X denormals
52 S 1-7FFE 1X------X normals (all normals have leading 1)
53 S 7FFF 10------0 infinity
54 S 7FFF 10X-----X snan
55 S 7FFF 11X-----X qnan
56
57 S is the sign bit. For runs X----X, at least one of the Xs must be
58 nonzero. Exponent is 15 bits, fractional part is 63 bits, and
59 there is an explicitly represented leading 1, and a sign bit,
60 giving 80 in total.
61
62 64-bit avoids the confusion of an explicitly represented leading 1
63 and so is simpler:
64
65 S 0 0------0 zero
66 S 0 X------X denormals
67 S 1-7FE any normals
68 S 7FF 0------0 infinity
69 S 7FF 0X-----X snan
70 S 7FF 1X-----X qnan
71
72 Exponent is 11 bits, fractional part is 52 bits, and there is a
73 sign bit, giving 64 in total.
74*/
75
76
77/* Convert a IEEE754 double (64-bit) into an x87 extended double
78 (80-bit), mimicing the hardware fairly closely. Both numbers are
79 stored little-endian. Limitations, all of which could be fixed,
80 given some level of hassle:
81
82 * Does not handle double precision denormals. As a result, values
83 with magnitudes less than 1e-308 are flushed to zero when they
84 need not be.
85
86 * Identity of NaNs is not preserved.
87
88 See comments in the code for more details.
89*/
90static void convert_f64le_to_f80le ( /*IN*/UChar* f64, /*OUT*/UChar* f80 )
91{
92 Bool isInf;
93 Int bexp;
94 UChar sign;
95
96 sign = (f64[7] >> 7) & 1;
97 bexp = (f64[7] << 4) | ((f64[6] >> 4) & 0x0F);
98 bexp &= 0x7FF;
99
100 /* If the exponent is zero, either we have a zero or a denormal.
101 Produce a zero. This is a hack in that it forces denormals to
102 zero. Could do better. */
103 if (bexp == 0) {
104 f80[9] = sign << 7;
105 f80[8] = f80[7] = f80[6] = f80[5] = f80[4]
106 = f80[3] = f80[2] = f80[1] = f80[0] = 0;
107 return;
108 }
109
110 /* If the exponent is 7FF, this is either an Infinity, a SNaN or
111 QNaN, as determined by examining bits 51:0, thus:
112 0 ... 0 Inf
113 0X ... X SNaN
114 1X ... X QNaN
115 where at least one of the Xs is not zero.
116 */
117 if (bexp == 0x7FF) {
118 isInf = (f64[6] & 0x0F) == 0
119 && f64[5] == 0 && f64[4] == 0 && f64[3] == 0
120 && f64[2] == 0 && f64[1] == 0 && f64[0] == 0;
121 if (isInf) {
122 /* Produce an appropriately signed infinity:
123 S 1--1 (15) 1 0--0 (63)
124 */
125 f80[9] = (sign << 7) | 0x7F;
126 f80[8] = 0xFF;
127 f80[7] = 0x80;
128 f80[6] = f80[5] = f80[4] = f80[3]
129 = f80[2] = f80[1] = f80[0] = 0;
130 return;
131 }
132 /* So it's either a QNaN or SNaN. Distinguish by considering
133 bit 51. Note, this destroys all the trailing bits
134 (identity?) of the NaN. IEEE754 doesn't require preserving
135 these (it only requires that there be one QNaN value and one
136 SNaN value), but x87 does seem to have some ability to
137 preserve them. Anyway, here, the NaN's identity is
138 destroyed. Could be improved. */
139 if (f64[6] & 8) {
140 /* QNaN. Make a QNaN:
141 S 1--1 (15) 1 1--1 (63)
142 */
143 f80[9] = (sign << 7) | 0x7F;
144 f80[8] = 0xFF;
145 f80[7] = 0xFF;
146 f80[6] = f80[5] = f80[4] = f80[3]
147 = f80[2] = f80[1] = f80[0] = 0xFF;
148 } else {
149 /* SNaN. Make a SNaN:
150 S 1--1 (15) 0 1--1 (63)
151 */
152 f80[9] = (sign << 7) | 0x7F;
153 f80[8] = 0xFF;
154 f80[7] = 0x7F;
155 f80[6] = f80[5] = f80[4] = f80[3]
156 = f80[2] = f80[1] = f80[0] = 0xFF;
157 }
158 return;
159 }
160
161 /* It's not a zero, denormal, infinity or nan. So it must be a
162 normalised number. Rebias the exponent and build the new
163 number. */
164 bexp += (16383 - 1023);
165
166 f80[9] = (sign << 7) | ((bexp >> 8) & 0xFF);
167 f80[8] = bexp & 0xFF;
168 f80[7] = (1 << 7) | ((f64[6] << 3) & 0x78) | ((f64[5] >> 5) & 7);
169 f80[6] = ((f64[5] << 3) & 0xF8) | ((f64[4] >> 5) & 7);
170 f80[5] = ((f64[4] << 3) & 0xF8) | ((f64[3] >> 5) & 7);
171 f80[4] = ((f64[3] << 3) & 0xF8) | ((f64[2] >> 5) & 7);
172 f80[3] = ((f64[2] << 3) & 0xF8) | ((f64[1] >> 5) & 7);
173 f80[2] = ((f64[1] << 3) & 0xF8) | ((f64[0] >> 5) & 7);
174 f80[1] = ((f64[0] << 3) & 0xF8);
175 f80[0] = 0;
176}
177
178
179/////////////////////////////////////////////////////////////////
180
181/* Convert a x87 extended double (80-bit) into an IEEE 754 double
182 (64-bit), mimicing the hardware fairly closely. Both numbers are
183 stored little-endian. Limitations, all of which could be fixed,
184 given some level of hassle:
185
186 * Does not create double precision denormals. As a result, values
187 with magnitudes less than 1e-308 are flushed to zero when they
188 need not be.
189
190 * Rounding following truncation could be a bit better.
191
192 * Identity of NaNs is not preserved.
193
194 See comments in the code for more details.
195*/
196static void convert_f80le_to_f64le ( /*IN*/UChar* f80, /*OUT*/UChar* f64 )
197{
198 Bool isInf;
199 Int bexp;
200 UChar sign;
201
202 sign = (f80[9] >> 7) & 1;
203 bexp = (((UInt)f80[9]) << 8) | (UInt)f80[8];
204 bexp &= 0x7FFF;
205
206 /* If the exponent is zero, either we have a zero or a denormal.
207 But an extended precision denormal becomes a double precision
208 zero, so in either case, just produce the appropriately signed
209 zero. */
210 if (bexp == 0) {
211 f64[7] = sign << 7;
212 f64[6] = f64[5] = f64[4] = f64[3] = f64[2] = f64[1] = f64[0] = 0;
213 return;
214 }
215
216 /* If the exponent is 7FFF, this is either an Infinity, a SNaN or
217 QNaN, as determined by examining bits 62:0, thus:
218 0 ... 0 Inf
219 0X ... X SNaN
220 1X ... X QNaN
221 where at least one of the Xs is not zero.
222 */
223 if (bexp == 0x7FFF) {
224 isInf = (f80[7] & 0x7F) == 0
225 && f80[6] == 0 && f80[5] == 0 && f80[4] == 0
226 && f80[3] == 0 && f80[2] == 0 && f80[1] == 0 && f80[0] == 0;
227 if (isInf) {
228 if (0 == (f80[7] & 0x80))
229 goto wierd_NaN;
230 /* Produce an appropriately signed infinity:
231 S 1--1 (11) 0--0 (52)
232 */
233 f64[7] = (sign << 7) | 0x7F;
234 f64[6] = 0xF0;
235 f64[5] = f64[4] = f64[3] = f64[2] = f64[1] = f64[0] = 0;
236 return;
237 }
238 /* So it's either a QNaN or SNaN. Distinguish by considering
239 bit 62. Note, this destroys all the trailing bits
240 (identity?) of the NaN. IEEE754 doesn't require preserving
241 these (it only requires that there be one QNaN value and one
242 SNaN value), but x87 does seem to have some ability to
243 preserve them. Anyway, here, the NaN's identity is
244 destroyed. Could be improved. */
245 if (f80[8] & 0x40) {
246 /* QNaN. Make a QNaN:
247 S 1--1 (11) 1 1--1 (51)
248 */
249 f64[7] = (sign << 7) | 0x7F;
250 f64[6] = 0xFF;
251 f64[5] = f64[4] = f64[3] = f64[2] = f64[1] = f64[0] = 0xFF;
252 } else {
253 /* SNaN. Make a SNaN:
254 S 1--1 (11) 0 1--1 (51)
255 */
256 f64[7] = (sign << 7) | 0x7F;
257 f64[6] = 0xF7;
258 f64[5] = f64[4] = f64[3] = f64[2] = f64[1] = f64[0] = 0xFF;
259 }
260 return;
261 }
262
263 /* If it's not a Zero, NaN or Inf, and the integer part (bit 62) is
264 zero, the x87 FPU appears to consider the number denormalised
265 and converts it to a QNaN. */
266 if (0 == (f80[7] & 0x80)) {
267 wierd_NaN:
268 /* Strange hardware QNaN:
269 S 1--1 (11) 1 0--0 (51)
270 */
271 /* On a PIII, these QNaNs always appear with sign==1. I have
272 no idea why. */
273 f64[7] = (1 /*sign*/ << 7) | 0x7F;
274 f64[6] = 0xF8;
275 f64[5] = f64[4] = f64[3] = f64[2] = f64[1] = f64[0] = 0;
276 return;
277 }
278
279 /* It's not a zero, denormal, infinity or nan. So it must be a
280 normalised number. Rebias the exponent and consider. */
281 bexp -= (16383 - 1023);
282 if (bexp >= 0x7FF) {
283 /* It's too big for a double. Construct an infinity. */
284 f64[7] = (sign << 7) | 0x7F;
285 f64[6] = 0xF0;
286 f64[5] = f64[4] = f64[3] = f64[2] = f64[1] = f64[0] = 0;
287 return;
288 }
289
290 if (bexp < 0) {
291 /* It's too small for a double. Construct a zero. Note, this
292 is a kludge since we could conceivably create a
293 denormalised number for bexp in -1 to -51, but we don't
294 bother. This means the conversion flushes values
295 approximately in the range 1e-309 to 1e-324 ish to zero
296 when it doesn't actually need to. This could be
297 improved. */
298 f64[7] = sign << 7;
299 f64[6] = f64[5] = f64[4] = f64[3] = f64[2] = f64[1] = f64[0] = 0;
300 return;
301 }
302
303 /* Ok, it's a normalised number which is representable as a double.
304 Copy the exponent and mantissa into place. */
305 /*
306 for (i = 0; i < 52; i++)
307 write_bit_array ( f64,
308 i,
309 read_bit_array ( f80, i+11 ) );
310 */
311 f64[0] = (f80[1] >> 3) | (f80[2] << 5);
312 f64[1] = (f80[2] >> 3) | (f80[3] << 5);
313 f64[2] = (f80[3] >> 3) | (f80[4] << 5);
314 f64[3] = (f80[4] >> 3) | (f80[5] << 5);
315 f64[4] = (f80[5] >> 3) | (f80[6] << 5);
316 f64[5] = (f80[6] >> 3) | (f80[7] << 5);
317
318 f64[6] = ((bexp << 4) & 0xF0) | ((f80[7] >> 3) & 0x0F);
319
320 f64[7] = (sign << 7) | ((bexp >> 4) & 0x7F);
321
322 /* Now consider any rounding that needs to happen as a result of
323 truncating the mantissa. */
324 if (f80[1] & 4) /* read_bit_array(f80, 10) == 1) */ {
325 /* Round upwards. This is a kludge. Once in every 64k
326 roundings (statistically) the bottom two bytes are both 0xFF
327 and so we don't round at all. Could be improved. */
328 if (f64[0] != 0xFF) {
329 f64[0]++;
330 }
331 else
332 if (f64[0] == 0xFF && f64[1] != 0xFF) {
333 f64[0] = 0;
334 f64[1]++;
335 }
336 /* else we don't round, but we should. */
337 }
338}
339
340
341//////////////
342
343static void show_f80 ( UChar* f80 )
344{
345 Int i;
346 printf("%d ", read_bit_array(f80, 79));
347
348 for (i = 78; i >= 64; i--)
349 printf("%d", read_bit_array(f80, i));
350
351 printf(" %d ", read_bit_array(f80, 63));
352
353 for (i = 62; i >= 0; i--)
354 printf("%d", read_bit_array(f80, i));
355}
356
357static void show_f64le ( UChar* f64 )
358{
359 Int i;
360 printf("%d ", read_bit_array(f64, 63));
361
362 for (i = 62; i >= 52; i--)
363 printf("%d", read_bit_array(f64, i));
364
365 printf(" ");
366 for (i = 51; i >= 0; i--)
367 printf("%d", read_bit_array(f64, i));
368}
369
370//////////////
371
372
373/* Convert f80 to a 64-bit IEEE double using both the hardware and the
374 soft version, and compare the results. If they differ, print
375 details and return 1. If they are identical, return 0.
376*/
377int do_80_to_64_test ( Int test_no, UChar* f80, UChar* f64h, UChar* f64s)
378{
379 Char buf64s[100], buf64h[100];
380 Bool same;
381 Int k;
382 convert_f80le_to_f64le_HW(f80, f64h);
383 convert_f80le_to_f64le(f80, f64s);
384 same = True;
385 for (k = 0; k < 8; k++) {
386 if (f64s[k] != f64h[k]) {
387 same = False; break;
388 }
389 }
390 /* bitwise identical */
391 if (same)
392 return 0;
393
394 sprintf(buf64s, "%.16e", *(double*)f64s);
395 sprintf(buf64h, "%.16e", *(double*)f64h);
396
397 /* Not bitwise identical, but pretty darn close */
398 if (0 == strcmp(buf64s, buf64h))
399 return 0;
400
401 printf("\n");
402 printf("f80: "); show_f80(f80); printf("\n");
403 printf("f64h: "); show_f64le(f64h); printf("\n");
404 printf("f64s: "); show_f64le(f64s); printf("\n");
405
406 printf("[test %d] %.16Le -> (hw %s, sw %s)\n",
407 test_no, *(long double*)f80,
408 buf64h, buf64s );
409
410 return 1;
411}
412
413
414/* Convert an IEEE 64-bit double to a x87 extended double (80 bit)
415 using both the hardware and the soft version, and compare the
416 results. If they differ, print details and return 1. If they are
417 identical, return 0.
418*/
419int do_64_to_80_test ( Int test_no, UChar* f64, UChar* f80h, UChar* f80s)
420{
421 Char buf80s[100], buf80h[100];
422 Bool same;
423 Int k;
424 convert_f64le_to_f80le_HW(f64, f80h);
425 convert_f64le_to_f80le(f64, f80s);
426 same = True;
427 for (k = 0; k < 10; k++) {
428 if (f80s[k] != f80h[k]) {
429 same = False; break;
430 }
431 }
432 /* bitwise identical */
433 if (same)
434 return 0;
435
436 sprintf(buf80s, "%.20Le", *(long double*)f80s);
437 sprintf(buf80h, "%.20Le", *(long double*)f80h);
438
439 /* Not bitwise identical, but pretty darn close */
440 if (0 == strcmp(buf80s, buf80h))
441 return 0;
442
443 printf("\n");
444 printf("f64: "); show_f64le(f64); printf("\n");
445 printf("f80h: "); show_f80(f80h); printf("\n");
446 printf("f80s: "); show_f80(f80s); printf("\n");
447
448 printf("[test %d] %.16e -> (hw %s, sw %s)\n",
449 test_no, *(double*)f64,
450 buf80h, buf80s );
451
452 return 1;
453}
454
455
456
457void do_80_to_64_tests ( void )
458{
459 UInt b9,b8,b7,i, j;
460 Int fails=0, tests=0;
461 UChar* f64h = malloc(8);
462 UChar* f64s = malloc(8);
463 UChar* f80 = malloc(10);
464 int STEP = 1;
465
466 srandom(4343);
467
468 /* Ten million random bit patterns */
469 for (i = 0; i < 10000000; i++) {
470 tests++;
471 for (j = 0; j < 10; j++)
472 f80[j] = (random() >> 7) & 255;
473
474 fails += do_80_to_64_test(tests, f80, f64h, f64s);
475 }
476
477 /* 2^24 numbers in which the first 24 bits are tested exhaustively
478 -- this covers the sign, exponent and leading part of the
479 mantissa. */
480 for (b9 = 0; b9 < 256; b9 += STEP) {
481 for (b8 = 0; b8 < 256; b8 += STEP) {
482 for (b7 = 0; b7 < 256; b7 += STEP) {
483 tests++;
484 for (i = 0; i < 10; i++)
485 f80[i] = 0;
486 for (i = 0; i < 8; i++)
487 f64h[i] = f64s[i] = 0;
488 f80[9] = b9;
489 f80[8] = b8;
490 f80[7] = b7;
491
492 fails += do_80_to_64_test(tests, f80, f64h, f64s);
493 }}}
494
495 printf("\n80 -> 64: %d tests, %d fails\n\n", tests, fails);
496}
497
498
499void do_64_to_80_tests ( void )
500{
501 UInt b7,b6,b5,i, j;
502 Int fails=0, tests=0;
503 UChar* f80h = malloc(10);
504 UChar* f80s = malloc(10);
505 UChar* f64 = malloc(8);
506 int STEP = 1;
507
508 srandom(2323);
509
510 /* Ten million random bit patterns */
511 for (i = 0; i < 10000000; i++) {
512 tests++;
513 for (j = 0; j < 8; j++)
514 f64[j] = (random() >> 13) & 255;
515
516 fails += do_64_to_80_test(tests, f64, f80h, f80s);
517 }
518
519 /* 2^24 numbers in which the first 24 bits are tested exhaustively
520 -- this covers the sign, exponent and leading part of the
521 mantissa. */
522 for (b7 = 0; b7 < 256; b7 += STEP) {
523 for (b6 = 0; b6 < 256; b6 += STEP) {
524 for (b5 = 0; b5 < 256; b5 += STEP) {
525 tests++;
526 for (i = 0; i < 8; i++)
527 f64[i] = 0;
528 for (i = 0; i < 10; i++)
529 f80h[i] = f80s[i] = 0;
530 f64[7] = b7;
531 f64[6] = b6;
532 f64[5] = b5;
533
534 fails += do_64_to_80_test(tests, f64, f80h, f80s);
535 }}}
536
537 printf("\n64 -> 80: %d tests, %d fails\n\n", tests, fails);
538}
539
540
541int main ( void )
542{
543 do_80_to_64_tests();
544 do_64_to_80_tests();
545 return 0;
546}
547
548
549
550