blob: 954a9c9e3cc1c074f76a2d9713bca47de64ef8fc [file] [log] [blame]
sewardjcbe8efa2004-09-06 14:57:52 +00001
sewardj07e09622004-09-06 20:39:20 +00002#ifndef USED_AS_INCLUDE
3
sewardjcbe8efa2004-09-06 14:57:52 +00004#include "../pub/libvex_basictypes.h"
5#include <stdio.h>
6#include <malloc.h>
7#include <stdlib.h>
8#include <string.h>
9
10
11/* Test program for developing code for conversions between
12 x87 64-bit and 80-bit floats.
13
14 80-bit format exists only for x86/x86-64, and so the routines
15 hardwire it as little-endian. The 64-bit format (IEEE double)
16 could exist on any platform, little or big-endian and so we
17 have to take that into account. IOW, these routines have to
18 work correctly when compiled on both big- and little-endian
19 targets, but the 80-bit images only ever have to exist in
20 little-endian format.
21*/
22
23static
24UInt read_bit_array ( UChar* arr, UInt n )
25{
26 UChar c = arr[n >> 3];
27 c >>= (n&7);
28 return c & 1;
29}
30
31
32static void convert_f80le_to_f64le_HW ( /*IN*/UChar* f80, /*OUT*/UChar* f64 )
33{
34 asm volatile ("ffree %%st(7); fldt (%0); fstpl (%1)"
35 :
36 : "r" (&f80[0]), "r" (&f64[0])
37 : "memory" );
38}
39
40static void convert_f64le_to_f80le_HW ( /*IN*/UChar* f64, /*OUT*/UChar* f80 )
41{
42 asm volatile ("ffree %%st(7); fldl (%0); fstpt (%1)"
43 :
44 : "r" (&f64[0]), "r" (&f80[0])
45 : "memory" );
46}
47
sewardj07e09622004-09-06 20:39:20 +000048#endif /* ndef USED_AS_INCLUDE */
49
sewardjcbe8efa2004-09-06 14:57:52 +000050/* 80 and 64-bit floating point formats:
51
52 80-bit:
53
54 S 0 0-------0 zero
55 S 0 0X------X denormals
56 S 1-7FFE 1X------X normals (all normals have leading 1)
57 S 7FFF 10------0 infinity
58 S 7FFF 10X-----X snan
59 S 7FFF 11X-----X qnan
60
61 S is the sign bit. For runs X----X, at least one of the Xs must be
62 nonzero. Exponent is 15 bits, fractional part is 63 bits, and
63 there is an explicitly represented leading 1, and a sign bit,
64 giving 80 in total.
65
66 64-bit avoids the confusion of an explicitly represented leading 1
67 and so is simpler:
68
69 S 0 0------0 zero
70 S 0 X------X denormals
71 S 1-7FE any normals
72 S 7FF 0------0 infinity
73 S 7FF 0X-----X snan
74 S 7FF 1X-----X qnan
75
76 Exponent is 11 bits, fractional part is 52 bits, and there is a
77 sign bit, giving 64 in total.
78*/
79
80
81/* Convert a IEEE754 double (64-bit) into an x87 extended double
82 (80-bit), mimicing the hardware fairly closely. Both numbers are
83 stored little-endian. Limitations, all of which could be fixed,
84 given some level of hassle:
85
86 * Does not handle double precision denormals. As a result, values
87 with magnitudes less than 1e-308 are flushed to zero when they
88 need not be.
89
90 * Identity of NaNs is not preserved.
91
92 See comments in the code for more details.
93*/
94static void convert_f64le_to_f80le ( /*IN*/UChar* f64, /*OUT*/UChar* f80 )
95{
96 Bool isInf;
97 Int bexp;
98 UChar sign;
99
100 sign = (f64[7] >> 7) & 1;
101 bexp = (f64[7] << 4) | ((f64[6] >> 4) & 0x0F);
102 bexp &= 0x7FF;
103
104 /* If the exponent is zero, either we have a zero or a denormal.
105 Produce a zero. This is a hack in that it forces denormals to
106 zero. Could do better. */
107 if (bexp == 0) {
108 f80[9] = sign << 7;
109 f80[8] = f80[7] = f80[6] = f80[5] = f80[4]
110 = f80[3] = f80[2] = f80[1] = f80[0] = 0;
111 return;
112 }
113
114 /* If the exponent is 7FF, this is either an Infinity, a SNaN or
115 QNaN, as determined by examining bits 51:0, thus:
116 0 ... 0 Inf
117 0X ... X SNaN
118 1X ... X QNaN
119 where at least one of the Xs is not zero.
120 */
121 if (bexp == 0x7FF) {
122 isInf = (f64[6] & 0x0F) == 0
123 && f64[5] == 0 && f64[4] == 0 && f64[3] == 0
124 && f64[2] == 0 && f64[1] == 0 && f64[0] == 0;
125 if (isInf) {
126 /* Produce an appropriately signed infinity:
127 S 1--1 (15) 1 0--0 (63)
128 */
129 f80[9] = (sign << 7) | 0x7F;
130 f80[8] = 0xFF;
131 f80[7] = 0x80;
132 f80[6] = f80[5] = f80[4] = f80[3]
133 = f80[2] = f80[1] = f80[0] = 0;
134 return;
135 }
136 /* So it's either a QNaN or SNaN. Distinguish by considering
137 bit 51. Note, this destroys all the trailing bits
138 (identity?) of the NaN. IEEE754 doesn't require preserving
139 these (it only requires that there be one QNaN value and one
140 SNaN value), but x87 does seem to have some ability to
141 preserve them. Anyway, here, the NaN's identity is
142 destroyed. Could be improved. */
143 if (f64[6] & 8) {
144 /* QNaN. Make a QNaN:
145 S 1--1 (15) 1 1--1 (63)
146 */
147 f80[9] = (sign << 7) | 0x7F;
148 f80[8] = 0xFF;
149 f80[7] = 0xFF;
150 f80[6] = f80[5] = f80[4] = f80[3]
151 = f80[2] = f80[1] = f80[0] = 0xFF;
152 } else {
153 /* SNaN. Make a SNaN:
154 S 1--1 (15) 0 1--1 (63)
155 */
156 f80[9] = (sign << 7) | 0x7F;
157 f80[8] = 0xFF;
158 f80[7] = 0x7F;
159 f80[6] = f80[5] = f80[4] = f80[3]
160 = f80[2] = f80[1] = f80[0] = 0xFF;
161 }
162 return;
163 }
164
165 /* It's not a zero, denormal, infinity or nan. So it must be a
166 normalised number. Rebias the exponent and build the new
167 number. */
168 bexp += (16383 - 1023);
169
170 f80[9] = (sign << 7) | ((bexp >> 8) & 0xFF);
171 f80[8] = bexp & 0xFF;
172 f80[7] = (1 << 7) | ((f64[6] << 3) & 0x78) | ((f64[5] >> 5) & 7);
173 f80[6] = ((f64[5] << 3) & 0xF8) | ((f64[4] >> 5) & 7);
174 f80[5] = ((f64[4] << 3) & 0xF8) | ((f64[3] >> 5) & 7);
175 f80[4] = ((f64[3] << 3) & 0xF8) | ((f64[2] >> 5) & 7);
176 f80[3] = ((f64[2] << 3) & 0xF8) | ((f64[1] >> 5) & 7);
177 f80[2] = ((f64[1] << 3) & 0xF8) | ((f64[0] >> 5) & 7);
178 f80[1] = ((f64[0] << 3) & 0xF8);
179 f80[0] = 0;
180}
181
182
183/////////////////////////////////////////////////////////////////
184
185/* Convert a x87 extended double (80-bit) into an IEEE 754 double
186 (64-bit), mimicing the hardware fairly closely. Both numbers are
187 stored little-endian. Limitations, all of which could be fixed,
188 given some level of hassle:
189
190 * Does not create double precision denormals. As a result, values
191 with magnitudes less than 1e-308 are flushed to zero when they
192 need not be.
193
194 * Rounding following truncation could be a bit better.
195
196 * Identity of NaNs is not preserved.
197
198 See comments in the code for more details.
199*/
200static void convert_f80le_to_f64le ( /*IN*/UChar* f80, /*OUT*/UChar* f64 )
201{
202 Bool isInf;
203 Int bexp;
204 UChar sign;
205
206 sign = (f80[9] >> 7) & 1;
207 bexp = (((UInt)f80[9]) << 8) | (UInt)f80[8];
208 bexp &= 0x7FFF;
209
210 /* If the exponent is zero, either we have a zero or a denormal.
211 But an extended precision denormal becomes a double precision
212 zero, so in either case, just produce the appropriately signed
213 zero. */
214 if (bexp == 0) {
215 f64[7] = sign << 7;
216 f64[6] = f64[5] = f64[4] = f64[3] = f64[2] = f64[1] = f64[0] = 0;
217 return;
218 }
219
220 /* If the exponent is 7FFF, this is either an Infinity, a SNaN or
221 QNaN, as determined by examining bits 62:0, thus:
222 0 ... 0 Inf
223 0X ... X SNaN
224 1X ... X QNaN
225 where at least one of the Xs is not zero.
226 */
227 if (bexp == 0x7FFF) {
228 isInf = (f80[7] & 0x7F) == 0
229 && f80[6] == 0 && f80[5] == 0 && f80[4] == 0
230 && f80[3] == 0 && f80[2] == 0 && f80[1] == 0 && f80[0] == 0;
231 if (isInf) {
232 if (0 == (f80[7] & 0x80))
233 goto wierd_NaN;
234 /* Produce an appropriately signed infinity:
235 S 1--1 (11) 0--0 (52)
236 */
237 f64[7] = (sign << 7) | 0x7F;
238 f64[6] = 0xF0;
239 f64[5] = f64[4] = f64[3] = f64[2] = f64[1] = f64[0] = 0;
240 return;
241 }
242 /* So it's either a QNaN or SNaN. Distinguish by considering
243 bit 62. Note, this destroys all the trailing bits
244 (identity?) of the NaN. IEEE754 doesn't require preserving
245 these (it only requires that there be one QNaN value and one
246 SNaN value), but x87 does seem to have some ability to
247 preserve them. Anyway, here, the NaN's identity is
248 destroyed. Could be improved. */
249 if (f80[8] & 0x40) {
250 /* QNaN. Make a QNaN:
251 S 1--1 (11) 1 1--1 (51)
252 */
253 f64[7] = (sign << 7) | 0x7F;
254 f64[6] = 0xFF;
255 f64[5] = f64[4] = f64[3] = f64[2] = f64[1] = f64[0] = 0xFF;
256 } else {
257 /* SNaN. Make a SNaN:
258 S 1--1 (11) 0 1--1 (51)
259 */
260 f64[7] = (sign << 7) | 0x7F;
261 f64[6] = 0xF7;
262 f64[5] = f64[4] = f64[3] = f64[2] = f64[1] = f64[0] = 0xFF;
263 }
264 return;
265 }
266
267 /* If it's not a Zero, NaN or Inf, and the integer part (bit 62) is
268 zero, the x87 FPU appears to consider the number denormalised
269 and converts it to a QNaN. */
270 if (0 == (f80[7] & 0x80)) {
271 wierd_NaN:
272 /* Strange hardware QNaN:
273 S 1--1 (11) 1 0--0 (51)
274 */
275 /* On a PIII, these QNaNs always appear with sign==1. I have
276 no idea why. */
277 f64[7] = (1 /*sign*/ << 7) | 0x7F;
278 f64[6] = 0xF8;
279 f64[5] = f64[4] = f64[3] = f64[2] = f64[1] = f64[0] = 0;
280 return;
281 }
282
283 /* It's not a zero, denormal, infinity or nan. So it must be a
284 normalised number. Rebias the exponent and consider. */
285 bexp -= (16383 - 1023);
286 if (bexp >= 0x7FF) {
287 /* It's too big for a double. Construct an infinity. */
288 f64[7] = (sign << 7) | 0x7F;
289 f64[6] = 0xF0;
290 f64[5] = f64[4] = f64[3] = f64[2] = f64[1] = f64[0] = 0;
291 return;
292 }
293
294 if (bexp < 0) {
295 /* It's too small for a double. Construct a zero. Note, this
296 is a kludge since we could conceivably create a
297 denormalised number for bexp in -1 to -51, but we don't
298 bother. This means the conversion flushes values
299 approximately in the range 1e-309 to 1e-324 ish to zero
300 when it doesn't actually need to. This could be
301 improved. */
302 f64[7] = sign << 7;
303 f64[6] = f64[5] = f64[4] = f64[3] = f64[2] = f64[1] = f64[0] = 0;
304 return;
305 }
306
307 /* Ok, it's a normalised number which is representable as a double.
308 Copy the exponent and mantissa into place. */
309 /*
310 for (i = 0; i < 52; i++)
311 write_bit_array ( f64,
312 i,
313 read_bit_array ( f80, i+11 ) );
314 */
315 f64[0] = (f80[1] >> 3) | (f80[2] << 5);
316 f64[1] = (f80[2] >> 3) | (f80[3] << 5);
317 f64[2] = (f80[3] >> 3) | (f80[4] << 5);
318 f64[3] = (f80[4] >> 3) | (f80[5] << 5);
319 f64[4] = (f80[5] >> 3) | (f80[6] << 5);
320 f64[5] = (f80[6] >> 3) | (f80[7] << 5);
321
322 f64[6] = ((bexp << 4) & 0xF0) | ((f80[7] >> 3) & 0x0F);
323
324 f64[7] = (sign << 7) | ((bexp >> 4) & 0x7F);
325
326 /* Now consider any rounding that needs to happen as a result of
327 truncating the mantissa. */
328 if (f80[1] & 4) /* read_bit_array(f80, 10) == 1) */ {
329 /* Round upwards. This is a kludge. Once in every 64k
330 roundings (statistically) the bottom two bytes are both 0xFF
331 and so we don't round at all. Could be improved. */
332 if (f64[0] != 0xFF) {
333 f64[0]++;
334 }
335 else
336 if (f64[0] == 0xFF && f64[1] != 0xFF) {
337 f64[0] = 0;
338 f64[1]++;
339 }
340 /* else we don't round, but we should. */
341 }
342}
343
sewardj07e09622004-09-06 20:39:20 +0000344#ifndef USED_AS_INCLUDE
sewardjcbe8efa2004-09-06 14:57:52 +0000345
346//////////////
347
348static void show_f80 ( UChar* f80 )
349{
350 Int i;
351 printf("%d ", read_bit_array(f80, 79));
352
353 for (i = 78; i >= 64; i--)
354 printf("%d", read_bit_array(f80, i));
355
356 printf(" %d ", read_bit_array(f80, 63));
357
358 for (i = 62; i >= 0; i--)
359 printf("%d", read_bit_array(f80, i));
360}
361
362static void show_f64le ( UChar* f64 )
363{
364 Int i;
365 printf("%d ", read_bit_array(f64, 63));
366
367 for (i = 62; i >= 52; i--)
368 printf("%d", read_bit_array(f64, i));
369
370 printf(" ");
371 for (i = 51; i >= 0; i--)
372 printf("%d", read_bit_array(f64, i));
373}
374
375//////////////
376
377
378/* Convert f80 to a 64-bit IEEE double using both the hardware and the
379 soft version, and compare the results. If they differ, print
380 details and return 1. If they are identical, return 0.
381*/
382int do_80_to_64_test ( Int test_no, UChar* f80, UChar* f64h, UChar* f64s)
383{
384 Char buf64s[100], buf64h[100];
385 Bool same;
386 Int k;
387 convert_f80le_to_f64le_HW(f80, f64h);
388 convert_f80le_to_f64le(f80, f64s);
389 same = True;
390 for (k = 0; k < 8; k++) {
391 if (f64s[k] != f64h[k]) {
392 same = False; break;
393 }
394 }
395 /* bitwise identical */
396 if (same)
397 return 0;
398
399 sprintf(buf64s, "%.16e", *(double*)f64s);
400 sprintf(buf64h, "%.16e", *(double*)f64h);
401
402 /* Not bitwise identical, but pretty darn close */
403 if (0 == strcmp(buf64s, buf64h))
404 return 0;
405
406 printf("\n");
407 printf("f80: "); show_f80(f80); printf("\n");
408 printf("f64h: "); show_f64le(f64h); printf("\n");
409 printf("f64s: "); show_f64le(f64s); printf("\n");
410
411 printf("[test %d] %.16Le -> (hw %s, sw %s)\n",
412 test_no, *(long double*)f80,
413 buf64h, buf64s );
414
415 return 1;
416}
417
418
419/* Convert an IEEE 64-bit double to a x87 extended double (80 bit)
420 using both the hardware and the soft version, and compare the
421 results. If they differ, print details and return 1. If they are
422 identical, return 0.
423*/
424int do_64_to_80_test ( Int test_no, UChar* f64, UChar* f80h, UChar* f80s)
425{
426 Char buf80s[100], buf80h[100];
427 Bool same;
428 Int k;
429 convert_f64le_to_f80le_HW(f64, f80h);
430 convert_f64le_to_f80le(f64, f80s);
431 same = True;
432 for (k = 0; k < 10; k++) {
433 if (f80s[k] != f80h[k]) {
434 same = False; break;
435 }
436 }
437 /* bitwise identical */
438 if (same)
439 return 0;
440
441 sprintf(buf80s, "%.20Le", *(long double*)f80s);
442 sprintf(buf80h, "%.20Le", *(long double*)f80h);
443
444 /* Not bitwise identical, but pretty darn close */
445 if (0 == strcmp(buf80s, buf80h))
446 return 0;
447
448 printf("\n");
449 printf("f64: "); show_f64le(f64); printf("\n");
450 printf("f80h: "); show_f80(f80h); printf("\n");
451 printf("f80s: "); show_f80(f80s); printf("\n");
452
453 printf("[test %d] %.16e -> (hw %s, sw %s)\n",
454 test_no, *(double*)f64,
455 buf80h, buf80s );
456
457 return 1;
458}
459
460
461
462void do_80_to_64_tests ( void )
463{
464 UInt b9,b8,b7,i, j;
465 Int fails=0, tests=0;
466 UChar* f64h = malloc(8);
467 UChar* f64s = malloc(8);
468 UChar* f80 = malloc(10);
469 int STEP = 1;
470
471 srandom(4343);
472
473 /* Ten million random bit patterns */
474 for (i = 0; i < 10000000; i++) {
475 tests++;
476 for (j = 0; j < 10; j++)
477 f80[j] = (random() >> 7) & 255;
478
479 fails += do_80_to_64_test(tests, f80, f64h, f64s);
480 }
481
482 /* 2^24 numbers in which the first 24 bits are tested exhaustively
483 -- this covers the sign, exponent and leading part of the
484 mantissa. */
485 for (b9 = 0; b9 < 256; b9 += STEP) {
486 for (b8 = 0; b8 < 256; b8 += STEP) {
487 for (b7 = 0; b7 < 256; b7 += STEP) {
488 tests++;
489 for (i = 0; i < 10; i++)
490 f80[i] = 0;
491 for (i = 0; i < 8; i++)
492 f64h[i] = f64s[i] = 0;
493 f80[9] = b9;
494 f80[8] = b8;
495 f80[7] = b7;
496
497 fails += do_80_to_64_test(tests, f80, f64h, f64s);
498 }}}
499
500 printf("\n80 -> 64: %d tests, %d fails\n\n", tests, fails);
501}
502
503
504void do_64_to_80_tests ( void )
505{
506 UInt b7,b6,b5,i, j;
507 Int fails=0, tests=0;
508 UChar* f80h = malloc(10);
509 UChar* f80s = malloc(10);
510 UChar* f64 = malloc(8);
511 int STEP = 1;
512
513 srandom(2323);
514
515 /* Ten million random bit patterns */
516 for (i = 0; i < 10000000; i++) {
517 tests++;
518 for (j = 0; j < 8; j++)
519 f64[j] = (random() >> 13) & 255;
520
521 fails += do_64_to_80_test(tests, f64, f80h, f80s);
522 }
523
524 /* 2^24 numbers in which the first 24 bits are tested exhaustively
525 -- this covers the sign, exponent and leading part of the
526 mantissa. */
527 for (b7 = 0; b7 < 256; b7 += STEP) {
528 for (b6 = 0; b6 < 256; b6 += STEP) {
529 for (b5 = 0; b5 < 256; b5 += STEP) {
530 tests++;
531 for (i = 0; i < 8; i++)
532 f64[i] = 0;
533 for (i = 0; i < 10; i++)
534 f80h[i] = f80s[i] = 0;
535 f64[7] = b7;
536 f64[6] = b6;
537 f64[5] = b5;
538
539 fails += do_64_to_80_test(tests, f64, f80h, f80s);
540 }}}
541
542 printf("\n64 -> 80: %d tests, %d fails\n\n", tests, fails);
543}
544
545
546int main ( void )
547{
548 do_80_to_64_tests();
549 do_64_to_80_tests();
550 return 0;
551}
552
sewardj07e09622004-09-06 20:39:20 +0000553#endif /* ndef USED_AS_INCLUDE */