blob: b7178cad30183105069b68572f340d0a36acd13e [file] [log] [blame]
Ben Murdoch61f157c2016-09-16 13:49:30 +01001// The following is adapted from fdlibm (http://www.netlib.org/fdlibm).
2//
3// ====================================================
4// Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved.
5//
6// Developed at SunSoft, a Sun Microsystems, Inc. business.
7// Permission to use, copy, modify, and distribute this
8// software is freely granted, provided that this notice
9// is preserved.
10// ====================================================
11//
12// The original source code covered by the above license above has been
13// modified significantly by Google Inc.
14// Copyright 2016 the V8 project authors. All rights reserved.
15
16#include "src/base/ieee754.h"
17
18#include <cmath>
19#include <limits>
20
21#include "src/base/build_config.h"
22#include "src/base/macros.h"
23
24namespace v8 {
25namespace base {
26namespace ieee754 {
27
28namespace {
29
30/* Disable "potential divide by 0" warning in Visual Studio compiler. */
31
32#if V8_CC_MSVC
33
34#pragma warning(disable : 4723)
35
36#endif
37
38/*
39 * The original fdlibm code used statements like:
40 * n0 = ((*(int*)&one)>>29)^1; * index of high word *
41 * ix0 = *(n0+(int*)&x); * high word of x *
42 * ix1 = *((1-n0)+(int*)&x); * low word of x *
43 * to dig two 32 bit words out of the 64 bit IEEE floating point
44 * value. That is non-ANSI, and, moreover, the gcc instruction
45 * scheduler gets it wrong. We instead use the following macros.
46 * Unlike the original code, we determine the endianness at compile
47 * time, not at run time; I don't see much benefit to selecting
48 * endianness at run time.
49 */
50
51/*
52 * A union which permits us to convert between a double and two 32 bit
53 * ints.
54 */
55
56#if V8_TARGET_LITTLE_ENDIAN
57
58typedef union {
59 double value;
60 struct {
61 uint32_t lsw;
62 uint32_t msw;
63 } parts;
64 struct {
65 uint64_t w;
66 } xparts;
67} ieee_double_shape_type;
68
69#else
70
71typedef union {
72 double value;
73 struct {
74 uint32_t msw;
75 uint32_t lsw;
76 } parts;
77 struct {
78 uint64_t w;
79 } xparts;
80} ieee_double_shape_type;
81
82#endif
83
84/* Get two 32 bit ints from a double. */
85
86#define EXTRACT_WORDS(ix0, ix1, d) \
87 do { \
88 ieee_double_shape_type ew_u; \
89 ew_u.value = (d); \
90 (ix0) = ew_u.parts.msw; \
91 (ix1) = ew_u.parts.lsw; \
92 } while (0)
93
94/* Get a 64-bit int from a double. */
95#define EXTRACT_WORD64(ix, d) \
96 do { \
97 ieee_double_shape_type ew_u; \
98 ew_u.value = (d); \
99 (ix) = ew_u.xparts.w; \
100 } while (0)
101
102/* Get the more significant 32 bit int from a double. */
103
104#define GET_HIGH_WORD(i, d) \
105 do { \
106 ieee_double_shape_type gh_u; \
107 gh_u.value = (d); \
108 (i) = gh_u.parts.msw; \
109 } while (0)
110
111/* Get the less significant 32 bit int from a double. */
112
113#define GET_LOW_WORD(i, d) \
114 do { \
115 ieee_double_shape_type gl_u; \
116 gl_u.value = (d); \
117 (i) = gl_u.parts.lsw; \
118 } while (0)
119
120/* Set a double from two 32 bit ints. */
121
122#define INSERT_WORDS(d, ix0, ix1) \
123 do { \
124 ieee_double_shape_type iw_u; \
125 iw_u.parts.msw = (ix0); \
126 iw_u.parts.lsw = (ix1); \
127 (d) = iw_u.value; \
128 } while (0)
129
130/* Set a double from a 64-bit int. */
131#define INSERT_WORD64(d, ix) \
132 do { \
133 ieee_double_shape_type iw_u; \
134 iw_u.xparts.w = (ix); \
135 (d) = iw_u.value; \
136 } while (0)
137
138/* Set the more significant 32 bits of a double from an int. */
139
140#define SET_HIGH_WORD(d, v) \
141 do { \
142 ieee_double_shape_type sh_u; \
143 sh_u.value = (d); \
144 sh_u.parts.msw = (v); \
145 (d) = sh_u.value; \
146 } while (0)
147
148/* Set the less significant 32 bits of a double from an int. */
149
150#define SET_LOW_WORD(d, v) \
151 do { \
152 ieee_double_shape_type sl_u; \
153 sl_u.value = (d); \
154 sl_u.parts.lsw = (v); \
155 (d) = sl_u.value; \
156 } while (0)
157
158/* Support macro. */
159
160#define STRICT_ASSIGN(type, lval, rval) ((lval) = (rval))
161
162int32_t __ieee754_rem_pio2(double x, double *y) WARN_UNUSED_RESULT;
163double __kernel_cos(double x, double y) WARN_UNUSED_RESULT;
164int __kernel_rem_pio2(double *x, double *y, int e0, int nx, int prec,
165 const int32_t *ipio2) WARN_UNUSED_RESULT;
166double __kernel_sin(double x, double y, int iy) WARN_UNUSED_RESULT;
167
168/* __ieee754_rem_pio2(x,y)
169 *
170 * return the remainder of x rem pi/2 in y[0]+y[1]
171 * use __kernel_rem_pio2()
172 */
173int32_t __ieee754_rem_pio2(double x, double *y) {
174 /*
175 * Table of constants for 2/pi, 396 Hex digits (476 decimal) of 2/pi
176 */
177 static const int32_t two_over_pi[] = {
178 0xA2F983, 0x6E4E44, 0x1529FC, 0x2757D1, 0xF534DD, 0xC0DB62, 0x95993C,
179 0x439041, 0xFE5163, 0xABDEBB, 0xC561B7, 0x246E3A, 0x424DD2, 0xE00649,
180 0x2EEA09, 0xD1921C, 0xFE1DEB, 0x1CB129, 0xA73EE8, 0x8235F5, 0x2EBB44,
181 0x84E99C, 0x7026B4, 0x5F7E41, 0x3991D6, 0x398353, 0x39F49C, 0x845F8B,
182 0xBDF928, 0x3B1FF8, 0x97FFDE, 0x05980F, 0xEF2F11, 0x8B5A0A, 0x6D1F6D,
183 0x367ECF, 0x27CB09, 0xB74F46, 0x3F669E, 0x5FEA2D, 0x7527BA, 0xC7EBE5,
184 0xF17B3D, 0x0739F7, 0x8A5292, 0xEA6BFB, 0x5FB11F, 0x8D5D08, 0x560330,
185 0x46FC7B, 0x6BABF0, 0xCFBC20, 0x9AF436, 0x1DA9E3, 0x91615E, 0xE61B08,
186 0x659985, 0x5F14A0, 0x68408D, 0xFFD880, 0x4D7327, 0x310606, 0x1556CA,
187 0x73A8C9, 0x60E27B, 0xC08C6B,
188 };
189
190 static const int32_t npio2_hw[] = {
191 0x3FF921FB, 0x400921FB, 0x4012D97C, 0x401921FB, 0x401F6A7A, 0x4022D97C,
192 0x4025FDBB, 0x402921FB, 0x402C463A, 0x402F6A7A, 0x4031475C, 0x4032D97C,
193 0x40346B9C, 0x4035FDBB, 0x40378FDB, 0x403921FB, 0x403AB41B, 0x403C463A,
194 0x403DD85A, 0x403F6A7A, 0x40407E4C, 0x4041475C, 0x4042106C, 0x4042D97C,
195 0x4043A28C, 0x40446B9C, 0x404534AC, 0x4045FDBB, 0x4046C6CB, 0x40478FDB,
196 0x404858EB, 0x404921FB,
197 };
198
199 /*
200 * invpio2: 53 bits of 2/pi
201 * pio2_1: first 33 bit of pi/2
202 * pio2_1t: pi/2 - pio2_1
203 * pio2_2: second 33 bit of pi/2
204 * pio2_2t: pi/2 - (pio2_1+pio2_2)
205 * pio2_3: third 33 bit of pi/2
206 * pio2_3t: pi/2 - (pio2_1+pio2_2+pio2_3)
207 */
208
209 static const double
210 zero = 0.00000000000000000000e+00, /* 0x00000000, 0x00000000 */
211 half = 5.00000000000000000000e-01, /* 0x3FE00000, 0x00000000 */
212 two24 = 1.67772160000000000000e+07, /* 0x41700000, 0x00000000 */
213 invpio2 = 6.36619772367581382433e-01, /* 0x3FE45F30, 0x6DC9C883 */
214 pio2_1 = 1.57079632673412561417e+00, /* 0x3FF921FB, 0x54400000 */
215 pio2_1t = 6.07710050650619224932e-11, /* 0x3DD0B461, 0x1A626331 */
216 pio2_2 = 6.07710050630396597660e-11, /* 0x3DD0B461, 0x1A600000 */
217 pio2_2t = 2.02226624879595063154e-21, /* 0x3BA3198A, 0x2E037073 */
218 pio2_3 = 2.02226624871116645580e-21, /* 0x3BA3198A, 0x2E000000 */
219 pio2_3t = 8.47842766036889956997e-32; /* 0x397B839A, 0x252049C1 */
220
221 double z, w, t, r, fn;
222 double tx[3];
223 int32_t e0, i, j, nx, n, ix, hx;
224 uint32_t low;
225
226 z = 0;
227 GET_HIGH_WORD(hx, x); /* high word of x */
228 ix = hx & 0x7fffffff;
229 if (ix <= 0x3fe921fb) { /* |x| ~<= pi/4 , no need for reduction */
230 y[0] = x;
231 y[1] = 0;
232 return 0;
233 }
234 if (ix < 0x4002d97c) { /* |x| < 3pi/4, special case with n=+-1 */
235 if (hx > 0) {
236 z = x - pio2_1;
237 if (ix != 0x3ff921fb) { /* 33+53 bit pi is good enough */
238 y[0] = z - pio2_1t;
239 y[1] = (z - y[0]) - pio2_1t;
240 } else { /* near pi/2, use 33+33+53 bit pi */
241 z -= pio2_2;
242 y[0] = z - pio2_2t;
243 y[1] = (z - y[0]) - pio2_2t;
244 }
245 return 1;
246 } else { /* negative x */
247 z = x + pio2_1;
248 if (ix != 0x3ff921fb) { /* 33+53 bit pi is good enough */
249 y[0] = z + pio2_1t;
250 y[1] = (z - y[0]) + pio2_1t;
251 } else { /* near pi/2, use 33+33+53 bit pi */
252 z += pio2_2;
253 y[0] = z + pio2_2t;
254 y[1] = (z - y[0]) + pio2_2t;
255 }
256 return -1;
257 }
258 }
259 if (ix <= 0x413921fb) { /* |x| ~<= 2^19*(pi/2), medium size */
260 t = fabs(x);
261 n = static_cast<int32_t>(t * invpio2 + half);
262 fn = static_cast<double>(n);
263 r = t - fn * pio2_1;
264 w = fn * pio2_1t; /* 1st round good to 85 bit */
265 if (n < 32 && ix != npio2_hw[n - 1]) {
266 y[0] = r - w; /* quick check no cancellation */
267 } else {
268 uint32_t high;
269 j = ix >> 20;
270 y[0] = r - w;
271 GET_HIGH_WORD(high, y[0]);
272 i = j - ((high >> 20) & 0x7ff);
273 if (i > 16) { /* 2nd iteration needed, good to 118 */
274 t = r;
275 w = fn * pio2_2;
276 r = t - w;
277 w = fn * pio2_2t - ((t - r) - w);
278 y[0] = r - w;
279 GET_HIGH_WORD(high, y[0]);
280 i = j - ((high >> 20) & 0x7ff);
281 if (i > 49) { /* 3rd iteration need, 151 bits acc */
282 t = r; /* will cover all possible cases */
283 w = fn * pio2_3;
284 r = t - w;
285 w = fn * pio2_3t - ((t - r) - w);
286 y[0] = r - w;
287 }
288 }
289 }
290 y[1] = (r - y[0]) - w;
291 if (hx < 0) {
292 y[0] = -y[0];
293 y[1] = -y[1];
294 return -n;
295 } else {
296 return n;
297 }
298 }
299 /*
300 * all other (large) arguments
301 */
302 if (ix >= 0x7ff00000) { /* x is inf or NaN */
303 y[0] = y[1] = x - x;
304 return 0;
305 }
306 /* set z = scalbn(|x|,ilogb(x)-23) */
307 GET_LOW_WORD(low, x);
308 SET_LOW_WORD(z, low);
309 e0 = (ix >> 20) - 1046; /* e0 = ilogb(z)-23; */
310 SET_HIGH_WORD(z, ix - static_cast<int32_t>(e0 << 20));
311 for (i = 0; i < 2; i++) {
312 tx[i] = static_cast<double>(static_cast<int32_t>(z));
313 z = (z - tx[i]) * two24;
314 }
315 tx[2] = z;
316 nx = 3;
317 while (tx[nx - 1] == zero) nx--; /* skip zero term */
318 n = __kernel_rem_pio2(tx, y, e0, nx, 2, two_over_pi);
319 if (hx < 0) {
320 y[0] = -y[0];
321 y[1] = -y[1];
322 return -n;
323 }
324 return n;
325}
326
327/* __kernel_cos( x, y )
328 * kernel cos function on [-pi/4, pi/4], pi/4 ~ 0.785398164
329 * Input x is assumed to be bounded by ~pi/4 in magnitude.
330 * Input y is the tail of x.
331 *
332 * Algorithm
333 * 1. Since cos(-x) = cos(x), we need only to consider positive x.
334 * 2. if x < 2^-27 (hx<0x3e400000 0), return 1 with inexact if x!=0.
335 * 3. cos(x) is approximated by a polynomial of degree 14 on
336 * [0,pi/4]
337 * 4 14
338 * cos(x) ~ 1 - x*x/2 + C1*x + ... + C6*x
339 * where the remez error is
340 *
341 * | 2 4 6 8 10 12 14 | -58
342 * |cos(x)-(1-.5*x +C1*x +C2*x +C3*x +C4*x +C5*x +C6*x )| <= 2
343 * | |
344 *
345 * 4 6 8 10 12 14
346 * 4. let r = C1*x +C2*x +C3*x +C4*x +C5*x +C6*x , then
347 * cos(x) = 1 - x*x/2 + r
348 * since cos(x+y) ~ cos(x) - sin(x)*y
349 * ~ cos(x) - x*y,
350 * a correction term is necessary in cos(x) and hence
351 * cos(x+y) = 1 - (x*x/2 - (r - x*y))
352 * For better accuracy when x > 0.3, let qx = |x|/4 with
353 * the last 32 bits mask off, and if x > 0.78125, let qx = 0.28125.
354 * Then
355 * cos(x+y) = (1-qx) - ((x*x/2-qx) - (r-x*y)).
356 * Note that 1-qx and (x*x/2-qx) is EXACT here, and the
357 * magnitude of the latter is at least a quarter of x*x/2,
358 * thus, reducing the rounding error in the subtraction.
359 */
360V8_INLINE double __kernel_cos(double x, double y) {
361 static const double
362 one = 1.00000000000000000000e+00, /* 0x3FF00000, 0x00000000 */
363 C1 = 4.16666666666666019037e-02, /* 0x3FA55555, 0x5555554C */
364 C2 = -1.38888888888741095749e-03, /* 0xBF56C16C, 0x16C15177 */
365 C3 = 2.48015872894767294178e-05, /* 0x3EFA01A0, 0x19CB1590 */
366 C4 = -2.75573143513906633035e-07, /* 0xBE927E4F, 0x809C52AD */
367 C5 = 2.08757232129817482790e-09, /* 0x3E21EE9E, 0xBDB4B1C4 */
368 C6 = -1.13596475577881948265e-11; /* 0xBDA8FAE9, 0xBE8838D4 */
369
370 double a, iz, z, r, qx;
371 int32_t ix;
372 GET_HIGH_WORD(ix, x);
373 ix &= 0x7fffffff; /* ix = |x|'s high word*/
374 if (ix < 0x3e400000) { /* if x < 2**27 */
375 if (static_cast<int>(x) == 0) return one; /* generate inexact */
376 }
377 z = x * x;
378 r = z * (C1 + z * (C2 + z * (C3 + z * (C4 + z * (C5 + z * C6)))));
379 if (ix < 0x3FD33333) { /* if |x| < 0.3 */
380 return one - (0.5 * z - (z * r - x * y));
381 } else {
382 if (ix > 0x3fe90000) { /* x > 0.78125 */
383 qx = 0.28125;
384 } else {
385 INSERT_WORDS(qx, ix - 0x00200000, 0); /* x/4 */
386 }
387 iz = 0.5 * z - qx;
388 a = one - qx;
389 return a - (iz - (z * r - x * y));
390 }
391}
392
393/* __kernel_rem_pio2(x,y,e0,nx,prec,ipio2)
394 * double x[],y[]; int e0,nx,prec; int ipio2[];
395 *
396 * __kernel_rem_pio2 return the last three digits of N with
397 * y = x - N*pi/2
398 * so that |y| < pi/2.
399 *
400 * The method is to compute the integer (mod 8) and fraction parts of
401 * (2/pi)*x without doing the full multiplication. In general we
402 * skip the part of the product that are known to be a huge integer (
403 * more accurately, = 0 mod 8 ). Thus the number of operations are
404 * independent of the exponent of the input.
405 *
406 * (2/pi) is represented by an array of 24-bit integers in ipio2[].
407 *
408 * Input parameters:
409 * x[] The input value (must be positive) is broken into nx
410 * pieces of 24-bit integers in double precision format.
411 * x[i] will be the i-th 24 bit of x. The scaled exponent
412 * of x[0] is given in input parameter e0 (i.e., x[0]*2^e0
413 * match x's up to 24 bits.
414 *
415 * Example of breaking a double positive z into x[0]+x[1]+x[2]:
416 * e0 = ilogb(z)-23
417 * z = scalbn(z,-e0)
418 * for i = 0,1,2
419 * x[i] = floor(z)
420 * z = (z-x[i])*2**24
421 *
422 *
423 * y[] output result in an array of double precision numbers.
424 * The dimension of y[] is:
425 * 24-bit precision 1
426 * 53-bit precision 2
427 * 64-bit precision 2
428 * 113-bit precision 3
429 * The actual value is the sum of them. Thus for 113-bit
430 * precison, one may have to do something like:
431 *
432 * long double t,w,r_head, r_tail;
433 * t = (long double)y[2] + (long double)y[1];
434 * w = (long double)y[0];
435 * r_head = t+w;
436 * r_tail = w - (r_head - t);
437 *
438 * e0 The exponent of x[0]
439 *
440 * nx dimension of x[]
441 *
442 * prec an integer indicating the precision:
443 * 0 24 bits (single)
444 * 1 53 bits (double)
445 * 2 64 bits (extended)
446 * 3 113 bits (quad)
447 *
448 * ipio2[]
449 * integer array, contains the (24*i)-th to (24*i+23)-th
450 * bit of 2/pi after binary point. The corresponding
451 * floating value is
452 *
453 * ipio2[i] * 2^(-24(i+1)).
454 *
455 * External function:
456 * double scalbn(), floor();
457 *
458 *
459 * Here is the description of some local variables:
460 *
461 * jk jk+1 is the initial number of terms of ipio2[] needed
462 * in the computation. The recommended value is 2,3,4,
463 * 6 for single, double, extended,and quad.
464 *
465 * jz local integer variable indicating the number of
466 * terms of ipio2[] used.
467 *
468 * jx nx - 1
469 *
470 * jv index for pointing to the suitable ipio2[] for the
471 * computation. In general, we want
472 * ( 2^e0*x[0] * ipio2[jv-1]*2^(-24jv) )/8
473 * is an integer. Thus
474 * e0-3-24*jv >= 0 or (e0-3)/24 >= jv
475 * Hence jv = max(0,(e0-3)/24).
476 *
477 * jp jp+1 is the number of terms in PIo2[] needed, jp = jk.
478 *
479 * q[] double array with integral value, representing the
480 * 24-bits chunk of the product of x and 2/pi.
481 *
482 * q0 the corresponding exponent of q[0]. Note that the
483 * exponent for q[i] would be q0-24*i.
484 *
485 * PIo2[] double precision array, obtained by cutting pi/2
486 * into 24 bits chunks.
487 *
488 * f[] ipio2[] in floating point
489 *
490 * iq[] integer array by breaking up q[] in 24-bits chunk.
491 *
492 * fq[] final product of x*(2/pi) in fq[0],..,fq[jk]
493 *
494 * ih integer. If >0 it indicates q[] is >= 0.5, hence
495 * it also indicates the *sign* of the result.
496 *
497 */
498int __kernel_rem_pio2(double *x, double *y, int e0, int nx, int prec,
499 const int32_t *ipio2) {
500 /* Constants:
501 * The hexadecimal values are the intended ones for the following
502 * constants. The decimal values may be used, provided that the
503 * compiler will convert from decimal to binary accurately enough
504 * to produce the hexadecimal values shown.
505 */
506 static const int init_jk[] = {2, 3, 4, 6}; /* initial value for jk */
507
508 static const double PIo2[] = {
509 1.57079625129699707031e+00, /* 0x3FF921FB, 0x40000000 */
510 7.54978941586159635335e-08, /* 0x3E74442D, 0x00000000 */
511 5.39030252995776476554e-15, /* 0x3CF84698, 0x80000000 */
512 3.28200341580791294123e-22, /* 0x3B78CC51, 0x60000000 */
513 1.27065575308067607349e-29, /* 0x39F01B83, 0x80000000 */
514 1.22933308981111328932e-36, /* 0x387A2520, 0x40000000 */
515 2.73370053816464559624e-44, /* 0x36E38222, 0x80000000 */
516 2.16741683877804819444e-51, /* 0x3569F31D, 0x00000000 */
517 };
518
519 static const double
520 zero = 0.0,
521 one = 1.0,
522 two24 = 1.67772160000000000000e+07, /* 0x41700000, 0x00000000 */
523 twon24 = 5.96046447753906250000e-08; /* 0x3E700000, 0x00000000 */
524
525 int32_t jz, jx, jv, jp, jk, carry, n, iq[20], i, j, k, m, q0, ih;
526 double z, fw, f[20], fq[20], q[20];
527
528 /* initialize jk*/
529 jk = init_jk[prec];
530 jp = jk;
531
532 /* determine jx,jv,q0, note that 3>q0 */
533 jx = nx - 1;
534 jv = (e0 - 3) / 24;
535 if (jv < 0) jv = 0;
536 q0 = e0 - 24 * (jv + 1);
537
538 /* set up f[0] to f[jx+jk] where f[jx+jk] = ipio2[jv+jk] */
539 j = jv - jx;
540 m = jx + jk;
541 for (i = 0; i <= m; i++, j++) {
542 f[i] = (j < 0) ? zero : static_cast<double>(ipio2[j]);
543 }
544
545 /* compute q[0],q[1],...q[jk] */
546 for (i = 0; i <= jk; i++) {
547 for (j = 0, fw = 0.0; j <= jx; j++) fw += x[j] * f[jx + i - j];
548 q[i] = fw;
549 }
550
551 jz = jk;
552recompute:
553 /* distill q[] into iq[] reversingly */
554 for (i = 0, j = jz, z = q[jz]; j > 0; i++, j--) {
555 fw = static_cast<double>(static_cast<int32_t>(twon24 * z));
556 iq[i] = static_cast<int32_t>(z - two24 * fw);
557 z = q[j - 1] + fw;
558 }
559
560 /* compute n */
561 z = scalbn(z, q0); /* actual value of z */
562 z -= 8.0 * floor(z * 0.125); /* trim off integer >= 8 */
563 n = static_cast<int32_t>(z);
564 z -= static_cast<double>(n);
565 ih = 0;
566 if (q0 > 0) { /* need iq[jz-1] to determine n */
567 i = (iq[jz - 1] >> (24 - q0));
568 n += i;
569 iq[jz - 1] -= i << (24 - q0);
570 ih = iq[jz - 1] >> (23 - q0);
571 } else if (q0 == 0) {
572 ih = iq[jz - 1] >> 23;
573 } else if (z >= 0.5) {
574 ih = 2;
575 }
576
577 if (ih > 0) { /* q > 0.5 */
578 n += 1;
579 carry = 0;
580 for (i = 0; i < jz; i++) { /* compute 1-q */
581 j = iq[i];
582 if (carry == 0) {
583 if (j != 0) {
584 carry = 1;
585 iq[i] = 0x1000000 - j;
586 }
587 } else {
588 iq[i] = 0xffffff - j;
589 }
590 }
591 if (q0 > 0) { /* rare case: chance is 1 in 12 */
592 switch (q0) {
593 case 1:
594 iq[jz - 1] &= 0x7fffff;
595 break;
596 case 2:
597 iq[jz - 1] &= 0x3fffff;
598 break;
599 }
600 }
601 if (ih == 2) {
602 z = one - z;
603 if (carry != 0) z -= scalbn(one, q0);
604 }
605 }
606
607 /* check if recomputation is needed */
608 if (z == zero) {
609 j = 0;
610 for (i = jz - 1; i >= jk; i--) j |= iq[i];
611 if (j == 0) { /* need recomputation */
612 for (k = 1; jk >= k && iq[jk - k] == 0; k++) {
613 /* k = no. of terms needed */
614 }
615
616 for (i = jz + 1; i <= jz + k; i++) { /* add q[jz+1] to q[jz+k] */
617 f[jx + i] = ipio2[jv + i];
618 for (j = 0, fw = 0.0; j <= jx; j++) fw += x[j] * f[jx + i - j];
619 q[i] = fw;
620 }
621 jz += k;
622 goto recompute;
623 }
624 }
625
626 /* chop off zero terms */
627 if (z == 0.0) {
628 jz -= 1;
629 q0 -= 24;
630 while (iq[jz] == 0) {
631 jz--;
632 q0 -= 24;
633 }
634 } else { /* break z into 24-bit if necessary */
635 z = scalbn(z, -q0);
636 if (z >= two24) {
637 fw = static_cast<double>(static_cast<int32_t>(twon24 * z));
638 iq[jz] = z - two24 * fw;
639 jz += 1;
640 q0 += 24;
641 iq[jz] = fw;
642 } else {
643 iq[jz] = z;
644 }
645 }
646
647 /* convert integer "bit" chunk to floating-point value */
648 fw = scalbn(one, q0);
649 for (i = jz; i >= 0; i--) {
650 q[i] = fw * iq[i];
651 fw *= twon24;
652 }
653
654 /* compute PIo2[0,...,jp]*q[jz,...,0] */
655 for (i = jz; i >= 0; i--) {
656 for (fw = 0.0, k = 0; k <= jp && k <= jz - i; k++) fw += PIo2[k] * q[i + k];
657 fq[jz - i] = fw;
658 }
659
660 /* compress fq[] into y[] */
661 switch (prec) {
662 case 0:
663 fw = 0.0;
664 for (i = jz; i >= 0; i--) fw += fq[i];
665 y[0] = (ih == 0) ? fw : -fw;
666 break;
667 case 1:
668 case 2:
669 fw = 0.0;
670 for (i = jz; i >= 0; i--) fw += fq[i];
671 y[0] = (ih == 0) ? fw : -fw;
672 fw = fq[0] - fw;
673 for (i = 1; i <= jz; i++) fw += fq[i];
674 y[1] = (ih == 0) ? fw : -fw;
675 break;
676 case 3: /* painful */
677 for (i = jz; i > 0; i--) {
678 fw = fq[i - 1] + fq[i];
679 fq[i] += fq[i - 1] - fw;
680 fq[i - 1] = fw;
681 }
682 for (i = jz; i > 1; i--) {
683 fw = fq[i - 1] + fq[i];
684 fq[i] += fq[i - 1] - fw;
685 fq[i - 1] = fw;
686 }
687 for (fw = 0.0, i = jz; i >= 2; i--) fw += fq[i];
688 if (ih == 0) {
689 y[0] = fq[0];
690 y[1] = fq[1];
691 y[2] = fw;
692 } else {
693 y[0] = -fq[0];
694 y[1] = -fq[1];
695 y[2] = -fw;
696 }
697 }
698 return n & 7;
699}
700
701/* __kernel_sin( x, y, iy)
702 * kernel sin function on [-pi/4, pi/4], pi/4 ~ 0.7854
703 * Input x is assumed to be bounded by ~pi/4 in magnitude.
704 * Input y is the tail of x.
705 * Input iy indicates whether y is 0. (if iy=0, y assume to be 0).
706 *
707 * Algorithm
708 * 1. Since sin(-x) = -sin(x), we need only to consider positive x.
709 * 2. if x < 2^-27 (hx<0x3e400000 0), return x with inexact if x!=0.
710 * 3. sin(x) is approximated by a polynomial of degree 13 on
711 * [0,pi/4]
712 * 3 13
713 * sin(x) ~ x + S1*x + ... + S6*x
714 * where
715 *
716 * |sin(x) 2 4 6 8 10 12 | -58
717 * |----- - (1+S1*x +S2*x +S3*x +S4*x +S5*x +S6*x )| <= 2
718 * | x |
719 *
720 * 4. sin(x+y) = sin(x) + sin'(x')*y
721 * ~ sin(x) + (1-x*x/2)*y
722 * For better accuracy, let
723 * 3 2 2 2 2
724 * r = x *(S2+x *(S3+x *(S4+x *(S5+x *S6))))
725 * then 3 2
726 * sin(x) = x + (S1*x + (x *(r-y/2)+y))
727 */
728V8_INLINE double __kernel_sin(double x, double y, int iy) {
729 static const double
730 half = 5.00000000000000000000e-01, /* 0x3FE00000, 0x00000000 */
731 S1 = -1.66666666666666324348e-01, /* 0xBFC55555, 0x55555549 */
732 S2 = 8.33333333332248946124e-03, /* 0x3F811111, 0x1110F8A6 */
733 S3 = -1.98412698298579493134e-04, /* 0xBF2A01A0, 0x19C161D5 */
734 S4 = 2.75573137070700676789e-06, /* 0x3EC71DE3, 0x57B1FE7D */
735 S5 = -2.50507602534068634195e-08, /* 0xBE5AE5E6, 0x8A2B9CEB */
736 S6 = 1.58969099521155010221e-10; /* 0x3DE5D93A, 0x5ACFD57C */
737
738 double z, r, v;
739 int32_t ix;
740 GET_HIGH_WORD(ix, x);
741 ix &= 0x7fffffff; /* high word of x */
742 if (ix < 0x3e400000) { /* |x| < 2**-27 */
743 if (static_cast<int>(x) == 0) return x;
744 } /* generate inexact */
745 z = x * x;
746 v = z * x;
747 r = S2 + z * (S3 + z * (S4 + z * (S5 + z * S6)));
748 if (iy == 0) {
749 return x + v * (S1 + z * r);
750 } else {
751 return x - ((z * (half * y - v * r) - y) - v * S1);
752 }
753}
754
755/* __kernel_tan( x, y, k )
756 * kernel tan function on [-pi/4, pi/4], pi/4 ~ 0.7854
757 * Input x is assumed to be bounded by ~pi/4 in magnitude.
758 * Input y is the tail of x.
759 * Input k indicates whether tan (if k=1) or
760 * -1/tan (if k= -1) is returned.
761 *
762 * Algorithm
763 * 1. Since tan(-x) = -tan(x), we need only to consider positive x.
764 * 2. if x < 2^-28 (hx<0x3e300000 0), return x with inexact if x!=0.
765 * 3. tan(x) is approximated by a odd polynomial of degree 27 on
766 * [0,0.67434]
767 * 3 27
768 * tan(x) ~ x + T1*x + ... + T13*x
769 * where
770 *
771 * |tan(x) 2 4 26 | -59.2
772 * |----- - (1+T1*x +T2*x +.... +T13*x )| <= 2
773 * | x |
774 *
775 * Note: tan(x+y) = tan(x) + tan'(x)*y
776 * ~ tan(x) + (1+x*x)*y
777 * Therefore, for better accuracy in computing tan(x+y), let
778 * 3 2 2 2 2
779 * r = x *(T2+x *(T3+x *(...+x *(T12+x *T13))))
780 * then
781 * 3 2
782 * tan(x+y) = x + (T1*x + (x *(r+y)+y))
783 *
784 * 4. For x in [0.67434,pi/4], let y = pi/4 - x, then
785 * tan(x) = tan(pi/4-y) = (1-tan(y))/(1+tan(y))
786 * = 1 - 2*(tan(y) - (tan(y)^2)/(1+tan(y)))
787 */
788double __kernel_tan(double x, double y, int iy) {
789 static const double xxx[] = {
790 3.33333333333334091986e-01, /* 3FD55555, 55555563 */
791 1.33333333333201242699e-01, /* 3FC11111, 1110FE7A */
792 5.39682539762260521377e-02, /* 3FABA1BA, 1BB341FE */
793 2.18694882948595424599e-02, /* 3F9664F4, 8406D637 */
794 8.86323982359930005737e-03, /* 3F8226E3, E96E8493 */
795 3.59207910759131235356e-03, /* 3F6D6D22, C9560328 */
796 1.45620945432529025516e-03, /* 3F57DBC8, FEE08315 */
797 5.88041240820264096874e-04, /* 3F4344D8, F2F26501 */
798 2.46463134818469906812e-04, /* 3F3026F7, 1A8D1068 */
799 7.81794442939557092300e-05, /* 3F147E88, A03792A6 */
800 7.14072491382608190305e-05, /* 3F12B80F, 32F0A7E9 */
801 -1.85586374855275456654e-05, /* BEF375CB, DB605373 */
802 2.59073051863633712884e-05, /* 3EFB2A70, 74BF7AD4 */
803 /* one */ 1.00000000000000000000e+00, /* 3FF00000, 00000000 */
804 /* pio4 */ 7.85398163397448278999e-01, /* 3FE921FB, 54442D18 */
805 /* pio4lo */ 3.06161699786838301793e-17 /* 3C81A626, 33145C07 */
806 };
807#define one xxx[13]
808#define pio4 xxx[14]
809#define pio4lo xxx[15]
810#define T xxx
811
812 double z, r, v, w, s;
813 int32_t ix, hx;
814
815 GET_HIGH_WORD(hx, x); /* high word of x */
816 ix = hx & 0x7fffffff; /* high word of |x| */
817 if (ix < 0x3e300000) { /* x < 2**-28 */
818 if (static_cast<int>(x) == 0) { /* generate inexact */
819 uint32_t low;
820 GET_LOW_WORD(low, x);
821 if (((ix | low) | (iy + 1)) == 0) {
822 return one / fabs(x);
823 } else {
824 if (iy == 1) {
825 return x;
826 } else { /* compute -1 / (x+y) carefully */
827 double a, t;
828
829 z = w = x + y;
830 SET_LOW_WORD(z, 0);
831 v = y - (z - x);
832 t = a = -one / w;
833 SET_LOW_WORD(t, 0);
834 s = one + t * z;
835 return t + a * (s + t * v);
836 }
837 }
838 }
839 }
840 if (ix >= 0x3FE59428) { /* |x| >= 0.6744 */
841 if (hx < 0) {
842 x = -x;
843 y = -y;
844 }
845 z = pio4 - x;
846 w = pio4lo - y;
847 x = z + w;
848 y = 0.0;
849 }
850 z = x * x;
851 w = z * z;
852 /*
853 * Break x^5*(T[1]+x^2*T[2]+...) into
854 * x^5(T[1]+x^4*T[3]+...+x^20*T[11]) +
855 * x^5(x^2*(T[2]+x^4*T[4]+...+x^22*[T12]))
856 */
857 r = T[1] + w * (T[3] + w * (T[5] + w * (T[7] + w * (T[9] + w * T[11]))));
858 v = z *
859 (T[2] + w * (T[4] + w * (T[6] + w * (T[8] + w * (T[10] + w * T[12])))));
860 s = z * x;
861 r = y + z * (s * (r + v) + y);
862 r += T[0] * s;
863 w = x + r;
864 if (ix >= 0x3FE59428) {
865 v = iy;
866 return (1 - ((hx >> 30) & 2)) * (v - 2.0 * (x - (w * w / (w + v) - r)));
867 }
868 if (iy == 1) {
869 return w;
870 } else {
871 /*
872 * if allow error up to 2 ulp, simply return
873 * -1.0 / (x+r) here
874 */
875 /* compute -1.0 / (x+r) accurately */
876 double a, t;
877 z = w;
878 SET_LOW_WORD(z, 0);
879 v = r - (z - x); /* z+v = r+x */
880 t = a = -1.0 / w; /* a = -1.0/w */
881 SET_LOW_WORD(t, 0);
882 s = 1.0 + t * z;
883 return t + a * (s + t * v);
884 }
885
886#undef one
887#undef pio4
888#undef pio4lo
889#undef T
890}
891
892} // namespace
893
894/* atan(x)
895 * Method
896 * 1. Reduce x to positive by atan(x) = -atan(-x).
897 * 2. According to the integer k=4t+0.25 chopped, t=x, the argument
898 * is further reduced to one of the following intervals and the
899 * arctangent of t is evaluated by the corresponding formula:
900 *
901 * [0,7/16] atan(x) = t-t^3*(a1+t^2*(a2+...(a10+t^2*a11)...)
902 * [7/16,11/16] atan(x) = atan(1/2) + atan( (t-0.5)/(1+t/2) )
903 * [11/16.19/16] atan(x) = atan( 1 ) + atan( (t-1)/(1+t) )
904 * [19/16,39/16] atan(x) = atan(3/2) + atan( (t-1.5)/(1+1.5t) )
905 * [39/16,INF] atan(x) = atan(INF) + atan( -1/t )
906 *
907 * Constants:
908 * The hexadecimal values are the intended ones for the following
909 * constants. The decimal values may be used, provided that the
910 * compiler will convert from decimal to binary accurately enough
911 * to produce the hexadecimal values shown.
912 */
913double atan(double x) {
914 static const double atanhi[] = {
915 4.63647609000806093515e-01, /* atan(0.5)hi 0x3FDDAC67, 0x0561BB4F */
916 7.85398163397448278999e-01, /* atan(1.0)hi 0x3FE921FB, 0x54442D18 */
917 9.82793723247329054082e-01, /* atan(1.5)hi 0x3FEF730B, 0xD281F69B */
918 1.57079632679489655800e+00, /* atan(inf)hi 0x3FF921FB, 0x54442D18 */
919 };
920
921 static const double atanlo[] = {
922 2.26987774529616870924e-17, /* atan(0.5)lo 0x3C7A2B7F, 0x222F65E2 */
923 3.06161699786838301793e-17, /* atan(1.0)lo 0x3C81A626, 0x33145C07 */
924 1.39033110312309984516e-17, /* atan(1.5)lo 0x3C700788, 0x7AF0CBBD */
925 6.12323399573676603587e-17, /* atan(inf)lo 0x3C91A626, 0x33145C07 */
926 };
927
928 static const double aT[] = {
929 3.33333333333329318027e-01, /* 0x3FD55555, 0x5555550D */
930 -1.99999999998764832476e-01, /* 0xBFC99999, 0x9998EBC4 */
931 1.42857142725034663711e-01, /* 0x3FC24924, 0x920083FF */
932 -1.11111104054623557880e-01, /* 0xBFBC71C6, 0xFE231671 */
933 9.09088713343650656196e-02, /* 0x3FB745CD, 0xC54C206E */
934 -7.69187620504482999495e-02, /* 0xBFB3B0F2, 0xAF749A6D */
935 6.66107313738753120669e-02, /* 0x3FB10D66, 0xA0D03D51 */
936 -5.83357013379057348645e-02, /* 0xBFADDE2D, 0x52DEFD9A */
937 4.97687799461593236017e-02, /* 0x3FA97B4B, 0x24760DEB */
938 -3.65315727442169155270e-02, /* 0xBFA2B444, 0x2C6A6C2F */
939 1.62858201153657823623e-02, /* 0x3F90AD3A, 0xE322DA11 */
940 };
941
942 static const double one = 1.0, huge = 1.0e300;
943
944 double w, s1, s2, z;
945 int32_t ix, hx, id;
946
947 GET_HIGH_WORD(hx, x);
948 ix = hx & 0x7fffffff;
949 if (ix >= 0x44100000) { /* if |x| >= 2^66 */
950 uint32_t low;
951 GET_LOW_WORD(low, x);
952 if (ix > 0x7ff00000 || (ix == 0x7ff00000 && (low != 0)))
953 return x + x; /* NaN */
954 if (hx > 0)
955 return atanhi[3] + *(volatile double *)&atanlo[3];
956 else
957 return -atanhi[3] - *(volatile double *)&atanlo[3];
958 }
959 if (ix < 0x3fdc0000) { /* |x| < 0.4375 */
960 if (ix < 0x3e400000) { /* |x| < 2^-27 */
961 if (huge + x > one) return x; /* raise inexact */
962 }
963 id = -1;
964 } else {
965 x = fabs(x);
966 if (ix < 0x3ff30000) { /* |x| < 1.1875 */
967 if (ix < 0x3fe60000) { /* 7/16 <=|x|<11/16 */
968 id = 0;
969 x = (2.0 * x - one) / (2.0 + x);
970 } else { /* 11/16<=|x|< 19/16 */
971 id = 1;
972 x = (x - one) / (x + one);
973 }
974 } else {
975 if (ix < 0x40038000) { /* |x| < 2.4375 */
976 id = 2;
977 x = (x - 1.5) / (one + 1.5 * x);
978 } else { /* 2.4375 <= |x| < 2^66 */
979 id = 3;
980 x = -1.0 / x;
981 }
982 }
983 }
984 /* end of argument reduction */
985 z = x * x;
986 w = z * z;
987 /* break sum from i=0 to 10 aT[i]z**(i+1) into odd and even poly */
988 s1 = z * (aT[0] +
989 w * (aT[2] + w * (aT[4] + w * (aT[6] + w * (aT[8] + w * aT[10])))));
990 s2 = w * (aT[1] + w * (aT[3] + w * (aT[5] + w * (aT[7] + w * aT[9]))));
991 if (id < 0) {
992 return x - x * (s1 + s2);
993 } else {
994 z = atanhi[id] - ((x * (s1 + s2) - atanlo[id]) - x);
995 return (hx < 0) ? -z : z;
996 }
997}
998
999/* atan2(y,x)
1000 * Method :
1001 * 1. Reduce y to positive by atan2(y,x)=-atan2(-y,x).
1002 * 2. Reduce x to positive by (if x and y are unexceptional):
1003 * ARG (x+iy) = arctan(y/x) ... if x > 0,
1004 * ARG (x+iy) = pi - arctan[y/(-x)] ... if x < 0,
1005 *
1006 * Special cases:
1007 *
1008 * ATAN2((anything), NaN ) is NaN;
1009 * ATAN2(NAN , (anything) ) is NaN;
1010 * ATAN2(+-0, +(anything but NaN)) is +-0 ;
1011 * ATAN2(+-0, -(anything but NaN)) is +-pi ;
1012 * ATAN2(+-(anything but 0 and NaN), 0) is +-pi/2;
1013 * ATAN2(+-(anything but INF and NaN), +INF) is +-0 ;
1014 * ATAN2(+-(anything but INF and NaN), -INF) is +-pi;
1015 * ATAN2(+-INF,+INF ) is +-pi/4 ;
1016 * ATAN2(+-INF,-INF ) is +-3pi/4;
1017 * ATAN2(+-INF, (anything but,0,NaN, and INF)) is +-pi/2;
1018 *
1019 * Constants:
1020 * The hexadecimal values are the intended ones for the following
1021 * constants. The decimal values may be used, provided that the
1022 * compiler will convert from decimal to binary accurately enough
1023 * to produce the hexadecimal values shown.
1024 */
1025double atan2(double y, double x) {
1026 static volatile double tiny = 1.0e-300;
1027 static const double
1028 zero = 0.0,
1029 pi_o_4 = 7.8539816339744827900E-01, /* 0x3FE921FB, 0x54442D18 */
1030 pi_o_2 = 1.5707963267948965580E+00, /* 0x3FF921FB, 0x54442D18 */
1031 pi = 3.1415926535897931160E+00; /* 0x400921FB, 0x54442D18 */
1032 static volatile double pi_lo =
1033 1.2246467991473531772E-16; /* 0x3CA1A626, 0x33145C07 */
1034
1035 double z;
1036 int32_t k, m, hx, hy, ix, iy;
1037 uint32_t lx, ly;
1038
1039 EXTRACT_WORDS(hx, lx, x);
1040 ix = hx & 0x7fffffff;
1041 EXTRACT_WORDS(hy, ly, y);
1042 iy = hy & 0x7fffffff;
1043 if (((ix | ((lx | -static_cast<int32_t>(lx)) >> 31)) > 0x7ff00000) ||
1044 ((iy | ((ly | -static_cast<int32_t>(ly)) >> 31)) > 0x7ff00000)) {
1045 return x + y; /* x or y is NaN */
1046 }
1047 if (((hx - 0x3ff00000) | lx) == 0) return atan(y); /* x=1.0 */
1048 m = ((hy >> 31) & 1) | ((hx >> 30) & 2); /* 2*sign(x)+sign(y) */
1049
1050 /* when y = 0 */
1051 if ((iy | ly) == 0) {
1052 switch (m) {
1053 case 0:
1054 case 1:
1055 return y; /* atan(+-0,+anything)=+-0 */
1056 case 2:
1057 return pi + tiny; /* atan(+0,-anything) = pi */
1058 case 3:
1059 return -pi - tiny; /* atan(-0,-anything) =-pi */
1060 }
1061 }
1062 /* when x = 0 */
1063 if ((ix | lx) == 0) return (hy < 0) ? -pi_o_2 - tiny : pi_o_2 + tiny;
1064
1065 /* when x is INF */
1066 if (ix == 0x7ff00000) {
1067 if (iy == 0x7ff00000) {
1068 switch (m) {
1069 case 0:
1070 return pi_o_4 + tiny; /* atan(+INF,+INF) */
1071 case 1:
1072 return -pi_o_4 - tiny; /* atan(-INF,+INF) */
1073 case 2:
1074 return 3.0 * pi_o_4 + tiny; /*atan(+INF,-INF)*/
1075 case 3:
1076 return -3.0 * pi_o_4 - tiny; /*atan(-INF,-INF)*/
1077 }
1078 } else {
1079 switch (m) {
1080 case 0:
1081 return zero; /* atan(+...,+INF) */
1082 case 1:
1083 return -zero; /* atan(-...,+INF) */
1084 case 2:
1085 return pi + tiny; /* atan(+...,-INF) */
1086 case 3:
1087 return -pi - tiny; /* atan(-...,-INF) */
1088 }
1089 }
1090 }
1091 /* when y is INF */
1092 if (iy == 0x7ff00000) return (hy < 0) ? -pi_o_2 - tiny : pi_o_2 + tiny;
1093
1094 /* compute y/x */
1095 k = (iy - ix) >> 20;
1096 if (k > 60) { /* |y/x| > 2**60 */
1097 z = pi_o_2 + 0.5 * pi_lo;
1098 m &= 1;
1099 } else if (hx < 0 && k < -60) {
1100 z = 0.0; /* 0 > |y|/x > -2**-60 */
1101 } else {
1102 z = atan(fabs(y / x)); /* safe to do y/x */
1103 }
1104 switch (m) {
1105 case 0:
1106 return z; /* atan(+,+) */
1107 case 1:
1108 return -z; /* atan(-,+) */
1109 case 2:
1110 return pi - (z - pi_lo); /* atan(+,-) */
1111 default: /* case 3 */
1112 return (z - pi_lo) - pi; /* atan(-,-) */
1113 }
1114}
1115
1116/* cos(x)
1117 * Return cosine function of x.
1118 *
1119 * kernel function:
1120 * __kernel_sin ... sine function on [-pi/4,pi/4]
1121 * __kernel_cos ... cosine function on [-pi/4,pi/4]
1122 * __ieee754_rem_pio2 ... argument reduction routine
1123 *
1124 * Method.
1125 * Let S,C and T denote the sin, cos and tan respectively on
1126 * [-PI/4, +PI/4]. Reduce the argument x to y1+y2 = x-k*pi/2
1127 * in [-pi/4 , +pi/4], and let n = k mod 4.
1128 * We have
1129 *
1130 * n sin(x) cos(x) tan(x)
1131 * ----------------------------------------------------------
1132 * 0 S C T
1133 * 1 C -S -1/T
1134 * 2 -S -C T
1135 * 3 -C S -1/T
1136 * ----------------------------------------------------------
1137 *
1138 * Special cases:
1139 * Let trig be any of sin, cos, or tan.
1140 * trig(+-INF) is NaN, with signals;
1141 * trig(NaN) is that NaN;
1142 *
1143 * Accuracy:
1144 * TRIG(x) returns trig(x) nearly rounded
1145 */
1146double cos(double x) {
1147 double y[2], z = 0.0;
1148 int32_t n, ix;
1149
1150 /* High word of x. */
1151 GET_HIGH_WORD(ix, x);
1152
1153 /* |x| ~< pi/4 */
1154 ix &= 0x7fffffff;
1155 if (ix <= 0x3fe921fb) {
1156 return __kernel_cos(x, z);
1157 } else if (ix >= 0x7ff00000) {
1158 /* cos(Inf or NaN) is NaN */
1159 return x - x;
1160 } else {
1161 /* argument reduction needed */
1162 n = __ieee754_rem_pio2(x, y);
1163 switch (n & 3) {
1164 case 0:
1165 return __kernel_cos(y[0], y[1]);
1166 case 1:
1167 return -__kernel_sin(y[0], y[1], 1);
1168 case 2:
1169 return -__kernel_cos(y[0], y[1]);
1170 default:
1171 return __kernel_sin(y[0], y[1], 1);
1172 }
1173 }
1174}
1175
1176/* exp(x)
1177 * Returns the exponential of x.
1178 *
1179 * Method
1180 * 1. Argument reduction:
1181 * Reduce x to an r so that |r| <= 0.5*ln2 ~ 0.34658.
1182 * Given x, find r and integer k such that
1183 *
1184 * x = k*ln2 + r, |r| <= 0.5*ln2.
1185 *
1186 * Here r will be represented as r = hi-lo for better
1187 * accuracy.
1188 *
1189 * 2. Approximation of exp(r) by a special rational function on
1190 * the interval [0,0.34658]:
1191 * Write
1192 * R(r**2) = r*(exp(r)+1)/(exp(r)-1) = 2 + r*r/6 - r**4/360 + ...
1193 * We use a special Remes algorithm on [0,0.34658] to generate
1194 * a polynomial of degree 5 to approximate R. The maximum error
1195 * of this polynomial approximation is bounded by 2**-59. In
1196 * other words,
1197 * R(z) ~ 2.0 + P1*z + P2*z**2 + P3*z**3 + P4*z**4 + P5*z**5
1198 * (where z=r*r, and the values of P1 to P5 are listed below)
1199 * and
1200 * | 5 | -59
1201 * | 2.0+P1*z+...+P5*z - R(z) | <= 2
1202 * | |
1203 * The computation of exp(r) thus becomes
1204 * 2*r
1205 * exp(r) = 1 + -------
1206 * R - r
1207 * r*R1(r)
1208 * = 1 + r + ----------- (for better accuracy)
1209 * 2 - R1(r)
1210 * where
1211 * 2 4 10
1212 * R1(r) = r - (P1*r + P2*r + ... + P5*r ).
1213 *
1214 * 3. Scale back to obtain exp(x):
1215 * From step 1, we have
1216 * exp(x) = 2^k * exp(r)
1217 *
1218 * Special cases:
1219 * exp(INF) is INF, exp(NaN) is NaN;
1220 * exp(-INF) is 0, and
1221 * for finite argument, only exp(0)=1 is exact.
1222 *
1223 * Accuracy:
1224 * according to an error analysis, the error is always less than
1225 * 1 ulp (unit in the last place).
1226 *
1227 * Misc. info.
1228 * For IEEE double
1229 * if x > 7.09782712893383973096e+02 then exp(x) overflow
1230 * if x < -7.45133219101941108420e+02 then exp(x) underflow
1231 *
1232 * Constants:
1233 * The hexadecimal values are the intended ones for the following
1234 * constants. The decimal values may be used, provided that the
1235 * compiler will convert from decimal to binary accurately enough
1236 * to produce the hexadecimal values shown.
1237 */
1238double exp(double x) {
1239 static const double
1240 one = 1.0,
1241 halF[2] = {0.5, -0.5},
1242 o_threshold = 7.09782712893383973096e+02, /* 0x40862E42, 0xFEFA39EF */
1243 u_threshold = -7.45133219101941108420e+02, /* 0xc0874910, 0xD52D3051 */
1244 ln2HI[2] = {6.93147180369123816490e-01, /* 0x3fe62e42, 0xfee00000 */
1245 -6.93147180369123816490e-01}, /* 0xbfe62e42, 0xfee00000 */
1246 ln2LO[2] = {1.90821492927058770002e-10, /* 0x3dea39ef, 0x35793c76 */
1247 -1.90821492927058770002e-10}, /* 0xbdea39ef, 0x35793c76 */
1248 invln2 = 1.44269504088896338700e+00, /* 0x3ff71547, 0x652b82fe */
1249 P1 = 1.66666666666666019037e-01, /* 0x3FC55555, 0x5555553E */
1250 P2 = -2.77777777770155933842e-03, /* 0xBF66C16C, 0x16BEBD93 */
1251 P3 = 6.61375632143793436117e-05, /* 0x3F11566A, 0xAF25DE2C */
1252 P4 = -1.65339022054652515390e-06, /* 0xBEBBBD41, 0xC5D26BF1 */
1253 P5 = 4.13813679705723846039e-08, /* 0x3E663769, 0x72BEA4D0 */
1254 E = 2.718281828459045; /* 0x4005bf0a, 0x8b145769 */
1255
1256 static volatile double
1257 huge = 1.0e+300,
1258 twom1000 = 9.33263618503218878990e-302, /* 2**-1000=0x01700000,0*/
1259 two1023 = 8.988465674311579539e307; /* 0x1p1023 */
1260
1261 double y, hi = 0.0, lo = 0.0, c, t, twopk;
1262 int32_t k = 0, xsb;
1263 uint32_t hx;
1264
1265 GET_HIGH_WORD(hx, x);
1266 xsb = (hx >> 31) & 1; /* sign bit of x */
1267 hx &= 0x7fffffff; /* high word of |x| */
1268
1269 /* filter out non-finite argument */
1270 if (hx >= 0x40862E42) { /* if |x|>=709.78... */
1271 if (hx >= 0x7ff00000) {
1272 uint32_t lx;
1273 GET_LOW_WORD(lx, x);
1274 if (((hx & 0xfffff) | lx) != 0)
1275 return x + x; /* NaN */
1276 else
1277 return (xsb == 0) ? x : 0.0; /* exp(+-inf)={inf,0} */
1278 }
1279 if (x > o_threshold) return huge * huge; /* overflow */
1280 if (x < u_threshold) return twom1000 * twom1000; /* underflow */
1281 }
1282
1283 /* argument reduction */
1284 if (hx > 0x3fd62e42) { /* if |x| > 0.5 ln2 */
1285 if (hx < 0x3FF0A2B2) { /* and |x| < 1.5 ln2 */
1286 /* TODO(rtoy): We special case exp(1) here to return the correct
1287 * value of E, as the computation below would get the last bit
1288 * wrong. We should probably fix the algorithm instead.
1289 */
1290 if (x == 1.0) return E;
1291 hi = x - ln2HI[xsb];
1292 lo = ln2LO[xsb];
1293 k = 1 - xsb - xsb;
1294 } else {
1295 k = static_cast<int>(invln2 * x + halF[xsb]);
1296 t = k;
1297 hi = x - t * ln2HI[0]; /* t*ln2HI is exact here */
1298 lo = t * ln2LO[0];
1299 }
1300 STRICT_ASSIGN(double, x, hi - lo);
1301 } else if (hx < 0x3e300000) { /* when |x|<2**-28 */
1302 if (huge + x > one) return one + x; /* trigger inexact */
1303 } else {
1304 k = 0;
1305 }
1306
1307 /* x is now in primary range */
1308 t = x * x;
1309 if (k >= -1021) {
1310 INSERT_WORDS(twopk, 0x3ff00000 + (k << 20), 0);
1311 } else {
1312 INSERT_WORDS(twopk, 0x3ff00000 + ((k + 1000) << 20), 0);
1313 }
1314 c = x - t * (P1 + t * (P2 + t * (P3 + t * (P4 + t * P5))));
1315 if (k == 0) {
1316 return one - ((x * c) / (c - 2.0) - x);
1317 } else {
1318 y = one - ((lo - (x * c) / (2.0 - c)) - hi);
1319 }
1320 if (k >= -1021) {
1321 if (k == 1024) return y * 2.0 * two1023;
1322 return y * twopk;
1323 } else {
1324 return y * twopk * twom1000;
1325 }
1326}
1327
1328/*
1329 * Method :
1330 * 1.Reduced x to positive by atanh(-x) = -atanh(x)
1331 * 2.For x>=0.5
1332 * 1 2x x
1333 * atanh(x) = --- * log(1 + -------) = 0.5 * log1p(2 * --------)
1334 * 2 1 - x 1 - x
1335 *
1336 * For x<0.5
1337 * atanh(x) = 0.5*log1p(2x+2x*x/(1-x))
1338 *
1339 * Special cases:
1340 * atanh(x) is NaN if |x| > 1 with signal;
1341 * atanh(NaN) is that NaN with no signal;
1342 * atanh(+-1) is +-INF with signal.
1343 *
1344 */
1345double atanh(double x) {
1346 static const double one = 1.0, huge = 1e300;
1347 static const double zero = 0.0;
1348
1349 double t;
1350 int32_t hx, ix;
1351 uint32_t lx;
1352 EXTRACT_WORDS(hx, lx, x);
1353 ix = hx & 0x7fffffff;
1354 if ((ix | ((lx | -static_cast<int32_t>(lx)) >> 31)) > 0x3ff00000) /* |x|>1 */
1355 return (x - x) / (x - x);
1356 if (ix == 0x3ff00000) return x / zero;
1357 if (ix < 0x3e300000 && (huge + x) > zero) return x; /* x<2**-28 */
1358 SET_HIGH_WORD(x, ix);
1359 if (ix < 0x3fe00000) { /* x < 0.5 */
1360 t = x + x;
1361 t = 0.5 * log1p(t + t * x / (one - x));
1362 } else {
1363 t = 0.5 * log1p((x + x) / (one - x));
1364 }
1365 if (hx >= 0)
1366 return t;
1367 else
1368 return -t;
1369}
1370
1371/* log(x)
1372 * Return the logrithm of x
1373 *
1374 * Method :
1375 * 1. Argument Reduction: find k and f such that
1376 * x = 2^k * (1+f),
1377 * where sqrt(2)/2 < 1+f < sqrt(2) .
1378 *
1379 * 2. Approximation of log(1+f).
1380 * Let s = f/(2+f) ; based on log(1+f) = log(1+s) - log(1-s)
1381 * = 2s + 2/3 s**3 + 2/5 s**5 + .....,
1382 * = 2s + s*R
1383 * We use a special Reme algorithm on [0,0.1716] to generate
1384 * a polynomial of degree 14 to approximate R The maximum error
1385 * of this polynomial approximation is bounded by 2**-58.45. In
1386 * other words,
1387 * 2 4 6 8 10 12 14
1388 * R(z) ~ Lg1*s +Lg2*s +Lg3*s +Lg4*s +Lg5*s +Lg6*s +Lg7*s
1389 * (the values of Lg1 to Lg7 are listed in the program)
1390 * and
1391 * | 2 14 | -58.45
1392 * | Lg1*s +...+Lg7*s - R(z) | <= 2
1393 * | |
1394 * Note that 2s = f - s*f = f - hfsq + s*hfsq, where hfsq = f*f/2.
1395 * In order to guarantee error in log below 1ulp, we compute log
1396 * by
1397 * log(1+f) = f - s*(f - R) (if f is not too large)
1398 * log(1+f) = f - (hfsq - s*(hfsq+R)). (better accuracy)
1399 *
1400 * 3. Finally, log(x) = k*ln2 + log(1+f).
1401 * = k*ln2_hi+(f-(hfsq-(s*(hfsq+R)+k*ln2_lo)))
1402 * Here ln2 is split into two floating point number:
1403 * ln2_hi + ln2_lo,
1404 * where n*ln2_hi is always exact for |n| < 2000.
1405 *
1406 * Special cases:
1407 * log(x) is NaN with signal if x < 0 (including -INF) ;
1408 * log(+INF) is +INF; log(0) is -INF with signal;
1409 * log(NaN) is that NaN with no signal.
1410 *
1411 * Accuracy:
1412 * according to an error analysis, the error is always less than
1413 * 1 ulp (unit in the last place).
1414 *
1415 * Constants:
1416 * The hexadecimal values are the intended ones for the following
1417 * constants. The decimal values may be used, provided that the
1418 * compiler will convert from decimal to binary accurately enough
1419 * to produce the hexadecimal values shown.
1420 */
1421double log(double x) {
1422 static const double /* -- */
1423 ln2_hi = 6.93147180369123816490e-01, /* 3fe62e42 fee00000 */
1424 ln2_lo = 1.90821492927058770002e-10, /* 3dea39ef 35793c76 */
1425 two54 = 1.80143985094819840000e+16, /* 43500000 00000000 */
1426 Lg1 = 6.666666666666735130e-01, /* 3FE55555 55555593 */
1427 Lg2 = 3.999999999940941908e-01, /* 3FD99999 9997FA04 */
1428 Lg3 = 2.857142874366239149e-01, /* 3FD24924 94229359 */
1429 Lg4 = 2.222219843214978396e-01, /* 3FCC71C5 1D8E78AF */
1430 Lg5 = 1.818357216161805012e-01, /* 3FC74664 96CB03DE */
1431 Lg6 = 1.531383769920937332e-01, /* 3FC39A09 D078C69F */
1432 Lg7 = 1.479819860511658591e-01; /* 3FC2F112 DF3E5244 */
1433
1434 static const double zero = 0.0;
1435 static volatile double vzero = 0.0;
1436
1437 double hfsq, f, s, z, R, w, t1, t2, dk;
1438 int32_t k, hx, i, j;
1439 uint32_t lx;
1440
1441 EXTRACT_WORDS(hx, lx, x);
1442
1443 k = 0;
1444 if (hx < 0x00100000) { /* x < 2**-1022 */
1445 if (((hx & 0x7fffffff) | lx) == 0)
1446 return -two54 / vzero; /* log(+-0)=-inf */
1447 if (hx < 0) return (x - x) / zero; /* log(-#) = NaN */
1448 k -= 54;
1449 x *= two54; /* subnormal number, scale up x */
1450 GET_HIGH_WORD(hx, x);
1451 }
1452 if (hx >= 0x7ff00000) return x + x;
1453 k += (hx >> 20) - 1023;
1454 hx &= 0x000fffff;
1455 i = (hx + 0x95f64) & 0x100000;
1456 SET_HIGH_WORD(x, hx | (i ^ 0x3ff00000)); /* normalize x or x/2 */
1457 k += (i >> 20);
1458 f = x - 1.0;
1459 if ((0x000fffff & (2 + hx)) < 3) { /* -2**-20 <= f < 2**-20 */
1460 if (f == zero) {
1461 if (k == 0) {
1462 return zero;
1463 } else {
1464 dk = static_cast<double>(k);
1465 return dk * ln2_hi + dk * ln2_lo;
1466 }
1467 }
1468 R = f * f * (0.5 - 0.33333333333333333 * f);
1469 if (k == 0) {
1470 return f - R;
1471 } else {
1472 dk = static_cast<double>(k);
1473 return dk * ln2_hi - ((R - dk * ln2_lo) - f);
1474 }
1475 }
1476 s = f / (2.0 + f);
1477 dk = static_cast<double>(k);
1478 z = s * s;
1479 i = hx - 0x6147a;
1480 w = z * z;
1481 j = 0x6b851 - hx;
1482 t1 = w * (Lg2 + w * (Lg4 + w * Lg6));
1483 t2 = z * (Lg1 + w * (Lg3 + w * (Lg5 + w * Lg7)));
1484 i |= j;
1485 R = t2 + t1;
1486 if (i > 0) {
1487 hfsq = 0.5 * f * f;
1488 if (k == 0)
1489 return f - (hfsq - s * (hfsq + R));
1490 else
1491 return dk * ln2_hi - ((hfsq - (s * (hfsq + R) + dk * ln2_lo)) - f);
1492 } else {
1493 if (k == 0)
1494 return f - s * (f - R);
1495 else
1496 return dk * ln2_hi - ((s * (f - R) - dk * ln2_lo) - f);
1497 }
1498}
1499
1500/* double log1p(double x)
1501 *
1502 * Method :
1503 * 1. Argument Reduction: find k and f such that
1504 * 1+x = 2^k * (1+f),
1505 * where sqrt(2)/2 < 1+f < sqrt(2) .
1506 *
1507 * Note. If k=0, then f=x is exact. However, if k!=0, then f
1508 * may not be representable exactly. In that case, a correction
1509 * term is need. Let u=1+x rounded. Let c = (1+x)-u, then
1510 * log(1+x) - log(u) ~ c/u. Thus, we proceed to compute log(u),
1511 * and add back the correction term c/u.
1512 * (Note: when x > 2**53, one can simply return log(x))
1513 *
1514 * 2. Approximation of log1p(f).
1515 * Let s = f/(2+f) ; based on log(1+f) = log(1+s) - log(1-s)
1516 * = 2s + 2/3 s**3 + 2/5 s**5 + .....,
1517 * = 2s + s*R
1518 * We use a special Reme algorithm on [0,0.1716] to generate
1519 * a polynomial of degree 14 to approximate R The maximum error
1520 * of this polynomial approximation is bounded by 2**-58.45. In
1521 * other words,
1522 * 2 4 6 8 10 12 14
1523 * R(z) ~ Lp1*s +Lp2*s +Lp3*s +Lp4*s +Lp5*s +Lp6*s +Lp7*s
1524 * (the values of Lp1 to Lp7 are listed in the program)
1525 * and
1526 * | 2 14 | -58.45
1527 * | Lp1*s +...+Lp7*s - R(z) | <= 2
1528 * | |
1529 * Note that 2s = f - s*f = f - hfsq + s*hfsq, where hfsq = f*f/2.
1530 * In order to guarantee error in log below 1ulp, we compute log
1531 * by
1532 * log1p(f) = f - (hfsq - s*(hfsq+R)).
1533 *
1534 * 3. Finally, log1p(x) = k*ln2 + log1p(f).
1535 * = k*ln2_hi+(f-(hfsq-(s*(hfsq+R)+k*ln2_lo)))
1536 * Here ln2 is split into two floating point number:
1537 * ln2_hi + ln2_lo,
1538 * where n*ln2_hi is always exact for |n| < 2000.
1539 *
1540 * Special cases:
1541 * log1p(x) is NaN with signal if x < -1 (including -INF) ;
1542 * log1p(+INF) is +INF; log1p(-1) is -INF with signal;
1543 * log1p(NaN) is that NaN with no signal.
1544 *
1545 * Accuracy:
1546 * according to an error analysis, the error is always less than
1547 * 1 ulp (unit in the last place).
1548 *
1549 * Constants:
1550 * The hexadecimal values are the intended ones for the following
1551 * constants. The decimal values may be used, provided that the
1552 * compiler will convert from decimal to binary accurately enough
1553 * to produce the hexadecimal values shown.
1554 *
1555 * Note: Assuming log() return accurate answer, the following
1556 * algorithm can be used to compute log1p(x) to within a few ULP:
1557 *
1558 * u = 1+x;
1559 * if(u==1.0) return x ; else
1560 * return log(u)*(x/(u-1.0));
1561 *
1562 * See HP-15C Advanced Functions Handbook, p.193.
1563 */
1564double log1p(double x) {
1565 static const double /* -- */
1566 ln2_hi = 6.93147180369123816490e-01, /* 3fe62e42 fee00000 */
1567 ln2_lo = 1.90821492927058770002e-10, /* 3dea39ef 35793c76 */
1568 two54 = 1.80143985094819840000e+16, /* 43500000 00000000 */
1569 Lp1 = 6.666666666666735130e-01, /* 3FE55555 55555593 */
1570 Lp2 = 3.999999999940941908e-01, /* 3FD99999 9997FA04 */
1571 Lp3 = 2.857142874366239149e-01, /* 3FD24924 94229359 */
1572 Lp4 = 2.222219843214978396e-01, /* 3FCC71C5 1D8E78AF */
1573 Lp5 = 1.818357216161805012e-01, /* 3FC74664 96CB03DE */
1574 Lp6 = 1.531383769920937332e-01, /* 3FC39A09 D078C69F */
1575 Lp7 = 1.479819860511658591e-01; /* 3FC2F112 DF3E5244 */
1576
1577 static const double zero = 0.0;
1578 static volatile double vzero = 0.0;
1579
1580 double hfsq, f, c, s, z, R, u;
1581 int32_t k, hx, hu, ax;
1582
1583 GET_HIGH_WORD(hx, x);
1584 ax = hx & 0x7fffffff;
1585
1586 k = 1;
1587 if (hx < 0x3FDA827A) { /* 1+x < sqrt(2)+ */
1588 if (ax >= 0x3ff00000) { /* x <= -1.0 */
1589 if (x == -1.0)
1590 return -two54 / vzero; /* log1p(-1)=+inf */
1591 else
1592 return (x - x) / (x - x); /* log1p(x<-1)=NaN */
1593 }
1594 if (ax < 0x3e200000) { /* |x| < 2**-29 */
1595 if (two54 + x > zero /* raise inexact */
1596 && ax < 0x3c900000) /* |x| < 2**-54 */
1597 return x;
1598 else
1599 return x - x * x * 0.5;
1600 }
1601 if (hx > 0 || hx <= static_cast<int32_t>(0xbfd2bec4)) {
1602 k = 0;
1603 f = x;
1604 hu = 1;
1605 } /* sqrt(2)/2- <= 1+x < sqrt(2)+ */
1606 }
1607 if (hx >= 0x7ff00000) return x + x;
1608 if (k != 0) {
1609 if (hx < 0x43400000) {
1610 STRICT_ASSIGN(double, u, 1.0 + x);
1611 GET_HIGH_WORD(hu, u);
1612 k = (hu >> 20) - 1023;
1613 c = (k > 0) ? 1.0 - (u - x) : x - (u - 1.0); /* correction term */
1614 c /= u;
1615 } else {
1616 u = x;
1617 GET_HIGH_WORD(hu, u);
1618 k = (hu >> 20) - 1023;
1619 c = 0;
1620 }
1621 hu &= 0x000fffff;
1622 /*
1623 * The approximation to sqrt(2) used in thresholds is not
1624 * critical. However, the ones used above must give less
1625 * strict bounds than the one here so that the k==0 case is
1626 * never reached from here, since here we have committed to
1627 * using the correction term but don't use it if k==0.
1628 */
1629 if (hu < 0x6a09e) { /* u ~< sqrt(2) */
1630 SET_HIGH_WORD(u, hu | 0x3ff00000); /* normalize u */
1631 } else {
1632 k += 1;
1633 SET_HIGH_WORD(u, hu | 0x3fe00000); /* normalize u/2 */
1634 hu = (0x00100000 - hu) >> 2;
1635 }
1636 f = u - 1.0;
1637 }
1638 hfsq = 0.5 * f * f;
1639 if (hu == 0) { /* |f| < 2**-20 */
1640 if (f == zero) {
1641 if (k == 0) {
1642 return zero;
1643 } else {
1644 c += k * ln2_lo;
1645 return k * ln2_hi + c;
1646 }
1647 }
1648 R = hfsq * (1.0 - 0.66666666666666666 * f);
1649 if (k == 0)
1650 return f - R;
1651 else
1652 return k * ln2_hi - ((R - (k * ln2_lo + c)) - f);
1653 }
1654 s = f / (2.0 + f);
1655 z = s * s;
1656 R = z * (Lp1 +
1657 z * (Lp2 + z * (Lp3 + z * (Lp4 + z * (Lp5 + z * (Lp6 + z * Lp7))))));
1658 if (k == 0)
1659 return f - (hfsq - s * (hfsq + R));
1660 else
1661 return k * ln2_hi - ((hfsq - (s * (hfsq + R) + (k * ln2_lo + c))) - f);
1662}
1663
1664/*
1665 * k_log1p(f):
1666 * Return log(1+f) - f for 1+f in ~[sqrt(2)/2, sqrt(2)].
1667 *
1668 * The following describes the overall strategy for computing
1669 * logarithms in base e. The argument reduction and adding the final
1670 * term of the polynomial are done by the caller for increased accuracy
1671 * when different bases are used.
1672 *
1673 * Method :
1674 * 1. Argument Reduction: find k and f such that
1675 * x = 2^k * (1+f),
1676 * where sqrt(2)/2 < 1+f < sqrt(2) .
1677 *
1678 * 2. Approximation of log(1+f).
1679 * Let s = f/(2+f) ; based on log(1+f) = log(1+s) - log(1-s)
1680 * = 2s + 2/3 s**3 + 2/5 s**5 + .....,
1681 * = 2s + s*R
1682 * We use a special Reme algorithm on [0,0.1716] to generate
1683 * a polynomial of degree 14 to approximate R The maximum error
1684 * of this polynomial approximation is bounded by 2**-58.45. In
1685 * other words,
1686 * 2 4 6 8 10 12 14
1687 * R(z) ~ Lg1*s +Lg2*s +Lg3*s +Lg4*s +Lg5*s +Lg6*s +Lg7*s
1688 * (the values of Lg1 to Lg7 are listed in the program)
1689 * and
1690 * | 2 14 | -58.45
1691 * | Lg1*s +...+Lg7*s - R(z) | <= 2
1692 * | |
1693 * Note that 2s = f - s*f = f - hfsq + s*hfsq, where hfsq = f*f/2.
1694 * In order to guarantee error in log below 1ulp, we compute log
1695 * by
1696 * log(1+f) = f - s*(f - R) (if f is not too large)
1697 * log(1+f) = f - (hfsq - s*(hfsq+R)). (better accuracy)
1698 *
1699 * 3. Finally, log(x) = k*ln2 + log(1+f).
1700 * = k*ln2_hi+(f-(hfsq-(s*(hfsq+R)+k*ln2_lo)))
1701 * Here ln2 is split into two floating point number:
1702 * ln2_hi + ln2_lo,
1703 * where n*ln2_hi is always exact for |n| < 2000.
1704 *
1705 * Special cases:
1706 * log(x) is NaN with signal if x < 0 (including -INF) ;
1707 * log(+INF) is +INF; log(0) is -INF with signal;
1708 * log(NaN) is that NaN with no signal.
1709 *
1710 * Accuracy:
1711 * according to an error analysis, the error is always less than
1712 * 1 ulp (unit in the last place).
1713 *
1714 * Constants:
1715 * The hexadecimal values are the intended ones for the following
1716 * constants. The decimal values may be used, provided that the
1717 * compiler will convert from decimal to binary accurately enough
1718 * to produce the hexadecimal values shown.
1719 */
1720
1721static const double Lg1 = 6.666666666666735130e-01, /* 3FE55555 55555593 */
1722 Lg2 = 3.999999999940941908e-01, /* 3FD99999 9997FA04 */
1723 Lg3 = 2.857142874366239149e-01, /* 3FD24924 94229359 */
1724 Lg4 = 2.222219843214978396e-01, /* 3FCC71C5 1D8E78AF */
1725 Lg5 = 1.818357216161805012e-01, /* 3FC74664 96CB03DE */
1726 Lg6 = 1.531383769920937332e-01, /* 3FC39A09 D078C69F */
1727 Lg7 = 1.479819860511658591e-01; /* 3FC2F112 DF3E5244 */
1728
1729/*
1730 * We always inline k_log1p(), since doing so produces a
1731 * substantial performance improvement (~40% on amd64).
1732 */
1733static inline double k_log1p(double f) {
1734 double hfsq, s, z, R, w, t1, t2;
1735
1736 s = f / (2.0 + f);
1737 z = s * s;
1738 w = z * z;
1739 t1 = w * (Lg2 + w * (Lg4 + w * Lg6));
1740 t2 = z * (Lg1 + w * (Lg3 + w * (Lg5 + w * Lg7)));
1741 R = t2 + t1;
1742 hfsq = 0.5 * f * f;
1743 return s * (hfsq + R);
1744}
1745
1746/*
1747 * Return the base 2 logarithm of x. See e_log.c and k_log.h for most
1748 * comments.
1749 *
1750 * This reduces x to {k, 1+f} exactly as in e_log.c, then calls the kernel,
1751 * then does the combining and scaling steps
1752 * log2(x) = (f - 0.5*f*f + k_log1p(f)) / ln2 + k
1753 * in not-quite-routine extra precision.
1754 */
1755double log2(double x) {
1756 static const double
1757 two54 = 1.80143985094819840000e+16, /* 0x43500000, 0x00000000 */
1758 ivln2hi = 1.44269504072144627571e+00, /* 0x3ff71547, 0x65200000 */
1759 ivln2lo = 1.67517131648865118353e-10; /* 0x3de705fc, 0x2eefa200 */
1760
1761 static const double zero = 0.0;
1762 static volatile double vzero = 0.0;
1763
1764 double f, hfsq, hi, lo, r, val_hi, val_lo, w, y;
1765 int32_t i, k, hx;
1766 uint32_t lx;
1767
1768 EXTRACT_WORDS(hx, lx, x);
1769
1770 k = 0;
1771 if (hx < 0x00100000) { /* x < 2**-1022 */
1772 if (((hx & 0x7fffffff) | lx) == 0)
1773 return -two54 / vzero; /* log(+-0)=-inf */
1774 if (hx < 0) return (x - x) / zero; /* log(-#) = NaN */
1775 k -= 54;
1776 x *= two54; /* subnormal number, scale up x */
1777 GET_HIGH_WORD(hx, x);
1778 }
1779 if (hx >= 0x7ff00000) return x + x;
1780 if (hx == 0x3ff00000 && lx == 0) return zero; /* log(1) = +0 */
1781 k += (hx >> 20) - 1023;
1782 hx &= 0x000fffff;
1783 i = (hx + 0x95f64) & 0x100000;
1784 SET_HIGH_WORD(x, hx | (i ^ 0x3ff00000)); /* normalize x or x/2 */
1785 k += (i >> 20);
1786 y = static_cast<double>(k);
1787 f = x - 1.0;
1788 hfsq = 0.5 * f * f;
1789 r = k_log1p(f);
1790
1791 /*
1792 * f-hfsq must (for args near 1) be evaluated in extra precision
1793 * to avoid a large cancellation when x is near sqrt(2) or 1/sqrt(2).
1794 * This is fairly efficient since f-hfsq only depends on f, so can
1795 * be evaluated in parallel with R. Not combining hfsq with R also
1796 * keeps R small (though not as small as a true `lo' term would be),
1797 * so that extra precision is not needed for terms involving R.
1798 *
1799 * Compiler bugs involving extra precision used to break Dekker's
1800 * theorem for spitting f-hfsq as hi+lo, unless double_t was used
1801 * or the multi-precision calculations were avoided when double_t
1802 * has extra precision. These problems are now automatically
1803 * avoided as a side effect of the optimization of combining the
1804 * Dekker splitting step with the clear-low-bits step.
1805 *
1806 * y must (for args near sqrt(2) and 1/sqrt(2)) be added in extra
1807 * precision to avoid a very large cancellation when x is very near
1808 * these values. Unlike the above cancellations, this problem is
1809 * specific to base 2. It is strange that adding +-1 is so much
1810 * harder than adding +-ln2 or +-log10_2.
1811 *
1812 * This uses Dekker's theorem to normalize y+val_hi, so the
1813 * compiler bugs are back in some configurations, sigh. And I
1814 * don't want to used double_t to avoid them, since that gives a
1815 * pessimization and the support for avoiding the pessimization
1816 * is not yet available.
1817 *
1818 * The multi-precision calculations for the multiplications are
1819 * routine.
1820 */
1821 hi = f - hfsq;
1822 SET_LOW_WORD(hi, 0);
1823 lo = (f - hi) - hfsq + r;
1824 val_hi = hi * ivln2hi;
1825 val_lo = (lo + hi) * ivln2lo + lo * ivln2hi;
1826
1827 /* spadd(val_hi, val_lo, y), except for not using double_t: */
1828 w = y + val_hi;
1829 val_lo += (y - w) + val_hi;
1830 val_hi = w;
1831
1832 return val_lo + val_hi;
1833}
1834
1835/*
1836 * Return the base 10 logarithm of x
1837 *
1838 * Method :
1839 * Let log10_2hi = leading 40 bits of log10(2) and
1840 * log10_2lo = log10(2) - log10_2hi,
1841 * ivln10 = 1/log(10) rounded.
1842 * Then
1843 * n = ilogb(x),
1844 * if(n<0) n = n+1;
1845 * x = scalbn(x,-n);
1846 * log10(x) := n*log10_2hi + (n*log10_2lo + ivln10*log(x))
1847 *
1848 * Note 1:
1849 * To guarantee log10(10**n)=n, where 10**n is normal, the rounding
1850 * mode must set to Round-to-Nearest.
1851 * Note 2:
1852 * [1/log(10)] rounded to 53 bits has error .198 ulps;
1853 * log10 is monotonic at all binary break points.
1854 *
1855 * Special cases:
1856 * log10(x) is NaN if x < 0;
1857 * log10(+INF) is +INF; log10(0) is -INF;
1858 * log10(NaN) is that NaN;
1859 * log10(10**N) = N for N=0,1,...,22.
1860 */
1861double log10(double x) {
1862 static const double
1863 two54 = 1.80143985094819840000e+16, /* 0x43500000, 0x00000000 */
1864 ivln10 = 4.34294481903251816668e-01,
1865 log10_2hi = 3.01029995663611771306e-01, /* 0x3FD34413, 0x509F6000 */
1866 log10_2lo = 3.69423907715893078616e-13; /* 0x3D59FEF3, 0x11F12B36 */
1867
1868 static const double zero = 0.0;
1869 static volatile double vzero = 0.0;
1870
1871 double y;
1872 int32_t i, k, hx;
1873 uint32_t lx;
1874
1875 EXTRACT_WORDS(hx, lx, x);
1876
1877 k = 0;
1878 if (hx < 0x00100000) { /* x < 2**-1022 */
1879 if (((hx & 0x7fffffff) | lx) == 0)
1880 return -two54 / vzero; /* log(+-0)=-inf */
1881 if (hx < 0) return (x - x) / zero; /* log(-#) = NaN */
1882 k -= 54;
1883 x *= two54; /* subnormal number, scale up x */
1884 GET_HIGH_WORD(hx, x);
1885 GET_LOW_WORD(lx, x);
1886 }
1887 if (hx >= 0x7ff00000) return x + x;
1888 if (hx == 0x3ff00000 && lx == 0) return zero; /* log(1) = +0 */
1889 k += (hx >> 20) - 1023;
1890
1891 i = (k & 0x80000000) >> 31;
1892 hx = (hx & 0x000fffff) | ((0x3ff - i) << 20);
1893 y = k + i;
1894 SET_HIGH_WORD(x, hx);
1895 SET_LOW_WORD(x, lx);
1896
1897 double z = y * log10_2lo + ivln10 * log(x);
1898 return z + y * log10_2hi;
1899}
1900
1901/* expm1(x)
1902 * Returns exp(x)-1, the exponential of x minus 1.
1903 *
1904 * Method
1905 * 1. Argument reduction:
1906 * Given x, find r and integer k such that
1907 *
1908 * x = k*ln2 + r, |r| <= 0.5*ln2 ~ 0.34658
1909 *
1910 * Here a correction term c will be computed to compensate
1911 * the error in r when rounded to a floating-point number.
1912 *
1913 * 2. Approximating expm1(r) by a special rational function on
1914 * the interval [0,0.34658]:
1915 * Since
1916 * r*(exp(r)+1)/(exp(r)-1) = 2+ r^2/6 - r^4/360 + ...
1917 * we define R1(r*r) by
1918 * r*(exp(r)+1)/(exp(r)-1) = 2+ r^2/6 * R1(r*r)
1919 * That is,
1920 * R1(r**2) = 6/r *((exp(r)+1)/(exp(r)-1) - 2/r)
1921 * = 6/r * ( 1 + 2.0*(1/(exp(r)-1) - 1/r))
1922 * = 1 - r^2/60 + r^4/2520 - r^6/100800 + ...
1923 * We use a special Reme algorithm on [0,0.347] to generate
1924 * a polynomial of degree 5 in r*r to approximate R1. The
1925 * maximum error of this polynomial approximation is bounded
1926 * by 2**-61. In other words,
1927 * R1(z) ~ 1.0 + Q1*z + Q2*z**2 + Q3*z**3 + Q4*z**4 + Q5*z**5
1928 * where Q1 = -1.6666666666666567384E-2,
1929 * Q2 = 3.9682539681370365873E-4,
1930 * Q3 = -9.9206344733435987357E-6,
1931 * Q4 = 2.5051361420808517002E-7,
1932 * Q5 = -6.2843505682382617102E-9;
1933 * z = r*r,
1934 * with error bounded by
1935 * | 5 | -61
1936 * | 1.0+Q1*z+...+Q5*z - R1(z) | <= 2
1937 * | |
1938 *
1939 * expm1(r) = exp(r)-1 is then computed by the following
1940 * specific way which minimize the accumulation rounding error:
1941 * 2 3
1942 * r r [ 3 - (R1 + R1*r/2) ]
1943 * expm1(r) = r + --- + --- * [--------------------]
1944 * 2 2 [ 6 - r*(3 - R1*r/2) ]
1945 *
1946 * To compensate the error in the argument reduction, we use
1947 * expm1(r+c) = expm1(r) + c + expm1(r)*c
1948 * ~ expm1(r) + c + r*c
1949 * Thus c+r*c will be added in as the correction terms for
1950 * expm1(r+c). Now rearrange the term to avoid optimization
1951 * screw up:
1952 * ( 2 2 )
1953 * ({ ( r [ R1 - (3 - R1*r/2) ] ) } r )
1954 * expm1(r+c)~r - ({r*(--- * [--------------------]-c)-c} - --- )
1955 * ({ ( 2 [ 6 - r*(3 - R1*r/2) ] ) } 2 )
1956 * ( )
1957 *
1958 * = r - E
1959 * 3. Scale back to obtain expm1(x):
1960 * From step 1, we have
1961 * expm1(x) = either 2^k*[expm1(r)+1] - 1
1962 * = or 2^k*[expm1(r) + (1-2^-k)]
1963 * 4. Implementation notes:
1964 * (A). To save one multiplication, we scale the coefficient Qi
1965 * to Qi*2^i, and replace z by (x^2)/2.
1966 * (B). To achieve maximum accuracy, we compute expm1(x) by
1967 * (i) if x < -56*ln2, return -1.0, (raise inexact if x!=inf)
1968 * (ii) if k=0, return r-E
1969 * (iii) if k=-1, return 0.5*(r-E)-0.5
1970 * (iv) if k=1 if r < -0.25, return 2*((r+0.5)- E)
1971 * else return 1.0+2.0*(r-E);
1972 * (v) if (k<-2||k>56) return 2^k(1-(E-r)) - 1 (or exp(x)-1)
1973 * (vi) if k <= 20, return 2^k((1-2^-k)-(E-r)), else
1974 * (vii) return 2^k(1-((E+2^-k)-r))
1975 *
1976 * Special cases:
1977 * expm1(INF) is INF, expm1(NaN) is NaN;
1978 * expm1(-INF) is -1, and
1979 * for finite argument, only expm1(0)=0 is exact.
1980 *
1981 * Accuracy:
1982 * according to an error analysis, the error is always less than
1983 * 1 ulp (unit in the last place).
1984 *
1985 * Misc. info.
1986 * For IEEE double
1987 * if x > 7.09782712893383973096e+02 then expm1(x) overflow
1988 *
1989 * Constants:
1990 * The hexadecimal values are the intended ones for the following
1991 * constants. The decimal values may be used, provided that the
1992 * compiler will convert from decimal to binary accurately enough
1993 * to produce the hexadecimal values shown.
1994 */
1995double expm1(double x) {
1996 static const double
1997 one = 1.0,
1998 tiny = 1.0e-300,
1999 o_threshold = 7.09782712893383973096e+02, /* 0x40862E42, 0xFEFA39EF */
2000 ln2_hi = 6.93147180369123816490e-01, /* 0x3fe62e42, 0xfee00000 */
2001 ln2_lo = 1.90821492927058770002e-10, /* 0x3dea39ef, 0x35793c76 */
2002 invln2 = 1.44269504088896338700e+00, /* 0x3ff71547, 0x652b82fe */
2003 /* Scaled Q's: Qn_here = 2**n * Qn_above, for R(2*z) where z = hxs =
2004 x*x/2: */
2005 Q1 = -3.33333333333331316428e-02, /* BFA11111 111110F4 */
2006 Q2 = 1.58730158725481460165e-03, /* 3F5A01A0 19FE5585 */
2007 Q3 = -7.93650757867487942473e-05, /* BF14CE19 9EAADBB7 */
2008 Q4 = 4.00821782732936239552e-06, /* 3ED0CFCA 86E65239 */
2009 Q5 = -2.01099218183624371326e-07; /* BE8AFDB7 6E09C32D */
2010
2011 static volatile double huge = 1.0e+300;
2012
2013 double y, hi, lo, c, t, e, hxs, hfx, r1, twopk;
2014 int32_t k, xsb;
2015 uint32_t hx;
2016
2017 GET_HIGH_WORD(hx, x);
2018 xsb = hx & 0x80000000; /* sign bit of x */
2019 hx &= 0x7fffffff; /* high word of |x| */
2020
2021 /* filter out huge and non-finite argument */
2022 if (hx >= 0x4043687A) { /* if |x|>=56*ln2 */
2023 if (hx >= 0x40862E42) { /* if |x|>=709.78... */
2024 if (hx >= 0x7ff00000) {
2025 uint32_t low;
2026 GET_LOW_WORD(low, x);
2027 if (((hx & 0xfffff) | low) != 0)
2028 return x + x; /* NaN */
2029 else
2030 return (xsb == 0) ? x : -1.0; /* exp(+-inf)={inf,-1} */
2031 }
2032 if (x > o_threshold) return huge * huge; /* overflow */
2033 }
2034 if (xsb != 0) { /* x < -56*ln2, return -1.0 with inexact */
2035 if (x + tiny < 0.0) /* raise inexact */
2036 return tiny - one; /* return -1 */
2037 }
2038 }
2039
2040 /* argument reduction */
2041 if (hx > 0x3fd62e42) { /* if |x| > 0.5 ln2 */
2042 if (hx < 0x3FF0A2B2) { /* and |x| < 1.5 ln2 */
2043 if (xsb == 0) {
2044 hi = x - ln2_hi;
2045 lo = ln2_lo;
2046 k = 1;
2047 } else {
2048 hi = x + ln2_hi;
2049 lo = -ln2_lo;
2050 k = -1;
2051 }
2052 } else {
2053 k = invln2 * x + ((xsb == 0) ? 0.5 : -0.5);
2054 t = k;
2055 hi = x - t * ln2_hi; /* t*ln2_hi is exact here */
2056 lo = t * ln2_lo;
2057 }
2058 STRICT_ASSIGN(double, x, hi - lo);
2059 c = (hi - x) - lo;
2060 } else if (hx < 0x3c900000) { /* when |x|<2**-54, return x */
2061 t = huge + x; /* return x with inexact flags when x!=0 */
2062 return x - (t - (huge + x));
2063 } else {
2064 k = 0;
2065 }
2066
2067 /* x is now in primary range */
2068 hfx = 0.5 * x;
2069 hxs = x * hfx;
2070 r1 = one + hxs * (Q1 + hxs * (Q2 + hxs * (Q3 + hxs * (Q4 + hxs * Q5))));
2071 t = 3.0 - r1 * hfx;
2072 e = hxs * ((r1 - t) / (6.0 - x * t));
2073 if (k == 0) {
2074 return x - (x * e - hxs); /* c is 0 */
2075 } else {
2076 INSERT_WORDS(twopk, 0x3ff00000 + (k << 20), 0); /* 2^k */
2077 e = (x * (e - c) - c);
2078 e -= hxs;
2079 if (k == -1) return 0.5 * (x - e) - 0.5;
2080 if (k == 1) {
2081 if (x < -0.25)
2082 return -2.0 * (e - (x + 0.5));
2083 else
2084 return one + 2.0 * (x - e);
2085 }
2086 if (k <= -2 || k > 56) { /* suffice to return exp(x)-1 */
2087 y = one - (e - x);
2088 // TODO(mvstanton): is this replacement for the hex float
2089 // sufficient?
2090 // if (k == 1024) y = y*2.0*0x1p1023;
2091 if (k == 1024)
2092 y = y * 2.0 * 8.98846567431158e+307;
2093 else
2094 y = y * twopk;
2095 return y - one;
2096 }
2097 t = one;
2098 if (k < 20) {
2099 SET_HIGH_WORD(t, 0x3ff00000 - (0x200000 >> k)); /* t=1-2^-k */
2100 y = t - (e - x);
2101 y = y * twopk;
2102 } else {
2103 SET_HIGH_WORD(t, ((0x3ff - k) << 20)); /* 2^-k */
2104 y = x - (e + t);
2105 y += one;
2106 y = y * twopk;
2107 }
2108 }
2109 return y;
2110}
2111
2112double cbrt(double x) {
2113 static const uint32_t
2114 B1 = 715094163, /* B1 = (1023-1023/3-0.03306235651)*2**20 */
2115 B2 = 696219795; /* B2 = (1023-1023/3-54/3-0.03306235651)*2**20 */
2116
2117 /* |1/cbrt(x) - p(x)| < 2**-23.5 (~[-7.93e-8, 7.929e-8]). */
2118 static const double P0 = 1.87595182427177009643, /* 0x3ffe03e6, 0x0f61e692 */
2119 P1 = -1.88497979543377169875, /* 0xbffe28e0, 0x92f02420 */
2120 P2 = 1.621429720105354466140, /* 0x3ff9f160, 0x4a49d6c2 */
2121 P3 = -0.758397934778766047437, /* 0xbfe844cb, 0xbee751d9 */
2122 P4 = 0.145996192886612446982; /* 0x3fc2b000, 0xd4e4edd7 */
2123
2124 int32_t hx;
2125 union {
2126 double value;
2127 uint64_t bits;
2128 } u;
2129 double r, s, t = 0.0, w;
2130 uint32_t sign;
2131 uint32_t high, low;
2132
2133 EXTRACT_WORDS(hx, low, x);
2134 sign = hx & 0x80000000; /* sign= sign(x) */
2135 hx ^= sign;
2136 if (hx >= 0x7ff00000) return (x + x); /* cbrt(NaN,INF) is itself */
2137
2138 /*
2139 * Rough cbrt to 5 bits:
2140 * cbrt(2**e*(1+m) ~= 2**(e/3)*(1+(e%3+m)/3)
2141 * where e is integral and >= 0, m is real and in [0, 1), and "/" and
2142 * "%" are integer division and modulus with rounding towards minus
2143 * infinity. The RHS is always >= the LHS and has a maximum relative
2144 * error of about 1 in 16. Adding a bias of -0.03306235651 to the
2145 * (e%3+m)/3 term reduces the error to about 1 in 32. With the IEEE
2146 * floating point representation, for finite positive normal values,
2147 * ordinary integer divison of the value in bits magically gives
2148 * almost exactly the RHS of the above provided we first subtract the
2149 * exponent bias (1023 for doubles) and later add it back. We do the
2150 * subtraction virtually to keep e >= 0 so that ordinary integer
2151 * division rounds towards minus infinity; this is also efficient.
2152 */
2153 if (hx < 0x00100000) { /* zero or subnormal? */
2154 if ((hx | low) == 0) return (x); /* cbrt(0) is itself */
2155 SET_HIGH_WORD(t, 0x43500000); /* set t= 2**54 */
2156 t *= x;
2157 GET_HIGH_WORD(high, t);
2158 INSERT_WORDS(t, sign | ((high & 0x7fffffff) / 3 + B2), 0);
2159 } else {
2160 INSERT_WORDS(t, sign | (hx / 3 + B1), 0);
2161 }
2162
2163 /*
2164 * New cbrt to 23 bits:
2165 * cbrt(x) = t*cbrt(x/t**3) ~= t*P(t**3/x)
2166 * where P(r) is a polynomial of degree 4 that approximates 1/cbrt(r)
2167 * to within 2**-23.5 when |r - 1| < 1/10. The rough approximation
2168 * has produced t such than |t/cbrt(x) - 1| ~< 1/32, and cubing this
2169 * gives us bounds for r = t**3/x.
2170 *
2171 * Try to optimize for parallel evaluation as in k_tanf.c.
2172 */
2173 r = (t * t) * (t / x);
2174 t = t * ((P0 + r * (P1 + r * P2)) + ((r * r) * r) * (P3 + r * P4));
2175
2176 /*
2177 * Round t away from zero to 23 bits (sloppily except for ensuring that
2178 * the result is larger in magnitude than cbrt(x) but not much more than
2179 * 2 23-bit ulps larger). With rounding towards zero, the error bound
2180 * would be ~5/6 instead of ~4/6. With a maximum error of 2 23-bit ulps
2181 * in the rounded t, the infinite-precision error in the Newton
2182 * approximation barely affects third digit in the final error
2183 * 0.667; the error in the rounded t can be up to about 3 23-bit ulps
2184 * before the final error is larger than 0.667 ulps.
2185 */
2186 u.value = t;
2187 u.bits = (u.bits + 0x80000000) & 0xffffffffc0000000ULL;
2188 t = u.value;
2189
2190 /* one step Newton iteration to 53 bits with error < 0.667 ulps */
2191 s = t * t; /* t*t is exact */
2192 r = x / s; /* error <= 0.5 ulps; |r| < |t| */
2193 w = t + t; /* t+t is exact */
2194 r = (r - t) / (w + r); /* r-t is exact; w+r ~= 3*t */
2195 t = t + t * r; /* error <= 0.5 + 0.5/3 + epsilon */
2196
2197 return (t);
2198}
2199
2200/* sin(x)
2201 * Return sine function of x.
2202 *
2203 * kernel function:
2204 * __kernel_sin ... sine function on [-pi/4,pi/4]
2205 * __kernel_cos ... cose function on [-pi/4,pi/4]
2206 * __ieee754_rem_pio2 ... argument reduction routine
2207 *
2208 * Method.
2209 * Let S,C and T denote the sin, cos and tan respectively on
2210 * [-PI/4, +PI/4]. Reduce the argument x to y1+y2 = x-k*pi/2
2211 * in [-pi/4 , +pi/4], and let n = k mod 4.
2212 * We have
2213 *
2214 * n sin(x) cos(x) tan(x)
2215 * ----------------------------------------------------------
2216 * 0 S C T
2217 * 1 C -S -1/T
2218 * 2 -S -C T
2219 * 3 -C S -1/T
2220 * ----------------------------------------------------------
2221 *
2222 * Special cases:
2223 * Let trig be any of sin, cos, or tan.
2224 * trig(+-INF) is NaN, with signals;
2225 * trig(NaN) is that NaN;
2226 *
2227 * Accuracy:
2228 * TRIG(x) returns trig(x) nearly rounded
2229 */
2230double sin(double x) {
2231 double y[2], z = 0.0;
2232 int32_t n, ix;
2233
2234 /* High word of x. */
2235 GET_HIGH_WORD(ix, x);
2236
2237 /* |x| ~< pi/4 */
2238 ix &= 0x7fffffff;
2239 if (ix <= 0x3fe921fb) {
2240 return __kernel_sin(x, z, 0);
2241 } else if (ix >= 0x7ff00000) {
2242 /* sin(Inf or NaN) is NaN */
2243 return x - x;
2244 } else {
2245 /* argument reduction needed */
2246 n = __ieee754_rem_pio2(x, y);
2247 switch (n & 3) {
2248 case 0:
2249 return __kernel_sin(y[0], y[1], 1);
2250 case 1:
2251 return __kernel_cos(y[0], y[1]);
2252 case 2:
2253 return -__kernel_sin(y[0], y[1], 1);
2254 default:
2255 return -__kernel_cos(y[0], y[1]);
2256 }
2257 }
2258}
2259
2260/* tan(x)
2261 * Return tangent function of x.
2262 *
2263 * kernel function:
2264 * __kernel_tan ... tangent function on [-pi/4,pi/4]
2265 * __ieee754_rem_pio2 ... argument reduction routine
2266 *
2267 * Method.
2268 * Let S,C and T denote the sin, cos and tan respectively on
2269 * [-PI/4, +PI/4]. Reduce the argument x to y1+y2 = x-k*pi/2
2270 * in [-pi/4 , +pi/4], and let n = k mod 4.
2271 * We have
2272 *
2273 * n sin(x) cos(x) tan(x)
2274 * ----------------------------------------------------------
2275 * 0 S C T
2276 * 1 C -S -1/T
2277 * 2 -S -C T
2278 * 3 -C S -1/T
2279 * ----------------------------------------------------------
2280 *
2281 * Special cases:
2282 * Let trig be any of sin, cos, or tan.
2283 * trig(+-INF) is NaN, with signals;
2284 * trig(NaN) is that NaN;
2285 *
2286 * Accuracy:
2287 * TRIG(x) returns trig(x) nearly rounded
2288 */
2289double tan(double x) {
2290 double y[2], z = 0.0;
2291 int32_t n, ix;
2292
2293 /* High word of x. */
2294 GET_HIGH_WORD(ix, x);
2295
2296 /* |x| ~< pi/4 */
2297 ix &= 0x7fffffff;
2298 if (ix <= 0x3fe921fb) {
2299 return __kernel_tan(x, z, 1);
2300 } else if (ix >= 0x7ff00000) {
2301 /* tan(Inf or NaN) is NaN */
2302 return x - x; /* NaN */
2303 } else {
2304 /* argument reduction needed */
2305 n = __ieee754_rem_pio2(x, y);
2306 /* 1 -> n even, -1 -> n odd */
2307 return __kernel_tan(y[0], y[1], 1 - ((n & 1) << 1));
2308 }
2309}
2310
2311} // namespace ieee754
2312} // namespace base
2313} // namespace v8