blob: f2365e8844fed3091e0d9e1dc34eb01808cb7ed9 [file] [log] [blame]
Sasha Smundak4b1f33a2021-01-11 15:05:07 -08001/*===---- __clang_hip_math.h - Device-side HIP math support ----------------===
2 *
3 * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 * See https://llvm.org/LICENSE.txt for license information.
5 * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 *
7 *===-----------------------------------------------------------------------===
8 */
9#ifndef __CLANG_HIP_MATH_H__
10#define __CLANG_HIP_MATH_H__
11
12#if !defined(__HIP__)
13#error "This file is for HIP and OpenMP AMDGCN device compilation only."
14#endif
15
16#if defined(__cplusplus)
17#include <algorithm>
18#endif
19#include <limits.h>
20#include <stdint.h>
21
22#pragma push_macro("__DEVICE__")
23#define __DEVICE__ static __device__ inline __attribute__((always_inline))
24
25// A few functions return bool type starting only in C++11.
26#pragma push_macro("__RETURN_TYPE")
27#if defined(__cplusplus)
28#define __RETURN_TYPE bool
29#else
30#define __RETURN_TYPE int
31#endif
32
33#if defined (__cplusplus) && __cplusplus < 201103L
34// emulate static_assert on type sizes
35template<bool>
36struct __compare_result{};
37template<>
38struct __compare_result<true> {
39 static const bool valid;
40};
41
42__DEVICE__
43void __suppress_unused_warning(bool b){};
44template <unsigned int S, unsigned int T>
45__DEVICE__ void __static_assert_equal_size() {
46 __suppress_unused_warning(__compare_result<S == T>::valid);
47}
48
49#define __static_assert_type_size_equal(A, B) \
50 __static_assert_equal_size<A,B>()
51
52#else
53#define __static_assert_type_size_equal(A,B) \
54 static_assert((A) == (B), "")
55
56#endif
57
58__DEVICE__
59uint64_t __make_mantissa_base8(const char *__tagp) {
60 uint64_t __r = 0;
61 while (__tagp) {
62 char __tmp = *__tagp;
63
64 if (__tmp >= '0' && __tmp <= '7')
65 __r = (__r * 8u) + __tmp - '0';
66 else
67 return 0;
68
69 ++__tagp;
70 }
71
72 return __r;
73}
74
75__DEVICE__
76uint64_t __make_mantissa_base10(const char *__tagp) {
77 uint64_t __r = 0;
78 while (__tagp) {
79 char __tmp = *__tagp;
80
81 if (__tmp >= '0' && __tmp <= '9')
82 __r = (__r * 10u) + __tmp - '0';
83 else
84 return 0;
85
86 ++__tagp;
87 }
88
89 return __r;
90}
91
92__DEVICE__
93uint64_t __make_mantissa_base16(const char *__tagp) {
94 uint64_t __r = 0;
95 while (__tagp) {
96 char __tmp = *__tagp;
97
98 if (__tmp >= '0' && __tmp <= '9')
99 __r = (__r * 16u) + __tmp - '0';
100 else if (__tmp >= 'a' && __tmp <= 'f')
101 __r = (__r * 16u) + __tmp - 'a' + 10;
102 else if (__tmp >= 'A' && __tmp <= 'F')
103 __r = (__r * 16u) + __tmp - 'A' + 10;
104 else
105 return 0;
106
107 ++__tagp;
108 }
109
110 return __r;
111}
112
113__DEVICE__
114uint64_t __make_mantissa(const char *__tagp) {
115 if (!__tagp)
116 return 0u;
117
118 if (*__tagp == '0') {
119 ++__tagp;
120
121 if (*__tagp == 'x' || *__tagp == 'X')
122 return __make_mantissa_base16(__tagp);
123 else
124 return __make_mantissa_base8(__tagp);
125 }
126
127 return __make_mantissa_base10(__tagp);
128}
129
130// BEGIN FLOAT
131#if defined(__cplusplus)
132__DEVICE__
133int abs(int __x) {
134 int __sgn = __x >> (sizeof(int) * CHAR_BIT - 1);
135 return (__x ^ __sgn) - __sgn;
136}
137__DEVICE__
138long labs(long __x) {
139 long __sgn = __x >> (sizeof(long) * CHAR_BIT - 1);
140 return (__x ^ __sgn) - __sgn;
141}
142__DEVICE__
143long long llabs(long long __x) {
144 long long __sgn = __x >> (sizeof(long long) * CHAR_BIT - 1);
145 return (__x ^ __sgn) - __sgn;
146}
147#endif
148
149__DEVICE__
150float acosf(float __x) { return __ocml_acos_f32(__x); }
151
152__DEVICE__
153float acoshf(float __x) { return __ocml_acosh_f32(__x); }
154
155__DEVICE__
156float asinf(float __x) { return __ocml_asin_f32(__x); }
157
158__DEVICE__
159float asinhf(float __x) { return __ocml_asinh_f32(__x); }
160
161__DEVICE__
162float atan2f(float __x, float __y) { return __ocml_atan2_f32(__x, __y); }
163
164__DEVICE__
165float atanf(float __x) { return __ocml_atan_f32(__x); }
166
167__DEVICE__
168float atanhf(float __x) { return __ocml_atanh_f32(__x); }
169
170__DEVICE__
171float cbrtf(float __x) { return __ocml_cbrt_f32(__x); }
172
173__DEVICE__
174float ceilf(float __x) { return __ocml_ceil_f32(__x); }
175
176__DEVICE__
177float copysignf(float __x, float __y) { return __ocml_copysign_f32(__x, __y); }
178
179__DEVICE__
180float cosf(float __x) { return __ocml_cos_f32(__x); }
181
182__DEVICE__
183float coshf(float __x) { return __ocml_cosh_f32(__x); }
184
185__DEVICE__
186float cospif(float __x) { return __ocml_cospi_f32(__x); }
187
188__DEVICE__
189float cyl_bessel_i0f(float __x) { return __ocml_i0_f32(__x); }
190
191__DEVICE__
192float cyl_bessel_i1f(float __x) { return __ocml_i1_f32(__x); }
193
194__DEVICE__
195float erfcf(float __x) { return __ocml_erfc_f32(__x); }
196
197__DEVICE__
198float erfcinvf(float __x) { return __ocml_erfcinv_f32(__x); }
199
200__DEVICE__
201float erfcxf(float __x) { return __ocml_erfcx_f32(__x); }
202
203__DEVICE__
204float erff(float __x) { return __ocml_erf_f32(__x); }
205
206__DEVICE__
207float erfinvf(float __x) { return __ocml_erfinv_f32(__x); }
208
209__DEVICE__
210float exp10f(float __x) { return __ocml_exp10_f32(__x); }
211
212__DEVICE__
213float exp2f(float __x) { return __ocml_exp2_f32(__x); }
214
215__DEVICE__
216float expf(float __x) { return __ocml_exp_f32(__x); }
217
218__DEVICE__
219float expm1f(float __x) { return __ocml_expm1_f32(__x); }
220
221__DEVICE__
222float fabsf(float __x) { return __ocml_fabs_f32(__x); }
223
224__DEVICE__
225float fdimf(float __x, float __y) { return __ocml_fdim_f32(__x, __y); }
226
227__DEVICE__
228float fdividef(float __x, float __y) { return __x / __y; }
229
230__DEVICE__
231float floorf(float __x) { return __ocml_floor_f32(__x); }
232
233__DEVICE__
234float fmaf(float __x, float __y, float __z) {
235 return __ocml_fma_f32(__x, __y, __z);
236}
237
238__DEVICE__
239float fmaxf(float __x, float __y) { return __ocml_fmax_f32(__x, __y); }
240
241__DEVICE__
242float fminf(float __x, float __y) { return __ocml_fmin_f32(__x, __y); }
243
244__DEVICE__
245float fmodf(float __x, float __y) { return __ocml_fmod_f32(__x, __y); }
246
247__DEVICE__
248float frexpf(float __x, int *__nptr) {
249 int __tmp;
250 float __r =
251 __ocml_frexp_f32(__x, (__attribute__((address_space(5))) int *)&__tmp);
252 *__nptr = __tmp;
253
254 return __r;
255}
256
257__DEVICE__
258float hypotf(float __x, float __y) { return __ocml_hypot_f32(__x, __y); }
259
260__DEVICE__
261int ilogbf(float __x) { return __ocml_ilogb_f32(__x); }
262
263__DEVICE__
264__RETURN_TYPE __finitef(float __x) { return __ocml_isfinite_f32(__x); }
265
266__DEVICE__
267__RETURN_TYPE __isinff(float __x) { return __ocml_isinf_f32(__x); }
268
269__DEVICE__
270__RETURN_TYPE __isnanf(float __x) { return __ocml_isnan_f32(__x); }
271
272__DEVICE__
273float j0f(float __x) { return __ocml_j0_f32(__x); }
274
275__DEVICE__
276float j1f(float __x) { return __ocml_j1_f32(__x); }
277
278__DEVICE__
279float jnf(int __n, float __x) { // TODO: we could use Ahmes multiplication
280 // and the Miller & Brown algorithm
281 // for linear recurrences to get O(log n) steps, but it's unclear if
282 // it'd be beneficial in this case.
283 if (__n == 0)
284 return j0f(__x);
285 if (__n == 1)
286 return j1f(__x);
287
288 float __x0 = j0f(__x);
289 float __x1 = j1f(__x);
290 for (int __i = 1; __i < __n; ++__i) {
291 float __x2 = (2 * __i) / __x * __x1 - __x0;
292 __x0 = __x1;
293 __x1 = __x2;
294 }
295
296 return __x1;
297}
298
299__DEVICE__
300float ldexpf(float __x, int __e) { return __ocml_ldexp_f32(__x, __e); }
301
302__DEVICE__
303float lgammaf(float __x) { return __ocml_lgamma_f32(__x); }
304
305__DEVICE__
306long long int llrintf(float __x) { return __ocml_rint_f32(__x); }
307
308__DEVICE__
309long long int llroundf(float __x) { return __ocml_round_f32(__x); }
310
311__DEVICE__
312float log10f(float __x) { return __ocml_log10_f32(__x); }
313
314__DEVICE__
315float log1pf(float __x) { return __ocml_log1p_f32(__x); }
316
317__DEVICE__
318float log2f(float __x) { return __ocml_log2_f32(__x); }
319
320__DEVICE__
321float logbf(float __x) { return __ocml_logb_f32(__x); }
322
323__DEVICE__
324float logf(float __x) { return __ocml_log_f32(__x); }
325
326__DEVICE__
327long int lrintf(float __x) { return __ocml_rint_f32(__x); }
328
329__DEVICE__
330long int lroundf(float __x) { return __ocml_round_f32(__x); }
331
332__DEVICE__
333float modff(float __x, float *__iptr) {
334 float __tmp;
335 float __r =
336 __ocml_modf_f32(__x, (__attribute__((address_space(5))) float *)&__tmp);
337 *__iptr = __tmp;
338 return __r;
339}
340
341__DEVICE__
342float nanf(const char *__tagp) {
343 union {
344 float val;
345 struct ieee_float {
346 unsigned int mantissa : 22;
347 unsigned int quiet : 1;
348 unsigned int exponent : 8;
349 unsigned int sign : 1;
350 } bits;
351 } __tmp;
352 __static_assert_type_size_equal(sizeof(__tmp.val), sizeof(__tmp.bits));
353
354 __tmp.bits.sign = 0u;
355 __tmp.bits.exponent = ~0u;
356 __tmp.bits.quiet = 1u;
357 __tmp.bits.mantissa = __make_mantissa(__tagp);
358
359 return __tmp.val;
360}
361
362__DEVICE__
363float nearbyintf(float __x) { return __ocml_nearbyint_f32(__x); }
364
365__DEVICE__
366float nextafterf(float __x, float __y) {
367 return __ocml_nextafter_f32(__x, __y);
368}
369
370__DEVICE__
371float norm3df(float __x, float __y, float __z) {
372 return __ocml_len3_f32(__x, __y, __z);
373}
374
375__DEVICE__
376float norm4df(float __x, float __y, float __z, float __w) {
377 return __ocml_len4_f32(__x, __y, __z, __w);
378}
379
380__DEVICE__
381float normcdff(float __x) { return __ocml_ncdf_f32(__x); }
382
383__DEVICE__
384float normcdfinvf(float __x) { return __ocml_ncdfinv_f32(__x); }
385
386__DEVICE__
387float normf(int __dim,
388 const float *__a) { // TODO: placeholder until OCML adds support.
389 float __r = 0;
390 while (__dim--) {
391 __r += __a[0] * __a[0];
392 ++__a;
393 }
394
395 return __ocml_sqrt_f32(__r);
396}
397
398__DEVICE__
399float powf(float __x, float __y) { return __ocml_pow_f32(__x, __y); }
400
401__DEVICE__
402float powif(float __x, int __y) { return __ocml_pown_f32(__x, __y); }
403
404__DEVICE__
405float rcbrtf(float __x) { return __ocml_rcbrt_f32(__x); }
406
407__DEVICE__
408float remainderf(float __x, float __y) {
409 return __ocml_remainder_f32(__x, __y);
410}
411
412__DEVICE__
413float remquof(float __x, float __y, int *__quo) {
414 int __tmp;
415 float __r = __ocml_remquo_f32(
416 __x, __y, (__attribute__((address_space(5))) int *)&__tmp);
417 *__quo = __tmp;
418
419 return __r;
420}
421
422__DEVICE__
423float rhypotf(float __x, float __y) { return __ocml_rhypot_f32(__x, __y); }
424
425__DEVICE__
426float rintf(float __x) { return __ocml_rint_f32(__x); }
427
428__DEVICE__
429float rnorm3df(float __x, float __y, float __z) {
430 return __ocml_rlen3_f32(__x, __y, __z);
431}
432
433__DEVICE__
434float rnorm4df(float __x, float __y, float __z, float __w) {
435 return __ocml_rlen4_f32(__x, __y, __z, __w);
436}
437
438__DEVICE__
439float rnormf(int __dim,
440 const float *__a) { // TODO: placeholder until OCML adds support.
441 float __r = 0;
442 while (__dim--) {
443 __r += __a[0] * __a[0];
444 ++__a;
445 }
446
447 return __ocml_rsqrt_f32(__r);
448}
449
450__DEVICE__
451float roundf(float __x) { return __ocml_round_f32(__x); }
452
453__DEVICE__
454float rsqrtf(float __x) { return __ocml_rsqrt_f32(__x); }
455
456__DEVICE__
457float scalblnf(float __x, long int __n) {
458 return (__n < INT_MAX) ? __ocml_scalbn_f32(__x, __n)
459 : __ocml_scalb_f32(__x, __n);
460}
461
462__DEVICE__
463float scalbnf(float __x, int __n) { return __ocml_scalbn_f32(__x, __n); }
464
465__DEVICE__
466__RETURN_TYPE __signbitf(float __x) { return __ocml_signbit_f32(__x); }
467
468__DEVICE__
469void sincosf(float __x, float *__sinptr, float *__cosptr) {
470 float __tmp;
471 *__sinptr =
472 __ocml_sincos_f32(__x, (__attribute__((address_space(5))) float *)&__tmp);
473 *__cosptr = __tmp;
474}
475
476__DEVICE__
477void sincospif(float __x, float *__sinptr, float *__cosptr) {
478 float __tmp;
479 *__sinptr = __ocml_sincospi_f32(
480 __x, (__attribute__((address_space(5))) float *)&__tmp);
481 *__cosptr = __tmp;
482}
483
484__DEVICE__
485float sinf(float __x) { return __ocml_sin_f32(__x); }
486
487__DEVICE__
488float sinhf(float __x) { return __ocml_sinh_f32(__x); }
489
490__DEVICE__
491float sinpif(float __x) { return __ocml_sinpi_f32(__x); }
492
493__DEVICE__
494float sqrtf(float __x) { return __ocml_sqrt_f32(__x); }
495
496__DEVICE__
497float tanf(float __x) { return __ocml_tan_f32(__x); }
498
499__DEVICE__
500float tanhf(float __x) { return __ocml_tanh_f32(__x); }
501
502__DEVICE__
503float tgammaf(float __x) { return __ocml_tgamma_f32(__x); }
504
505__DEVICE__
506float truncf(float __x) { return __ocml_trunc_f32(__x); }
507
508__DEVICE__
509float y0f(float __x) { return __ocml_y0_f32(__x); }
510
511__DEVICE__
512float y1f(float __x) { return __ocml_y1_f32(__x); }
513
514__DEVICE__
515float ynf(int __n, float __x) { // TODO: we could use Ahmes multiplication
516 // and the Miller & Brown algorithm
517 // for linear recurrences to get O(log n) steps, but it's unclear if
518 // it'd be beneficial in this case. Placeholder until OCML adds
519 // support.
520 if (__n == 0)
521 return y0f(__x);
522 if (__n == 1)
523 return y1f(__x);
524
525 float __x0 = y0f(__x);
526 float __x1 = y1f(__x);
527 for (int __i = 1; __i < __n; ++__i) {
528 float __x2 = (2 * __i) / __x * __x1 - __x0;
529 __x0 = __x1;
530 __x1 = __x2;
531 }
532
533 return __x1;
534}
535
536// BEGIN INTRINSICS
537
538__DEVICE__
539float __cosf(float __x) { return __ocml_native_cos_f32(__x); }
540
541__DEVICE__
542float __exp10f(float __x) { return __ocml_native_exp10_f32(__x); }
543
544__DEVICE__
545float __expf(float __x) { return __ocml_native_exp_f32(__x); }
546
547#if defined OCML_BASIC_ROUNDED_OPERATIONS
548__DEVICE__
549float __fadd_rd(float __x, float __y) { return __ocml_add_rtn_f32(__x, __y); }
550#endif
551__DEVICE__
552float __fadd_rn(float __x, float __y) { return __ocml_add_rte_f32(__x, __y); }
553#if defined OCML_BASIC_ROUNDED_OPERATIONS
554__DEVICE__
555float __fadd_ru(float __x, float __y) { return __ocml_add_rtp_f32(__x, __y); }
556
557__DEVICE__
558float __fadd_rz(float __x, float __y) { return __ocml_add_rtz_f32(__x, __y); }
559
560__DEVICE__
561float __fdiv_rd(float __x, float __y) { return __ocml_div_rtn_f32(__x, __y); }
562#endif
563__DEVICE__
564float __fdiv_rn(float __x, float __y) { return __ocml_div_rte_f32(__x, __y); }
565#if defined OCML_BASIC_ROUNDED_OPERATIONS
566__DEVICE__
567float __fdiv_ru(float __x, float __y) { return __ocml_div_rtp_f32(__x, __y); }
568
569__DEVICE__
570float __fdiv_rz(float __x, float __y) { return __ocml_div_rtz_f32(__x, __y); }
571#endif
572__DEVICE__
573float __fdividef(float __x, float __y) { return __x / __y; }
574#if defined OCML_BASIC_ROUNDED_OPERATIONS
575__DEVICE__
576float __fmaf_rd(float __x, float __y, float __z) {
577 return __ocml_fma_rtn_f32(__x, __y, __z);
578}
579#endif
580__DEVICE__
581float __fmaf_rn(float __x, float __y, float __z) {
582 return __ocml_fma_rte_f32(__x, __y, __z);
583}
584#if defined OCML_BASIC_ROUNDED_OPERATIONS
585__DEVICE__
586float __fmaf_ru(float __x, float __y, float __z) {
587 return __ocml_fma_rtp_f32(__x, __y, __z);
588}
589
590__DEVICE__
591float __fmaf_rz(float __x, float __y, float __z) {
592 return __ocml_fma_rtz_f32(__x, __y, __z);
593}
594
595__DEVICE__
596float __fmul_rd(float __x, float __y) { return __ocml_mul_rtn_f32(__x, __y); }
597#endif
598__DEVICE__
599float __fmul_rn(float __x, float __y) { return __ocml_mul_rte_f32(__x, __y); }
600#if defined OCML_BASIC_ROUNDED_OPERATIONS
601__DEVICE__
602float __fmul_ru(float __x, float __y) { return __ocml_mul_rtp_f32(__x, __y); }
603
604__DEVICE__
605float __fmul_rz(float __x, float __y) { return __ocml_mul_rtz_f32(__x, __y); }
606
607__DEVICE__
608float __frcp_rd(float __x) { return __llvm_amdgcn_rcp_f32(__x); }
609#endif
610__DEVICE__
611float __frcp_rn(float __x) { return __llvm_amdgcn_rcp_f32(__x); }
612#if defined OCML_BASIC_ROUNDED_OPERATIONS
613__DEVICE__
614float __frcp_ru(float __x) { return __llvm_amdgcn_rcp_f32(__x); }
615
616__DEVICE__
617float __frcp_rz(float __x) { return __llvm_amdgcn_rcp_f32(__x); }
618#endif
619__DEVICE__
620float __frsqrt_rn(float __x) { return __llvm_amdgcn_rsq_f32(__x); }
621#if defined OCML_BASIC_ROUNDED_OPERATIONS
622__DEVICE__
623float __fsqrt_rd(float __x) { return __ocml_sqrt_rtn_f32(__x); }
624#endif
625__DEVICE__
626float __fsqrt_rn(float __x) { return __ocml_sqrt_rte_f32(__x); }
627#if defined OCML_BASIC_ROUNDED_OPERATIONS
628__DEVICE__
629float __fsqrt_ru(float __x) { return __ocml_sqrt_rtp_f32(__x); }
630
631__DEVICE__
632float __fsqrt_rz(float __x) { return __ocml_sqrt_rtz_f32(__x); }
633
634__DEVICE__
635float __fsub_rd(float __x, float __y) { return __ocml_sub_rtn_f32(__x, __y); }
636#endif
637__DEVICE__
638float __fsub_rn(float __x, float __y) { return __ocml_sub_rte_f32(__x, __y); }
639#if defined OCML_BASIC_ROUNDED_OPERATIONS
640__DEVICE__
641float __fsub_ru(float __x, float __y) { return __ocml_sub_rtp_f32(__x, __y); }
642
643__DEVICE__
644float __fsub_rz(float __x, float __y) { return __ocml_sub_rtz_f32(__x, __y); }
645#endif
646__DEVICE__
647float __log10f(float __x) { return __ocml_native_log10_f32(__x); }
648
649__DEVICE__
650float __log2f(float __x) { return __ocml_native_log2_f32(__x); }
651
652__DEVICE__
653float __logf(float __x) { return __ocml_native_log_f32(__x); }
654
655__DEVICE__
656float __powf(float __x, float __y) { return __ocml_pow_f32(__x, __y); }
657
658__DEVICE__
659float __saturatef(float __x) { return (__x < 0) ? 0 : ((__x > 1) ? 1 : __x); }
660
661__DEVICE__
662void __sincosf(float __x, float *__sinptr, float *__cosptr) {
663 *__sinptr = __ocml_native_sin_f32(__x);
664 *__cosptr = __ocml_native_cos_f32(__x);
665}
666
667__DEVICE__
668float __sinf(float __x) { return __ocml_native_sin_f32(__x); }
669
670__DEVICE__
671float __tanf(float __x) { return __ocml_tan_f32(__x); }
672// END INTRINSICS
673// END FLOAT
674
675// BEGIN DOUBLE
676__DEVICE__
677double acos(double __x) { return __ocml_acos_f64(__x); }
678
679__DEVICE__
680double acosh(double __x) { return __ocml_acosh_f64(__x); }
681
682__DEVICE__
683double asin(double __x) { return __ocml_asin_f64(__x); }
684
685__DEVICE__
686double asinh(double __x) { return __ocml_asinh_f64(__x); }
687
688__DEVICE__
689double atan(double __x) { return __ocml_atan_f64(__x); }
690
691__DEVICE__
692double atan2(double __x, double __y) { return __ocml_atan2_f64(__x, __y); }
693
694__DEVICE__
695double atanh(double __x) { return __ocml_atanh_f64(__x); }
696
697__DEVICE__
698double cbrt(double __x) { return __ocml_cbrt_f64(__x); }
699
700__DEVICE__
701double ceil(double __x) { return __ocml_ceil_f64(__x); }
702
703__DEVICE__
704double copysign(double __x, double __y) {
705 return __ocml_copysign_f64(__x, __y);
706}
707
708__DEVICE__
709double cos(double __x) { return __ocml_cos_f64(__x); }
710
711__DEVICE__
712double cosh(double __x) { return __ocml_cosh_f64(__x); }
713
714__DEVICE__
715double cospi(double __x) { return __ocml_cospi_f64(__x); }
716
717__DEVICE__
718double cyl_bessel_i0(double __x) { return __ocml_i0_f64(__x); }
719
720__DEVICE__
721double cyl_bessel_i1(double __x) { return __ocml_i1_f64(__x); }
722
723__DEVICE__
724double erf(double __x) { return __ocml_erf_f64(__x); }
725
726__DEVICE__
727double erfc(double __x) { return __ocml_erfc_f64(__x); }
728
729__DEVICE__
730double erfcinv(double __x) { return __ocml_erfcinv_f64(__x); }
731
732__DEVICE__
733double erfcx(double __x) { return __ocml_erfcx_f64(__x); }
734
735__DEVICE__
736double erfinv(double __x) { return __ocml_erfinv_f64(__x); }
737
738__DEVICE__
739double exp(double __x) { return __ocml_exp_f64(__x); }
740
741__DEVICE__
742double exp10(double __x) { return __ocml_exp10_f64(__x); }
743
744__DEVICE__
745double exp2(double __x) { return __ocml_exp2_f64(__x); }
746
747__DEVICE__
748double expm1(double __x) { return __ocml_expm1_f64(__x); }
749
750__DEVICE__
751double fabs(double __x) { return __ocml_fabs_f64(__x); }
752
753__DEVICE__
754double fdim(double __x, double __y) { return __ocml_fdim_f64(__x, __y); }
755
756__DEVICE__
757double floor(double __x) { return __ocml_floor_f64(__x); }
758
759__DEVICE__
760double fma(double __x, double __y, double __z) {
761 return __ocml_fma_f64(__x, __y, __z);
762}
763
764__DEVICE__
765double fmax(double __x, double __y) { return __ocml_fmax_f64(__x, __y); }
766
767__DEVICE__
768double fmin(double __x, double __y) { return __ocml_fmin_f64(__x, __y); }
769
770__DEVICE__
771double fmod(double __x, double __y) { return __ocml_fmod_f64(__x, __y); }
772
773__DEVICE__
774double frexp(double __x, int *__nptr) {
775 int __tmp;
776 double __r =
777 __ocml_frexp_f64(__x, (__attribute__((address_space(5))) int *)&__tmp);
778 *__nptr = __tmp;
779 return __r;
780}
781
782__DEVICE__
783double hypot(double __x, double __y) { return __ocml_hypot_f64(__x, __y); }
784
785__DEVICE__
786int ilogb(double __x) { return __ocml_ilogb_f64(__x); }
787
788__DEVICE__
789__RETURN_TYPE __finite(double __x) { return __ocml_isfinite_f64(__x); }
790
791__DEVICE__
792__RETURN_TYPE __isinf(double __x) { return __ocml_isinf_f64(__x); }
793
794__DEVICE__
795__RETURN_TYPE __isnan(double __x) { return __ocml_isnan_f64(__x); }
796
797__DEVICE__
798double j0(double __x) { return __ocml_j0_f64(__x); }
799
800__DEVICE__
801double j1(double __x) { return __ocml_j1_f64(__x); }
802
803__DEVICE__
804double jn(int __n, double __x) { // TODO: we could use Ahmes multiplication
805 // and the Miller & Brown algorithm
806 // for linear recurrences to get O(log n) steps, but it's unclear if
807 // it'd be beneficial in this case. Placeholder until OCML adds
808 // support.
809 if (__n == 0)
810 return j0(__x);
811 if (__n == 1)
812 return j1(__x);
813
814 double __x0 = j0(__x);
815 double __x1 = j1(__x);
816 for (int __i = 1; __i < __n; ++__i) {
817 double __x2 = (2 * __i) / __x * __x1 - __x0;
818 __x0 = __x1;
819 __x1 = __x2;
820 }
821 return __x1;
822}
823
824__DEVICE__
825double ldexp(double __x, int __e) { return __ocml_ldexp_f64(__x, __e); }
826
827__DEVICE__
828double lgamma(double __x) { return __ocml_lgamma_f64(__x); }
829
830__DEVICE__
831long long int llrint(double __x) { return __ocml_rint_f64(__x); }
832
833__DEVICE__
834long long int llround(double __x) { return __ocml_round_f64(__x); }
835
836__DEVICE__
837double log(double __x) { return __ocml_log_f64(__x); }
838
839__DEVICE__
840double log10(double __x) { return __ocml_log10_f64(__x); }
841
842__DEVICE__
843double log1p(double __x) { return __ocml_log1p_f64(__x); }
844
845__DEVICE__
846double log2(double __x) { return __ocml_log2_f64(__x); }
847
848__DEVICE__
849double logb(double __x) { return __ocml_logb_f64(__x); }
850
851__DEVICE__
852long int lrint(double __x) { return __ocml_rint_f64(__x); }
853
854__DEVICE__
855long int lround(double __x) { return __ocml_round_f64(__x); }
856
857__DEVICE__
858double modf(double __x, double *__iptr) {
859 double __tmp;
860 double __r =
861 __ocml_modf_f64(__x, (__attribute__((address_space(5))) double *)&__tmp);
862 *__iptr = __tmp;
863
864 return __r;
865}
866
867__DEVICE__
868double nan(const char *__tagp) {
869#if !_WIN32
870 union {
871 double val;
872 struct ieee_double {
873 uint64_t mantissa : 51;
874 uint32_t quiet : 1;
875 uint32_t exponent : 11;
876 uint32_t sign : 1;
877 } bits;
878 } __tmp;
879 __static_assert_type_size_equal(sizeof(__tmp.val), sizeof(__tmp.bits));
880
881 __tmp.bits.sign = 0u;
882 __tmp.bits.exponent = ~0u;
883 __tmp.bits.quiet = 1u;
884 __tmp.bits.mantissa = __make_mantissa(__tagp);
885
886 return __tmp.val;
887#else
888 __static_assert_type_size_equal(sizeof(uint64_t), sizeof(double));
889 uint64_t __val = __make_mantissa(__tagp);
890 __val |= 0xFFF << 51;
891 return *reinterpret_cast<double *>(&__val);
892#endif
893}
894
895__DEVICE__
896double nearbyint(double __x) { return __ocml_nearbyint_f64(__x); }
897
898__DEVICE__
899double nextafter(double __x, double __y) {
900 return __ocml_nextafter_f64(__x, __y);
901}
902
903__DEVICE__
904double norm(int __dim,
905 const double *__a) { // TODO: placeholder until OCML adds support.
906 double __r = 0;
907 while (__dim--) {
908 __r += __a[0] * __a[0];
909 ++__a;
910 }
911
912 return __ocml_sqrt_f64(__r);
913}
914
915__DEVICE__
916double norm3d(double __x, double __y, double __z) {
917 return __ocml_len3_f64(__x, __y, __z);
918}
919
920__DEVICE__
921double norm4d(double __x, double __y, double __z, double __w) {
922 return __ocml_len4_f64(__x, __y, __z, __w);
923}
924
925__DEVICE__
926double normcdf(double __x) { return __ocml_ncdf_f64(__x); }
927
928__DEVICE__
929double normcdfinv(double __x) { return __ocml_ncdfinv_f64(__x); }
930
931__DEVICE__
932double pow(double __x, double __y) { return __ocml_pow_f64(__x, __y); }
933
934__DEVICE__
935double powi(double __x, int __y) { return __ocml_pown_f64(__x, __y); }
936
937__DEVICE__
938double rcbrt(double __x) { return __ocml_rcbrt_f64(__x); }
939
940__DEVICE__
941double remainder(double __x, double __y) {
942 return __ocml_remainder_f64(__x, __y);
943}
944
945__DEVICE__
946double remquo(double __x, double __y, int *__quo) {
947 int __tmp;
948 double __r = __ocml_remquo_f64(
949 __x, __y, (__attribute__((address_space(5))) int *)&__tmp);
950 *__quo = __tmp;
951
952 return __r;
953}
954
955__DEVICE__
956double rhypot(double __x, double __y) { return __ocml_rhypot_f64(__x, __y); }
957
958__DEVICE__
959double rint(double __x) { return __ocml_rint_f64(__x); }
960
961__DEVICE__
962double rnorm(int __dim,
963 const double *__a) { // TODO: placeholder until OCML adds support.
964 double __r = 0;
965 while (__dim--) {
966 __r += __a[0] * __a[0];
967 ++__a;
968 }
969
970 return __ocml_rsqrt_f64(__r);
971}
972
973__DEVICE__
974double rnorm3d(double __x, double __y, double __z) {
975 return __ocml_rlen3_f64(__x, __y, __z);
976}
977
978__DEVICE__
979double rnorm4d(double __x, double __y, double __z, double __w) {
980 return __ocml_rlen4_f64(__x, __y, __z, __w);
981}
982
983__DEVICE__
984double round(double __x) { return __ocml_round_f64(__x); }
985
986__DEVICE__
987double rsqrt(double __x) { return __ocml_rsqrt_f64(__x); }
988
989__DEVICE__
990double scalbln(double __x, long int __n) {
991 return (__n < INT_MAX) ? __ocml_scalbn_f64(__x, __n)
992 : __ocml_scalb_f64(__x, __n);
993}
994__DEVICE__
995double scalbn(double __x, int __n) { return __ocml_scalbn_f64(__x, __n); }
996
997__DEVICE__
998__RETURN_TYPE __signbit(double __x) { return __ocml_signbit_f64(__x); }
999
1000__DEVICE__
1001double sin(double __x) { return __ocml_sin_f64(__x); }
1002
1003__DEVICE__
1004void sincos(double __x, double *__sinptr, double *__cosptr) {
1005 double __tmp;
1006 *__sinptr = __ocml_sincos_f64(
1007 __x, (__attribute__((address_space(5))) double *)&__tmp);
1008 *__cosptr = __tmp;
1009}
1010
1011__DEVICE__
1012void sincospi(double __x, double *__sinptr, double *__cosptr) {
1013 double __tmp;
1014 *__sinptr = __ocml_sincospi_f64(
1015 __x, (__attribute__((address_space(5))) double *)&__tmp);
1016 *__cosptr = __tmp;
1017}
1018
1019__DEVICE__
1020double sinh(double __x) { return __ocml_sinh_f64(__x); }
1021
1022__DEVICE__
1023double sinpi(double __x) { return __ocml_sinpi_f64(__x); }
1024
1025__DEVICE__
1026double sqrt(double __x) { return __ocml_sqrt_f64(__x); }
1027
1028__DEVICE__
1029double tan(double __x) { return __ocml_tan_f64(__x); }
1030
1031__DEVICE__
1032double tanh(double __x) { return __ocml_tanh_f64(__x); }
1033
1034__DEVICE__
1035double tgamma(double __x) { return __ocml_tgamma_f64(__x); }
1036
1037__DEVICE__
1038double trunc(double __x) { return __ocml_trunc_f64(__x); }
1039
1040__DEVICE__
1041double y0(double __x) { return __ocml_y0_f64(__x); }
1042
1043__DEVICE__
1044double y1(double __x) { return __ocml_y1_f64(__x); }
1045
1046__DEVICE__
1047double yn(int __n, double __x) { // TODO: we could use Ahmes multiplication
1048 // and the Miller & Brown algorithm
1049 // for linear recurrences to get O(log n) steps, but it's unclear if
1050 // it'd be beneficial in this case. Placeholder until OCML adds
1051 // support.
1052 if (__n == 0)
1053 return y0(__x);
1054 if (__n == 1)
1055 return y1(__x);
1056
1057 double __x0 = y0(__x);
1058 double __x1 = y1(__x);
1059 for (int __i = 1; __i < __n; ++__i) {
1060 double __x2 = (2 * __i) / __x * __x1 - __x0;
1061 __x0 = __x1;
1062 __x1 = __x2;
1063 }
1064
1065 return __x1;
1066}
1067
1068// BEGIN INTRINSICS
1069#if defined OCML_BASIC_ROUNDED_OPERATIONS
1070__DEVICE__
1071double __dadd_rd(double __x, double __y) {
1072 return __ocml_add_rtn_f64(__x, __y);
1073}
1074#endif
1075__DEVICE__
1076double __dadd_rn(double __x, double __y) {
1077 return __ocml_add_rte_f64(__x, __y);
1078}
1079#if defined OCML_BASIC_ROUNDED_OPERATIONS
1080__DEVICE__
1081double __dadd_ru(double __x, double __y) {
1082 return __ocml_add_rtp_f64(__x, __y);
1083}
1084
1085__DEVICE__
1086double __dadd_rz(double __x, double __y) {
1087 return __ocml_add_rtz_f64(__x, __y);
1088}
1089
1090__DEVICE__
1091double __ddiv_rd(double __x, double __y) {
1092 return __ocml_div_rtn_f64(__x, __y);
1093}
1094#endif
1095__DEVICE__
1096double __ddiv_rn(double __x, double __y) {
1097 return __ocml_div_rte_f64(__x, __y);
1098}
1099#if defined OCML_BASIC_ROUNDED_OPERATIONS
1100__DEVICE__
1101double __ddiv_ru(double __x, double __y) {
1102 return __ocml_div_rtp_f64(__x, __y);
1103}
1104
1105__DEVICE__
1106double __ddiv_rz(double __x, double __y) {
1107 return __ocml_div_rtz_f64(__x, __y);
1108}
1109
1110__DEVICE__
1111double __dmul_rd(double __x, double __y) {
1112 return __ocml_mul_rtn_f64(__x, __y);
1113}
1114#endif
1115__DEVICE__
1116double __dmul_rn(double __x, double __y) {
1117 return __ocml_mul_rte_f64(__x, __y);
1118}
1119#if defined OCML_BASIC_ROUNDED_OPERATIONS
1120__DEVICE__
1121double __dmul_ru(double __x, double __y) {
1122 return __ocml_mul_rtp_f64(__x, __y);
1123}
1124
1125__DEVICE__
1126double __dmul_rz(double __x, double __y) {
1127 return __ocml_mul_rtz_f64(__x, __y);
1128}
1129
1130__DEVICE__
1131double __drcp_rd(double __x) { return __llvm_amdgcn_rcp_f64(__x); }
1132#endif
1133__DEVICE__
1134double __drcp_rn(double __x) { return __llvm_amdgcn_rcp_f64(__x); }
1135#if defined OCML_BASIC_ROUNDED_OPERATIONS
1136__DEVICE__
1137double __drcp_ru(double __x) { return __llvm_amdgcn_rcp_f64(__x); }
1138
1139__DEVICE__
1140double __drcp_rz(double __x) { return __llvm_amdgcn_rcp_f64(__x); }
1141
1142__DEVICE__
1143double __dsqrt_rd(double __x) { return __ocml_sqrt_rtn_f64(__x); }
1144#endif
1145__DEVICE__
1146double __dsqrt_rn(double __x) { return __ocml_sqrt_rte_f64(__x); }
1147#if defined OCML_BASIC_ROUNDED_OPERATIONS
1148__DEVICE__
1149double __dsqrt_ru(double __x) { return __ocml_sqrt_rtp_f64(__x); }
1150
1151__DEVICE__
1152double __dsqrt_rz(double __x) { return __ocml_sqrt_rtz_f64(__x); }
1153
1154__DEVICE__
1155double __dsub_rd(double __x, double __y) {
1156 return __ocml_sub_rtn_f64(__x, __y);
1157}
1158#endif
1159__DEVICE__
1160double __dsub_rn(double __x, double __y) {
1161 return __ocml_sub_rte_f64(__x, __y);
1162}
1163#if defined OCML_BASIC_ROUNDED_OPERATIONS
1164__DEVICE__
1165double __dsub_ru(double __x, double __y) {
1166 return __ocml_sub_rtp_f64(__x, __y);
1167}
1168
1169__DEVICE__
1170double __dsub_rz(double __x, double __y) {
1171 return __ocml_sub_rtz_f64(__x, __y);
1172}
1173
1174__DEVICE__
1175double __fma_rd(double __x, double __y, double __z) {
1176 return __ocml_fma_rtn_f64(__x, __y, __z);
1177}
1178#endif
1179__DEVICE__
1180double __fma_rn(double __x, double __y, double __z) {
1181 return __ocml_fma_rte_f64(__x, __y, __z);
1182}
1183#if defined OCML_BASIC_ROUNDED_OPERATIONS
1184__DEVICE__
1185double __fma_ru(double __x, double __y, double __z) {
1186 return __ocml_fma_rtp_f64(__x, __y, __z);
1187}
1188
1189__DEVICE__
1190double __fma_rz(double __x, double __y, double __z) {
1191 return __ocml_fma_rtz_f64(__x, __y, __z);
1192}
1193#endif
1194// END INTRINSICS
1195// END DOUBLE
1196
1197// C only macros
1198#if !defined(__cplusplus) && __STDC_VERSION__ >= 201112L
1199#define isfinite(__x) _Generic((__x), float : __finitef, double : __finite)(__x)
1200#define isinf(__x) _Generic((__x), float : __isinff, double : __isinf)(__x)
1201#define isnan(__x) _Generic((__x), float : __isnanf, double : __isnan)(__x)
1202#define signbit(__x) \
1203 _Generic((__x), float : __signbitf, double : __signbit)(__x)
1204#endif // !defined(__cplusplus) && __STDC_VERSION__ >= 201112L
1205
1206#if defined(__cplusplus)
1207template <class T> __DEVICE__ T min(T __arg1, T __arg2) {
1208 return (__arg1 < __arg2) ? __arg1 : __arg2;
1209}
1210
1211template <class T> __DEVICE__ T max(T __arg1, T __arg2) {
1212 return (__arg1 > __arg2) ? __arg1 : __arg2;
1213}
1214
1215__DEVICE__ int min(int __arg1, int __arg2) {
1216 return (__arg1 < __arg2) ? __arg1 : __arg2;
1217}
1218__DEVICE__ int max(int __arg1, int __arg2) {
1219 return (__arg1 > __arg2) ? __arg1 : __arg2;
1220}
1221
1222__DEVICE__
1223float max(float __x, float __y) { return fmaxf(__x, __y); }
1224
1225__DEVICE__
1226double max(double __x, double __y) { return fmax(__x, __y); }
1227
1228__DEVICE__
1229float min(float __x, float __y) { return fminf(__x, __y); }
1230
1231__DEVICE__
1232double min(double __x, double __y) { return fmin(__x, __y); }
1233
1234__host__ inline static int min(int __arg1, int __arg2) {
1235 return std::min(__arg1, __arg2);
1236}
1237
1238__host__ inline static int max(int __arg1, int __arg2) {
1239 return std::max(__arg1, __arg2);
1240}
1241#endif
1242
1243#pragma pop_macro("__DEVICE__")
1244#pragma pop_macro("__RETURN_TYPE")
1245
1246#endif // __CLANG_HIP_MATH_H__