blob: ef7e087b832ca8bd3f2fdcc4ee6b144f7c03d629 [file] [log] [blame]
Sasha Smundak4b1f33a2021-01-11 15:05:07 -08001/*===---- __clang_hip_math.h - Device-side HIP math support ----------------===
2 *
3 * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 * See https://llvm.org/LICENSE.txt for license information.
5 * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 *
7 *===-----------------------------------------------------------------------===
8 */
9#ifndef __CLANG_HIP_MATH_H__
10#define __CLANG_HIP_MATH_H__
11
Pirama Arumuga Nainar494f6452021-12-02 10:42:14 -080012#if !defined(__HIP__) && !defined(__OPENMP_AMDGCN__)
Sasha Smundak4b1f33a2021-01-11 15:05:07 -080013#error "This file is for HIP and OpenMP AMDGCN device compilation only."
14#endif
15
Pirama Arumuga Nainar7e1f8392021-08-16 17:30:48 -070016#if !defined(__HIPCC_RTC__)
Sasha Smundak4b1f33a2021-01-11 15:05:07 -080017#if defined(__cplusplus)
18#include <algorithm>
19#endif
20#include <limits.h>
21#include <stdint.h>
Pirama Arumuga Nainar494f6452021-12-02 10:42:14 -080022#ifdef __OPENMP_AMDGCN__
23#include <omp.h>
24#endif
25#endif // !defined(__HIPCC_RTC__)
Sasha Smundak4b1f33a2021-01-11 15:05:07 -080026
27#pragma push_macro("__DEVICE__")
Pirama Arumuga Nainar494f6452021-12-02 10:42:14 -080028
29#ifdef __OPENMP_AMDGCN__
30#define __DEVICE__ static inline __attribute__((always_inline, nothrow))
31#else
Sasha Smundak4b1f33a2021-01-11 15:05:07 -080032#define __DEVICE__ static __device__ inline __attribute__((always_inline))
Pirama Arumuga Nainar494f6452021-12-02 10:42:14 -080033#endif
Sasha Smundak4b1f33a2021-01-11 15:05:07 -080034
35// A few functions return bool type starting only in C++11.
36#pragma push_macro("__RETURN_TYPE")
Pirama Arumuga Nainar494f6452021-12-02 10:42:14 -080037#ifdef __OPENMP_AMDGCN__
38#define __RETURN_TYPE int
39#else
Sasha Smundak4b1f33a2021-01-11 15:05:07 -080040#if defined(__cplusplus)
41#define __RETURN_TYPE bool
42#else
43#define __RETURN_TYPE int
44#endif
Pirama Arumuga Nainar494f6452021-12-02 10:42:14 -080045#endif // __OPENMP_AMDGCN__
Sasha Smundak4b1f33a2021-01-11 15:05:07 -080046
47#if defined (__cplusplus) && __cplusplus < 201103L
48// emulate static_assert on type sizes
49template<bool>
50struct __compare_result{};
51template<>
52struct __compare_result<true> {
Pirama Arumuga Nainar7e1f8392021-08-16 17:30:48 -070053 static const __device__ bool valid;
Sasha Smundak4b1f33a2021-01-11 15:05:07 -080054};
55
56__DEVICE__
57void __suppress_unused_warning(bool b){};
58template <unsigned int S, unsigned int T>
59__DEVICE__ void __static_assert_equal_size() {
60 __suppress_unused_warning(__compare_result<S == T>::valid);
61}
62
63#define __static_assert_type_size_equal(A, B) \
64 __static_assert_equal_size<A,B>()
65
66#else
67#define __static_assert_type_size_equal(A,B) \
68 static_assert((A) == (B), "")
69
70#endif
71
72__DEVICE__
73uint64_t __make_mantissa_base8(const char *__tagp) {
74 uint64_t __r = 0;
75 while (__tagp) {
76 char __tmp = *__tagp;
77
78 if (__tmp >= '0' && __tmp <= '7')
79 __r = (__r * 8u) + __tmp - '0';
80 else
81 return 0;
82
83 ++__tagp;
84 }
85
86 return __r;
87}
88
89__DEVICE__
90uint64_t __make_mantissa_base10(const char *__tagp) {
91 uint64_t __r = 0;
92 while (__tagp) {
93 char __tmp = *__tagp;
94
95 if (__tmp >= '0' && __tmp <= '9')
96 __r = (__r * 10u) + __tmp - '0';
97 else
98 return 0;
99
100 ++__tagp;
101 }
102
103 return __r;
104}
105
106__DEVICE__
107uint64_t __make_mantissa_base16(const char *__tagp) {
108 uint64_t __r = 0;
109 while (__tagp) {
110 char __tmp = *__tagp;
111
112 if (__tmp >= '0' && __tmp <= '9')
113 __r = (__r * 16u) + __tmp - '0';
114 else if (__tmp >= 'a' && __tmp <= 'f')
115 __r = (__r * 16u) + __tmp - 'a' + 10;
116 else if (__tmp >= 'A' && __tmp <= 'F')
117 __r = (__r * 16u) + __tmp - 'A' + 10;
118 else
119 return 0;
120
121 ++__tagp;
122 }
123
124 return __r;
125}
126
127__DEVICE__
128uint64_t __make_mantissa(const char *__tagp) {
129 if (!__tagp)
130 return 0u;
131
132 if (*__tagp == '0') {
133 ++__tagp;
134
135 if (*__tagp == 'x' || *__tagp == 'X')
136 return __make_mantissa_base16(__tagp);
137 else
138 return __make_mantissa_base8(__tagp);
139 }
140
141 return __make_mantissa_base10(__tagp);
142}
143
144// BEGIN FLOAT
145#if defined(__cplusplus)
146__DEVICE__
147int abs(int __x) {
148 int __sgn = __x >> (sizeof(int) * CHAR_BIT - 1);
149 return (__x ^ __sgn) - __sgn;
150}
151__DEVICE__
152long labs(long __x) {
153 long __sgn = __x >> (sizeof(long) * CHAR_BIT - 1);
154 return (__x ^ __sgn) - __sgn;
155}
156__DEVICE__
157long long llabs(long long __x) {
158 long long __sgn = __x >> (sizeof(long long) * CHAR_BIT - 1);
159 return (__x ^ __sgn) - __sgn;
160}
161#endif
162
163__DEVICE__
164float acosf(float __x) { return __ocml_acos_f32(__x); }
165
166__DEVICE__
167float acoshf(float __x) { return __ocml_acosh_f32(__x); }
168
169__DEVICE__
170float asinf(float __x) { return __ocml_asin_f32(__x); }
171
172__DEVICE__
173float asinhf(float __x) { return __ocml_asinh_f32(__x); }
174
175__DEVICE__
176float atan2f(float __x, float __y) { return __ocml_atan2_f32(__x, __y); }
177
178__DEVICE__
179float atanf(float __x) { return __ocml_atan_f32(__x); }
180
181__DEVICE__
182float atanhf(float __x) { return __ocml_atanh_f32(__x); }
183
184__DEVICE__
185float cbrtf(float __x) { return __ocml_cbrt_f32(__x); }
186
187__DEVICE__
188float ceilf(float __x) { return __ocml_ceil_f32(__x); }
189
190__DEVICE__
191float copysignf(float __x, float __y) { return __ocml_copysign_f32(__x, __y); }
192
193__DEVICE__
194float cosf(float __x) { return __ocml_cos_f32(__x); }
195
196__DEVICE__
197float coshf(float __x) { return __ocml_cosh_f32(__x); }
198
199__DEVICE__
200float cospif(float __x) { return __ocml_cospi_f32(__x); }
201
202__DEVICE__
203float cyl_bessel_i0f(float __x) { return __ocml_i0_f32(__x); }
204
205__DEVICE__
206float cyl_bessel_i1f(float __x) { return __ocml_i1_f32(__x); }
207
208__DEVICE__
209float erfcf(float __x) { return __ocml_erfc_f32(__x); }
210
211__DEVICE__
212float erfcinvf(float __x) { return __ocml_erfcinv_f32(__x); }
213
214__DEVICE__
215float erfcxf(float __x) { return __ocml_erfcx_f32(__x); }
216
217__DEVICE__
218float erff(float __x) { return __ocml_erf_f32(__x); }
219
220__DEVICE__
221float erfinvf(float __x) { return __ocml_erfinv_f32(__x); }
222
223__DEVICE__
224float exp10f(float __x) { return __ocml_exp10_f32(__x); }
225
226__DEVICE__
227float exp2f(float __x) { return __ocml_exp2_f32(__x); }
228
229__DEVICE__
230float expf(float __x) { return __ocml_exp_f32(__x); }
231
232__DEVICE__
233float expm1f(float __x) { return __ocml_expm1_f32(__x); }
234
235__DEVICE__
236float fabsf(float __x) { return __ocml_fabs_f32(__x); }
237
238__DEVICE__
239float fdimf(float __x, float __y) { return __ocml_fdim_f32(__x, __y); }
240
241__DEVICE__
242float fdividef(float __x, float __y) { return __x / __y; }
243
244__DEVICE__
245float floorf(float __x) { return __ocml_floor_f32(__x); }
246
247__DEVICE__
248float fmaf(float __x, float __y, float __z) {
249 return __ocml_fma_f32(__x, __y, __z);
250}
251
252__DEVICE__
253float fmaxf(float __x, float __y) { return __ocml_fmax_f32(__x, __y); }
254
255__DEVICE__
256float fminf(float __x, float __y) { return __ocml_fmin_f32(__x, __y); }
257
258__DEVICE__
259float fmodf(float __x, float __y) { return __ocml_fmod_f32(__x, __y); }
260
261__DEVICE__
262float frexpf(float __x, int *__nptr) {
263 int __tmp;
Pirama Arumuga Nainar494f6452021-12-02 10:42:14 -0800264#ifdef __OPENMP_AMDGCN__
265#pragma omp allocate(__tmp) allocator(omp_thread_mem_alloc)
266#endif
Sasha Smundak4b1f33a2021-01-11 15:05:07 -0800267 float __r =
268 __ocml_frexp_f32(__x, (__attribute__((address_space(5))) int *)&__tmp);
269 *__nptr = __tmp;
270
271 return __r;
272}
273
274__DEVICE__
275float hypotf(float __x, float __y) { return __ocml_hypot_f32(__x, __y); }
276
277__DEVICE__
278int ilogbf(float __x) { return __ocml_ilogb_f32(__x); }
279
280__DEVICE__
281__RETURN_TYPE __finitef(float __x) { return __ocml_isfinite_f32(__x); }
282
283__DEVICE__
284__RETURN_TYPE __isinff(float __x) { return __ocml_isinf_f32(__x); }
285
286__DEVICE__
287__RETURN_TYPE __isnanf(float __x) { return __ocml_isnan_f32(__x); }
288
289__DEVICE__
290float j0f(float __x) { return __ocml_j0_f32(__x); }
291
292__DEVICE__
293float j1f(float __x) { return __ocml_j1_f32(__x); }
294
295__DEVICE__
296float jnf(int __n, float __x) { // TODO: we could use Ahmes multiplication
297 // and the Miller & Brown algorithm
298 // for linear recurrences to get O(log n) steps, but it's unclear if
299 // it'd be beneficial in this case.
300 if (__n == 0)
301 return j0f(__x);
302 if (__n == 1)
303 return j1f(__x);
304
305 float __x0 = j0f(__x);
306 float __x1 = j1f(__x);
307 for (int __i = 1; __i < __n; ++__i) {
308 float __x2 = (2 * __i) / __x * __x1 - __x0;
309 __x0 = __x1;
310 __x1 = __x2;
311 }
312
313 return __x1;
314}
315
316__DEVICE__
317float ldexpf(float __x, int __e) { return __ocml_ldexp_f32(__x, __e); }
318
319__DEVICE__
320float lgammaf(float __x) { return __ocml_lgamma_f32(__x); }
321
322__DEVICE__
323long long int llrintf(float __x) { return __ocml_rint_f32(__x); }
324
325__DEVICE__
326long long int llroundf(float __x) { return __ocml_round_f32(__x); }
327
328__DEVICE__
329float log10f(float __x) { return __ocml_log10_f32(__x); }
330
331__DEVICE__
332float log1pf(float __x) { return __ocml_log1p_f32(__x); }
333
334__DEVICE__
335float log2f(float __x) { return __ocml_log2_f32(__x); }
336
337__DEVICE__
338float logbf(float __x) { return __ocml_logb_f32(__x); }
339
340__DEVICE__
341float logf(float __x) { return __ocml_log_f32(__x); }
342
343__DEVICE__
344long int lrintf(float __x) { return __ocml_rint_f32(__x); }
345
346__DEVICE__
347long int lroundf(float __x) { return __ocml_round_f32(__x); }
348
349__DEVICE__
350float modff(float __x, float *__iptr) {
351 float __tmp;
Pirama Arumuga Nainar494f6452021-12-02 10:42:14 -0800352#ifdef __OPENMP_AMDGCN__
353#pragma omp allocate(__tmp) allocator(omp_thread_mem_alloc)
354#endif
Sasha Smundak4b1f33a2021-01-11 15:05:07 -0800355 float __r =
356 __ocml_modf_f32(__x, (__attribute__((address_space(5))) float *)&__tmp);
357 *__iptr = __tmp;
358 return __r;
359}
360
361__DEVICE__
362float nanf(const char *__tagp) {
363 union {
364 float val;
365 struct ieee_float {
366 unsigned int mantissa : 22;
367 unsigned int quiet : 1;
368 unsigned int exponent : 8;
369 unsigned int sign : 1;
370 } bits;
371 } __tmp;
372 __static_assert_type_size_equal(sizeof(__tmp.val), sizeof(__tmp.bits));
373
374 __tmp.bits.sign = 0u;
375 __tmp.bits.exponent = ~0u;
376 __tmp.bits.quiet = 1u;
377 __tmp.bits.mantissa = __make_mantissa(__tagp);
378
379 return __tmp.val;
380}
381
382__DEVICE__
383float nearbyintf(float __x) { return __ocml_nearbyint_f32(__x); }
384
385__DEVICE__
386float nextafterf(float __x, float __y) {
387 return __ocml_nextafter_f32(__x, __y);
388}
389
390__DEVICE__
391float norm3df(float __x, float __y, float __z) {
392 return __ocml_len3_f32(__x, __y, __z);
393}
394
395__DEVICE__
396float norm4df(float __x, float __y, float __z, float __w) {
397 return __ocml_len4_f32(__x, __y, __z, __w);
398}
399
400__DEVICE__
401float normcdff(float __x) { return __ocml_ncdf_f32(__x); }
402
403__DEVICE__
404float normcdfinvf(float __x) { return __ocml_ncdfinv_f32(__x); }
405
406__DEVICE__
407float normf(int __dim,
408 const float *__a) { // TODO: placeholder until OCML adds support.
409 float __r = 0;
410 while (__dim--) {
411 __r += __a[0] * __a[0];
412 ++__a;
413 }
414
415 return __ocml_sqrt_f32(__r);
416}
417
418__DEVICE__
419float powf(float __x, float __y) { return __ocml_pow_f32(__x, __y); }
420
421__DEVICE__
422float powif(float __x, int __y) { return __ocml_pown_f32(__x, __y); }
423
424__DEVICE__
425float rcbrtf(float __x) { return __ocml_rcbrt_f32(__x); }
426
427__DEVICE__
428float remainderf(float __x, float __y) {
429 return __ocml_remainder_f32(__x, __y);
430}
431
432__DEVICE__
433float remquof(float __x, float __y, int *__quo) {
434 int __tmp;
Pirama Arumuga Nainar494f6452021-12-02 10:42:14 -0800435#ifdef __OPENMP_AMDGCN__
436#pragma omp allocate(__tmp) allocator(omp_thread_mem_alloc)
437#endif
Sasha Smundak4b1f33a2021-01-11 15:05:07 -0800438 float __r = __ocml_remquo_f32(
439 __x, __y, (__attribute__((address_space(5))) int *)&__tmp);
440 *__quo = __tmp;
441
442 return __r;
443}
444
445__DEVICE__
446float rhypotf(float __x, float __y) { return __ocml_rhypot_f32(__x, __y); }
447
448__DEVICE__
449float rintf(float __x) { return __ocml_rint_f32(__x); }
450
451__DEVICE__
452float rnorm3df(float __x, float __y, float __z) {
453 return __ocml_rlen3_f32(__x, __y, __z);
454}
455
456__DEVICE__
457float rnorm4df(float __x, float __y, float __z, float __w) {
458 return __ocml_rlen4_f32(__x, __y, __z, __w);
459}
460
461__DEVICE__
462float rnormf(int __dim,
463 const float *__a) { // TODO: placeholder until OCML adds support.
464 float __r = 0;
465 while (__dim--) {
466 __r += __a[0] * __a[0];
467 ++__a;
468 }
469
470 return __ocml_rsqrt_f32(__r);
471}
472
473__DEVICE__
474float roundf(float __x) { return __ocml_round_f32(__x); }
475
476__DEVICE__
477float rsqrtf(float __x) { return __ocml_rsqrt_f32(__x); }
478
479__DEVICE__
480float scalblnf(float __x, long int __n) {
481 return (__n < INT_MAX) ? __ocml_scalbn_f32(__x, __n)
482 : __ocml_scalb_f32(__x, __n);
483}
484
485__DEVICE__
486float scalbnf(float __x, int __n) { return __ocml_scalbn_f32(__x, __n); }
487
488__DEVICE__
489__RETURN_TYPE __signbitf(float __x) { return __ocml_signbit_f32(__x); }
490
491__DEVICE__
492void sincosf(float __x, float *__sinptr, float *__cosptr) {
493 float __tmp;
Pirama Arumuga Nainar494f6452021-12-02 10:42:14 -0800494#ifdef __OPENMP_AMDGCN__
495#pragma omp allocate(__tmp) allocator(omp_thread_mem_alloc)
496#endif
Sasha Smundak4b1f33a2021-01-11 15:05:07 -0800497 *__sinptr =
498 __ocml_sincos_f32(__x, (__attribute__((address_space(5))) float *)&__tmp);
499 *__cosptr = __tmp;
500}
501
502__DEVICE__
503void sincospif(float __x, float *__sinptr, float *__cosptr) {
504 float __tmp;
Pirama Arumuga Nainar494f6452021-12-02 10:42:14 -0800505#ifdef __OPENMP_AMDGCN__
506#pragma omp allocate(__tmp) allocator(omp_thread_mem_alloc)
507#endif
Sasha Smundak4b1f33a2021-01-11 15:05:07 -0800508 *__sinptr = __ocml_sincospi_f32(
509 __x, (__attribute__((address_space(5))) float *)&__tmp);
510 *__cosptr = __tmp;
511}
512
513__DEVICE__
514float sinf(float __x) { return __ocml_sin_f32(__x); }
515
516__DEVICE__
517float sinhf(float __x) { return __ocml_sinh_f32(__x); }
518
519__DEVICE__
520float sinpif(float __x) { return __ocml_sinpi_f32(__x); }
521
522__DEVICE__
523float sqrtf(float __x) { return __ocml_sqrt_f32(__x); }
524
525__DEVICE__
526float tanf(float __x) { return __ocml_tan_f32(__x); }
527
528__DEVICE__
529float tanhf(float __x) { return __ocml_tanh_f32(__x); }
530
531__DEVICE__
532float tgammaf(float __x) { return __ocml_tgamma_f32(__x); }
533
534__DEVICE__
535float truncf(float __x) { return __ocml_trunc_f32(__x); }
536
537__DEVICE__
538float y0f(float __x) { return __ocml_y0_f32(__x); }
539
540__DEVICE__
541float y1f(float __x) { return __ocml_y1_f32(__x); }
542
543__DEVICE__
544float ynf(int __n, float __x) { // TODO: we could use Ahmes multiplication
545 // and the Miller & Brown algorithm
546 // for linear recurrences to get O(log n) steps, but it's unclear if
547 // it'd be beneficial in this case. Placeholder until OCML adds
548 // support.
549 if (__n == 0)
550 return y0f(__x);
551 if (__n == 1)
552 return y1f(__x);
553
554 float __x0 = y0f(__x);
555 float __x1 = y1f(__x);
556 for (int __i = 1; __i < __n; ++__i) {
557 float __x2 = (2 * __i) / __x * __x1 - __x0;
558 __x0 = __x1;
559 __x1 = __x2;
560 }
561
562 return __x1;
563}
564
565// BEGIN INTRINSICS
566
567__DEVICE__
568float __cosf(float __x) { return __ocml_native_cos_f32(__x); }
569
570__DEVICE__
571float __exp10f(float __x) { return __ocml_native_exp10_f32(__x); }
572
573__DEVICE__
574float __expf(float __x) { return __ocml_native_exp_f32(__x); }
575
576#if defined OCML_BASIC_ROUNDED_OPERATIONS
577__DEVICE__
578float __fadd_rd(float __x, float __y) { return __ocml_add_rtn_f32(__x, __y); }
Sasha Smundak4b1f33a2021-01-11 15:05:07 -0800579__DEVICE__
580float __fadd_rn(float __x, float __y) { return __ocml_add_rte_f32(__x, __y); }
Sasha Smundak4b1f33a2021-01-11 15:05:07 -0800581__DEVICE__
582float __fadd_ru(float __x, float __y) { return __ocml_add_rtp_f32(__x, __y); }
Sasha Smundak4b1f33a2021-01-11 15:05:07 -0800583__DEVICE__
584float __fadd_rz(float __x, float __y) { return __ocml_add_rtz_f32(__x, __y); }
Pirama Arumuga Nainar986b8802021-06-03 16:00:34 -0700585#else
Sasha Smundak4b1f33a2021-01-11 15:05:07 -0800586__DEVICE__
Pirama Arumuga Nainar986b8802021-06-03 16:00:34 -0700587float __fadd_rn(float __x, float __y) { return __x + __y; }
Sasha Smundak4b1f33a2021-01-11 15:05:07 -0800588#endif
Pirama Arumuga Nainar986b8802021-06-03 16:00:34 -0700589
Sasha Smundak4b1f33a2021-01-11 15:05:07 -0800590#if defined OCML_BASIC_ROUNDED_OPERATIONS
591__DEVICE__
Pirama Arumuga Nainar986b8802021-06-03 16:00:34 -0700592float __fdiv_rd(float __x, float __y) { return __ocml_div_rtn_f32(__x, __y); }
593__DEVICE__
594float __fdiv_rn(float __x, float __y) { return __ocml_div_rte_f32(__x, __y); }
595__DEVICE__
Sasha Smundak4b1f33a2021-01-11 15:05:07 -0800596float __fdiv_ru(float __x, float __y) { return __ocml_div_rtp_f32(__x, __y); }
Sasha Smundak4b1f33a2021-01-11 15:05:07 -0800597__DEVICE__
598float __fdiv_rz(float __x, float __y) { return __ocml_div_rtz_f32(__x, __y); }
Pirama Arumuga Nainar986b8802021-06-03 16:00:34 -0700599#else
600__DEVICE__
601float __fdiv_rn(float __x, float __y) { return __x / __y; }
Sasha Smundak4b1f33a2021-01-11 15:05:07 -0800602#endif
Pirama Arumuga Nainar986b8802021-06-03 16:00:34 -0700603
Sasha Smundak4b1f33a2021-01-11 15:05:07 -0800604__DEVICE__
605float __fdividef(float __x, float __y) { return __x / __y; }
Pirama Arumuga Nainar986b8802021-06-03 16:00:34 -0700606
Sasha Smundak4b1f33a2021-01-11 15:05:07 -0800607#if defined OCML_BASIC_ROUNDED_OPERATIONS
608__DEVICE__
609float __fmaf_rd(float __x, float __y, float __z) {
610 return __ocml_fma_rtn_f32(__x, __y, __z);
611}
Sasha Smundak4b1f33a2021-01-11 15:05:07 -0800612__DEVICE__
613float __fmaf_rn(float __x, float __y, float __z) {
614 return __ocml_fma_rte_f32(__x, __y, __z);
615}
Sasha Smundak4b1f33a2021-01-11 15:05:07 -0800616__DEVICE__
617float __fmaf_ru(float __x, float __y, float __z) {
618 return __ocml_fma_rtp_f32(__x, __y, __z);
619}
Sasha Smundak4b1f33a2021-01-11 15:05:07 -0800620__DEVICE__
621float __fmaf_rz(float __x, float __y, float __z) {
622 return __ocml_fma_rtz_f32(__x, __y, __z);
623}
Pirama Arumuga Nainar986b8802021-06-03 16:00:34 -0700624#else
625__DEVICE__
626float __fmaf_rn(float __x, float __y, float __z) {
627 return __ocml_fma_f32(__x, __y, __z);
628}
629#endif
Sasha Smundak4b1f33a2021-01-11 15:05:07 -0800630
Pirama Arumuga Nainar986b8802021-06-03 16:00:34 -0700631#if defined OCML_BASIC_ROUNDED_OPERATIONS
Sasha Smundak4b1f33a2021-01-11 15:05:07 -0800632__DEVICE__
633float __fmul_rd(float __x, float __y) { return __ocml_mul_rtn_f32(__x, __y); }
Sasha Smundak4b1f33a2021-01-11 15:05:07 -0800634__DEVICE__
635float __fmul_rn(float __x, float __y) { return __ocml_mul_rte_f32(__x, __y); }
Sasha Smundak4b1f33a2021-01-11 15:05:07 -0800636__DEVICE__
637float __fmul_ru(float __x, float __y) { return __ocml_mul_rtp_f32(__x, __y); }
Sasha Smundak4b1f33a2021-01-11 15:05:07 -0800638__DEVICE__
639float __fmul_rz(float __x, float __y) { return __ocml_mul_rtz_f32(__x, __y); }
Pirama Arumuga Nainar986b8802021-06-03 16:00:34 -0700640#else
Sasha Smundak4b1f33a2021-01-11 15:05:07 -0800641__DEVICE__
Pirama Arumuga Nainar986b8802021-06-03 16:00:34 -0700642float __fmul_rn(float __x, float __y) { return __x * __y; }
Sasha Smundak4b1f33a2021-01-11 15:05:07 -0800643#endif
Pirama Arumuga Nainar986b8802021-06-03 16:00:34 -0700644
Sasha Smundak4b1f33a2021-01-11 15:05:07 -0800645#if defined OCML_BASIC_ROUNDED_OPERATIONS
646__DEVICE__
Pirama Arumuga Nainar986b8802021-06-03 16:00:34 -0700647float __frcp_rd(float __x) { return __ocml_div_rtn_f32(1.0f, __x); }
648__DEVICE__
649float __frcp_rn(float __x) { return __ocml_div_rte_f32(1.0f, __x); }
650__DEVICE__
651float __frcp_ru(float __x) { return __ocml_div_rtp_f32(1.0f, __x); }
652__DEVICE__
653float __frcp_rz(float __x) { return __ocml_div_rtz_f32(1.0f, __x); }
654#else
655__DEVICE__
656float __frcp_rn(float __x) { return 1.0f / __x; }
657#endif
Sasha Smundak4b1f33a2021-01-11 15:05:07 -0800658
659__DEVICE__
Sasha Smundak4b1f33a2021-01-11 15:05:07 -0800660float __frsqrt_rn(float __x) { return __llvm_amdgcn_rsq_f32(__x); }
Pirama Arumuga Nainar986b8802021-06-03 16:00:34 -0700661
Sasha Smundak4b1f33a2021-01-11 15:05:07 -0800662#if defined OCML_BASIC_ROUNDED_OPERATIONS
663__DEVICE__
664float __fsqrt_rd(float __x) { return __ocml_sqrt_rtn_f32(__x); }
Sasha Smundak4b1f33a2021-01-11 15:05:07 -0800665__DEVICE__
666float __fsqrt_rn(float __x) { return __ocml_sqrt_rte_f32(__x); }
Sasha Smundak4b1f33a2021-01-11 15:05:07 -0800667__DEVICE__
668float __fsqrt_ru(float __x) { return __ocml_sqrt_rtp_f32(__x); }
Sasha Smundak4b1f33a2021-01-11 15:05:07 -0800669__DEVICE__
670float __fsqrt_rz(float __x) { return __ocml_sqrt_rtz_f32(__x); }
Pirama Arumuga Nainar986b8802021-06-03 16:00:34 -0700671#else
Sasha Smundak4b1f33a2021-01-11 15:05:07 -0800672__DEVICE__
Pirama Arumuga Nainar986b8802021-06-03 16:00:34 -0700673float __fsqrt_rn(float __x) { return __ocml_native_sqrt_f32(__x); }
Sasha Smundak4b1f33a2021-01-11 15:05:07 -0800674#endif
Pirama Arumuga Nainar986b8802021-06-03 16:00:34 -0700675
Sasha Smundak4b1f33a2021-01-11 15:05:07 -0800676#if defined OCML_BASIC_ROUNDED_OPERATIONS
677__DEVICE__
Pirama Arumuga Nainar986b8802021-06-03 16:00:34 -0700678float __fsub_rd(float __x, float __y) { return __ocml_sub_rtn_f32(__x, __y); }
679__DEVICE__
680float __fsub_rn(float __x, float __y) { return __ocml_sub_rte_f32(__x, __y); }
681__DEVICE__
Sasha Smundak4b1f33a2021-01-11 15:05:07 -0800682float __fsub_ru(float __x, float __y) { return __ocml_sub_rtp_f32(__x, __y); }
Sasha Smundak4b1f33a2021-01-11 15:05:07 -0800683__DEVICE__
684float __fsub_rz(float __x, float __y) { return __ocml_sub_rtz_f32(__x, __y); }
Pirama Arumuga Nainar986b8802021-06-03 16:00:34 -0700685#else
686__DEVICE__
687float __fsub_rn(float __x, float __y) { return __x - __y; }
Sasha Smundak4b1f33a2021-01-11 15:05:07 -0800688#endif
Pirama Arumuga Nainar986b8802021-06-03 16:00:34 -0700689
Sasha Smundak4b1f33a2021-01-11 15:05:07 -0800690__DEVICE__
691float __log10f(float __x) { return __ocml_native_log10_f32(__x); }
692
693__DEVICE__
694float __log2f(float __x) { return __ocml_native_log2_f32(__x); }
695
696__DEVICE__
697float __logf(float __x) { return __ocml_native_log_f32(__x); }
698
699__DEVICE__
700float __powf(float __x, float __y) { return __ocml_pow_f32(__x, __y); }
701
702__DEVICE__
703float __saturatef(float __x) { return (__x < 0) ? 0 : ((__x > 1) ? 1 : __x); }
704
705__DEVICE__
706void __sincosf(float __x, float *__sinptr, float *__cosptr) {
707 *__sinptr = __ocml_native_sin_f32(__x);
708 *__cosptr = __ocml_native_cos_f32(__x);
709}
710
711__DEVICE__
712float __sinf(float __x) { return __ocml_native_sin_f32(__x); }
713
714__DEVICE__
715float __tanf(float __x) { return __ocml_tan_f32(__x); }
716// END INTRINSICS
717// END FLOAT
718
719// BEGIN DOUBLE
720__DEVICE__
721double acos(double __x) { return __ocml_acos_f64(__x); }
722
723__DEVICE__
724double acosh(double __x) { return __ocml_acosh_f64(__x); }
725
726__DEVICE__
727double asin(double __x) { return __ocml_asin_f64(__x); }
728
729__DEVICE__
730double asinh(double __x) { return __ocml_asinh_f64(__x); }
731
732__DEVICE__
733double atan(double __x) { return __ocml_atan_f64(__x); }
734
735__DEVICE__
736double atan2(double __x, double __y) { return __ocml_atan2_f64(__x, __y); }
737
738__DEVICE__
739double atanh(double __x) { return __ocml_atanh_f64(__x); }
740
741__DEVICE__
742double cbrt(double __x) { return __ocml_cbrt_f64(__x); }
743
744__DEVICE__
745double ceil(double __x) { return __ocml_ceil_f64(__x); }
746
747__DEVICE__
748double copysign(double __x, double __y) {
749 return __ocml_copysign_f64(__x, __y);
750}
751
752__DEVICE__
753double cos(double __x) { return __ocml_cos_f64(__x); }
754
755__DEVICE__
756double cosh(double __x) { return __ocml_cosh_f64(__x); }
757
758__DEVICE__
759double cospi(double __x) { return __ocml_cospi_f64(__x); }
760
761__DEVICE__
762double cyl_bessel_i0(double __x) { return __ocml_i0_f64(__x); }
763
764__DEVICE__
765double cyl_bessel_i1(double __x) { return __ocml_i1_f64(__x); }
766
767__DEVICE__
768double erf(double __x) { return __ocml_erf_f64(__x); }
769
770__DEVICE__
771double erfc(double __x) { return __ocml_erfc_f64(__x); }
772
773__DEVICE__
774double erfcinv(double __x) { return __ocml_erfcinv_f64(__x); }
775
776__DEVICE__
777double erfcx(double __x) { return __ocml_erfcx_f64(__x); }
778
779__DEVICE__
780double erfinv(double __x) { return __ocml_erfinv_f64(__x); }
781
782__DEVICE__
783double exp(double __x) { return __ocml_exp_f64(__x); }
784
785__DEVICE__
786double exp10(double __x) { return __ocml_exp10_f64(__x); }
787
788__DEVICE__
789double exp2(double __x) { return __ocml_exp2_f64(__x); }
790
791__DEVICE__
792double expm1(double __x) { return __ocml_expm1_f64(__x); }
793
794__DEVICE__
795double fabs(double __x) { return __ocml_fabs_f64(__x); }
796
797__DEVICE__
798double fdim(double __x, double __y) { return __ocml_fdim_f64(__x, __y); }
799
800__DEVICE__
801double floor(double __x) { return __ocml_floor_f64(__x); }
802
803__DEVICE__
804double fma(double __x, double __y, double __z) {
805 return __ocml_fma_f64(__x, __y, __z);
806}
807
808__DEVICE__
809double fmax(double __x, double __y) { return __ocml_fmax_f64(__x, __y); }
810
811__DEVICE__
812double fmin(double __x, double __y) { return __ocml_fmin_f64(__x, __y); }
813
814__DEVICE__
815double fmod(double __x, double __y) { return __ocml_fmod_f64(__x, __y); }
816
817__DEVICE__
818double frexp(double __x, int *__nptr) {
819 int __tmp;
Pirama Arumuga Nainar494f6452021-12-02 10:42:14 -0800820#ifdef __OPENMP_AMDGCN__
821#pragma omp allocate(__tmp) allocator(omp_thread_mem_alloc)
822#endif
Sasha Smundak4b1f33a2021-01-11 15:05:07 -0800823 double __r =
824 __ocml_frexp_f64(__x, (__attribute__((address_space(5))) int *)&__tmp);
825 *__nptr = __tmp;
826 return __r;
827}
828
829__DEVICE__
830double hypot(double __x, double __y) { return __ocml_hypot_f64(__x, __y); }
831
832__DEVICE__
833int ilogb(double __x) { return __ocml_ilogb_f64(__x); }
834
835__DEVICE__
836__RETURN_TYPE __finite(double __x) { return __ocml_isfinite_f64(__x); }
837
838__DEVICE__
839__RETURN_TYPE __isinf(double __x) { return __ocml_isinf_f64(__x); }
840
841__DEVICE__
842__RETURN_TYPE __isnan(double __x) { return __ocml_isnan_f64(__x); }
843
844__DEVICE__
845double j0(double __x) { return __ocml_j0_f64(__x); }
846
847__DEVICE__
848double j1(double __x) { return __ocml_j1_f64(__x); }
849
850__DEVICE__
851double jn(int __n, double __x) { // TODO: we could use Ahmes multiplication
852 // and the Miller & Brown algorithm
853 // for linear recurrences to get O(log n) steps, but it's unclear if
854 // it'd be beneficial in this case. Placeholder until OCML adds
855 // support.
856 if (__n == 0)
857 return j0(__x);
858 if (__n == 1)
859 return j1(__x);
860
861 double __x0 = j0(__x);
862 double __x1 = j1(__x);
863 for (int __i = 1; __i < __n; ++__i) {
864 double __x2 = (2 * __i) / __x * __x1 - __x0;
865 __x0 = __x1;
866 __x1 = __x2;
867 }
868 return __x1;
869}
870
871__DEVICE__
872double ldexp(double __x, int __e) { return __ocml_ldexp_f64(__x, __e); }
873
874__DEVICE__
875double lgamma(double __x) { return __ocml_lgamma_f64(__x); }
876
877__DEVICE__
878long long int llrint(double __x) { return __ocml_rint_f64(__x); }
879
880__DEVICE__
881long long int llround(double __x) { return __ocml_round_f64(__x); }
882
883__DEVICE__
884double log(double __x) { return __ocml_log_f64(__x); }
885
886__DEVICE__
887double log10(double __x) { return __ocml_log10_f64(__x); }
888
889__DEVICE__
890double log1p(double __x) { return __ocml_log1p_f64(__x); }
891
892__DEVICE__
893double log2(double __x) { return __ocml_log2_f64(__x); }
894
895__DEVICE__
896double logb(double __x) { return __ocml_logb_f64(__x); }
897
898__DEVICE__
899long int lrint(double __x) { return __ocml_rint_f64(__x); }
900
901__DEVICE__
902long int lround(double __x) { return __ocml_round_f64(__x); }
903
904__DEVICE__
905double modf(double __x, double *__iptr) {
906 double __tmp;
Pirama Arumuga Nainar494f6452021-12-02 10:42:14 -0800907#ifdef __OPENMP_AMDGCN__
908#pragma omp allocate(__tmp) allocator(omp_thread_mem_alloc)
909#endif
Sasha Smundak4b1f33a2021-01-11 15:05:07 -0800910 double __r =
911 __ocml_modf_f64(__x, (__attribute__((address_space(5))) double *)&__tmp);
912 *__iptr = __tmp;
913
914 return __r;
915}
916
917__DEVICE__
918double nan(const char *__tagp) {
919#if !_WIN32
920 union {
921 double val;
922 struct ieee_double {
923 uint64_t mantissa : 51;
924 uint32_t quiet : 1;
925 uint32_t exponent : 11;
926 uint32_t sign : 1;
927 } bits;
928 } __tmp;
929 __static_assert_type_size_equal(sizeof(__tmp.val), sizeof(__tmp.bits));
930
931 __tmp.bits.sign = 0u;
932 __tmp.bits.exponent = ~0u;
933 __tmp.bits.quiet = 1u;
934 __tmp.bits.mantissa = __make_mantissa(__tagp);
935
936 return __tmp.val;
937#else
938 __static_assert_type_size_equal(sizeof(uint64_t), sizeof(double));
939 uint64_t __val = __make_mantissa(__tagp);
940 __val |= 0xFFF << 51;
941 return *reinterpret_cast<double *>(&__val);
942#endif
943}
944
945__DEVICE__
946double nearbyint(double __x) { return __ocml_nearbyint_f64(__x); }
947
948__DEVICE__
949double nextafter(double __x, double __y) {
950 return __ocml_nextafter_f64(__x, __y);
951}
952
953__DEVICE__
954double norm(int __dim,
955 const double *__a) { // TODO: placeholder until OCML adds support.
956 double __r = 0;
957 while (__dim--) {
958 __r += __a[0] * __a[0];
959 ++__a;
960 }
961
962 return __ocml_sqrt_f64(__r);
963}
964
965__DEVICE__
966double norm3d(double __x, double __y, double __z) {
967 return __ocml_len3_f64(__x, __y, __z);
968}
969
970__DEVICE__
971double norm4d(double __x, double __y, double __z, double __w) {
972 return __ocml_len4_f64(__x, __y, __z, __w);
973}
974
975__DEVICE__
976double normcdf(double __x) { return __ocml_ncdf_f64(__x); }
977
978__DEVICE__
979double normcdfinv(double __x) { return __ocml_ncdfinv_f64(__x); }
980
981__DEVICE__
982double pow(double __x, double __y) { return __ocml_pow_f64(__x, __y); }
983
984__DEVICE__
985double powi(double __x, int __y) { return __ocml_pown_f64(__x, __y); }
986
987__DEVICE__
988double rcbrt(double __x) { return __ocml_rcbrt_f64(__x); }
989
990__DEVICE__
991double remainder(double __x, double __y) {
992 return __ocml_remainder_f64(__x, __y);
993}
994
995__DEVICE__
996double remquo(double __x, double __y, int *__quo) {
997 int __tmp;
Pirama Arumuga Nainar494f6452021-12-02 10:42:14 -0800998#ifdef __OPENMP_AMDGCN__
999#pragma omp allocate(__tmp) allocator(omp_thread_mem_alloc)
1000#endif
Sasha Smundak4b1f33a2021-01-11 15:05:07 -08001001 double __r = __ocml_remquo_f64(
1002 __x, __y, (__attribute__((address_space(5))) int *)&__tmp);
1003 *__quo = __tmp;
1004
1005 return __r;
1006}
1007
1008__DEVICE__
1009double rhypot(double __x, double __y) { return __ocml_rhypot_f64(__x, __y); }
1010
1011__DEVICE__
1012double rint(double __x) { return __ocml_rint_f64(__x); }
1013
1014__DEVICE__
1015double rnorm(int __dim,
1016 const double *__a) { // TODO: placeholder until OCML adds support.
1017 double __r = 0;
1018 while (__dim--) {
1019 __r += __a[0] * __a[0];
1020 ++__a;
1021 }
1022
1023 return __ocml_rsqrt_f64(__r);
1024}
1025
1026__DEVICE__
1027double rnorm3d(double __x, double __y, double __z) {
1028 return __ocml_rlen3_f64(__x, __y, __z);
1029}
1030
1031__DEVICE__
1032double rnorm4d(double __x, double __y, double __z, double __w) {
1033 return __ocml_rlen4_f64(__x, __y, __z, __w);
1034}
1035
1036__DEVICE__
1037double round(double __x) { return __ocml_round_f64(__x); }
1038
1039__DEVICE__
1040double rsqrt(double __x) { return __ocml_rsqrt_f64(__x); }
1041
1042__DEVICE__
1043double scalbln(double __x, long int __n) {
1044 return (__n < INT_MAX) ? __ocml_scalbn_f64(__x, __n)
1045 : __ocml_scalb_f64(__x, __n);
1046}
1047__DEVICE__
1048double scalbn(double __x, int __n) { return __ocml_scalbn_f64(__x, __n); }
1049
1050__DEVICE__
1051__RETURN_TYPE __signbit(double __x) { return __ocml_signbit_f64(__x); }
1052
1053__DEVICE__
1054double sin(double __x) { return __ocml_sin_f64(__x); }
1055
1056__DEVICE__
1057void sincos(double __x, double *__sinptr, double *__cosptr) {
1058 double __tmp;
Pirama Arumuga Nainar494f6452021-12-02 10:42:14 -08001059#ifdef __OPENMP_AMDGCN__
1060#pragma omp allocate(__tmp) allocator(omp_thread_mem_alloc)
1061#endif
Sasha Smundak4b1f33a2021-01-11 15:05:07 -08001062 *__sinptr = __ocml_sincos_f64(
1063 __x, (__attribute__((address_space(5))) double *)&__tmp);
1064 *__cosptr = __tmp;
1065}
1066
1067__DEVICE__
1068void sincospi(double __x, double *__sinptr, double *__cosptr) {
1069 double __tmp;
Pirama Arumuga Nainar494f6452021-12-02 10:42:14 -08001070#ifdef __OPENMP_AMDGCN__
1071#pragma omp allocate(__tmp) allocator(omp_thread_mem_alloc)
1072#endif
Sasha Smundak4b1f33a2021-01-11 15:05:07 -08001073 *__sinptr = __ocml_sincospi_f64(
1074 __x, (__attribute__((address_space(5))) double *)&__tmp);
1075 *__cosptr = __tmp;
1076}
1077
1078__DEVICE__
1079double sinh(double __x) { return __ocml_sinh_f64(__x); }
1080
1081__DEVICE__
1082double sinpi(double __x) { return __ocml_sinpi_f64(__x); }
1083
1084__DEVICE__
1085double sqrt(double __x) { return __ocml_sqrt_f64(__x); }
1086
1087__DEVICE__
1088double tan(double __x) { return __ocml_tan_f64(__x); }
1089
1090__DEVICE__
1091double tanh(double __x) { return __ocml_tanh_f64(__x); }
1092
1093__DEVICE__
1094double tgamma(double __x) { return __ocml_tgamma_f64(__x); }
1095
1096__DEVICE__
1097double trunc(double __x) { return __ocml_trunc_f64(__x); }
1098
1099__DEVICE__
1100double y0(double __x) { return __ocml_y0_f64(__x); }
1101
1102__DEVICE__
1103double y1(double __x) { return __ocml_y1_f64(__x); }
1104
1105__DEVICE__
1106double yn(int __n, double __x) { // TODO: we could use Ahmes multiplication
1107 // and the Miller & Brown algorithm
1108 // for linear recurrences to get O(log n) steps, but it's unclear if
1109 // it'd be beneficial in this case. Placeholder until OCML adds
1110 // support.
1111 if (__n == 0)
1112 return y0(__x);
1113 if (__n == 1)
1114 return y1(__x);
1115
1116 double __x0 = y0(__x);
1117 double __x1 = y1(__x);
1118 for (int __i = 1; __i < __n; ++__i) {
1119 double __x2 = (2 * __i) / __x * __x1 - __x0;
1120 __x0 = __x1;
1121 __x1 = __x2;
1122 }
1123
1124 return __x1;
1125}
1126
1127// BEGIN INTRINSICS
1128#if defined OCML_BASIC_ROUNDED_OPERATIONS
1129__DEVICE__
1130double __dadd_rd(double __x, double __y) {
1131 return __ocml_add_rtn_f64(__x, __y);
1132}
Sasha Smundak4b1f33a2021-01-11 15:05:07 -08001133__DEVICE__
1134double __dadd_rn(double __x, double __y) {
1135 return __ocml_add_rte_f64(__x, __y);
1136}
Sasha Smundak4b1f33a2021-01-11 15:05:07 -08001137__DEVICE__
1138double __dadd_ru(double __x, double __y) {
1139 return __ocml_add_rtp_f64(__x, __y);
1140}
Sasha Smundak4b1f33a2021-01-11 15:05:07 -08001141__DEVICE__
1142double __dadd_rz(double __x, double __y) {
1143 return __ocml_add_rtz_f64(__x, __y);
1144}
Pirama Arumuga Nainar986b8802021-06-03 16:00:34 -07001145#else
1146__DEVICE__
1147double __dadd_rn(double __x, double __y) { return __x + __y; }
1148#endif
Sasha Smundak4b1f33a2021-01-11 15:05:07 -08001149
Pirama Arumuga Nainar986b8802021-06-03 16:00:34 -07001150#if defined OCML_BASIC_ROUNDED_OPERATIONS
Sasha Smundak4b1f33a2021-01-11 15:05:07 -08001151__DEVICE__
1152double __ddiv_rd(double __x, double __y) {
1153 return __ocml_div_rtn_f64(__x, __y);
1154}
Sasha Smundak4b1f33a2021-01-11 15:05:07 -08001155__DEVICE__
1156double __ddiv_rn(double __x, double __y) {
1157 return __ocml_div_rte_f64(__x, __y);
1158}
Sasha Smundak4b1f33a2021-01-11 15:05:07 -08001159__DEVICE__
1160double __ddiv_ru(double __x, double __y) {
1161 return __ocml_div_rtp_f64(__x, __y);
1162}
Sasha Smundak4b1f33a2021-01-11 15:05:07 -08001163__DEVICE__
1164double __ddiv_rz(double __x, double __y) {
1165 return __ocml_div_rtz_f64(__x, __y);
1166}
Pirama Arumuga Nainar986b8802021-06-03 16:00:34 -07001167#else
1168__DEVICE__
1169double __ddiv_rn(double __x, double __y) { return __x / __y; }
1170#endif
Sasha Smundak4b1f33a2021-01-11 15:05:07 -08001171
Pirama Arumuga Nainar986b8802021-06-03 16:00:34 -07001172#if defined OCML_BASIC_ROUNDED_OPERATIONS
Sasha Smundak4b1f33a2021-01-11 15:05:07 -08001173__DEVICE__
1174double __dmul_rd(double __x, double __y) {
1175 return __ocml_mul_rtn_f64(__x, __y);
1176}
Sasha Smundak4b1f33a2021-01-11 15:05:07 -08001177__DEVICE__
1178double __dmul_rn(double __x, double __y) {
1179 return __ocml_mul_rte_f64(__x, __y);
1180}
Sasha Smundak4b1f33a2021-01-11 15:05:07 -08001181__DEVICE__
1182double __dmul_ru(double __x, double __y) {
1183 return __ocml_mul_rtp_f64(__x, __y);
1184}
Sasha Smundak4b1f33a2021-01-11 15:05:07 -08001185__DEVICE__
1186double __dmul_rz(double __x, double __y) {
1187 return __ocml_mul_rtz_f64(__x, __y);
1188}
Pirama Arumuga Nainar986b8802021-06-03 16:00:34 -07001189#else
Sasha Smundak4b1f33a2021-01-11 15:05:07 -08001190__DEVICE__
Pirama Arumuga Nainar986b8802021-06-03 16:00:34 -07001191double __dmul_rn(double __x, double __y) { return __x * __y; }
Sasha Smundak4b1f33a2021-01-11 15:05:07 -08001192#endif
Pirama Arumuga Nainar986b8802021-06-03 16:00:34 -07001193
Sasha Smundak4b1f33a2021-01-11 15:05:07 -08001194#if defined OCML_BASIC_ROUNDED_OPERATIONS
1195__DEVICE__
Pirama Arumuga Nainar986b8802021-06-03 16:00:34 -07001196double __drcp_rd(double __x) { return __ocml_div_rtn_f64(1.0, __x); }
Sasha Smundak4b1f33a2021-01-11 15:05:07 -08001197__DEVICE__
Pirama Arumuga Nainar986b8802021-06-03 16:00:34 -07001198double __drcp_rn(double __x) { return __ocml_div_rte_f64(1.0, __x); }
1199__DEVICE__
1200double __drcp_ru(double __x) { return __ocml_div_rtp_f64(1.0, __x); }
1201__DEVICE__
1202double __drcp_rz(double __x) { return __ocml_div_rtz_f64(1.0, __x); }
1203#else
1204__DEVICE__
1205double __drcp_rn(double __x) { return 1.0 / __x; }
1206#endif
Sasha Smundak4b1f33a2021-01-11 15:05:07 -08001207
Pirama Arumuga Nainar986b8802021-06-03 16:00:34 -07001208#if defined OCML_BASIC_ROUNDED_OPERATIONS
Sasha Smundak4b1f33a2021-01-11 15:05:07 -08001209__DEVICE__
1210double __dsqrt_rd(double __x) { return __ocml_sqrt_rtn_f64(__x); }
Sasha Smundak4b1f33a2021-01-11 15:05:07 -08001211__DEVICE__
1212double __dsqrt_rn(double __x) { return __ocml_sqrt_rte_f64(__x); }
Sasha Smundak4b1f33a2021-01-11 15:05:07 -08001213__DEVICE__
1214double __dsqrt_ru(double __x) { return __ocml_sqrt_rtp_f64(__x); }
Sasha Smundak4b1f33a2021-01-11 15:05:07 -08001215__DEVICE__
1216double __dsqrt_rz(double __x) { return __ocml_sqrt_rtz_f64(__x); }
Pirama Arumuga Nainar986b8802021-06-03 16:00:34 -07001217#else
1218__DEVICE__
1219double __dsqrt_rn(double __x) { return __ocml_sqrt_f64(__x); }
1220#endif
Sasha Smundak4b1f33a2021-01-11 15:05:07 -08001221
Pirama Arumuga Nainar986b8802021-06-03 16:00:34 -07001222#if defined OCML_BASIC_ROUNDED_OPERATIONS
Sasha Smundak4b1f33a2021-01-11 15:05:07 -08001223__DEVICE__
1224double __dsub_rd(double __x, double __y) {
1225 return __ocml_sub_rtn_f64(__x, __y);
1226}
Sasha Smundak4b1f33a2021-01-11 15:05:07 -08001227__DEVICE__
1228double __dsub_rn(double __x, double __y) {
1229 return __ocml_sub_rte_f64(__x, __y);
1230}
Sasha Smundak4b1f33a2021-01-11 15:05:07 -08001231__DEVICE__
1232double __dsub_ru(double __x, double __y) {
1233 return __ocml_sub_rtp_f64(__x, __y);
1234}
Sasha Smundak4b1f33a2021-01-11 15:05:07 -08001235__DEVICE__
1236double __dsub_rz(double __x, double __y) {
1237 return __ocml_sub_rtz_f64(__x, __y);
1238}
Pirama Arumuga Nainar986b8802021-06-03 16:00:34 -07001239#else
1240__DEVICE__
1241double __dsub_rn(double __x, double __y) { return __x - __y; }
1242#endif
Sasha Smundak4b1f33a2021-01-11 15:05:07 -08001243
Pirama Arumuga Nainar986b8802021-06-03 16:00:34 -07001244#if defined OCML_BASIC_ROUNDED_OPERATIONS
Sasha Smundak4b1f33a2021-01-11 15:05:07 -08001245__DEVICE__
1246double __fma_rd(double __x, double __y, double __z) {
1247 return __ocml_fma_rtn_f64(__x, __y, __z);
1248}
Sasha Smundak4b1f33a2021-01-11 15:05:07 -08001249__DEVICE__
1250double __fma_rn(double __x, double __y, double __z) {
1251 return __ocml_fma_rte_f64(__x, __y, __z);
1252}
Sasha Smundak4b1f33a2021-01-11 15:05:07 -08001253__DEVICE__
1254double __fma_ru(double __x, double __y, double __z) {
1255 return __ocml_fma_rtp_f64(__x, __y, __z);
1256}
Sasha Smundak4b1f33a2021-01-11 15:05:07 -08001257__DEVICE__
1258double __fma_rz(double __x, double __y, double __z) {
1259 return __ocml_fma_rtz_f64(__x, __y, __z);
1260}
Pirama Arumuga Nainar986b8802021-06-03 16:00:34 -07001261#else
1262__DEVICE__
1263double __fma_rn(double __x, double __y, double __z) {
1264 return __ocml_fma_f64(__x, __y, __z);
1265}
Sasha Smundak4b1f33a2021-01-11 15:05:07 -08001266#endif
1267// END INTRINSICS
1268// END DOUBLE
1269
1270// C only macros
1271#if !defined(__cplusplus) && __STDC_VERSION__ >= 201112L
1272#define isfinite(__x) _Generic((__x), float : __finitef, double : __finite)(__x)
1273#define isinf(__x) _Generic((__x), float : __isinff, double : __isinf)(__x)
1274#define isnan(__x) _Generic((__x), float : __isnanf, double : __isnan)(__x)
1275#define signbit(__x) \
1276 _Generic((__x), float : __signbitf, double : __signbit)(__x)
1277#endif // !defined(__cplusplus) && __STDC_VERSION__ >= 201112L
1278
1279#if defined(__cplusplus)
1280template <class T> __DEVICE__ T min(T __arg1, T __arg2) {
1281 return (__arg1 < __arg2) ? __arg1 : __arg2;
1282}
1283
1284template <class T> __DEVICE__ T max(T __arg1, T __arg2) {
1285 return (__arg1 > __arg2) ? __arg1 : __arg2;
1286}
1287
1288__DEVICE__ int min(int __arg1, int __arg2) {
1289 return (__arg1 < __arg2) ? __arg1 : __arg2;
1290}
1291__DEVICE__ int max(int __arg1, int __arg2) {
1292 return (__arg1 > __arg2) ? __arg1 : __arg2;
1293}
1294
1295__DEVICE__
1296float max(float __x, float __y) { return fmaxf(__x, __y); }
1297
1298__DEVICE__
1299double max(double __x, double __y) { return fmax(__x, __y); }
1300
1301__DEVICE__
1302float min(float __x, float __y) { return fminf(__x, __y); }
1303
1304__DEVICE__
1305double min(double __x, double __y) { return fmin(__x, __y); }
1306
Pirama Arumuga Nainar494f6452021-12-02 10:42:14 -08001307#if !defined(__HIPCC_RTC__) && !defined(__OPENMP_AMDGCN__)
Sasha Smundak4b1f33a2021-01-11 15:05:07 -08001308__host__ inline static int min(int __arg1, int __arg2) {
1309 return std::min(__arg1, __arg2);
1310}
1311
1312__host__ inline static int max(int __arg1, int __arg2) {
1313 return std::max(__arg1, __arg2);
1314}
Pirama Arumuga Nainar494f6452021-12-02 10:42:14 -08001315#endif // !defined(__HIPCC_RTC__) && !defined(__OPENMP_AMDGCN__)
Sasha Smundak4b1f33a2021-01-11 15:05:07 -08001316#endif
1317
1318#pragma pop_macro("__DEVICE__")
1319#pragma pop_macro("__RETURN_TYPE")
1320
1321#endif // __CLANG_HIP_MATH_H__