Logan Chien | 2833ffb | 2018-10-09 10:03:24 +0800 | [diff] [blame] | 1 | /*===---- __clang_cuda_cmath.h - Device-side CUDA cmath support ------------=== |
| 2 | * |
Logan Chien | df4f766 | 2019-09-04 16:45:23 -0700 | [diff] [blame] | 3 | * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
| 4 | * See https://llvm.org/LICENSE.txt for license information. |
| 5 | * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
Logan Chien | 2833ffb | 2018-10-09 10:03:24 +0800 | [diff] [blame] | 6 | * |
| 7 | *===-----------------------------------------------------------------------=== |
| 8 | */ |
| 9 | #ifndef __CLANG_CUDA_CMATH_H__ |
| 10 | #define __CLANG_CUDA_CMATH_H__ |
| 11 | #ifndef __CUDA__ |
| 12 | #error "This file is for CUDA compilation only." |
| 13 | #endif |
| 14 | |
Sasha Smundak | 0fc590b | 2020-10-07 08:11:59 -0700 | [diff] [blame] | 15 | #ifndef __OPENMP_NVPTX__ |
Logan Chien | 55afb0a | 2018-10-15 10:42:14 +0800 | [diff] [blame] | 16 | #include <limits> |
Sasha Smundak | 0fc590b | 2020-10-07 08:11:59 -0700 | [diff] [blame] | 17 | #endif |
Logan Chien | 55afb0a | 2018-10-15 10:42:14 +0800 | [diff] [blame] | 18 | |
Logan Chien | 2833ffb | 2018-10-09 10:03:24 +0800 | [diff] [blame] | 19 | // CUDA lets us use various std math functions on the device side. This file |
| 20 | // works in concert with __clang_cuda_math_forward_declares.h to make this work. |
| 21 | // |
| 22 | // Specifically, the forward-declares header declares __device__ overloads for |
| 23 | // these functions in the global namespace, then pulls them into namespace std |
| 24 | // with 'using' statements. Then this file implements those functions, after |
Logan Chien | 55afb0a | 2018-10-15 10:42:14 +0800 | [diff] [blame] | 25 | // their implementations have been pulled in. |
Logan Chien | 2833ffb | 2018-10-09 10:03:24 +0800 | [diff] [blame] | 26 | // |
| 27 | // It's important that we declare the functions in the global namespace and pull |
| 28 | // them into namespace std with using statements, as opposed to simply declaring |
| 29 | // these functions in namespace std, because our device functions need to |
| 30 | // overload the standard library functions, which may be declared in the global |
| 31 | // namespace or in std, depending on the degree of conformance of the stdlib |
| 32 | // implementation. Declaring in the global namespace and pulling into namespace |
| 33 | // std covers all of the known knowns. |
| 34 | |
// __DEVICE__ marks every wrapper in this header.
// NOTE(review): in OpenMP NVPTX offload mode the wrappers are plain
// constexpr/nothrow (no __device__) — presumably because OpenMP target
// regions attribute functions implicitly; confirm against the OpenMP
// offloading driver. In CUDA mode they are __device__ inline so they can
// overload the host library functions on the device side.
#ifdef __OPENMP_NVPTX__
#define __DEVICE__ static constexpr __attribute__((always_inline, nothrow))
#else
#define __DEVICE__ static __device__ __inline__ __attribute__((always_inline))
#endif
Logan Chien | 2833ffb | 2018-10-09 10:03:24 +0800 | [diff] [blame] | 40 | |
// Device-side overloads that simply forward to the corresponding libdevice /
// CUDA runtime functions in the global namespace (::llabs, ::acosf, ...).
// The float overloads exist so that e.g. std::sin(1.0f) dispatches to sinf
// instead of silently promoting to double.
__DEVICE__ long long abs(long long __n) { return ::llabs(__n); }
__DEVICE__ long abs(long __n) { return ::labs(__n); }
__DEVICE__ float abs(float __x) { return ::fabsf(__x); }
__DEVICE__ double abs(double __x) { return ::fabs(__x); }
__DEVICE__ float acos(float __x) { return ::acosf(__x); }
__DEVICE__ float asin(float __x) { return ::asinf(__x); }
__DEVICE__ float atan(float __x) { return ::atanf(__x); }
__DEVICE__ float atan2(float __x, float __y) { return ::atan2f(__x, __y); }
__DEVICE__ float ceil(float __x) { return ::ceilf(__x); }
__DEVICE__ float cos(float __x) { return ::cosf(__x); }
__DEVICE__ float cosh(float __x) { return ::coshf(__x); }
__DEVICE__ float exp(float __x) { return ::expf(__x); }
__DEVICE__ float fabs(float __x) { return ::fabsf(__x); }
__DEVICE__ float floor(float __x) { return ::floorf(__x); }
__DEVICE__ float fmod(float __x, float __y) { return ::fmodf(__x, __y); }
// Classify __x as NaN / infinite / normal / subnormal / zero using the
// compiler builtin, so no libdevice call is needed.
__DEVICE__ int fpclassify(float __x) {
  return __builtin_fpclassify(FP_NAN, FP_INFINITE, FP_NORMAL, FP_SUBNORMAL,
                              FP_ZERO, __x);
}
__DEVICE__ int fpclassify(double __x) {
  return __builtin_fpclassify(FP_NAN, FP_INFINITE, FP_NORMAL, FP_SUBNORMAL,
                              FP_ZERO, __x);
}
// Decompose __arg into a normalized fraction and a power-of-two exponent
// (stored through __exp), forwarding to libdevice's frexpf.
__DEVICE__ float frexp(float __arg, int *__exp) {
  return ::frexpf(__arg, __exp);
}
Logan Chien | 55afb0a | 2018-10-15 10:42:14 +0800 | [diff] [blame] | 67 | |
// For inscrutable reasons, the CUDA headers define these functions for us on
// Windows.
#if !defined(_MSC_VER) || defined(__OPENMP_NVPTX__)

// For OpenMP we work around some old system headers that have non-conforming
// `isinf(float)` and `isnan(float)` implementations that return an `int`. We do
// this by providing two versions of these functions, differing only in the
// return type. To avoid conflicting definitions we disable implicit base
// function generation. That means we will end up with two specializations, one
// per type, but only one has a base function defined by the system header.
#if defined(__OPENMP_NVPTX__)
#pragma omp begin declare variant match(                                      \
    implementation = {extension(disable_implicit_base)})

// FIXME: We lack an extension to customize the mangling of the variants, e.g.,
//        add a suffix. This means we would clash with the names of the variants
//        (note that we do not create implicit base functions here). To avoid
//        this clash we add a new trait to some of them that is always true
//        (this is LLVM after all ;)). It will only influence the mangled name
//        of the variants inside the inner region and avoid the clash.
#pragma omp begin declare variant match(implementation = {vendor(llvm)})

// int-returning variants, matching the non-conforming system headers.
__DEVICE__ int isinf(float __x) { return ::__isinff(__x); }
__DEVICE__ int isinf(double __x) { return ::__isinf(__x); }
__DEVICE__ int isfinite(float __x) { return ::__finitef(__x); }
__DEVICE__ int isfinite(double __x) { return ::__isfinited(__x); }
__DEVICE__ int isnan(float __x) { return ::__isnanf(__x); }
__DEVICE__ int isnan(double __x) { return ::__isnan(__x); }

#pragma omp end declare variant

#endif

// Standard bool-returning classification functions, forwarded to the
// double-underscore libdevice primitives.
__DEVICE__ bool isinf(float __x) { return ::__isinff(__x); }
__DEVICE__ bool isinf(double __x) { return ::__isinf(__x); }
__DEVICE__ bool isfinite(float __x) { return ::__finitef(__x); }
// For inscrutable reasons, __finite(), the double-precision version of
// __finitef, does not exist when compiling for MacOS. __isfinited is available
// everywhere and is just as good.
__DEVICE__ bool isfinite(double __x) { return ::__isfinited(__x); }
__DEVICE__ bool isnan(float __x) { return ::__isnanf(__x); }
__DEVICE__ bool isnan(double __x) { return ::__isnan(__x); }

#if defined(__OPENMP_NVPTX__)
#pragma omp end declare variant
#endif

#endif
| 116 | |
// Quiet (non-signaling on NaN) floating-point comparison predicates,
// implemented with the corresponding compiler builtins.
__DEVICE__ bool isgreater(float __x, float __y) {
  return __builtin_isgreater(__x, __y);
}
__DEVICE__ bool isgreater(double __x, double __y) {
  return __builtin_isgreater(__x, __y);
}
__DEVICE__ bool isgreaterequal(float __x, float __y) {
  return __builtin_isgreaterequal(__x, __y);
}
__DEVICE__ bool isgreaterequal(double __x, double __y) {
  return __builtin_isgreaterequal(__x, __y);
}
__DEVICE__ bool isless(float __x, float __y) {
  return __builtin_isless(__x, __y);
}
__DEVICE__ bool isless(double __x, double __y) {
  return __builtin_isless(__x, __y);
}
__DEVICE__ bool islessequal(float __x, float __y) {
  return __builtin_islessequal(__x, __y);
}
__DEVICE__ bool islessequal(double __x, double __y) {
  return __builtin_islessequal(__x, __y);
}
__DEVICE__ bool islessgreater(float __x, float __y) {
  return __builtin_islessgreater(__x, __y);
}
__DEVICE__ bool islessgreater(double __x, double __y) {
  return __builtin_islessgreater(__x, __y);
}
__DEVICE__ bool isnormal(float __x) { return __builtin_isnormal(__x); }
__DEVICE__ bool isnormal(double __x) { return __builtin_isnormal(__x); }
// True when __x and __y are unordered, i.e. at least one of them is NaN.
__DEVICE__ bool isunordered(float __x, float __y) {
  return __builtin_isunordered(__x, __y);
}
__DEVICE__ bool isunordered(double __x, double __y) {
  return __builtin_isunordered(__x, __y);
}
// Remaining single-precision wrappers forwarding to libdevice.
__DEVICE__ float ldexp(float __arg, int __exp) {
  return ::ldexpf(__arg, __exp);
}
__DEVICE__ float log(float __x) { return ::logf(__x); }
__DEVICE__ float log10(float __x) { return ::log10f(__x); }
__DEVICE__ float modf(float __x, float *__iptr) { return ::modff(__x, __iptr); }
__DEVICE__ float pow(float __base, float __exp) {
  return ::powf(__base, __exp);
}
// pow with an integer exponent uses libdevice's powi/powif.
__DEVICE__ float pow(float __base, int __iexp) {
  return ::powif(__base, __iexp);
}
__DEVICE__ double pow(double __base, int __iexp) {
  return ::powi(__base, __iexp);
}
__DEVICE__ bool signbit(float __x) { return ::__signbitf(__x); }
__DEVICE__ bool signbit(double __x) { return ::__signbitd(__x); }
__DEVICE__ float sin(float __x) { return ::sinf(__x); }
__DEVICE__ float sinh(float __x) { return ::sinhf(__x); }
__DEVICE__ float sqrt(float __x) { return ::sqrtf(__x); }
__DEVICE__ float tan(float __x) { return ::tanf(__x); }
__DEVICE__ float tanh(float __x) { return ::tanhf(__x); }
| 177 | |
// There was a redefinition error for this overload in CUDA mode.
// We restrict it to OpenMP mode for now, that is where it is actually needed
// anyway.
#ifdef __OPENMP_NVPTX__
// Remainder of __n/__d; the low-order bits of the quotient are stored
// through __q, forwarding to libdevice's remquof.
__DEVICE__ float remquo(float __n, float __d, int *__q) {
  return ::remquof(__n, __d, __q);
}
#endif
| 186 | |
Logan Chien | 55afb0a | 2018-10-15 10:42:14 +0800 | [diff] [blame] | 187 | // Notably missing above is nexttoward. We omit it because |
| 188 | // libdevice doesn't provide an implementation, and we don't want to be in the |
| 189 | // business of implementing tricky libm functions in this header. |
| 190 | |
#ifndef __OPENMP_NVPTX__

// Now we've defined everything we promised we'd define in
// __clang_cuda_math_forward_declares.h. We need to do two additional things to
// fix up our math functions.
//
// 1) Define __device__ overloads for e.g. sin(int). The CUDA headers define
//    only sin(float) and sin(double), which means that e.g. sin(0) is
//    ambiguous.
//
// 2) Pull the __device__ overloads of "foobarf" math functions into namespace
//    std. These are defined in the CUDA headers in the global namespace,
//    independent of everything else we've done here.

// We can't use std::enable_if, because we want to be pre-C++11 compatible. But
// we go ahead and unconditionally define functions that are only available when
// compiling for C++11 to match the behavior of the CUDA headers.
// Minimal stand-in for std::enable_if: the nested `type` exists only in the
// true specialization, so the generated overloads below SFINAE away for
// argument types that fail the numeric_limits checks.
template<bool __B, class __T = void>
struct __clang_cuda_enable_if {};

template <class __T> struct __clang_cuda_enable_if<true, __T> {
  typedef __T type;
};

// Defines an overload of __fn that accepts one integral argument, calls
// __fn((double)x), and returns __retty.
#define __CUDA_CLANG_FN_INTEGER_OVERLOAD_1(__retty, __fn)                      \
  template <typename __T>                                                      \
  __DEVICE__                                                                   \
      typename __clang_cuda_enable_if<std::numeric_limits<__T>::is_integer,    \
                                      __retty>::type                           \
      __fn(__T __x) {                                                          \
    return ::__fn((double)__x);                                                \
  }

// Defines an overload of __fn that accepts two arithmetic arguments, calls
// __fn((double)x, (double)y), and returns __retty.
//
// Note this is different from OVERLOAD_1, which generates an overload that
// accepts only *integral* arguments.
// NOTE(review): unlike OVERLOAD_1, the generated body calls __fn unqualified
// rather than ::__fn — presumably so overload resolution can pick the
// (double, double) overload; confirm this asymmetry is intentional.
#define __CUDA_CLANG_FN_INTEGER_OVERLOAD_2(__retty, __fn)                      \
  template <typename __T1, typename __T2>                                     \
  __DEVICE__ typename __clang_cuda_enable_if<                                  \
      std::numeric_limits<__T1>::is_specialized &&                            \
          std::numeric_limits<__T2>::is_specialized,                          \
      __retty>::type                                                          \
  __fn(__T1 __x, __T2 __y) {                                                  \
    return __fn((double)__x, (double)__y);                                    \
  }
| 240 | |
// Instantiate the integer/arithmetic-argument overloads for the full set of
// <cmath> functions. (The trailing ';' on some invocations is a redundant
// empty declaration left for historical reasons; the macros already expand to
// complete function definitions.)
__CUDA_CLANG_FN_INTEGER_OVERLOAD_1(double, acos)
__CUDA_CLANG_FN_INTEGER_OVERLOAD_1(double, acosh)
__CUDA_CLANG_FN_INTEGER_OVERLOAD_1(double, asin)
__CUDA_CLANG_FN_INTEGER_OVERLOAD_1(double, asinh)
__CUDA_CLANG_FN_INTEGER_OVERLOAD_1(double, atan)
__CUDA_CLANG_FN_INTEGER_OVERLOAD_2(double, atan2);
__CUDA_CLANG_FN_INTEGER_OVERLOAD_1(double, atanh)
__CUDA_CLANG_FN_INTEGER_OVERLOAD_1(double, cbrt)
__CUDA_CLANG_FN_INTEGER_OVERLOAD_1(double, ceil)
__CUDA_CLANG_FN_INTEGER_OVERLOAD_2(double, copysign);
__CUDA_CLANG_FN_INTEGER_OVERLOAD_1(double, cos)
__CUDA_CLANG_FN_INTEGER_OVERLOAD_1(double, cosh)
__CUDA_CLANG_FN_INTEGER_OVERLOAD_1(double, erf)
__CUDA_CLANG_FN_INTEGER_OVERLOAD_1(double, erfc)
__CUDA_CLANG_FN_INTEGER_OVERLOAD_1(double, exp)
__CUDA_CLANG_FN_INTEGER_OVERLOAD_1(double, exp2)
__CUDA_CLANG_FN_INTEGER_OVERLOAD_1(double, expm1)
__CUDA_CLANG_FN_INTEGER_OVERLOAD_1(double, fabs)
__CUDA_CLANG_FN_INTEGER_OVERLOAD_2(double, fdim);
__CUDA_CLANG_FN_INTEGER_OVERLOAD_1(double, floor)
__CUDA_CLANG_FN_INTEGER_OVERLOAD_2(double, fmax);
__CUDA_CLANG_FN_INTEGER_OVERLOAD_2(double, fmin);
__CUDA_CLANG_FN_INTEGER_OVERLOAD_2(double, fmod);
__CUDA_CLANG_FN_INTEGER_OVERLOAD_1(int, fpclassify)
__CUDA_CLANG_FN_INTEGER_OVERLOAD_2(double, hypot);
__CUDA_CLANG_FN_INTEGER_OVERLOAD_1(int, ilogb)
__CUDA_CLANG_FN_INTEGER_OVERLOAD_1(bool, isfinite)
__CUDA_CLANG_FN_INTEGER_OVERLOAD_2(bool, isgreater);
__CUDA_CLANG_FN_INTEGER_OVERLOAD_2(bool, isgreaterequal);
__CUDA_CLANG_FN_INTEGER_OVERLOAD_1(bool, isinf);
__CUDA_CLANG_FN_INTEGER_OVERLOAD_2(bool, isless);
__CUDA_CLANG_FN_INTEGER_OVERLOAD_2(bool, islessequal);
__CUDA_CLANG_FN_INTEGER_OVERLOAD_2(bool, islessgreater);
__CUDA_CLANG_FN_INTEGER_OVERLOAD_1(bool, isnan);
__CUDA_CLANG_FN_INTEGER_OVERLOAD_1(bool, isnormal)
__CUDA_CLANG_FN_INTEGER_OVERLOAD_2(bool, isunordered);
__CUDA_CLANG_FN_INTEGER_OVERLOAD_1(double, lgamma)
__CUDA_CLANG_FN_INTEGER_OVERLOAD_1(double, log)
__CUDA_CLANG_FN_INTEGER_OVERLOAD_1(double, log10)
__CUDA_CLANG_FN_INTEGER_OVERLOAD_1(double, log1p)
__CUDA_CLANG_FN_INTEGER_OVERLOAD_1(double, log2)
__CUDA_CLANG_FN_INTEGER_OVERLOAD_1(double, logb)
__CUDA_CLANG_FN_INTEGER_OVERLOAD_1(long long, llrint)
__CUDA_CLANG_FN_INTEGER_OVERLOAD_1(long long, llround)
__CUDA_CLANG_FN_INTEGER_OVERLOAD_1(long, lrint)
__CUDA_CLANG_FN_INTEGER_OVERLOAD_1(long, lround)
__CUDA_CLANG_FN_INTEGER_OVERLOAD_1(double, nearbyint);
__CUDA_CLANG_FN_INTEGER_OVERLOAD_2(double, nextafter);
__CUDA_CLANG_FN_INTEGER_OVERLOAD_2(double, pow);
__CUDA_CLANG_FN_INTEGER_OVERLOAD_2(double, remainder);
__CUDA_CLANG_FN_INTEGER_OVERLOAD_1(double, rint);
__CUDA_CLANG_FN_INTEGER_OVERLOAD_1(double, round);
__CUDA_CLANG_FN_INTEGER_OVERLOAD_1(bool, signbit)
__CUDA_CLANG_FN_INTEGER_OVERLOAD_1(double, sin)
__CUDA_CLANG_FN_INTEGER_OVERLOAD_1(double, sinh)
__CUDA_CLANG_FN_INTEGER_OVERLOAD_1(double, sqrt)
__CUDA_CLANG_FN_INTEGER_OVERLOAD_1(double, tan)
__CUDA_CLANG_FN_INTEGER_OVERLOAD_1(double, tanh)
__CUDA_CLANG_FN_INTEGER_OVERLOAD_1(double, tgamma)
__CUDA_CLANG_FN_INTEGER_OVERLOAD_1(double, trunc);

// The generator macros are implementation details; keep them from leaking.
#undef __CUDA_CLANG_FN_INTEGER_OVERLOAD_1
#undef __CUDA_CLANG_FN_INTEGER_OVERLOAD_2
| 304 | |
// Overloads for functions that don't match the patterns expected by
// __CUDA_CLANG_FN_INTEGER_OVERLOAD_{1,2}.

// fma for any mix of arithmetic argument types: everything is promoted to
// double and forwarded to the double overload of std::fma.
template <typename __T1, typename __T2, typename __T3>
__DEVICE__ typename __clang_cuda_enable_if<
    std::numeric_limits<__T1>::is_specialized &&
        std::numeric_limits<__T2>::is_specialized &&
        std::numeric_limits<__T3>::is_specialized,
    double>::type
fma(__T1 __x, __T2 __y, __T3 __z) {
  return std::fma((double)__x, (double)__y, (double)__z);
}

// frexp(integral, int*): promote to double; exponent stored through __exp.
template <typename __T>
__DEVICE__ typename __clang_cuda_enable_if<std::numeric_limits<__T>::is_integer,
                                           double>::type
frexp(__T __x, int *__exp) {
  return std::frexp((double)__x, __exp);
}

// ldexp(integral, int): promote the value to double.
template <typename __T>
__DEVICE__ typename __clang_cuda_enable_if<std::numeric_limits<__T>::is_integer,
                                           double>::type
ldexp(__T __x, int __exp) {
  return std::ldexp((double)__x, __exp);
}

// remquo for any pair of arithmetic types; quotient bits stored via __quo.
template <typename __T1, typename __T2>
__DEVICE__ typename __clang_cuda_enable_if<
    std::numeric_limits<__T1>::is_specialized &&
        std::numeric_limits<__T2>::is_specialized,
    double>::type
remquo(__T1 __x, __T2 __y, int *__quo) {
  return std::remquo((double)__x, (double)__y, __quo);
}

// scalbln/scalbn(integral, exponent): promote the value to double.
template <typename __T>
__DEVICE__ typename __clang_cuda_enable_if<std::numeric_limits<__T>::is_integer,
                                           double>::type
scalbln(__T __x, long __exp) {
  return std::scalbln((double)__x, __exp);
}

template <typename __T>
__DEVICE__ typename __clang_cuda_enable_if<std::numeric_limits<__T>::is_integer,
                                           double>::type
scalbn(__T __x, int __exp) {
  return std::scalbn((double)__x, __exp);
}
| 353 | |
// We need to define these overloads in exactly the namespace our standard
// library uses (including the right inline namespace), otherwise they won't be
// picked up by other functions in the standard library (e.g. functions in
// <complex>). Thus the ugliness below: open libc++'s versioned std namespace
// when available, otherwise plain std (with libstdc++'s version namespace if
// it defines one).
#ifdef _LIBCPP_BEGIN_NAMESPACE_STD
_LIBCPP_BEGIN_NAMESPACE_STD
#else
namespace std {
#ifdef _GLIBCXX_BEGIN_NAMESPACE_VERSION
_GLIBCXX_BEGIN_NAMESPACE_VERSION
#endif
#endif

// Pull the new overloads we defined above into namespace std.
using ::acos;
using ::acosh;
using ::asin;
using ::asinh;
using ::atan;
using ::atan2;
using ::atanh;
using ::cbrt;
using ::ceil;
using ::copysign;
using ::cos;
using ::cosh;
using ::erf;
using ::erfc;
using ::exp;
using ::exp2;
using ::expm1;
using ::fabs;
using ::fdim;
using ::floor;
using ::fma;
using ::fmax;
using ::fmin;
using ::fmod;
using ::fpclassify;
using ::frexp;
using ::hypot;
using ::ilogb;
using ::isfinite;
using ::isgreater;
using ::isgreaterequal;
using ::isless;
using ::islessequal;
using ::islessgreater;
using ::isnormal;
using ::isunordered;
using ::ldexp;
using ::lgamma;
using ::llrint;
using ::llround;
using ::log;
using ::log10;
using ::log1p;
using ::log2;
using ::logb;
using ::lrint;
using ::lround;
using ::nearbyint;
using ::nextafter;
using ::pow;
using ::remainder;
using ::remquo;
using ::rint;
using ::round;
using ::scalbln;
using ::scalbn;
using ::signbit;
using ::sin;
using ::sinh;
using ::sqrt;
using ::tan;
using ::tanh;
using ::tgamma;
using ::trunc;

// Well this is fun: We need to pull these symbols in for libc++, but we can't
// pull them in with libstdc++, because its ::isinf and ::isnan are different
// than its std::isinf and std::isnan.
#ifndef __GLIBCXX__
using ::isinf;
using ::isnan;
#endif

// Finally, pull the "foobarf" functions that CUDA defines in its headers into
// namespace std.
using ::acosf;
using ::acoshf;
using ::asinf;
using ::asinhf;
using ::atan2f;
using ::atanf;
using ::atanhf;
using ::cbrtf;
using ::ceilf;
using ::copysignf;
using ::cosf;
using ::coshf;
using ::erfcf;
using ::erff;
using ::exp2f;
using ::expf;
using ::expm1f;
using ::fabsf;
using ::fdimf;
using ::floorf;
using ::fmaf;
using ::fmaxf;
using ::fminf;
using ::fmodf;
using ::frexpf;
using ::hypotf;
using ::ilogbf;
using ::ldexpf;
using ::lgammaf;
using ::llrintf;
using ::llroundf;
using ::log10f;
using ::log1pf;
using ::log2f;
using ::logbf;
using ::logf;
using ::lrintf;
using ::lroundf;
using ::modff;
using ::nearbyintf;
using ::nextafterf;
using ::powf;
using ::remainderf;
using ::remquof;
using ::rintf;
using ::roundf;
using ::scalblnf;
using ::scalbnf;
using ::sinf;
using ::sinhf;
using ::sqrtf;
using ::tanf;
using ::tanhf;
using ::tgammaf;
using ::truncf;

// Close whichever std namespace form we opened above.
#ifdef _LIBCPP_END_NAMESPACE_STD
_LIBCPP_END_NAMESPACE_STD
#else
#ifdef _GLIBCXX_BEGIN_NAMESPACE_VERSION
_GLIBCXX_END_NAMESPACE_VERSION
#endif
} // namespace std
#endif
| 507 | |
Sasha Smundak | 0fc590b | 2020-10-07 08:11:59 -0700 | [diff] [blame] | 508 | #endif // __OPENMP_NVPTX__ |
| 509 | |
Logan Chien | 2833ffb | 2018-10-09 10:03:24 +0800 | [diff] [blame] | 510 | #undef __DEVICE__ |
| 511 | |
| 512 | #endif |