commit | 91f6f07bb88fd39be8b5e0fd2fdf12903316c662 | [log] [tgz] |
---|---|---|
author | Justin Lebar <jlebar@google.com> | Mon May 23 20:19:56 2016 +0000 |
committer | Justin Lebar <jlebar@google.com> | Mon May 23 20:19:56 2016 +0000 |
tree | 4b5dec7b7f5da854be2c0db872c00adb954bfa3a | |
parent | 66a891962b3dd66cff48aa27a613e17a3ecae389 [diff] [blame] |

[CUDA] Add -fcuda-approx-transcendentals flag.

Summary: This lets us emit e.g. sin.approx.f32. See http://docs.nvidia.com/cuda/parallel-thread-execution/#floating-point-instructions-sin

Reviewers: rnk

Subscribers: tra, cfe-commits

Differential Revision: http://reviews.llvm.org/D20493

llvm-svn: 270484


```diff
diff --git a/clang/lib/Frontend/InitPreprocessor.cpp b/clang/lib/Frontend/InitPreprocessor.cpp
index 5d38d5f..f8b407b 100644
--- a/clang/lib/Frontend/InitPreprocessor.cpp
+++ b/clang/lib/Frontend/InitPreprocessor.cpp
@@ -938,6 +938,12 @@
     Builder.defineMacro("__CUDA_ARCH__");
   }
 
+  // We need to communicate this to our CUDA header wrapper, which in turn
+  // informs the proper CUDA headers of this choice.
+  if (LangOpts.CUDADeviceApproxTranscendentals || LangOpts.FastMath) {
+    Builder.defineMacro("__CLANG_CUDA_APPROX_TRANSCENDENTALS__");
+  }
+
   // OpenCL definitions.
   if (LangOpts.OpenCL) {
 #define OPENCLEXT(Ext) \
```