[CUDA] Enable fusing FP ops (-ffp-contract=fast) for CUDA by default.
This matches the default nvcc behavior and gives a substantial
performance boost on GPUs, where fmad is much cheaper than a separate add+mul.
Differential Revision: http://reviews.llvm.org/D20341
llvm-svn: 270094
diff --git a/clang/lib/Frontend/CompilerInvocation.cpp b/clang/lib/Frontend/CompilerInvocation.cpp
index 7c3850e..c5f839e 100644
--- a/clang/lib/Frontend/CompilerInvocation.cpp
+++ b/clang/lib/Frontend/CompilerInvocation.cpp
@@ -2255,10 +2255,15 @@
LangOpts.ObjCExceptions = 1;
}
- // During CUDA device-side compilation, the aux triple is the triple used for
- // host compilation.
- if (LangOpts.CUDA && LangOpts.CUDAIsDevice) {
- Res.getTargetOpts().HostTriple = Res.getFrontendOpts().AuxTriple;
+ if (LangOpts.CUDA) {
+ // During CUDA device-side compilation, the aux triple is the
+ // triple used for host compilation.
+ if (LangOpts.CUDAIsDevice)
+ Res.getTargetOpts().HostTriple = Res.getFrontendOpts().AuxTriple;
+
+ // Set default FP_CONTRACT to FAST.
+ if (!Args.hasArg(OPT_ffp_contract))
+ Res.getCodeGenOpts().setFPContractMode(CodeGenOptions::FPC_Fast);
}
// FIXME: Override value name discarding when asan or msan is used because the