[CUDA] Enable fusing FP ops (-ffp-contract=fast) for CUDA by default.

This matches the default nvcc behavior and gives a substantial
performance boost on GPUs, where fmad is much cheaper than add+mul.

Differential Revision: http://reviews.llvm.org/D20341

llvm-svn: 270094
diff --git a/clang/lib/Frontend/CompilerInvocation.cpp b/clang/lib/Frontend/CompilerInvocation.cpp
index 7c3850e..c5f839e 100644
--- a/clang/lib/Frontend/CompilerInvocation.cpp
+++ b/clang/lib/Frontend/CompilerInvocation.cpp
@@ -2255,10 +2255,15 @@
       LangOpts.ObjCExceptions = 1;
   }
 
-  // During CUDA device-side compilation, the aux triple is the triple used for
-  // host compilation.
-  if (LangOpts.CUDA && LangOpts.CUDAIsDevice) {
-    Res.getTargetOpts().HostTriple = Res.getFrontendOpts().AuxTriple;
+  if (LangOpts.CUDA) {
+    // During CUDA device-side compilation, the aux triple is the
+    // triple used for host compilation.
+    if (LangOpts.CUDAIsDevice)
+      Res.getTargetOpts().HostTriple = Res.getFrontendOpts().AuxTriple;
+
+    // Set default FP_CONTRACT to FAST.
+    if (!Args.hasArg(OPT_ffp_contract))
+      Res.getCodeGenOpts().setFPContractMode(CodeGenOptions::FPC_Fast);
   }
 
   // FIXME: Override value name discarding when asan or msan is used because the