[NVPTX] Add __nvvm_* intrinsics as Clang builtins

Fixes bug 13354.

llvm-svn: 167647
diff --git a/clang/test/CodeGen/builtins-nvptx.c b/clang/test/CodeGen/builtins-nvptx.c
index fa6b14c..2c7e0c1 100644
--- a/clang/test/CodeGen/builtins-nvptx.c
+++ b/clang/test/CodeGen/builtins-nvptx.c
@@ -1,8 +1,15 @@
-// RUN: %clang_cc1 -triple nvptx-unknown-unknown -emit-llvm -o %t %s
-// RUN: %clang_cc1 -triple nvptx64-unknown-unknown -emit-llvm -o %t %s
+// REQUIRES: nvptx-registered-target
+// REQUIRES: nvptx64-registered-target
+// RUN: %clang_cc1 -triple nvptx-unknown-unknown -S -emit-llvm -o - %s | FileCheck %s
+// RUN: %clang_cc1 -triple nvptx64-unknown-unknown -S -emit-llvm -o - %s | FileCheck %s
 
 int read_tid() {
 
+// CHECK: call i32 @llvm.ptx.read.tid.x()
+// CHECK: call i32 @llvm.ptx.read.tid.y()
+// CHECK: call i32 @llvm.ptx.read.tid.z()
+// CHECK: call i32 @llvm.ptx.read.tid.w()
+
   int x = __builtin_ptx_read_tid_x();
   int y = __builtin_ptx_read_tid_y();
   int z = __builtin_ptx_read_tid_z();
@@ -14,6 +21,11 @@
 
 int read_ntid() {
 
+// CHECK: call i32 @llvm.ptx.read.ntid.x()
+// CHECK: call i32 @llvm.ptx.read.ntid.y()
+// CHECK: call i32 @llvm.ptx.read.ntid.z()
+// CHECK: call i32 @llvm.ptx.read.ntid.w()
+
   int x = __builtin_ptx_read_ntid_x();
   int y = __builtin_ptx_read_ntid_y();
   int z = __builtin_ptx_read_ntid_z();
@@ -25,6 +37,11 @@
 
 int read_ctaid() {
 
+// CHECK: call i32 @llvm.ptx.read.ctaid.x()
+// CHECK: call i32 @llvm.ptx.read.ctaid.y()
+// CHECK: call i32 @llvm.ptx.read.ctaid.z()
+// CHECK: call i32 @llvm.ptx.read.ctaid.w()
+
   int x = __builtin_ptx_read_ctaid_x();
   int y = __builtin_ptx_read_ctaid_y();
   int z = __builtin_ptx_read_ctaid_z();
@@ -36,6 +53,11 @@
 
 int read_nctaid() {
 
+// CHECK: call i32 @llvm.ptx.read.nctaid.x()
+// CHECK: call i32 @llvm.ptx.read.nctaid.y()
+// CHECK: call i32 @llvm.ptx.read.nctaid.z()
+// CHECK: call i32 @llvm.ptx.read.nctaid.w()
+
   int x = __builtin_ptx_read_nctaid_x();
   int y = __builtin_ptx_read_nctaid_y();
   int z = __builtin_ptx_read_nctaid_z();
@@ -47,6 +69,13 @@
 
 int read_ids() {
 
+// CHECK: call i32 @llvm.ptx.read.laneid()
+// CHECK: call i32 @llvm.ptx.read.warpid()
+// CHECK: call i32 @llvm.ptx.read.nwarpid()
+// CHECK: call i32 @llvm.ptx.read.smid()
+// CHECK: call i32 @llvm.ptx.read.nsmid()
+// CHECK: call i32 @llvm.ptx.read.gridid()
+
   int a = __builtin_ptx_read_laneid();
   int b = __builtin_ptx_read_warpid();
   int c = __builtin_ptx_read_nwarpid();
@@ -60,6 +89,12 @@
 
 int read_lanemasks() {
 
+// CHECK: call i32 @llvm.ptx.read.lanemask.eq()
+// CHECK: call i32 @llvm.ptx.read.lanemask.le()
+// CHECK: call i32 @llvm.ptx.read.lanemask.lt()
+// CHECK: call i32 @llvm.ptx.read.lanemask.ge()
+// CHECK: call i32 @llvm.ptx.read.lanemask.gt()
+
   int a = __builtin_ptx_read_lanemask_eq();
   int b = __builtin_ptx_read_lanemask_le();
   int c = __builtin_ptx_read_lanemask_lt();
@@ -73,6 +108,9 @@
 
 long read_clocks() {
 
+// CHECK: call i32 @llvm.ptx.read.clock()
+// CHECK: call i64 @llvm.ptx.read.clock64()
+
   int a = __builtin_ptx_read_clock();
   long b = __builtin_ptx_read_clock64();
 
@@ -82,6 +120,11 @@
 
 int read_pms() {
 
+// CHECK: call i32 @llvm.ptx.read.pm0()
+// CHECK: call i32 @llvm.ptx.read.pm1()
+// CHECK: call i32 @llvm.ptx.read.pm2()
+// CHECK: call i32 @llvm.ptx.read.pm3()
+
   int a = __builtin_ptx_read_pm0();
   int b = __builtin_ptx_read_pm1();
   int c = __builtin_ptx_read_pm2();
@@ -93,6 +136,33 @@
 
 void sync() {
 
+// CHECK: call void @llvm.ptx.bar.sync(i32 0)
+// The literal 0 passed below is the i32 0 operand expected in the emitted call.
   __builtin_ptx_bar_sync(0);
 
 }
+
+
+// NVVM intrinsics
+//
+// Smoke test only, not exhaustive: a few float/double __nvvm_* builtins are
+// called to show that Clang recognizes the builtins in BuiltinsNVPTX.def.
+void nvvm_math(float f1, float f2, double d1, double d2) {
+// CHECK: call float @llvm.nvvm.fmax.f
+  float t1 = __nvvm_fmax_f(f1, f2);
+// CHECK: call float @llvm.nvvm.fmin.f
+  float t2 = __nvvm_fmin_f(f1, f2);
+// CHECK: call float @llvm.nvvm.sqrt.rn.f
+  float t3 = __nvvm_sqrt_rn_f(f1);
+// CHECK: call float @llvm.nvvm.rcp.rn.f
+  float t4 = __nvvm_rcp_rn_f(f2);
+// Double-precision variants of the same four operations.
+// CHECK: call double @llvm.nvvm.fmax.d
+  double td1 = __nvvm_fmax_d(d1, d2);
+// CHECK: call double @llvm.nvvm.fmin.d
+  double td2 = __nvvm_fmin_d(d1, d2);
+// CHECK: call double @llvm.nvvm.sqrt.rn.d
+  double td3 = __nvvm_sqrt_rn_d(d1);
+// CHECK: call double @llvm.nvvm.rcp.rn.d
+  double td4 = __nvvm_rcp_rn_d(d2);
+}