|  | // REQUIRES: nvptx-registered-target | 
|  | // RUN: %clang_cc1 -triple nvptx-unknown-unknown -fcuda-is-device -S -emit-llvm -o - -x cuda %s | FileCheck %s | 
|  | // RUN: %clang_cc1 -triple nvptx64-unknown-unknown -fcuda-is-device -S -emit-llvm -o - -x cuda %s | FileCheck %s | 
|  |  | 
|  | #define __device__ __attribute__((device)) | 
|  | #define __global__ __attribute__((global)) | 
|  | #define __shared__ __attribute__((shared)) | 
|  | #define __constant__ __attribute__((constant)) | 
|  |  | 
|  | __device__ int read_tid() { | 
|  |  | 
|  | // CHECK: call i32 @llvm.ptx.read.tid.x() | 
|  | // CHECK: call i32 @llvm.ptx.read.tid.y() | 
|  | // CHECK: call i32 @llvm.ptx.read.tid.z() | 
|  | // CHECK: call i32 @llvm.ptx.read.tid.w() | 
|  |  | 
|  | int x = __builtin_ptx_read_tid_x(); | 
|  | int y = __builtin_ptx_read_tid_y(); | 
|  | int z = __builtin_ptx_read_tid_z(); | 
|  | int w = __builtin_ptx_read_tid_w(); | 
|  |  | 
|  | return x + y + z + w; | 
|  |  | 
|  | } | 
|  |  | 
|  | __device__ int read_ntid() { | 
|  |  | 
|  | // CHECK: call i32 @llvm.ptx.read.ntid.x() | 
|  | // CHECK: call i32 @llvm.ptx.read.ntid.y() | 
|  | // CHECK: call i32 @llvm.ptx.read.ntid.z() | 
|  | // CHECK: call i32 @llvm.ptx.read.ntid.w() | 
|  |  | 
|  | int x = __builtin_ptx_read_ntid_x(); | 
|  | int y = __builtin_ptx_read_ntid_y(); | 
|  | int z = __builtin_ptx_read_ntid_z(); | 
|  | int w = __builtin_ptx_read_ntid_w(); | 
|  |  | 
|  | return x + y + z + w; | 
|  |  | 
|  | } | 
|  |  | 
|  | __device__ int read_ctaid() { | 
|  |  | 
|  | // CHECK: call i32 @llvm.ptx.read.ctaid.x() | 
|  | // CHECK: call i32 @llvm.ptx.read.ctaid.y() | 
|  | // CHECK: call i32 @llvm.ptx.read.ctaid.z() | 
|  | // CHECK: call i32 @llvm.ptx.read.ctaid.w() | 
|  |  | 
|  | int x = __builtin_ptx_read_ctaid_x(); | 
|  | int y = __builtin_ptx_read_ctaid_y(); | 
|  | int z = __builtin_ptx_read_ctaid_z(); | 
|  | int w = __builtin_ptx_read_ctaid_w(); | 
|  |  | 
|  | return x + y + z + w; | 
|  |  | 
|  | } | 
|  |  | 
|  | __device__ int read_nctaid() { | 
|  |  | 
|  | // CHECK: call i32 @llvm.ptx.read.nctaid.x() | 
|  | // CHECK: call i32 @llvm.ptx.read.nctaid.y() | 
|  | // CHECK: call i32 @llvm.ptx.read.nctaid.z() | 
|  | // CHECK: call i32 @llvm.ptx.read.nctaid.w() | 
|  |  | 
|  | int x = __builtin_ptx_read_nctaid_x(); | 
|  | int y = __builtin_ptx_read_nctaid_y(); | 
|  | int z = __builtin_ptx_read_nctaid_z(); | 
|  | int w = __builtin_ptx_read_nctaid_w(); | 
|  |  | 
|  | return x + y + z + w; | 
|  |  | 
|  | } | 
|  |  | 
|  | __device__ int read_ids() { | 
|  |  | 
|  | // CHECK: call i32 @llvm.ptx.read.laneid() | 
|  | // CHECK: call i32 @llvm.ptx.read.warpid() | 
|  | // CHECK: call i32 @llvm.ptx.read.nwarpid() | 
|  | // CHECK: call i32 @llvm.ptx.read.smid() | 
|  | // CHECK: call i32 @llvm.ptx.read.nsmid() | 
|  | // CHECK: call i32 @llvm.ptx.read.gridid() | 
|  |  | 
|  | int a = __builtin_ptx_read_laneid(); | 
|  | int b = __builtin_ptx_read_warpid(); | 
|  | int c = __builtin_ptx_read_nwarpid(); | 
|  | int d = __builtin_ptx_read_smid(); | 
|  | int e = __builtin_ptx_read_nsmid(); | 
|  | int f = __builtin_ptx_read_gridid(); | 
|  |  | 
|  | return a + b + c + d + e + f; | 
|  |  | 
|  | } | 
|  |  | 
|  | __device__ int read_lanemasks() { | 
|  |  | 
|  | // CHECK: call i32 @llvm.ptx.read.lanemask.eq() | 
|  | // CHECK: call i32 @llvm.ptx.read.lanemask.le() | 
|  | // CHECK: call i32 @llvm.ptx.read.lanemask.lt() | 
|  | // CHECK: call i32 @llvm.ptx.read.lanemask.ge() | 
|  | // CHECK: call i32 @llvm.ptx.read.lanemask.gt() | 
|  |  | 
|  | int a = __builtin_ptx_read_lanemask_eq(); | 
|  | int b = __builtin_ptx_read_lanemask_le(); | 
|  | int c = __builtin_ptx_read_lanemask_lt(); | 
|  | int d = __builtin_ptx_read_lanemask_ge(); | 
|  | int e = __builtin_ptx_read_lanemask_gt(); | 
|  |  | 
|  | return a + b + c + d + e; | 
|  |  | 
|  | } | 
|  |  | 
|  | __device__ long read_clocks() { | 
|  |  | 
|  | // CHECK: call i32 @llvm.ptx.read.clock() | 
|  | // CHECK: call i64 @llvm.ptx.read.clock64() | 
|  |  | 
|  | int a = __builtin_ptx_read_clock(); | 
|  | long b = __builtin_ptx_read_clock64(); | 
|  |  | 
|  | return (long)a + b; | 
|  |  | 
|  | } | 
|  |  | 
|  | __device__ int read_pms() { | 
|  |  | 
|  | // CHECK: call i32 @llvm.ptx.read.pm0() | 
|  | // CHECK: call i32 @llvm.ptx.read.pm1() | 
|  | // CHECK: call i32 @llvm.ptx.read.pm2() | 
|  | // CHECK: call i32 @llvm.ptx.read.pm3() | 
|  |  | 
|  | int a = __builtin_ptx_read_pm0(); | 
|  | int b = __builtin_ptx_read_pm1(); | 
|  | int c = __builtin_ptx_read_pm2(); | 
|  | int d = __builtin_ptx_read_pm3(); | 
|  |  | 
|  | return a + b + c + d; | 
|  |  | 
|  | } | 
|  |  | 
|  | __device__ void sync() { | 
|  |  | 
|  | // CHECK: call void @llvm.ptx.bar.sync(i32 0) | 
|  |  | 
|  | __builtin_ptx_bar_sync(0); | 
|  |  | 
|  | } | 
|  |  | 
|  |  | 
|  | // NVVM intrinsics | 
|  |  | 
|  | // The idea is not to test all intrinsics, just that Clang is recognizing the | 
|  | // builtins defined in BuiltinsNVPTX.def | 
|  | __device__ void nvvm_math(float f1, float f2, double d1, double d2) { | 
|  | // CHECK: call float @llvm.nvvm.fmax.f | 
|  | float t1 = __nvvm_fmax_f(f1, f2); | 
|  | // CHECK: call float @llvm.nvvm.fmin.f | 
|  | float t2 = __nvvm_fmin_f(f1, f2); | 
|  | // CHECK: call float @llvm.nvvm.sqrt.rn.f | 
|  | float t3 = __nvvm_sqrt_rn_f(f1); | 
|  | // CHECK: call float @llvm.nvvm.rcp.rn.f | 
|  | float t4 = __nvvm_rcp_rn_f(f2); | 
|  | // CHECK: call float @llvm.nvvm.add.rn.f | 
|  | float t5 = __nvvm_add_rn_f(f1, f2); | 
|  |  | 
|  | // CHECK: call double @llvm.nvvm.fmax.d | 
|  | double td1 = __nvvm_fmax_d(d1, d2); | 
|  | // CHECK: call double @llvm.nvvm.fmin.d | 
|  | double td2 = __nvvm_fmin_d(d1, d2); | 
|  | // CHECK: call double @llvm.nvvm.sqrt.rn.d | 
|  | double td3 = __nvvm_sqrt_rn_d(d1); | 
|  | // CHECK: call double @llvm.nvvm.rcp.rn.d | 
|  | double td4 = __nvvm_rcp_rn_d(d2); | 
|  |  | 
|  | // CHECK: call void @llvm.nvvm.membar.cta() | 
|  | __nvvm_membar_cta(); | 
|  | // CHECK: call void @llvm.nvvm.membar.gl() | 
|  | __nvvm_membar_gl(); | 
|  | // CHECK: call void @llvm.nvvm.membar.sys() | 
|  | __nvvm_membar_sys(); | 
|  | // CHECK: call void @llvm.nvvm.barrier0() | 
|  | __nvvm_bar0(); | 
|  | } | 
|  |  | 
|  | __device__ int di; | 
|  | __shared__ int si; | 
|  | __device__ long dl; | 
|  | __shared__ long sl; | 
|  | __device__ long long dll; | 
|  | __shared__ long long sll; | 
|  |  | 
|  | // Check for atomic intrinsics | 
|  | // CHECK-LABEL: nvvm_atom | 
|  | __device__ void nvvm_atom(float *fp, float f, int *ip, int i, long *lp, long l, | 
|  | long long *llp, long long ll) { | 
|  | // CHECK: atomicrmw add | 
|  | __nvvm_atom_add_gen_i(ip, i); | 
|  | // CHECK: atomicrmw add | 
|  | __nvvm_atom_add_gen_l(&dl, l); | 
|  | // CHECK: atomicrmw add | 
|  | __nvvm_atom_add_gen_ll(&sll, ll); | 
|  |  | 
|  | // CHECK: atomicrmw sub | 
|  | __nvvm_atom_sub_gen_i(ip, i); | 
|  | // CHECK: atomicrmw sub | 
|  | __nvvm_atom_sub_gen_l(&dl, l); | 
|  | // CHECK: atomicrmw sub | 
|  | __nvvm_atom_sub_gen_ll(&sll, ll); | 
|  |  | 
|  | // CHECK: atomicrmw and | 
|  | __nvvm_atom_and_gen_i(ip, i); | 
|  | // CHECK: atomicrmw and | 
|  | __nvvm_atom_and_gen_l(&dl, l); | 
|  | // CHECK: atomicrmw and | 
|  | __nvvm_atom_and_gen_ll(&sll, ll); | 
|  |  | 
|  | // CHECK: atomicrmw or | 
|  | __nvvm_atom_or_gen_i(ip, i); | 
|  | // CHECK: atomicrmw or | 
|  | __nvvm_atom_or_gen_l(&dl, l); | 
|  | // CHECK: atomicrmw or | 
|  | __nvvm_atom_or_gen_ll(&sll, ll); | 
|  |  | 
|  | // CHECK: atomicrmw xor | 
|  | __nvvm_atom_xor_gen_i(ip, i); | 
|  | // CHECK: atomicrmw xor | 
|  | __nvvm_atom_xor_gen_l(&dl, l); | 
|  | // CHECK: atomicrmw xor | 
|  | __nvvm_atom_xor_gen_ll(&sll, ll); | 
|  |  | 
|  | // CHECK: atomicrmw xchg | 
|  | __nvvm_atom_xchg_gen_i(ip, i); | 
|  | // CHECK: atomicrmw xchg | 
|  | __nvvm_atom_xchg_gen_l(&dl, l); | 
|  | // CHECK: atomicrmw xchg | 
|  | __nvvm_atom_xchg_gen_ll(&sll, ll); | 
|  |  | 
|  | // CHECK: atomicrmw max | 
|  | __nvvm_atom_max_gen_i(ip, i); | 
|  | // CHECK: atomicrmw max | 
|  | __nvvm_atom_max_gen_ui((unsigned int *)ip, i); | 
|  | // CHECK: atomicrmw max | 
|  | __nvvm_atom_max_gen_l(&dl, l); | 
|  | // CHECK: atomicrmw max | 
|  | __nvvm_atom_max_gen_ul((unsigned long *)&dl, l); | 
|  | // CHECK: atomicrmw max | 
|  | __nvvm_atom_max_gen_ll(&sll, ll); | 
|  | // CHECK: atomicrmw max | 
|  | __nvvm_atom_max_gen_ull((unsigned long long *)&sll, ll); | 
|  |  | 
|  | // CHECK: atomicrmw min | 
|  | __nvvm_atom_min_gen_i(ip, i); | 
|  | // CHECK: atomicrmw min | 
|  | __nvvm_atom_min_gen_ui((unsigned int *)ip, i); | 
|  | // CHECK: atomicrmw min | 
|  | __nvvm_atom_min_gen_l(&dl, l); | 
|  | // CHECK: atomicrmw min | 
|  | __nvvm_atom_min_gen_ul((unsigned long *)&dl, l); | 
|  | // CHECK: atomicrmw min | 
|  | __nvvm_atom_min_gen_ll(&sll, ll); | 
|  | // CHECK: atomicrmw min | 
|  | __nvvm_atom_min_gen_ull((unsigned long long *)&sll, ll); | 
|  |  | 
|  | // CHECK: cmpxchg | 
|  | __nvvm_atom_cas_gen_i(ip, 0, i); | 
|  | // CHECK: cmpxchg | 
|  | __nvvm_atom_cas_gen_l(&dl, 0, l); | 
|  | // CHECK: cmpxchg | 
|  | __nvvm_atom_cas_gen_ll(&sll, 0, ll); | 
|  |  | 
|  | // CHECK: call float @llvm.nvvm.atomic.load.add.f32.p0f32 | 
|  | __nvvm_atom_add_gen_f(fp, f); | 
|  |  | 
|  | // CHECK: ret | 
|  | } |