blob: 3f8f930106ff3346f87c038e49136d50ab883000 [file] [log] [blame]
Artem Belevich94a55e82015-09-22 17:22:59 +00001// REQUIRES: x86-registered-target
2// REQUIRES: nvptx-registered-target
3
4// Make sure we handle target overloads correctly.
5// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu \
6// RUN: -fcuda-target-overloads -emit-llvm -o - %s \
7// RUN: | FileCheck -check-prefix=CHECK-BOTH -check-prefix=CHECK-HOST %s
8// RUN: %clang_cc1 -triple nvptx64-nvidia-cuda -fcuda-is-device \
9// RUN: -fcuda-target-overloads -emit-llvm -o - %s \
Artem Belevich18609102016-02-12 18:29:18 +000010// RUN: | FileCheck -check-prefix=CHECK-BOTH -check-prefix=CHECK-DEVICE \
11// RUN: -check-prefix=CHECK-DEVICE-STRICT %s
Artem Belevich94a55e82015-09-22 17:22:59 +000012
13// Check target overloads handling with disabled call target checks.
14// RUN: %clang_cc1 -DNOCHECKS -triple x86_64-unknown-linux-gnu -emit-llvm \
15// RUN: -fcuda-disable-target-call-checks -fcuda-target-overloads -o - %s \
16// RUN: | FileCheck -check-prefix=CHECK-BOTH -check-prefix=CHECK-HOST \
17// RUN: -check-prefix=CHECK-BOTH-NC -check-prefix=CHECK-HOST-NC %s
18// RUN: %clang_cc1 -DNOCHECKS -triple nvptx64-nvidia-cuda -emit-llvm \
19// RUN: -fcuda-disable-target-call-checks -fcuda-target-overloads \
20// RUN: -fcuda-is-device -o - %s \
21// RUN: | FileCheck -check-prefix=CHECK-BOTH -check-prefix=CHECK-DEVICE \
22// RUN: -check-prefix=CHECK-BOTH-NC -check-prefix=CHECK-DEVICE-NC %s
23
24#include "Inputs/cuda.h"
25
26typedef int (*fp_t)(void);
27typedef void (*gp_t)(void);
28
29// CHECK-HOST: @hp = global i32 ()* @_Z1hv
30// CHECK-HOST: @chp = global i32 ()* @ch
31// CHECK-HOST: @dhp = global i32 ()* @_Z2dhv
32// CHECK-HOST: @cdhp = global i32 ()* @cdh
33// CHECK-HOST: @gp = global void ()* @_Z1gv
34
35// CHECK-BOTH-LABEL: define i32 @_Z2dhv()
36__device__ int dh(void) { return 1; }
37// CHECK-DEVICE: ret i32 1
38__host__ int dh(void) { return 2; }
39// CHECK-HOST: ret i32 2
40
41// CHECK-BOTH-LABEL: define i32 @_Z2hdv()
42__host__ __device__ int hd(void) { return 3; }
43// CHECK-BOTH: ret i32 3
44
45// CHECK-DEVICE-LABEL: define i32 @_Z1dv()
46__device__ int d(void) { return 8; }
47// CHECK-DEVICE: ret i32 8
48
49// CHECK-HOST-LABEL: define i32 @_Z1hv()
50__host__ int h(void) { return 9; }
51// CHECK-HOST: ret i32 9
52
53// CHECK-BOTH-LABEL: define void @_Z1gv()
54__global__ void g(void) {}
55// CHECK-BOTH: ret void
56
57// mangled names of extern "C" __host__ __device__ functions clash
58// with those of their __host__/__device__ counterparts, so
59// overloading of extern "C" functions can only happen for __host__
60// and __device__ functions -- we never codegen them in the same
61// compilation and therefore mangled name conflict is not a problem.
62
63// CHECK-BOTH-LABEL: define i32 @cdh()
64extern "C" __device__ int cdh(void) {return 10;}
65// CHECK-DEVICE: ret i32 10
66extern "C" __host__ int cdh(void) {return 11;}
67// CHECK-HOST: ret i32 11
68
69// CHECK-DEVICE-LABEL: define i32 @cd()
70extern "C" __device__ int cd(void) {return 12;}
71// CHECK-DEVICE: ret i32 12
72
73// CHECK-HOST-LABEL: define i32 @ch()
74extern "C" __host__ int ch(void) {return 13;}
75// CHECK-HOST: ret i32 13
76
77// CHECK-BOTH-LABEL: define i32 @chd()
78extern "C" __host__ __device__ int chd(void) {return 14;}
79// CHECK-BOTH: ret i32 14
80
Artem Belevich18609102016-02-12 18:29:18 +000081// HD functions are sometimes allowed to call H or D functions -- this
82// is an artifact of the source-to-source splitting performed by nvcc
83// that we need to mimic. During device mode compilation in nvcc, host
84// functions aren't present at all, so don't participate in
85// overloading. But in clang, H and D functions are present in both
86// compilation modes. Clang normally uses the target attribute as a
87// tiebreaker between overloads with otherwise identical priority, but
88// in order to match nvcc's behavior, we sometimes need to wholly
89// discard overloads that would not be present during compilation
90// under nvcc.
91
92template <typename T> T template_vs_function(T arg) { return 15; }
93__device__ float template_vs_function(float arg) { return 16; }
94
95// Here we expect to call the templated function during host
96// compilation, even if -fcuda-disable-target-call-checks is passed,
97// and even though C++ overload rules prefer the non-templated
98// function.
99// CHECK-BOTH-LABEL: define void @_Z5hd_tfv()
100__host__ __device__ void hd_tf(void) {
101 template_vs_function(1.0f);
102 // CHECK-HOST: call float @_Z20template_vs_functionIfET_S0_(float
103 // CHECK-DEVICE: call float @_Z20template_vs_functionf(float
104 template_vs_function(2.0);
105 // CHECK-HOST: call double @_Z20template_vs_functionIdET_S0_(double
106 // CHECK-DEVICE: call float @_Z20template_vs_functionf(float
107}
108
109// Calls from __host__ and __device__ functions should always call the
110// overloaded function that matches their mode.
111// CHECK-HOST-LABEL: define void @_Z4h_tfv()
112__host__ void h_tf() {
113 template_vs_function(1.0f);
114 // CHECK-HOST: call float @_Z20template_vs_functionIfET_S0_(float
115 template_vs_function(2.0);
116 // CHECK-HOST: call double @_Z20template_vs_functionIdET_S0_(double
117}
118
119// CHECK-DEVICE-LABEL: define void @_Z4d_tfv()
120__device__ void d_tf() {
121 template_vs_function(1.0f);
122 // CHECK-DEVICE: call float @_Z20template_vs_functionf(float
123 template_vs_function(2.0);
124 // CHECK-DEVICE: call float @_Z20template_vs_functionf(float
125}
126
127// In case we have a mix of HD and H-only or D-only candidates in the
128// overload set, normal C++ overload resolution rules apply first.
129template <typename T> T template_vs_hd_function(T arg) { return 15; }
130__host__ __device__ float template_vs_hd_function(float arg) { return 16; }
131
132// CHECK-BOTH-LABEL: define void @_Z7hd_thdfv()
133__host__ __device__ void hd_thdf() {
134 template_vs_hd_function(1.0f);
135 // CHECK-HOST: call float @_Z23template_vs_hd_functionf(float
136 // CHECK-DEVICE: call float @_Z23template_vs_hd_functionf(float
137 template_vs_hd_function(1);
138 // CHECK-HOST: call i32 @_Z23template_vs_hd_functionIiET_S0_(i32
139 // CHECK-DEVICE-STRICT: call float @_Z23template_vs_hd_functionf(float
140 // CHECK-DEVICE-NC: call i32 @_Z23template_vs_hd_functionIiET_S0_(i32
141}
142
143// CHECK-HOST-LABEL: define void @_Z6h_thdfv()
144__host__ void h_thdf() {
145 template_vs_hd_function(1.0f);
146 // CHECK-HOST: call float @_Z23template_vs_hd_functionf(float
147 template_vs_hd_function(1);
148 // CHECK-HOST: call i32 @_Z23template_vs_hd_functionIiET_S0_(i32
149}
150
151// CHECK-DEVICE-LABEL: define void @_Z6d_thdfv()
152__device__ void d_thdf() {
153 template_vs_hd_function(1.0f);
154 // CHECK-DEVICE: call float @_Z23template_vs_hd_functionf(float
155 template_vs_hd_function(1);
156 // Host-only function template is not callable with strict call checks,
157 // so for device side HD function will be the only choice.
158 // CHECK-DEVICE: call float @_Z23template_vs_hd_functionf(float
159}
160
161// Check that overloads still work the same way on both host and
162// device side when the overload set contains only functions from one
163// side of compilation.
164__device__ float device_only_function(int arg) { return 17; }
165__device__ float device_only_function(float arg) { return 18; }
166
167__host__ float host_only_function(int arg) { return 19; }
168__host__ float host_only_function(float arg) { return 20; }
169
170// CHECK-BOTH-LABEL: define void @_Z6hd_dofv()
171__host__ __device__ void hd_dof() {
172#ifdef NOCHECKS
173 device_only_function(1.0f);
174 // CHECK-BOTH-NC: call float @_Z20device_only_functionf(float
175 device_only_function(1);
176 // CHECK-BOTH-NC: call float @_Z20device_only_functioni(i32
177 host_only_function(1.0f);
178 // CHECK-BOTH-NC: call float @_Z18host_only_functionf(float
179 host_only_function(1);
180 // CHECK-BOTH-NC: call float @_Z18host_only_functioni(i32
181#endif
182}
183
184
Artem Belevich94a55e82015-09-22 17:22:59 +0000185// CHECK-HOST-LABEL: define void @_Z5hostfv()
186__host__ void hostf(void) {
Artem Belevich94a55e82015-09-22 17:22:59 +0000187 fp_t hp = h; // CHECK-HOST: store {{.*}} @_Z1hv, {{.*}} %hp,
188 fp_t chp = ch; // CHECK-HOST: store {{.*}} @ch, {{.*}} %chp,
189 fp_t dhp = dh; // CHECK-HOST: store {{.*}} @_Z2dhv, {{.*}} %dhp,
190 fp_t cdhp = cdh; // CHECK-HOST: store {{.*}} @cdh, {{.*}} %cdhp,
191 fp_t hdp = hd; // CHECK-HOST: store {{.*}} @_Z2hdv, {{.*}} %hdp,
192 fp_t chdp = chd; // CHECK-HOST: store {{.*}} @chd, {{.*}} %chdp,
193 gp_t gp = g; // CHECK-HOST: store {{.*}} @_Z1gv, {{.*}} %gp,
194
Artem Belevich94a55e82015-09-22 17:22:59 +0000195 h(); // CHECK-HOST: call i32 @_Z1hv()
196 ch(); // CHECK-HOST: call i32 @ch()
197 dh(); // CHECK-HOST: call i32 @_Z2dhv()
198 cdh(); // CHECK-HOST: call i32 @cdh()
199 g<<<0,0>>>(); // CHECK-HOST: call void @_Z1gv()
200}
201
202// CHECK-DEVICE-LABEL: define void @_Z7devicefv()
203__device__ void devicef(void) {
204 fp_t dp = d; // CHECK-DEVICE: store {{.*}} @_Z1dv, {{.*}} %dp,
205 fp_t cdp = cd; // CHECK-DEVICE: store {{.*}} @cd, {{.*}} %cdp,
Artem Belevich94a55e82015-09-22 17:22:59 +0000206 fp_t dhp = dh; // CHECK-DEVICE: store {{.*}} @_Z2dhv, {{.*}} %dhp,
207 fp_t cdhp = cdh; // CHECK-DEVICE: store {{.*}} @cdh, {{.*}} %cdhp,
208 fp_t hdp = hd; // CHECK-DEVICE: store {{.*}} @_Z2hdv, {{.*}} %hdp,
209 fp_t chdp = chd; // CHECK-DEVICE: store {{.*}} @chd, {{.*}} %chdp,
210
211 d(); // CHECK-DEVICE: call i32 @_Z1dv()
212 cd(); // CHECK-DEVICE: call i32 @cd()
Artem Belevich94a55e82015-09-22 17:22:59 +0000213 dh(); // CHECK-DEVICE: call i32 @_Z2dhv()
214 cdh(); // CHECK-DEVICE: call i32 @cdh()
215}
216
217// CHECK-BOTH-LABEL: define void @_Z11hostdevicefv()
218__host__ __device__ void hostdevicef(void) {
219#if defined (NOCHECKS)
220 fp_t dp = d; // CHECK-BOTH-NC: store {{.*}} @_Z1dv, {{.*}} %dp,
221 fp_t cdp = cd; // CHECK-BOTH-NC: store {{.*}} @cd, {{.*}} %cdp,
222 fp_t hp = h; // CHECK-BOTH-NC: store {{.*}} @_Z1hv, {{.*}} %hp,
223 fp_t chp = ch; // CHECK-BOTH-NC: store {{.*}} @ch, {{.*}} %chp,
224#endif
225 fp_t dhp = dh; // CHECK-BOTH: store {{.*}} @_Z2dhv, {{.*}} %dhp,
226 fp_t cdhp = cdh; // CHECK-BOTH: store {{.*}} @cdh, {{.*}} %cdhp,
227 fp_t hdp = hd; // CHECK-BOTH: store {{.*}} @_Z2hdv, {{.*}} %hdp,
228 fp_t chdp = chd; // CHECK-BOTH: store {{.*}} @chd, {{.*}} %chdp,
229#if defined (NOCHECKS) && !defined(__CUDA_ARCH__)
230 gp_t gp = g; // CHECK-HOST-NC: store {{.*}} @_Z1gv, {{.*}} %gp,
231#endif
232
233#if defined (NOCHECKS)
234 d(); // CHECK-BOTH-NC: call i32 @_Z1dv()
235 cd(); // CHECK-BOTH-NC: call i32 @cd()
236 h(); // CHECK-BOTH-NC: call i32 @_Z1hv()
237 ch(); // CHECK-BOTH-NC: call i32 @ch()
238#endif
239 dh(); // CHECK-BOTH: call i32 @_Z2dhv()
240 cdh(); // CHECK-BOTH: call i32 @cdh()
241#if defined (NOCHECKS) && !defined(__CUDA_ARCH__)
242 g<<<0,0>>>(); // CHECK-HOST-NC: call void @_Z1gv()
243#endif
244}
245
246// Test for address of overloaded function resolution in the global context.
247fp_t hp = h;
248fp_t chp = ch;
249fp_t dhp = dh;
250fp_t cdhp = cdh;
251gp_t gp = g;
252
253int x;
254// Check constructors/destructors for D/H functions
255struct s_cd_dh {
256 __host__ s_cd_dh() { x = 11; }
257 __device__ s_cd_dh() { x = 12; }
258 __host__ ~s_cd_dh() { x = 21; }
259 __device__ ~s_cd_dh() { x = 22; }
260};
261
262struct s_cd_hd {
263 __host__ __device__ s_cd_hd() { x = 31; }
264 __host__ __device__ ~s_cd_hd() { x = 32; }
265};
266
267// CHECK-BOTH: define void @_Z7wrapperv
268#if defined(__CUDA_ARCH__)
269__device__
270#else
271__host__
272#endif
273void wrapper() {
274 s_cd_dh scddh;
275 // CHECK-BOTH: call void @_ZN7s_cd_dhC1Ev(
276 s_cd_hd scdhd;
277 // CHECK-BOTH: call void @_ZN7s_cd_hdC1Ev
278
279 // CHECK-BOTH: call void @_ZN7s_cd_hdD1Ev(
280 // CHECK-BOTH: call void @_ZN7s_cd_dhD1Ev(
281}
282// CHECK-BOTH: ret void
283
284// Now it's time to check what's been generated for the methods we used.
285
286// CHECK-BOTH: define linkonce_odr void @_ZN7s_cd_dhC2Ev(
287// CHECK-HOST: store i32 11,
288// CHECK-DEVICE: store i32 12,
289// CHECK-BOTH: ret void
290
291// CHECK-BOTH: define linkonce_odr void @_ZN7s_cd_hdC2Ev(
292// CHECK-BOTH: store i32 31,
293// CHECK-BOTH: ret void
294
295// CHECK-BOTH: define linkonce_odr void @_ZN7s_cd_hdD2Ev(
296// CHECK-BOTH: store i32 32,
297// CHECK-BOTH: ret void
298
299// CHECK-BOTH: define linkonce_odr void @_ZN7s_cd_dhD2Ev(
300// CHECK-HOST: store i32 21,
301// CHECK-DEVICE: store i32 22,
302// CHECK-BOTH: ret void
303