Artem Belevich | 94a55e8 | 2015-09-22 17:22:59 +0000 | [diff] [blame] | 1 | // REQUIRES: x86-registered-target |
| 2 | // REQUIRES: nvptx-registered-target |
| 3 | |
| 4 | // Make sure we handle target overloads correctly. |
| 5 | // RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu \ |
| 6 | // RUN: -fcuda-target-overloads -emit-llvm -o - %s \ |
| 7 | // RUN: | FileCheck -check-prefix=CHECK-BOTH -check-prefix=CHECK-HOST %s |
| 8 | // RUN: %clang_cc1 -triple nvptx64-nvidia-cuda -fcuda-is-device \ |
| 9 | // RUN: -fcuda-target-overloads -emit-llvm -o - %s \ |
Artem Belevich | 1860910 | 2016-02-12 18:29:18 +0000 | [diff] [blame] | 10 | // RUN: | FileCheck -check-prefix=CHECK-BOTH -check-prefix=CHECK-DEVICE \ |
| 11 | // RUN: -check-prefix=CHECK-DEVICE-STRICT %s |
Artem Belevich | 94a55e8 | 2015-09-22 17:22:59 +0000 | [diff] [blame] | 12 | |
| 13 | // Check target overloads handling with disabled call target checks. |
| 14 | // RUN: %clang_cc1 -DNOCHECKS -triple x86_64-unknown-linux-gnu -emit-llvm \ |
| 15 | // RUN: -fcuda-disable-target-call-checks -fcuda-target-overloads -o - %s \ |
| 16 | // RUN: | FileCheck -check-prefix=CHECK-BOTH -check-prefix=CHECK-HOST \ |
| 17 | // RUN: -check-prefix=CHECK-BOTH-NC -check-prefix=CHECK-HOST-NC %s |
| 18 | // RUN: %clang_cc1 -DNOCHECKS -triple nvptx64-nvidia-cuda -emit-llvm \ |
| 19 | // RUN: -fcuda-disable-target-call-checks -fcuda-target-overloads \ |
| 20 | // RUN: -fcuda-is-device -o - %s \ |
| 21 | // RUN: | FileCheck -check-prefix=CHECK-BOTH -check-prefix=CHECK-DEVICE \ |
| 22 | // RUN: -check-prefix=CHECK-BOTH-NC -check-prefix=CHECK-DEVICE-NC %s |
| 23 | |
| 24 | #include "Inputs/cuda.h" |
| 25 | |
| 26 | typedef int (*fp_t)(void); |
| 27 | typedef void (*gp_t)(void); |
| 28 | |
| 29 | // CHECK-HOST: @hp = global i32 ()* @_Z1hv |
| 30 | // CHECK-HOST: @chp = global i32 ()* @ch |
| 31 | // CHECK-HOST: @dhp = global i32 ()* @_Z2dhv |
| 32 | // CHECK-HOST: @cdhp = global i32 ()* @cdh |
| 33 | // CHECK-HOST: @gp = global void ()* @_Z1gv |
| 34 | |
| 35 | // CHECK-BOTH-LABEL: define i32 @_Z2dhv() |
| 36 | __device__ int dh(void) { return 1; } |
| 37 | // CHECK-DEVICE: ret i32 1 |
| 38 | __host__ int dh(void) { return 2; } |
| 39 | // CHECK-HOST: ret i32 2 |
| 40 | |
| 41 | // CHECK-BOTH-LABEL: define i32 @_Z2hdv() |
| 42 | __host__ __device__ int hd(void) { return 3; } |
| 43 | // CHECK-BOTH: ret i32 3 |
| 44 | |
| 45 | // CHECK-DEVICE-LABEL: define i32 @_Z1dv() |
| 46 | __device__ int d(void) { return 8; } |
| 47 | // CHECK-DEVICE: ret i32 8 |
| 48 | |
| 49 | // CHECK-HOST-LABEL: define i32 @_Z1hv() |
| 50 | __host__ int h(void) { return 9; } |
| 51 | // CHECK-HOST: ret i32 9 |
| 52 | |
| 53 | // CHECK-BOTH-LABEL: define void @_Z1gv() |
| 54 | __global__ void g(void) {} |
| 55 | // CHECK-BOTH: ret void |
| 56 | |
| 57 | // mangled names of extern "C" __host__ __device__ functions clash |
| 58 | // with those of their __host__/__device__ counterparts, so |
| 59 | // overloading of extern "C" functions can only happen for __host__ |
| 60 | // and __device__ functions -- we never codegen them in the same |
| 61 | // compilation and therefore mangled name conflict is not a problem. |
| 62 | |
| 63 | // CHECK-BOTH-LABEL: define i32 @cdh() |
| 64 | extern "C" __device__ int cdh(void) {return 10;} |
| 65 | // CHECK-DEVICE: ret i32 10 |
| 66 | extern "C" __host__ int cdh(void) {return 11;} |
| 67 | // CHECK-HOST: ret i32 11 |
| 68 | |
| 69 | // CHECK-DEVICE-LABEL: define i32 @cd() |
| 70 | extern "C" __device__ int cd(void) {return 12;} |
| 71 | // CHECK-DEVICE: ret i32 12 |
| 72 | |
| 73 | // CHECK-HOST-LABEL: define i32 @ch() |
| 74 | extern "C" __host__ int ch(void) {return 13;} |
| 75 | // CHECK-HOST: ret i32 13 |
| 76 | |
| 77 | // CHECK-BOTH-LABEL: define i32 @chd() |
| 78 | extern "C" __host__ __device__ int chd(void) {return 14;} |
| 79 | // CHECK-BOTH: ret i32 14 |
| 80 | |
Artem Belevich | 1860910 | 2016-02-12 18:29:18 +0000 | [diff] [blame] | 81 | // HD functions are sometimes allowed to call H or D functions -- this |
| 82 | // is an artifact of the source-to-source splitting performed by nvcc |
| 83 | // that we need to mimic. During device mode compilation in nvcc, host |
| 84 | // functions aren't present at all, so don't participate in |
| 85 | // overloading. But in clang, H and D functions are present in both |
| 86 | // compilation modes. Clang normally uses the target attribute as a |
| 87 | // tiebreaker between overloads with otherwise identical priority, but |
| 88 | // in order to match nvcc's behavior, we sometimes need to wholly |
| 89 | // discard overloads that would not be present during compilation |
| 90 | // under nvcc. |
| 91 | |
| 92 | template <typename T> T template_vs_function(T arg) { return 15; } |
| 93 | __device__ float template_vs_function(float arg) { return 16; } |
| 94 | |
| 95 | // Here we expect to call the templated function during host |
| 96 | // compilation, even if -fcuda-disable-target-call-checks is passed, |
| 97 | // and even though C++ overload rules prefer the non-templated |
| 98 | // function. |
| 99 | // CHECK-BOTH-LABEL: define void @_Z5hd_tfv() |
| 100 | __host__ __device__ void hd_tf(void) { |
| 101 | template_vs_function(1.0f); |
| 102 | // CHECK-HOST: call float @_Z20template_vs_functionIfET_S0_(float |
| 103 | // CHECK-DEVICE: call float @_Z20template_vs_functionf(float |
| 104 | template_vs_function(2.0); |
| 105 | // CHECK-HOST: call double @_Z20template_vs_functionIdET_S0_(double |
| 106 | // CHECK-DEVICE: call float @_Z20template_vs_functionf(float |
| 107 | } |
| 108 | |
| 109 | // Calls from __host__ and __device__ functions should always call the |
| 110 | // overloaded function that matches their mode. |
| 111 | // CHECK-HOST-LABEL: define void @_Z4h_tfv() |
| 112 | __host__ void h_tf() { |
| 113 | template_vs_function(1.0f); |
| 114 | // CHECK-HOST: call float @_Z20template_vs_functionIfET_S0_(float |
| 115 | template_vs_function(2.0); |
| 116 | // CHECK-HOST: call double @_Z20template_vs_functionIdET_S0_(double |
| 117 | } |
| 118 | |
| 119 | // CHECK-DEVICE-LABEL: define void @_Z4d_tfv() |
| 120 | __device__ void d_tf() { |
| 121 | template_vs_function(1.0f); |
| 122 | // CHECK-DEVICE: call float @_Z20template_vs_functionf(float |
| 123 | template_vs_function(2.0); |
| 124 | // CHECK-DEVICE: call float @_Z20template_vs_functionf(float |
| 125 | } |
| 126 | |
| 127 | // In case we have a mix of HD and H-only or D-only candidates in the |
| 128 | // overload set, normal C++ overload resolution rules apply first. |
| 129 | template <typename T> T template_vs_hd_function(T arg) { return 15; } |
| 130 | __host__ __device__ float template_vs_hd_function(float arg) { return 16; } |
| 131 | |
| 132 | // CHECK-BOTH-LABEL: define void @_Z7hd_thdfv() |
| 133 | __host__ __device__ void hd_thdf() { |
| 134 | template_vs_hd_function(1.0f); |
| 135 | // CHECK-HOST: call float @_Z23template_vs_hd_functionf(float |
| 136 | // CHECK-DEVICE: call float @_Z23template_vs_hd_functionf(float |
| 137 | template_vs_hd_function(1); |
| 138 | // CHECK-HOST: call i32 @_Z23template_vs_hd_functionIiET_S0_(i32 |
| 139 | // CHECK-DEVICE-STRICT: call float @_Z23template_vs_hd_functionf(float |
| 140 | // CHECK-DEVICE-NC: call i32 @_Z23template_vs_hd_functionIiET_S0_(i32 |
| 141 | } |
| 142 | |
| 143 | // CHECK-HOST-LABEL: define void @_Z6h_thdfv() |
| 144 | __host__ void h_thdf() { |
| 145 | template_vs_hd_function(1.0f); |
| 146 | // CHECK-HOST: call float @_Z23template_vs_hd_functionf(float |
| 147 | template_vs_hd_function(1); |
| 148 | // CHECK-HOST: call i32 @_Z23template_vs_hd_functionIiET_S0_(i32 |
| 149 | } |
| 150 | |
| 151 | // CHECK-DEVICE-LABEL: define void @_Z6d_thdfv() |
| 152 | __device__ void d_thdf() { |
| 153 | template_vs_hd_function(1.0f); |
| 154 | // CHECK-DEVICE: call float @_Z23template_vs_hd_functionf(float |
| 155 | template_vs_hd_function(1); |
| 156 | // Host-only function template is not callable with strict call checks, |
| 157 | // so for device side HD function will be the only choice. |
| 158 | // CHECK-DEVICE: call float @_Z23template_vs_hd_functionf(float |
| 159 | } |
| 160 | |
| 161 | // Check that overloads still work the same way on both host and |
| 162 | // device side when the overload set contains only functions from one |
| 163 | // side of compilation. |
| 164 | __device__ float device_only_function(int arg) { return 17; } |
| 165 | __device__ float device_only_function(float arg) { return 18; } |
| 166 | |
| 167 | __host__ float host_only_function(int arg) { return 19; } |
| 168 | __host__ float host_only_function(float arg) { return 20; } |
| 169 | |
| 170 | // CHECK-BOTH-LABEL: define void @_Z6hd_dofv() |
| 171 | __host__ __device__ void hd_dof() { |
| 172 | #ifdef NOCHECKS |
| 173 | device_only_function(1.0f); |
| 174 | // CHECK-BOTH-NC: call float @_Z20device_only_functionf(float |
| 175 | device_only_function(1); |
| 176 | // CHECK-BOTH-NC: call float @_Z20device_only_functioni(i32 |
| 177 | host_only_function(1.0f); |
| 178 | // CHECK-BOTH-NC: call float @_Z18host_only_functionf(float |
| 179 | host_only_function(1); |
| 180 | // CHECK-BOTH-NC: call float @_Z18host_only_functioni(i32 |
| 181 | #endif |
| 182 | } |
| 183 | |
| 184 | |
Artem Belevich | 94a55e8 | 2015-09-22 17:22:59 +0000 | [diff] [blame] | 185 | // CHECK-HOST-LABEL: define void @_Z5hostfv() |
| 186 | __host__ void hostf(void) { |
Artem Belevich | 94a55e8 | 2015-09-22 17:22:59 +0000 | [diff] [blame] | 187 | fp_t hp = h; // CHECK-HOST: store {{.*}} @_Z1hv, {{.*}} %hp, |
| 188 | fp_t chp = ch; // CHECK-HOST: store {{.*}} @ch, {{.*}} %chp, |
| 189 | fp_t dhp = dh; // CHECK-HOST: store {{.*}} @_Z2dhv, {{.*}} %dhp, |
| 190 | fp_t cdhp = cdh; // CHECK-HOST: store {{.*}} @cdh, {{.*}} %cdhp, |
| 191 | fp_t hdp = hd; // CHECK-HOST: store {{.*}} @_Z2hdv, {{.*}} %hdp, |
| 192 | fp_t chdp = chd; // CHECK-HOST: store {{.*}} @chd, {{.*}} %chdp, |
| 193 | gp_t gp = g; // CHECK-HOST: store {{.*}} @_Z1gv, {{.*}} %gp, |
| 194 | |
Artem Belevich | 94a55e8 | 2015-09-22 17:22:59 +0000 | [diff] [blame] | 195 | h(); // CHECK-HOST: call i32 @_Z1hv() |
| 196 | ch(); // CHECK-HOST: call i32 @ch() |
| 197 | dh(); // CHECK-HOST: call i32 @_Z2dhv() |
| 198 | cdh(); // CHECK-HOST: call i32 @cdh() |
| 199 | g<<<0,0>>>(); // CHECK-HOST: call void @_Z1gv() |
| 200 | } |
| 201 | |
| 202 | // CHECK-DEVICE-LABEL: define void @_Z7devicefv() |
| 203 | __device__ void devicef(void) { |
| 204 | fp_t dp = d; // CHECK-DEVICE: store {{.*}} @_Z1dv, {{.*}} %dp, |
| 205 | fp_t cdp = cd; // CHECK-DEVICE: store {{.*}} @cd, {{.*}} %cdp, |
Artem Belevich | 94a55e8 | 2015-09-22 17:22:59 +0000 | [diff] [blame] | 206 | fp_t dhp = dh; // CHECK-DEVICE: store {{.*}} @_Z2dhv, {{.*}} %dhp, |
| 207 | fp_t cdhp = cdh; // CHECK-DEVICE: store {{.*}} @cdh, {{.*}} %cdhp, |
| 208 | fp_t hdp = hd; // CHECK-DEVICE: store {{.*}} @_Z2hdv, {{.*}} %hdp, |
| 209 | fp_t chdp = chd; // CHECK-DEVICE: store {{.*}} @chd, {{.*}} %chdp, |
| 210 | |
| 211 | d(); // CHECK-DEVICE: call i32 @_Z1dv() |
| 212 | cd(); // CHECK-DEVICE: call i32 @cd() |
Artem Belevich | 94a55e8 | 2015-09-22 17:22:59 +0000 | [diff] [blame] | 213 | dh(); // CHECK-DEVICE: call i32 @_Z2dhv() |
| 214 | cdh(); // CHECK-DEVICE: call i32 @cdh() |
| 215 | } |
| 216 | |
| 217 | // CHECK-BOTH-LABEL: define void @_Z11hostdevicefv() |
| 218 | __host__ __device__ void hostdevicef(void) { |
| 219 | #if defined (NOCHECKS) |
| 220 | fp_t dp = d; // CHECK-BOTH-NC: store {{.*}} @_Z1dv, {{.*}} %dp, |
| 221 | fp_t cdp = cd; // CHECK-BOTH-NC: store {{.*}} @cd, {{.*}} %cdp, |
| 222 | fp_t hp = h; // CHECK-BOTH-NC: store {{.*}} @_Z1hv, {{.*}} %hp, |
| 223 | fp_t chp = ch; // CHECK-BOTH-NC: store {{.*}} @ch, {{.*}} %chp, |
| 224 | #endif |
| 225 | fp_t dhp = dh; // CHECK-BOTH: store {{.*}} @_Z2dhv, {{.*}} %dhp, |
| 226 | fp_t cdhp = cdh; // CHECK-BOTH: store {{.*}} @cdh, {{.*}} %cdhp, |
| 227 | fp_t hdp = hd; // CHECK-BOTH: store {{.*}} @_Z2hdv, {{.*}} %hdp, |
| 228 | fp_t chdp = chd; // CHECK-BOTH: store {{.*}} @chd, {{.*}} %chdp, |
| 229 | #if defined (NOCHECKS) && !defined(__CUDA_ARCH__) |
| 230 | gp_t gp = g; // CHECK-HOST-NC: store {{.*}} @_Z1gv, {{.*}} %gp, |
| 231 | #endif |
| 232 | |
| 233 | #if defined (NOCHECKS) |
| 234 | d(); // CHECK-BOTH-NC: call i32 @_Z1dv() |
| 235 | cd(); // CHECK-BOTH-NC: call i32 @cd() |
| 236 | h(); // CHECK-BOTH-NC: call i32 @_Z1hv() |
| 237 | ch(); // CHECK-BOTH-NC: call i32 @ch() |
| 238 | #endif |
| 239 | dh(); // CHECK-BOTH: call i32 @_Z2dhv() |
| 240 | cdh(); // CHECK-BOTH: call i32 @cdh() |
| 241 | #if defined (NOCHECKS) && !defined(__CUDA_ARCH__) |
| 242 | g<<<0,0>>>(); // CHECK-HOST-NC: call void @_Z1gv() |
| 243 | #endif |
| 244 | } |
| 245 | |
| 246 | // Test for address of overloaded function resolution in the global context. |
| 247 | fp_t hp = h; |
| 248 | fp_t chp = ch; |
| 249 | fp_t dhp = dh; |
| 250 | fp_t cdhp = cdh; |
| 251 | gp_t gp = g; |
| 252 | |
| 253 | int x; |
| 254 | // Check constructors/destructors for D/H functions |
| 255 | struct s_cd_dh { |
| 256 | __host__ s_cd_dh() { x = 11; } |
| 257 | __device__ s_cd_dh() { x = 12; } |
| 258 | __host__ ~s_cd_dh() { x = 21; } |
| 259 | __device__ ~s_cd_dh() { x = 22; } |
| 260 | }; |
| 261 | |
| 262 | struct s_cd_hd { |
| 263 | __host__ __device__ s_cd_hd() { x = 31; } |
| 264 | __host__ __device__ ~s_cd_hd() { x = 32; } |
| 265 | }; |
| 266 | |
| 267 | // CHECK-BOTH: define void @_Z7wrapperv |
| 268 | #if defined(__CUDA_ARCH__) |
| 269 | __device__ |
| 270 | #else |
| 271 | __host__ |
| 272 | #endif |
| 273 | void wrapper() { |
| 274 | s_cd_dh scddh; |
| 275 | // CHECK-BOTH: call void @_ZN7s_cd_dhC1Ev( |
| 276 | s_cd_hd scdhd; |
| 277 | // CHECK-BOTH: call void @_ZN7s_cd_hdC1Ev |
| 278 | |
| 279 | // CHECK-BOTH: call void @_ZN7s_cd_hdD1Ev( |
| 280 | // CHECK-BOTH: call void @_ZN7s_cd_dhD1Ev( |
| 281 | } |
| 282 | // CHECK-BOTH: ret void |
| 283 | |
| 284 | // Now it's time to check what's been generated for the methods we used. |
| 285 | |
| 286 | // CHECK-BOTH: define linkonce_odr void @_ZN7s_cd_dhC2Ev( |
| 287 | // CHECK-HOST: store i32 11, |
| 288 | // CHECK-DEVICE: store i32 12, |
| 289 | // CHECK-BOTH: ret void |
| 290 | |
| 291 | // CHECK-BOTH: define linkonce_odr void @_ZN7s_cd_hdC2Ev( |
| 292 | // CHECK-BOTH: store i32 31, |
| 293 | // CHECK-BOTH: ret void |
| 294 | |
| 295 | // CHECK-BOTH: define linkonce_odr void @_ZN7s_cd_hdD2Ev( |
| 296 | // CHECK-BOTH: store i32 32, |
| 297 | // CHECK-BOTH: ret void |
| 298 | |
| 299 | // CHECK-BOTH: define linkonce_odr void @_ZN7s_cd_dhD2Ev( |
| 300 | // CHECK-HOST: store i32 21, |
| 301 | // CHECK-DEVICE: store i32 22, |
| 302 | // CHECK-BOTH: ret void |
| 303 | |