Jingyue Wu | 48a9bdc | 2015-07-20 21:28:54 +0000 | [diff] [blame] | 1 | ; RUN: llc < %s -march=nvptx64 -mcpu=sm_20 | FileCheck -check-prefix=SM20 %s |
| 2 | ; RUN: llc < %s -march=nvptx64 -mcpu=sm_35 | FileCheck -check-prefix=SM35 %s |
| 3 | |
; Module-level target description. The layout string declares little-endian
; byte order ('e'), 64-bit pointers ('p:64:64:64') and native integer widths
; of 16, 32 and 64 bits ('n16:32:64'); the triple selects the 64-bit NVPTX target.
| 4 | target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v16:16:16-v32:32:32-v64:64:64-v128:128:128-n16:32:64" |
| 5 | target triple = "nvptx64-unknown-unknown" |
| 6 | |
| 7 | ; SM20-LABEL: .visible .entry foo1( |
| 8 | ; SM20: ld.global.f32 |
| 9 | ; SM35-LABEL: .visible .entry foo1( |
| 10 | ; SM35: ld.global.nc.f32 |
; Kernel (listed in !nvvm.annotations): loads one float through the
; noalias readonly pointer %from and stores it to %to.
| 11 | define void @foo1(float * noalias readonly %from, float * %to) { |
| 12 | %1 = load float, float * %from |
| 13 | store float %1, float * %to |
| 14 | ret void |
| 15 | } |
| 16 | |
| 17 | ; SM20-LABEL: .visible .entry foo2( |
| 18 | ; SM20: ld.global.f64 |
| 19 | ; SM35-LABEL: .visible .entry foo2( |
| 20 | ; SM35: ld.global.nc.f64 |
; Kernel (listed in !nvvm.annotations): loads one double through the
; noalias readonly pointer %from and stores it to %to.
| 21 | define void @foo2(double * noalias readonly %from, double * %to) { |
| 22 | %1 = load double, double * %from |
| 23 | store double %1, double * %to |
| 24 | ret void |
| 25 | } |
| 26 | |
| 27 | ; SM20-LABEL: .visible .entry foo3( |
| 28 | ; SM20: ld.global.u16 |
| 29 | ; SM35-LABEL: .visible .entry foo3( |
| 30 | ; SM35: ld.global.nc.u16 |
; Kernel (listed in !nvvm.annotations): loads one i16 through the
; noalias readonly pointer %from and stores it to %to.
| 31 | define void @foo3(i16 * noalias readonly %from, i16 * %to) { |
| 32 | %1 = load i16, i16 * %from |
| 33 | store i16 %1, i16 * %to |
| 34 | ret void |
| 35 | } |
| 36 | |
| 37 | ; SM20-LABEL: .visible .entry foo4( |
| 38 | ; SM20: ld.global.u32 |
| 39 | ; SM35-LABEL: .visible .entry foo4( |
| 40 | ; SM35: ld.global.nc.u32 |
; Kernel (listed in !nvvm.annotations): loads one i32 through the
; noalias readonly pointer %from and stores it to %to.
| 41 | define void @foo4(i32 * noalias readonly %from, i32 * %to) { |
| 42 | %1 = load i32, i32 * %from |
| 43 | store i32 %1, i32 * %to |
| 44 | ret void |
| 45 | } |
| 46 | |
| 47 | ; SM20-LABEL: .visible .entry foo5( |
| 48 | ; SM20: ld.global.u64 |
| 49 | ; SM35-LABEL: .visible .entry foo5( |
| 50 | ; SM35: ld.global.nc.u64 |
; Kernel (listed in !nvvm.annotations): loads one i64 through the
; noalias readonly pointer %from and stores it to %to.
| 51 | define void @foo5(i64 * noalias readonly %from, i64 * %to) { |
| 52 | %1 = load i64, i64 * %from |
| 53 | store i64 %1, i64 * %to |
| 54 | ret void |
| 55 | } |
| 56 | |
| 57 | ; i128 is a non-standard integer type in nvptx64; the checks below expect it to be loaded as two 64-bit loads. |
| 58 | ; SM20-LABEL: .visible .entry foo6( |
| 59 | ; SM20: ld.global.u64 |
| 60 | ; SM20: ld.global.u64 |
| 61 | ; SM35-LABEL: .visible .entry foo6( |
| 62 | ; SM35: ld.global.nc.u64 |
| 63 | ; SM35: ld.global.nc.u64 |
; Kernel (listed in !nvvm.annotations): loads one i128 through the
; noalias readonly pointer %from and stores it to %to.
| 64 | define void @foo6(i128 * noalias readonly %from, i128 * %to) { |
| 65 | %1 = load i128, i128 * %from |
| 66 | store i128 %1, i128 * %to |
| 67 | ret void |
| 68 | } |
| 69 | |
| 70 | ; SM20-LABEL: .visible .entry foo7( |
| 71 | ; SM20: ld.global.v2.u8 |
| 72 | ; SM35-LABEL: .visible .entry foo7( |
| 73 | ; SM35: ld.global.nc.v2.u8 |
; Kernel (listed in !nvvm.annotations): loads one <2 x i8> vector through the
; noalias readonly pointer %from and stores it to %to.
| 74 | define void @foo7(<2 x i8> * noalias readonly %from, <2 x i8> * %to) { |
| 75 | %1 = load <2 x i8>, <2 x i8> * %from |
| 76 | store <2 x i8> %1, <2 x i8> * %to |
| 77 | ret void |
| 78 | } |
| 79 | |
| 80 | ; SM20-LABEL: .visible .entry foo8( |
| 81 | ; SM20: ld.global.v2.u16 |
| 82 | ; SM35-LABEL: .visible .entry foo8( |
| 83 | ; SM35: ld.global.nc.v2.u16 |
; Kernel (listed in !nvvm.annotations): loads one <2 x i16> vector through the
; noalias readonly pointer %from and stores it to %to.
| 84 | define void @foo8(<2 x i16> * noalias readonly %from, <2 x i16> * %to) { |
| 85 | %1 = load <2 x i16>, <2 x i16> * %from |
| 86 | store <2 x i16> %1, <2 x i16> * %to |
| 87 | ret void |
| 88 | } |
| 89 | |
| 90 | ; SM20-LABEL: .visible .entry foo9( |
| 91 | ; SM20: ld.global.v2.u32 |
| 92 | ; SM35-LABEL: .visible .entry foo9( |
| 93 | ; SM35: ld.global.nc.v2.u32 |
; Kernel (listed in !nvvm.annotations): loads one <2 x i32> vector through the
; noalias readonly pointer %from and stores it to %to.
| 94 | define void @foo9(<2 x i32> * noalias readonly %from, <2 x i32> * %to) { |
| 95 | %1 = load <2 x i32>, <2 x i32> * %from |
| 96 | store <2 x i32> %1, <2 x i32> * %to |
| 97 | ret void |
| 98 | } |
| 99 | |
| 100 | ; SM20-LABEL: .visible .entry foo10( |
| 101 | ; SM20: ld.global.v2.u64 |
| 102 | ; SM35-LABEL: .visible .entry foo10( |
| 103 | ; SM35: ld.global.nc.v2.u64 |
; Kernel (listed in !nvvm.annotations): loads one <2 x i64> vector through the
; noalias readonly pointer %from and stores it to %to.
| 104 | define void @foo10(<2 x i64> * noalias readonly %from, <2 x i64> * %to) { |
| 105 | %1 = load <2 x i64>, <2 x i64> * %from |
| 106 | store <2 x i64> %1, <2 x i64> * %to |
| 107 | ret void |
| 108 | } |
| 109 | |
| 110 | ; SM20-LABEL: .visible .entry foo11( |
| 111 | ; SM20: ld.global.v2.f32 |
| 112 | ; SM35-LABEL: .visible .entry foo11( |
| 113 | ; SM35: ld.global.nc.v2.f32 |
; Kernel (listed in !nvvm.annotations): loads one <2 x float> vector through the
; noalias readonly pointer %from and stores it to %to.
| 114 | define void @foo11(<2 x float> * noalias readonly %from, <2 x float> * %to) { |
| 115 | %1 = load <2 x float>, <2 x float> * %from |
| 116 | store <2 x float> %1, <2 x float> * %to |
| 117 | ret void |
| 118 | } |
| 119 | |
| 120 | ; SM20-LABEL: .visible .entry foo12( |
| 121 | ; SM20: ld.global.v2.f64 |
| 122 | ; SM35-LABEL: .visible .entry foo12( |
| 123 | ; SM35: ld.global.nc.v2.f64 |
; Kernel (listed in !nvvm.annotations): loads one <2 x double> vector through the
; noalias readonly pointer %from and stores it to %to.
| 124 | define void @foo12(<2 x double> * noalias readonly %from, <2 x double> * %to) { |
| 125 | %1 = load <2 x double>, <2 x double> * %from |
| 126 | store <2 x double> %1, <2 x double> * %to |
| 127 | ret void |
| 128 | } |
| 129 | |
| 130 | ; SM20-LABEL: .visible .entry foo13( |
| 131 | ; SM20: ld.global.v4.u8 |
| 132 | ; SM35-LABEL: .visible .entry foo13( |
| 133 | ; SM35: ld.global.nc.v4.u8 |
; Kernel (listed in !nvvm.annotations): loads one <4 x i8> vector through the
; noalias readonly pointer %from and stores it to %to.
| 134 | define void @foo13(<4 x i8> * noalias readonly %from, <4 x i8> * %to) { |
| 135 | %1 = load <4 x i8>, <4 x i8> * %from |
| 136 | store <4 x i8> %1, <4 x i8> * %to |
| 137 | ret void |
| 138 | } |
| 139 | |
| 140 | ; SM20-LABEL: .visible .entry foo14( |
| 141 | ; SM20: ld.global.v4.u16 |
| 142 | ; SM35-LABEL: .visible .entry foo14( |
| 143 | ; SM35: ld.global.nc.v4.u16 |
; Kernel (listed in !nvvm.annotations): loads one <4 x i16> vector through the
; noalias readonly pointer %from and stores it to %to.
| 144 | define void @foo14(<4 x i16> * noalias readonly %from, <4 x i16> * %to) { |
| 145 | %1 = load <4 x i16>, <4 x i16> * %from |
| 146 | store <4 x i16> %1, <4 x i16> * %to |
| 147 | ret void |
| 148 | } |
| 149 | |
| 150 | ; SM20-LABEL: .visible .entry foo15( |
| 151 | ; SM20: ld.global.v4.u32 |
| 152 | ; SM35-LABEL: .visible .entry foo15( |
| 153 | ; SM35: ld.global.nc.v4.u32 |
; Kernel (listed in !nvvm.annotations): loads one <4 x i32> vector through the
; noalias readonly pointer %from and stores it to %to.
| 154 | define void @foo15(<4 x i32> * noalias readonly %from, <4 x i32> * %to) { |
| 155 | %1 = load <4 x i32>, <4 x i32> * %from |
| 156 | store <4 x i32> %1, <4 x i32> * %to |
| 157 | ret void |
| 158 | } |
| 159 | |
| 160 | ; SM20-LABEL: .visible .entry foo16( |
| 161 | ; SM20: ld.global.v4.f32 |
| 162 | ; SM35-LABEL: .visible .entry foo16( |
| 163 | ; SM35: ld.global.nc.v4.f32 |
; Kernel (listed in !nvvm.annotations): loads one <4 x float> vector through the
; noalias readonly pointer %from and stores it to %to.
| 164 | define void @foo16(<4 x float> * noalias readonly %from, <4 x float> * %to) { |
| 165 | %1 = load <4 x float>, <4 x float> * %from |
| 166 | store <4 x float> %1, <4 x float> * %to |
| 167 | ret void |
| 168 | } |
| 169 | |
| 170 | ; SM20-LABEL: .visible .entry foo17( |
| 171 | ; SM20: ld.global.v2.f64 |
| 172 | ; SM20: ld.global.v2.f64 |
| 173 | ; SM35-LABEL: .visible .entry foo17( |
| 174 | ; SM35: ld.global.nc.v2.f64 |
| 175 | ; SM35: ld.global.nc.v2.f64 |
; Kernel (listed in !nvvm.annotations): loads one <4 x double> vector through the
; noalias readonly pointer %from and stores it to %to. The checks preceding this
; function expect the load to appear as two v2.f64 loads.
| 176 | define void @foo17(<4 x double> * noalias readonly %from, <4 x double> * %to) { |
| 177 | %1 = load <4 x double>, <4 x double> * %from |
| 178 | store <4 x double> %1, <4 x double> * %to |
| 179 | ret void |
| 180 | } |
| 181 | |
| 182 | ; SM20-LABEL: .visible .entry foo18( |
| 183 | ; SM20: ld.global.u64 |
| 184 | ; SM35-LABEL: .visible .entry foo18( |
| 185 | ; SM35: ld.global.nc.u64 |
; Kernel (listed in !nvvm.annotations): loads one pointer value (float *) through
; the noalias readonly pointer %from and stores it to %to. The checks preceding
; this function expect a 64-bit integer load for the pointer.
| 186 | define void @foo18(float ** noalias readonly %from, float ** %to) { |
| 187 | %1 = load float *, float ** %from |
| 188 | store float * %1, float ** %to |
| 189 | ret void |
| 190 | } |
| 191 | |
Bjarke Hammersholt Roune | 5cbc7d2 | 2015-08-05 23:11:57 +0000 | [diff] [blame] | 192 | ; Test that we can infer a cached load for a pointer induction variable. |
| 193 | ; SM20-LABEL: .visible .entry foo19( |
| 194 | ; SM20: ld.global.f32 |
| 195 | ; SM35-LABEL: .visible .entry foo19( |
| 196 | ; SM35: ld.global.nc.f32 |
; Kernel (listed in !nvvm.annotations): sums floats read from %from and stores
; the total to %to. The loop body runs at least once: the counter %i starts at 0,
; and the loop exits when the incremented counter %nexti equals %n. The load
; address is a getelementptr off %from indexed by %i, not %from itself.
| 197 | define void @foo19(float * noalias readonly %from, float * %to, i32 %n) { |
| 198 | entry: |
| 199 | br label %loop |
| 200 |
| 201 | loop: |
| 202 | %i = phi i32 [ 0, %entry ], [ %nexti, %loop ] |
| 203 | %sum = phi float [ 0.0, %entry ], [ %nextsum, %loop ] |
| 204 | %ptr = getelementptr inbounds float, float * %from, i32 %i |
| 205 | %value = load float, float * %ptr, align 4 |
| 206 | %nextsum = fadd float %value, %sum |
| 207 | %nexti = add nsw i32 %i, 1 |
| 208 | %exitcond = icmp eq i32 %nexti, %n |
| 209 | br i1 %exitcond, label %exit, label %loop |
| 210 |
| 211 | exit: |
| 212 | store float %nextsum, float * %to |
| 213 | ret void |
| 214 | } |
| 215 | |
| 216 | ; This test captures the case of a non-kernel function. In a |
| 217 | ; non-kernel function, without interprocedural analysis, we do not |
| 218 | ; know that the parameter is global. We also do not know that the |
| 219 | ; pointed-to memory is never written to (for the duration of the |
| 220 | ; kernel). For both reasons, we cannot use a cached load here. |
| 221 | ; SM20-LABEL: notkernel( |
| 222 | ; SM20: ld.f32 |
| 223 | ; SM35-LABEL: notkernel( |
| 224 | ; SM35: ld.f32 |
; Not listed in !nvvm.annotations, so this function carries no kernel marking.
; It loads one float through the noalias readonly pointer %from and stores it
; to %to.
| 225 | define void @notkernel(float * noalias readonly %from, float * %to) { |
| 226 | %1 = load float, float * %from |
| 227 | store float %1, float * %to |
| 228 | ret void |
| 229 | } |
| 230 | |
| 231 | ; As @notkernel, but with the parameter explicitly marked as global. We still |
| 232 | ; do not know that the parameter is never written to (for the duration of the |
| 233 | ; kernel). This case does not currently come up normally since we do not infer |
| 234 | ; that pointers are global interprocedurally as of 2015-08-05. |
| 235 | ; SM20-LABEL: notkernel2( |
| 236 | ; SM20: ld.global.f32 |
| 237 | ; SM35-LABEL: notkernel2( |
| 238 | ; SM35: ld.global.f32 |
; Not listed in !nvvm.annotations, so this function carries no kernel marking.
; %from is an addrspace(1) pointer while %to has no explicit address space; the
; body loads one float from %from and stores it to %to.
| 239 | define void @notkernel2(float addrspace(1) * noalias readonly %from, float * %to) { |
| 240 | %1 = load float, float addrspace(1) * %from |
| 241 | store float %1, float * %to |
| 242 | ret void |
| 243 | } |
| 244 | |
; Kernel annotations: each node !1..!19 tags one of @foo1..@foo19 with
; "kernel" = 1. @notkernel and @notkernel2 are deliberately absent from this
; list, since they exercise the non-kernel case.
| 245 | !nvvm.annotations = !{!1 ,!2 ,!3 ,!4 ,!5 ,!6, !7 ,!8 ,!9 ,!10 ,!11 ,!12, !13, !14, !15, !16, !17, !18, !19} |
Jingyue Wu | 48a9bdc | 2015-07-20 21:28:54 +0000 | [diff] [blame] | 246 | !1 = !{void (float *, float *)* @foo1, !"kernel", i32 1} |
| 247 | !2 = !{void (double *, double *)* @foo2, !"kernel", i32 1} |
| 248 | !3 = !{void (i16 *, i16 *)* @foo3, !"kernel", i32 1} |
| 249 | !4 = !{void (i32 *, i32 *)* @foo4, !"kernel", i32 1} |
| 250 | !5 = !{void (i64 *, i64 *)* @foo5, !"kernel", i32 1} |
| 251 | !6 = !{void (i128 *, i128 *)* @foo6, !"kernel", i32 1} |
| 252 | !7 = !{void (<2 x i8> *, <2 x i8> *)* @foo7, !"kernel", i32 1} |
| 253 | !8 = !{void (<2 x i16> *, <2 x i16> *)* @foo8, !"kernel", i32 1} |
| 254 | !9 = !{void (<2 x i32> *, <2 x i32> *)* @foo9, !"kernel", i32 1} |
| 255 | !10 = !{void (<2 x i64> *, <2 x i64> *)* @foo10, !"kernel", i32 1} |
| 256 | !11 = !{void (<2 x float> *, <2 x float> *)* @foo11, !"kernel", i32 1} |
| 257 | !12 = !{void (<2 x double> *, <2 x double> *)* @foo12, !"kernel", i32 1} |
| 258 | !13 = !{void (<4 x i8> *, <4 x i8> *)* @foo13, !"kernel", i32 1} |
| 259 | !14 = !{void (<4 x i16> *, <4 x i16> *)* @foo14, !"kernel", i32 1} |
| 260 | !15 = !{void (<4 x i32> *, <4 x i32> *)* @foo15, !"kernel", i32 1} |
| 261 | !16 = !{void (<4 x float> *, <4 x float> *)* @foo16, !"kernel", i32 1} |
| 262 | !17 = !{void (<4 x double> *, <4 x double> *)* @foo17, !"kernel", i32 1} |
| 263 | !18 = !{void (float **, float **)* @foo18, !"kernel", i32 1} |
Bjarke Hammersholt Roune | 5cbc7d2 | 2015-08-05 23:11:57 +0000 | [diff] [blame] | 264 | !19 = !{void (float *, float *, i32)* @foo19, !"kernel", i32 1} |