; RUN: llc < %s -march=nvptx64 -mcpu=sm_20 | FileCheck -check-prefix=SM20 %s
; RUN: llc < %s -march=nvptx64 -mcpu=sm_35 | FileCheck -check-prefix=SM35 %s

; Each kernel in this file loads through a `noalias readonly` pointer argument
; and stores the value through a second pointer. The sm_35 run expects the
; load to be emitted as ld.global.nc; the sm_20 run expects a plain ld.global.

target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v16:16:16-v32:32:32-v64:64:64-v128:128:128-n16:32:64"
target triple = "nvptx64-unknown-unknown"

; foo1: copies one float from %from (noalias readonly) to %to.
; Expected load: ld.global.f32 on sm_20, ld.global.nc.f32 on sm_35.
; SM20-LABEL: .visible .entry foo1(
; SM20: ld.global.f32
; SM35-LABEL: .visible .entry foo1(
; SM35: ld.global.nc.f32
define void @foo1(float * noalias readonly %from, float * %to) {
  %1 = load float, float * %from
  store float %1, float * %to
  ret void
}

; foo2: copies one double from %from (noalias readonly) to %to.
; Expected load: ld.global.f64 on sm_20, ld.global.nc.f64 on sm_35.
; SM20-LABEL: .visible .entry foo2(
; SM20: ld.global.f64
; SM35-LABEL: .visible .entry foo2(
; SM35: ld.global.nc.f64
define void @foo2(double * noalias readonly %from, double * %to) {
  %1 = load double, double * %from
  store double %1, double * %to
  ret void
}

; foo3: copies one i16 from %from (noalias readonly) to %to.
; Expected load: ld.global.u16 on sm_20, ld.global.nc.u16 on sm_35.
; SM20-LABEL: .visible .entry foo3(
; SM20: ld.global.u16
; SM35-LABEL: .visible .entry foo3(
; SM35: ld.global.nc.u16
define void @foo3(i16 * noalias readonly %from, i16 * %to) {
  %1 = load i16, i16 * %from
  store i16 %1, i16 * %to
  ret void
}

; foo4: copies one i32 from %from (noalias readonly) to %to.
; Expected load: ld.global.u32 on sm_20, ld.global.nc.u32 on sm_35.
; SM20-LABEL: .visible .entry foo4(
; SM20: ld.global.u32
; SM35-LABEL: .visible .entry foo4(
; SM35: ld.global.nc.u32
define void @foo4(i32 * noalias readonly %from, i32 * %to) {
  %1 = load i32, i32 * %from
  store i32 %1, i32 * %to
  ret void
}

; foo5: copies one i64 from %from (noalias readonly) to %to.
; Expected load: ld.global.u64 on sm_20, ld.global.nc.u64 on sm_35.
; SM20-LABEL: .visible .entry foo5(
; SM20: ld.global.u64
; SM35-LABEL: .visible .entry foo5(
; SM35: ld.global.nc.u64
define void @foo5(i64 * noalias readonly %from, i64 * %to) {
  %1 = load i64, i64 * %from
  store i64 %1, i64 * %to
  ret void
}

; foo6: copies one i128 from %from (noalias readonly) to %to.
; i128 is not a standard integer type on nvptx64, so the checks expect the
; load to appear as two 64-bit loads (plain on sm_20, .nc on sm_35).
; SM20-LABEL: .visible .entry foo6(
; SM20: ld.global.u64
; SM20: ld.global.u64
; SM35-LABEL: .visible .entry foo6(
; SM35: ld.global.nc.u64
; SM35: ld.global.nc.u64
define void @foo6(i128 * noalias readonly %from, i128 * %to) {
  %1 = load i128, i128 * %from
  store i128 %1, i128 * %to
  ret void
}

; foo7: copies one <2 x i8> from %from (noalias readonly) to %to.
; Expected load: ld.global.v2.u8 on sm_20, ld.global.nc.v2.u8 on sm_35.
; SM20-LABEL: .visible .entry foo7(
; SM20: ld.global.v2.u8
; SM35-LABEL: .visible .entry foo7(
; SM35: ld.global.nc.v2.u8
define void @foo7(<2 x i8> * noalias readonly %from, <2 x i8> * %to) {
  %1 = load <2 x i8>, <2 x i8> * %from
  store <2 x i8> %1, <2 x i8> * %to
  ret void
}

; foo8: copies one <2 x i16> from %from (noalias readonly) to %to.
; Expected load: ld.global.v2.u16 on sm_20, ld.global.nc.v2.u16 on sm_35.
; SM20-LABEL: .visible .entry foo8(
; SM20: ld.global.v2.u16
; SM35-LABEL: .visible .entry foo8(
; SM35: ld.global.nc.v2.u16
define void @foo8(<2 x i16> * noalias readonly %from, <2 x i16> * %to) {
  %1 = load <2 x i16>, <2 x i16> * %from
  store <2 x i16> %1, <2 x i16> * %to
  ret void
}

; foo9: copies one <2 x i32> from %from (noalias readonly) to %to.
; Expected load: ld.global.v2.u32 on sm_20, ld.global.nc.v2.u32 on sm_35.
; SM20-LABEL: .visible .entry foo9(
; SM20: ld.global.v2.u32
; SM35-LABEL: .visible .entry foo9(
; SM35: ld.global.nc.v2.u32
define void @foo9(<2 x i32> * noalias readonly %from, <2 x i32> * %to) {
  %1 = load <2 x i32>, <2 x i32> * %from
  store <2 x i32> %1, <2 x i32> * %to
  ret void
}

; foo10: copies one <2 x i64> from %from (noalias readonly) to %to.
; Expected load: ld.global.v2.u64 on sm_20, ld.global.nc.v2.u64 on sm_35.
; SM20-LABEL: .visible .entry foo10(
; SM20: ld.global.v2.u64
; SM35-LABEL: .visible .entry foo10(
; SM35: ld.global.nc.v2.u64
define void @foo10(<2 x i64> * noalias readonly %from, <2 x i64> * %to) {
  %1 = load <2 x i64>, <2 x i64> * %from
  store <2 x i64> %1, <2 x i64> * %to
  ret void
}

; foo11: copies one <2 x float> from %from (noalias readonly) to %to.
; Expected load: ld.global.v2.f32 on sm_20, ld.global.nc.v2.f32 on sm_35.
; SM20-LABEL: .visible .entry foo11(
; SM20: ld.global.v2.f32
; SM35-LABEL: .visible .entry foo11(
; SM35: ld.global.nc.v2.f32
define void @foo11(<2 x float> * noalias readonly %from, <2 x float> * %to) {
  %1 = load <2 x float>, <2 x float> * %from
  store <2 x float> %1, <2 x float> * %to
  ret void
}

; foo12: copies one <2 x double> from %from (noalias readonly) to %to.
; Expected load: ld.global.v2.f64 on sm_20, ld.global.nc.v2.f64 on sm_35.
; SM20-LABEL: .visible .entry foo12(
; SM20: ld.global.v2.f64
; SM35-LABEL: .visible .entry foo12(
; SM35: ld.global.nc.v2.f64
define void @foo12(<2 x double> * noalias readonly %from, <2 x double> * %to) {
  %1 = load <2 x double>, <2 x double> * %from
  store <2 x double> %1, <2 x double> * %to
  ret void
}

; foo13: copies one <4 x i8> from %from (noalias readonly) to %to.
; Expected load: ld.global.v4.u8 on sm_20, ld.global.nc.v4.u8 on sm_35.
; SM20-LABEL: .visible .entry foo13(
; SM20: ld.global.v4.u8
; SM35-LABEL: .visible .entry foo13(
; SM35: ld.global.nc.v4.u8
define void @foo13(<4 x i8> * noalias readonly %from, <4 x i8> * %to) {
  %1 = load <4 x i8>, <4 x i8> * %from
  store <4 x i8> %1, <4 x i8> * %to
  ret void
}

; foo14: copies one <4 x i16> from %from (noalias readonly) to %to.
; Expected load: ld.global.v4.u16 on sm_20, ld.global.nc.v4.u16 on sm_35.
; SM20-LABEL: .visible .entry foo14(
; SM20: ld.global.v4.u16
; SM35-LABEL: .visible .entry foo14(
; SM35: ld.global.nc.v4.u16
define void @foo14(<4 x i16> * noalias readonly %from, <4 x i16> * %to) {
  %1 = load <4 x i16>, <4 x i16> * %from
  store <4 x i16> %1, <4 x i16> * %to
  ret void
}

; foo15: copies one <4 x i32> from %from (noalias readonly) to %to.
; Expected load: ld.global.v4.u32 on sm_20, ld.global.nc.v4.u32 on sm_35.
; SM20-LABEL: .visible .entry foo15(
; SM20: ld.global.v4.u32
; SM35-LABEL: .visible .entry foo15(
; SM35: ld.global.nc.v4.u32
define void @foo15(<4 x i32> * noalias readonly %from, <4 x i32> * %to) {
  %1 = load <4 x i32>, <4 x i32> * %from
  store <4 x i32> %1, <4 x i32> * %to
  ret void
}

; foo16: copies one <4 x float> from %from (noalias readonly) to %to.
; Expected load: ld.global.v4.f32 on sm_20, ld.global.nc.v4.f32 on sm_35.
; SM20-LABEL: .visible .entry foo16(
; SM20: ld.global.v4.f32
; SM35-LABEL: .visible .entry foo16(
; SM35: ld.global.nc.v4.f32
define void @foo16(<4 x float> * noalias readonly %from, <4 x float> * %to) {
  %1 = load <4 x float>, <4 x float> * %from
  store <4 x float> %1, <4 x float> * %to
  ret void
}

; foo17: copies one <4 x double> from %from (noalias readonly) to %to.
; The checks expect the load to appear as two v2.f64 loads
; (plain on sm_20, .nc on sm_35).
; SM20-LABEL: .visible .entry foo17(
; SM20: ld.global.v2.f64
; SM20: ld.global.v2.f64
; SM35-LABEL: .visible .entry foo17(
; SM35: ld.global.nc.v2.f64
; SM35: ld.global.nc.v2.f64
define void @foo17(<4 x double> * noalias readonly %from, <4 x double> * %to) {
  %1 = load <4 x double>, <4 x double> * %from
  store <4 x double> %1, <4 x double> * %to
  ret void
}

; foo18: copies one pointer value (float *) from %from (noalias readonly) to
; %to. The checks expect the pointer to be loaded as a 64-bit integer:
; ld.global.u64 on sm_20, ld.global.nc.u64 on sm_35.
; SM20-LABEL: .visible .entry foo18(
; SM20: ld.global.u64
; SM35-LABEL: .visible .entry foo18(
; SM35: ld.global.nc.u64
define void @foo18(float ** noalias readonly %from, float ** %to) {
  %1 = load float *, float ** %from
  store float * %1, float ** %to
  ret void
}

; Kernel annotations: one metadata node per test function, each tagging the
; function with "kernel" = 1. The checks above expect every function to be
; emitted as a `.visible .entry`.
!nvvm.annotations = !{!1 ,!2 ,!3 ,!4 ,!5 ,!6, !7 ,!8 ,!9 ,!10 ,!11 ,!12, !13, !14, !15, !16, !17, !18}
!1 = !{void (float *, float *)* @foo1, !"kernel", i32 1}
!2 = !{void (double *, double *)* @foo2, !"kernel", i32 1}
!3 = !{void (i16 *, i16 *)* @foo3, !"kernel", i32 1}
!4 = !{void (i32 *, i32 *)* @foo4, !"kernel", i32 1}
!5 = !{void (i64 *, i64 *)* @foo5, !"kernel", i32 1}
!6 = !{void (i128 *, i128 *)* @foo6, !"kernel", i32 1}
!7 = !{void (<2 x i8> *, <2 x i8> *)* @foo7, !"kernel", i32 1}
!8 = !{void (<2 x i16> *, <2 x i16> *)* @foo8, !"kernel", i32 1}
!9 = !{void (<2 x i32> *, <2 x i32> *)* @foo9, !"kernel", i32 1}
!10 = !{void (<2 x i64> *, <2 x i64> *)* @foo10, !"kernel", i32 1}
!11 = !{void (<2 x float> *, <2 x float> *)* @foo11, !"kernel", i32 1}
!12 = !{void (<2 x double> *, <2 x double> *)* @foo12, !"kernel", i32 1}
!13 = !{void (<4 x i8> *, <4 x i8> *)* @foo13, !"kernel", i32 1}
!14 = !{void (<4 x i16> *, <4 x i16> *)* @foo14, !"kernel", i32 1}
!15 = !{void (<4 x i32> *, <4 x i32> *)* @foo15, !"kernel", i32 1}
!16 = !{void (<4 x float> *, <4 x float> *)* @foo16, !"kernel", i32 1}
!17 = !{void (<4 x double> *, <4 x double> *)* @foo17, !"kernel", i32 1}
!18 = !{void (float **, float **)* @foo18, !"kernel", i32 1}