; blob: ef1b7da6ad0f530247e69c5f5d6c908f645f8e2a
; RUN: llc < %s -march=nvptx -mcpu=sm_20 | FileCheck %s --check-prefix=PTX32
; RUN: llc < %s -march=nvptx64 -mcpu=sm_20 | FileCheck %s --check-prefix=PTX64

; Ensure we access the local stack properly

; foo: volatile store of the argument into a single i32 stack slot.
; For both the 32-bit and 64-bit prefixes the checks below expect, in order:
; %SPL initialised from the local depot, %SP derived from %SPL via cvta.local,
; the parameter load, and a volatile store addressed relative to %SP.
; PTX32: mov.u32 %SPL, __local_depot{{[0-9]+}};
; PTX32: cvta.local.u32 %SP, %SPL;
; PTX32: ld.param.u32 %r{{[0-9]+}}, [foo_param_0];
; PTX32: st.volatile.u32 [%SP+0], %r{{[0-9]+}};
; PTX64: mov.u64 %SPL, __local_depot{{[0-9]+}};
; PTX64: cvta.local.u64 %SP, %SPL;
; PTX64: ld.param.u32 %r{{[0-9]+}}, [foo_param_0];
; PTX64: st.volatile.u32 [%SP+0], %r{{[0-9]+}};
define void @foo(i32 %a) {
  %local = alloca i32, align 4
  store volatile i32 %a, i32* %local
  ret void
}

; foo2: stores the argument into a stack slot and then passes the slot's
; address to @bar. This function is listed as a kernel in the
; !nvvm.annotations metadata of this file.
; The checks expect the store address to be computed from %SPL (captured as
; SP_REG) and the store itself to be emitted as st.local through that
; register.
; PTX32: mov.u32 %SPL, __local_depot{{[0-9]+}};
; PTX32: cvta.local.u32 %SP, %SPL;
; PTX32: ld.param.u32 %r{{[0-9]+}}, [foo2_param_0];
; PTX32: add.u32 %r[[SP_REG:[0-9]+]], %SPL, 0;
; PTX32: st.local.u32 [%r[[SP_REG]]], %r{{[0-9]+}};
; PTX64: mov.u64 %SPL, __local_depot{{[0-9]+}};
; PTX64: cvta.local.u64 %SP, %SPL;
; PTX64: ld.param.u32 %r{{[0-9]+}}, [foo2_param_0];
; PTX64: add.u64 %rd[[SP_REG:[0-9]+]], %SPL, 0;
; PTX64: st.local.u32 [%rd[[SP_REG]]], %r{{[0-9]+}};
define void @foo2(i32 %a) {
  %local = alloca i32, align 4
  store i32 %a, i32* %local
  call void @bar(i32* %local)
  ret void
}

; External callee; the tests in this file pass it the address of a stack slot.
declare void @bar(i32* %a)

; Annotate @foo2 as a kernel.
!nvvm.annotations = !{!0}
!0 = !{void (i32)* @foo2, !"kernel", i32 1}

; foo3: stores the argument into element %a of a 3-element i32 stack array.
; The array's address is not passed to any call.
; The checks expect that no cvta.local of %SPL into %SP appears between the
; %SPL setup and the parameter load, and that the store is emitted as st.local
; with an address derived from %SPL.
; PTX32: mov.u32 %SPL, __local_depot{{[0-9]+}};
; PTX32-NOT: cvta.local.u32 %SP, %SPL;
; PTX32: ld.param.u32 %r{{[0-9]+}}, [foo3_param_0];
; PTX32: add.u32 %r{{[0-9]+}}, %SPL, 0;
; PTX32: st.local.u32 [%r{{[0-9]+}}], %r{{[0-9]+}};
; PTX64: mov.u64 %SPL, __local_depot{{[0-9]+}};
; PTX64-NOT: cvta.local.u64 %SP, %SPL;
; PTX64: ld.param.u32 %r{{[0-9]+}}, [foo3_param_0];
; PTX64: add.u64 %rd{{[0-9]+}}, %SPL, 0;
; PTX64: st.local.u32 [%rd{{[0-9]+}}], %r{{[0-9]+}};
define void @foo3(i32 %a) {
  %local = alloca [3 x i32], align 4
  %1 = bitcast [3 x i32]* %local to i32*
  %2 = getelementptr inbounds i32, i32* %1, i32 %a
  store i32 %a, i32* %2
  ret void
}

; foo4: two i32 stack slots, each zero-initialised and then passed by address
; to @bar.
; The checks expect address computations from both %SP and %SPL at offsets 0
; and 4, followed by two st.local stores.
; PTX32: cvta.local.u32 %SP, %SPL;
; PTX32: add.u32 {{%r[0-9]+}}, %SP, 0;
; PTX32: add.u32 {{%r[0-9]+}}, %SPL, 0;
; PTX32: add.u32 {{%r[0-9]+}}, %SP, 4;
; PTX32: add.u32 {{%r[0-9]+}}, %SPL, 4;
; PTX32: st.local.u32 [{{%r[0-9]+}}], {{%r[0-9]+}}
; PTX32: st.local.u32 [{{%r[0-9]+}}], {{%r[0-9]+}}
; PTX64: cvta.local.u64 %SP, %SPL;
; PTX64: add.u64 {{%rd[0-9]+}}, %SP, 0;
; PTX64: add.u64 {{%rd[0-9]+}}, %SPL, 0;
; PTX64: add.u64 {{%rd[0-9]+}}, %SP, 4;
; PTX64: add.u64 {{%rd[0-9]+}}, %SPL, 4;
; PTX64: st.local.u32 [{{%rd[0-9]+}}], {{%r[0-9]+}}
; PTX64: st.local.u32 [{{%rd[0-9]+}}], {{%r[0-9]+}}
define void @foo4() {
  %A = alloca i32
  %B = alloca i32
  store i32 0, i32* %A
  store i32 0, i32* %B
  call void @bar(i32* %A)
  call void @bar(i32* %B)
  ret void
}