; RUN: llc < %s -march=nvptx -mcpu=sm_20 | FileCheck %s --check-prefix=PTX32
; RUN: llc < %s -march=nvptx64 -mcpu=sm_20 | FileCheck %s --check-prefix=PTX64

; Ensure we access the local stack properly

; @foo: volatile store of the i32 argument into a 4-byte-aligned stack slot.
; For both the 32-bit and 64-bit runs, the checks expect %SPL to be loaded from
; a __local_depot symbol, %SP to be produced from %SPL by cvta.local, the
; parameter to be loaded, and the store to be emitted as st.volatile addressed
; through %SP.
; PTX32: mov.u32 %SPL, __local_depot{{[0-9]+}};
; PTX32: cvta.local.u32 %SP, %SPL;
; PTX32: ld.param.u32 %r{{[0-9]+}}, [foo_param_0];
; PTX32: st.volatile.u32 [%SP+0], %r{{[0-9]+}};
; PTX64: mov.u64 %SPL, __local_depot{{[0-9]+}};
; PTX64: cvta.local.u64 %SP, %SPL;
; PTX64: ld.param.u32 %r{{[0-9]+}}, [foo_param_0];
; PTX64: st.volatile.u32 [%SP+0], %r{{[0-9]+}};
define void @foo(i32 %a) {
  %local = alloca i32, align 4
  store volatile i32 %a, i32* %local
  ret void
}

; @foo2: stores the i32 argument into a stack slot, then passes the slot's
; address to the external function @bar. This function is the one named by the
; !nvvm.annotations "kernel" entry in this file.
; The checks expect the store to be an st.local whose address register is the
; result of an add on %SPL; the SP_REG capture ties the add's destination to
; the store's address operand.
; PTX32: mov.u32 %SPL, __local_depot{{[0-9]+}};
; PTX32: cvta.local.u32 %SP, %SPL;
; PTX32: ld.param.u32 %r{{[0-9]+}}, [foo2_param_0];
; PTX32: add.u32 %r[[SP_REG:[0-9]+]], %SPL, 0;
; PTX32: st.local.u32 [%r[[SP_REG]]], %r{{[0-9]+}};
; PTX64: mov.u64 %SPL, __local_depot{{[0-9]+}};
; PTX64: cvta.local.u64 %SP, %SPL;
; PTX64: ld.param.u32 %r{{[0-9]+}}, [foo2_param_0];
; PTX64: add.u64 %rd[[SP_REG:[0-9]+]], %SPL, 0;
; PTX64: st.local.u32 [%rd[[SP_REG]]], %r{{[0-9]+}};
define void @foo2(i32 %a) {
  %local = alloca i32, align 4
  store i32 %a, i32* %local
  call void @bar(i32* %local)
  ret void
}

; External callee; @foo2 passes it the address of its stack slot.
declare void @bar(i32* %a)

; Tags @foo2 with the "kernel" annotation (value 1).
!nvvm.annotations = !{!0}
!0 = !{void (i32)* @foo2, !"kernel", i32 1}

; @foo3: stores the i32 argument into a [3 x i32] stack array, at an element
; index that is the argument itself rather than a constant.
; The checks expect that no cvta.local into %SP appears between the %SPL setup
; and the parameter load, and that the store is an st.local whose address comes
; from an add on %SPL.
; PTX32: mov.u32 %SPL, __local_depot{{[0-9]+}};
; PTX32-NOT: cvta.local.u32 %SP, %SPL;
; PTX32: ld.param.u32 %r{{[0-9]+}}, [foo3_param_0];
; PTX32: add.u32 %r{{[0-9]+}}, %SPL, 0;
; PTX32: st.local.u32 [%r{{[0-9]+}}], %r{{[0-9]+}};
; PTX64: mov.u64 %SPL, __local_depot{{[0-9]+}};
; PTX64-NOT: cvta.local.u64 %SP, %SPL;
; PTX64: ld.param.u32 %r{{[0-9]+}}, [foo3_param_0];
; PTX64: add.u64 %rd{{[0-9]+}}, %SPL, 0;
; PTX64: st.local.u32 [%rd{{[0-9]+}}], %r{{[0-9]+}};
define void @foo3(i32 %a) {
  %local = alloca [3 x i32], align 4
  %1 = bitcast [3 x i32]* %local to i32*
  %2 = getelementptr inbounds i32, i32* %1, i32 %a
  store i32 %a, i32* %2
  ret void
}