; RUN: llc < %s -march=nvptx -mcpu=sm_20 | FileCheck %s --check-prefix=PTX32
; RUN: llc < %s -march=nvptx64 -mcpu=sm_20 | FileCheck %s --check-prefix=PTX64

; Ensure we access the local stack properly.
; foo: a volatile store of %a into an alloca that is used by nothing else.
; The checks expect %SPL to be loaded from the local depot, converted into
; %SP with cvta.local, and the store to be addressed directly as [%SP+0].
; The -LABEL lines anchor the checks to this function's output.
; PTX32-LABEL: foo(
; PTX32: mov.u32 %SPL, __local_depot{{[0-9]+}};
; PTX32: cvta.local.u32 %SP, %SPL;
; PTX32: ld.param.u32 %r{{[0-9]+}}, [foo_param_0];
; PTX32: st.volatile.u32 [%SP+0], %r{{[0-9]+}};
; PTX64-LABEL: foo(
; PTX64: mov.u64 %SPL, __local_depot{{[0-9]+}};
; PTX64: cvta.local.u64 %SP, %SPL;
; PTX64: ld.param.u32 %r{{[0-9]+}}, [foo_param_0];
; PTX64: st.volatile.u32 [%SP+0], %r{{[0-9]+}};
define void @foo(i32 %a) {
  %local = alloca i32, align 4
  store volatile i32 %a, i32* %local
  ret void
}
; foo2: a plain store of %a into an alloca whose address is then passed to
; @bar. @foo2 is listed as a kernel in this file's !nvvm.annotations metadata.
; The checks expect %SP to still be set up via cvta.local, while the store
; itself uses st.local through an address computed from %SPL.
; The -LABEL lines anchor the checks to this function's output.
; PTX32-LABEL: foo2(
; PTX32: mov.u32 %SPL, __local_depot{{[0-9]+}};
; PTX32: cvta.local.u32 %SP, %SPL;
; PTX32: ld.param.u32 %r{{[0-9]+}}, [foo2_param_0];
; PTX32: add.u32 %r[[SP_REG:[0-9]+]], %SPL, 0;
; PTX32: st.local.u32 [%r[[SP_REG]]], %r{{[0-9]+}};
; PTX64-LABEL: foo2(
; PTX64: mov.u64 %SPL, __local_depot{{[0-9]+}};
; PTX64: cvta.local.u64 %SP, %SPL;
; PTX64: ld.param.u32 %r{{[0-9]+}}, [foo2_param_0];
; PTX64: add.u64 %rd[[SP_REG:[0-9]+]], %SPL, 0;
; PTX64: st.local.u32 [%rd[[SP_REG]]], %r{{[0-9]+}};
define void @foo2(i32 %a) {
  %local = alloca i32, align 4
  store i32 %a, i32* %local
  call void @bar(i32* %local)
  ret void
}
; External function that @foo2 and @foo4 pass the addresses of their allocas to.
declare void @bar(i32* %a)

; Annotate @foo2 as a kernel.
!nvvm.annotations = !{!0}
!0 = !{void (i32)* @foo2, !"kernel", i32 1}
; foo3: a store into a [3 x i32] alloca at an index taken from %a; the
; alloca's address is not passed to any call.
; The checks expect no cvta.local between the %SPL setup and the parameter
; load, and the store to use st.local through an address computed from %SPL.
; The -LABEL lines anchor the checks to this function's output.
; PTX32-LABEL: foo3(
; PTX32: mov.u32 %SPL, __local_depot{{[0-9]+}};
; PTX32-NOT: cvta.local.u32 %SP, %SPL;
; PTX32: ld.param.u32 %r{{[0-9]+}}, [foo3_param_0];
; PTX32: add.u32 %r{{[0-9]+}}, %SPL, 0;
; PTX32: st.local.u32 [%r{{[0-9]+}}], %r{{[0-9]+}};
; PTX64-LABEL: foo3(
; PTX64: mov.u64 %SPL, __local_depot{{[0-9]+}};
; PTX64-NOT: cvta.local.u64 %SP, %SPL;
; PTX64: ld.param.u32 %r{{[0-9]+}}, [foo3_param_0];
; PTX64: add.u64 %rd{{[0-9]+}}, %SPL, 0;
; PTX64: st.local.u32 [%rd{{[0-9]+}}], %r{{[0-9]+}};
define void @foo3(i32 %a) {
  %local = alloca [3 x i32], align 4
  %1 = bitcast [3 x i32]* %local to i32*
  %2 = getelementptr inbounds i32, i32* %1, i32 %a
  store i32 %a, i32* %2
  ret void
}
; foo4: two i32 allocas, each stored to and then passed to @bar.
; The checks expect both a %SP-based and a %SPL-based address for each slot
; (offsets 0 and 4), and both stores to be emitted as st.local.
; The -LABEL lines anchor the checks to this function's output.
; PTX32-LABEL: foo4(
; PTX32: cvta.local.u32 %SP, %SPL;
; PTX32: add.u32 {{%r[0-9]+}}, %SP, 0;
; PTX32: add.u32 {{%r[0-9]+}}, %SPL, 0;
; PTX32: add.u32 {{%r[0-9]+}}, %SP, 4;
; PTX32: add.u32 {{%r[0-9]+}}, %SPL, 4;
; PTX32: st.local.u32 [{{%r[0-9]+}}], {{%r[0-9]+}};
; PTX32: st.local.u32 [{{%r[0-9]+}}], {{%r[0-9]+}};
; PTX64-LABEL: foo4(
; PTX64: cvta.local.u64 %SP, %SPL;
; PTX64: add.u64 {{%rd[0-9]+}}, %SP, 0;
; PTX64: add.u64 {{%rd[0-9]+}}, %SPL, 0;
; PTX64: add.u64 {{%rd[0-9]+}}, %SP, 4;
; PTX64: add.u64 {{%rd[0-9]+}}, %SPL, 4;
; PTX64: st.local.u32 [{{%rd[0-9]+}}], {{%r[0-9]+}};
; PTX64: st.local.u32 [{{%rd[0-9]+}}], {{%r[0-9]+}};
define void @foo4() {
  %A = alloca i32
  %B = alloca i32
  store i32 0, i32* %A
  store i32 0, i32* %B
  call void @bar(i32* %A)
  call void @bar(i32* %B)
  ret void
}