; RUN: opt -mtriple=amdgcn-- -S -structurizecfg -si-annotate-control-flow %s | FileCheck -check-prefix=OPT %s
; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s


; OPT-LABEL: @annotate_unreachable_noloop(
; OPT-NOT: call i1 @llvm.amdgcn.loop

; GCN-LABEL: {{^}}annotate_unreachable_noloop:
; GCN: s_cbranch_scc1
; GCN-NOT: s_endpgm
; GCN: .Lfunc_end0
; Loop-free kernel in which both exit blocks (bb4 and bb5) end in `unreachable`.
; Each work-item loads the <4 x float> at %arg[workitem.id.x]; bb1 then branches
; on an undef condition and bb3 branches on whether element 2 is below 0.0.
define amdgpu_kernel void @annotate_unreachable_noloop(<4 x float> addrspace(1)* noalias nocapture readonly %arg) #0 {
bb:
  %tmp = tail call i32 @llvm.amdgcn.workitem.id.x()
  br label %bb1

bb1:                                              ; preds = %bb
  ; Widen the work-item id and use it to index the input buffer.
  %tmp2 = sext i32 %tmp to i64
  %tmp3 = getelementptr inbounds <4 x float>, <4 x float> addrspace(1)* %arg, i64 %tmp2
  %tmp4 = load <4 x float>, <4 x float> addrspace(1)* %tmp3, align 16
  ; NOTE(review): the condition is undef, so the IR itself does not determine
  ; which successor is taken here.
  br i1 undef, label %bb5, label %bb3

bb3:                                              ; preds = %bb1
  ; Second branch depends on loaded data: element 2 compared against 0.0.
  %tmp6 = extractelement <4 x float> %tmp4, i32 2
  %tmp7 = fcmp olt float %tmp6, 0.000000e+00
  ; Keep this successor order; the note below records that the original
  ; failure depended on it.
  br i1 %tmp7, label %bb4, label %bb5 ; crash goes away if these are swapped

bb4:                                              ; preds = %bb3
  unreachable

bb5:                                              ; preds = %bb3, %bb1
  unreachable
}


; OPT-LABEL: @annotate_ret_noloop(
; OPT-NOT: call i1 @llvm.amdgcn.loop

; GCN-LABEL: {{^}}annotate_ret_noloop:
; GCN: load_dwordx4
; GCN: v_cmp_nlt_f32
; GCN: s_and_saveexec_b64
; GCN: ; mask branch [[UNIFIED_RET:BB[0-9]+_[0-9]+]]
; GCN-NEXT: [[UNIFIED_RET]]:
; GCN-NEXT: s_or_b64 exec, exec
; GCN-NEXT: s_endpgm
; GCN: .Lfunc_end
; Loop-free kernel with the same CFG shape as @annotate_unreachable_noloop, but
; both exit blocks (bb4 and bb5) return, and the first branch uses a real
; condition computed from the loaded vector instead of undef.
define amdgpu_kernel void @annotate_ret_noloop(<4 x float> addrspace(1)* noalias nocapture readonly %arg) #0 {
bb:
  %tmp = tail call i32 @llvm.amdgcn.workitem.id.x()
  br label %bb1

bb1:                                              ; preds = %bb
  ; Widen the work-item id and use it to index the input buffer.
  %tmp2 = sext i32 %tmp to i64
  %tmp3 = getelementptr inbounds <4 x float>, <4 x float> addrspace(1)* %arg, i64 %tmp2
  %tmp4 = load <4 x float>, <4 x float> addrspace(1)* %tmp3, align 16
  %tmp5 = extractelement <4 x float> %tmp4, i32 1
  ; Volatile store of the whole loaded vector to an undef address.
  ; NOTE(review): presumably here so the full load stays live - confirm.
  store volatile <4 x float> %tmp4, <4 x float> addrspace(1)* undef
  ; First branch: element 1 compared against 1.0.
  %cmp = fcmp ogt float %tmp5, 1.0
  br i1 %cmp, label %bb5, label %bb3

bb3:                                              ; preds = %bb1
  ; Second branch: element 2 compared against 0.0.
  %tmp6 = extractelement <4 x float> %tmp4, i32 2
  %tmp7 = fcmp olt float %tmp6, 0.000000e+00
  ; Keep this successor order; the note below records that the original
  ; failure depended on it.
  br i1 %tmp7, label %bb4, label %bb5 ; crash goes away if these are swapped

bb4:                                              ; preds = %bb3
  ret void

bb5:                                              ; preds = %bb3, %bb1
  ret void
}

; OPT-LABEL: @uniform_annotate_ret_noloop(
; OPT-NOT: call i1 @llvm.amdgcn.loop

; GCN-LABEL: {{^}}uniform_annotate_ret_noloop:
; GCN: s_cbranch_scc1
; GCN: s_endpgm
; GCN: .Lfunc_end
; Variant of the returning kernel in which the load index comes from the i32
; kernel argument %tmp rather than from the work-item id intrinsic.
; NOTE(review): presumably this makes the loaded value, and so the bb3 branch,
; uniform across work-items, as the function name suggests - confirm.
define amdgpu_kernel void @uniform_annotate_ret_noloop(<4 x float> addrspace(1)* noalias nocapture readonly %arg, i32 %tmp) #0 {
bb:
  br label %bb1

bb1:                                              ; preds = %bb
  ; Widen the argument-supplied index and load one vector from the buffer.
  %tmp2 = sext i32 %tmp to i64
  %tmp3 = getelementptr inbounds <4 x float>, <4 x float> addrspace(1)* %arg, i64 %tmp2
  %tmp4 = load <4 x float>, <4 x float> addrspace(1)* %tmp3, align 16
  ; NOTE(review): the condition is undef, so the IR itself does not determine
  ; which successor is taken here.
  br i1 undef, label %bb5, label %bb3

bb3:                                              ; preds = %bb1
  ; Second branch: element 2 compared against 0.0.
  %tmp6 = extractelement <4 x float> %tmp4, i32 2
  %tmp7 = fcmp olt float %tmp6, 0.000000e+00
  ; Keep this successor order; the note below records that the original
  ; failure depended on it.
  br i1 %tmp7, label %bb4, label %bb5 ; crash goes away if these are swapped

bb4:                                              ; preds = %bb3
  ret void

bb5:                                              ; preds = %bb3, %bb1
  ret void
}

; AMDGPU work-item id (x) intrinsic, called by @annotate_unreachable_noloop and
; @annotate_ret_noloop.
declare i32 @llvm.amdgcn.workitem.id.x() #1

; #0 is attached to the three kernel definitions, #1 to the intrinsic declaration.
attributes #0 = { nounwind }
attributes #1 = { nounwind readnone }