; blob: 8e18ab5554e458dfe8942a9cb16635bbe02ccbe4
; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=SI %s
;
;
; Most SALU instructions ignore control flow, so we need to make sure
; they don't overwrite values from other blocks.

; If the branch decision is made based on a value in an SGPR then all
; threads will execute the same code paths, so we don't need to worry
; about instructions in different blocks overwriting each other.
; SI-LABEL: {{^}}sgpr_if_else_salu_br:
; SI: s_add
; SI: s_branch

; SI: s_sub

; Branches on the kernel argument %a (compared against 0).
;   %if   computes %b - %c
;   %else computes %d + %e
; The value merged by the phi in %endif is added to %a and stored to %out.
define amdgpu_kernel void @sgpr_if_else_salu_br(i32 addrspace(1)* %out, i32 %a, i32 %b, i32 %c, i32 %d, i32 %e) {
entry:
  %0 = icmp eq i32 %a, 0
  br i1 %0, label %if, label %else

if:
  %1 = sub i32 %b, %c
  br label %endif

else:
  %2 = add i32 %d, %e
  br label %endif

endif:
  ; Join point: pick the arm's result according to the predecessor taken.
  %3 = phi i32 [%1, %if], [%2, %else]
  %4 = add i32 %3, %a
  store i32 %4, i32 addrspace(1)* %out
  ret void
}

; SI-LABEL: {{^}}sgpr_if_else_salu_br_opt:
; SI: s_cmp_lg_u32
; SI: s_cbranch_scc0 [[IF:BB[0-9]+_[0-9]+]]

; SI: ; BB#1: ; %else
; SI: s_load_dword [[LOAD0:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0xe
; SI: s_load_dword [[LOAD1:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0xf
; SI-NOT: add
; SI: s_branch [[ENDIF:BB[0-9]+_[0-9]+]]

; SI: [[IF]]: ; %if
; SI: s_load_dword [[LOAD0]], s{{\[[0-9]+:[0-9]+\]}}, 0xc
; SI: s_load_dword [[LOAD1]], s{{\[[0-9]+:[0-9]+\]}}, 0xd
; SI-NOT: add

; SI: [[ENDIF]]: ; %endif
; SI: s_add_i32 s{{[0-9]+}}, [[LOAD0]], [[LOAD1]]
; SI: buffer_store_dword
; SI-NEXT: s_endpgm
; Same shape as @sgpr_if_else_salu_br, except that both arms perform an add:
;   %if   computes %b + %c
;   %else computes %d + %e
; The value merged by the phi in %endif is added to %a and stored to %out.
; The block labels (%if, %else, %endif) are matched by the checks for this
; function, so they must keep these names.
define amdgpu_kernel void @sgpr_if_else_salu_br_opt(i32 addrspace(1)* %out, i32 %a, i32 %b, i32 %c, i32 %d, i32 %e) {
entry:
  %0 = icmp eq i32 %a, 0
  br i1 %0, label %if, label %else

if:
  %1 = add i32 %b, %c
  br label %endif

else:
  %2 = add i32 %d, %e
  br label %endif

endif:
  ; Join point: pick the arm's result according to the predecessor taken.
  %3 = phi i32 [%1, %if], [%2, %else]
  %4 = add i32 %3, %a
  store i32 %4, i32 addrspace(1)* %out
  ret void
}

; The two S_ADD instructions should write to different registers, since
; different threads will take different control flow paths.

; SI-LABEL: {{^}}sgpr_if_else_valu_br:
; SI: s_add_i32 [[SGPR:s[0-9]+]]
; SI-NOT: s_add_i32 [[SGPR]]

; Branch condition is derived from the work-item id rather than from a kernel
; argument: %tid is converted to float and compared (unordered-or-equal)
; against 0.0.
;   %if   computes %b + %c
;   %else computes %d + %e
; The value merged by the phi in %endif is stored to %out.
; The float parameter %a is not referenced in the body.
define amdgpu_kernel void @sgpr_if_else_valu_br(i32 addrspace(1)* %out, float %a, i32 %b, i32 %c, i32 %d, i32 %e) {
entry:
  %tid = call i32 @llvm.amdgcn.workitem.id.x() #0
  %tid_f = uitofp i32 %tid to float
  %tmp1 = fcmp ueq float %tid_f, 0.0
  br i1 %tmp1, label %if, label %else

if:
  %tmp2 = add i32 %b, %c
  br label %endif

else:
  %tmp3 = add i32 %d, %e
  br label %endif

endif:
  ; Join point: pick the arm's result according to the predecessor taken.
  %tmp4 = phi i32 [%tmp2, %if], [%tmp3, %else]
  store i32 %tmp4, i32 addrspace(1)* %out
  ret void
}

; FIXME: Should write to different SGPR pairs instead of copying to
; VALU for i1 phi.

; SI-LABEL: {{^}}sgpr_if_else_valu_cmp_phi_br:
; SI: buffer_load_dword [[AVAL:v[0-9]+]]
; SI: v_cmp_gt_i32_e32 [[CMP_IF:vcc]], 0, [[AVAL]]
; SI: v_cndmask_b32_e64 [[V_CMP:v[0-9]+]], 0, -1, [[CMP_IF]]

; SI: BB{{[0-9]+}}_2:
; SI: buffer_load_dword [[AVAL:v[0-9]+]]
; SI: v_cmp_eq_u32_e32 [[CMP_ELSE:vcc]], 0, [[AVAL]]
; SI: v_cndmask_b32_e64 [[V_CMP]], 0, -1, [[CMP_ELSE]]

; SI: v_cmp_ne_u32_e32 [[CMP_CMP:vcc]], 0, [[V_CMP]]
; SI: v_cndmask_b32_e64 [[RESULT:v[0-9]+]], 0, -1, [[CMP_CMP]]
; SI: buffer_store_dword [[RESULT]]
; Branch condition depends on the work-item id (%tid == 0), and the value
; merged at the join is an i1 rather than an i32.
;   %if   loads %a[%tid] and tests it for equality with 0
;   %else loads %b[%tid] and tests it for signed less-than 0
; The i1 selected by the phi in %endif is sign-extended to i32 (0 or -1) and
; stored to %out.
define amdgpu_kernel void @sgpr_if_else_valu_cmp_phi_br(i32 addrspace(1)* %out, i32 addrspace(1)* %a, i32 addrspace(1)* %b) {
entry:
  %tid = call i32 @llvm.amdgcn.workitem.id.x() #0
  %tmp1 = icmp eq i32 %tid, 0
  br i1 %tmp1, label %if, label %else

if:
  %gep.if = getelementptr i32, i32 addrspace(1)* %a, i32 %tid
  %a.val = load i32, i32 addrspace(1)* %gep.if
  %cmp.if = icmp eq i32 %a.val, 0
  br label %endif

else:
  %gep.else = getelementptr i32, i32 addrspace(1)* %b, i32 %tid
  %b.val = load i32, i32 addrspace(1)* %gep.else
  %cmp.else = icmp slt i32 %b.val, 0
  br label %endif

endif:
  ; Join point: i1 phi over the two comparison results.
  %tmp4 = phi i1 [%cmp.if, %if], [%cmp.else, %else]
  %ext = sext i1 %tmp4 to i32
  store i32 %ext, i32 addrspace(1)* %out
  ret void
}

; Intrinsic called by the two *_valu_* kernels in this file to obtain %tid.
declare i32 @llvm.amdgcn.workitem.id.x() #0

; Attribute group applied to the intrinsic declaration and its call sites.
attributes #0 = { readnone }