; RUN: opt -mtriple=amdgcn-- -S -amdgpu-unify-divergent-exit-nodes -verify -structurizecfg -verify -si-annotate-control-flow %s | FileCheck -check-prefix=IR %s
; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s

; Add extra verifier runs. There were some cases where invalid IR
; was produced but happened to be fixed by the later passes.

; Make sure divergent control flow with multiple exits from a region
; is properly handled. UnifyFunctionExitNodes should be run before
; StructurizeCFG.
; IR-LABEL: @multi_divergent_region_exit_ret_ret(
; IR: %1 = call { i1, i64 } @llvm.amdgcn.if(i1 %0)
; IR: %2 = extractvalue { i1, i64 } %1, 0
; IR: %3 = extractvalue { i1, i64 } %1, 1
; IR: br i1 %2, label %LeafBlock1, label %Flow

; IR: Flow:
; IR: %4 = phi i1 [ true, %LeafBlock1 ], [ false, %entry ]
; IR: %5 = phi i1 [ %10, %LeafBlock1 ], [ false, %entry ]
; IR: %6 = call { i1, i64 } @llvm.amdgcn.else(i64 %3)
; IR: %7 = extractvalue { i1, i64 } %6, 0
; IR: %8 = extractvalue { i1, i64 } %6, 1
; IR: br i1 %7, label %LeafBlock, label %Flow1

; IR: LeafBlock:
; IR: br label %Flow1

; IR: LeafBlock1:
; IR: br label %Flow{{$}}

; IR: Flow2:
; IR: %11 = phi i1 [ false, %exit1 ], [ %15, %Flow1 ]
; IR: call void @llvm.amdgcn.end.cf(i64 %19)
; IR: %12 = call { i1, i64 } @llvm.amdgcn.if(i1 %11)
; IR: %13 = extractvalue { i1, i64 } %12, 0
; IR: %14 = extractvalue { i1, i64 } %12, 1
; IR: br i1 %13, label %exit0, label %UnifiedReturnBlock

; IR: exit0:
; IR: store volatile i32 9, i32 addrspace(1)* undef
; IR: br label %UnifiedReturnBlock

; IR: Flow1:
; IR: %15 = phi i1 [ %SwitchLeaf, %LeafBlock ], [ %4, %Flow ]
; IR: %16 = phi i1 [ %9, %LeafBlock ], [ %5, %Flow ]
; IR: call void @llvm.amdgcn.end.cf(i64 %8)
; IR: %17 = call { i1, i64 } @llvm.amdgcn.if(i1 %16)
; IR: %18 = extractvalue { i1, i64 } %17, 0
; IR: %19 = extractvalue { i1, i64 } %17, 1
; IR: br i1 %18, label %exit1, label %Flow2

; IR: exit1:
; IR: store volatile i32 17, i32 addrspace(3)* undef
; IR: br label %Flow2

; IR: UnifiedReturnBlock:
; IR: call void @llvm.amdgcn.end.cf(i64 %14)
; IR: ret void


; GCN-LABEL: {{^}}multi_divergent_region_exit_ret_ret:
; GCN: v_cmp_lt_i32_e32 vcc, 1
; GCN: s_and_saveexec_b64
; GCN: s_xor_b64


; FIXME: Why is this compare essentially repeated?
; GCN: v_cmp_eq_u32_e32 vcc, 1, [[REG:v[0-9]+]]
; GCN: v_cndmask_b32_e64 v{{[0-9]+}}, 0, -1, vcc
; GCN: v_cmp_ne_u32_e32 vcc, 1, [[REG]]
; GCN: v_cndmask_b32_e64 v{{[0-9]+}}, 0, -1, vcc

; GCN: ; %Flow1
; GCN-NEXT: s_or_b64 exec, exec
; GCN: v_cmp_ne_u32_e32 vcc, 0

; GCN: ; %exit1
; GCN: ds_write_b32

; GCN: %Flow2
; GCN-NEXT: s_or_b64 exec, exec
; GCN: v_cmp_ne_u32_e32 vcc, 0
; GCN-NEXT: s_and_saveexec_b64

; GCN: ; %exit0
; GCN: buffer_store_dword

; GCN: ; %UnifiedReturnBlock
; GCN-NEXT: s_endpgm
; Two divergent leaf blocks, each branching to two distinct ret-terminated
; exits; checks the unify-exits + structurize pipeline handles both rets.
define amdgpu_kernel void @multi_divergent_region_exit_ret_ret(i32 addrspace(1)* nocapture %arg0, i32 addrspace(1)* nocapture %arg1, i32 addrspace(1)* nocapture %arg2) #0 {
entry:
  %tmp = tail call i32 @llvm.amdgcn.workitem.id.x() #1
  %tmp1 = add i32 0, %tmp
  %tmp2 = zext i32 %tmp1 to i64
  %tmp3 = add i64 0, %tmp2
  %tmp4 = shl i64 %tmp3, 32
  %tmp5 = ashr exact i64 %tmp4, 32
  %tmp6 = getelementptr inbounds i32, i32 addrspace(1)* %arg0, i64 %tmp5
  %tmp7 = load i32, i32 addrspace(1)* %tmp6, align 4
  %tmp8 = sext i32 %tmp7 to i64
  %tmp9 = getelementptr inbounds i32, i32 addrspace(1)* %arg1, i64 %tmp8
  %tmp10 = load i32, i32 addrspace(1)* %tmp9, align 4
  %tmp13 = zext i32 %tmp10 to i64
  %tmp14 = getelementptr inbounds i32, i32 addrspace(1)* %arg2, i64 %tmp13
  %tmp16 = load i32, i32 addrspace(1)* %tmp14, align 16
  %Pivot = icmp slt i32 %tmp16, 2
  br i1 %Pivot, label %LeafBlock, label %LeafBlock1

LeafBlock:                                        ; preds = %entry
  %SwitchLeaf = icmp eq i32 %tmp16, 1
  br i1 %SwitchLeaf, label %exit0, label %exit1

LeafBlock1:                                       ; preds = %entry
  %SwitchLeaf2 = icmp eq i32 %tmp16, 2
  br i1 %SwitchLeaf2, label %exit0, label %exit1

exit0:                                            ; preds = %LeafBlock, %LeafBlock1
  store volatile i32 9, i32 addrspace(1)* undef
  ret void

exit1:                                            ; preds = %LeafBlock, %LeafBlock1
  store volatile i32 17, i32 addrspace(3)* undef
  ret void
}

; IR-LABEL: @multi_divergent_region_exit_unreachable_unreachable(
; IR: %1 = call { i1, i64 } @llvm.amdgcn.if(i1 %0)

; IR: %6 = call { i1, i64 } @llvm.amdgcn.else(i64 %3)

; IR: %11 = phi i1 [ false, %exit1 ], [ %15, %Flow1 ]
; IR: call void @llvm.amdgcn.end.cf(i64 %19)
; IR: %12 = call { i1, i64 } @llvm.amdgcn.if(i1 %11)
; IR: br i1 %13, label %exit0, label %UnifiedUnreachableBlock


; IR: UnifiedUnreachableBlock:
; IR-NEXT: unreachable


; FIXME: Probably should insert an s_endpgm anyway.
; GCN-LABEL: {{^}}multi_divergent_region_exit_unreachable_unreachable:
; GCN: ; %UnifiedUnreachableBlock
; GCN-NEXT: .Lfunc_end
; Same divergent region shape as above, but both exits end in unreachable,
; exercising UnifiedUnreachableBlock creation.
define amdgpu_kernel void @multi_divergent_region_exit_unreachable_unreachable(i32 addrspace(1)* nocapture %arg0, i32 addrspace(1)* nocapture %arg1, i32 addrspace(1)* nocapture %arg2) #0 {
entry:
  %tmp = tail call i32 @llvm.amdgcn.workitem.id.x() #1
  %tmp1 = add i32 0, %tmp
  %tmp2 = zext i32 %tmp1 to i64
  %tmp3 = add i64 0, %tmp2
  %tmp4 = shl i64 %tmp3, 32
  %tmp5 = ashr exact i64 %tmp4, 32
  %tmp6 = getelementptr inbounds i32, i32 addrspace(1)* %arg0, i64 %tmp5
  %tmp7 = load i32, i32 addrspace(1)* %tmp6, align 4
  %tmp8 = sext i32 %tmp7 to i64
  %tmp9 = getelementptr inbounds i32, i32 addrspace(1)* %arg1, i64 %tmp8
  %tmp10 = load i32, i32 addrspace(1)* %tmp9, align 4
  %tmp13 = zext i32 %tmp10 to i64
  %tmp14 = getelementptr inbounds i32, i32 addrspace(1)* %arg2, i64 %tmp13
  %tmp16 = load i32, i32 addrspace(1)* %tmp14, align 16
  %Pivot = icmp slt i32 %tmp16, 2
  br i1 %Pivot, label %LeafBlock, label %LeafBlock1

LeafBlock:                                        ; preds = %entry
  %SwitchLeaf = icmp eq i32 %tmp16, 1
  br i1 %SwitchLeaf, label %exit0, label %exit1

LeafBlock1:                                       ; preds = %entry
  %SwitchLeaf2 = icmp eq i32 %tmp16, 2
  br i1 %SwitchLeaf2, label %exit0, label %exit1

exit0:                                            ; preds = %LeafBlock, %LeafBlock1
  store volatile i32 9, i32 addrspace(1)* undef
  unreachable

exit1:                                            ; preds = %LeafBlock, %LeafBlock1
  store volatile i32 17, i32 addrspace(3)* undef
  unreachable
}

; IR-LABEL: @multi_exit_region_divergent_ret_uniform_ret(
; IR: %divergent.cond0 = icmp slt i32 %tmp16, 2
; IR: llvm.amdgcn.if
; IR: br i1

; IR: {{^}}Flow:
; IR: %4 = phi i1 [ true, %LeafBlock1 ], [ false, %entry ]
; IR: %5 = phi i1 [ %10, %LeafBlock1 ], [ false, %entry ]
; IR: %6 = call { i1, i64 } @llvm.amdgcn.else(i64 %3)
; IR: br i1 %7, label %LeafBlock, label %Flow1

; IR: {{^}}LeafBlock:
; IR: %divergent.cond1 = icmp eq i32 %tmp16, 1
; IR: %9 = xor i1 %divergent.cond1, true
; IR: br label %Flow1

; IR: LeafBlock1:
; IR: %uniform.cond0 = icmp eq i32 %arg3, 2
; IR: %10 = xor i1 %uniform.cond0, true
; IR: br label %Flow

; IR: Flow2:
; IR: %11 = phi i1 [ false, %exit1 ], [ %15, %Flow1 ]
; IR: call void @llvm.amdgcn.end.cf(i64 %19)
; IR: %12 = call { i1, i64 } @llvm.amdgcn.if(i1 %11)
; IR: br i1 %13, label %exit0, label %UnifiedReturnBlock

; IR: exit0:
; IR: store volatile i32 9, i32 addrspace(1)* undef
; IR: br label %UnifiedReturnBlock

; IR: {{^}}Flow1:
; IR: %15 = phi i1 [ %divergent.cond1, %LeafBlock ], [ %4, %Flow ]
; IR: %16 = phi i1 [ %9, %LeafBlock ], [ %5, %Flow ]
; IR: call void @llvm.amdgcn.end.cf(i64 %8)
; IR: %17 = call { i1, i64 } @llvm.amdgcn.if(i1 %16)
; IR: %18 = extractvalue { i1, i64 } %17, 0
; IR: %19 = extractvalue { i1, i64 } %17, 1
; IR: br i1 %18, label %exit1, label %Flow2

; IR: exit1:
; IR: store volatile i32 17, i32 addrspace(3)* undef
; IR: br label %Flow2

; IR: UnifiedReturnBlock:
; IR: call void @llvm.amdgcn.end.cf(i64 %14)
; IR: ret void
; One exit is reached by a divergent branch, the other by a uniform branch
; (on the %arg3 SGPR argument).
define amdgpu_kernel void @multi_exit_region_divergent_ret_uniform_ret(i32 addrspace(1)* nocapture %arg0, i32 addrspace(1)* nocapture %arg1, i32 addrspace(1)* nocapture %arg2, i32 %arg3) #0 {
entry:
  %tmp = tail call i32 @llvm.amdgcn.workitem.id.x() #1
  %tmp1 = add i32 0, %tmp
  %tmp2 = zext i32 %tmp1 to i64
  %tmp3 = add i64 0, %tmp2
  %tmp4 = shl i64 %tmp3, 32
  %tmp5 = ashr exact i64 %tmp4, 32
  %tmp6 = getelementptr inbounds i32, i32 addrspace(1)* %arg0, i64 %tmp5
  %tmp7 = load i32, i32 addrspace(1)* %tmp6, align 4
  %tmp8 = sext i32 %tmp7 to i64
  %tmp9 = getelementptr inbounds i32, i32 addrspace(1)* %arg1, i64 %tmp8
  %tmp10 = load i32, i32 addrspace(1)* %tmp9, align 4
  %tmp13 = zext i32 %tmp10 to i64
  %tmp14 = getelementptr inbounds i32, i32 addrspace(1)* %arg2, i64 %tmp13
  %tmp16 = load i32, i32 addrspace(1)* %tmp14, align 16
  %divergent.cond0 = icmp slt i32 %tmp16, 2
  br i1 %divergent.cond0, label %LeafBlock, label %LeafBlock1

LeafBlock:                                        ; preds = %entry
  %divergent.cond1 = icmp eq i32 %tmp16, 1
  br i1 %divergent.cond1, label %exit0, label %exit1

LeafBlock1:                                       ; preds = %entry
  %uniform.cond0 = icmp eq i32 %arg3, 2
  br i1 %uniform.cond0, label %exit0, label %exit1

exit0:                                            ; preds = %LeafBlock, %LeafBlock1
  store volatile i32 9, i32 addrspace(1)* undef
  ret void

exit1:                                            ; preds = %LeafBlock, %LeafBlock1
  store volatile i32 17, i32 addrspace(3)* undef
  ret void
}

; IR-LABEL: @multi_exit_region_uniform_ret_divergent_ret(
; IR: %1 = call { i1, i64 } @llvm.amdgcn.if(i1 %0)
; IR: br i1 %2, label %LeafBlock1, label %Flow

; IR: Flow:
; IR: %4 = phi i1 [ true, %LeafBlock1 ], [ false, %entry ]
; IR: %5 = phi i1 [ %10, %LeafBlock1 ], [ false, %entry ]
; IR: %6 = call { i1, i64 } @llvm.amdgcn.else(i64 %3)

; IR: %11 = phi i1 [ false, %exit1 ], [ %15, %Flow1 ]
; IR: call void @llvm.amdgcn.end.cf(i64 %19)
; IR: %12 = call { i1, i64 } @llvm.amdgcn.if(i1 %11)
; Mirror of the previous test: the uniform condition (on %arg3) is in
; LeafBlock and the divergent one in LeafBlock1.
define amdgpu_kernel void @multi_exit_region_uniform_ret_divergent_ret(i32 addrspace(1)* nocapture %arg0, i32 addrspace(1)* nocapture %arg1, i32 addrspace(1)* nocapture %arg2, i32 %arg3) #0 {
entry:
  %tmp = tail call i32 @llvm.amdgcn.workitem.id.x() #1
  %tmp1 = add i32 0, %tmp
  %tmp2 = zext i32 %tmp1 to i64
  %tmp3 = add i64 0, %tmp2
  %tmp4 = shl i64 %tmp3, 32
  %tmp5 = ashr exact i64 %tmp4, 32
  %tmp6 = getelementptr inbounds i32, i32 addrspace(1)* %arg0, i64 %tmp5
  %tmp7 = load i32, i32 addrspace(1)* %tmp6, align 4
  %tmp8 = sext i32 %tmp7 to i64
  %tmp9 = getelementptr inbounds i32, i32 addrspace(1)* %arg1, i64 %tmp8
  %tmp10 = load i32, i32 addrspace(1)* %tmp9, align 4
  %tmp13 = zext i32 %tmp10 to i64
  %tmp14 = getelementptr inbounds i32, i32 addrspace(1)* %arg2, i64 %tmp13
  %tmp16 = load i32, i32 addrspace(1)* %tmp14, align 16
  %Pivot = icmp slt i32 %tmp16, 2
  br i1 %Pivot, label %LeafBlock, label %LeafBlock1

LeafBlock:                                        ; preds = %entry
  %SwitchLeaf = icmp eq i32 %arg3, 1
  br i1 %SwitchLeaf, label %exit0, label %exit1

LeafBlock1:                                       ; preds = %entry
  %SwitchLeaf2 = icmp eq i32 %tmp16, 2
  br i1 %SwitchLeaf2, label %exit0, label %exit1

exit0:                                            ; preds = %LeafBlock, %LeafBlock1
  store volatile i32 9, i32 addrspace(1)* undef
  ret void

exit1:                                            ; preds = %LeafBlock, %LeafBlock1
  store volatile i32 17, i32 addrspace(3)* undef
  ret void
}

; IR-LABEL: @multi_divergent_region_exit_ret_ret_return_value(
; IR: Flow2:
; IR: %11 = phi float [ 2.000000e+00, %exit1 ], [ undef, %Flow1 ]
; IR: %12 = phi i1 [ false, %exit1 ], [ %16, %Flow1 ]
; IR: call void @llvm.amdgcn.end.cf(i64 %20)

; IR: UnifiedReturnBlock:
; IR: %UnifiedRetVal = phi float [ %11, %Flow2 ], [ 1.000000e+00, %exit0 ]
; IR: call void @llvm.amdgcn.end.cf(i64 %15)
; IR: ret float %UnifiedRetVal
; Non-void variant: the unified exit must merge the two returned floats
; through a %UnifiedRetVal phi.
define amdgpu_ps float @multi_divergent_region_exit_ret_ret_return_value(i32 %vgpr) #0 {
entry:
  %Pivot = icmp slt i32 %vgpr, 2
  br i1 %Pivot, label %LeafBlock, label %LeafBlock1

LeafBlock:                                        ; preds = %entry
  %SwitchLeaf = icmp eq i32 %vgpr, 1
  br i1 %SwitchLeaf, label %exit0, label %exit1

LeafBlock1:                                       ; preds = %entry
  %SwitchLeaf2 = icmp eq i32 %vgpr, 2
  br i1 %SwitchLeaf2, label %exit0, label %exit1

exit0:                                            ; preds = %LeafBlock, %LeafBlock1
  store i32 9, i32 addrspace(1)* undef
  ret float 1.0

exit1:                                            ; preds = %LeafBlock, %LeafBlock1
  store i32 17, i32 addrspace(3)* undef
  ret float 2.0
}

; IR-LABEL: @uniform_branch_to_multi_divergent_region_exit_ret_ret_return_value(

; GCN-LABEL: {{^}}uniform_branch_to_multi_divergent_region_exit_ret_ret_return_value:
; GCN: s_cmp_gt_i32 s0, 1
; GCN: s_cbranch_scc0 [[FLOW:BB[0-9]+_[0-9]+]]

; GCN: v_cmp_ne_u32_e32 vcc, 7, v0

; GCN: {{^}}[[FLOW]]:
; GCN: s_cbranch_vccnz [[FLOW1:BB[0-9]+]]

; GCN: v_mov_b32_e32 v0, 2.0
; GCN: s_or_b64 exec, exec
; GCN: s_and_b64 exec, exec
; GCN: v_mov_b32_e32 v0, 1.0

; GCN: {{^BB[0-9]+_[0-9]+}}: ; %UnifiedReturnBlock
; GCN-NEXT: s_or_b64 exec, exec
; GCN-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
; GCN-NEXT: ; return
; A uniform entry branch (on the inreg SGPR) guarding a region whose two
; exits are reached by divergent branches on the VGPR argument.
define amdgpu_ps float @uniform_branch_to_multi_divergent_region_exit_ret_ret_return_value(i32 inreg %sgpr, i32 %vgpr) #0 {
entry:
  %uniform.cond = icmp slt i32 %sgpr, 2
  br i1 %uniform.cond, label %LeafBlock, label %LeafBlock1

LeafBlock:                                        ; preds = %entry
  %divergent.cond0 = icmp eq i32 %vgpr, 3
  br i1 %divergent.cond0, label %exit0, label %exit1

LeafBlock1:                                       ; preds = %entry
  %divergent.cond1 = icmp eq i32 %vgpr, 7
  br i1 %divergent.cond1, label %exit0, label %exit1

exit0:                                            ; preds = %LeafBlock, %LeafBlock1
  store i32 9, i32 addrspace(1)* undef
  ret float 1.0

exit1:                                            ; preds = %LeafBlock, %LeafBlock1
  store i32 17, i32 addrspace(3)* undef
  ret float 2.0
}

; IR-LABEL: @multi_divergent_region_exit_ret_unreachable(
; IR: %1 = call { i1, i64 } @llvm.amdgcn.if(i1 %0)

; IR: Flow:
; IR: %4 = phi i1 [ true, %LeafBlock1 ], [ false, %entry ]
; IR: %5 = phi i1 [ %10, %LeafBlock1 ], [ false, %entry ]
; IR: %6 = call { i1, i64 } @llvm.amdgcn.else(i64 %3)

; IR: Flow2:
; IR: %11 = phi i1 [ false, %exit1 ], [ %15, %Flow1 ]
; IR: call void @llvm.amdgcn.end.cf(i64 %19)
; IR: %12 = call { i1, i64 } @llvm.amdgcn.if(i1 %11)
; IR: br i1 %13, label %exit0, label %UnifiedReturnBlock

; IR: exit0:
; IR-NEXT: store volatile i32 17, i32 addrspace(3)* undef
; IR-NEXT: br label %UnifiedReturnBlock

; IR: Flow1:
; IR: %15 = phi i1 [ %SwitchLeaf, %LeafBlock ], [ %4, %Flow ]
; IR: %16 = phi i1 [ %9, %LeafBlock ], [ %5, %Flow ]
; IR: call void @llvm.amdgcn.end.cf(i64 %8)
; IR: %17 = call { i1, i64 } @llvm.amdgcn.if(i1 %16)
; IR: %18 = extractvalue { i1, i64 } %17, 0
; IR: %19 = extractvalue { i1, i64 } %17, 1
; IR: br i1 %18, label %exit1, label %Flow2

; IR: exit1:
; IR-NEXT: store volatile i32 9, i32 addrspace(1)* undef
; IR-NEXT: call void @llvm.amdgcn.unreachable()
; IR-NEXT: br label %Flow2

; IR: UnifiedReturnBlock:
; IR-NEXT: call void @llvm.amdgcn.end.cf(i64 %14)
; IR-NEXT: ret void
; Mixed exit kinds: one ret and one unreachable from the divergent region.
define amdgpu_kernel void @multi_divergent_region_exit_ret_unreachable(i32 addrspace(1)* nocapture %arg0, i32 addrspace(1)* nocapture %arg1, i32 addrspace(1)* nocapture %arg2) #0 {
entry:
  %tmp = tail call i32 @llvm.amdgcn.workitem.id.x() #1
  %tmp1 = add i32 0, %tmp
  %tmp2 = zext i32 %tmp1 to i64
  %tmp3 = add i64 0, %tmp2
  %tmp4 = shl i64 %tmp3, 32
  %tmp5 = ashr exact i64 %tmp4, 32
  %tmp6 = getelementptr inbounds i32, i32 addrspace(1)* %arg0, i64 %tmp5
  %tmp7 = load i32, i32 addrspace(1)* %tmp6, align 4
  %tmp8 = sext i32 %tmp7 to i64
  %tmp9 = getelementptr inbounds i32, i32 addrspace(1)* %arg1, i64 %tmp8
  %tmp10 = load i32, i32 addrspace(1)* %tmp9, align 4
  %tmp13 = zext i32 %tmp10 to i64
  %tmp14 = getelementptr inbounds i32, i32 addrspace(1)* %arg2, i64 %tmp13
  %tmp16 = load i32, i32 addrspace(1)* %tmp14, align 16
  %Pivot = icmp slt i32 %tmp16, 2
  br i1 %Pivot, label %LeafBlock, label %LeafBlock1

LeafBlock:                                        ; preds = %entry
  %SwitchLeaf = icmp eq i32 %tmp16, 1
  br i1 %SwitchLeaf, label %exit0, label %exit1

LeafBlock1:                                       ; preds = %entry
  %SwitchLeaf2 = icmp eq i32 %tmp16, 2
  br i1 %SwitchLeaf2, label %exit0, label %exit1

exit0:                                            ; preds = %LeafBlock, %LeafBlock1
  store volatile i32 17, i32 addrspace(3)* undef
  ret void

exit1:                                            ; preds = %LeafBlock, %LeafBlock1
  store volatile i32 9, i32 addrspace(1)* undef
  unreachable
}

; The non-uniformity of the branch to the exiting blocks requires
; looking at transitive predecessors.

; IR-LABEL: @indirect_multi_divergent_region_exit_ret_unreachable(

; IR: exit0: ; preds = %Flow2
; IR-NEXT: store volatile i32 17, i32 addrspace(3)* undef
; IR-NEXT: br label %UnifiedReturnBlock


; IR: indirect.exit1:
; IR: %load = load volatile i32, i32 addrspace(1)* undef
; IR: store volatile i32 %load, i32 addrspace(1)* undef
; IR: store volatile i32 9, i32 addrspace(1)* undef
; IR: call void @llvm.amdgcn.unreachable()
; IR-NEXT: br label %Flow2

; IR: UnifiedReturnBlock: ; preds = %exit0, %Flow2
; IR-NEXT: call void @llvm.amdgcn.end.cf(i64 %14)
; IR-NEXT: ret void
; The unreachable exit is reached through an intermediate block
; (indirect.exit1), so divergence must be traced through transitive
; predecessors.
define amdgpu_kernel void @indirect_multi_divergent_region_exit_ret_unreachable(i32 addrspace(1)* nocapture %arg0, i32 addrspace(1)* nocapture %arg1, i32 addrspace(1)* nocapture %arg2) #0 {
entry:
  %tmp = tail call i32 @llvm.amdgcn.workitem.id.x() #1
  %tmp1 = add i32 0, %tmp
  %tmp2 = zext i32 %tmp1 to i64
  %tmp3 = add i64 0, %tmp2
  %tmp4 = shl i64 %tmp3, 32
  %tmp5 = ashr exact i64 %tmp4, 32
  %tmp6 = getelementptr inbounds i32, i32 addrspace(1)* %arg0, i64 %tmp5
  %tmp7 = load i32, i32 addrspace(1)* %tmp6, align 4
  %tmp8 = sext i32 %tmp7 to i64
  %tmp9 = getelementptr inbounds i32, i32 addrspace(1)* %arg1, i64 %tmp8
  %tmp10 = load i32, i32 addrspace(1)* %tmp9, align 4
  %tmp13 = zext i32 %tmp10 to i64
  %tmp14 = getelementptr inbounds i32, i32 addrspace(1)* %arg2, i64 %tmp13
  %tmp16 = load i32, i32 addrspace(1)* %tmp14, align 16
  %Pivot = icmp slt i32 %tmp16, 2
  br i1 %Pivot, label %LeafBlock, label %LeafBlock1

LeafBlock:                                        ; preds = %entry
  %SwitchLeaf = icmp eq i32 %tmp16, 1
  br i1 %SwitchLeaf, label %exit0, label %indirect.exit1

LeafBlock1:                                       ; preds = %entry
  %SwitchLeaf2 = icmp eq i32 %tmp16, 2
  br i1 %SwitchLeaf2, label %exit0, label %indirect.exit1

exit0:                                            ; preds = %LeafBlock, %LeafBlock1
  store volatile i32 17, i32 addrspace(3)* undef
  ret void

indirect.exit1:
  %load = load volatile i32, i32 addrspace(1)* undef
  store volatile i32 %load, i32 addrspace(1)* undef
  br label %exit1

exit1:                                            ; preds = %indirect.exit1
  store volatile i32 9, i32 addrspace(1)* undef
  unreachable
}

; IR-LABEL: @multi_divergent_region_exit_ret_switch(
; The region is entered through a divergent switch rather than a
; conditional branch.
define amdgpu_kernel void @multi_divergent_region_exit_ret_switch(i32 addrspace(1)* nocapture %arg0, i32 addrspace(1)* nocapture %arg1, i32 addrspace(1)* nocapture %arg2) #0 {
entry:
  %tmp = tail call i32 @llvm.amdgcn.workitem.id.x() #1
  %tmp1 = add i32 0, %tmp
  %tmp2 = zext i32 %tmp1 to i64
  %tmp3 = add i64 0, %tmp2
  %tmp4 = shl i64 %tmp3, 32
  %tmp5 = ashr exact i64 %tmp4, 32
  %tmp6 = getelementptr inbounds i32, i32 addrspace(1)* %arg0, i64 %tmp5
  %tmp7 = load i32, i32 addrspace(1)* %tmp6, align 4
  %tmp8 = sext i32 %tmp7 to i64
  %tmp9 = getelementptr inbounds i32, i32 addrspace(1)* %arg1, i64 %tmp8
  %tmp10 = load i32, i32 addrspace(1)* %tmp9, align 4
  %tmp13 = zext i32 %tmp10 to i64
  %tmp14 = getelementptr inbounds i32, i32 addrspace(1)* %arg2, i64 %tmp13
  %tmp16 = load i32, i32 addrspace(1)* %tmp14, align 16
  switch i32 %tmp16, label %exit1 [
    i32 1, label %LeafBlock
    i32 2, label %LeafBlock1
    i32 3, label %exit0
  ]

LeafBlock:                                        ; preds = %entry
  %SwitchLeaf = icmp eq i32 %tmp16, 1
  br i1 %SwitchLeaf, label %exit0, label %exit1

LeafBlock1:                                       ; preds = %entry
  %SwitchLeaf2 = icmp eq i32 %tmp16, 2
  br i1 %SwitchLeaf2, label %exit0, label %exit1

exit0:                                            ; preds = %entry, %LeafBlock, %LeafBlock1
  store volatile i32 17, i32 addrspace(3)* undef
  ret void

exit1:                                            ; preds = %entry, %LeafBlock, %LeafBlock1
  store volatile i32 9, i32 addrspace(1)* undef
  unreachable
}

; IR-LABEL: @divergent_multi_ret_nest_in_uniform_triangle(
; A divergent multi-exit region nested inside a uniform triangle.
define amdgpu_kernel void @divergent_multi_ret_nest_in_uniform_triangle(i32 %arg0) #0 {
entry:
  %uniform.cond0 = icmp eq i32 %arg0, 4
  br i1 %uniform.cond0, label %divergent.multi.exit.region, label %uniform.ret

divergent.multi.exit.region:
  %id.x = tail call i32 @llvm.amdgcn.workitem.id.x()
  %divergent.cond0 = icmp eq i32 %id.x, 0
  br i1 %divergent.cond0, label %divergent.ret0, label %divergent.ret1

divergent.ret0:
  store volatile i32 11, i32 addrspace(3)* undef
  ret void

divergent.ret1:
  store volatile i32 42, i32 addrspace(3)* undef
  ret void

uniform.ret:
  store volatile i32 9, i32 addrspace(1)* undef
  ret void
}

; IR-LABEL: @divergent_complex_multi_ret_nest_in_uniform_triangle(
; As above, but the divergent region itself contains nested divergent
; control flow (if/then/endif) before the two returns.
define amdgpu_kernel void @divergent_complex_multi_ret_nest_in_uniform_triangle(i32 %arg0) #0 {
entry:
  %uniform.cond0 = icmp eq i32 %arg0, 4
  br i1 %uniform.cond0, label %divergent.multi.exit.region, label %uniform.ret

divergent.multi.exit.region:
  %id.x = tail call i32 @llvm.amdgcn.workitem.id.x()
  %divergent.cond0 = icmp eq i32 %id.x, 0
  br i1 %divergent.cond0, label %divergent.if, label %divergent.ret1

divergent.if:
  %vgpr0 = load volatile float, float addrspace(1)* undef
  %divergent.cond1 = fcmp ogt float %vgpr0, 1.0
  br i1 %divergent.cond1, label %divergent.then, label %divergent.endif

divergent.then:
  %vgpr1 = load volatile float, float addrspace(1)* undef
  %divergent.cond2 = fcmp olt float %vgpr1, 4.0
  store volatile i32 33, i32 addrspace(1)* undef
  br i1 %divergent.cond2, label %divergent.ret0, label %divergent.endif

divergent.endif:
  store volatile i32 38, i32 addrspace(1)* undef
  br label %divergent.ret0

divergent.ret0:
  store volatile i32 11, i32 addrspace(3)* undef
  ret void

divergent.ret1:
  store volatile i32 42, i32 addrspace(3)* undef
  ret void

uniform.ret:
  store volatile i32 9, i32 addrspace(1)* undef
  ret void
}

; IR-LABEL: @uniform_complex_multi_ret_nest_in_divergent_triangle(
; IR: Flow1: ; preds = %uniform.ret1, %uniform.multi.exit.region
; IR: %8 = phi i1 [ false, %uniform.ret1 ], [ true, %uniform.multi.exit.region ]
; IR: br i1 %8, label %uniform.if, label %Flow2

; IR: Flow: ; preds = %uniform.then, %uniform.if
; IR: %11 = phi i1 [ %10, %uniform.then ], [ %9, %uniform.if ]
; IR: br i1 %11, label %uniform.endif, label %uniform.ret0

; IR: UnifiedReturnBlock: ; preds = %Flow3, %Flow2
; IR-NEXT: call void @llvm.amdgcn.end.cf(i64 %6)
; IR-NEXT: ret void
; Inverse nesting: a uniform multi-exit region (SGPR conditions) nested
; inside a divergent triangle on workitem id.
define amdgpu_kernel void @uniform_complex_multi_ret_nest_in_divergent_triangle(i32 %arg0) #0 {
entry:
  %id.x = tail call i32 @llvm.amdgcn.workitem.id.x()
  %divergent.cond0 = icmp eq i32 %id.x, 0
  br i1 %divergent.cond0, label %uniform.multi.exit.region, label %divergent.ret

uniform.multi.exit.region:
  %uniform.cond0 = icmp eq i32 %arg0, 4
  br i1 %uniform.cond0, label %uniform.if, label %uniform.ret1

uniform.if:
  %sgpr0 = load volatile i32, i32 addrspace(2)* undef
  %uniform.cond1 = icmp slt i32 %sgpr0, 1
  br i1 %uniform.cond1, label %uniform.then, label %uniform.endif

uniform.then:
  %sgpr1 = load volatile i32, i32 addrspace(2)* undef
  %uniform.cond2 = icmp sge i32 %sgpr1, 4
  store volatile i32 33, i32 addrspace(1)* undef
  br i1 %uniform.cond2, label %uniform.ret0, label %uniform.endif

uniform.endif:
  store volatile i32 38, i32 addrspace(1)* undef
  br label %uniform.ret0

uniform.ret0:
  store volatile i32 11, i32 addrspace(3)* undef
  ret void

uniform.ret1:
  store volatile i32 42, i32 addrspace(3)* undef
  ret void

divergent.ret:
  store volatile i32 9, i32 addrspace(1)* undef
  ret void
}

; IR-LABEL: @multi_divergent_unreachable_exit(
; IR: UnifiedUnreachableBlock:
; IR-NEXT: call void @llvm.amdgcn.unreachable()
; IR-NEXT: br label %UnifiedReturnBlock

; IR: UnifiedReturnBlock:
; IR-NEXT: call void @llvm.amdgcn.end.cf(i64
; IR-NEXT: ret void
; Multiple divergent unreachable exits plus a ret: the unreachable exits
; must be unified and then funneled into the unified return.
define amdgpu_kernel void @multi_divergent_unreachable_exit() #0 {
bb:
  %tmp = tail call i32 @llvm.amdgcn.workitem.id.x()
  switch i32 %tmp, label %bb3 [
    i32 2, label %bb1
    i32 0, label %bb2
  ]

bb1:                                              ; preds = %bb
  unreachable

bb2:                                              ; preds = %bb
  unreachable

bb3:                                              ; preds = %bb
  switch i32 undef, label %bb5 [
    i32 2, label %bb4
  ]

bb4:                                              ; preds = %bb3
  ret void

bb5:                                              ; preds = %bb3
  unreachable
}
declare i32 @llvm.amdgcn.workitem.id.x() #1

attributes #0 = { nounwind }
attributes #1 = { nounwind readnone }