; RUN: opt -mtriple=amdgcn-- -S -amdgpu-unify-divergent-exit-nodes -verify -structurizecfg -verify -si-annotate-control-flow %s | FileCheck -check-prefix=IR %s
; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s

; Add extra verifier runs. There were some cases where invalid IR
; was produced but happened to be fixed by the later passes.

; Make sure divergent control flow with multiple exits from a region
; is properly handled. UnifyFunctionExitNodes should be run before
; StructurizeCFG.

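; Most of the tests below are variants of this CFG shape, where both
; leaf blocks branch to both exit blocks (a rough sketch, not checked
; for directly):
;
;   entry      -> LeafBlock, LeafBlock1
;   LeafBlock  -> exit0, exit1
;   LeafBlock1 -> exit0, exit1
;
; UnifyFunctionExitNodes first merges the returns into a single
; UnifiedReturnBlock, and StructurizeCFG then introduces the Flow
; blocks that the checks below look for.
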
; IR-LABEL: @multi_divergent_region_exit_ret_ret(
; IR: %1 = call { i1, i64 } @llvm.amdgcn.if(i1 %0)
; IR: %2 = extractvalue { i1, i64 } %1, 0
; IR: %3 = extractvalue { i1, i64 } %1, 1
; IR: br i1 %2, label %LeafBlock1, label %Flow

; IR: Flow:
; IR: %4 = phi i1 [ true, %LeafBlock1 ], [ false, %entry ]
; IR: %5 = phi i1 [ %10, %LeafBlock1 ], [ false, %entry ]
; IR: %6 = call { i1, i64 } @llvm.amdgcn.else(i64 %3)
; IR: %7 = extractvalue { i1, i64 } %6, 0
; IR: %8 = extractvalue { i1, i64 } %6, 1
; IR: br i1 %7, label %LeafBlock, label %Flow1

; IR: LeafBlock:
; IR: br label %Flow1

; IR: LeafBlock1:
; IR: br label %Flow{{$}}

; IR: Flow2:
; IR: %11 = phi i1 [ false, %exit1 ], [ %15, %Flow1 ]
; IR: call void @llvm.amdgcn.end.cf(i64 %19)
; IR: %12 = call { i1, i64 } @llvm.amdgcn.if(i1 %11)
; IR: %13 = extractvalue { i1, i64 } %12, 0
; IR: %14 = extractvalue { i1, i64 } %12, 1
; IR: br i1 %13, label %exit0, label %UnifiedReturnBlock

; IR: exit0:
; IR: store volatile i32 9, i32 addrspace(1)* undef
; IR: br label %UnifiedReturnBlock

; IR: Flow1:
; IR: %15 = phi i1 [ %SwitchLeaf, %LeafBlock ], [ %4, %Flow ]
; IR: %16 = phi i1 [ %9, %LeafBlock ], [ %5, %Flow ]
; IR: call void @llvm.amdgcn.end.cf(i64 %8)
; IR: %17 = call { i1, i64 } @llvm.amdgcn.if(i1 %16)
; IR: %18 = extractvalue { i1, i64 } %17, 0
; IR: %19 = extractvalue { i1, i64 } %17, 1
; IR: br i1 %18, label %exit1, label %Flow2

; IR: exit1:
; IR: store volatile i32 17, i32 addrspace(3)* undef
; IR: br label %Flow2

; IR: UnifiedReturnBlock:
; IR: call void @llvm.amdgcn.end.cf(i64 %14)
; IR: ret void


; GCN-LABEL: {{^}}multi_divergent_region_exit_ret_ret:
; GCN: v_cmp_lt_i32_e32 vcc, 1
; GCN: s_and_saveexec_b64
; GCN: s_xor_b64


; FIXME: Why is this compare essentially repeated?
; GCN: v_cmp_eq_u32_e32 vcc, 1, [[REG:v[0-9]+]]
; GCN-NEXT: v_cmp_ne_u32_e64 s{{\[[0-9]+:[0-9]+\]}}, 1, [[REG]]
; GCN: v_cndmask_b32_e64 v{{[0-9]+}}, 0, -1, vcc
; GCN: v_cndmask_b32_e64 v{{[0-9]+}}, 0, -1

; GCN: ; %Flow1
; GCN-NEXT: s_or_b64 exec, exec
; GCN: v_cmp_ne_u32_e32 vcc, 0

; GCN: ; %exit1
; GCN: ds_write_b32

; GCN: %Flow2
; GCN-NEXT: s_or_b64 exec, exec
; GCN: v_cmp_ne_u32_e32 vcc, 0
; GCN-NEXT: s_and_saveexec_b64
; GCN-NEXT: s_xor_b64

; GCN: ; %exit0
; GCN: buffer_store_dword

; GCN: ; %UnifiedReturnBlock
; GCN-NEXT: s_or_b64 exec, exec
; GCN-NEXT: s_endpgm
define amdgpu_kernel void @multi_divergent_region_exit_ret_ret(i32 addrspace(1)* nocapture %arg0, i32 addrspace(1)* nocapture %arg1, i32 addrspace(1)* nocapture %arg2) #0 {
entry:
  %tmp = tail call i32 @llvm.amdgcn.workitem.id.x() #1
  %tmp1 = add i32 0, %tmp
  %tmp2 = zext i32 %tmp1 to i64
  %tmp3 = add i64 0, %tmp2
  %tmp4 = shl i64 %tmp3, 32
  %tmp5 = ashr exact i64 %tmp4, 32
  %tmp6 = getelementptr inbounds i32, i32 addrspace(1)* %arg0, i64 %tmp5
  %tmp7 = load i32, i32 addrspace(1)* %tmp6, align 4
  %tmp8 = sext i32 %tmp7 to i64
  %tmp9 = getelementptr inbounds i32, i32 addrspace(1)* %arg1, i64 %tmp8
  %tmp10 = load i32, i32 addrspace(1)* %tmp9, align 4
  %tmp13 = zext i32 %tmp10 to i64
  %tmp14 = getelementptr inbounds i32, i32 addrspace(1)* %arg2, i64 %tmp13
  %tmp16 = load i32, i32 addrspace(1)* %tmp14, align 16
  %Pivot = icmp slt i32 %tmp16, 2
  br i1 %Pivot, label %LeafBlock, label %LeafBlock1

LeafBlock:                                        ; preds = %entry
  %SwitchLeaf = icmp eq i32 %tmp16, 1
  br i1 %SwitchLeaf, label %exit0, label %exit1

LeafBlock1:                                       ; preds = %entry
  %SwitchLeaf2 = icmp eq i32 %tmp16, 2
  br i1 %SwitchLeaf2, label %exit0, label %exit1

exit0:                                            ; preds = %LeafBlock, %LeafBlock1
  store volatile i32 9, i32 addrspace(1)* undef
  ret void

exit1:                                            ; preds = %LeafBlock, %LeafBlock1
  store volatile i32 17, i32 addrspace(3)* undef
  ret void
}

; IR-LABEL: @multi_divergent_region_exit_unreachable_unreachable(
; IR: %1 = call { i1, i64 } @llvm.amdgcn.if(i1 %0)

; IR: %6 = call { i1, i64 } @llvm.amdgcn.else(i64 %3)

; IR: %11 = phi i1 [ false, %exit1 ], [ %15, %Flow1 ]
; IR: call void @llvm.amdgcn.end.cf(i64 %19)
; IR: %12 = call { i1, i64 } @llvm.amdgcn.if(i1 %11)
; IR: br i1 %13, label %exit0, label %UnifiedUnreachableBlock


; IR: UnifiedUnreachableBlock:
; IR-NEXT: unreachable


; FIXME: Probably should insert an s_endpgm anyway.
; GCN-LABEL: {{^}}multi_divergent_region_exit_unreachable_unreachable:
; GCN: ; %UnifiedUnreachableBlock
; GCN-NEXT: .Lfunc_end
define amdgpu_kernel void @multi_divergent_region_exit_unreachable_unreachable(i32 addrspace(1)* nocapture %arg0, i32 addrspace(1)* nocapture %arg1, i32 addrspace(1)* nocapture %arg2) #0 {
entry:
  %tmp = tail call i32 @llvm.amdgcn.workitem.id.x() #1
  %tmp1 = add i32 0, %tmp
  %tmp2 = zext i32 %tmp1 to i64
  %tmp3 = add i64 0, %tmp2
  %tmp4 = shl i64 %tmp3, 32
  %tmp5 = ashr exact i64 %tmp4, 32
  %tmp6 = getelementptr inbounds i32, i32 addrspace(1)* %arg0, i64 %tmp5
  %tmp7 = load i32, i32 addrspace(1)* %tmp6, align 4
  %tmp8 = sext i32 %tmp7 to i64
  %tmp9 = getelementptr inbounds i32, i32 addrspace(1)* %arg1, i64 %tmp8
  %tmp10 = load i32, i32 addrspace(1)* %tmp9, align 4
  %tmp13 = zext i32 %tmp10 to i64
  %tmp14 = getelementptr inbounds i32, i32 addrspace(1)* %arg2, i64 %tmp13
  %tmp16 = load i32, i32 addrspace(1)* %tmp14, align 16
  %Pivot = icmp slt i32 %tmp16, 2
  br i1 %Pivot, label %LeafBlock, label %LeafBlock1

LeafBlock:                                        ; preds = %entry
  %SwitchLeaf = icmp eq i32 %tmp16, 1
  br i1 %SwitchLeaf, label %exit0, label %exit1

LeafBlock1:                                       ; preds = %entry
  %SwitchLeaf2 = icmp eq i32 %tmp16, 2
  br i1 %SwitchLeaf2, label %exit0, label %exit1

exit0:                                            ; preds = %LeafBlock, %LeafBlock1
  store volatile i32 9, i32 addrspace(1)* undef
  unreachable

exit1:                                            ; preds = %LeafBlock, %LeafBlock1
  store volatile i32 17, i32 addrspace(3)* undef
  unreachable
}

; IR-LABEL: @multi_exit_region_divergent_ret_uniform_ret(
; IR: %divergent.cond0 = icmp slt i32 %tmp16, 2
; IR: llvm.amdgcn.if
; IR: br i1

; IR: {{^}}Flow:
; IR: %4 = phi i1 [ true, %LeafBlock1 ], [ false, %entry ]
; IR: %5 = phi i1 [ %10, %LeafBlock1 ], [ false, %entry ]
; IR: %6 = call { i1, i64 } @llvm.amdgcn.else(i64 %3)
; IR: br i1 %7, label %LeafBlock, label %Flow1

; IR: {{^}}LeafBlock:
; IR: %divergent.cond1 = icmp eq i32 %tmp16, 1
; IR: %9 = xor i1 %divergent.cond1, true
; IR: br label %Flow1

; IR: LeafBlock1:
; IR: %uniform.cond0 = icmp eq i32 %arg3, 2
; IR: %10 = xor i1 %uniform.cond0, true
; IR: br label %Flow

; IR: Flow2:
; IR: %11 = phi i1 [ false, %exit1 ], [ %15, %Flow1 ]
; IR: call void @llvm.amdgcn.end.cf(i64 %19)
; IR: %12 = call { i1, i64 } @llvm.amdgcn.if(i1 %11)
; IR: br i1 %13, label %exit0, label %UnifiedReturnBlock

; IR: exit0:
; IR: store volatile i32 9, i32 addrspace(1)* undef
; IR: br label %UnifiedReturnBlock

; IR: {{^}}Flow1:
; IR: %15 = phi i1 [ %divergent.cond1, %LeafBlock ], [ %4, %Flow ]
; IR: %16 = phi i1 [ %9, %LeafBlock ], [ %5, %Flow ]
; IR: call void @llvm.amdgcn.end.cf(i64 %8)
; IR: %17 = call { i1, i64 } @llvm.amdgcn.if(i1 %16)
; IR: %18 = extractvalue { i1, i64 } %17, 0
; IR: %19 = extractvalue { i1, i64 } %17, 1
; IR: br i1 %18, label %exit1, label %Flow2

; IR: exit1:
; IR: store volatile i32 17, i32 addrspace(3)* undef
; IR: br label %Flow2

; IR: UnifiedReturnBlock:
; IR: call void @llvm.amdgcn.end.cf(i64 %14)
; IR: ret void
define amdgpu_kernel void @multi_exit_region_divergent_ret_uniform_ret(i32 addrspace(1)* nocapture %arg0, i32 addrspace(1)* nocapture %arg1, i32 addrspace(1)* nocapture %arg2, i32 %arg3) #0 {
entry:
  %tmp = tail call i32 @llvm.amdgcn.workitem.id.x() #1
  %tmp1 = add i32 0, %tmp
  %tmp2 = zext i32 %tmp1 to i64
  %tmp3 = add i64 0, %tmp2
  %tmp4 = shl i64 %tmp3, 32
  %tmp5 = ashr exact i64 %tmp4, 32
  %tmp6 = getelementptr inbounds i32, i32 addrspace(1)* %arg0, i64 %tmp5
  %tmp7 = load i32, i32 addrspace(1)* %tmp6, align 4
  %tmp8 = sext i32 %tmp7 to i64
  %tmp9 = getelementptr inbounds i32, i32 addrspace(1)* %arg1, i64 %tmp8
  %tmp10 = load i32, i32 addrspace(1)* %tmp9, align 4
  %tmp13 = zext i32 %tmp10 to i64
  %tmp14 = getelementptr inbounds i32, i32 addrspace(1)* %arg2, i64 %tmp13
  %tmp16 = load i32, i32 addrspace(1)* %tmp14, align 16
  %divergent.cond0 = icmp slt i32 %tmp16, 2
  br i1 %divergent.cond0, label %LeafBlock, label %LeafBlock1

LeafBlock:                                        ; preds = %entry
  %divergent.cond1 = icmp eq i32 %tmp16, 1
  br i1 %divergent.cond1, label %exit0, label %exit1

LeafBlock1:                                       ; preds = %entry
  %uniform.cond0 = icmp eq i32 %arg3, 2
  br i1 %uniform.cond0, label %exit0, label %exit1

exit0:                                            ; preds = %LeafBlock, %LeafBlock1
  store volatile i32 9, i32 addrspace(1)* undef
  ret void

exit1:                                            ; preds = %LeafBlock, %LeafBlock1
  store volatile i32 17, i32 addrspace(3)* undef
  ret void
}

; IR-LABEL: @multi_exit_region_uniform_ret_divergent_ret(
; IR: %1 = call { i1, i64 } @llvm.amdgcn.if(i1 %0)
; IR: br i1 %2, label %LeafBlock1, label %Flow

; IR: Flow:
; IR: %4 = phi i1 [ true, %LeafBlock1 ], [ false, %entry ]
; IR: %5 = phi i1 [ %10, %LeafBlock1 ], [ false, %entry ]
; IR: %6 = call { i1, i64 } @llvm.amdgcn.else(i64 %3)

; IR: %11 = phi i1 [ false, %exit1 ], [ %15, %Flow1 ]
; IR: call void @llvm.amdgcn.end.cf(i64 %19)
; IR: %12 = call { i1, i64 } @llvm.amdgcn.if(i1 %11)

define amdgpu_kernel void @multi_exit_region_uniform_ret_divergent_ret(i32 addrspace(1)* nocapture %arg0, i32 addrspace(1)* nocapture %arg1, i32 addrspace(1)* nocapture %arg2, i32 %arg3) #0 {
entry:
  %tmp = tail call i32 @llvm.amdgcn.workitem.id.x() #1
  %tmp1 = add i32 0, %tmp
  %tmp2 = zext i32 %tmp1 to i64
  %tmp3 = add i64 0, %tmp2
  %tmp4 = shl i64 %tmp3, 32
  %tmp5 = ashr exact i64 %tmp4, 32
  %tmp6 = getelementptr inbounds i32, i32 addrspace(1)* %arg0, i64 %tmp5
  %tmp7 = load i32, i32 addrspace(1)* %tmp6, align 4
  %tmp8 = sext i32 %tmp7 to i64
  %tmp9 = getelementptr inbounds i32, i32 addrspace(1)* %arg1, i64 %tmp8
  %tmp10 = load i32, i32 addrspace(1)* %tmp9, align 4
  %tmp13 = zext i32 %tmp10 to i64
  %tmp14 = getelementptr inbounds i32, i32 addrspace(1)* %arg2, i64 %tmp13
  %tmp16 = load i32, i32 addrspace(1)* %tmp14, align 16
  %Pivot = icmp slt i32 %tmp16, 2
  br i1 %Pivot, label %LeafBlock, label %LeafBlock1

LeafBlock:                                        ; preds = %entry
  %SwitchLeaf = icmp eq i32 %arg3, 1
  br i1 %SwitchLeaf, label %exit0, label %exit1

LeafBlock1:                                       ; preds = %entry
  %SwitchLeaf2 = icmp eq i32 %tmp16, 2
  br i1 %SwitchLeaf2, label %exit0, label %exit1

exit0:                                            ; preds = %LeafBlock, %LeafBlock1
  store volatile i32 9, i32 addrspace(1)* undef
  ret void

exit1:                                            ; preds = %LeafBlock, %LeafBlock1
  store volatile i32 17, i32 addrspace(3)* undef
  ret void
}

; IR-LABEL: @multi_divergent_region_exit_ret_ret_return_value(
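; Unlike the kernel variants above, this function returns a value, so
; unifying the exits also requires a phi (%UnifiedRetVal) that merges
; the two return values in the unified return block.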
; IR: Flow2:
; IR: %11 = phi float [ 2.000000e+00, %exit1 ], [ undef, %Flow1 ]
; IR: %12 = phi i1 [ false, %exit1 ], [ %16, %Flow1 ]
; IR: call void @llvm.amdgcn.end.cf(i64 %20)

; IR: UnifiedReturnBlock:
; IR: %UnifiedRetVal = phi float [ %11, %Flow2 ], [ 1.000000e+00, %exit0 ]
; IR: call void @llvm.amdgcn.end.cf(i64 %15)
; IR: ret float %UnifiedRetVal
define amdgpu_ps float @multi_divergent_region_exit_ret_ret_return_value(i32 %vgpr) #0 {
entry:
  %Pivot = icmp slt i32 %vgpr, 2
  br i1 %Pivot, label %LeafBlock, label %LeafBlock1

LeafBlock:                                        ; preds = %entry
  %SwitchLeaf = icmp eq i32 %vgpr, 1
  br i1 %SwitchLeaf, label %exit0, label %exit1

LeafBlock1:                                       ; preds = %entry
  %SwitchLeaf2 = icmp eq i32 %vgpr, 2
  br i1 %SwitchLeaf2, label %exit0, label %exit1

exit0:                                            ; preds = %LeafBlock, %LeafBlock1
  store i32 9, i32 addrspace(1)* undef
  ret float 1.0

exit1:                                            ; preds = %LeafBlock, %LeafBlock1
  store i32 17, i32 addrspace(3)* undef
  ret float 2.0
}

; IR-LABEL: @uniform_branch_to_multi_divergent_region_exit_ret_ret_return_value(

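; The outer branch compares an SGPR argument, so it is uniform and is
; expected (per the checks below) to become a scalar compare and branch
; (s_cmp, s_cbranch_scc0) around the divergent region, instead of an
; exec-mask save/restore.
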
; GCN-LABEL: {{^}}uniform_branch_to_multi_divergent_region_exit_ret_ret_return_value:
; GCN: s_cmp_gt_i32 s0, 1
; GCN: s_cbranch_scc0 [[FLOW:BB[0-9]+_[0-9]+]]

; GCN: v_cmp_ne_u32_e32 vcc, 7, v0

; GCN: {{^}}[[FLOW]]:
; GCN: s_cbranch_vccnz [[FLOW1:BB[0-9]+]]

; GCN: v_mov_b32_e32 v0, 2.0
; GCN: s_or_b64 exec, exec
; GCN: s_and_b64 exec, exec
; GCN: v_mov_b32_e32 v0, 1.0

; GCN: {{^BB[0-9]+_[0-9]+}}: ; %UnifiedReturnBlock
; GCN-NEXT: s_or_b64 exec, exec
; GCN-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
; GCN-NEXT: ; return

define amdgpu_ps float @uniform_branch_to_multi_divergent_region_exit_ret_ret_return_value(i32 inreg %sgpr, i32 %vgpr) #0 {
entry:
  %uniform.cond = icmp slt i32 %sgpr, 2
  br i1 %uniform.cond, label %LeafBlock, label %LeafBlock1

LeafBlock:                                        ; preds = %entry
  %divergent.cond0 = icmp eq i32 %vgpr, 3
  br i1 %divergent.cond0, label %exit0, label %exit1

LeafBlock1:                                       ; preds = %entry
  %divergent.cond1 = icmp eq i32 %vgpr, 7
  br i1 %divergent.cond1, label %exit0, label %exit1

exit0:                                            ; preds = %LeafBlock, %LeafBlock1
  store i32 9, i32 addrspace(1)* undef
  ret float 1.0

exit1:                                            ; preds = %LeafBlock, %LeafBlock1
  store i32 17, i32 addrspace(3)* undef
  ret float 2.0
}

; IR-LABEL: @multi_divergent_region_exit_ret_unreachable(
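; Same CFG as @multi_divergent_region_exit_ret_ret, except exit1 ends
; in unreachable instead of ret. The unreachable exit is still routed
; through Flow2, with @llvm.amdgcn.unreachable standing in for its
; terminator.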
; IR: %1 = call { i1, i64 } @llvm.amdgcn.if(i1 %0)

; IR: Flow:
; IR: %4 = phi i1 [ true, %LeafBlock1 ], [ false, %entry ]
; IR: %5 = phi i1 [ %10, %LeafBlock1 ], [ false, %entry ]
; IR: %6 = call { i1, i64 } @llvm.amdgcn.else(i64 %3)

; IR: Flow2:
; IR: %11 = phi i1 [ false, %exit1 ], [ %15, %Flow1 ]
; IR: call void @llvm.amdgcn.end.cf(i64 %19)
; IR: %12 = call { i1, i64 } @llvm.amdgcn.if(i1 %11)
; IR: br i1 %13, label %exit0, label %UnifiedReturnBlock

; IR: exit0:
; IR-NEXT: store volatile i32 17, i32 addrspace(3)* undef
; IR-NEXT: br label %UnifiedReturnBlock

; IR: Flow1:
; IR: %15 = phi i1 [ %SwitchLeaf, %LeafBlock ], [ %4, %Flow ]
; IR: %16 = phi i1 [ %9, %LeafBlock ], [ %5, %Flow ]
; IR: call void @llvm.amdgcn.end.cf(i64 %8)
; IR: %17 = call { i1, i64 } @llvm.amdgcn.if(i1 %16)
; IR: %18 = extractvalue { i1, i64 } %17, 0
; IR: %19 = extractvalue { i1, i64 } %17, 1
; IR: br i1 %18, label %exit1, label %Flow2

; IR: exit1:
; IR-NEXT: store volatile i32 9, i32 addrspace(1)* undef
; IR-NEXT: call void @llvm.amdgcn.unreachable()
; IR-NEXT: br label %Flow2

; IR: UnifiedReturnBlock:
; IR-NEXT: call void @llvm.amdgcn.end.cf(i64 %14)
; IR-NEXT: ret void
define amdgpu_kernel void @multi_divergent_region_exit_ret_unreachable(i32 addrspace(1)* nocapture %arg0, i32 addrspace(1)* nocapture %arg1, i32 addrspace(1)* nocapture %arg2) #0 {
entry:
  %tmp = tail call i32 @llvm.amdgcn.workitem.id.x() #1
  %tmp1 = add i32 0, %tmp
  %tmp2 = zext i32 %tmp1 to i64
  %tmp3 = add i64 0, %tmp2
  %tmp4 = shl i64 %tmp3, 32
  %tmp5 = ashr exact i64 %tmp4, 32
  %tmp6 = getelementptr inbounds i32, i32 addrspace(1)* %arg0, i64 %tmp5
  %tmp7 = load i32, i32 addrspace(1)* %tmp6, align 4
  %tmp8 = sext i32 %tmp7 to i64
  %tmp9 = getelementptr inbounds i32, i32 addrspace(1)* %arg1, i64 %tmp8
  %tmp10 = load i32, i32 addrspace(1)* %tmp9, align 4
  %tmp13 = zext i32 %tmp10 to i64
  %tmp14 = getelementptr inbounds i32, i32 addrspace(1)* %arg2, i64 %tmp13
  %tmp16 = load i32, i32 addrspace(1)* %tmp14, align 16
  %Pivot = icmp slt i32 %tmp16, 2
  br i1 %Pivot, label %LeafBlock, label %LeafBlock1

LeafBlock:                                        ; preds = %entry
  %SwitchLeaf = icmp eq i32 %tmp16, 1
  br i1 %SwitchLeaf, label %exit0, label %exit1

LeafBlock1:                                       ; preds = %entry
  %SwitchLeaf2 = icmp eq i32 %tmp16, 2
  br i1 %SwitchLeaf2, label %exit0, label %exit1

exit0:                                            ; preds = %LeafBlock, %LeafBlock1
  store volatile i32 17, i32 addrspace(3)* undef
  ret void

exit1:                                            ; preds = %LeafBlock, %LeafBlock1
  store volatile i32 9, i32 addrspace(1)* undef
  unreachable
}

; The non-uniformity of the branch to the exiting blocks requires
; looking at transitive predecessors.

; IR-LABEL: @indirect_multi_divergent_region_exit_ret_unreachable(

; IR: exit0: ; preds = %Flow2
; IR-NEXT: store volatile i32 17, i32 addrspace(3)* undef
; IR-NEXT: br label %UnifiedReturnBlock


; IR: indirect.exit1:
; IR: %load = load volatile i32, i32 addrspace(1)* undef
; IR: store volatile i32 %load, i32 addrspace(1)* undef
; IR: store volatile i32 9, i32 addrspace(1)* undef
; IR: call void @llvm.amdgcn.unreachable()
; IR-NEXT: br label %Flow2

; IR: UnifiedReturnBlock: ; preds = %exit0, %Flow2
; IR-NEXT: call void @llvm.amdgcn.end.cf(i64 %14)
; IR-NEXT: ret void
define amdgpu_kernel void @indirect_multi_divergent_region_exit_ret_unreachable(i32 addrspace(1)* nocapture %arg0, i32 addrspace(1)* nocapture %arg1, i32 addrspace(1)* nocapture %arg2) #0 {
entry:
  %tmp = tail call i32 @llvm.amdgcn.workitem.id.x() #1
  %tmp1 = add i32 0, %tmp
  %tmp2 = zext i32 %tmp1 to i64
  %tmp3 = add i64 0, %tmp2
  %tmp4 = shl i64 %tmp3, 32
  %tmp5 = ashr exact i64 %tmp4, 32
  %tmp6 = getelementptr inbounds i32, i32 addrspace(1)* %arg0, i64 %tmp5
  %tmp7 = load i32, i32 addrspace(1)* %tmp6, align 4
  %tmp8 = sext i32 %tmp7 to i64
  %tmp9 = getelementptr inbounds i32, i32 addrspace(1)* %arg1, i64 %tmp8
  %tmp10 = load i32, i32 addrspace(1)* %tmp9, align 4
  %tmp13 = zext i32 %tmp10 to i64
  %tmp14 = getelementptr inbounds i32, i32 addrspace(1)* %arg2, i64 %tmp13
  %tmp16 = load i32, i32 addrspace(1)* %tmp14, align 16
  %Pivot = icmp slt i32 %tmp16, 2
  br i1 %Pivot, label %LeafBlock, label %LeafBlock1

LeafBlock:                                        ; preds = %entry
  %SwitchLeaf = icmp eq i32 %tmp16, 1
  br i1 %SwitchLeaf, label %exit0, label %indirect.exit1

LeafBlock1:                                       ; preds = %entry
  %SwitchLeaf2 = icmp eq i32 %tmp16, 2
  br i1 %SwitchLeaf2, label %exit0, label %indirect.exit1

exit0:                                            ; preds = %LeafBlock, %LeafBlock1
  store volatile i32 17, i32 addrspace(3)* undef
  ret void

indirect.exit1:                                   ; preds = %LeafBlock, %LeafBlock1
  %load = load volatile i32, i32 addrspace(1)* undef
  store volatile i32 %load, i32 addrspace(1)* undef
  br label %exit1

exit1:                                            ; preds = %indirect.exit1
  store volatile i32 9, i32 addrspace(1)* undef
  unreachable
}

; IR-LABEL: @multi_divergent_region_exit_ret_switch(
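; Here the divergent region is entered through a switch rather than a
; branch, and the exits mix ret (exit0) and unreachable (exit1), so the
; default and case destinations must all be funneled into the unified
; exit blocks.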
define amdgpu_kernel void @multi_divergent_region_exit_ret_switch(i32 addrspace(1)* nocapture %arg0, i32 addrspace(1)* nocapture %arg1, i32 addrspace(1)* nocapture %arg2) #0 {
entry:
  %tmp = tail call i32 @llvm.amdgcn.workitem.id.x() #1
  %tmp1 = add i32 0, %tmp
  %tmp2 = zext i32 %tmp1 to i64
  %tmp3 = add i64 0, %tmp2
  %tmp4 = shl i64 %tmp3, 32
  %tmp5 = ashr exact i64 %tmp4, 32
  %tmp6 = getelementptr inbounds i32, i32 addrspace(1)* %arg0, i64 %tmp5
  %tmp7 = load i32, i32 addrspace(1)* %tmp6, align 4
  %tmp8 = sext i32 %tmp7 to i64
  %tmp9 = getelementptr inbounds i32, i32 addrspace(1)* %arg1, i64 %tmp8
  %tmp10 = load i32, i32 addrspace(1)* %tmp9, align 4
  %tmp13 = zext i32 %tmp10 to i64
  %tmp14 = getelementptr inbounds i32, i32 addrspace(1)* %arg2, i64 %tmp13
  %tmp16 = load i32, i32 addrspace(1)* %tmp14, align 16
  switch i32 %tmp16, label %exit1
       [ i32 1, label %LeafBlock
         i32 2, label %LeafBlock1
         i32 3, label %exit0 ]

LeafBlock:                                        ; preds = %entry
  %SwitchLeaf = icmp eq i32 %tmp16, 1
  br i1 %SwitchLeaf, label %exit0, label %exit1

LeafBlock1:                                       ; preds = %entry
  %SwitchLeaf2 = icmp eq i32 %tmp16, 2
  br i1 %SwitchLeaf2, label %exit0, label %exit1

exit0:                                            ; preds = %entry, %LeafBlock, %LeafBlock1
  store volatile i32 17, i32 addrspace(3)* undef
  ret void

exit1:                                            ; preds = %entry, %LeafBlock, %LeafBlock1
  store volatile i32 9, i32 addrspace(1)* undef
  unreachable
}

; IR-LABEL: @divergent_multi_ret_nest_in_uniform_triangle(
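; A divergent two-return region nested inside a uniform triangle; the
; two divergent returns and the uniform one all need to be merged into
; a single exit.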
define amdgpu_kernel void @divergent_multi_ret_nest_in_uniform_triangle(i32 %arg0) #0 {
entry:
  %uniform.cond0 = icmp eq i32 %arg0, 4
  br i1 %uniform.cond0, label %divergent.multi.exit.region, label %uniform.ret

divergent.multi.exit.region:                      ; preds = %entry
  %id.x = tail call i32 @llvm.amdgcn.workitem.id.x()
  %divergent.cond0 = icmp eq i32 %id.x, 0
  br i1 %divergent.cond0, label %divergent.ret0, label %divergent.ret1

divergent.ret0:                                   ; preds = %divergent.multi.exit.region
  store volatile i32 11, i32 addrspace(3)* undef
  ret void

divergent.ret1:                                   ; preds = %divergent.multi.exit.region
  store volatile i32 42, i32 addrspace(3)* undef
  ret void

uniform.ret:                                      ; preds = %entry
  store volatile i32 9, i32 addrspace(1)* undef
  ret void
}

; IR-LABEL: @divergent_complex_multi_ret_nest_in_uniform_triangle(
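; As above, but the divergent region contains additional nested
; divergent control flow (an if/then/endif) before reaching its
; returns.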
define amdgpu_kernel void @divergent_complex_multi_ret_nest_in_uniform_triangle(i32 %arg0) #0 {
entry:
  %uniform.cond0 = icmp eq i32 %arg0, 4
  br i1 %uniform.cond0, label %divergent.multi.exit.region, label %uniform.ret

divergent.multi.exit.region:                      ; preds = %entry
  %id.x = tail call i32 @llvm.amdgcn.workitem.id.x()
  %divergent.cond0 = icmp eq i32 %id.x, 0
  br i1 %divergent.cond0, label %divergent.if, label %divergent.ret1

divergent.if:                                     ; preds = %divergent.multi.exit.region
  %vgpr0 = load volatile float, float addrspace(1)* undef
  %divergent.cond1 = fcmp ogt float %vgpr0, 1.0
  br i1 %divergent.cond1, label %divergent.then, label %divergent.endif

divergent.then:                                   ; preds = %divergent.if
  %vgpr1 = load volatile float, float addrspace(1)* undef
  %divergent.cond2 = fcmp olt float %vgpr1, 4.0
  store volatile i32 33, i32 addrspace(1)* undef
  br i1 %divergent.cond2, label %divergent.ret0, label %divergent.endif

divergent.endif:                                  ; preds = %divergent.then, %divergent.if
  store volatile i32 38, i32 addrspace(1)* undef
  br label %divergent.ret0

divergent.ret0:                                   ; preds = %divergent.endif, %divergent.then
  store volatile i32 11, i32 addrspace(3)* undef
  ret void

divergent.ret1:                                   ; preds = %divergent.multi.exit.region
  store volatile i32 42, i32 addrspace(3)* undef
  ret void

uniform.ret:                                      ; preds = %entry
  store volatile i32 9, i32 addrspace(1)* undef
  ret void
}

; IR-LABEL: @uniform_complex_multi_ret_nest_in_divergent_triangle(
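; The mirror image of the previous test: a uniform multi-exit region
; nested inside a divergent triangle.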
; IR: Flow1: ; preds = %uniform.ret1, %uniform.multi.exit.region
; IR: %8 = phi i1 [ false, %uniform.ret1 ], [ true, %uniform.multi.exit.region ]
; IR: br i1 %8, label %uniform.if, label %Flow2

; IR: Flow: ; preds = %uniform.then, %uniform.if
; IR: %11 = phi i1 [ %10, %uniform.then ], [ %9, %uniform.if ]
; IR: br i1 %11, label %uniform.endif, label %uniform.ret0

; IR: UnifiedReturnBlock: ; preds = %Flow3, %Flow2
; IR-NEXT: call void @llvm.amdgcn.end.cf(i64 %6)
; IR-NEXT: ret void
define amdgpu_kernel void @uniform_complex_multi_ret_nest_in_divergent_triangle(i32 %arg0) #0 {
entry:
  %id.x = tail call i32 @llvm.amdgcn.workitem.id.x()
  %divergent.cond0 = icmp eq i32 %id.x, 0
  br i1 %divergent.cond0, label %uniform.multi.exit.region, label %divergent.ret

uniform.multi.exit.region:                        ; preds = %entry
  %uniform.cond0 = icmp eq i32 %arg0, 4
  br i1 %uniform.cond0, label %uniform.if, label %uniform.ret1

uniform.if:                                       ; preds = %uniform.multi.exit.region
  %sgpr0 = load volatile i32, i32 addrspace(2)* undef
  %uniform.cond1 = icmp slt i32 %sgpr0, 1
  br i1 %uniform.cond1, label %uniform.then, label %uniform.endif

uniform.then:                                     ; preds = %uniform.if
  %sgpr1 = load volatile i32, i32 addrspace(2)* undef
  %uniform.cond2 = icmp sge i32 %sgpr1, 4
  store volatile i32 33, i32 addrspace(1)* undef
  br i1 %uniform.cond2, label %uniform.ret0, label %uniform.endif

uniform.endif:                                    ; preds = %uniform.then, %uniform.if
  store volatile i32 38, i32 addrspace(1)* undef
  br label %uniform.ret0

uniform.ret0:                                     ; preds = %uniform.endif, %uniform.then
  store volatile i32 11, i32 addrspace(3)* undef
  ret void

uniform.ret1:                                     ; preds = %uniform.multi.exit.region
  store volatile i32 42, i32 addrspace(3)* undef
  ret void

divergent.ret:                                    ; preds = %entry
  store volatile i32 9, i32 addrspace(1)* undef
  ret void
}

; IR-LABEL: @multi_divergent_unreachable_exit(
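; Both destinations of the divergent switch are unreachable, so they
; are merged into a UnifiedUnreachableBlock, which in turn has to
; branch on to the UnifiedReturnBlock created for the returning path.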
; IR: UnifiedUnreachableBlock:
; IR-NEXT: call void @llvm.amdgcn.unreachable()
; IR-NEXT: br label %UnifiedReturnBlock

; IR: UnifiedReturnBlock:
; IR-NEXT: call void @llvm.amdgcn.end.cf(i64
; IR-NEXT: ret void
define amdgpu_kernel void @multi_divergent_unreachable_exit() #0 {
bb:
  %tmp = tail call i32 @llvm.amdgcn.workitem.id.x()
  switch i32 %tmp, label %bb3 [
    i32 2, label %bb1
    i32 0, label %bb2
  ]

bb1:                                              ; preds = %bb
  unreachable

bb2:                                              ; preds = %bb
  unreachable

bb3:                                              ; preds = %bb
  switch i32 undef, label %bb5 [
    i32 2, label %bb4
  ]

bb4:                                              ; preds = %bb3
  ret void

bb5:                                              ; preds = %bb3
  unreachable
}

declare i32 @llvm.amdgcn.workitem.id.x() #1

attributes #0 = { nounwind }
attributes #1 = { nounwind readnone }