Blame - llvm/test/CodeGen/AMDGPU/cross-block-use-is-not-abi-copy.ll - toolchain/llvm-project

blob: 805b2f5ad2bf8e4cd6b06cccf1d01205b363d616 [file] [log] [blame]

Matt Arsenault	167601e	2018-08-30 05:49:28 +0000	[diff] [blame]	1	; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
				2	; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s
				3
				4	; SelectionDAG builder was using the IR value kind to decide how to
				5	; split the types for copyToRegs/copyFromRegs in all contexts. This
				6	; was incorrect if the ABI-like value such as a call was used outside
				7	; of the block. The value in that case is not used directly, but
				8	; through another set of copies to potentially different register
				9	; types in the parent block.
				10
				11	; This would then end up producing inconsistent pairs of copies with
				12	; the wrong sizes when the vector type result from the call was split
				13	; into multiple pieces, but expected to be a single register in the
				14	; cross-block copy.
				15	;
				16	; This isn't exactly ideal for AMDGPU, since in reality the
				17	; intermediate vector register type is undesirable anyway, but it
				18	; requires more work to be able to split all vector copies in all
				19	; contexts.
				20	;
				21	; This was only an issue if the value was used directly in another
				22	; block. If there was an intermediate operation or a phi it was fine,
				23	; since that didn't look like an ABI copy.
				24
				25
				26	define float @call_split_type_used_outside_block_v2f32() #0 {
				27	; GCN-LABEL: call_split_type_used_outside_block_v2f32:
				28	; GCN: ; %bb.0: ; %bb0
				29	; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
				30	; GCN-NEXT: s_mov_b32 s5, s32
				31	; GCN-NEXT: buffer_store_dword v32, off, s[0:3], s5 offset:4 ; 4-byte Folded Spill
				32	; GCN-NEXT: v_writelane_b32 v32, s33, 0
				33	; GCN-NEXT: v_writelane_b32 v32, s34, 1
				34	; GCN-NEXT: s_add_u32 s32, s32, 0x400
				35	; GCN-NEXT: v_writelane_b32 v32, s35, 2
				36	; GCN-NEXT: s_getpc_b64 s[6:7]
				37	; GCN-NEXT: s_add_u32 s6, s6, func_v2f32@rel32@lo+4
				38	; GCN-NEXT: s_addc_u32 s7, s7, func_v2f32@rel32@hi+4
				39	; GCN-NEXT: s_mov_b64 s[34:35], s[30:31]
				40	; GCN-NEXT: s_mov_b32 s33, s5
				41	; GCN-NEXT: s_swappc_b64 s[30:31], s[6:7]
				42	; GCN-NEXT: s_mov_b32 s5, s33
				43	; GCN-NEXT: s_mov_b64 s[30:31], s[34:35]
				44	; GCN-NEXT: v_readlane_b32 s35, v32, 2
				45	; GCN-NEXT: v_readlane_b32 s34, v32, 1
				46	; GCN-NEXT: v_readlane_b32 s33, v32, 0
				47	; GCN-NEXT: buffer_load_dword v32, off, s[0:3], s5 offset:4 ; 4-byte Folded Reload
				48	; GCN-NEXT: s_sub_u32 s32, s32, 0x400
				49	; GCN-NEXT: s_waitcnt vmcnt(0)
				50	; GCN-NEXT: s_setpc_b64 s[30:31]
					; Test shape: the <2 x float> call result is defined in %bb0 and its
					; only use is the extractelement in %bb1, with no intermediate
					; operation or phi between the definition and the cross-block use.
					; The GCN lines above are autogenerated; regenerate them with
					; utils/update_llc_test_checks.py instead of editing by hand.
				51	bb0:
				52	%split.ret.type = call <2 x float> @func_v2f32()
					; Unconditional branch: places the use of the call result in a
					; different basic block from the call itself.
				53	br label %bb1
				54
				55	bb1:
					; Element 0 of the vector call result is the function's return value.
				56	%extract = extractelement <2 x float> %split.ret.type, i32 0
				57	ret float %extract
				58	}
				59
				60	define float @call_split_type_used_outside_block_v3f32() #0 {
				61	; GCN-LABEL: call_split_type_used_outside_block_v3f32:
				62	; GCN: ; %bb.0: ; %bb0
				63	; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
				64	; GCN-NEXT: s_mov_b32 s5, s32
				65	; GCN-NEXT: buffer_store_dword v32, off, s[0:3], s5 offset:4 ; 4-byte Folded Spill
				66	; GCN-NEXT: v_writelane_b32 v32, s33, 0
				67	; GCN-NEXT: v_writelane_b32 v32, s34, 1
				68	; GCN-NEXT: s_add_u32 s32, s32, 0x400
				69	; GCN-NEXT: v_writelane_b32 v32, s35, 2
				70	; GCN-NEXT: s_getpc_b64 s[6:7]
				71	; GCN-NEXT: s_add_u32 s6, s6, func_v3f32@rel32@lo+4
				72	; GCN-NEXT: s_addc_u32 s7, s7, func_v3f32@rel32@hi+4
				73	; GCN-NEXT: s_mov_b64 s[34:35], s[30:31]
				74	; GCN-NEXT: s_mov_b32 s33, s5
				75	; GCN-NEXT: s_swappc_b64 s[30:31], s[6:7]
				76	; GCN-NEXT: s_mov_b32 s5, s33
				77	; GCN-NEXT: s_mov_b64 s[30:31], s[34:35]
				78	; GCN-NEXT: v_readlane_b32 s35, v32, 2
				79	; GCN-NEXT: v_readlane_b32 s34, v32, 1
				80	; GCN-NEXT: v_readlane_b32 s33, v32, 0
				81	; GCN-NEXT: buffer_load_dword v32, off, s[0:3], s5 offset:4 ; 4-byte Folded Reload
				82	; GCN-NEXT: s_sub_u32 s32, s32, 0x400
				83	; GCN-NEXT: s_waitcnt vmcnt(0)
				84	; GCN-NEXT: s_setpc_b64 s[30:31]
					; Test shape: the <3 x float> call result is defined in %bb0 and its
					; only use is the extractelement in %bb1, with no intermediate
					; operation or phi between the definition and the cross-block use.
					; The GCN lines above are autogenerated; regenerate them with
					; utils/update_llc_test_checks.py instead of editing by hand.
				85	bb0:
				86	%split.ret.type = call <3 x float> @func_v3f32()
					; Unconditional branch: places the use of the call result in a
					; different basic block from the call itself.
				87	br label %bb1
				88
				89	bb1:
					; Element 0 of the vector call result is the function's return value.
				90	%extract = extractelement <3 x float> %split.ret.type, i32 0
				91	ret float %extract
				92	}
				93
				94	define half @call_split_type_used_outside_block_v4f16() #0 {
				95	; GCN-LABEL: call_split_type_used_outside_block_v4f16:
				96	; GCN: ; %bb.0: ; %bb0
				97	; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
				98	; GCN-NEXT: s_mov_b32 s5, s32
				99	; GCN-NEXT: buffer_store_dword v32, off, s[0:3], s5 offset:4 ; 4-byte Folded Spill
				100	; GCN-NEXT: v_writelane_b32 v32, s33, 0
				101	; GCN-NEXT: v_writelane_b32 v32, s34, 1
				102	; GCN-NEXT: s_add_u32 s32, s32, 0x400
				103	; GCN-NEXT: v_writelane_b32 v32, s35, 2
				104	; GCN-NEXT: s_getpc_b64 s[6:7]
				105	; GCN-NEXT: s_add_u32 s6, s6, func_v4f16@rel32@lo+4
				106	; GCN-NEXT: s_addc_u32 s7, s7, func_v4f16@rel32@hi+4
				107	; GCN-NEXT: s_mov_b64 s[34:35], s[30:31]
				108	; GCN-NEXT: s_mov_b32 s33, s5
				109	; GCN-NEXT: s_swappc_b64 s[30:31], s[6:7]
				110	; GCN-NEXT: s_mov_b32 s5, s33
				111	; GCN-NEXT: s_mov_b64 s[30:31], s[34:35]
				112	; GCN-NEXT: v_readlane_b32 s35, v32, 2
				113	; GCN-NEXT: v_readlane_b32 s34, v32, 1
				114	; GCN-NEXT: v_readlane_b32 s33, v32, 0
				115	; GCN-NEXT: buffer_load_dword v32, off, s[0:3], s5 offset:4 ; 4-byte Folded Reload
				116	; GCN-NEXT: s_sub_u32 s32, s32, 0x400
				117	; GCN-NEXT: s_waitcnt vmcnt(0)
				118	; GCN-NEXT: s_setpc_b64 s[30:31]
					; Test shape: the <4 x half> call result is defined in %bb0 and its
					; only use is the extractelement in %bb1, with no intermediate
					; operation or phi between the definition and the cross-block use.
					; The GCN lines above are autogenerated; regenerate them with
					; utils/update_llc_test_checks.py instead of editing by hand.
				119	bb0:
				120	%split.ret.type = call <4 x half> @func_v4f16()
					; Unconditional branch: places the use of the call result in a
					; different basic block from the call itself.
				121	br label %bb1
				122
				123	bb1:
					; Element 0 of the vector call result is the function's return value.
				124	%extract = extractelement <4 x half> %split.ret.type, i32 0
				125	ret half %extract
				126	}
				127
				128	define { i32, half } @call_split_type_used_outside_block_struct() #0 {
				129	; GCN-LABEL: call_split_type_used_outside_block_struct:
				130	; GCN: ; %bb.0: ; %bb0
				131	; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
				132	; GCN-NEXT: s_mov_b32 s5, s32
				133	; GCN-NEXT: buffer_store_dword v32, off, s[0:3], s5 offset:4 ; 4-byte Folded Spill
				134	; GCN-NEXT: v_writelane_b32 v32, s33, 0
				135	; GCN-NEXT: v_writelane_b32 v32, s34, 1
				136	; GCN-NEXT: s_add_u32 s32, s32, 0x400
				137	; GCN-NEXT: v_writelane_b32 v32, s35, 2
				138	; GCN-NEXT: s_getpc_b64 s[6:7]
				139	; GCN-NEXT: s_add_u32 s6, s6, func_struct@rel32@lo+4
				140	; GCN-NEXT: s_addc_u32 s7, s7, func_struct@rel32@hi+4
				141	; GCN-NEXT: s_mov_b64 s[34:35], s[30:31]
				142	; GCN-NEXT: s_mov_b32 s33, s5
				143	; GCN-NEXT: s_swappc_b64 s[30:31], s[6:7]
				144	; GCN-NEXT: s_mov_b32 s5, s33
				145	; GCN-NEXT: s_mov_b64 s[30:31], s[34:35]
				146	; GCN-NEXT: v_readlane_b32 s35, v32, 2
				147	; GCN-NEXT: v_readlane_b32 s34, v32, 1
				148	; GCN-NEXT: v_readlane_b32 s33, v32, 0
				149	; GCN-NEXT: buffer_load_dword v32, off, s[0:3], s5 offset:4 ; 4-byte Folded Reload
				150	; GCN-NEXT: v_mov_b32_e32 v1, v4
				151	; GCN-NEXT: s_sub_u32 s32, s32, 0x400
				152	; GCN-NEXT: s_waitcnt vmcnt(0)
				153	; GCN-NEXT: s_setpc_b64 s[30:31]
					; Test shape: the { <4 x i32>, <4 x half> } call result is defined in
					; %bb0 and is only used in %bb1, where element 0 of each struct member
					; is extracted and repacked into the { i32, half } return value.
					; NOTE(review): the extra v_mov_b32 v1, v4 in the checks presumably
					; moves the first register of the <4 x half> member into the second
					; return register -- confirm against the AMDGPU calling convention.
					; The GCN lines above are autogenerated; regenerate them with
					; utils/update_llc_test_checks.py instead of editing by hand.
				154	bb0:
				155	%split.ret.type = call { <4 x i32>, <4 x half> } @func_struct()
					; Unconditional branch: places every use of the call result in a
					; different basic block from the call itself.
				156	br label %bb1
				157
				158	bb1:
					; Split the aggregate into its two vector members.
				159	%val0 = extractvalue { <4 x i32>, <4 x half> } %split.ret.type, 0
				160	%val1 = extractvalue { <4 x i32>, <4 x half> } %split.ret.type, 1
					; Take element 0 of each vector member.
				161	%extract0 = extractelement <4 x i32> %val0, i32 0
				162	%extract1 = extractelement <4 x half> %val1, i32 0
					; Build the { i32, half } result from the two extracted scalars.
				163	%ins0 = insertvalue { i32, half } undef, i32 %extract0, 0
				164	%ins1 = insertvalue { i32, half } %ins0, half %extract1, 1
				165	ret { i32, half } %ins1
				166	}
				167
				168
				169	declare <2 x float> @func_v2f32() #0 ; callee of call_split_type_used_outside_block_v2f32
				170	declare <3 x float> @func_v3f32() #0 ; callee of call_split_type_used_outside_block_v3f32
				171	declare <4 x float> @func_v4f32() #0 ; NOTE(review): no caller among the functions visible in this file
				172	declare <4 x half> @func_v4f16() #0 ; callee of call_split_type_used_outside_block_v4f16
				173
				174	declare { <4 x i32>, <4 x half> } @func_struct() #0 ; callee of call_split_type_used_outside_block_struct
				175
					; Attribute group #0 is attached to every definition and declaration above.
				176	attributes #0 = { nounwind}