Blame - llvm/test/CodeGen/AMDGPU/coalescer-subrange-crash.ll - toolchain/llvm-project

blob: ef1b3d25f88337a516bf1660474367b0b81425da [file] [log] [blame]

Matt Arsenault	d2c8a33	2017-02-16 02:01:13 +0000	[diff] [blame]	1	; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck %s
Krzysztof Parzyszek	3bf4aec	2016-09-02 19:48:55 +0000	[diff] [blame]	2	;
				3	; This testcase used to cause the following crash:
				4	;
				5	; *** Couldn't join subrange!
				6	;
				7	; UNREACHABLE executed at lib/CodeGen/RegisterCoalescer.cpp:2666!
				8	;
				9	; The insertelement instructions became subregister definitions: one virtual
				10	; register was defined and re-defined by one group of the consecutive insert-
				11	; elements, and another was defined by the second group.
				12	; Since a copy between the two full registers was present in the program,
				13	; the coalescer tried to merge them. The join algorithm for the main range
				14	; decided that it was correct to do so, while the subrange join unexpectedly
				15	; failed. This was caused by the live interval subranges not being computed
				16	; correctly: subregister defs are not uses for the purpose of subranges.
				17	;
				18	; Test for a valid output:
				19	; CHECK: image_sample_c_d_o
Krzysztof Parzyszek	3bf4aec	2016-09-02 19:48:55 +0000	[diff] [blame]	20	define amdgpu_ps <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> @main([17 x <16 x i8>] addrspace(2)* byval dereferenceable(18446744073709551615) %arg, [16 x <16 x i8>] addrspace(2)* byval dereferenceable(18446744073709551615) %arg1, [32 x <8 x i32>] addrspace(2)* byval dereferenceable(18446744073709551615) %arg2, [16 x <8 x i32>] addrspace(2)* byval dereferenceable(18446744073709551615) %arg3, [16 x <4 x i32>] addrspace(2)* byval dereferenceable(18446744073709551615) %arg4, float inreg %arg5, i32 inreg %arg6, <2 x i32> %arg7, <2 x i32> %arg8, <2 x i32> %arg9, <3 x i32> %arg10, <2 x i32> %arg11, <2 x i32> %arg12, <2 x i32> %arg13, float %arg14, float %arg15, float %arg16, float %arg17, float %arg18, i32 %arg19, i32 %arg20, float %arg21, i32 %arg22) #0 {
				21	main_body:
					; Split %arg8 into its two i32 lanes, reinterpret each as a float, and
					; feed them (together with %arg6) to the interp.p1 / interp.p2 intrinsics.
Matt Arsenault	d2c8a33	2017-02-16 02:01:13 +0000	[diff] [blame]	22	%i.i = extractelement <2 x i32> %arg8, i32 0
				23	%j.i = extractelement <2 x i32> %arg8, i32 1
				24	%i.f.i = bitcast i32 %i.i to float
				25	%j.f.i = bitcast i32 %j.i to float
				26	%p1.i = call float @llvm.amdgcn.interp.p1(float %i.f.i, i32 3, i32 0, i32 %arg6) #1
				27	%p2.i = call float @llvm.amdgcn.interp.p2(float %p1.i, float %j.f.i, i32 3, i32 0, i32 %arg6) #1
					; Two values derived from %p2.i by adding the hex float constants below
					; (approximately -0.05 and +0.05); each one seeds one insertelement group.
				28	%tmp23 = fadd float %p2.i, 0xBFA99999A0000000
				29	%tmp24 = fadd float %p2.i, 0x3FA99999A0000000
					; First insertelement group: starts from a constant <16 x i32> whose lane 0
					; is 212739, puts %tmp25 in lane 1 and 0 in lane 5, and inserts undef into
					; lanes 2-4 and 6-8.
Krzysztof Parzyszek	3bf4aec	2016-09-02 19:48:55 +0000	[diff] [blame]	30	%tmp25 = bitcast float %tmp23 to i32
				31	%tmp26 = insertelement <16 x i32> <i32 212739, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>, i32 %tmp25, i32 1
				32	%tmp27 = insertelement <16 x i32> %tmp26, i32 undef, i32 2
				33	%tmp28 = insertelement <16 x i32> %tmp27, i32 undef, i32 3
				34	%tmp29 = insertelement <16 x i32> %tmp28, i32 undef, i32 4
				35	%tmp30 = insertelement <16 x i32> %tmp29, i32 0, i32 5
				36	%tmp31 = insertelement <16 x i32> %tmp30, i32 undef, i32 6
				37	%tmp32 = insertelement <16 x i32> %tmp31, i32 undef, i32 7
				38	%tmp33 = insertelement <16 x i32> %tmp32, i32 undef, i32 8
					; The vector is reinterpreted as <16 x float> and passed as the first
					; argument of the image.sample.c.d.o intrinsic; only element 0 of the
					; <4 x float> result is used.
Matt Arsenault	964a848	2017-03-21 16:24:12 +0000	[diff] [blame]	39	%tmp33.bc = bitcast <16 x i32> %tmp33 to <16 x float>
				40	%tmp34 = call <4 x float> @llvm.amdgcn.image.sample.c.d.o.v4f32.v16f32.v8i32(<16 x float> %tmp33.bc, <8 x i32> undef, <4 x i32> undef, i32 15, i1 false, i1 false, i1 false, i1 false, i1 true)
Krzysztof Parzyszek	3bf4aec	2016-09-02 19:48:55 +0000	[diff] [blame]	41	%tmp35 = extractelement <4 x float> %tmp34, i32 0
					; Second insertelement group: same lane pattern as the first group, but
					; lane 1 comes from %tmp36 (the bitcast of %tmp24).
				42	%tmp36 = bitcast float %tmp24 to i32
				43	%tmp37 = insertelement <16 x i32> <i32 212739, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>, i32 %tmp36, i32 1
				44	%tmp38 = insertelement <16 x i32> %tmp37, i32 undef, i32 2
				45	%tmp39 = insertelement <16 x i32> %tmp38, i32 undef, i32 3
				46	%tmp40 = insertelement <16 x i32> %tmp39, i32 undef, i32 4
				47	%tmp41 = insertelement <16 x i32> %tmp40, i32 0, i32 5
				48	%tmp42 = insertelement <16 x i32> %tmp41, i32 undef, i32 6
				49	%tmp43 = insertelement <16 x i32> %tmp42, i32 undef, i32 7
				50	%tmp44 = insertelement <16 x i32> %tmp43, i32 undef, i32 8
Matt Arsenault	964a848	2017-03-21 16:24:12 +0000	[diff] [blame]	51	%tmp44.bc = bitcast <16 x i32> %tmp44 to <16 x float>
				52	%tmp45 = call <4 x float> @llvm.amdgcn.image.sample.c.d.o.v4f32.v16f32.v8i32(<16 x float> %tmp44.bc, <8 x i32> undef, <4 x i32> undef, i32 15, i1 false, i1 false, i1 false, i1 false, i1 true)
Krzysztof Parzyszek	3bf4aec	2016-09-02 19:48:55 +0000	[diff] [blame]	53	%tmp46 = extractelement <4 x float> %tmp45, i32 0
					; Multiply element 0 of both sample results, store the product in field 14
					; of the packed result struct and %arg21 in field 24, then return it; all
					; other fields stay undef.
				54	%tmp47 = fmul float %tmp35, %tmp46
				55	%tmp48 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> undef, float %tmp47, 14
				56	%tmp49 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %tmp48, float %arg21, 24
				57	ret <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %tmp49
				58	}
				59
Matt Arsenault	964a848	2017-03-21 16:24:12 +0000	[diff] [blame]	60	declare float @llvm.amdgcn.interp.p1(float, i32, i32, i32) #1
				61	declare float @llvm.amdgcn.interp.p2(float, float, i32, i32, i32) #1
				62	declare <4 x float> @llvm.amdgcn.image.sample.c.d.o.v4f32.v16f32.v8i32(<16 x float>, <8 x i32>, <4 x i32>, i32, i1, i1, i1, i1, i1) #2
Krzysztof Parzyszek	3bf4aec	2016-09-02 19:48:55 +0000	[diff] [blame]	63
					; Attribute groups: #0 is applied to @main, #1 to the interp.p1/interp.p2
					; declarations and their call sites, #2 to the image sample declaration.
Matt Arsenault	964a848	2017-03-21 16:24:12 +0000	[diff] [blame]	64	attributes #0 = { nounwind }
				65	attributes #1 = { nounwind readnone }
				66	attributes #2 = { nounwind readonly }