Blame - llvm/test/CodeGen/AMDGPU/llvm.amdgcn.raw.buffer.load.ll - toolchain/llvm-project

blob: dcae08d55ab43ab999600c31d4bc17685f435bb2 [file] [log] [blame]

Tim Renouf	4f703f5	2018-08-21 11:07:10 +0000	[diff] [blame]	1	;RUN: llc < %s -march=amdgcn -mcpu=verde -verify-machineinstrs | FileCheck %s -check-prefix=CHECK -check-prefix=SICI
				2	;RUN: llc < %s -march=amdgcn -mcpu=tonga -verify-machineinstrs | FileCheck %s -check-prefix=CHECK -check-prefix=VI
				3
				4	;CHECK-LABEL: {{^}}buffer_load:
				5	;CHECK: buffer_load_dwordx4 v[0:3], off, s[0:3], 0
				6	;CHECK: buffer_load_dwordx4 v[4:7], off, s[0:3], 0 glc
				7	;CHECK: buffer_load_dwordx4 v[8:11], off, s[0:3], 0 slc
				8	;CHECK: s_waitcnt
				9	define amdgpu_ps {<4 x float>, <4 x float>, <4 x float>} @buffer_load(<4 x i32> inreg) { ; three v4f32 loads through the same descriptor %0, differing only in the last call operand
				10	main_body:
				11	%data = call <4 x float> @llvm.amdgcn.raw.buffer.load.v4f32(<4 x i32> %0, i32 0, i32 0, i32 0) ; last operand 0 - expected asm has no cache flag
				12	%data_glc = call <4 x float> @llvm.amdgcn.raw.buffer.load.v4f32(<4 x i32> %0, i32 0, i32 0, i32 1) ; last operand 1 - expected asm carries glc
				13	%data_slc = call <4 x float> @llvm.amdgcn.raw.buffer.load.v4f32(<4 x i32> %0, i32 0, i32 0, i32 2) ; last operand 2 - expected asm carries slc
				14	%r0 = insertvalue {<4 x float>, <4 x float>, <4 x float>} undef, <4 x float> %data, 0 ; pack the three results into the returned struct, in load order
				15	%r1 = insertvalue {<4 x float>, <4 x float>, <4 x float>} %r0, <4 x float> %data_glc, 1
				16	%r2 = insertvalue {<4 x float>, <4 x float>, <4 x float>} %r1, <4 x float> %data_slc, 2
				17	ret {<4 x float>, <4 x float>, <4 x float>} %r2
				18	}
				19
				20	;CHECK-LABEL: {{^}}buffer_load_immoffs:
				21	;CHECK: buffer_load_dwordx4 v[0:3], off, s[0:3], 0 offset:40
				22	;CHECK: s_waitcnt
				23	define amdgpu_ps <4 x float> @buffer_load_immoffs(<4 x i32> inreg) { ; single v4f32 load whose second call operand is a small constant
				24	main_body:
				25	%data = call <4 x float> @llvm.amdgcn.raw.buffer.load.v4f32(<4 x i32> %0, i32 40, i32 0, i32 0) ; constant 40 is expected to appear as offset:40 with no address register (off)
				26	ret <4 x float> %data
				27	}
				28
				29	;CHECK-LABEL: {{^}}buffer_load_immoffs_large:
				30	;CHECK: s_movk_i32 [[OFFSET:s[0-9]+]], 0x1ffc
				31	;CHECK: buffer_load_dwordx4 v[0:3], off, s[0:3], [[OFFSET]] offset:4
				32	;CHECK: s_waitcnt
				33	define amdgpu_ps <4 x float> @buffer_load_immoffs_large(<4 x i32> inreg) { ; single v4f32 load with constants in both the second and third call operands
				34	main_body:
				35	%data = call <4 x float> @llvm.amdgcn.raw.buffer.load.v4f32(<4 x i32> %0, i32 4, i32 8188, i32 0) ; 4 is expected as offset:4; 8188 (0x1ffc) is expected in an SGPR set up by s_movk_i32
				36	ret <4 x float> %data
				37	}
				38
				39	;CHECK-LABEL: {{^}}buffer_load_ofs:
				40	;CHECK: buffer_load_dwordx4 v[0:3], v0, s[0:3], 0 offen
				41	;CHECK: s_waitcnt
				42	define amdgpu_ps <4 x float> @buffer_load_ofs(<4 x i32> inreg, i32) { ; single v4f32 load addressed by the unnamed i32 argument %1
				43	main_body:
				44	%data = call <4 x float> @llvm.amdgcn.raw.buffer.load.v4f32(<4 x i32> %0, i32 %1, i32 0, i32 0) ; non-constant second operand - expected asm uses v0 with offen
				45	ret <4 x float> %data
				46	}
				47
				48	;CHECK-LABEL: {{^}}buffer_load_ofs_imm:
				49	;CHECK: buffer_load_dwordx4 v[0:3], v0, s[0:3], 0 offen offset:60
				50	;CHECK: s_waitcnt
				51	define amdgpu_ps <4 x float> @buffer_load_ofs_imm(<4 x i32> inreg, i32) { ; single v4f32 load addressed by argument %1 plus a constant
				52	main_body:
				53	%ofs = add i32 %1, 60 ; the constant addend is expected to fold into offset:60 while %1 stays in v0 with offen
				54	%data = call <4 x float> @llvm.amdgcn.raw.buffer.load.v4f32(<4 x i32> %0, i32 %ofs, i32 0, i32 0)
				55	ret <4 x float> %data
				56	}
				57
				58	;CHECK-LABEL: {{^}}buffer_load_x1:
				59	;CHECK: buffer_load_dword v0, v0, s[0:3], 0 offen
				60	;CHECK: s_waitcnt
				61	define amdgpu_ps float @buffer_load_x1(<4 x i32> inreg %rsrc, i32 %ofs) { ; scalar f32 variant of the load, addressed by %ofs
				62	main_body:
				63	%data = call float @llvm.amdgcn.raw.buffer.load.f32(<4 x i32> %rsrc, i32 %ofs, i32 0, i32 0) ; expected to select buffer_load_dword with offen
				64	ret float %data
				65	}
				66
				67	;CHECK-LABEL: {{^}}buffer_load_x2:
				68	;CHECK: buffer_load_dwordx2 v[0:1], v0, s[0:3], 0 offen
				69	;CHECK: s_waitcnt
				70	define amdgpu_ps <2 x float> @buffer_load_x2(<4 x i32> inreg %rsrc, i32 %ofs) { ; v2f32 variant of the load, addressed by %ofs
				71	main_body:
				72	%data = call <2 x float> @llvm.amdgcn.raw.buffer.load.v2f32(<4 x i32> %rsrc, i32 %ofs, i32 0, i32 0) ; expected to select buffer_load_dwordx2 with offen
				73	ret <2 x float> %data
				74	}
				75
				76	;CHECK-LABEL: {{^}}buffer_load_negative_offset:
Tim Renouf	a37679d	2018-10-03 10:29:43 +0000	[diff] [blame^]	77	;CHECK: v_add_{{[iu]}}32_e32 [[VOFS:v[0-9]+]], vcc, -16, v0
				78	;CHECK: buffer_load_dwordx4 v[0:3], [[VOFS]], s[0:3], 0 offen
Tim Renouf	4f703f5	2018-08-21 11:07:10 +0000	[diff] [blame]	79	define amdgpu_ps <4 x float> @buffer_load_negative_offset(<4 x i32> inreg, i32 %ofs) { ; single v4f32 load addressed by %ofs minus a constant
				80	main_body:
				81	%ofs.1 = add i32 %ofs, -16 ; negative addend is expected to remain a separate v_add instead of folding into the offset field
				82	%data = call <4 x float> @llvm.amdgcn.raw.buffer.load.v4f32(<4 x i32> %0, i32 %ofs.1, i32 0, i32 0) ; expected asm uses the v_add result with offen and no offset
				83	ret <4 x float> %data
				84	}
				85
				86	; SI won't merge ds memory operations, because of the signed offset bug, so
				87	; we only have check lines for VI.
				88	; CHECK-LABEL: {{^}}buffer_load_mmo:
				89	; VI: v_mov_b32_e32 [[ZERO:v[0-9]+]], 0
				90	; VI: ds_write2_b32 v{{[0-9]+}}, [[ZERO]], [[ZERO]] offset1:4
				91	define amdgpu_ps float @buffer_load_mmo(<4 x i32> inreg %rsrc, float addrspace(3)* %lds) { ; two LDS stores with a buffer load between them
				92	entry:
				93	store float 0.0, float addrspace(3)* %lds ; first store, to element 0 of %lds
				94	%val = call float @llvm.amdgcn.raw.buffer.load.f32(<4 x i32> %rsrc, i32 0, i32 0, i32 0) ; presumably must not prevent the two stores from being combined - confirm against the original commit
				95	%tmp2 = getelementptr float, float addrspace(3)* %lds, i32 4
				96	store float 0.0, float addrspace(3)* %tmp2 ; second store, to element 4; the Tonga run expects both stores as one ds_write2_b32 with offset1:4
				97	ret float %val
				98	}
				99
				100	;CHECK-LABEL: {{^}}buffer_load_x1_offen_merged:
				101	;CHECK-NEXT: %bb.
				102	;CHECK-NEXT: buffer_load_dwordx4 v[{{[0-9]}}:{{[0-9]}}], v0, s[0:3], 0 offen offset:4
				103	;CHECK-NEXT: buffer_load_dwordx2 v[{{[0-9]}}:{{[0-9]}}], v0, s[0:3], 0 offen offset:28
				104	;CHECK: s_waitcnt
				105	define amdgpu_ps void @buffer_load_x1_offen_merged(<4 x i32> inreg %rsrc, i32 %a) { ; six scalar f32 loads at %a plus constants; expected asm has one dwordx4 at offset:4 and one dwordx2 at offset:28
				106	main_body:
				107	%a1 = add i32 %a, 4 ; %a1..%a4 are four consecutive dword addresses
				108	%a2 = add i32 %a, 8
				109	%a3 = add i32 %a, 12
				110	%a4 = add i32 %a, 16
				111	%a5 = add i32 %a, 28 ; %a5..%a6 are two consecutive dword addresses, not adjacent to %a4
				112	%a6 = add i32 %a, 32
				113	%r1 = call float @llvm.amdgcn.raw.buffer.load.f32(<4 x i32> %rsrc, i32 %a1, i32 0, i32 0)
				114	%r2 = call float @llvm.amdgcn.raw.buffer.load.f32(<4 x i32> %rsrc, i32 %a2, i32 0, i32 0)
				115	%r3 = call float @llvm.amdgcn.raw.buffer.load.f32(<4 x i32> %rsrc, i32 %a3, i32 0, i32 0)
				116	%r4 = call float @llvm.amdgcn.raw.buffer.load.f32(<4 x i32> %rsrc, i32 %a4, i32 0, i32 0)
				117	%r5 = call float @llvm.amdgcn.raw.buffer.load.f32(<4 x i32> %rsrc, i32 %a5, i32 0, i32 0)
				118	%r6 = call float @llvm.amdgcn.raw.buffer.load.f32(<4 x i32> %rsrc, i32 %a6, i32 0, i32 0)
				119	call void @llvm.amdgcn.exp.f32(i32 0, i32 15, float %r1, float %r2, float %r3, float %r4, i1 true, i1 true) ; the exports consume every loaded value
				120	call void @llvm.amdgcn.exp.f32(i32 0, i32 15, float %r5, float %r6, float undef, float undef, i1 true, i1 true)
				121	ret void
				122	}
				123
				124	;CHECK-LABEL: {{^}}buffer_load_x1_offen_merged_glc_slc:
				125	;CHECK-NEXT: %bb.
				126	;CHECK-NEXT: buffer_load_dwordx2 v[{{[0-9]}}:{{[0-9]}}], v0, s[0:3], 0 offen offset:4{{$}}
				127	;CHECK-NEXT: buffer_load_dwordx2 v[{{[0-9]}}:{{[0-9]}}], v0, s[0:3], 0 offen offset:12 glc{{$}}
				128	;CHECK-NEXT: buffer_load_dwordx2 v[{{[0-9]}}:{{[0-9]}}], v0, s[0:3], 0 offen offset:28 glc slc{{$}}
				129	;CHECK: s_waitcnt
				130	define amdgpu_ps void @buffer_load_x1_offen_merged_glc_slc(<4 x i32> inreg %rsrc, i32 %a) { ; six scalar f32 loads whose last operand changes every two loads; expected asm has three dwordx2 loads
				131	main_body:
				132	%a1 = add i32 %a, 4
				133	%a2 = add i32 %a, 8
				134	%a3 = add i32 %a, 12
				135	%a4 = add i32 %a, 16
				136	%a5 = add i32 %a, 28
				137	%a6 = add i32 %a, 32
				138	%r1 = call float @llvm.amdgcn.raw.buffer.load.f32(<4 x i32> %rsrc, i32 %a1, i32 0, i32 0) ; %r1/%r2 use last operand 0 - expected pair at offset:4 with no flag
				139	%r2 = call float @llvm.amdgcn.raw.buffer.load.f32(<4 x i32> %rsrc, i32 %a2, i32 0, i32 0)
				140	%r3 = call float @llvm.amdgcn.raw.buffer.load.f32(<4 x i32> %rsrc, i32 %a3, i32 0, i32 1) ; %r3/%r4 use last operand 1 - expected pair at offset:12 with glc
				141	%r4 = call float @llvm.amdgcn.raw.buffer.load.f32(<4 x i32> %rsrc, i32 %a4, i32 0, i32 1)
				142	%r5 = call float @llvm.amdgcn.raw.buffer.load.f32(<4 x i32> %rsrc, i32 %a5, i32 0, i32 3) ; %r5/%r6 use last operand 3 - expected pair at offset:28 with glc slc
				143	%r6 = call float @llvm.amdgcn.raw.buffer.load.f32(<4 x i32> %rsrc, i32 %a6, i32 0, i32 3)
				144	call void @llvm.amdgcn.exp.f32(i32 0, i32 15, float %r1, float %r2, float %r3, float %r4, i1 true, i1 true) ; the exports consume every loaded value
				145	call void @llvm.amdgcn.exp.f32(i32 0, i32 15, float %r5, float %r6, float undef, float undef, i1 true, i1 true)
				146	ret void
				147	}
				148
				149	;CHECK-LABEL: {{^}}buffer_load_x2_offen_merged:
				150	;CHECK-NEXT: %bb.
				151	;CHECK-NEXT: buffer_load_dwordx4 v[{{[0-9]}}:{{[0-9]}}], v0, s[0:3], 0 offen offset:4
				152	;CHECK: s_waitcnt
				153	define amdgpu_ps void @buffer_load_x2_offen_merged(<4 x i32> inreg %rsrc, i32 %a) { ; two v2f32 loads at %a+4 and %a+12; expected asm has a single dwordx4 at offset:4
				154	main_body:
				155	%a1 = add i32 %a, 4
				156	%a2 = add i32 %a, 12 ; 8 bytes after %a1, i.e. directly after the first two-dword load
				157	%vr1 = call <2 x float> @llvm.amdgcn.raw.buffer.load.v2f32(<4 x i32> %rsrc, i32 %a1, i32 0, i32 0)
				158	%vr2 = call <2 x float> @llvm.amdgcn.raw.buffer.load.v2f32(<4 x i32> %rsrc, i32 %a2, i32 0, i32 0)
				159	%r1 = extractelement <2 x float> %vr1, i32 0 ; split both vectors into scalars for the export call
				160	%r2 = extractelement <2 x float> %vr1, i32 1
				161	%r3 = extractelement <2 x float> %vr2, i32 0
				162	%r4 = extractelement <2 x float> %vr2, i32 1
				163	call void @llvm.amdgcn.exp.f32(i32 0, i32 15, float %r1, float %r2, float %r3, float %r4, i1 true, i1 true)
				164	ret void
				165	}
				166
				167	;CHECK-LABEL: {{^}}buffer_load_x1_offset_merged:
				168	;CHECK-NEXT: %bb.
				169	;CHECK-NEXT: buffer_load_dwordx4 v[{{[0-9]}}:{{[0-9]}}], off, s[0:3], 0 offset:4
				170	;CHECK-NEXT: buffer_load_dwordx2 v[{{[0-9]}}:{{[0-9]}}], off, s[0:3], 0 offset:28
				171	;CHECK: s_waitcnt
				172	define amdgpu_ps void @buffer_load_x1_offset_merged(<4 x i32> inreg %rsrc) { ; six scalar f32 loads at constant addresses; expected asm has one dwordx4 at offset:4 and one dwordx2 at offset:28, both with off
				173	main_body:
				174	%r1 = call float @llvm.amdgcn.raw.buffer.load.f32(<4 x i32> %rsrc, i32 4, i32 0, i32 0) ; constants 4, 8, 12, 16 are four consecutive dwords
				175	%r2 = call float @llvm.amdgcn.raw.buffer.load.f32(<4 x i32> %rsrc, i32 8, i32 0, i32 0)
				176	%r3 = call float @llvm.amdgcn.raw.buffer.load.f32(<4 x i32> %rsrc, i32 12, i32 0, i32 0)
				177	%r4 = call float @llvm.amdgcn.raw.buffer.load.f32(<4 x i32> %rsrc, i32 16, i32 0, i32 0)
				178	%r5 = call float @llvm.amdgcn.raw.buffer.load.f32(<4 x i32> %rsrc, i32 28, i32 0, i32 0) ; constants 28, 32 are two consecutive dwords, not adjacent to 16
				179	%r6 = call float @llvm.amdgcn.raw.buffer.load.f32(<4 x i32> %rsrc, i32 32, i32 0, i32 0)
				180	call void @llvm.amdgcn.exp.f32(i32 0, i32 15, float %r1, float %r2, float %r3, float %r4, i1 true, i1 true) ; the exports consume every loaded value
				181	call void @llvm.amdgcn.exp.f32(i32 0, i32 15, float %r5, float %r6, float undef, float undef, i1 true, i1 true)
				182	ret void
				183	}
				184
				185	;CHECK-LABEL: {{^}}buffer_load_x2_offset_merged:
				186	;CHECK-NEXT: %bb.
				187	;CHECK-NEXT: buffer_load_dwordx4 v[{{[0-9]}}:{{[0-9]}}], off, s[0:3], 0 offset:4
				188	;CHECK: s_waitcnt
				189	define amdgpu_ps void @buffer_load_x2_offset_merged(<4 x i32> inreg %rsrc) { ; two v2f32 loads at constant addresses 4 and 12; expected asm has a single dwordx4 at offset:4 with off
				190	main_body:
				191	%vr1 = call <2 x float> @llvm.amdgcn.raw.buffer.load.v2f32(<4 x i32> %rsrc, i32 4, i32 0, i32 0)
				192	%vr2 = call <2 x float> @llvm.amdgcn.raw.buffer.load.v2f32(<4 x i32> %rsrc, i32 12, i32 0, i32 0) ; 8 bytes after the first load, i.e. directly adjacent
				193	%r1 = extractelement <2 x float> %vr1, i32 0 ; split both vectors into scalars for the export call
				194	%r2 = extractelement <2 x float> %vr1, i32 1
				195	%r3 = extractelement <2 x float> %vr2, i32 0
				196	%r4 = extractelement <2 x float> %vr2, i32 1
				197	call void @llvm.amdgcn.exp.f32(i32 0, i32 15, float %r1, float %r2, float %r3, float %r4, i1 true, i1 true)
				198	ret void
				199	}
				200
Tim Renouf	bb5ee41	2018-08-21 11:08:12 +0000	[diff] [blame]	201	;CHECK-LABEL: {{^}}buffer_load_int:
				202	;CHECK: buffer_load_dwordx4 v[0:3], off, s[0:3], 0
				203	;CHECK: buffer_load_dwordx2 v[4:5], off, s[0:3], 0 glc
				204	;CHECK: buffer_load_dword v6, off, s[0:3], 0 slc
				205	;CHECK: s_waitcnt
				206	define amdgpu_ps {<4 x float>, <2 x float>, float} @buffer_load_int(<4 x i32> inreg) { ; integer-typed variants of the load (v4i32, v2i32, i32), each with a different last operand
				207	main_body:
				208	%data = call <4 x i32> @llvm.amdgcn.raw.buffer.load.v4i32(<4 x i32> %0, i32 0, i32 0, i32 0) ; expected asm is dwordx4 with no flag
				209	%data_glc = call <2 x i32> @llvm.amdgcn.raw.buffer.load.v2i32(<4 x i32> %0, i32 0, i32 0, i32 1) ; expected asm is dwordx2 with glc
				210	%data_slc = call i32 @llvm.amdgcn.raw.buffer.load.i32(<4 x i32> %0, i32 0, i32 0, i32 2) ; expected asm is dword with slc
				211	%fdata = bitcast <4 x i32> %data to <4 x float> ; reinterpret the integer results as floats to match the float-typed return struct
				212	%fdata_glc = bitcast <2 x i32> %data_glc to <2 x float>
				213	%fdata_slc = bitcast i32 %data_slc to float
				214	%r0 = insertvalue {<4 x float>, <2 x float>, float} undef, <4 x float> %fdata, 0
				215	%r1 = insertvalue {<4 x float>, <2 x float>, float} %r0, <2 x float> %fdata_glc, 1
				216	%r2 = insertvalue {<4 x float>, <2 x float>, float} %r1, float %fdata_slc, 2
				217	ret {<4 x float>, <2 x float>, float} %r2
				218	}
				219
Tim Renouf	4f703f5	2018-08-21 11:07:10 +0000	[diff] [blame]	220	declare float @llvm.amdgcn.raw.buffer.load.f32(<4 x i32>, i32, i32, i32) #0
				221	declare <2 x float> @llvm.amdgcn.raw.buffer.load.v2f32(<4 x i32>, i32, i32, i32) #0
				222	declare <4 x float> @llvm.amdgcn.raw.buffer.load.v4f32(<4 x i32>, i32, i32, i32) #0
Tim Renouf	bb5ee41	2018-08-21 11:08:12 +0000	[diff] [blame]	223	declare i32 @llvm.amdgcn.raw.buffer.load.i32(<4 x i32>, i32, i32, i32) #0
				224	declare <2 x i32> @llvm.amdgcn.raw.buffer.load.v2i32(<4 x i32>, i32, i32, i32) #0
				225	declare <4 x i32> @llvm.amdgcn.raw.buffer.load.v4i32(<4 x i32>, i32, i32, i32) #0
Tim Renouf	4f703f5	2018-08-21 11:07:10 +0000	[diff] [blame]	226	declare void @llvm.amdgcn.exp.f32(i32, i32, float, float, float, float, i1, i1) #0
				227
				228	attributes #0 = { nounwind readonly }