; llvm/test/CodeGen/R600/ds_write2.ll - toolchain/llvm-project
; blob: 6e5bcffb62105e1e776382bcf3608dbbba7bfe2b
; RUN: llc -march=r600 -mcpu=bonaire -verify-machineinstrs -mattr=+load-store-opt -enable-misched < %s | FileCheck -strict-whitespace -check-prefix=SI %s

; Zero-initialized addrspace(3) arrays that the store tests in this file
; index with the value returned by @llvm.r600.read.tidig.x.
@lds = addrspace(3) global [512 x float] zeroinitializer, align 4
@lds.f64 = addrspace(3) global [512 x double] zeroinitializer, align 8

				7	; SI-LABEL: @simple_write2_one_val_f32
				8	; SI-DAG: BUFFER_LOAD_DWORD [[VAL:v[0-9]+]]
				9	; SI-DAG: V_LSHLREV_B32_e32 [[VPTR:v[0-9]+]], 2, v{{[0-9]+}}
Matt Arsenault	61cc908	2014-10-10 22:16:07 +0000	[diff] [blame]	10	; SI: DS_WRITE2_B32 [[VPTR]], [[VAL]], [[VAL]] offset0:0 offset1:8 [M0]
Matt Arsenault	4103328	2014-10-10 22:01:59 +0000	[diff] [blame]	11	; SI: S_ENDPGM
				12	define void @simple_write2_one_val_f32(float addrspace(1)* %C, float addrspace(1)* %in) #0 {
				13	%x.i = tail call i32 @llvm.r600.read.tidig.x() #1
				14	%in.gep = getelementptr float addrspace(1)* %in, i32 %x.i
				15	%val = load float addrspace(1)* %in.gep, align 4
				16	%arrayidx0 = getelementptr inbounds [512 x float] addrspace(3)* @lds, i32 0, i32 %x.i
				17	store float %val, float addrspace(3)* %arrayidx0, align 4
				18	%add.x = add nsw i32 %x.i, 8
				19	%arrayidx1 = getelementptr inbounds [512 x float] addrspace(3)* @lds, i32 0, i32 %add.x
				20	store float %val, float addrspace(3)* %arrayidx1, align 4
				21	ret void
				22	}
				23
				24	; SI-LABEL: @simple_write2_two_val_f32
				25	; SI-DAG: BUFFER_LOAD_DWORD [[VAL0:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64{{$}}
				26	; SI-DAG: BUFFER_LOAD_DWORD [[VAL1:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:0x4
				27	; SI-DAG: V_LSHLREV_B32_e32 [[VPTR:v[0-9]+]], 2, v{{[0-9]+}}
Matt Arsenault	61cc908	2014-10-10 22:16:07 +0000	[diff] [blame]	28	; SI: DS_WRITE2_B32 [[VPTR]], [[VAL0]], [[VAL1]] offset0:0 offset1:8 [M0]
Matt Arsenault	4103328	2014-10-10 22:01:59 +0000	[diff] [blame]	29	; SI: S_ENDPGM
				30	define void @simple_write2_two_val_f32(float addrspace(1)* %C, float addrspace(1)* %in) #0 {
				31	%x.i = tail call i32 @llvm.r600.read.tidig.x() #1
				32	%in.gep.0 = getelementptr float addrspace(1)* %in, i32 %x.i
				33	%in.gep.1 = getelementptr float addrspace(1)* %in.gep.0, i32 1
				34	%val0 = load float addrspace(1)* %in.gep.0, align 4
				35	%val1 = load float addrspace(1)* %in.gep.1, align 4
				36	%arrayidx0 = getelementptr inbounds [512 x float] addrspace(3)* @lds, i32 0, i32 %x.i
				37	store float %val0, float addrspace(3)* %arrayidx0, align 4
				38	%add.x = add nsw i32 %x.i, 8
				39	%arrayidx1 = getelementptr inbounds [512 x float] addrspace(3)* @lds, i32 0, i32 %add.x
				40	store float %val1, float addrspace(3)* %arrayidx1, align 4
				41	ret void
				42	}
				43
				44	; SI-LABEL: @simple_write2_two_val_f32_volatile_0
				45	; SI-NOT: DS_WRITE2_B32
Matt Arsenault	61cc908	2014-10-10 22:16:07 +0000	[diff] [blame]	46	; SI: DS_WRITE_B32 {{v[0-9]+}}, {{v[0-9]+}}
				47	; SI: DS_WRITE_B32 {{v[0-9]+}}, {{v[0-9]+}} offset:32
Matt Arsenault	4103328	2014-10-10 22:01:59 +0000	[diff] [blame]	48	; SI: S_ENDPGM
				49	define void @simple_write2_two_val_f32_volatile_0(float addrspace(1)* %C, float addrspace(1)* %in0, float addrspace(1)* %in1) #0 {
				50	%x.i = tail call i32 @llvm.r600.read.tidig.x() #1
				51	%in0.gep = getelementptr float addrspace(1)* %in0, i32 %x.i
				52	%in1.gep = getelementptr float addrspace(1)* %in1, i32 %x.i
				53	%val0 = load float addrspace(1)* %in0.gep, align 4
				54	%val1 = load float addrspace(1)* %in1.gep, align 4
				55	%arrayidx0 = getelementptr inbounds [512 x float] addrspace(3)* @lds, i32 0, i32 %x.i
				56	store volatile float %val0, float addrspace(3)* %arrayidx0, align 4
				57	%add.x = add nsw i32 %x.i, 8
				58	%arrayidx1 = getelementptr inbounds [512 x float] addrspace(3)* @lds, i32 0, i32 %add.x
				59	store float %val1, float addrspace(3)* %arrayidx1, align 4
				60	ret void
				61	}
				62
				63	; SI-LABEL: @simple_write2_two_val_f32_volatile_1
				64	; SI-NOT: DS_WRITE2_B32
Matt Arsenault	61cc908	2014-10-10 22:16:07 +0000	[diff] [blame]	65	; SI: DS_WRITE_B32 {{v[0-9]+}}, {{v[0-9]+}}
				66	; SI: DS_WRITE_B32 {{v[0-9]+}}, {{v[0-9]+}} offset:32
Matt Arsenault	4103328	2014-10-10 22:01:59 +0000	[diff] [blame]	67	; SI: S_ENDPGM
				68	define void @simple_write2_two_val_f32_volatile_1(float addrspace(1)* %C, float addrspace(1)* %in0, float addrspace(1)* %in1) #0 {
				69	%x.i = tail call i32 @llvm.r600.read.tidig.x() #1
				70	%in0.gep = getelementptr float addrspace(1)* %in0, i32 %x.i
				71	%in1.gep = getelementptr float addrspace(1)* %in1, i32 %x.i
				72	%val0 = load float addrspace(1)* %in0.gep, align 4
				73	%val1 = load float addrspace(1)* %in1.gep, align 4
				74	%arrayidx0 = getelementptr inbounds [512 x float] addrspace(3)* @lds, i32 0, i32 %x.i
				75	store float %val0, float addrspace(3)* %arrayidx0, align 4
				76	%add.x = add nsw i32 %x.i, 8
				77	%arrayidx1 = getelementptr inbounds [512 x float] addrspace(3)* @lds, i32 0, i32 %add.x
				78	store volatile float %val1, float addrspace(3)* %arrayidx1, align 4
				79	ret void
				80	}
				81
				82	; 2 data subregisters from different super registers.
				83	; SI-LABEL: @simple_write2_two_val_subreg2_mixed_f32
				84	; SI: BUFFER_LOAD_DWORDX2 v{{\[}}[[VAL0:[0-9]+]]:{{[0-9]+\]}}
				85	; SI: BUFFER_LOAD_DWORDX2 v{{\[[0-9]+}}:[[VAL1:[0-9]+]]{{\]}}
				86	; SI: V_LSHLREV_B32_e32 [[VPTR:v[0-9]+]], 2, v{{[0-9]+}}
Matt Arsenault	61cc908	2014-10-10 22:16:07 +0000	[diff] [blame]	87	; SI: DS_WRITE2_B32 [[VPTR]], v[[VAL0]], v[[VAL1]] offset0:0 offset1:8 [M0]
Matt Arsenault	4103328	2014-10-10 22:01:59 +0000	[diff] [blame]	88	; SI: S_ENDPGM
				89	define void @simple_write2_two_val_subreg2_mixed_f32(float addrspace(1)* %C, <2 x float> addrspace(1)* %in) #0 {
				90	%x.i = tail call i32 @llvm.r600.read.tidig.x() #1
				91	%in.gep.0 = getelementptr <2 x float> addrspace(1)* %in, i32 %x.i
				92	%in.gep.1 = getelementptr <2 x float> addrspace(1)* %in.gep.0, i32 1
				93	%val0 = load <2 x float> addrspace(1)* %in.gep.0, align 8
				94	%val1 = load <2 x float> addrspace(1)* %in.gep.1, align 8
				95	%val0.0 = extractelement <2 x float> %val0, i32 0
				96	%val1.1 = extractelement <2 x float> %val1, i32 1
				97	%arrayidx0 = getelementptr inbounds [512 x float] addrspace(3)* @lds, i32 0, i32 %x.i
				98	store float %val0.0, float addrspace(3)* %arrayidx0, align 4
				99	%add.x = add nsw i32 %x.i, 8
				100	%arrayidx1 = getelementptr inbounds [512 x float] addrspace(3)* @lds, i32 0, i32 %add.x
				101	store float %val1.1, float addrspace(3)* %arrayidx1, align 4
				102	ret void
				103	}
				104
				105	; SI-LABEL: @simple_write2_two_val_subreg2_f32
				106	; SI-DAG: BUFFER_LOAD_DWORDX2 v{{\[}}[[VAL0:[0-9]+]]:[[VAL1:[0-9]+]]{{\]}}
				107	; SI-DAG: V_LSHLREV_B32_e32 [[VPTR:v[0-9]+]], 2, v{{[0-9]+}}
Matt Arsenault	61cc908	2014-10-10 22:16:07 +0000	[diff] [blame]	108	; SI: DS_WRITE2_B32 [[VPTR]], v[[VAL0]], v[[VAL1]] offset0:0 offset1:8 [M0]
Matt Arsenault	4103328	2014-10-10 22:01:59 +0000	[diff] [blame]	109	; SI: S_ENDPGM
				110	define void @simple_write2_two_val_subreg2_f32(float addrspace(1)* %C, <2 x float> addrspace(1)* %in) #0 {
				111	%x.i = tail call i32 @llvm.r600.read.tidig.x() #1
				112	%in.gep = getelementptr <2 x float> addrspace(1)* %in, i32 %x.i
				113	%val = load <2 x float> addrspace(1)* %in.gep, align 8
				114	%val0 = extractelement <2 x float> %val, i32 0
				115	%val1 = extractelement <2 x float> %val, i32 1
				116	%arrayidx0 = getelementptr inbounds [512 x float] addrspace(3)* @lds, i32 0, i32 %x.i
				117	store float %val0, float addrspace(3)* %arrayidx0, align 4
				118	%add.x = add nsw i32 %x.i, 8
				119	%arrayidx1 = getelementptr inbounds [512 x float] addrspace(3)* @lds, i32 0, i32 %add.x
				120	store float %val1, float addrspace(3)* %arrayidx1, align 4
				121	ret void
				122	}
				123
				124	; SI-LABEL: @simple_write2_two_val_subreg4_f32
				125	; SI-DAG: BUFFER_LOAD_DWORDX4 v{{\[}}[[VAL0:[0-9]+]]:[[VAL1:[0-9]+]]{{\]}}
				126	; SI-DAG: V_LSHLREV_B32_e32 [[VPTR:v[0-9]+]], 2, v{{[0-9]+}}
Matt Arsenault	61cc908	2014-10-10 22:16:07 +0000	[diff] [blame]	127	; SI: DS_WRITE2_B32 [[VPTR]], v[[VAL0]], v[[VAL1]] offset0:0 offset1:8 [M0]
Matt Arsenault	4103328	2014-10-10 22:01:59 +0000	[diff] [blame]	128	; SI: S_ENDPGM
				129	define void @simple_write2_two_val_subreg4_f32(float addrspace(1)* %C, <4 x float> addrspace(1)* %in) #0 {
				130	%x.i = tail call i32 @llvm.r600.read.tidig.x() #1
				131	%in.gep = getelementptr <4 x float> addrspace(1)* %in, i32 %x.i
				132	%val = load <4 x float> addrspace(1)* %in.gep, align 16
				133	%val0 = extractelement <4 x float> %val, i32 0
				134	%val1 = extractelement <4 x float> %val, i32 3
				135	%arrayidx0 = getelementptr inbounds [512 x float] addrspace(3)* @lds, i32 0, i32 %x.i
				136	store float %val0, float addrspace(3)* %arrayidx0, align 4
				137	%add.x = add nsw i32 %x.i, 8
				138	%arrayidx1 = getelementptr inbounds [512 x float] addrspace(3)* @lds, i32 0, i32 %add.x
				139	store float %val1, float addrspace(3)* %arrayidx1, align 4
				140	ret void
				141	}
				142
				143	; SI-LABEL: @simple_write2_two_val_max_offset_f32
				144	; SI-DAG: BUFFER_LOAD_DWORD [[VAL0:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64{{$}}
				145	; SI-DAG: BUFFER_LOAD_DWORD [[VAL1:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:0x4
				146	; SI-DAG: V_LSHLREV_B32_e32 [[VPTR:v[0-9]+]], 2, v{{[0-9]+}}
Matt Arsenault	61cc908	2014-10-10 22:16:07 +0000	[diff] [blame]	147	; SI: DS_WRITE2_B32 [[VPTR]], [[VAL0]], [[VAL1]] offset0:0 offset1:255 [M0]
Matt Arsenault	4103328	2014-10-10 22:01:59 +0000	[diff] [blame]	148	; SI: S_ENDPGM
				149	define void @simple_write2_two_val_max_offset_f32(float addrspace(1)* %C, float addrspace(1)* %in) #0 {
				150	%x.i = tail call i32 @llvm.r600.read.tidig.x() #1
				151	%in.gep.0 = getelementptr float addrspace(1)* %in, i32 %x.i
				152	%in.gep.1 = getelementptr float addrspace(1)* %in.gep.0, i32 1
				153	%val0 = load float addrspace(1)* %in.gep.0, align 4
				154	%val1 = load float addrspace(1)* %in.gep.1, align 4
				155	%arrayidx0 = getelementptr inbounds [512 x float] addrspace(3)* @lds, i32 0, i32 %x.i
				156	store float %val0, float addrspace(3)* %arrayidx0, align 4
				157	%add.x = add nsw i32 %x.i, 255
				158	%arrayidx1 = getelementptr inbounds [512 x float] addrspace(3)* @lds, i32 0, i32 %add.x
				159	store float %val1, float addrspace(3)* %arrayidx1, align 4
				160	ret void
				161	}
				162
				163	; SI-LABEL: @simple_write2_two_val_too_far_f32
Matt Arsenault	61cc908	2014-10-10 22:16:07 +0000	[diff] [blame]	164	; SI: DS_WRITE_B32 v{{[0-9]+}}, v{{[0-9]+}}
				165	; SI: DS_WRITE_B32 v{{[0-9]+}}, v{{[0-9]+}} offset:1028
Matt Arsenault	4103328	2014-10-10 22:01:59 +0000	[diff] [blame]	166	; SI: S_ENDPGM
				167	define void @simple_write2_two_val_too_far_f32(float addrspace(1)* %C, float addrspace(1)* %in0, float addrspace(1)* %in1) #0 {
				168	%x.i = tail call i32 @llvm.r600.read.tidig.x() #1
				169	%in0.gep = getelementptr float addrspace(1)* %in0, i32 %x.i
				170	%in1.gep = getelementptr float addrspace(1)* %in1, i32 %x.i
				171	%val0 = load float addrspace(1)* %in0.gep, align 4
				172	%val1 = load float addrspace(1)* %in1.gep, align 4
				173	%arrayidx0 = getelementptr inbounds [512 x float] addrspace(3)* @lds, i32 0, i32 %x.i
				174	store float %val0, float addrspace(3)* %arrayidx0, align 4
Matt Arsenault	fe0a2e6	2014-10-10 22:12:32 +0000	[diff] [blame]	175	%add.x = add nsw i32 %x.i, 257
Matt Arsenault	4103328	2014-10-10 22:01:59 +0000	[diff] [blame]	176	%arrayidx1 = getelementptr inbounds [512 x float] addrspace(3)* @lds, i32 0, i32 %add.x
				177	store float %val1, float addrspace(3)* %arrayidx1, align 4
				178	ret void
				179	}
				180
				181	; SI-LABEL: @simple_write2_two_val_f32_x2
Matt Arsenault	61cc908	2014-10-10 22:16:07 +0000	[diff] [blame]	182	; SI: DS_WRITE2_B32 [[BASEADDR:v[0-9]+]], [[VAL0:v[0-9]+]], [[VAL1:v[0-9]+]] offset0:0 offset1:8
				183	; SI-NEXT: DS_WRITE2_B32 [[BASEADDR]], [[VAL0]], [[VAL1]] offset0:11 offset1:27
Matt Arsenault	4103328	2014-10-10 22:01:59 +0000	[diff] [blame]	184	; SI: S_ENDPGM
				185	define void @simple_write2_two_val_f32_x2(float addrspace(1)* %C, float addrspace(1)* %in0, float addrspace(1)* %in1) #0 {
				186	%tid.x = tail call i32 @llvm.r600.read.tidig.x() #1
				187	%in0.gep = getelementptr float addrspace(1)* %in0, i32 %tid.x
				188	%in1.gep = getelementptr float addrspace(1)* %in1, i32 %tid.x
				189	%val0 = load float addrspace(1)* %in0.gep, align 4
				190	%val1 = load float addrspace(1)* %in1.gep, align 4
				191
				192	%idx.0 = add nsw i32 %tid.x, 0
				193	%arrayidx0 = getelementptr inbounds [512 x float] addrspace(3)* @lds, i32 0, i32 %idx.0
				194	store float %val0, float addrspace(3)* %arrayidx0, align 4
				195
				196	%idx.1 = add nsw i32 %tid.x, 8
				197	%arrayidx1 = getelementptr inbounds [512 x float] addrspace(3)* @lds, i32 0, i32 %idx.1
				198	store float %val1, float addrspace(3)* %arrayidx1, align 4
				199
				200	%idx.2 = add nsw i32 %tid.x, 11
				201	%arrayidx2 = getelementptr inbounds [512 x float] addrspace(3)* @lds, i32 0, i32 %idx.2
				202	store float %val0, float addrspace(3)* %arrayidx2, align 4
				203
				204	%idx.3 = add nsw i32 %tid.x, 27
				205	%arrayidx3 = getelementptr inbounds [512 x float] addrspace(3)* @lds, i32 0, i32 %idx.3
				206	store float %val1, float addrspace(3)* %arrayidx3, align 4
				207
				208	ret void
				209	}
				210
				211	; SI-LABEL: @simple_write2_two_val_f32_x2_nonzero_base
Matt Arsenault	61cc908	2014-10-10 22:16:07 +0000	[diff] [blame]	212	; SI: DS_WRITE2_B32 [[BASEADDR:v[0-9]+]], [[VAL0:v[0-9]+]], [[VAL1:v[0-9]+]] offset0:3 offset1:8
				213	; SI-NEXT: DS_WRITE2_B32 [[BASEADDR]], [[VAL0]], [[VAL1]] offset0:11 offset1:27
Matt Arsenault	4103328	2014-10-10 22:01:59 +0000	[diff] [blame]	214	; SI: S_ENDPGM
				215	define void @simple_write2_two_val_f32_x2_nonzero_base(float addrspace(1)* %C, float addrspace(1)* %in0, float addrspace(1)* %in1) #0 {
				216	%tid.x = tail call i32 @llvm.r600.read.tidig.x() #1
				217	%in0.gep = getelementptr float addrspace(1)* %in0, i32 %tid.x
				218	%in1.gep = getelementptr float addrspace(1)* %in1, i32 %tid.x
				219	%val0 = load float addrspace(1)* %in0.gep, align 4
				220	%val1 = load float addrspace(1)* %in1.gep, align 4
				221
				222	%idx.0 = add nsw i32 %tid.x, 3
				223	%arrayidx0 = getelementptr inbounds [512 x float] addrspace(3)* @lds, i32 0, i32 %idx.0
				224	store float %val0, float addrspace(3)* %arrayidx0, align 4
				225
				226	%idx.1 = add nsw i32 %tid.x, 8
				227	%arrayidx1 = getelementptr inbounds [512 x float] addrspace(3)* @lds, i32 0, i32 %idx.1
				228	store float %val1, float addrspace(3)* %arrayidx1, align 4
				229
				230	%idx.2 = add nsw i32 %tid.x, 11
				231	%arrayidx2 = getelementptr inbounds [512 x float] addrspace(3)* @lds, i32 0, i32 %idx.2
				232	store float %val0, float addrspace(3)* %arrayidx2, align 4
				233
				234	%idx.3 = add nsw i32 %tid.x, 27
				235	%arrayidx3 = getelementptr inbounds [512 x float] addrspace(3)* @lds, i32 0, i32 %idx.3
				236	store float %val1, float addrspace(3)* %arrayidx3, align 4
				237
				238	ret void
				239	}
				240
				241	; SI-LABEL: @write2_ptr_subreg_arg_two_val_f32
				242	; SI-NOT: DS_WRITE2_B32
				243	; SI: DS_WRITE_B32
				244	; SI: DS_WRITE_B32
				245	; SI: S_ENDPGM
				246	define void @write2_ptr_subreg_arg_two_val_f32(float addrspace(1)* %C, float addrspace(1)* %in0, float addrspace(1)* %in1, <2 x float addrspace(3)*> %lds.ptr) #0 {
				247	%x.i = tail call i32 @llvm.r600.read.tidig.x() #1
				248	%in0.gep = getelementptr float addrspace(1)* %in0, i32 %x.i
				249	%in1.gep = getelementptr float addrspace(1)* %in1, i32 %x.i
				250	%val0 = load float addrspace(1)* %in0.gep, align 4
				251	%val1 = load float addrspace(1)* %in1.gep, align 4
				252
				253	%index.0 = insertelement <2 x i32> undef, i32 %x.i, i32 0
				254	%index.1 = insertelement <2 x i32> %index.0, i32 8, i32 0
				255	%gep = getelementptr inbounds <2 x float addrspace(3)*> %lds.ptr, <2 x i32> %index.1
				256	%gep.0 = extractelement <2 x float addrspace(3)*> %gep, i32 0
				257	%gep.1 = extractelement <2 x float addrspace(3)*> %gep, i32 1
				258
				259	; Apply an additional offset after the vector that will be more obviously folded.
				260	%gep.1.offset = getelementptr float addrspace(3)* %gep.1, i32 8
				261	store float %val0, float addrspace(3)* %gep.0, align 4
				262
				263	%add.x = add nsw i32 %x.i, 8
				264	store float %val1, float addrspace(3)* %gep.1.offset, align 4
				265	ret void
				266	}
				267
				268	; SI-LABEL: @simple_write2_one_val_f64
				269	; SI: BUFFER_LOAD_DWORDX2 [[VAL:v\[[0-9]+:[0-9]+\]]],
				270	; SI: V_LSHLREV_B32_e32 [[VPTR:v[0-9]+]], 3, v{{[0-9]+}}
Matt Arsenault	61cc908	2014-10-10 22:16:07 +0000	[diff] [blame]	271	; SI: DS_WRITE2_B64 [[VPTR]], [[VAL]], [[VAL]] offset0:0 offset1:8 [M0]
Matt Arsenault	4103328	2014-10-10 22:01:59 +0000	[diff] [blame]	272	; SI: S_ENDPGM
				273	define void @simple_write2_one_val_f64(double addrspace(1)* %C, double addrspace(1)* %in) #0 {
				274	%x.i = tail call i32 @llvm.r600.read.tidig.x() #1
				275	%in.gep = getelementptr double addrspace(1)* %in, i32 %x.i
				276	%val = load double addrspace(1)* %in.gep, align 8
				277	%arrayidx0 = getelementptr inbounds [512 x double] addrspace(3)* @lds.f64, i32 0, i32 %x.i
				278	store double %val, double addrspace(3)* %arrayidx0, align 8
				279	%add.x = add nsw i32 %x.i, 8
				280	%arrayidx1 = getelementptr inbounds [512 x double] addrspace(3)* @lds.f64, i32 0, i32 %add.x
				281	store double %val, double addrspace(3)* %arrayidx1, align 8
				282	ret void
				283	}
				284
				285	; SI-LABEL: @misaligned_simple_write2_one_val_f64
				286	; SI-DAG: BUFFER_LOAD_DWORDX2 v{{\[}}[[VAL0:[0-9]+]]:[[VAL1:[0-9]+]]{{\]}}
				287	; SI-DAG: V_LSHLREV_B32_e32 [[VPTR:v[0-9]+]], 3, v{{[0-9]+}}
Matt Arsenault	61cc908	2014-10-10 22:16:07 +0000	[diff] [blame]	288	; SI: DS_WRITE2_B32 [[VPTR]], v[[VAL0]], v[[VAL1]] offset0:0 offset1:1 [M0]
				289	; SI: DS_WRITE2_B32 [[VPTR]], v[[VAL0]], v[[VAL1]] offset0:14 offset1:15 [M0]
Matt Arsenault	4103328	2014-10-10 22:01:59 +0000	[diff] [blame]	290	; SI: S_ENDPGM
				291	define void @misaligned_simple_write2_one_val_f64(double addrspace(1)* %C, double addrspace(1)* %in, double addrspace(3)* %lds) #0 {
				292	%x.i = tail call i32 @llvm.r600.read.tidig.x() #1
				293	%in.gep = getelementptr double addrspace(1)* %in, i32 %x.i
				294	%val = load double addrspace(1)* %in.gep, align 8
				295	%arrayidx0 = getelementptr inbounds double addrspace(3)* %lds, i32 %x.i
				296	store double %val, double addrspace(3)* %arrayidx0, align 4
				297	%add.x = add nsw i32 %x.i, 7
				298	%arrayidx1 = getelementptr inbounds double addrspace(3)* %lds, i32 %add.x
				299	store double %val, double addrspace(3)* %arrayidx1, align 4
				300	ret void
				301	}
				302
				303	; SI-LABEL: @simple_write2_two_val_f64
				304	; SI-DAG: BUFFER_LOAD_DWORDX2 [[VAL0:v\[[0-9]+:[0-9]+\]]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64{{$}}
				305	; SI-DAG: BUFFER_LOAD_DWORDX2 [[VAL1:v\[[0-9]+:[0-9]+\]]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:0x8
				306	; SI-DAG: V_LSHLREV_B32_e32 [[VPTR:v[0-9]+]], 3, v{{[0-9]+}}
Matt Arsenault	61cc908	2014-10-10 22:16:07 +0000	[diff] [blame]	307	; SI: DS_WRITE2_B64 [[VPTR]], [[VAL0]], [[VAL1]] offset0:0 offset1:8 [M0]
Matt Arsenault	4103328	2014-10-10 22:01:59 +0000	[diff] [blame]	308	; SI: S_ENDPGM
				309	define void @simple_write2_two_val_f64(double addrspace(1)* %C, double addrspace(1)* %in) #0 {
				310	%x.i = tail call i32 @llvm.r600.read.tidig.x() #1
				311	%in.gep.0 = getelementptr double addrspace(1)* %in, i32 %x.i
				312	%in.gep.1 = getelementptr double addrspace(1)* %in.gep.0, i32 1
				313	%val0 = load double addrspace(1)* %in.gep.0, align 8
				314	%val1 = load double addrspace(1)* %in.gep.1, align 8
				315	%arrayidx0 = getelementptr inbounds [512 x double] addrspace(3)* @lds.f64, i32 0, i32 %x.i
				316	store double %val0, double addrspace(3)* %arrayidx0, align 8
				317	%add.x = add nsw i32 %x.i, 8
				318	%arrayidx1 = getelementptr inbounds [512 x double] addrspace(3)* @lds.f64, i32 0, i32 %add.x
				319	store double %val1, double addrspace(3)* %arrayidx1, align 8
				320	ret void
				321	}
				322
; Four-element i32 array in addrspace(3); the constant-address store tests
; write to fixed elements of it.
@foo = addrspace(3) global [4 x i32] zeroinitializer, align 4

				325	; SI-LABEL: @store_constant_adjacent_offsets
				326	; SI: V_MOV_B32_e32 [[ZERO:v[0-9]+]], 0{{$}}
				327	; SI: DS_WRITE2_B32 [[ZERO]], v{{[0-9]+}}, v{{[0-9]+}} offset0:0 offset1:1
				328	define void @store_constant_adjacent_offsets() {
				329	store i32 123, i32 addrspace(3)* getelementptr inbounds ([4 x i32] addrspace(3)* @foo, i32 0, i32 0), align 4
				330	store i32 123, i32 addrspace(3)* getelementptr inbounds ([4 x i32] addrspace(3)* @foo, i32 0, i32 1), align 4
				331	ret void
				332	}
				333
				334	; SI-LABEL: @store_constant_disjoint_offsets
				335	; SI-DAG: V_MOV_B32_e32 [[VAL:v[0-9]+]], 0x7b{{$}}
				336	; SI-DAG: V_MOV_B32_e32 [[ZERO:v[0-9]+]], 0{{$}}
				337	; SI: DS_WRITE2_B32 [[ZERO]], [[VAL]], [[VAL]] offset0:0 offset1:2
				338	define void @store_constant_disjoint_offsets() {
				339	store i32 123, i32 addrspace(3)* getelementptr inbounds ([4 x i32] addrspace(3)* @foo, i32 0, i32 0), align 4
				340	store i32 123, i32 addrspace(3)* getelementptr inbounds ([4 x i32] addrspace(3)* @foo, i32 0, i32 2), align 4
				341	ret void
				342	}
				343
; Internal addrspace(3) float arrays written by @write2_sgemm_sequence.
@sgemm.lA = internal unnamed_addr addrspace(3) global [264 x float] zeroinitializer, align 4
@sgemm.lB = internal unnamed_addr addrspace(3) global [776 x float] zeroinitializer, align 4

				347	define void @write2_sgemm_sequence(float addrspace(1)* %C, i32 %lda, i32 %ldb, float addrspace(1)* %in) #0 {
				348	%x.i = tail call i32 @llvm.r600.read.tgid.x() #1
				349	%y.i = tail call i32 @llvm.r600.read.tidig.y() #1
				350	%val = load float addrspace(1)* %in
				351	%arrayidx44 = getelementptr inbounds [264 x float] addrspace(3)* @sgemm.lA, i32 0, i32 %x.i
				352	store float %val, float addrspace(3)* %arrayidx44, align 4
				353	%add47 = add nsw i32 %x.i, 1
				354	%arrayidx48 = getelementptr inbounds [264 x float] addrspace(3)* @sgemm.lA, i32 0, i32 %add47
				355	store float %val, float addrspace(3)* %arrayidx48, align 4
				356	%add51 = add nsw i32 %x.i, 16
				357	%arrayidx52 = getelementptr inbounds [264 x float] addrspace(3)* @sgemm.lA, i32 0, i32 %add51
				358	store float %val, float addrspace(3)* %arrayidx52, align 4
				359	%add55 = add nsw i32 %x.i, 17
				360	%arrayidx56 = getelementptr inbounds [264 x float] addrspace(3)* @sgemm.lA, i32 0, i32 %add55
				361	store float %val, float addrspace(3)* %arrayidx56, align 4
				362	%arrayidx60 = getelementptr inbounds [776 x float] addrspace(3)* @sgemm.lB, i32 0, i32 %y.i
				363	store float %val, float addrspace(3)* %arrayidx60, align 4
				364	%add63 = add nsw i32 %y.i, 1
				365	%arrayidx64 = getelementptr inbounds [776 x float] addrspace(3)* @sgemm.lB, i32 0, i32 %add63
				366	store float %val, float addrspace(3)* %arrayidx64, align 4
				367	%add67 = add nsw i32 %y.i, 32
				368	%arrayidx68 = getelementptr inbounds [776 x float] addrspace(3)* @sgemm.lB, i32 0, i32 %add67
				369	store float %val, float addrspace(3)* %arrayidx68, align 4
				370	%add71 = add nsw i32 %y.i, 33
				371	%arrayidx72 = getelementptr inbounds [776 x float] addrspace(3)* @sgemm.lB, i32 0, i32 %add71
				372	store float %val, float addrspace(3)* %arrayidx72, align 4
				373	%add75 = add nsw i32 %y.i, 64
				374	%arrayidx76 = getelementptr inbounds [776 x float] addrspace(3)* @sgemm.lB, i32 0, i32 %add75
				375	store float %val, float addrspace(3)* %arrayidx76, align 4
				376	%add79 = add nsw i32 %y.i, 65
				377	%arrayidx80 = getelementptr inbounds [776 x float] addrspace(3)* @sgemm.lB, i32 0, i32 %add79
				378	store float %val, float addrspace(3)* %arrayidx80, align 4
				379	ret void
				380	}
				381
				382	; Function Attrs: nounwind readnone
				383	declare i32 @llvm.r600.read.tgid.x() #1
				384
				385	; Function Attrs: nounwind readnone
				386	declare i32 @llvm.r600.read.tgid.y() #1
				387
				388	; Function Attrs: nounwind readnone
				389	declare i32 @llvm.r600.read.tidig.x() #1
				390
				391	; Function Attrs: nounwind readnone
				392	declare i32 @llvm.r600.read.tidig.y() #1
				393
				394	; Function Attrs: noduplicate nounwind
				395	declare void @llvm.AMDGPU.barrier.local() #2
				396
				397	attributes #0 = { nounwind "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "no-realign-stack" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" }
				398	attributes #1 = { nounwind readnone }
				399	attributes #2 = { noduplicate nounwind }