Blame - llvm/test/CodeGen/R600/ds_write2.ll - toolchain/llvm-project

blob: 27273e7c674d11d8a8dbd69d6b4f8e3d5cb86f62 [file] [log] [blame]

Tom Stellard	49f8bfd	2015-01-06 18:00:21 +0000	[diff] [blame]	1	; RUN: llc -march=amdgcn -mcpu=bonaire -verify-machineinstrs -mattr=+load-store-opt -enable-misched < %s | FileCheck -strict-whitespace -check-prefix=SI %s
Matt Arsenault	4103328	2014-10-10 22:01:59 +0000	[diff] [blame]	2
; Arrays in addrspace(3) used as store targets by the tests in this file:
; @lds holds 512 floats (align 4) and @lds.f64 holds 512 doubles (align 8).
Matt Arsenault	cc8d3b8	2014-11-13 19:56:13 +0000	[diff] [blame]	3	@lds = addrspace(3) global [512 x float] undef, align 4
				4	@lds.f64 = addrspace(3) global [512 x double] undef, align 8
Matt Arsenault	4103328	2014-10-10 22:01:59 +0000	[diff] [blame]	5
				6
				7	; SI-LABEL: @simple_write2_one_val_f32
Tom Stellard	326d6ec	2014-11-05 14:50:53 +0000	[diff] [blame]	8	; SI-DAG: buffer_load_dword [[VAL:v[0-9]+]]
				9	; SI-DAG: v_lshlrev_b32_e32 [[VPTR:v[0-9]+]], 2, v{{[0-9]+}}
				10	; SI: ds_write2_b32 [[VPTR]], [[VAL]], [[VAL]] offset0:0 offset1:8 [M0]
				11	; SI: s_endpgm
; Loads one float from %in and stores that same value twice into @lds, at
; element %x.i and at element %x.i + 8. The check lines that precede this
; function expect a single ds_write2_b32 (offset0:0 offset1:8) that uses the
; same value register for both data operands.
;   %C  - not referenced in the body.
;   %in - addrspace(1) pointer, indexed by the result of
;         @llvm.r600.read.tidig.x.
Matt Arsenault	4103328	2014-10-10 22:01:59 +0000	[diff] [blame]	12	define void @simple_write2_one_val_f32(float addrspace(1)* %C, float addrspace(1)* %in) #0 {
				13	%x.i = tail call i32 @llvm.r600.read.tidig.x() #1
				14	%in.gep = getelementptr float addrspace(1)* %in, i32 %x.i
				15	%val = load float addrspace(1)* %in.gep, align 4
				16	%arrayidx0 = getelementptr inbounds [512 x float] addrspace(3)* @lds, i32 0, i32 %x.i
				17	store float %val, float addrspace(3)* %arrayidx0, align 4
; Second store goes 8 float elements past the first one.
				18	%add.x = add nsw i32 %x.i, 8
				19	%arrayidx1 = getelementptr inbounds [512 x float] addrspace(3)* @lds, i32 0, i32 %add.x
				20	store float %val, float addrspace(3)* %arrayidx1, align 4
				21	ret void
				22	}
				23
				24	; SI-LABEL: @simple_write2_two_val_f32
Tom Stellard	326d6ec	2014-11-05 14:50:53 +0000	[diff] [blame]	25	; SI-DAG: buffer_load_dword [[VAL0:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64{{$}}
Matt Arsenault	fb13b22	2014-12-03 03:12:13 +0000	[diff] [blame]	26	; SI-DAG: buffer_load_dword [[VAL1:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:4
Tom Stellard	326d6ec	2014-11-05 14:50:53 +0000	[diff] [blame]	27	; SI-DAG: v_lshlrev_b32_e32 [[VPTR:v[0-9]+]], 2, v{{[0-9]+}}
				28	; SI: ds_write2_b32 [[VPTR]], [[VAL0]], [[VAL1]] offset0:0 offset1:8 [M0]
				29	; SI: s_endpgm
; Loads two consecutive floats from %in (elements %x.i and %x.i + 1) and
; stores them into @lds at elements %x.i and %x.i + 8. The preceding check
; lines expect the two stores to be combined into one ds_write2_b32 with
; offset0:0 offset1:8 and two distinct data registers.
;   %C  - not referenced in the body.
;   %in - addrspace(1) source pointer.
Matt Arsenault	4103328	2014-10-10 22:01:59 +0000	[diff] [blame]	30	define void @simple_write2_two_val_f32(float addrspace(1)* %C, float addrspace(1)* %in) #0 {
				31	%x.i = tail call i32 @llvm.r600.read.tidig.x() #1
				32	%in.gep.0 = getelementptr float addrspace(1)* %in, i32 %x.i
				33	%in.gep.1 = getelementptr float addrspace(1)* %in.gep.0, i32 1
				34	%val0 = load float addrspace(1)* %in.gep.0, align 4
				35	%val1 = load float addrspace(1)* %in.gep.1, align 4
				36	%arrayidx0 = getelementptr inbounds [512 x float] addrspace(3)* @lds, i32 0, i32 %x.i
				37	store float %val0, float addrspace(3)* %arrayidx0, align 4
				38	%add.x = add nsw i32 %x.i, 8
				39	%arrayidx1 = getelementptr inbounds [512 x float] addrspace(3)* @lds, i32 0, i32 %add.x
				40	store float %val1, float addrspace(3)* %arrayidx1, align 4
				41	ret void
				42	}
				43
				44	; SI-LABEL: @simple_write2_two_val_f32_volatile_0
Tom Stellard	326d6ec	2014-11-05 14:50:53 +0000	[diff] [blame]	45	; SI-NOT: ds_write2_b32
				46	; SI: ds_write_b32 {{v[0-9]+}}, {{v[0-9]+}}
				47	; SI: ds_write_b32 {{v[0-9]+}}, {{v[0-9]+}} offset:32
				48	; SI: s_endpgm
; Same store pattern as the two-value f32 test (elements %x.i and %x.i + 8 of
; @lds), but the FIRST store is volatile. The preceding check lines require
; that no ds_write2_b32 is produced and that two separate ds_write_b32
; instructions appear, the second with offset:32 (8 floats * 4 bytes).
;   %C         - not referenced in the body.
;   %in0, %in1 - addrspace(1) pointers supplying the two stored values.
Matt Arsenault	4103328	2014-10-10 22:01:59 +0000	[diff] [blame]	49	define void @simple_write2_two_val_f32_volatile_0(float addrspace(1)* %C, float addrspace(1)* %in0, float addrspace(1)* %in1) #0 {
				50	%x.i = tail call i32 @llvm.r600.read.tidig.x() #1
				51	%in0.gep = getelementptr float addrspace(1)* %in0, i32 %x.i
				52	%in1.gep = getelementptr float addrspace(1)* %in1, i32 %x.i
				53	%val0 = load float addrspace(1)* %in0.gep, align 4
				54	%val1 = load float addrspace(1)* %in1.gep, align 4
				55	%arrayidx0 = getelementptr inbounds [512 x float] addrspace(3)* @lds, i32 0, i32 %x.i
; The volatile marker on this store is the point of the test.
				56	store volatile float %val0, float addrspace(3)* %arrayidx0, align 4
				57	%add.x = add nsw i32 %x.i, 8
				58	%arrayidx1 = getelementptr inbounds [512 x float] addrspace(3)* @lds, i32 0, i32 %add.x
				59	store float %val1, float addrspace(3)* %arrayidx1, align 4
				60	ret void
				61	}
				62
				63	; SI-LABEL: @simple_write2_two_val_f32_volatile_1
Tom Stellard	326d6ec	2014-11-05 14:50:53 +0000	[diff] [blame]	64	; SI-NOT: ds_write2_b32
				65	; SI: ds_write_b32 {{v[0-9]+}}, {{v[0-9]+}}
				66	; SI: ds_write_b32 {{v[0-9]+}}, {{v[0-9]+}} offset:32
				67	; SI: s_endpgm
; Counterpart of the volatile_0 test: here the SECOND store (element
; %x.i + 8 of @lds) is the volatile one. The preceding check lines again
; require two separate ds_write_b32 instructions (second one with offset:32)
; and no ds_write2_b32.
;   %C         - not referenced in the body.
;   %in0, %in1 - addrspace(1) pointers supplying the two stored values.
Matt Arsenault	4103328	2014-10-10 22:01:59 +0000	[diff] [blame]	68	define void @simple_write2_two_val_f32_volatile_1(float addrspace(1)* %C, float addrspace(1)* %in0, float addrspace(1)* %in1) #0 {
				69	%x.i = tail call i32 @llvm.r600.read.tidig.x() #1
				70	%in0.gep = getelementptr float addrspace(1)* %in0, i32 %x.i
				71	%in1.gep = getelementptr float addrspace(1)* %in1, i32 %x.i
				72	%val0 = load float addrspace(1)* %in0.gep, align 4
				73	%val1 = load float addrspace(1)* %in1.gep, align 4
				74	%arrayidx0 = getelementptr inbounds [512 x float] addrspace(3)* @lds, i32 0, i32 %x.i
				75	store float %val0, float addrspace(3)* %arrayidx0, align 4
				76	%add.x = add nsw i32 %x.i, 8
				77	%arrayidx1 = getelementptr inbounds [512 x float] addrspace(3)* @lds, i32 0, i32 %add.x
; The volatile marker on this store is the point of the test.
				78	store volatile float %val1, float addrspace(3)* %arrayidx1, align 4
				79	ret void
				80	}
				81
				82	; 2 data subregisters from different super registers.
				83	; SI-LABEL: @simple_write2_two_val_subreg2_mixed_f32
Tom Stellard	326d6ec	2014-11-05 14:50:53 +0000	[diff] [blame]	84	; SI: buffer_load_dwordx2 v{{\[}}[[VAL0:[0-9]+]]:{{[0-9]+\]}}
				85	; SI: buffer_load_dwordx2 v{{\[[0-9]+}}:[[VAL1:[0-9]+]]{{\]}}
				86	; SI: v_lshlrev_b32_e32 [[VPTR:v[0-9]+]], 2, v{{[0-9]+}}
				87	; SI: ds_write2_b32 [[VPTR]], v[[VAL0]], v[[VAL1]] offset0:0 offset1:8 [M0]
				88	; SI: s_endpgm
; Loads two consecutive <2 x float> vectors from %in and stores element 0 of
; the first and element 1 of the second into @lds at elements %x.i and
; %x.i + 8. The preceding check lines expect one ds_write2_b32 whose data
; operands are taken from the two different buffer_load_dwordx2 results.
;   %C  - not referenced in the body.
;   %in - addrspace(1) pointer to <2 x float> elements.
Matt Arsenault	4103328	2014-10-10 22:01:59 +0000	[diff] [blame]	89	define void @simple_write2_two_val_subreg2_mixed_f32(float addrspace(1)* %C, <2 x float> addrspace(1)* %in) #0 {
				90	%x.i = tail call i32 @llvm.r600.read.tidig.x() #1
				91	%in.gep.0 = getelementptr <2 x float> addrspace(1)* %in, i32 %x.i
				92	%in.gep.1 = getelementptr <2 x float> addrspace(1)* %in.gep.0, i32 1
				93	%val0 = load <2 x float> addrspace(1)* %in.gep.0, align 8
				94	%val1 = load <2 x float> addrspace(1)* %in.gep.1, align 8
; Pick lane 0 of the first vector and lane 1 of the second vector.
				95	%val0.0 = extractelement <2 x float> %val0, i32 0
				96	%val1.1 = extractelement <2 x float> %val1, i32 1
				97	%arrayidx0 = getelementptr inbounds [512 x float] addrspace(3)* @lds, i32 0, i32 %x.i
				98	store float %val0.0, float addrspace(3)* %arrayidx0, align 4
				99	%add.x = add nsw i32 %x.i, 8
				100	%arrayidx1 = getelementptr inbounds [512 x float] addrspace(3)* @lds, i32 0, i32 %add.x
				101	store float %val1.1, float addrspace(3)* %arrayidx1, align 4
				102	ret void
				103	}
				104
				105	; SI-LABEL: @simple_write2_two_val_subreg2_f32
Tom Stellard	326d6ec	2014-11-05 14:50:53 +0000	[diff] [blame]	106	; SI-DAG: buffer_load_dwordx2 v{{\[}}[[VAL0:[0-9]+]]:[[VAL1:[0-9]+]]{{\]}}
				107	; SI-DAG: v_lshlrev_b32_e32 [[VPTR:v[0-9]+]], 2, v{{[0-9]+}}
				108	; SI: ds_write2_b32 [[VPTR]], v[[VAL0]], v[[VAL1]] offset0:0 offset1:8 [M0]
				109	; SI: s_endpgm
; Loads a single <2 x float> from %in and stores both of its elements into
; @lds, element 0 at index %x.i and element 1 at index %x.i + 8. The
; preceding check lines expect one ds_write2_b32 whose two data operands are
; the low and high registers of the one buffer_load_dwordx2 result.
;   %C  - not referenced in the body.
;   %in - addrspace(1) pointer to <2 x float> elements.
Matt Arsenault	4103328	2014-10-10 22:01:59 +0000	[diff] [blame]	110	define void @simple_write2_two_val_subreg2_f32(float addrspace(1)* %C, <2 x float> addrspace(1)* %in) #0 {
				111	%x.i = tail call i32 @llvm.r600.read.tidig.x() #1
				112	%in.gep = getelementptr <2 x float> addrspace(1)* %in, i32 %x.i
				113	%val = load <2 x float> addrspace(1)* %in.gep, align 8
				114	%val0 = extractelement <2 x float> %val, i32 0
				115	%val1 = extractelement <2 x float> %val, i32 1
				116	%arrayidx0 = getelementptr inbounds [512 x float] addrspace(3)* @lds, i32 0, i32 %x.i
				117	store float %val0, float addrspace(3)* %arrayidx0, align 4
				118	%add.x = add nsw i32 %x.i, 8
				119	%arrayidx1 = getelementptr inbounds [512 x float] addrspace(3)* @lds, i32 0, i32 %add.x
				120	store float %val1, float addrspace(3)* %arrayidx1, align 4
				121	ret void
				122	}
				123
				124	; SI-LABEL: @simple_write2_two_val_subreg4_f32
Tom Stellard	326d6ec	2014-11-05 14:50:53 +0000	[diff] [blame]	125	; SI-DAG: buffer_load_dwordx4 v{{\[}}[[VAL0:[0-9]+]]:[[VAL1:[0-9]+]]{{\]}}
				126	; SI-DAG: v_lshlrev_b32_e32 [[VPTR:v[0-9]+]], 2, v{{[0-9]+}}
				127	; SI: ds_write2_b32 [[VPTR]], v[[VAL0]], v[[VAL1]] offset0:0 offset1:8 [M0]
				128	; SI: s_endpgm
; Loads a single <4 x float> from %in and stores its first and last elements
; (lanes 0 and 3) into @lds at elements %x.i and %x.i + 8. The preceding
; check lines expect one ds_write2_b32 whose data operands are the first and
; last registers of the buffer_load_dwordx4 result.
;   %C  - not referenced in the body.
;   %in - addrspace(1) pointer to <4 x float> elements.
Matt Arsenault	4103328	2014-10-10 22:01:59 +0000	[diff] [blame]	129	define void @simple_write2_two_val_subreg4_f32(float addrspace(1)* %C, <4 x float> addrspace(1)* %in) #0 {
				130	%x.i = tail call i32 @llvm.r600.read.tidig.x() #1
				131	%in.gep = getelementptr <4 x float> addrspace(1)* %in, i32 %x.i
				132	%val = load <4 x float> addrspace(1)* %in.gep, align 16
				133	%val0 = extractelement <4 x float> %val, i32 0
				134	%val1 = extractelement <4 x float> %val, i32 3
				135	%arrayidx0 = getelementptr inbounds [512 x float] addrspace(3)* @lds, i32 0, i32 %x.i
				136	store float %val0, float addrspace(3)* %arrayidx0, align 4
				137	%add.x = add nsw i32 %x.i, 8
				138	%arrayidx1 = getelementptr inbounds [512 x float] addrspace(3)* @lds, i32 0, i32 %add.x
				139	store float %val1, float addrspace(3)* %arrayidx1, align 4
				140	ret void
				141	}
				142
				143	; SI-LABEL: @simple_write2_two_val_max_offset_f32
Tom Stellard	326d6ec	2014-11-05 14:50:53 +0000	[diff] [blame]	144	; SI-DAG: buffer_load_dword [[VAL0:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64{{$}}
Matt Arsenault	fb13b22	2014-12-03 03:12:13 +0000	[diff] [blame]	145	; SI-DAG: buffer_load_dword [[VAL1:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:4
Tom Stellard	326d6ec	2014-11-05 14:50:53 +0000	[diff] [blame]	146	; SI-DAG: v_lshlrev_b32_e32 [[VPTR:v[0-9]+]], 2, v{{[0-9]+}}
				147	; SI: ds_write2_b32 [[VPTR]], [[VAL0]], [[VAL1]] offset0:0 offset1:255 [M0]
				148	; SI: s_endpgm
; Two-value f32 store pattern where the second store is 255 elements past the
; first. The preceding check lines still expect a single ds_write2_b32, with
; offset0:0 offset1:255.
; NOTE(review): per the function name, 255 is presumably the largest offset
; the combined instruction can encode - confirm against the ISA docs.
;   %C  - not referenced in the body.
;   %in - addrspace(1) source pointer; two consecutive floats are loaded.
Matt Arsenault	4103328	2014-10-10 22:01:59 +0000	[diff] [blame]	149	define void @simple_write2_two_val_max_offset_f32(float addrspace(1)* %C, float addrspace(1)* %in) #0 {
				150	%x.i = tail call i32 @llvm.r600.read.tidig.x() #1
				151	%in.gep.0 = getelementptr float addrspace(1)* %in, i32 %x.i
				152	%in.gep.1 = getelementptr float addrspace(1)* %in.gep.0, i32 1
				153	%val0 = load float addrspace(1)* %in.gep.0, align 4
				154	%val1 = load float addrspace(1)* %in.gep.1, align 4
				155	%arrayidx0 = getelementptr inbounds [512 x float] addrspace(3)* @lds, i32 0, i32 %x.i
				156	store float %val0, float addrspace(3)* %arrayidx0, align 4
				157	%add.x = add nsw i32 %x.i, 255
				158	%arrayidx1 = getelementptr inbounds [512 x float] addrspace(3)* @lds, i32 0, i32 %add.x
				159	store float %val1, float addrspace(3)* %arrayidx1, align 4
				160	ret void
				161	}
				162
				163	; SI-LABEL: @simple_write2_two_val_too_far_f32
Tom Stellard	326d6ec	2014-11-05 14:50:53 +0000	[diff] [blame]	164	; SI: ds_write_b32 v{{[0-9]+}}, v{{[0-9]+}}
				165	; SI: ds_write_b32 v{{[0-9]+}}, v{{[0-9]+}} offset:1028
				166	; SI: s_endpgm
; Two-value f32 store pattern where the second store is 257 elements past the
; first. The preceding check lines expect the stores to stay separate: two
; ds_write_b32 instructions, the second with offset:1028 (257 * 4 bytes).
;   %C         - not referenced in the body.
;   %in0, %in1 - addrspace(1) pointers supplying the two stored values.
Matt Arsenault	4103328	2014-10-10 22:01:59 +0000	[diff] [blame]	167	define void @simple_write2_two_val_too_far_f32(float addrspace(1)* %C, float addrspace(1)* %in0, float addrspace(1)* %in1) #0 {
				168	%x.i = tail call i32 @llvm.r600.read.tidig.x() #1
				169	%in0.gep = getelementptr float addrspace(1)* %in0, i32 %x.i
				170	%in1.gep = getelementptr float addrspace(1)* %in1, i32 %x.i
				171	%val0 = load float addrspace(1)* %in0.gep, align 4
				172	%val1 = load float addrspace(1)* %in1.gep, align 4
				173	%arrayidx0 = getelementptr inbounds [512 x float] addrspace(3)* @lds, i32 0, i32 %x.i
				174	store float %val0, float addrspace(3)* %arrayidx0, align 4
Matt Arsenault	fe0a2e6	2014-10-10 22:12:32 +0000	[diff] [blame]	175	%add.x = add nsw i32 %x.i, 257
Matt Arsenault	4103328	2014-10-10 22:01:59 +0000	[diff] [blame]	176	%arrayidx1 = getelementptr inbounds [512 x float] addrspace(3)* @lds, i32 0, i32 %add.x
				177	store float %val1, float addrspace(3)* %arrayidx1, align 4
				178	ret void
				179	}
				180
				181	; SI-LABEL: @simple_write2_two_val_f32_x2
Tom Stellard	326d6ec	2014-11-05 14:50:53 +0000	[diff] [blame]	182	; SI: ds_write2_b32 [[BASEADDR:v[0-9]+]], [[VAL0:v[0-9]+]], [[VAL1:v[0-9]+]] offset0:0 offset1:8
				183	; SI-NEXT: ds_write2_b32 [[BASEADDR]], [[VAL0]], [[VAL1]] offset0:11 offset1:27
				184	; SI: s_endpgm
; Performs four stores into @lds at elements %tid.x + 0, + 8, + 11 and + 27,
; alternating between the two loaded values (%val0, %val1, %val0, %val1).
; The preceding check lines expect two back-to-back ds_write2_b32
; instructions that share one base address register: offsets 0/8 and 11/27.
;   %C         - not referenced in the body.
;   %in0, %in1 - addrspace(1) pointers supplying %val0 and %val1.
Matt Arsenault	4103328	2014-10-10 22:01:59 +0000	[diff] [blame]	185	define void @simple_write2_two_val_f32_x2(float addrspace(1)* %C, float addrspace(1)* %in0, float addrspace(1)* %in1) #0 {
				186	%tid.x = tail call i32 @llvm.r600.read.tidig.x() #1
				187	%in0.gep = getelementptr float addrspace(1)* %in0, i32 %tid.x
				188	%in1.gep = getelementptr float addrspace(1)* %in1, i32 %tid.x
				189	%val0 = load float addrspace(1)* %in0.gep, align 4
				190	%val1 = load float addrspace(1)* %in1.gep, align 4
				191
				192	%idx.0 = add nsw i32 %tid.x, 0
				193	%arrayidx0 = getelementptr inbounds [512 x float] addrspace(3)* @lds, i32 0, i32 %idx.0
				194	store float %val0, float addrspace(3)* %arrayidx0, align 4
				195
				196	%idx.1 = add nsw i32 %tid.x, 8
				197	%arrayidx1 = getelementptr inbounds [512 x float] addrspace(3)* @lds, i32 0, i32 %idx.1
				198	store float %val1, float addrspace(3)* %arrayidx1, align 4
				199
				200	%idx.2 = add nsw i32 %tid.x, 11
				201	%arrayidx2 = getelementptr inbounds [512 x float] addrspace(3)* @lds, i32 0, i32 %idx.2
				202	store float %val0, float addrspace(3)* %arrayidx2, align 4
				203
				204	%idx.3 = add nsw i32 %tid.x, 27
				205	%arrayidx3 = getelementptr inbounds [512 x float] addrspace(3)* @lds, i32 0, i32 %idx.3
				206	store float %val1, float addrspace(3)* %arrayidx3, align 4
				207
				208	ret void
				209	}
				210
				211	; SI-LABEL: @simple_write2_two_val_f32_x2_nonzero_base
Tom Stellard	326d6ec	2014-11-05 14:50:53 +0000	[diff] [blame]	212	; SI: ds_write2_b32 [[BASEADDR:v[0-9]+]], [[VAL0:v[0-9]+]], [[VAL1:v[0-9]+]] offset0:3 offset1:8
				213	; SI-NEXT: ds_write2_b32 [[BASEADDR]], [[VAL0]], [[VAL1]] offset0:11 offset1:27
				214	; SI: s_endpgm
; Variant of the x2 test in which the first store is not at offset zero: the
; four stores go to @lds elements %tid.x + 3, + 8, + 11 and + 27, alternating
; between %val0 and %val1. The preceding check lines expect two back-to-back
; ds_write2_b32 instructions on a shared base register: offsets 3/8 and 11/27.
;   %C         - not referenced in the body.
;   %in0, %in1 - addrspace(1) pointers supplying %val0 and %val1.
Matt Arsenault	4103328	2014-10-10 22:01:59 +0000	[diff] [blame]	215	define void @simple_write2_two_val_f32_x2_nonzero_base(float addrspace(1)* %C, float addrspace(1)* %in0, float addrspace(1)* %in1) #0 {
				216	%tid.x = tail call i32 @llvm.r600.read.tidig.x() #1
				217	%in0.gep = getelementptr float addrspace(1)* %in0, i32 %tid.x
				218	%in1.gep = getelementptr float addrspace(1)* %in1, i32 %tid.x
				219	%val0 = load float addrspace(1)* %in0.gep, align 4
				220	%val1 = load float addrspace(1)* %in1.gep, align 4
				221
				222	%idx.0 = add nsw i32 %tid.x, 3
				223	%arrayidx0 = getelementptr inbounds [512 x float] addrspace(3)* @lds, i32 0, i32 %idx.0
				224	store float %val0, float addrspace(3)* %arrayidx0, align 4
				225
				226	%idx.1 = add nsw i32 %tid.x, 8
				227	%arrayidx1 = getelementptr inbounds [512 x float] addrspace(3)* @lds, i32 0, i32 %idx.1
				228	store float %val1, float addrspace(3)* %arrayidx1, align 4
				229
				230	%idx.2 = add nsw i32 %tid.x, 11
				231	%arrayidx2 = getelementptr inbounds [512 x float] addrspace(3)* @lds, i32 0, i32 %idx.2
				232	store float %val0, float addrspace(3)* %arrayidx2, align 4
				233
				234	%idx.3 = add nsw i32 %tid.x, 27
				235	%arrayidx3 = getelementptr inbounds [512 x float] addrspace(3)* @lds, i32 0, i32 %idx.3
				236	store float %val1, float addrspace(3)* %arrayidx3, align 4
				237
				238	ret void
				239	}
				240
				241	; SI-LABEL: @write2_ptr_subreg_arg_two_val_f32
Tom Stellard	326d6ec	2014-11-05 14:50:53 +0000	[diff] [blame]	242	; SI-NOT: ds_write2_b32
				243	; SI: ds_write_b32
				244	; SI: ds_write_b32
				245	; SI: s_endpgm
; Stores two loaded floats through pointers taken from the two lanes of a
; vector-of-pointers argument (%lds.ptr) after a vector getelementptr. The
; preceding check lines require that no ds_write2_b32 is produced and that
; two ds_write_b32 instructions appear instead.
;   %C         - not referenced in the body.
;   %in0, %in1 - addrspace(1) pointers supplying %val0 and %val1.
;   %lds.ptr   - <2 x float addrspace(3)*> whose lanes are the store bases.
; NOTE(review): both insertelement instructions below write lane 0, so lane 1
; of %index.1 stays undef, and %add.x is computed but never used. Unclear
; whether this is intentional; changing it could alter the generated code, so
; it is left as is.
Matt Arsenault	4103328	2014-10-10 22:01:59 +0000	[diff] [blame]	246	define void @write2_ptr_subreg_arg_two_val_f32(float addrspace(1)* %C, float addrspace(1)* %in0, float addrspace(1)* %in1, <2 x float addrspace(3)*> %lds.ptr) #0 {
				247	%x.i = tail call i32 @llvm.r600.read.tidig.x() #1
				248	%in0.gep = getelementptr float addrspace(1)* %in0, i32 %x.i
				249	%in1.gep = getelementptr float addrspace(1)* %in1, i32 %x.i
				250	%val0 = load float addrspace(1)* %in0.gep, align 4
				251	%val1 = load float addrspace(1)* %in1.gep, align 4
				252
				253	%index.0 = insertelement <2 x i32> undef, i32 %x.i, i32 0
				254	%index.1 = insertelement <2 x i32> %index.0, i32 8, i32 0
				255	%gep = getelementptr inbounds <2 x float addrspace(3)*> %lds.ptr, <2 x i32> %index.1
				256	%gep.0 = extractelement <2 x float addrspace(3)*> %gep, i32 0
				257	%gep.1 = extractelement <2 x float addrspace(3)*> %gep, i32 1
				258
				259	; Apply an additional offset after the vector that will be more obviously folded.
				260	%gep.1.offset = getelementptr float addrspace(3)* %gep.1, i32 8
				261	store float %val0, float addrspace(3)* %gep.0, align 4
				262
				263	%add.x = add nsw i32 %x.i, 8
				264	store float %val1, float addrspace(3)* %gep.1.offset, align 4
				265	ret void
				266	}
				267
				268	; SI-LABEL: @simple_write2_one_val_f64
Tom Stellard	326d6ec	2014-11-05 14:50:53 +0000	[diff] [blame]	269	; SI: buffer_load_dwordx2 [[VAL:v\[[0-9]+:[0-9]+\]]],
				270	; SI: v_lshlrev_b32_e32 [[VPTR:v[0-9]+]], 3, v{{[0-9]+}}
				271	; SI: ds_write2_b64 [[VPTR]], [[VAL]], [[VAL]] offset0:0 offset1:8 [M0]
				272	; SI: s_endpgm
; 64-bit version of the one-value test: loads one double from %in and stores
; it twice into @lds.f64, at elements %x.i and %x.i + 8, both with align 8.
; The preceding check lines expect a single ds_write2_b64 (offset0:0
; offset1:8) that uses the same register pair for both data operands.
;   %C  - not referenced in the body.
;   %in - addrspace(1) source pointer.
Matt Arsenault	4103328	2014-10-10 22:01:59 +0000	[diff] [blame]	273	define void @simple_write2_one_val_f64(double addrspace(1)* %C, double addrspace(1)* %in) #0 {
				274	%x.i = tail call i32 @llvm.r600.read.tidig.x() #1
				275	%in.gep = getelementptr double addrspace(1)* %in, i32 %x.i
				276	%val = load double addrspace(1)* %in.gep, align 8
				277	%arrayidx0 = getelementptr inbounds [512 x double] addrspace(3)* @lds.f64, i32 0, i32 %x.i
				278	store double %val, double addrspace(3)* %arrayidx0, align 8
				279	%add.x = add nsw i32 %x.i, 8
				280	%arrayidx1 = getelementptr inbounds [512 x double] addrspace(3)* @lds.f64, i32 0, i32 %add.x
				281	store double %val, double addrspace(3)* %arrayidx1, align 8
				282	ret void
				283	}
				284
				285	; SI-LABEL: @misaligned_simple_write2_one_val_f64
Tom Stellard	326d6ec	2014-11-05 14:50:53 +0000	[diff] [blame]	286	; SI-DAG: buffer_load_dwordx2 v{{\[}}[[VAL0:[0-9]+]]:[[VAL1:[0-9]+]]{{\]}}
				287	; SI-DAG: v_lshlrev_b32_e32 [[VPTR:v[0-9]+]], 3, v{{[0-9]+}}
				288	; SI: ds_write2_b32 [[VPTR]], v[[VAL0]], v[[VAL1]] offset0:0 offset1:1 [M0]
				289	; SI: ds_write2_b32 [[VPTR]], v[[VAL0]], v[[VAL1]] offset0:14 offset1:15 [M0]
				290	; SI: s_endpgm
; Stores one loaded double twice through the pointer argument %lds, at
; elements %x.i and %x.i + 7, with both stores marked align 4 only. The
; preceding check lines expect each 64-bit store to become a ds_write2_b32 of
; the two 32-bit halves: offsets 0/1 for the first store and 14/15 for the
; second (7 doubles = 14 dwords).
;   %C   - not referenced in the body.
;   %in  - addrspace(1) source pointer.
;   %lds - addrspace(3) destination pointer.
Matt Arsenault	4103328	2014-10-10 22:01:59 +0000	[diff] [blame]	291	define void @misaligned_simple_write2_one_val_f64(double addrspace(1)* %C, double addrspace(1)* %in, double addrspace(3)* %lds) #0 {
				292	%x.i = tail call i32 @llvm.r600.read.tidig.x() #1
				293	%in.gep = getelementptr double addrspace(1)* %in, i32 %x.i
				294	%val = load double addrspace(1)* %in.gep, align 8
				295	%arrayidx0 = getelementptr inbounds double addrspace(3)* %lds, i32 %x.i
				296	store double %val, double addrspace(3)* %arrayidx0, align 4
				297	%add.x = add nsw i32 %x.i, 7
				298	%arrayidx1 = getelementptr inbounds double addrspace(3)* %lds, i32 %add.x
				299	store double %val, double addrspace(3)* %arrayidx1, align 4
				300	ret void
				301	}
				302
				303	; SI-LABEL: @simple_write2_two_val_f64
Tom Stellard	326d6ec	2014-11-05 14:50:53 +0000	[diff] [blame]	304	; SI-DAG: buffer_load_dwordx2 [[VAL0:v\[[0-9]+:[0-9]+\]]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64{{$}}
Matt Arsenault	fb13b22	2014-12-03 03:12:13 +0000	[diff] [blame]	305	; SI-DAG: buffer_load_dwordx2 [[VAL1:v\[[0-9]+:[0-9]+\]]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:8
Tom Stellard	326d6ec	2014-11-05 14:50:53 +0000	[diff] [blame]	306	; SI-DAG: v_lshlrev_b32_e32 [[VPTR:v[0-9]+]], 3, v{{[0-9]+}}
				307	; SI: ds_write2_b64 [[VPTR]], [[VAL0]], [[VAL1]] offset0:0 offset1:8 [M0]
				308	; SI: s_endpgm
; 64-bit version of the two-value test: loads two consecutive doubles from
; %in and stores them into @lds.f64 at elements %x.i and %x.i + 8 (align 8).
; The preceding check lines expect a single ds_write2_b64 with offset0:0
; offset1:8 and two distinct data register pairs.
;   %C  - not referenced in the body.
;   %in - addrspace(1) source pointer.
Matt Arsenault	4103328	2014-10-10 22:01:59 +0000	[diff] [blame]	309	define void @simple_write2_two_val_f64(double addrspace(1)* %C, double addrspace(1)* %in) #0 {
				310	%x.i = tail call i32 @llvm.r600.read.tidig.x() #1
				311	%in.gep.0 = getelementptr double addrspace(1)* %in, i32 %x.i
				312	%in.gep.1 = getelementptr double addrspace(1)* %in.gep.0, i32 1
				313	%val0 = load double addrspace(1)* %in.gep.0, align 8
				314	%val1 = load double addrspace(1)* %in.gep.1, align 8
				315	%arrayidx0 = getelementptr inbounds [512 x double] addrspace(3)* @lds.f64, i32 0, i32 %x.i
				316	store double %val0, double addrspace(3)* %arrayidx0, align 8
				317	%add.x = add nsw i32 %x.i, 8
				318	%arrayidx1 = getelementptr inbounds [512 x double] addrspace(3)* @lds.f64, i32 0, i32 %add.x
				319	store double %val1, double addrspace(3)* %arrayidx1, align 8
				320	ret void
				321	}
				322
; Four-element i32 array in addrspace(3); the constant-address store tests
; that follow write to fixed elements of it.
Matt Arsenault	cc8d3b8	2014-11-13 19:56:13 +0000	[diff] [blame]	323	@foo = addrspace(3) global [4 x i32] undef, align 4
Matt Arsenault	e775f5f	2014-10-14 17:21:19 +0000	[diff] [blame]	324
				325	; SI-LABEL: @store_constant_adjacent_offsets
Tom Stellard	326d6ec	2014-11-05 14:50:53 +0000	[diff] [blame]	326	; SI: v_mov_b32_e32 [[ZERO:v[0-9]+]], 0{{$}}
				327	; SI: ds_write2_b32 [[ZERO]], v{{[0-9]+}}, v{{[0-9]+}} offset0:0 offset1:1
; Stores the constant 123 to elements 0 and 1 of @foo using constant
; getelementptr expressions (no runtime index). The preceding check lines
; expect one ds_write2_b32 with offset0:0 offset1:1 whose address register
; was set to 0.
Matt Arsenault	e775f5f	2014-10-14 17:21:19 +0000	[diff] [blame]	328	define void @store_constant_adjacent_offsets() {
				329	store i32 123, i32 addrspace(3)* getelementptr inbounds ([4 x i32] addrspace(3)* @foo, i32 0, i32 0), align 4
				330	store i32 123, i32 addrspace(3)* getelementptr inbounds ([4 x i32] addrspace(3)* @foo, i32 0, i32 1), align 4
				331	ret void
				332	}
				333
				334	; SI-LABEL: @store_constant_disjoint_offsets
Tom Stellard	326d6ec	2014-11-05 14:50:53 +0000	[diff] [blame]	335	; SI-DAG: v_mov_b32_e32 [[VAL:v[0-9]+]], 0x7b{{$}}
				336	; SI-DAG: v_mov_b32_e32 [[ZERO:v[0-9]+]], 0{{$}}
				337	; SI: ds_write2_b32 [[ZERO]], [[VAL]], [[VAL]] offset0:0 offset1:2
; Stores the constant 123 to elements 0 and 2 of @foo, i.e. two
; non-adjacent constant addresses. The preceding check lines expect one
; ds_write2_b32 with offset0:0 offset1:2 that uses a single register holding
; 0x7b (123) for both data operands.
Matt Arsenault	e775f5f	2014-10-14 17:21:19 +0000	[diff] [blame]	338	define void @store_constant_disjoint_offsets() {
				339	store i32 123, i32 addrspace(3)* getelementptr inbounds ([4 x i32] addrspace(3)* @foo, i32 0, i32 0), align 4
				340	store i32 123, i32 addrspace(3)* getelementptr inbounds ([4 x i32] addrspace(3)* @foo, i32 0, i32 2), align 4
				341	ret void
				342	}
				343
; Four-element i64 array in addrspace(3). It is declared with align 4 only,
; so its 8-byte elements are not guaranteed to be 8-byte aligned.
Matt Arsenault	cc8d3b8	2014-11-13 19:56:13 +0000	[diff] [blame]	344	@bar = addrspace(3) global [4 x i64] undef, align 4
Matt Arsenault	1a74aff	2014-10-15 18:06:43 +0000	[diff] [blame]	345
				346	; SI-LABEL: @store_misaligned64_constant_offsets
Tom Stellard	326d6ec	2014-11-05 14:50:53 +0000	[diff] [blame]	347	; SI: v_mov_b32_e32 [[ZERO:v[0-9]+]], 0{{$}}
				348	; SI: ds_write2_b32 [[ZERO]], v{{[0-9]+}}, v{{[0-9]+}} offset0:0 offset1:1
				349	; SI: ds_write2_b32 [[ZERO]], v{{[0-9]+}}, v{{[0-9]+}} offset0:2 offset1:3
; Stores the i64 constant 123 to elements 0 and 1 of @bar with align 4. The
; preceding check lines expect each 64-bit store to be emitted as a
; ds_write2_b32 of two dwords from a zero base address: offsets 0/1 for the
; first element and 2/3 for the second.
Matt Arsenault	1a74aff	2014-10-15 18:06:43 +0000	[diff] [blame]	350	define void @store_misaligned64_constant_offsets() {
				351	store i64 123, i64 addrspace(3)* getelementptr inbounds ([4 x i64] addrspace(3)* @bar, i32 0, i32 0), align 4
				352	store i64 123, i64 addrspace(3)* getelementptr inbounds ([4 x i64] addrspace(3)* @bar, i32 0, i32 1), align 4
				353	ret void
				354	}
				355
; 4096-element i64 array in addrspace(3), align 4; the test that follows
; stores to elements far from the start of the array.
Matt Arsenault	cc8d3b8	2014-11-13 19:56:13 +0000	[diff] [blame]	356	@bar.large = addrspace(3) global [4096 x i64] undef, align 4
Matt Arsenault	1a74aff	2014-10-15 18:06:43 +0000	[diff] [blame]	357
				358	; SI-LABEL: @store_misaligned64_constant_large_offsets
Tom Stellard	326d6ec	2014-11-05 14:50:53 +0000	[diff] [blame]	359	; SI-DAG: v_mov_b32_e32 [[BASE0:v[0-9]+]], 0x7ff8{{$}}
				360	; SI-DAG: v_mov_b32_e32 [[BASE1:v[0-9]+]], 0x4000{{$}}
				361	; SI-DAG: ds_write2_b32 [[BASE0]], v{{[0-9]+}}, v{{[0-9]+}} offset0:0 offset1:1
				362	; SI-DAG: ds_write2_b32 [[BASE1]], v{{[0-9]+}}, v{{[0-9]+}} offset0:0 offset1:1
				363	; SI: s_endpgm
; Stores the i64 constant 123 to elements 2048 and 4095 of @bar.large with
; align 4. Those elements sit at byte offsets 0x4000 (2048 * 8) and 0x7ff8
; (4095 * 8). The preceding check lines expect each store to use its own
; base register loaded with that byte offset, followed by a ds_write2_b32
; with offset0:0 offset1:1.
Matt Arsenault	1a74aff	2014-10-15 18:06:43 +0000	[diff] [blame]	364	define void @store_misaligned64_constant_large_offsets() {
				365	store i64 123, i64 addrspace(3)* getelementptr inbounds ([4096 x i64] addrspace(3)* @bar.large, i32 0, i32 2048), align 4
				366	store i64 123, i64 addrspace(3)* getelementptr inbounds ([4096 x i64] addrspace(3)* @bar.large, i32 0, i32 4095), align 4
				367	ret void
				368	}
				369
; Two float arrays in addrspace(3) (264 and 776 elements) written by
; @write2_sgemm_sequence below.
Matt Arsenault	cc8d3b8	2014-11-13 19:56:13 +0000	[diff] [blame]	370	@sgemm.lA = internal unnamed_addr addrspace(3) global [264 x float] undef, align 4
				371	@sgemm.lB = internal unnamed_addr addrspace(3) global [776 x float] undef, align 4
Matt Arsenault	4103328	2014-10-10 22:01:59 +0000	[diff] [blame]	372
; Loads a single float from %in and stores it ten times: four stores into
; @sgemm.lA at elements %x.i + {0, 1, 16, 17} and six stores into @sgemm.lB
; at elements %y.i + {0, 1, 32, 33, 64, 65}.
; Note that %x.i comes from @llvm.r600.read.tgid.x while %y.i comes from
; @llvm.r600.read.tidig.y.
;   %C, %lda, %ldb - not referenced in the body.
;   %in            - addrspace(1) pointer the stored value is loaded from.
; NOTE(review): no check lines precede this function in the visible source,
; so as shown it only has to compile and pass -verify-machineinstrs.
				373	define void @write2_sgemm_sequence(float addrspace(1)* %C, i32 %lda, i32 %ldb, float addrspace(1)* %in) #0 {
				374	%x.i = tail call i32 @llvm.r600.read.tgid.x() #1
				375	%y.i = tail call i32 @llvm.r600.read.tidig.y() #1
				376	%val = load float addrspace(1)* %in
; Stores into @sgemm.lA, indexed from %x.i.
				377	%arrayidx44 = getelementptr inbounds [264 x float] addrspace(3)* @sgemm.lA, i32 0, i32 %x.i
				378	store float %val, float addrspace(3)* %arrayidx44, align 4
				379	%add47 = add nsw i32 %x.i, 1
				380	%arrayidx48 = getelementptr inbounds [264 x float] addrspace(3)* @sgemm.lA, i32 0, i32 %add47
				381	store float %val, float addrspace(3)* %arrayidx48, align 4
				382	%add51 = add nsw i32 %x.i, 16
				383	%arrayidx52 = getelementptr inbounds [264 x float] addrspace(3)* @sgemm.lA, i32 0, i32 %add51
				384	store float %val, float addrspace(3)* %arrayidx52, align 4
				385	%add55 = add nsw i32 %x.i, 17
				386	%arrayidx56 = getelementptr inbounds [264 x float] addrspace(3)* @sgemm.lA, i32 0, i32 %add55
				387	store float %val, float addrspace(3)* %arrayidx56, align 4
; Stores into @sgemm.lB, indexed from %y.i.
				388	%arrayidx60 = getelementptr inbounds [776 x float] addrspace(3)* @sgemm.lB, i32 0, i32 %y.i
				389	store float %val, float addrspace(3)* %arrayidx60, align 4
				390	%add63 = add nsw i32 %y.i, 1
				391	%arrayidx64 = getelementptr inbounds [776 x float] addrspace(3)* @sgemm.lB, i32 0, i32 %add63
				392	store float %val, float addrspace(3)* %arrayidx64, align 4
				393	%add67 = add nsw i32 %y.i, 32
				394	%arrayidx68 = getelementptr inbounds [776 x float] addrspace(3)* @sgemm.lB, i32 0, i32 %add67
				395	store float %val, float addrspace(3)* %arrayidx68, align 4
				396	%add71 = add nsw i32 %y.i, 33
				397	%arrayidx72 = getelementptr inbounds [776 x float] addrspace(3)* @sgemm.lB, i32 0, i32 %add71
				398	store float %val, float addrspace(3)* %arrayidx72, align 4
				399	%add75 = add nsw i32 %y.i, 64
				400	%arrayidx76 = getelementptr inbounds [776 x float] addrspace(3)* @sgemm.lB, i32 0, i32 %add75
				401	store float %val, float addrspace(3)* %arrayidx76, align 4
				402	%add79 = add nsw i32 %y.i, 65
				403	%arrayidx80 = getelementptr inbounds [776 x float] addrspace(3)* @sgemm.lB, i32 0, i32 %add79
				404	store float %val, float addrspace(3)* %arrayidx80, align 4
				405	ret void
				406	}
				407
				408	; Function Attrs: nounwind readnone
				409	declare i32 @llvm.r600.read.tgid.x() #1
				410
				411	; Function Attrs: nounwind readnone
				412	declare i32 @llvm.r600.read.tgid.y() #1
				413
				414	; Function Attrs: nounwind readnone
				415	declare i32 @llvm.r600.read.tidig.x() #1
				416
				417	; Function Attrs: nounwind readnone
				418	declare i32 @llvm.r600.read.tidig.y() #1
				419
				420	; Function Attrs: noduplicate nounwind
				421	declare void @llvm.AMDGPU.barrier.local() #2
				422
				423	attributes #0 = { nounwind "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "no-realign-stack" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" }
				424	attributes #1 = { nounwind readnone }
				425	attributes #2 = { noduplicate nounwind }