; RUN: llc -march=amdgcn -mcpu=bonaire -verify-machineinstrs -mattr=+load-store-opt < %s | FileCheck -enable-var-scope -strict-whitespace -check-prefixes=GCN,CI %s
; RUN: llc -march=amdgcn -mcpu=gfx900 -verify-machineinstrs -mattr=+load-store-opt,+flat-for-global < %s | FileCheck -enable-var-scope -strict-whitespace -check-prefixes=GCN,GFX9 %s

; FileCheck test of ds_write / ds_write2 selection for stores to addrspace(3).
; It is run with +load-store-opt for bonaire (check prefixes GCN and CI) and
; for gfx900 (check prefixes GCN and GFX9).

; addrspace(3) arrays that the kernels in this file store into.
@lds = addrspace(3) global [512 x float] undef, align 4
@lds.f64 = addrspace(3) global [512 x double] undef, align 8

; A single float loaded from %in[x] is stored to @lds[x] and @lds[x + 8].
; The checks expect one ds_write2_b32 that uses the same data register for
; both halves, with offset1:8.
; GCN-LABEL: {{^}}simple_write2_one_val_f32:
; CI-DAG: s_mov_b32 m0
; GFX9-NOT: m0

; GCN-DAG: {{buffer|flat|global}}_load_dword [[VAL:v[0-9]+]]
; GCN-DAG: v_lshlrev_b32_e32 [[VPTR:v[0-9]+]], 2, v{{[0-9]+}}
; GCN: ds_write2_b32 [[VPTR]], [[VAL]], [[VAL]] offset1:8
; GCN: s_endpgm
define amdgpu_kernel void @simple_write2_one_val_f32(float addrspace(1)* %C, float addrspace(1)* %in) #0 {
  %x.i = tail call i32 @llvm.amdgcn.workitem.id.x() #1
  %in.gep = getelementptr float, float addrspace(1)* %in, i32 %x.i
  %val = load float, float addrspace(1)* %in.gep, align 4
  ; First store: element x of @lds.
  %arrayidx0 = getelementptr inbounds [512 x float], [512 x float] addrspace(3)* @lds, i32 0, i32 %x.i
  store float %val, float addrspace(3)* %arrayidx0, align 4
  ; Second store: element x + 8 of @lds, same value.
  %add.x = add nsw i32 %x.i, 8
  %arrayidx1 = getelementptr inbounds [512 x float], [512 x float] addrspace(3)* @lds, i32 0, i32 %add.x
  store float %val, float addrspace(3)* %arrayidx1, align 4
  ret void
}

; Two floats are loaded (volatile) from %in[x] and %in[x + 1] and stored to
; @lds[x] and @lds[x + 8]. The checks expect one ds_write2_b32 taking the two
; loaded registers, with offset1:8.
; GCN-LABEL: {{^}}simple_write2_two_val_f32:
; CI-DAG: s_mov_b32 m0
; GFX9-NOT: m0

; CI-DAG: buffer_load_dword [[VAL0:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64{{$}}
; CI-DAG: buffer_load_dword [[VAL1:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:4

; GFX9-DAG: global_load_dword [[VAL0:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, off{{$}}
; GFX9-DAG: global_load_dword [[VAL1:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, off offset:4

; GCN-DAG: v_lshlrev_b32_e32 [[VPTR:v[0-9]+]], 2, v{{[0-9]+}}
; GCN: ds_write2_b32 [[VPTR]], [[VAL0]], [[VAL1]] offset1:8
; GCN: s_endpgm
define amdgpu_kernel void @simple_write2_two_val_f32(float addrspace(1)* %C, float addrspace(1)* %in) #0 {
  %x.i = tail call i32 @llvm.amdgcn.workitem.id.x() #1
  %in.gep.0 = getelementptr float, float addrspace(1)* %in, i32 %x.i
  %in.gep.1 = getelementptr float, float addrspace(1)* %in.gep.0, i32 1
  %val0 = load volatile float, float addrspace(1)* %in.gep.0, align 4
  %val1 = load volatile float, float addrspace(1)* %in.gep.1, align 4
  %arrayidx0 = getelementptr inbounds [512 x float], [512 x float] addrspace(3)* @lds, i32 0, i32 %x.i
  store float %val0, float addrspace(3)* %arrayidx0, align 4
  %add.x = add nsw i32 %x.i, 8
  %arrayidx1 = getelementptr inbounds [512 x float], [512 x float] addrspace(3)* @lds, i32 0, i32 %add.x
  store float %val1, float addrspace(3)* %arrayidx1, align 4
  ret void
}

; Same store pattern as the two-value test (@lds[x] and @lds[x + 8]), but the
; FIRST store is volatile. The checks look for two separate ds_write_b32
; instructions (the second at offset:32) and forbid ds_write2_b32 ahead of the
; first one.
; GCN-LABEL: {{^}}simple_write2_two_val_f32_volatile_0:
; CI-DAG: s_mov_b32 m0
; GFX9-NOT: m0

; GCN-NOT: ds_write2_b32
; GCN: ds_write_b32 {{v[0-9]+}}, {{v[0-9]+}}
; GCN: ds_write_b32 {{v[0-9]+}}, {{v[0-9]+}} offset:32
; GCN: s_endpgm
define amdgpu_kernel void @simple_write2_two_val_f32_volatile_0(float addrspace(1)* %C, float addrspace(1)* %in0, float addrspace(1)* %in1) #0 {
  %x.i = tail call i32 @llvm.amdgcn.workitem.id.x() #1
  %in0.gep = getelementptr float, float addrspace(1)* %in0, i32 %x.i
  %in1.gep = getelementptr float, float addrspace(1)* %in1, i32 %x.i
  %val0 = load volatile float, float addrspace(1)* %in0.gep, align 4
  %val1 = load volatile float, float addrspace(1)* %in1.gep, align 4
  %arrayidx0 = getelementptr inbounds [512 x float], [512 x float] addrspace(3)* @lds, i32 0, i32 %x.i
  ; The volatile store is the one at the lower address.
  store volatile float %val0, float addrspace(3)* %arrayidx0, align 4
  %add.x = add nsw i32 %x.i, 8
  %arrayidx1 = getelementptr inbounds [512 x float], [512 x float] addrspace(3)* @lds, i32 0, i32 %add.x
  store float %val1, float addrspace(3)* %arrayidx1, align 4
  ret void
}

; Same store pattern as the two-value test (@lds[x] and @lds[x + 8]), but the
; SECOND store is volatile. The checks look for two separate ds_write_b32
; instructions (the second at offset:32) and forbid ds_write2_b32 ahead of the
; first one.
; GCN-LABEL: {{^}}simple_write2_two_val_f32_volatile_1:
; CI-DAG: s_mov_b32 m0
; GFX9-NOT: m0

; GCN-NOT: ds_write2_b32
; GCN: ds_write_b32 {{v[0-9]+}}, {{v[0-9]+}}
; GCN: ds_write_b32 {{v[0-9]+}}, {{v[0-9]+}} offset:32
; GCN: s_endpgm
define amdgpu_kernel void @simple_write2_two_val_f32_volatile_1(float addrspace(1)* %C, float addrspace(1)* %in0, float addrspace(1)* %in1) #0 {
  %x.i = tail call i32 @llvm.amdgcn.workitem.id.x() #1
  %in0.gep = getelementptr float, float addrspace(1)* %in0, i32 %x.i
  %in1.gep = getelementptr float, float addrspace(1)* %in1, i32 %x.i
  %val0 = load volatile float, float addrspace(1)* %in0.gep, align 4
  %val1 = load volatile float, float addrspace(1)* %in1.gep, align 4
  %arrayidx0 = getelementptr inbounds [512 x float], [512 x float] addrspace(3)* @lds, i32 0, i32 %x.i
  store float %val0, float addrspace(3)* %arrayidx0, align 4
  %add.x = add nsw i32 %x.i, 8
  %arrayidx1 = getelementptr inbounds [512 x float], [512 x float] addrspace(3)* @lds, i32 0, i32 %add.x
  ; The volatile store is the one at the higher address.
  store volatile float %val1, float addrspace(3)* %arrayidx1, align 4
  ret void
}

; 2 data subregisters from different super registers.
; Two <2 x float> values are loaded; element 0 of the first and element 1 of
; the second are stored to @lds[x] and @lds[x + 8]. The checks capture the low
; register of the first load and the high register of the second, and expect
; both to feed one ds_write2_b32 with offset1:8.
; GCN-LABEL: {{^}}simple_write2_two_val_subreg2_mixed_f32:
; GFX9-NOT: m0

; CI: buffer_load_dwordx2 v{{\[}}[[VAL0:[0-9]+]]:{{[0-9]+\]}}
; CI: buffer_load_dwordx2 v{{\[[0-9]+}}:[[VAL1:[0-9]+]]{{\]}}
; CI-DAG: s_mov_b32 m0

; GCN-DAG: v_lshlrev_b32_e32 [[VPTR:v[0-9]+]], 2, v{{[0-9]+}}

; GFX9: global_load_dwordx2 v{{\[}}[[VAL0:[0-9]+]]:{{[0-9]+\]}}
; GFX9: global_load_dwordx2 v{{\[[0-9]+}}:[[VAL1:[0-9]+]]{{\]}}

; GCN: ds_write2_b32 [[VPTR]], v[[VAL0]], v[[VAL1]] offset1:8
; GCN: s_endpgm
define amdgpu_kernel void @simple_write2_two_val_subreg2_mixed_f32(float addrspace(1)* %C, <2 x float> addrspace(1)* %in) #0 {
  %x.i = tail call i32 @llvm.amdgcn.workitem.id.x() #1
  %in.gep.0 = getelementptr <2 x float>, <2 x float> addrspace(1)* %in, i32 %x.i
  %in.gep.1 = getelementptr <2 x float>, <2 x float> addrspace(1)* %in.gep.0, i32 1
  %val0 = load volatile <2 x float>, <2 x float> addrspace(1)* %in.gep.0, align 8
  %val1 = load volatile <2 x float>, <2 x float> addrspace(1)* %in.gep.1, align 8
  ; Take one lane from each of the two loaded vectors.
  %val0.0 = extractelement <2 x float> %val0, i32 0
  %val1.1 = extractelement <2 x float> %val1, i32 1
  %arrayidx0 = getelementptr inbounds [512 x float], [512 x float] addrspace(3)* @lds, i32 0, i32 %x.i
  store float %val0.0, float addrspace(3)* %arrayidx0, align 4
  %add.x = add nsw i32 %x.i, 8
  %arrayidx1 = getelementptr inbounds [512 x float], [512 x float] addrspace(3)* @lds, i32 0, i32 %add.x
  store float %val1.1, float addrspace(3)* %arrayidx1, align 4
  ret void
}

; Both lanes of one loaded <2 x float> are stored to @lds[x] and @lds[x + 8].
; The checks capture the low and high registers of the single dwordx2 load and
; expect both to feed one ds_write2_b32 with offset1:8.
; GCN-LABEL: {{^}}simple_write2_two_val_subreg2_f32:
; CI-DAG: s_mov_b32 m0
; GFX9-NOT: m0

; GCN-DAG: {{buffer|global}}_load_dwordx2 v{{\[}}[[VAL0:[0-9]+]]:[[VAL1:[0-9]+]]{{\]}}
; GCN-DAG: v_lshlrev_b32_e32 [[VPTR:v[0-9]+]], 2, v{{[0-9]+}}
; GCN: ds_write2_b32 [[VPTR]], v[[VAL0]], v[[VAL1]] offset1:8
; GCN: s_endpgm
define amdgpu_kernel void @simple_write2_two_val_subreg2_f32(float addrspace(1)* %C, <2 x float> addrspace(1)* %in) #0 {
  %x.i = tail call i32 @llvm.amdgcn.workitem.id.x() #1
  %in.gep = getelementptr <2 x float>, <2 x float> addrspace(1)* %in, i32 %x.i
  %val = load <2 x float>, <2 x float> addrspace(1)* %in.gep, align 8
  %val0 = extractelement <2 x float> %val, i32 0
  %val1 = extractelement <2 x float> %val, i32 1
  %arrayidx0 = getelementptr inbounds [512 x float], [512 x float] addrspace(3)* @lds, i32 0, i32 %x.i
  store float %val0, float addrspace(3)* %arrayidx0, align 4
  %add.x = add nsw i32 %x.i, 8
  %arrayidx1 = getelementptr inbounds [512 x float], [512 x float] addrspace(3)* @lds, i32 0, i32 %add.x
  store float %val1, float addrspace(3)* %arrayidx1, align 4
  ret void
}

; Lanes 0 and 3 of one loaded <4 x float> are stored to @lds[x] and
; @lds[x + 8]. The checks capture the first and last registers of the single
; dwordx4 load and expect both to feed one ds_write2_b32 with offset1:8.
; GCN-LABEL: {{^}}simple_write2_two_val_subreg4_f32:
; CI-DAG: s_mov_b32 m0
; GFX9-NOT: m0

; GCN-DAG: {{buffer|global}}_load_dwordx4 v{{\[}}[[VAL0:[0-9]+]]:[[VAL1:[0-9]+]]{{\]}}
; GCN-DAG: v_lshlrev_b32_e32 [[VPTR:v[0-9]+]], 2, v{{[0-9]+}}
; GCN: ds_write2_b32 [[VPTR]], v[[VAL0]], v[[VAL1]] offset1:8
; GCN: s_endpgm
define amdgpu_kernel void @simple_write2_two_val_subreg4_f32(float addrspace(1)* %C, <4 x float> addrspace(1)* %in) #0 {
  %x.i = tail call i32 @llvm.amdgcn.workitem.id.x() #1
  %in.gep = getelementptr <4 x float>, <4 x float> addrspace(1)* %in, i32 %x.i
  %val = load <4 x float>, <4 x float> addrspace(1)* %in.gep, align 16
  ; Only the outermost lanes (0 and 3) are used.
  %val0 = extractelement <4 x float> %val, i32 0
  %val1 = extractelement <4 x float> %val, i32 3
  %arrayidx0 = getelementptr inbounds [512 x float], [512 x float] addrspace(3)* @lds, i32 0, i32 %x.i
  store float %val0, float addrspace(3)* %arrayidx0, align 4
  %add.x = add nsw i32 %x.i, 8
  %arrayidx1 = getelementptr inbounds [512 x float], [512 x float] addrspace(3)* @lds, i32 0, i32 %add.x
  store float %val1, float addrspace(3)* %arrayidx1, align 4
  ret void
}

; Two loaded floats are stored to @lds[x] and @lds[x + 255]. The checks expect
; one ds_write2_b32 with offset1:255.
; GCN-LABEL: {{^}}simple_write2_two_val_max_offset_f32:
; CI-DAG: s_mov_b32 m0
; GFX9-NOT: m0

; CI-DAG: buffer_load_dword [[VAL0:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64{{$}}
; CI-DAG: buffer_load_dword [[VAL1:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:4

; GFX9-DAG: global_load_dword [[VAL0:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, off{{$}}
; GFX9-DAG: global_load_dword [[VAL1:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, off offset:4

; GCN-DAG: v_lshlrev_b32_e32 [[VPTR:v[0-9]+]], 2, v{{[0-9]+}}
; GCN: ds_write2_b32 [[VPTR]], [[VAL0]], [[VAL1]] offset1:255
; GCN: s_endpgm
define amdgpu_kernel void @simple_write2_two_val_max_offset_f32(float addrspace(1)* %C, float addrspace(1)* %in) #0 {
  %x.i = tail call i32 @llvm.amdgcn.workitem.id.x() #1
  %in.gep.0 = getelementptr float, float addrspace(1)* %in, i32 %x.i
  %in.gep.1 = getelementptr float, float addrspace(1)* %in.gep.0, i32 1
  %val0 = load volatile float, float addrspace(1)* %in.gep.0, align 4
  %val1 = load volatile float, float addrspace(1)* %in.gep.1, align 4
  %arrayidx0 = getelementptr inbounds [512 x float], [512 x float] addrspace(3)* @lds, i32 0, i32 %x.i
  store float %val0, float addrspace(3)* %arrayidx0, align 4
  ; Element distance of 255 between the two stores.
  %add.x = add nsw i32 %x.i, 255
  %arrayidx1 = getelementptr inbounds [512 x float], [512 x float] addrspace(3)* @lds, i32 0, i32 %add.x
  store float %val1, float addrspace(3)* %arrayidx1, align 4
  ret void
}

; Two loaded floats are stored to @lds[x] and @lds[x + 257]. The checks expect
; two separate ds_write_b32 instructions, the second at offset:1028
; (257 float elements * 4 bytes).
; GCN-LABEL: {{^}}simple_write2_two_val_too_far_f32:
; CI-DAG: s_mov_b32 m0
; GFX9-NOT: m0

; GCN: ds_write_b32 v{{[0-9]+}}, v{{[0-9]+}}
; GCN: ds_write_b32 v{{[0-9]+}}, v{{[0-9]+}} offset:1028
; GCN: s_endpgm
define amdgpu_kernel void @simple_write2_two_val_too_far_f32(float addrspace(1)* %C, float addrspace(1)* %in0, float addrspace(1)* %in1) #0 {
  %x.i = tail call i32 @llvm.amdgcn.workitem.id.x() #1
  %in0.gep = getelementptr float, float addrspace(1)* %in0, i32 %x.i
  %in1.gep = getelementptr float, float addrspace(1)* %in1, i32 %x.i
  %val0 = load float, float addrspace(1)* %in0.gep, align 4
  %val1 = load float, float addrspace(1)* %in1.gep, align 4
  %arrayidx0 = getelementptr inbounds [512 x float], [512 x float] addrspace(3)* @lds, i32 0, i32 %x.i
  store float %val0, float addrspace(3)* %arrayidx0, align 4
  ; Element distance of 257 between the two stores.
  %add.x = add nsw i32 %x.i, 257
  %arrayidx1 = getelementptr inbounds [512 x float], [512 x float] addrspace(3)* @lds, i32 0, i32 %add.x
  store float %val1, float addrspace(3)* %arrayidx1, align 4
  ret void
}

; Four stores of two loaded values to @lds at element offsets 0, 8, 11 and 27
; from tid.x. The checks expect two ds_write2_b32 instructions: offsets (0, 8)
; and (11, 27), both using the same pair of data registers.
; NOTE(review): the second check captures BASEADDR again instead of reusing
; the first capture, so it does not verify that both writes share one base
; register - confirm whether that is intended.
; GCN-LABEL: {{^}}simple_write2_two_val_f32_x2:
; CI-DAG: s_mov_b32 m0
; GFX9-NOT: m0

; GCN: ds_write2_b32 [[BASEADDR:v[0-9]+]], [[VAL0:v[0-9]+]], [[VAL1:v[0-9]+]] offset1:8
; GCN: ds_write2_b32 [[BASEADDR:v[0-9]+]], [[VAL0]], [[VAL1]] offset0:11 offset1:27
; GCN: s_endpgm
define amdgpu_kernel void @simple_write2_two_val_f32_x2(float addrspace(1)* %C, float addrspace(1)* %in0, float addrspace(1)* %in1) #0 {
  %tid.x = tail call i32 @llvm.amdgcn.workitem.id.x() #1
  %in0.gep = getelementptr float, float addrspace(1)* %in0, i32 %tid.x
  %in1.gep = getelementptr float, float addrspace(1)* %in1, i32 %tid.x
  %val0 = load float, float addrspace(1)* %in0.gep, align 4
  %val1 = load float, float addrspace(1)* %in1.gep, align 4

  %idx.0 = add nsw i32 %tid.x, 0
  %arrayidx0 = getelementptr inbounds [512 x float], [512 x float] addrspace(3)* @lds, i32 0, i32 %idx.0
  store float %val0, float addrspace(3)* %arrayidx0, align 4

  %idx.1 = add nsw i32 %tid.x, 8
  %arrayidx1 = getelementptr inbounds [512 x float], [512 x float] addrspace(3)* @lds, i32 0, i32 %idx.1
  store float %val1, float addrspace(3)* %arrayidx1, align 4

  %idx.2 = add nsw i32 %tid.x, 11
  %arrayidx2 = getelementptr inbounds [512 x float], [512 x float] addrspace(3)* @lds, i32 0, i32 %idx.2
  store float %val0, float addrspace(3)* %arrayidx2, align 4

  %idx.3 = add nsw i32 %tid.x, 27
  %arrayidx3 = getelementptr inbounds [512 x float], [512 x float] addrspace(3)* @lds, i32 0, i32 %idx.3
  store float %val1, float addrspace(3)* %arrayidx3, align 4

  ret void
}

; Same as the x2 test, except the first store is at element offset 3 instead
; of 0. The checks expect two ds_write2_b32 instructions with offsets (3, 8)
; and (11, 27), both using the same pair of data registers.
; NOTE(review): the second check captures BASEADDR again instead of reusing
; the first capture, so it does not verify that both writes share one base
; register - confirm whether that is intended.
; GCN-LABEL: {{^}}simple_write2_two_val_f32_x2_nonzero_base:
; CI-DAG: s_mov_b32 m0
; GFX9-NOT: m0

; GCN: ds_write2_b32 [[BASEADDR:v[0-9]+]], [[VAL0:v[0-9]+]], [[VAL1:v[0-9]+]] offset0:3 offset1:8
; GCN: ds_write2_b32 [[BASEADDR:v[0-9]+]], [[VAL0]], [[VAL1]] offset0:11 offset1:27
; GCN: s_endpgm
define amdgpu_kernel void @simple_write2_two_val_f32_x2_nonzero_base(float addrspace(1)* %C, float addrspace(1)* %in0, float addrspace(1)* %in1) #0 {
  %tid.x = tail call i32 @llvm.amdgcn.workitem.id.x() #1
  %in0.gep = getelementptr float, float addrspace(1)* %in0, i32 %tid.x
  %in1.gep = getelementptr float, float addrspace(1)* %in1, i32 %tid.x
  %val0 = load float, float addrspace(1)* %in0.gep, align 4
  %val1 = load float, float addrspace(1)* %in1.gep, align 4

  %idx.0 = add nsw i32 %tid.x, 3
  %arrayidx0 = getelementptr inbounds [512 x float], [512 x float] addrspace(3)* @lds, i32 0, i32 %idx.0
  store float %val0, float addrspace(3)* %arrayidx0, align 4

  %idx.1 = add nsw i32 %tid.x, 8
  %arrayidx1 = getelementptr inbounds [512 x float], [512 x float] addrspace(3)* @lds, i32 0, i32 %idx.1
  store float %val1, float addrspace(3)* %arrayidx1, align 4

  %idx.2 = add nsw i32 %tid.x, 11
  %arrayidx2 = getelementptr inbounds [512 x float], [512 x float] addrspace(3)* @lds, i32 0, i32 %idx.2
  store float %val0, float addrspace(3)* %arrayidx2, align 4

  %idx.3 = add nsw i32 %tid.x, 27
  %arrayidx3 = getelementptr inbounds [512 x float], [512 x float] addrspace(3)* @lds, i32 0, i32 %idx.3
  store float %val1, float addrspace(3)* %arrayidx3, align 4

  ret void
}

; The two store addresses come from the two lanes of a vector-of-pointers
; kernel argument (%lds.ptr) after a vector getelementptr. The checks look for
; two separate ds_write_b32 instructions and forbid ds_write2_b32 ahead of the
; first one.
; GCN-LABEL: {{^}}write2_ptr_subreg_arg_two_val_f32:
; CI-DAG: s_mov_b32 m0
; GFX9-NOT: m0

; GCN-NOT: ds_write2_b32
; GCN: ds_write_b32
; GCN: ds_write_b32
; GCN: s_endpgm
define amdgpu_kernel void @write2_ptr_subreg_arg_two_val_f32(float addrspace(1)* %C, float addrspace(1)* %in0, float addrspace(1)* %in1, <2 x float addrspace(3)*> %lds.ptr) #0 {
  %x.i = tail call i32 @llvm.amdgcn.workitem.id.x() #1
  %in0.gep = getelementptr float, float addrspace(1)* %in0, i32 %x.i
  %in1.gep = getelementptr float, float addrspace(1)* %in1, i32 %x.i
  %val0 = load float, float addrspace(1)* %in0.gep, align 4
  %val1 = load float, float addrspace(1)* %in1.gep, align 4

  ; NOTE(review): both insertelement instructions write lane 0, so lane 0 ends
  ; up as 8 and lane 1 of the index vector stays undef - confirm whether the
  ; second insert was meant to target lane 1.
  %index.0 = insertelement <2 x i32> undef, i32 %x.i, i32 0
  %index.1 = insertelement <2 x i32> %index.0, i32 8, i32 0
  %gep = getelementptr inbounds float, <2 x float addrspace(3)*> %lds.ptr, <2 x i32> %index.1
  %gep.0 = extractelement <2 x float addrspace(3)*> %gep, i32 0
  %gep.1 = extractelement <2 x float addrspace(3)*> %gep, i32 1

  ; Apply an additional offset after the vector that will be more obviously folded.
  %gep.1.offset = getelementptr float, float addrspace(3)* %gep.1, i32 8
  store float %val0, float addrspace(3)* %gep.0, align 4

  ; NOTE(review): %add.x has no users in this function.
  %add.x = add nsw i32 %x.i, 8
  store float %val1, float addrspace(3)* %gep.1.offset, align 4
  ret void
}

; A single double loaded from %in[x] is stored to @lds.f64[x] and
; @lds.f64[x + 8], both with align 8. The checks expect one ds_write2_b64 that
; uses the same 64-bit data register pair twice, with offset1:8.
; GCN-LABEL: {{^}}simple_write2_one_val_f64:
; CI-DAG: s_mov_b32 m0
; GFX9-NOT: m0

; GCN-DAG: {{buffer|global}}_load_dwordx2 [[VAL:v\[[0-9]+:[0-9]+\]]],
; GCN-DAG: v_lshlrev_b32_e32 [[VPTR:v[0-9]+]], 3, v{{[0-9]+}}
; GCN: ds_write2_b64 [[VPTR]], [[VAL]], [[VAL]] offset1:8
; GCN: s_endpgm
define amdgpu_kernel void @simple_write2_one_val_f64(double addrspace(1)* %C, double addrspace(1)* %in) #0 {
  %x.i = tail call i32 @llvm.amdgcn.workitem.id.x() #1
  %in.gep = getelementptr double, double addrspace(1)* %in, i32 %x.i
  %val = load double, double addrspace(1)* %in.gep, align 8
  %arrayidx0 = getelementptr inbounds [512 x double], [512 x double] addrspace(3)* @lds.f64, i32 0, i32 %x.i
  store double %val, double addrspace(3)* %arrayidx0, align 8
  %add.x = add nsw i32 %x.i, 8
  %arrayidx1 = getelementptr inbounds [512 x double], [512 x double] addrspace(3)* @lds.f64, i32 0, i32 %add.x
  store double %val, double addrspace(3)* %arrayidx1, align 8
  ret void
}

; A single double is stored to %lds[x] and %lds[x + 7] with only 4-byte
; alignment. The checks expect each 64-bit store to be emitted as a
; ds_write2_b32 of the two 32-bit halves: dword offsets (0, 1) and (14, 15).
; GCN-LABEL: {{^}}misaligned_simple_write2_one_val_f64:
; CI-DAG: s_mov_b32 m0
; GFX9-NOT: m0

; GCN-DAG: {{buffer|global}}_load_dwordx2 v{{\[}}[[VAL0:[0-9]+]]:[[VAL1:[0-9]+]]{{\]}}
; GCN-DAG: v_lshlrev_b32_e32 [[VPTR:v[0-9]+]], 3, v{{[0-9]+}}
; GCN: ds_write2_b32 [[VPTR]], v[[VAL0]], v[[VAL1]] offset1:1
; GCN: ds_write2_b32 [[VPTR]], v[[VAL0]], v[[VAL1]] offset0:14 offset1:15
; GCN: s_endpgm
define amdgpu_kernel void @misaligned_simple_write2_one_val_f64(double addrspace(1)* %C, double addrspace(1)* %in, double addrspace(3)* %lds) #0 {
  %x.i = tail call i32 @llvm.amdgcn.workitem.id.x() #1
  %in.gep = getelementptr double, double addrspace(1)* %in, i32 %x.i
  %val = load double, double addrspace(1)* %in.gep, align 8
  %arrayidx0 = getelementptr inbounds double, double addrspace(3)* %lds, i32 %x.i
  ; align 4 on a double store: under-aligned relative to the 8-byte element.
  store double %val, double addrspace(3)* %arrayidx0, align 4
  %add.x = add nsw i32 %x.i, 7
  %arrayidx1 = getelementptr inbounds double, double addrspace(3)* %lds, i32 %add.x
  store double %val, double addrspace(3)* %arrayidx1, align 4
  ret void
}

; Two doubles are loaded (volatile) from %in[x] and %in[x + 1] and stored to
; @lds.f64[x] and @lds.f64[x + 8]. The checks expect one ds_write2_b64 taking
; the two loaded register pairs, with offset1:8.
; GCN-LABEL: {{^}}simple_write2_two_val_f64:
; CI-DAG: s_mov_b32 m0
; GFX9-NOT: m0

; CI-DAG: buffer_load_dwordx2 [[VAL0:v\[[0-9]+:[0-9]+\]]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64{{$}}
; CI-DAG: buffer_load_dwordx2 [[VAL1:v\[[0-9]+:[0-9]+\]]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:8

; GFX9-DAG: global_load_dwordx2 [[VAL0:v\[[0-9]+:[0-9]+\]]], {{v\[[0-9]+:[0-9]+\]}}, off{{$}}
; GFX9-DAG: global_load_dwordx2 [[VAL1:v\[[0-9]+:[0-9]+\]]], {{v\[[0-9]+:[0-9]+\]}}, off offset:8


; GCN-DAG: v_lshlrev_b32_e32 [[VPTR:v[0-9]+]], 3, v{{[0-9]+}}
; GCN: ds_write2_b64 [[VPTR]], [[VAL0]], [[VAL1]] offset1:8
; GCN: s_endpgm
define amdgpu_kernel void @simple_write2_two_val_f64(double addrspace(1)* %C, double addrspace(1)* %in) #0 {
  %x.i = tail call i32 @llvm.amdgcn.workitem.id.x() #1
  %in.gep.0 = getelementptr double, double addrspace(1)* %in, i32 %x.i
  %in.gep.1 = getelementptr double, double addrspace(1)* %in.gep.0, i32 1
  %val0 = load volatile double, double addrspace(1)* %in.gep.0, align 8
  %val1 = load volatile double, double addrspace(1)* %in.gep.1, align 8
  %arrayidx0 = getelementptr inbounds [512 x double], [512 x double] addrspace(3)* @lds.f64, i32 0, i32 %x.i
  store double %val0, double addrspace(3)* %arrayidx0, align 8
  %add.x = add nsw i32 %x.i, 8
  %arrayidx1 = getelementptr inbounds [512 x double], [512 x double] addrspace(3)* @lds.f64, i32 0, i32 %add.x
  store double %val1, double addrspace(3)* %arrayidx1, align 8
  ret void
}

; Four-element i32 addrspace(3) array targeted by the constant-offset store tests.
@foo = addrspace(3) global [4 x i32] undef, align 4

; The constant 123 is stored to @foo[0] and @foo[1] through constant-expression
; addresses. The checks expect one ds_write2_b32 based on a zero address
; register, with offset1:1.
; GCN-LABEL: {{^}}store_constant_adjacent_offsets:
; CI-DAG: s_mov_b32 m0
; GFX9-NOT: m0

; GCN-DAG: v_mov_b32_e32 [[ZERO:v[0-9]+]], 0{{$}}
; GCN: ds_write2_b32 [[ZERO]], v{{[0-9]+}}, v{{[0-9]+}} offset1:1
define amdgpu_kernel void @store_constant_adjacent_offsets() {
  store i32 123, i32 addrspace(3)* getelementptr inbounds ([4 x i32], [4 x i32] addrspace(3)* @foo, i32 0, i32 0), align 4
  store i32 123, i32 addrspace(3)* getelementptr inbounds ([4 x i32], [4 x i32] addrspace(3)* @foo, i32 0, i32 1), align 4
  ret void
}

; The constant 123 (0x7b) is stored to @foo[0] and @foo[2], leaving element 1
; untouched. The checks expect one ds_write2_b32 based on a zero address
; register that uses the same data register twice, with offset1:2.
; GCN-LABEL: {{^}}store_constant_disjoint_offsets:
; CI-DAG: s_mov_b32 m0
; GFX9-NOT: m0

; GCN-DAG: v_mov_b32_e32 [[VAL:v[0-9]+]], 0x7b{{$}}
; GCN-DAG: v_mov_b32_e32 [[ZERO:v[0-9]+]], 0{{$}}
; GCN: ds_write2_b32 [[ZERO]], [[VAL]], [[VAL]] offset1:2
define amdgpu_kernel void @store_constant_disjoint_offsets() {
  store i32 123, i32 addrspace(3)* getelementptr inbounds ([4 x i32], [4 x i32] addrspace(3)* @foo, i32 0, i32 0), align 4
  store i32 123, i32 addrspace(3)* getelementptr inbounds ([4 x i32], [4 x i32] addrspace(3)* @foo, i32 0, i32 2), align 4
  ret void
}

; Four-element i64 addrspace(3) array declared with only 4-byte alignment.
@bar = addrspace(3) global [4 x i64] undef, align 4

; Two 4-byte-aligned i64 stores of 123 to @bar[0] and @bar[1]. The checks
; expect each to be emitted as a ds_write2_b32 from a zero address register:
; dword offsets (0, 1) and (2, 3).
; GCN-LABEL: {{^}}store_misaligned64_constant_offsets:
; CI-DAG: s_mov_b32 m0
; GFX9-NOT: m0

; GCN-DAG: v_mov_b32_e32 [[ZERO:v[0-9]+]], 0{{$}}
; GCN-DAG: ds_write2_b32 [[ZERO]], v{{[0-9]+}}, v{{[0-9]+}} offset1:1
; GCN-DAG: ds_write2_b32 [[ZERO]], v{{[0-9]+}}, v{{[0-9]+}} offset0:2 offset1:3
; GCN: s_endpgm
define amdgpu_kernel void @store_misaligned64_constant_offsets() {
  store i64 123, i64 addrspace(3)* getelementptr inbounds ([4 x i64], [4 x i64] addrspace(3)* @bar, i32 0, i32 0), align 4
  store i64 123, i64 addrspace(3)* getelementptr inbounds ([4 x i64], [4 x i64] addrspace(3)* @bar, i32 0, i32 1), align 4
  ret void
}

; 4096-element i64 addrspace(3) array declared with only 4-byte alignment.
@bar.large = addrspace(3) global [4096 x i64] undef, align 4

; Two 4-byte-aligned i64 stores of 123 to @bar.large[2048] and
; @bar.large[4095]. The checks expect the byte addresses 0x4000 (2048 * 8) and
; 0x7ff8 (4095 * 8) to be materialized in registers, each used as the base of
; a ds_write2_b32 with offset1:1.
; GCN-LABEL: {{^}}store_misaligned64_constant_large_offsets:
; CI-DAG: s_mov_b32 m0
; GFX9-NOT: m0

; GCN-DAG: v_mov_b32_e32 [[BASE0:v[0-9]+]], 0x7ff8{{$}}
; GCN-DAG: v_mov_b32_e32 [[BASE1:v[0-9]+]], 0x4000{{$}}
; GCN-DAG: ds_write2_b32 [[BASE0]], v{{[0-9]+}}, v{{[0-9]+}} offset1:1
; GCN-DAG: ds_write2_b32 [[BASE1]], v{{[0-9]+}}, v{{[0-9]+}} offset1:1
; GCN: s_endpgm
define amdgpu_kernel void @store_misaligned64_constant_large_offsets() {
  store i64 123, i64 addrspace(3)* getelementptr inbounds ([4096 x i64], [4096 x i64] addrspace(3)* @bar.large, i32 0, i32 2048), align 4
  store i64 123, i64 addrspace(3)* getelementptr inbounds ([4096 x i64], [4096 x i64] addrspace(3)* @bar.large, i32 0, i32 4095), align 4
  ret void
}

; Internal addrspace(3) float arrays (264 and 776 elements) written by
; @write2_sgemm_sequence.
@sgemm.lA = internal unnamed_addr addrspace(3) global [264 x float] undef, align 4
@sgemm.lB = internal unnamed_addr addrspace(3) global [776 x float] undef, align 4

Matt Arsenault3dbeefa2017-03-21 21:39:51 +0000449define amdgpu_kernel void @write2_sgemm_sequence(float addrspace(1)* %C, i32 %lda, i32 %ldb, float addrspace(1)* %in) #0 {
Matt Arsenault9c47dd52016-02-11 06:02:01 +0000450 %x.i = tail call i32 @llvm.amdgcn.workgroup.id.x() #1
451 %y.i = tail call i32 @llvm.amdgcn.workitem.id.y() #1
David Blaikiea79ac142015-02-27 21:17:42 +0000452 %val = load float, float addrspace(1)* %in
David Blaikie79e6c742015-02-27 19:29:02 +0000453 %arrayidx44 = getelementptr inbounds [264 x float], [264 x float] addrspace(3)* @sgemm.lA, i32 0, i32 %x.i
Matt Arsenault41033282014-10-10 22:01:59 +0000454 store float %val, float addrspace(3)* %arrayidx44, align 4
455 %add47 = add nsw i32 %x.i, 1
David Blaikie79e6c742015-02-27 19:29:02 +0000456 %arrayidx48 = getelementptr inbounds [264 x float], [264 x float] addrspace(3)* @sgemm.lA, i32 0, i32 %add47
Matt Arsenault41033282014-10-10 22:01:59 +0000457 store float %val, float addrspace(3)* %arrayidx48, align 4
458 %add51 = add nsw i32 %x.i, 16
David Blaikie79e6c742015-02-27 19:29:02 +0000459 %arrayidx52 = getelementptr inbounds [264 x float], [264 x float] addrspace(3)* @sgemm.lA, i32 0, i32 %add51
Matt Arsenault41033282014-10-10 22:01:59 +0000460 store float %val, float addrspace(3)* %arrayidx52, align 4
461 %add55 = add nsw i32 %x.i, 17
David Blaikie79e6c742015-02-27 19:29:02 +0000462 %arrayidx56 = getelementptr inbounds [264 x float], [264 x float] addrspace(3)* @sgemm.lA, i32 0, i32 %add55
Matt Arsenault41033282014-10-10 22:01:59 +0000463 store float %val, float addrspace(3)* %arrayidx56, align 4
David Blaikie79e6c742015-02-27 19:29:02 +0000464 %arrayidx60 = getelementptr inbounds [776 x float], [776 x float] addrspace(3)* @sgemm.lB, i32 0, i32 %y.i
Matt Arsenault41033282014-10-10 22:01:59 +0000465 store float %val, float addrspace(3)* %arrayidx60, align 4
466 %add63 = add nsw i32 %y.i, 1
David Blaikie79e6c742015-02-27 19:29:02 +0000467 %arrayidx64 = getelementptr inbounds [776 x float], [776 x float] addrspace(3)* @sgemm.lB, i32 0, i32 %add63
Matt Arsenault41033282014-10-10 22:01:59 +0000468 store float %val, float addrspace(3)* %arrayidx64, align 4
469 %add67 = add nsw i32 %y.i, 32
David Blaikie79e6c742015-02-27 19:29:02 +0000470 %arrayidx68 = getelementptr inbounds [776 x float], [776 x float] addrspace(3)* @sgemm.lB, i32 0, i32 %add67
Matt Arsenault41033282014-10-10 22:01:59 +0000471 store float %val, float addrspace(3)* %arrayidx68, align 4
472 %add71 = add nsw i32 %y.i, 33
David Blaikie79e6c742015-02-27 19:29:02 +0000473 %arrayidx72 = getelementptr inbounds [776 x float], [776 x float] addrspace(3)* @sgemm.lB, i32 0, i32 %add71
Matt Arsenault41033282014-10-10 22:01:59 +0000474 store float %val, float addrspace(3)* %arrayidx72, align 4
475 %add75 = add nsw i32 %y.i, 64
David Blaikie79e6c742015-02-27 19:29:02 +0000476 %arrayidx76 = getelementptr inbounds [776 x float], [776 x float] addrspace(3)* @sgemm.lB, i32 0, i32 %add75
Matt Arsenault41033282014-10-10 22:01:59 +0000477 store float %val, float addrspace(3)* %arrayidx76, align 4
478 %add79 = add nsw i32 %y.i, 65
David Blaikie79e6c742015-02-27 19:29:02 +0000479 %arrayidx80 = getelementptr inbounds [776 x float], [776 x float] addrspace(3)* @sgemm.lB, i32 0, i32 %add79
Matt Arsenault41033282014-10-10 22:01:59 +0000480 store float %val, float addrspace(3)* %arrayidx80, align 4
481 ret void
482}
483
; Loads a <4 x float> from global memory and stores it to LDS at
; %out[workitem.id.x] with only 4-byte alignment. The checks expect the
; vector store to be emitted as two ds_write2_b32 instructions, first one with
; offset0:2 offset1:3 and then one with offset1:1.
; NOTE(review): %in.gep has no index operands, so the load address is %in
; itself for every work-item; only the LDS address depends on
; workitem.id.x - confirm this is intended.

; GCN-LABEL: {{^}}simple_write2_v4f32_superreg_align4:
; CI: s_mov_b32 m0
; GFX9-NOT: m0

; GCN: ds_write2_b32 {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} offset0:2 offset1:3{{$}}
; GCN: ds_write2_b32 {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} offset1:1{{$}}
define amdgpu_kernel void @simple_write2_v4f32_superreg_align4(<4 x float> addrspace(3)* %out, <4 x float> addrspace(1)* %in) #0 {
  %x.i = tail call i32 @llvm.amdgcn.workitem.id.x() #1
  %in.gep = getelementptr inbounds <4 x float>, <4 x float> addrspace(1)* %in
  %val0 = load <4 x float>, <4 x float> addrspace(1)* %in.gep, align 4
  %out.gep = getelementptr inbounds <4 x float>, <4 x float> addrspace(3)* %out, i32 %x.i
  store <4 x float> %val0, <4 x float> addrspace(3)* %out.gep, align 4
  ret void
}
498
; AMDGPU ID intrinsics called by the kernels in this file. All of them take no
; arguments, return i32 and share attribute group #1.
declare i32 @llvm.amdgcn.workgroup.id.x() #1
declare i32 @llvm.amdgcn.workgroup.id.y() #1
declare i32 @llvm.amdgcn.workitem.id.x() #1
declare i32 @llvm.amdgcn.workitem.id.y() #1

; Attribute groups:
;   #0 - attached to the kernel definitions
;   #1 - attached to the ID intrinsic declarations and their call sites
;   #2 - not referenced in this part of the file
attributes #0 = { nounwind }
attributes #1 = { nounwind readnone speculatable }
attributes #2 = { convergent nounwind }