blob: 582b87be6c20911ade3e009816eb7b65b171debb [file] [log] [blame]
Stanislav Mekhanoshind026f792017-04-13 17:53:07 +00001; RUN: llc -march=amdgcn -mtriple=amdgcn--amdhsa -verify-machineinstrs < %s | FileCheck %s
2
; Test case: eight float loads from an addrspace(3) pointer at element indices
; 0, 100, ..., 700 (100 floats apart; with 4-byte floats that is the 400-byte
; stride in the name). The patterns below first capture the scalar register
; loaded from s[4:5] at 0x0 (ARG) and the vector register it is copied into
; (BASE). They then require four ds_read2_b32 instructions, each with
; offset1:100: one on BASE itself and three on bases formed by adding 0x320,
; 0x640 and 0x960 (800, 1600 and 2400 bytes, i.e. elements 200, 400 and 600).
; The add and read patterns are DAG-style, so their relative order is free.
; NOTE(review): the digits fused to the start of each line in this file look
; like line-number residue from a source viewer - confirm and strip before
; feeding the file to llc.
3; CHECK-LABEL: ds_read32_combine_stride_400:
4; CHECK: s_load_dword [[ARG:s[0-9]+]], s[4:5], 0x0
5; CHECK: v_mov_b32_e32 [[BASE:v[0-9]+]], [[ARG]]
6; CHECK-DAG: v_add_i32_e32 [[B1:v[0-9]+]], vcc, 0x320, [[BASE]]
7; CHECK-DAG: v_add_i32_e32 [[B2:v[0-9]+]], vcc, 0x640, [[BASE]]
8; CHECK-DAG: v_add_i32_e32 [[B3:v[0-9]+]], vcc, 0x960, [[BASE]]
9; CHECK-DAG: ds_read2_b32 v[{{[0-9]+:[0-9]+}}], [[BASE]] offset1:100
10; CHECK-DAG: ds_read2_b32 v[{{[0-9]+:[0-9]+}}], [[B1]] offset1:100
11; CHECK-DAG: ds_read2_b32 v[{{[0-9]+:[0-9]+}}], [[B2]] offset1:100
12; CHECK-DAG: ds_read2_b32 v[{{[0-9]+:[0-9]+}}], [[B3]] offset1:100
13define void @ds_read32_combine_stride_400(float addrspace(3)* nocapture readonly %arg, float *nocapture %arg1) {
14bb:
  ; The running sum starts as element 0 plus 0.0; every later load is added
  ; to it in ascending index order.
15 %tmp = load float, float addrspace(3)* %arg, align 4
16 %tmp2 = fadd float %tmp, 0.000000e+00
17 %tmp3 = getelementptr inbounds float, float addrspace(3)* %arg, i32 100
18 %tmp4 = load float, float addrspace(3)* %tmp3, align 4
19 %tmp5 = fadd float %tmp2, %tmp4
20 %tmp6 = getelementptr inbounds float, float addrspace(3)* %arg, i32 200
21 %tmp7 = load float, float addrspace(3)* %tmp6, align 4
22 %tmp8 = fadd float %tmp5, %tmp7
23 %tmp9 = getelementptr inbounds float, float addrspace(3)* %arg, i32 300
24 %tmp10 = load float, float addrspace(3)* %tmp9, align 4
25 %tmp11 = fadd float %tmp8, %tmp10
26 %tmp12 = getelementptr inbounds float, float addrspace(3)* %arg, i32 400
27 %tmp13 = load float, float addrspace(3)* %tmp12, align 4
28 %tmp14 = fadd float %tmp11, %tmp13
29 %tmp15 = getelementptr inbounds float, float addrspace(3)* %arg, i32 500
30 %tmp16 = load float, float addrspace(3)* %tmp15, align 4
31 %tmp17 = fadd float %tmp14, %tmp16
32 %tmp18 = getelementptr inbounds float, float addrspace(3)* %arg, i32 600
33 %tmp19 = load float, float addrspace(3)* %tmp18, align 4
34 %tmp20 = fadd float %tmp17, %tmp19
35 %tmp21 = getelementptr inbounds float, float addrspace(3)* %arg, i32 700
36 %tmp22 = load float, float addrspace(3)* %tmp21, align 4
37 %tmp23 = fadd float %tmp20, %tmp22
  ; The accumulated sum of all eight loads is stored through %arg1.
38 store float %tmp23, float *%arg1, align 4
39 ret void
40}
41
; Test case: the same eight float addresses as ds_read32_combine_stride_400
; (element indices 0, 100, ..., 700 of an addrspace(3) pointer), but loaded in
; descending index order, 700 first and 0 last. The expected output is written
; identically to the ascending case: four ds_read2_b32 with offset1:100, on
; BASE and on BASE plus 0x320, 0x640 and 0x960 (800, 1600 and 2400 bytes).
42; CHECK-LABEL: ds_read32_combine_stride_400_back:
43; CHECK: s_load_dword [[ARG:s[0-9]+]], s[4:5], 0x0
44; CHECK: v_mov_b32_e32 [[BASE:v[0-9]+]], [[ARG]]
45; CHECK-DAG: v_add_i32_e32 [[B1:v[0-9]+]], vcc, 0x320, [[BASE]]
46; CHECK-DAG: v_add_i32_e32 [[B2:v[0-9]+]], vcc, 0x640, [[BASE]]
47; CHECK-DAG: v_add_i32_e32 [[B3:v[0-9]+]], vcc, 0x960, [[BASE]]
48; CHECK-DAG: ds_read2_b32 v[{{[0-9]+:[0-9]+}}], [[BASE]] offset1:100
49; CHECK-DAG: ds_read2_b32 v[{{[0-9]+:[0-9]+}}], [[B1]] offset1:100
50; CHECK-DAG: ds_read2_b32 v[{{[0-9]+:[0-9]+}}], [[B2]] offset1:100
51; CHECK-DAG: ds_read2_b32 v[{{[0-9]+:[0-9]+}}], [[B3]] offset1:100
52define void @ds_read32_combine_stride_400_back(float addrspace(3)* nocapture readonly %arg, float *nocapture %arg1) {
53bb:
  ; The running sum starts as element 700 plus 0.0 and walks down to element 0.
54 %tmp = getelementptr inbounds float, float addrspace(3)* %arg, i32 700
55 %tmp2 = load float, float addrspace(3)* %tmp, align 4
56 %tmp3 = fadd float %tmp2, 0.000000e+00
57 %tmp4 = getelementptr inbounds float, float addrspace(3)* %arg, i32 600
58 %tmp5 = load float, float addrspace(3)* %tmp4, align 4
59 %tmp6 = fadd float %tmp3, %tmp5
60 %tmp7 = getelementptr inbounds float, float addrspace(3)* %arg, i32 500
61 %tmp8 = load float, float addrspace(3)* %tmp7, align 4
62 %tmp9 = fadd float %tmp6, %tmp8
63 %tmp10 = getelementptr inbounds float, float addrspace(3)* %arg, i32 400
64 %tmp11 = load float, float addrspace(3)* %tmp10, align 4
65 %tmp12 = fadd float %tmp9, %tmp11
66 %tmp13 = getelementptr inbounds float, float addrspace(3)* %arg, i32 300
67 %tmp14 = load float, float addrspace(3)* %tmp13, align 4
68 %tmp15 = fadd float %tmp12, %tmp14
69 %tmp16 = getelementptr inbounds float, float addrspace(3)* %arg, i32 200
70 %tmp17 = load float, float addrspace(3)* %tmp16, align 4
71 %tmp18 = fadd float %tmp15, %tmp17
72 %tmp19 = getelementptr inbounds float, float addrspace(3)* %arg, i32 100
73 %tmp20 = load float, float addrspace(3)* %tmp19, align 4
74 %tmp21 = fadd float %tmp18, %tmp20
  ; The last load reads element 0 directly through %arg (no getelementptr).
75 %tmp22 = load float, float addrspace(3)* %arg, align 4
76 %tmp23 = fadd float %tmp21, %tmp22
  ; The accumulated sum of all eight loads is stored through %arg1.
77 store float %tmp23, float *%arg1, align 4
78 ret void
79}
80
; Test case: eight float loads from an addrspace(3) pointer at element indices
; 0, 2048, ..., 14336 (2048 floats apart; with 4-byte floats that is the
; 8192-byte stride in the name). The patterns require four ds_read2st64_b32
; instructions that all address through the unmodified BASE register, with
; offset pairs (none printed, 32), (64, 96), (128, 160) and (192, 224). No
; v_add of the base is expected in this case.
; NOTE(review): consecutive offsets differing by 32 for an 8192-byte stride is
; consistent with st64 offsets counting units of 64 elements
; (32 * 64 * 4 bytes = 8192) - confirm against the ISA documentation.
81; CHECK-LABEL: ds_read32_combine_stride_8192:
82; CHECK: s_load_dword [[ARG:s[0-9]+]], s[4:5], 0x0
83; CHECK: v_mov_b32_e32 [[BASE:v[0-9]+]], [[ARG]]
84; CHECK-DAG: ds_read2st64_b32 v[{{[0-9]+:[0-9]+}}], [[BASE]] offset1:32
85; CHECK-DAG: ds_read2st64_b32 v[{{[0-9]+:[0-9]+}}], [[BASE]] offset0:64 offset1:96
86; CHECK-DAG: ds_read2st64_b32 v[{{[0-9]+:[0-9]+}}], [[BASE]] offset0:128 offset1:160
87; CHECK-DAG: ds_read2st64_b32 v[{{[0-9]+:[0-9]+}}], [[BASE]] offset0:192 offset1:224
88define void @ds_read32_combine_stride_8192(float addrspace(3)* nocapture readonly %arg, float *nocapture %arg1) {
89bb:
  ; The running sum starts as element 0 plus 0.0; every later load is added
  ; to it in ascending index order.
90 %tmp = load float, float addrspace(3)* %arg, align 4
91 %tmp2 = fadd float %tmp, 0.000000e+00
92 %tmp3 = getelementptr inbounds float, float addrspace(3)* %arg, i32 2048
93 %tmp4 = load float, float addrspace(3)* %tmp3, align 4
94 %tmp5 = fadd float %tmp2, %tmp4
95 %tmp6 = getelementptr inbounds float, float addrspace(3)* %arg, i32 4096
96 %tmp7 = load float, float addrspace(3)* %tmp6, align 4
97 %tmp8 = fadd float %tmp5, %tmp7
98 %tmp9 = getelementptr inbounds float, float addrspace(3)* %arg, i32 6144
99 %tmp10 = load float, float addrspace(3)* %tmp9, align 4
100 %tmp11 = fadd float %tmp8, %tmp10
101 %tmp12 = getelementptr inbounds float, float addrspace(3)* %arg, i32 8192
102 %tmp13 = load float, float addrspace(3)* %tmp12, align 4
103 %tmp14 = fadd float %tmp11, %tmp13
104 %tmp15 = getelementptr inbounds float, float addrspace(3)* %arg, i32 10240
105 %tmp16 = load float, float addrspace(3)* %tmp15, align 4
106 %tmp17 = fadd float %tmp14, %tmp16
107 %tmp18 = getelementptr inbounds float, float addrspace(3)* %arg, i32 12288
108 %tmp19 = load float, float addrspace(3)* %tmp18, align 4
109 %tmp20 = fadd float %tmp17, %tmp19
110 %tmp21 = getelementptr inbounds float, float addrspace(3)* %arg, i32 14336
111 %tmp22 = load float, float addrspace(3)* %tmp21, align 4
112 %tmp23 = fadd float %tmp20, %tmp22
  ; The accumulated sum of all eight loads is stored through %arg1.
113 store float %tmp23, float *%arg1, align 4
114 ret void
115}
116
; Test case: six float loads from an addrspace(3) pointer at element indices
; 2, 2050, 4098, 6146, 8194 and 10242, i.e. the 2048-element (8192-byte)
; stride shifted by two elements from the pointer. The patterns require three
; ds_read2st64_b32 instructions with offset1:32, none of them addressing
; through BASE directly: the bases are BASE plus 8, 0x4008 and 0x8008
; (8, 16392 and 32776 bytes, i.e. elements 2, 4098 and 8194).
117; CHECK-LABEL: ds_read32_combine_stride_8192_shifted:
118; CHECK: s_load_dword [[ARG:s[0-9]+]], s[4:5], 0x0
119; CHECK: v_mov_b32_e32 [[BASE:v[0-9]+]], [[ARG]]
120; CHECK-DAG: v_add_i32_e32 [[B1:v[0-9]+]], vcc, 8, [[BASE]]
121; CHECK-DAG: v_add_i32_e32 [[B2:v[0-9]+]], vcc, 0x4008, [[BASE]]
122; CHECK-DAG: v_add_i32_e32 [[B3:v[0-9]+]], vcc, 0x8008, [[BASE]]
123; CHECK-DAG: ds_read2st64_b32 v[{{[0-9]+:[0-9]+}}], [[B1]] offset1:32
124; CHECK-DAG: ds_read2st64_b32 v[{{[0-9]+:[0-9]+}}], [[B2]] offset1:32
125; CHECK-DAG: ds_read2st64_b32 v[{{[0-9]+:[0-9]+}}], [[B3]] offset1:32
126define void @ds_read32_combine_stride_8192_shifted(float addrspace(3)* nocapture readonly %arg, float *nocapture %arg1) {
127bb:
  ; The running sum starts as element 2 plus 0.0; every later load is added
  ; to it in ascending index order.
128 %tmp = getelementptr inbounds float, float addrspace(3)* %arg, i32 2
129 %tmp2 = load float, float addrspace(3)* %tmp, align 4
130 %tmp3 = fadd float %tmp2, 0.000000e+00
131 %tmp4 = getelementptr inbounds float, float addrspace(3)* %arg, i32 2050
132 %tmp5 = load float, float addrspace(3)* %tmp4, align 4
133 %tmp6 = fadd float %tmp3, %tmp5
134 %tmp7 = getelementptr inbounds float, float addrspace(3)* %arg, i32 4098
135 %tmp8 = load float, float addrspace(3)* %tmp7, align 4
136 %tmp9 = fadd float %tmp6, %tmp8
137 %tmp10 = getelementptr inbounds float, float addrspace(3)* %arg, i32 6146
138 %tmp11 = load float, float addrspace(3)* %tmp10, align 4
139 %tmp12 = fadd float %tmp9, %tmp11
140 %tmp13 = getelementptr inbounds float, float addrspace(3)* %arg, i32 8194
141 %tmp14 = load float, float addrspace(3)* %tmp13, align 4
142 %tmp15 = fadd float %tmp12, %tmp14
143 %tmp16 = getelementptr inbounds float, float addrspace(3)* %arg, i32 10242
144 %tmp17 = load float, float addrspace(3)* %tmp16, align 4
145 %tmp18 = fadd float %tmp15, %tmp17
  ; The accumulated sum of all six loads is stored through %arg1.
146 store float %tmp18, float *%arg1, align 4
147 ret void
148}
149
; Test case: eight double loads from an addrspace(3) pointer at element
; indices 0, 50, ..., 350 (50 doubles apart; with 8-byte doubles that is the
; 400-byte stride in the name). The patterns require four ds_read2_b64
; instructions: three addressing through BASE with offset pairs
; (none printed, 50), (100, 150) and (200, 250), and a fourth with offset1:50
; on a base formed by adding 0x960 (2400 bytes, i.e. element 300) to BASE.
150; CHECK-LABEL: ds_read64_combine_stride_400:
151; CHECK: s_load_dword [[ARG:s[0-9]+]], s[4:5], 0x0
152; CHECK: v_mov_b32_e32 [[BASE:v[0-9]+]], [[ARG]]
153; CHECK-DAG: v_add_i32_e32 [[B1:v[0-9]+]], vcc, 0x960, [[BASE]]
154; CHECK-DAG: ds_read2_b64 v[{{[0-9]+:[0-9]+}}], [[BASE]] offset1:50
155; CHECK-DAG: ds_read2_b64 v[{{[0-9]+:[0-9]+}}], [[BASE]] offset0:100 offset1:150
156; CHECK-DAG: ds_read2_b64 v[{{[0-9]+:[0-9]+}}], [[BASE]] offset0:200 offset1:250
157; CHECK-DAG: ds_read2_b64 v[{{[0-9]+:[0-9]+}}], [[B1]] offset1:50
158define void @ds_read64_combine_stride_400(double addrspace(3)* nocapture readonly %arg, double *nocapture %arg1) {
159bb:
  ; The running sum starts as element 0 plus 0.0; every later load is added
  ; to it in ascending index order.
160 %tmp = load double, double addrspace(3)* %arg, align 8
161 %tmp2 = fadd double %tmp, 0.000000e+00
162 %tmp3 = getelementptr inbounds double, double addrspace(3)* %arg, i32 50
163 %tmp4 = load double, double addrspace(3)* %tmp3, align 8
164 %tmp5 = fadd double %tmp2, %tmp4
165 %tmp6 = getelementptr inbounds double, double addrspace(3)* %arg, i32 100
166 %tmp7 = load double, double addrspace(3)* %tmp6, align 8
167 %tmp8 = fadd double %tmp5, %tmp7
168 %tmp9 = getelementptr inbounds double, double addrspace(3)* %arg, i32 150
169 %tmp10 = load double, double addrspace(3)* %tmp9, align 8
170 %tmp11 = fadd double %tmp8, %tmp10
171 %tmp12 = getelementptr inbounds double, double addrspace(3)* %arg, i32 200
172 %tmp13 = load double, double addrspace(3)* %tmp12, align 8
173 %tmp14 = fadd double %tmp11, %tmp13
174 %tmp15 = getelementptr inbounds double, double addrspace(3)* %arg, i32 250
175 %tmp16 = load double, double addrspace(3)* %tmp15, align 8
176 %tmp17 = fadd double %tmp14, %tmp16
177 %tmp18 = getelementptr inbounds double, double addrspace(3)* %arg, i32 300
178 %tmp19 = load double, double addrspace(3)* %tmp18, align 8
179 %tmp20 = fadd double %tmp17, %tmp19
180 %tmp21 = getelementptr inbounds double, double addrspace(3)* %arg, i32 350
181 %tmp22 = load double, double addrspace(3)* %tmp21, align 8
182 %tmp23 = fadd double %tmp20, %tmp22
  ; The accumulated sum of all eight loads is stored through %arg1.
183 store double %tmp23, double *%arg1, align 8
184 ret void
185}
186
; Test case: six double loads from an addrspace(3) pointer at element indices
; 1, 1025, 2049, 3073, 4097 and 5121, i.e. a 1024-element stride (8192 bytes
; with 8-byte doubles) shifted by one element from the pointer. The patterns
; require three ds_read2st64_b64 instructions with offset1:16, none of them
; addressing through BASE directly: the bases are BASE plus 8, 0x4008 and
; 0x8008 (8, 16392 and 32776 bytes, i.e. elements 1, 2049 and 4097).
; NOTE(review): offset1:16 for an 8192-byte distance is consistent with st64
; offsets counting units of 64 elements (16 * 64 * 8 bytes = 8192) - confirm
; against the ISA documentation.
187; CHECK-LABEL: ds_read64_combine_stride_8192_shifted:
188; CHECK: s_load_dword [[ARG:s[0-9]+]], s[4:5], 0x0
189; CHECK: v_mov_b32_e32 [[BASE:v[0-9]+]], [[ARG]]
190; CHECK-DAG: v_add_i32_e32 [[B1:v[0-9]+]], vcc, 8, [[BASE]]
191; CHECK-DAG: v_add_i32_e32 [[B2:v[0-9]+]], vcc, 0x4008, [[BASE]]
192; CHECK-DAG: v_add_i32_e32 [[B3:v[0-9]+]], vcc, 0x8008, [[BASE]]
193; CHECK-DAG: ds_read2st64_b64 v[{{[0-9]+:[0-9]+}}], [[B1]] offset1:16
194; CHECK-DAG: ds_read2st64_b64 v[{{[0-9]+:[0-9]+}}], [[B2]] offset1:16
195; CHECK-DAG: ds_read2st64_b64 v[{{[0-9]+:[0-9]+}}], [[B3]] offset1:16
196define void @ds_read64_combine_stride_8192_shifted(double addrspace(3)* nocapture readonly %arg, double *nocapture %arg1) {
197bb:
  ; The running sum starts as element 1 plus 0.0; every later load is added
  ; to it in ascending index order.
198 %tmp = getelementptr inbounds double, double addrspace(3)* %arg, i32 1
199 %tmp2 = load double, double addrspace(3)* %tmp, align 8
200 %tmp3 = fadd double %tmp2, 0.000000e+00
201 %tmp4 = getelementptr inbounds double, double addrspace(3)* %arg, i32 1025
202 %tmp5 = load double, double addrspace(3)* %tmp4, align 8
203 %tmp6 = fadd double %tmp3, %tmp5
204 %tmp7 = getelementptr inbounds double, double addrspace(3)* %arg, i32 2049
205 %tmp8 = load double, double addrspace(3)* %tmp7, align 8
206 %tmp9 = fadd double %tmp6, %tmp8
207 %tmp10 = getelementptr inbounds double, double addrspace(3)* %arg, i32 3073
208 %tmp11 = load double, double addrspace(3)* %tmp10, align 8
209 %tmp12 = fadd double %tmp9, %tmp11
210 %tmp13 = getelementptr inbounds double, double addrspace(3)* %arg, i32 4097
211 %tmp14 = load double, double addrspace(3)* %tmp13, align 8
212 %tmp15 = fadd double %tmp12, %tmp14
213 %tmp16 = getelementptr inbounds double, double addrspace(3)* %arg, i32 5121
214 %tmp17 = load double, double addrspace(3)* %tmp16, align 8
215 %tmp18 = fadd double %tmp15, %tmp17
  ; The accumulated sum of all six loads is stored through %arg1.
216 store double %tmp18, double *%arg1, align 8
217 ret void
218}
219
; Test case: eight stores of the float constant 1.0 through an addrspace(3)
; pointer at element indices 0, 100, ..., 700 (100 floats apart; with 4-byte
; floats that is the 400-byte stride in the name). The patterns require four
; ds_write2_b32 instructions with offset1:100: one on BASE and three on bases
; formed by adding 0x320, 0x640 and 0x960 (800, 1600 and 2400 bytes, i.e.
; elements 200, 400 and 600). The two data operands may be any vector
; registers.
220; CHECK-LABEL: ds_write32_combine_stride_400:
221; CHECK: s_load_dword [[ARG:s[0-9]+]], s[4:5], 0x0
222; CHECK: v_mov_b32_e32 [[BASE:v[0-9]+]], [[ARG]]
223; CHECK-DAG: v_add_i32_e32 [[B1:v[0-9]+]], vcc, 0x320, [[BASE]]
224; CHECK-DAG: v_add_i32_e32 [[B2:v[0-9]+]], vcc, 0x640, [[BASE]]
225; CHECK-DAG: v_add_i32_e32 [[B3:v[0-9]+]], vcc, 0x960, [[BASE]]
226; CHECK-DAG: ds_write2_b32 [[BASE]], v{{[0-9]+}}, v{{[0-9]+}} offset1:100
227; CHECK-DAG: ds_write2_b32 [[B1]], v{{[0-9]+}}, v{{[0-9]+}} offset1:100
228; CHECK-DAG: ds_write2_b32 [[B2]], v{{[0-9]+}}, v{{[0-9]+}} offset1:100
229; CHECK-DAG: ds_write2_b32 [[B3]], v{{[0-9]+}}, v{{[0-9]+}} offset1:100
230define void @ds_write32_combine_stride_400(float addrspace(3)* nocapture %arg) {
231bb:
  ; Element 0 is written directly through %arg; the remaining seven stores
  ; follow in ascending index order.
232 store float 1.000000e+00, float addrspace(3)* %arg, align 4
233 %tmp = getelementptr inbounds float, float addrspace(3)* %arg, i32 100
234 store float 1.000000e+00, float addrspace(3)* %tmp, align 4
235 %tmp1 = getelementptr inbounds float, float addrspace(3)* %arg, i32 200
236 store float 1.000000e+00, float addrspace(3)* %tmp1, align 4
237 %tmp2 = getelementptr inbounds float, float addrspace(3)* %arg, i32 300
238 store float 1.000000e+00, float addrspace(3)* %tmp2, align 4
239 %tmp3 = getelementptr inbounds float, float addrspace(3)* %arg, i32 400
240 store float 1.000000e+00, float addrspace(3)* %tmp3, align 4
241 %tmp4 = getelementptr inbounds float, float addrspace(3)* %arg, i32 500
242 store float 1.000000e+00, float addrspace(3)* %tmp4, align 4
243 %tmp5 = getelementptr inbounds float, float addrspace(3)* %arg, i32 600
244 store float 1.000000e+00, float addrspace(3)* %tmp5, align 4
245 %tmp6 = getelementptr inbounds float, float addrspace(3)* %arg, i32 700
246 store float 1.000000e+00, float addrspace(3)* %tmp6, align 4
247 ret void
248}
249
; Test case: the same eight float addresses as ds_write32_combine_stride_400
; (element indices 0, 100, ..., 700 of an addrspace(3) pointer), each written
; with the constant 1.0, but in descending index order, 700 first and 0 last.
; The expected output is written identically to the ascending case: four
; ds_write2_b32 with offset1:100, on BASE and on BASE plus 0x320, 0x640 and
; 0x960 (800, 1600 and 2400 bytes).
250; CHECK-LABEL: ds_write32_combine_stride_400_back:
251; CHECK: s_load_dword [[ARG:s[0-9]+]], s[4:5], 0x0
252; CHECK: v_mov_b32_e32 [[BASE:v[0-9]+]], [[ARG]]
253; CHECK-DAG: v_add_i32_e32 [[B1:v[0-9]+]], vcc, 0x320, [[BASE]]
254; CHECK-DAG: v_add_i32_e32 [[B2:v[0-9]+]], vcc, 0x640, [[BASE]]
255; CHECK-DAG: v_add_i32_e32 [[B3:v[0-9]+]], vcc, 0x960, [[BASE]]
256; CHECK-DAG: ds_write2_b32 [[BASE]], v{{[0-9]+}}, v{{[0-9]+}} offset1:100
257; CHECK-DAG: ds_write2_b32 [[B1]], v{{[0-9]+}}, v{{[0-9]+}} offset1:100
258; CHECK-DAG: ds_write2_b32 [[B2]], v{{[0-9]+}}, v{{[0-9]+}} offset1:100
259; CHECK-DAG: ds_write2_b32 [[B3]], v{{[0-9]+}}, v{{[0-9]+}} offset1:100
260define void @ds_write32_combine_stride_400_back(float addrspace(3)* nocapture %arg) {
261bb:
  ; Stores run from element 700 down to element 100 through computed
  ; pointers.
262 %tmp = getelementptr inbounds float, float addrspace(3)* %arg, i32 700
263 store float 1.000000e+00, float addrspace(3)* %tmp, align 4
264 %tmp1 = getelementptr inbounds float, float addrspace(3)* %arg, i32 600
265 store float 1.000000e+00, float addrspace(3)* %tmp1, align 4
266 %tmp2 = getelementptr inbounds float, float addrspace(3)* %arg, i32 500
267 store float 1.000000e+00, float addrspace(3)* %tmp2, align 4
268 %tmp3 = getelementptr inbounds float, float addrspace(3)* %arg, i32 400
269 store float 1.000000e+00, float addrspace(3)* %tmp3, align 4
270 %tmp4 = getelementptr inbounds float, float addrspace(3)* %arg, i32 300
271 store float 1.000000e+00, float addrspace(3)* %tmp4, align 4
272 %tmp5 = getelementptr inbounds float, float addrspace(3)* %arg, i32 200
273 store float 1.000000e+00, float addrspace(3)* %tmp5, align 4
274 %tmp6 = getelementptr inbounds float, float addrspace(3)* %arg, i32 100
275 store float 1.000000e+00, float addrspace(3)* %tmp6, align 4
  ; The final store writes element 0 directly through %arg.
276 store float 1.000000e+00, float addrspace(3)* %arg, align 4
277 ret void
278}
279
; Test case: eight stores of the float constant 1.0 through an addrspace(3)
; pointer at element indices 0, 2048, ..., 14336 (2048 floats apart; with
; 4-byte floats that is the 8192-byte stride in the name). The patterns
; require four ds_write2st64_b32 instructions that all address through the
; unmodified BASE register, with offset pairs (none printed, 32), (64, 96),
; (128, 160) and (192, 224). No v_add of the base is expected in this case.
; NOTE(review): consecutive offsets differing by 32 for an 8192-byte stride is
; consistent with st64 offsets counting units of 64 elements
; (32 * 64 * 4 bytes = 8192) - confirm against the ISA documentation.
280; CHECK-LABEL: ds_write32_combine_stride_8192:
281; CHECK: s_load_dword [[ARG:s[0-9]+]], s[4:5], 0x0
282; CHECK: v_mov_b32_e32 [[BASE:v[0-9]+]], [[ARG]]
283; CHECK-DAG: ds_write2st64_b32 [[BASE]], v{{[0-9]+}}, v{{[0-9]+}} offset1:32
284; CHECK-DAG: ds_write2st64_b32 [[BASE]], v{{[0-9]+}}, v{{[0-9]+}} offset0:64 offset1:96
285; CHECK-DAG: ds_write2st64_b32 [[BASE]], v{{[0-9]+}}, v{{[0-9]+}} offset0:128 offset1:160
286; CHECK-DAG: ds_write2st64_b32 [[BASE]], v{{[0-9]+}}, v{{[0-9]+}} offset0:192 offset1:224
287define void @ds_write32_combine_stride_8192(float addrspace(3)* nocapture %arg) {
288bb:
  ; Element 0 is written directly through %arg; the remaining seven stores
  ; follow in ascending index order.
289 store float 1.000000e+00, float addrspace(3)* %arg, align 4
290 %tmp = getelementptr inbounds float, float addrspace(3)* %arg, i32 2048
291 store float 1.000000e+00, float addrspace(3)* %tmp, align 4
292 %tmp1 = getelementptr inbounds float, float addrspace(3)* %arg, i32 4096
293 store float 1.000000e+00, float addrspace(3)* %tmp1, align 4
294 %tmp2 = getelementptr inbounds float, float addrspace(3)* %arg, i32 6144
295 store float 1.000000e+00, float addrspace(3)* %tmp2, align 4
296 %tmp3 = getelementptr inbounds float, float addrspace(3)* %arg, i32 8192
297 store float 1.000000e+00, float addrspace(3)* %tmp3, align 4
298 %tmp4 = getelementptr inbounds float, float addrspace(3)* %arg, i32 10240
299 store float 1.000000e+00, float addrspace(3)* %tmp4, align 4
300 %tmp5 = getelementptr inbounds float, float addrspace(3)* %arg, i32 12288
301 store float 1.000000e+00, float addrspace(3)* %tmp5, align 4
302 %tmp6 = getelementptr inbounds float, float addrspace(3)* %arg, i32 14336
303 store float 1.000000e+00, float addrspace(3)* %tmp6, align 4
304 ret void
305}
306
; Test case: six stores of the float constant 1.0 through an addrspace(3)
; pointer at element indices 1, 2049, 4097, 6145, 8193 and 10241, i.e. the
; 2048-element (8192-byte) stride shifted by one element from the pointer.
; The patterns require three ds_write2st64_b32 instructions with offset1:32,
; none of them addressing through BASE directly: the bases are BASE plus 4,
; 0x4004 and 0x8004 (4, 16388 and 32772 bytes, i.e. elements 1, 4097 and
; 8193).
307; CHECK-LABEL: ds_write32_combine_stride_8192_shifted:
308; CHECK: s_load_dword [[ARG:s[0-9]+]], s[4:5], 0x0
309; CHECK: v_mov_b32_e32 [[BASE:v[0-9]+]], [[ARG]]
310; CHECK-DAG: v_add_i32_e32 [[B1:v[0-9]+]], vcc, 4, [[BASE]]
311; CHECK-DAG: v_add_i32_e32 [[B2:v[0-9]+]], vcc, 0x4004, [[BASE]]
312; CHECK-DAG: v_add_i32_e32 [[B3:v[0-9]+]], vcc, 0x8004, [[BASE]]
313; CHECK-DAG: ds_write2st64_b32 [[B1]], v{{[0-9]+}}, v{{[0-9]+}} offset1:32
314; CHECK-DAG: ds_write2st64_b32 [[B2]], v{{[0-9]+}}, v{{[0-9]+}} offset1:32
315; CHECK-DAG: ds_write2st64_b32 [[B3]], v{{[0-9]+}}, v{{[0-9]+}} offset1:32
316define void @ds_write32_combine_stride_8192_shifted(float addrspace(3)* nocapture %arg) {
317bb:
  ; All six stores go through computed pointers, in ascending index order;
  ; element 0 is never written.
318 %tmp = getelementptr inbounds float, float addrspace(3)* %arg, i32 1
319 store float 1.000000e+00, float addrspace(3)* %tmp, align 4
320 %tmp1 = getelementptr inbounds float, float addrspace(3)* %arg, i32 2049
321 store float 1.000000e+00, float addrspace(3)* %tmp1, align 4
322 %tmp2 = getelementptr inbounds float, float addrspace(3)* %arg, i32 4097
323 store float 1.000000e+00, float addrspace(3)* %tmp2, align 4
324 %tmp3 = getelementptr inbounds float, float addrspace(3)* %arg, i32 6145
325 store float 1.000000e+00, float addrspace(3)* %tmp3, align 4
326 %tmp4 = getelementptr inbounds float, float addrspace(3)* %arg, i32 8193
327 store float 1.000000e+00, float addrspace(3)* %tmp4, align 4
328 %tmp5 = getelementptr inbounds float, float addrspace(3)* %arg, i32 10241
329 store float 1.000000e+00, float addrspace(3)* %tmp5, align 4
330 ret void
331}
332
; Test case: eight stores of the double constant 1.0 through an addrspace(3)
; pointer at element indices 0, 50, ..., 350 (50 doubles apart; with 8-byte
; doubles that is the 400-byte stride in the name). The patterns require four
; ds_write2_b64 instructions: three addressing through BASE with offset pairs
; (none printed, 50), (100, 150) and (200, 250), and a fourth with offset1:50
; on a base formed by adding 0x960 (2400 bytes, i.e. element 300) to BASE.
; Both data operands are matched as vector register ranges.
333; CHECK-LABEL: ds_write64_combine_stride_400:
334; CHECK: s_load_dword [[ARG:s[0-9]+]], s[4:5], 0x0
335; CHECK: v_mov_b32_e32 [[BASE:v[0-9]+]], [[ARG]]
336; CHECK-DAG: v_add_i32_e32 [[B1:v[0-9]+]], vcc, 0x960, [[BASE]]
337; CHECK-DAG: ds_write2_b64 [[BASE]], v[{{[0-9]+:[0-9]+}}], v[{{[0-9]+:[0-9]+}}] offset1:50
338; CHECK-DAG: ds_write2_b64 [[BASE]], v[{{[0-9]+:[0-9]+}}], v[{{[0-9]+:[0-9]+}}] offset0:100 offset1:150
339; CHECK-DAG: ds_write2_b64 [[BASE]], v[{{[0-9]+:[0-9]+}}], v[{{[0-9]+:[0-9]+}}] offset0:200 offset1:250
340; CHECK-DAG: ds_write2_b64 [[B1]], v[{{[0-9]+:[0-9]+}}], v[{{[0-9]+:[0-9]+}}] offset1:50
341define void @ds_write64_combine_stride_400(double addrspace(3)* nocapture %arg) {
342bb:
  ; Element 0 is written directly through %arg; the remaining seven stores
  ; follow in ascending index order.
343 store double 1.000000e+00, double addrspace(3)* %arg, align 8
344 %tmp = getelementptr inbounds double, double addrspace(3)* %arg, i32 50
345 store double 1.000000e+00, double addrspace(3)* %tmp, align 8
346 %tmp1 = getelementptr inbounds double, double addrspace(3)* %arg, i32 100
347 store double 1.000000e+00, double addrspace(3)* %tmp1, align 8
348 %tmp2 = getelementptr inbounds double, double addrspace(3)* %arg, i32 150
349 store double 1.000000e+00, double addrspace(3)* %tmp2, align 8
350 %tmp3 = getelementptr inbounds double, double addrspace(3)* %arg, i32 200
351 store double 1.000000e+00, double addrspace(3)* %tmp3, align 8
352 %tmp4 = getelementptr inbounds double, double addrspace(3)* %arg, i32 250
353 store double 1.000000e+00, double addrspace(3)* %tmp4, align 8
354 %tmp5 = getelementptr inbounds double, double addrspace(3)* %arg, i32 300
355 store double 1.000000e+00, double addrspace(3)* %tmp5, align 8
356 %tmp6 = getelementptr inbounds double, double addrspace(3)* %arg, i32 350
357 store double 1.000000e+00, double addrspace(3)* %tmp6, align 8
358 ret void
359}
360
; Test case: six stores of the double constant 1.0 through an addrspace(3)
; pointer at element indices 1, 1025, 2049, 3073, 4097 and 5121, i.e. a
; 1024-element stride (8192 bytes with 8-byte doubles) shifted by one element
; from the pointer. The patterns require three ds_write2st64_b64 instructions
; with offset1:16, none of them addressing through BASE directly: the bases
; are BASE plus 8, 0x4008 and 0x8008 (8, 16392 and 32776 bytes, i.e. elements
; 1, 2049 and 4097).
; NOTE(review): offset1:16 for an 8192-byte distance is consistent with st64
; offsets counting units of 64 elements (16 * 64 * 8 bytes = 8192) - confirm
; against the ISA documentation.
361; CHECK-LABEL: ds_write64_combine_stride_8192_shifted:
362; CHECK: s_load_dword [[ARG:s[0-9]+]], s[4:5], 0x0
363; CHECK: v_mov_b32_e32 [[BASE:v[0-9]+]], [[ARG]]
364; CHECK-DAG: v_add_i32_e32 [[B1:v[0-9]+]], vcc, 8, [[BASE]]
365; CHECK-DAG: v_add_i32_e32 [[B2:v[0-9]+]], vcc, 0x4008, [[BASE]]
366; CHECK-DAG: v_add_i32_e32 [[B3:v[0-9]+]], vcc, 0x8008, [[BASE]]
367; CHECK-DAG: ds_write2st64_b64 [[B1]], v[{{[0-9]+:[0-9]+}}], v[{{[0-9]+:[0-9]+}}] offset1:16
368; CHECK-DAG: ds_write2st64_b64 [[B2]], v[{{[0-9]+:[0-9]+}}], v[{{[0-9]+:[0-9]+}}] offset1:16
369; CHECK-DAG: ds_write2st64_b64 [[B3]], v[{{[0-9]+:[0-9]+}}], v[{{[0-9]+:[0-9]+}}] offset1:16
370define void @ds_write64_combine_stride_8192_shifted(double addrspace(3)* nocapture %arg) {
371bb:
  ; All six stores go through computed pointers, in ascending index order;
  ; element 0 is never written.
372 %tmp = getelementptr inbounds double, double addrspace(3)* %arg, i32 1
373 store double 1.000000e+00, double addrspace(3)* %tmp, align 8
374 %tmp1 = getelementptr inbounds double, double addrspace(3)* %arg, i32 1025
375 store double 1.000000e+00, double addrspace(3)* %tmp1, align 8
376 %tmp2 = getelementptr inbounds double, double addrspace(3)* %arg, i32 2049
377 store double 1.000000e+00, double addrspace(3)* %tmp2, align 8
378 %tmp3 = getelementptr inbounds double, double addrspace(3)* %arg, i32 3073
379 store double 1.000000e+00, double addrspace(3)* %tmp3, align 8
380 %tmp4 = getelementptr inbounds double, double addrspace(3)* %arg, i32 4097
381 store double 1.000000e+00, double addrspace(3)* %tmp4, align 8
382 %tmp5 = getelementptr inbounds double, double addrspace(3)* %arg, i32 5121
383 store double 1.000000e+00, double addrspace(3)* %tmp5, align 8
384 ret void
385}
385}