; RUN: llc -mtriple=amdgcn--amdhsa -mcpu=tonga -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,VI %s
; RUN: llc -mtriple=amdgcn--amdhsa -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,GFX9 %s

; Eight float loads through the addrspace(3) pointer %arg at element indices
; 0, 100, ..., 700 (400 bytes apart); the values are summed and stored to %arg1.
; Expected codegen: four ds_read2_b32, each pairing two loads that are 100
; elements apart (offset1:100). One uses the argument itself as its base; the
; other three use bases advanced by 0x320, 0x640 and 0x960 bytes. The GFX9
; checks expect those constants as literals in the add, while the VI checks
; expect the add to take an SGPR operand and an explicit vcc.
; GCN-LABEL: ds_read32_combine_stride_400:
; GCN: s_load_dword [[ARG:s[0-9]+]], s[4:5], 0x0
; GCN: v_mov_b32_e32 [[BASE:v[0-9]+]], [[ARG]]

; VI-DAG: v_add_u32_e32 [[B1:v[0-9]+]], vcc, {{s[0-9]+}}, [[BASE]]
; VI-DAG: v_add_u32_e32 [[B2:v[0-9]+]], vcc, {{s[0-9]+}}, [[BASE]]
; VI-DAG: v_add_u32_e32 [[B3:v[0-9]+]], vcc, {{s[0-9]+}}, [[BASE]]

; GFX9-DAG: v_add_u32_e32 [[B1:v[0-9]+]], 0x320, [[BASE]]
; GFX9-DAG: v_add_u32_e32 [[B2:v[0-9]+]], 0x640, [[BASE]]
; GFX9-DAG: v_add_u32_e32 [[B3:v[0-9]+]], 0x960, [[BASE]]

; GCN-DAG: ds_read2_b32 v[{{[0-9]+:[0-9]+}}], [[BASE]] offset1:100
; GCN-DAG: ds_read2_b32 v[{{[0-9]+:[0-9]+}}], [[B1]] offset1:100
; GCN-DAG: ds_read2_b32 v[{{[0-9]+:[0-9]+}}], [[B2]] offset1:100
; GCN-DAG: ds_read2_b32 v[{{[0-9]+:[0-9]+}}], [[B3]] offset1:100
define amdgpu_kernel void @ds_read32_combine_stride_400(float addrspace(3)* nocapture readonly %arg, float *nocapture %arg1) {
bb:
  %tmp = load float, float addrspace(3)* %arg, align 4
  %tmp2 = fadd float %tmp, 0.000000e+00
  %tmp3 = getelementptr inbounds float, float addrspace(3)* %arg, i32 100
  %tmp4 = load float, float addrspace(3)* %tmp3, align 4
  %tmp5 = fadd float %tmp2, %tmp4
  %tmp6 = getelementptr inbounds float, float addrspace(3)* %arg, i32 200
  %tmp7 = load float, float addrspace(3)* %tmp6, align 4
  %tmp8 = fadd float %tmp5, %tmp7
  %tmp9 = getelementptr inbounds float, float addrspace(3)* %arg, i32 300
  %tmp10 = load float, float addrspace(3)* %tmp9, align 4
  %tmp11 = fadd float %tmp8, %tmp10
  %tmp12 = getelementptr inbounds float, float addrspace(3)* %arg, i32 400
  %tmp13 = load float, float addrspace(3)* %tmp12, align 4
  %tmp14 = fadd float %tmp11, %tmp13
  %tmp15 = getelementptr inbounds float, float addrspace(3)* %arg, i32 500
  %tmp16 = load float, float addrspace(3)* %tmp15, align 4
  %tmp17 = fadd float %tmp14, %tmp16
  %tmp18 = getelementptr inbounds float, float addrspace(3)* %arg, i32 600
  %tmp19 = load float, float addrspace(3)* %tmp18, align 4
  %tmp20 = fadd float %tmp17, %tmp19
  %tmp21 = getelementptr inbounds float, float addrspace(3)* %arg, i32 700
  %tmp22 = load float, float addrspace(3)* %tmp21, align 4
  %tmp23 = fadd float %tmp20, %tmp22
  store float %tmp23, float *%arg1, align 4
  ret void
}

; Same eight float loads as the forward stride-400 read test, but issued in
; descending address order (element index 700 down to 0) before being summed
; and stored to %arg1.
; Expected codegen is the same as for ascending order: four ds_read2_b32 with
; offset1:100, one on the argument base and three on bases advanced by 0x320,
; 0x640 and 0x960 bytes (literals in the GFX9 checks, an SGPR operand plus
; vcc in the VI checks).
; GCN-LABEL: ds_read32_combine_stride_400_back:
; GCN: s_load_dword [[ARG:s[0-9]+]], s[4:5], 0x0
; GCN: v_mov_b32_e32 [[BASE:v[0-9]+]], [[ARG]]

; VI-DAG: v_add_u32_e32 [[B1:v[0-9]+]], vcc, {{s[0-9]+}}, [[BASE]]
; VI-DAG: v_add_u32_e32 [[B2:v[0-9]+]], vcc, {{s[0-9]+}}, [[BASE]]
; VI-DAG: v_add_u32_e32 [[B3:v[0-9]+]], vcc, {{s[0-9]+}}, [[BASE]]

; GFX9-DAG: v_add_u32_e32 [[B1:v[0-9]+]], 0x320, [[BASE]]
; GFX9-DAG: v_add_u32_e32 [[B2:v[0-9]+]], 0x640, [[BASE]]
; GFX9-DAG: v_add_u32_e32 [[B3:v[0-9]+]], 0x960, [[BASE]]

; GCN-DAG: ds_read2_b32 v[{{[0-9]+:[0-9]+}}], [[BASE]] offset1:100
; GCN-DAG: ds_read2_b32 v[{{[0-9]+:[0-9]+}}], [[B1]] offset1:100
; GCN-DAG: ds_read2_b32 v[{{[0-9]+:[0-9]+}}], [[B2]] offset1:100
; GCN-DAG: ds_read2_b32 v[{{[0-9]+:[0-9]+}}], [[B3]] offset1:100
define amdgpu_kernel void @ds_read32_combine_stride_400_back(float addrspace(3)* nocapture readonly %arg, float *nocapture %arg1) {
bb:
  %tmp = getelementptr inbounds float, float addrspace(3)* %arg, i32 700
  %tmp2 = load float, float addrspace(3)* %tmp, align 4
  %tmp3 = fadd float %tmp2, 0.000000e+00
  %tmp4 = getelementptr inbounds float, float addrspace(3)* %arg, i32 600
  %tmp5 = load float, float addrspace(3)* %tmp4, align 4
  %tmp6 = fadd float %tmp3, %tmp5
  %tmp7 = getelementptr inbounds float, float addrspace(3)* %arg, i32 500
  %tmp8 = load float, float addrspace(3)* %tmp7, align 4
  %tmp9 = fadd float %tmp6, %tmp8
  %tmp10 = getelementptr inbounds float, float addrspace(3)* %arg, i32 400
  %tmp11 = load float, float addrspace(3)* %tmp10, align 4
  %tmp12 = fadd float %tmp9, %tmp11
  %tmp13 = getelementptr inbounds float, float addrspace(3)* %arg, i32 300
  %tmp14 = load float, float addrspace(3)* %tmp13, align 4
  %tmp15 = fadd float %tmp12, %tmp14
  %tmp16 = getelementptr inbounds float, float addrspace(3)* %arg, i32 200
  %tmp17 = load float, float addrspace(3)* %tmp16, align 4
  %tmp18 = fadd float %tmp15, %tmp17
  %tmp19 = getelementptr inbounds float, float addrspace(3)* %arg, i32 100
  %tmp20 = load float, float addrspace(3)* %tmp19, align 4
  %tmp21 = fadd float %tmp18, %tmp20
  %tmp22 = load float, float addrspace(3)* %arg, align 4
  %tmp23 = fadd float %tmp21, %tmp22
  store float %tmp23, float *%arg1, align 4
  ret void
}

; Eight float loads through the addrspace(3) pointer %arg at element indices
; 0, 2048, ..., 14336 (8192 bytes apart); the values are summed and stored to
; %arg1.
; Expected codegen: four ds_read2st64_b32 that all use the argument itself as
; base, with offset pairs (0,32), (64,96), (128,160) and (192,224). No extra
; address adds are checked for this case.
; GCN-LABEL: ds_read32_combine_stride_8192:
; GCN: s_load_dword [[ARG:s[0-9]+]], s[4:5], 0x0
; GCN: v_mov_b32_e32 [[BASE:v[0-9]+]], [[ARG]]
; GCN-DAG: ds_read2st64_b32 v[{{[0-9]+:[0-9]+}}], [[BASE]] offset1:32
; GCN-DAG: ds_read2st64_b32 v[{{[0-9]+:[0-9]+}}], [[BASE]] offset0:64 offset1:96
; GCN-DAG: ds_read2st64_b32 v[{{[0-9]+:[0-9]+}}], [[BASE]] offset0:128 offset1:160
; GCN-DAG: ds_read2st64_b32 v[{{[0-9]+:[0-9]+}}], [[BASE]] offset0:192 offset1:224
define amdgpu_kernel void @ds_read32_combine_stride_8192(float addrspace(3)* nocapture readonly %arg, float *nocapture %arg1) {
bb:
  %tmp = load float, float addrspace(3)* %arg, align 4
  %tmp2 = fadd float %tmp, 0.000000e+00
  %tmp3 = getelementptr inbounds float, float addrspace(3)* %arg, i32 2048
  %tmp4 = load float, float addrspace(3)* %tmp3, align 4
  %tmp5 = fadd float %tmp2, %tmp4
  %tmp6 = getelementptr inbounds float, float addrspace(3)* %arg, i32 4096
  %tmp7 = load float, float addrspace(3)* %tmp6, align 4
  %tmp8 = fadd float %tmp5, %tmp7
  %tmp9 = getelementptr inbounds float, float addrspace(3)* %arg, i32 6144
  %tmp10 = load float, float addrspace(3)* %tmp9, align 4
  %tmp11 = fadd float %tmp8, %tmp10
  %tmp12 = getelementptr inbounds float, float addrspace(3)* %arg, i32 8192
  %tmp13 = load float, float addrspace(3)* %tmp12, align 4
  %tmp14 = fadd float %tmp11, %tmp13
  %tmp15 = getelementptr inbounds float, float addrspace(3)* %arg, i32 10240
  %tmp16 = load float, float addrspace(3)* %tmp15, align 4
  %tmp17 = fadd float %tmp14, %tmp16
  %tmp18 = getelementptr inbounds float, float addrspace(3)* %arg, i32 12288
  %tmp19 = load float, float addrspace(3)* %tmp18, align 4
  %tmp20 = fadd float %tmp17, %tmp19
  %tmp21 = getelementptr inbounds float, float addrspace(3)* %arg, i32 14336
  %tmp22 = load float, float addrspace(3)* %tmp21, align 4
  %tmp23 = fadd float %tmp20, %tmp22
  store float %tmp23, float *%arg1, align 4
  ret void
}

; Six float loads through the addrspace(3) pointer %arg at element indices
; 2, 2050, ..., 10242: an 8192-byte stride that starts 8 bytes past the
; argument. The values are summed and stored to %arg1.
; Expected codegen: three ds_read2st64_b32 with offset1:32, on bases advanced
; by 8, 0x4008 and 0x8008 bytes. The GFX9 checks expect all three constants in
; the add itself; the VI checks expect the inline constant 8 for the first add
; and an SGPR operand for the other two, each with an explicit vcc.
; GCN-LABEL: ds_read32_combine_stride_8192_shifted:
; GCN: s_load_dword [[ARG:s[0-9]+]], s[4:5], 0x0
; GCN: v_mov_b32_e32 [[BASE:v[0-9]+]], [[ARG]]

; VI-DAG: v_add_u32_e32 [[B1:v[0-9]+]], vcc, 8, [[BASE]]
; VI-DAG: v_add_u32_e32 [[B2:v[0-9]+]], vcc, {{s[0-9]+}}, [[BASE]]
; VI-DAG: v_add_u32_e32 [[B3:v[0-9]+]], vcc, {{s[0-9]+}}, [[BASE]]

; GFX9-DAG: v_add_u32_e32 [[B1:v[0-9]+]], 8, [[BASE]]
; GFX9-DAG: v_add_u32_e32 [[B2:v[0-9]+]], 0x4008, [[BASE]]
; GFX9-DAG: v_add_u32_e32 [[B3:v[0-9]+]], 0x8008, [[BASE]]

; GCN-DAG: ds_read2st64_b32 v[{{[0-9]+:[0-9]+}}], [[B1]] offset1:32
; GCN-DAG: ds_read2st64_b32 v[{{[0-9]+:[0-9]+}}], [[B2]] offset1:32
; GCN-DAG: ds_read2st64_b32 v[{{[0-9]+:[0-9]+}}], [[B3]] offset1:32
define amdgpu_kernel void @ds_read32_combine_stride_8192_shifted(float addrspace(3)* nocapture readonly %arg, float *nocapture %arg1) {
bb:
  %tmp = getelementptr inbounds float, float addrspace(3)* %arg, i32 2
  %tmp2 = load float, float addrspace(3)* %tmp, align 4
  %tmp3 = fadd float %tmp2, 0.000000e+00
  %tmp4 = getelementptr inbounds float, float addrspace(3)* %arg, i32 2050
  %tmp5 = load float, float addrspace(3)* %tmp4, align 4
  %tmp6 = fadd float %tmp3, %tmp5
  %tmp7 = getelementptr inbounds float, float addrspace(3)* %arg, i32 4098
  %tmp8 = load float, float addrspace(3)* %tmp7, align 4
  %tmp9 = fadd float %tmp6, %tmp8
  %tmp10 = getelementptr inbounds float, float addrspace(3)* %arg, i32 6146
  %tmp11 = load float, float addrspace(3)* %tmp10, align 4
  %tmp12 = fadd float %tmp9, %tmp11
  %tmp13 = getelementptr inbounds float, float addrspace(3)* %arg, i32 8194
  %tmp14 = load float, float addrspace(3)* %tmp13, align 4
  %tmp15 = fadd float %tmp12, %tmp14
  %tmp16 = getelementptr inbounds float, float addrspace(3)* %arg, i32 10242
  %tmp17 = load float, float addrspace(3)* %tmp16, align 4
  %tmp18 = fadd float %tmp15, %tmp17
  store float %tmp18, float *%arg1, align 4
  ret void
}

; Eight double loads through the addrspace(3) pointer %arg at element indices
; 0, 50, ..., 350 (400 bytes apart); the values are summed and stored to %arg1.
; Expected codegen: four ds_read2_b64. Three use the argument itself as base
; with offset pairs (0,50), (100,150) and (200,250); the fourth uses a base
; advanced by 0x960 bytes with offset1:50. The GFX9 checks expect 0x960 as a
; literal in the add; the VI checks expect an SGPR operand plus vcc.
; GCN-LABEL: ds_read64_combine_stride_400:
; GCN: s_load_dword [[ARG:s[0-9]+]], s[4:5], 0x0
; GCN: v_mov_b32_e32 [[BASE:v[0-9]+]], [[ARG]]

; VI-DAG: v_add_u32_e32 [[B1:v[0-9]+]], vcc, {{s[0-9]+}}, [[BASE]]
; GFX9-DAG: v_add_u32_e32 [[B1:v[0-9]+]], 0x960, [[BASE]]

; GCN-DAG: ds_read2_b64 v[{{[0-9]+:[0-9]+}}], [[BASE]] offset1:50
; GCN-DAG: ds_read2_b64 v[{{[0-9]+:[0-9]+}}], [[BASE]] offset0:100 offset1:150
; GCN-DAG: ds_read2_b64 v[{{[0-9]+:[0-9]+}}], [[BASE]] offset0:200 offset1:250
; GCN-DAG: ds_read2_b64 v[{{[0-9]+:[0-9]+}}], [[B1]] offset1:50
define amdgpu_kernel void @ds_read64_combine_stride_400(double addrspace(3)* nocapture readonly %arg, double *nocapture %arg1) {
bb:
  %tmp = load double, double addrspace(3)* %arg, align 8
  %tmp2 = fadd double %tmp, 0.000000e+00
  %tmp3 = getelementptr inbounds double, double addrspace(3)* %arg, i32 50
  %tmp4 = load double, double addrspace(3)* %tmp3, align 8
  %tmp5 = fadd double %tmp2, %tmp4
  %tmp6 = getelementptr inbounds double, double addrspace(3)* %arg, i32 100
  %tmp7 = load double, double addrspace(3)* %tmp6, align 8
  %tmp8 = fadd double %tmp5, %tmp7
  %tmp9 = getelementptr inbounds double, double addrspace(3)* %arg, i32 150
  %tmp10 = load double, double addrspace(3)* %tmp9, align 8
  %tmp11 = fadd double %tmp8, %tmp10
  %tmp12 = getelementptr inbounds double, double addrspace(3)* %arg, i32 200
  %tmp13 = load double, double addrspace(3)* %tmp12, align 8
  %tmp14 = fadd double %tmp11, %tmp13
  %tmp15 = getelementptr inbounds double, double addrspace(3)* %arg, i32 250
  %tmp16 = load double, double addrspace(3)* %tmp15, align 8
  %tmp17 = fadd double %tmp14, %tmp16
  %tmp18 = getelementptr inbounds double, double addrspace(3)* %arg, i32 300
  %tmp19 = load double, double addrspace(3)* %tmp18, align 8
  %tmp20 = fadd double %tmp17, %tmp19
  %tmp21 = getelementptr inbounds double, double addrspace(3)* %arg, i32 350
  %tmp22 = load double, double addrspace(3)* %tmp21, align 8
  %tmp23 = fadd double %tmp20, %tmp22
  store double %tmp23, double *%arg1, align 8
  ret void
}

; Six double loads through the addrspace(3) pointer %arg at element indices
; 1, 1025, ..., 5121: an 8192-byte stride that starts 8 bytes past the
; argument. The values are summed and stored to %arg1.
; Expected codegen: three ds_read2st64_b64 with offset1:16, on bases advanced
; by 8, 0x4008 and 0x8008 bytes. The GFX9 checks expect all three constants in
; the add itself; the VI checks expect the inline constant 8 for the first add
; and an SGPR operand for the other two, each with an explicit vcc.
; GCN-LABEL: ds_read64_combine_stride_8192_shifted:
; GCN: s_load_dword [[ARG:s[0-9]+]], s[4:5], 0x0
; GCN: v_mov_b32_e32 [[BASE:v[0-9]+]], [[ARG]]

; VI-DAG: v_add_u32_e32 [[B1:v[0-9]+]], vcc, 8, [[BASE]]
; VI-DAG: v_add_u32_e32 [[B2:v[0-9]+]], vcc, {{s[0-9]+}}, [[BASE]]
; VI-DAG: v_add_u32_e32 [[B3:v[0-9]+]], vcc, {{s[0-9]+}}, [[BASE]]

; GFX9-DAG: v_add_u32_e32 [[B1:v[0-9]+]], 8, [[BASE]]
; GFX9-DAG: v_add_u32_e32 [[B2:v[0-9]+]], 0x4008, [[BASE]]
; GFX9-DAG: v_add_u32_e32 [[B3:v[0-9]+]], 0x8008, [[BASE]]

; GCN-DAG: ds_read2st64_b64 v[{{[0-9]+:[0-9]+}}], [[B1]] offset1:16
; GCN-DAG: ds_read2st64_b64 v[{{[0-9]+:[0-9]+}}], [[B2]] offset1:16
; GCN-DAG: ds_read2st64_b64 v[{{[0-9]+:[0-9]+}}], [[B3]] offset1:16
define amdgpu_kernel void @ds_read64_combine_stride_8192_shifted(double addrspace(3)* nocapture readonly %arg, double *nocapture %arg1) {
bb:
  %tmp = getelementptr inbounds double, double addrspace(3)* %arg, i32 1
  %tmp2 = load double, double addrspace(3)* %tmp, align 8
  %tmp3 = fadd double %tmp2, 0.000000e+00
  %tmp4 = getelementptr inbounds double, double addrspace(3)* %arg, i32 1025
  %tmp5 = load double, double addrspace(3)* %tmp4, align 8
  %tmp6 = fadd double %tmp3, %tmp5
  %tmp7 = getelementptr inbounds double, double addrspace(3)* %arg, i32 2049
  %tmp8 = load double, double addrspace(3)* %tmp7, align 8
  %tmp9 = fadd double %tmp6, %tmp8
  %tmp10 = getelementptr inbounds double, double addrspace(3)* %arg, i32 3073
  %tmp11 = load double, double addrspace(3)* %tmp10, align 8
  %tmp12 = fadd double %tmp9, %tmp11
  %tmp13 = getelementptr inbounds double, double addrspace(3)* %arg, i32 4097
  %tmp14 = load double, double addrspace(3)* %tmp13, align 8
  %tmp15 = fadd double %tmp12, %tmp14
  %tmp16 = getelementptr inbounds double, double addrspace(3)* %arg, i32 5121
  %tmp17 = load double, double addrspace(3)* %tmp16, align 8
  %tmp18 = fadd double %tmp15, %tmp17
  store double %tmp18, double *%arg1, align 8
  ret void
}

; Eight stores of float 1.0 through the addrspace(3) pointer %arg at element
; indices 0, 100, ..., 700 (400 bytes apart).
; Expected codegen: four ds_write2_b32, each pairing two stores that are 100
; elements apart (offset1:100). One uses the argument itself as its base; the
; other three use bases advanced by 0x320, 0x640 and 0x960 bytes. The GFX9
; checks expect those constants as literals in the add, while the VI checks
; expect the add to take an SGPR operand and an explicit vcc.
; GCN-LABEL: ds_write32_combine_stride_400:
; GCN: s_load_dword [[ARG:s[0-9]+]], s[4:5], 0x0
; GCN: v_mov_b32_e32 [[BASE:v[0-9]+]], [[ARG]]

; VI-DAG: v_add_u32_e32 [[B1:v[0-9]+]], vcc, {{s[0-9]+}}, [[BASE]]
; VI-DAG: v_add_u32_e32 [[B2:v[0-9]+]], vcc, {{s[0-9]+}}, [[BASE]]
; VI-DAG: v_add_u32_e32 [[B3:v[0-9]+]], vcc, {{s[0-9]+}}, [[BASE]]

; GFX9-DAG: v_add_u32_e32 [[B1:v[0-9]+]], 0x320, [[BASE]]
; GFX9-DAG: v_add_u32_e32 [[B2:v[0-9]+]], 0x640, [[BASE]]
; GFX9-DAG: v_add_u32_e32 [[B3:v[0-9]+]], 0x960, [[BASE]]

; GCN-DAG: ds_write2_b32 [[BASE]], v{{[0-9]+}}, v{{[0-9]+}} offset1:100
; GCN-DAG: ds_write2_b32 [[B1]], v{{[0-9]+}}, v{{[0-9]+}} offset1:100
; GCN-DAG: ds_write2_b32 [[B2]], v{{[0-9]+}}, v{{[0-9]+}} offset1:100
; GCN-DAG: ds_write2_b32 [[B3]], v{{[0-9]+}}, v{{[0-9]+}} offset1:100
define amdgpu_kernel void @ds_write32_combine_stride_400(float addrspace(3)* nocapture %arg) {
bb:
  store float 1.000000e+00, float addrspace(3)* %arg, align 4
  %tmp = getelementptr inbounds float, float addrspace(3)* %arg, i32 100
  store float 1.000000e+00, float addrspace(3)* %tmp, align 4
  %tmp1 = getelementptr inbounds float, float addrspace(3)* %arg, i32 200
  store float 1.000000e+00, float addrspace(3)* %tmp1, align 4
  %tmp2 = getelementptr inbounds float, float addrspace(3)* %arg, i32 300
  store float 1.000000e+00, float addrspace(3)* %tmp2, align 4
  %tmp3 = getelementptr inbounds float, float addrspace(3)* %arg, i32 400
  store float 1.000000e+00, float addrspace(3)* %tmp3, align 4
  %tmp4 = getelementptr inbounds float, float addrspace(3)* %arg, i32 500
  store float 1.000000e+00, float addrspace(3)* %tmp4, align 4
  %tmp5 = getelementptr inbounds float, float addrspace(3)* %arg, i32 600
  store float 1.000000e+00, float addrspace(3)* %tmp5, align 4
  %tmp6 = getelementptr inbounds float, float addrspace(3)* %arg, i32 700
  store float 1.000000e+00, float addrspace(3)* %tmp6, align 4
  ret void
}

; Same eight float 1.0 stores as the forward stride-400 write test, but issued
; in descending address order (element index 700 down to 0).
; Expected codegen is the same as for ascending order: four ds_write2_b32 with
; offset1:100, one on the argument base and three on bases advanced by 0x320,
; 0x640 and 0x960 bytes (literals in the GFX9 checks, an SGPR operand plus
; vcc in the VI checks).
; GCN-LABEL: ds_write32_combine_stride_400_back:
; GCN: s_load_dword [[ARG:s[0-9]+]], s[4:5], 0x0
; GCN: v_mov_b32_e32 [[BASE:v[0-9]+]], [[ARG]]

; VI-DAG: v_add_u32_e32 [[B1:v[0-9]+]], vcc, {{s[0-9]+}}, [[BASE]]
; VI-DAG: v_add_u32_e32 [[B2:v[0-9]+]], vcc, {{s[0-9]+}}, [[BASE]]
; VI-DAG: v_add_u32_e32 [[B3:v[0-9]+]], vcc, {{s[0-9]+}}, [[BASE]]

; GFX9-DAG: v_add_u32_e32 [[B1:v[0-9]+]], 0x320, [[BASE]]
; GFX9-DAG: v_add_u32_e32 [[B2:v[0-9]+]], 0x640, [[BASE]]
; GFX9-DAG: v_add_u32_e32 [[B3:v[0-9]+]], 0x960, [[BASE]]

; GCN-DAG: ds_write2_b32 [[BASE]], v{{[0-9]+}}, v{{[0-9]+}} offset1:100
; GCN-DAG: ds_write2_b32 [[B1]], v{{[0-9]+}}, v{{[0-9]+}} offset1:100
; GCN-DAG: ds_write2_b32 [[B2]], v{{[0-9]+}}, v{{[0-9]+}} offset1:100
; GCN-DAG: ds_write2_b32 [[B3]], v{{[0-9]+}}, v{{[0-9]+}} offset1:100
define amdgpu_kernel void @ds_write32_combine_stride_400_back(float addrspace(3)* nocapture %arg) {
bb:
  %tmp = getelementptr inbounds float, float addrspace(3)* %arg, i32 700
  store float 1.000000e+00, float addrspace(3)* %tmp, align 4
  %tmp1 = getelementptr inbounds float, float addrspace(3)* %arg, i32 600
  store float 1.000000e+00, float addrspace(3)* %tmp1, align 4
  %tmp2 = getelementptr inbounds float, float addrspace(3)* %arg, i32 500
  store float 1.000000e+00, float addrspace(3)* %tmp2, align 4
  %tmp3 = getelementptr inbounds float, float addrspace(3)* %arg, i32 400
  store float 1.000000e+00, float addrspace(3)* %tmp3, align 4
  %tmp4 = getelementptr inbounds float, float addrspace(3)* %arg, i32 300
  store float 1.000000e+00, float addrspace(3)* %tmp4, align 4
  %tmp5 = getelementptr inbounds float, float addrspace(3)* %arg, i32 200
  store float 1.000000e+00, float addrspace(3)* %tmp5, align 4
  %tmp6 = getelementptr inbounds float, float addrspace(3)* %arg, i32 100
  store float 1.000000e+00, float addrspace(3)* %tmp6, align 4
  store float 1.000000e+00, float addrspace(3)* %arg, align 4
  ret void
}

; Eight stores of float 1.0 through the addrspace(3) pointer %arg at element
; indices 0, 2048, ..., 14336 (8192 bytes apart).
; Expected codegen: four ds_write2st64_b32 that all use the argument itself as
; base, with offset pairs (0,32), (64,96), (128,160) and (192,224). No extra
; address adds are checked for this case.
; GCN-LABEL: ds_write32_combine_stride_8192:
; GCN: s_load_dword [[ARG:s[0-9]+]], s[4:5], 0x0
; GCN: v_mov_b32_e32 [[BASE:v[0-9]+]], [[ARG]]
; GCN-DAG: ds_write2st64_b32 [[BASE]], v{{[0-9]+}}, v{{[0-9]+}} offset1:32
; GCN-DAG: ds_write2st64_b32 [[BASE]], v{{[0-9]+}}, v{{[0-9]+}} offset0:64 offset1:96
; GCN-DAG: ds_write2st64_b32 [[BASE]], v{{[0-9]+}}, v{{[0-9]+}} offset0:128 offset1:160
; GCN-DAG: ds_write2st64_b32 [[BASE]], v{{[0-9]+}}, v{{[0-9]+}} offset0:192 offset1:224
define amdgpu_kernel void @ds_write32_combine_stride_8192(float addrspace(3)* nocapture %arg) {
bb:
  store float 1.000000e+00, float addrspace(3)* %arg, align 4
  %tmp = getelementptr inbounds float, float addrspace(3)* %arg, i32 2048
  store float 1.000000e+00, float addrspace(3)* %tmp, align 4
  %tmp1 = getelementptr inbounds float, float addrspace(3)* %arg, i32 4096
  store float 1.000000e+00, float addrspace(3)* %tmp1, align 4
  %tmp2 = getelementptr inbounds float, float addrspace(3)* %arg, i32 6144
  store float 1.000000e+00, float addrspace(3)* %tmp2, align 4
  %tmp3 = getelementptr inbounds float, float addrspace(3)* %arg, i32 8192
  store float 1.000000e+00, float addrspace(3)* %tmp3, align 4
  %tmp4 = getelementptr inbounds float, float addrspace(3)* %arg, i32 10240
  store float 1.000000e+00, float addrspace(3)* %tmp4, align 4
  %tmp5 = getelementptr inbounds float, float addrspace(3)* %arg, i32 12288
  store float 1.000000e+00, float addrspace(3)* %tmp5, align 4
  %tmp6 = getelementptr inbounds float, float addrspace(3)* %arg, i32 14336
  store float 1.000000e+00, float addrspace(3)* %tmp6, align 4
  ret void
}

; Six stores of float 1.0 through the addrspace(3) pointer %arg at element
; indices 1, 2049, ..., 10241: an 8192-byte stride that starts 4 bytes past
; the argument.
; Expected codegen: three ds_write2st64_b32 with offset1:32, on bases advanced
; by 4, 0x4004 and 0x8004 bytes. The GFX9 checks expect all three constants in
; the add itself; the VI checks expect the inline constant 4 for the first add
; and an SGPR operand for the other two, each with an explicit vcc.
; GCN-LABEL: ds_write32_combine_stride_8192_shifted:
; GCN: s_load_dword [[ARG:s[0-9]+]], s[4:5], 0x0
; GCN: v_mov_b32_e32 [[BASE:v[0-9]+]], [[ARG]]

; VI-DAG: v_add_u32_e32 [[B1:v[0-9]+]], vcc, 4, [[BASE]]
; VI-DAG: v_add_u32_e32 [[B2:v[0-9]+]], vcc, {{s[0-9]+}}, [[BASE]]
; VI-DAG: v_add_u32_e32 [[B3:v[0-9]+]], vcc, {{s[0-9]+}}, [[BASE]]

; GFX9-DAG: v_add_u32_e32 [[B1:v[0-9]+]], 4, [[BASE]]
; GFX9-DAG: v_add_u32_e32 [[B2:v[0-9]+]], 0x4004, [[BASE]]
; GFX9-DAG: v_add_u32_e32 [[B3:v[0-9]+]], 0x8004, [[BASE]]

; GCN-DAG: ds_write2st64_b32 [[B1]], v{{[0-9]+}}, v{{[0-9]+}} offset1:32
; GCN-DAG: ds_write2st64_b32 [[B2]], v{{[0-9]+}}, v{{[0-9]+}} offset1:32
; GCN-DAG: ds_write2st64_b32 [[B3]], v{{[0-9]+}}, v{{[0-9]+}} offset1:32
define amdgpu_kernel void @ds_write32_combine_stride_8192_shifted(float addrspace(3)* nocapture %arg) {
bb:
  %tmp = getelementptr inbounds float, float addrspace(3)* %arg, i32 1
  store float 1.000000e+00, float addrspace(3)* %tmp, align 4
  %tmp1 = getelementptr inbounds float, float addrspace(3)* %arg, i32 2049
  store float 1.000000e+00, float addrspace(3)* %tmp1, align 4
  %tmp2 = getelementptr inbounds float, float addrspace(3)* %arg, i32 4097
  store float 1.000000e+00, float addrspace(3)* %tmp2, align 4
  %tmp3 = getelementptr inbounds float, float addrspace(3)* %arg, i32 6145
  store float 1.000000e+00, float addrspace(3)* %tmp3, align 4
  %tmp4 = getelementptr inbounds float, float addrspace(3)* %arg, i32 8193
  store float 1.000000e+00, float addrspace(3)* %tmp4, align 4
  %tmp5 = getelementptr inbounds float, float addrspace(3)* %arg, i32 10241
  store float 1.000000e+00, float addrspace(3)* %tmp5, align 4
  ret void
}

; Eight stores of double 1.0 through the addrspace(3) pointer %arg at element
; indices 0, 50, ..., 350 (400 bytes apart).
; Expected codegen: four ds_write2_b64. Three use the argument itself as base
; with offset pairs (0,50), (100,150) and (200,250); the fourth uses a base
; advanced by 0x960 bytes with offset1:50. The GFX9 checks expect 0x960 as a
; literal in the add; the VI checks expect an SGPR operand plus vcc.
; GCN-LABEL: ds_write64_combine_stride_400:
; GCN: s_load_dword [[ARG:s[0-9]+]], s[4:5], 0x0
; GCN: v_mov_b32_e32 [[BASE:v[0-9]+]], [[ARG]]

; VI-DAG: v_add_u32_e32 [[B1:v[0-9]+]], vcc, {{s[0-9]+}}, [[BASE]]
; GFX9-DAG: v_add_u32_e32 [[B1:v[0-9]+]], 0x960, [[BASE]]

; GCN-DAG: ds_write2_b64 [[BASE]], v[{{[0-9]+:[0-9]+}}], v[{{[0-9]+:[0-9]+}}] offset1:50
; GCN-DAG: ds_write2_b64 [[BASE]], v[{{[0-9]+:[0-9]+}}], v[{{[0-9]+:[0-9]+}}] offset0:100 offset1:150
; GCN-DAG: ds_write2_b64 [[BASE]], v[{{[0-9]+:[0-9]+}}], v[{{[0-9]+:[0-9]+}}] offset0:200 offset1:250
; GCN-DAG: ds_write2_b64 [[B1]], v[{{[0-9]+:[0-9]+}}], v[{{[0-9]+:[0-9]+}}] offset1:50
define amdgpu_kernel void @ds_write64_combine_stride_400(double addrspace(3)* nocapture %arg) {
bb:
  store double 1.000000e+00, double addrspace(3)* %arg, align 8
  %tmp = getelementptr inbounds double, double addrspace(3)* %arg, i32 50
  store double 1.000000e+00, double addrspace(3)* %tmp, align 8
  %tmp1 = getelementptr inbounds double, double addrspace(3)* %arg, i32 100
  store double 1.000000e+00, double addrspace(3)* %tmp1, align 8
  %tmp2 = getelementptr inbounds double, double addrspace(3)* %arg, i32 150
  store double 1.000000e+00, double addrspace(3)* %tmp2, align 8
  %tmp3 = getelementptr inbounds double, double addrspace(3)* %arg, i32 200
  store double 1.000000e+00, double addrspace(3)* %tmp3, align 8
  %tmp4 = getelementptr inbounds double, double addrspace(3)* %arg, i32 250
  store double 1.000000e+00, double addrspace(3)* %tmp4, align 8
  %tmp5 = getelementptr inbounds double, double addrspace(3)* %arg, i32 300
  store double 1.000000e+00, double addrspace(3)* %tmp5, align 8
  %tmp6 = getelementptr inbounds double, double addrspace(3)* %arg, i32 350
  store double 1.000000e+00, double addrspace(3)* %tmp6, align 8
  ret void
}

; Six stores of double 1.0 through the addrspace(3) pointer %arg at element
; indices 1, 1025, ..., 5121: an 8192-byte stride that starts 8 bytes past the
; argument.
; Expected codegen: three ds_write2st64_b64 with offset1:16, on bases advanced
; by 8, 0x4008 and 0x8008 bytes. The GFX9 checks expect all three constants in
; the add itself; the VI checks expect the inline constant 8 for the first add
; and an SGPR operand for the other two, each with an explicit vcc.
; GCN-LABEL: ds_write64_combine_stride_8192_shifted:
; GCN: s_load_dword [[ARG:s[0-9]+]], s[4:5], 0x0
; GCN: v_mov_b32_e32 [[BASE:v[0-9]+]], [[ARG]]

; VI-DAG: v_add_u32_e32 [[B1:v[0-9]+]], vcc, 8, [[BASE]]
; VI-DAG: v_add_u32_e32 [[B2:v[0-9]+]], vcc, {{s[0-9]+}}, [[BASE]]
; VI-DAG: v_add_u32_e32 [[B3:v[0-9]+]], vcc, {{s[0-9]+}}, [[BASE]]

; GFX9-DAG: v_add_u32_e32 [[B1:v[0-9]+]], 8, [[BASE]]
; GFX9-DAG: v_add_u32_e32 [[B2:v[0-9]+]], 0x4008, [[BASE]]
; GFX9-DAG: v_add_u32_e32 [[B3:v[0-9]+]], 0x8008, [[BASE]]

; GCN-DAG: ds_write2st64_b64 [[B1]], v[{{[0-9]+:[0-9]+}}], v[{{[0-9]+:[0-9]+}}] offset1:16
; GCN-DAG: ds_write2st64_b64 [[B2]], v[{{[0-9]+:[0-9]+}}], v[{{[0-9]+:[0-9]+}}] offset1:16
; GCN-DAG: ds_write2st64_b64 [[B3]], v[{{[0-9]+:[0-9]+}}], v[{{[0-9]+:[0-9]+}}] offset1:16
define amdgpu_kernel void @ds_write64_combine_stride_8192_shifted(double addrspace(3)* nocapture %arg) {
bb:
  %tmp = getelementptr inbounds double, double addrspace(3)* %arg, i32 1
  store double 1.000000e+00, double addrspace(3)* %tmp, align 8
  %tmp1 = getelementptr inbounds double, double addrspace(3)* %arg, i32 1025
  store double 1.000000e+00, double addrspace(3)* %tmp1, align 8
  %tmp2 = getelementptr inbounds double, double addrspace(3)* %arg, i32 2049
  store double 1.000000e+00, double addrspace(3)* %tmp2, align 8
  %tmp3 = getelementptr inbounds double, double addrspace(3)* %arg, i32 3073
  store double 1.000000e+00, double addrspace(3)* %tmp3, align 8
  %tmp4 = getelementptr inbounds double, double addrspace(3)* %arg, i32 4097
  store double 1.000000e+00, double addrspace(3)* %tmp4, align 8
  %tmp5 = getelementptr inbounds double, double addrspace(3)* %arg, i32 5121
  store double 1.000000e+00, double addrspace(3)* %tmp5, align 8
  ret void
}