; RUN: llc -mtriple=amdgcn--amdhsa -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s
; RUN: llc -mtriple=amdgcn--amdhsa -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck -check-prefix=GFX9 -check-prefix=GCN %s

; Eight float loads from addrspace(3) at element indices 0, 100, ..., 700,
; i.e. a 400-byte stride. The checks below expect four ds_read2_b32
; instructions with offset1:100: one on the incoming pointer and one on each
; of three derived bases at +0x320, +0x640 and +0x960 bytes.
; GCN-LABEL: ds_read32_combine_stride_400:
; GCN: s_load_dword [[ARG:s[0-9]+]], s[4:5], 0x0
; GCN: v_mov_b32_e32 [[BASE:v[0-9]+]], [[ARG]]
; GCN-DAG: v_add_i32_e32 [[B1:v[0-9]+]], vcc, 0x320, [[BASE]]
; GCN-DAG: v_add_i32_e32 [[B2:v[0-9]+]], vcc, 0x640, [[BASE]]
; GCN-DAG: v_add_i32_e32 [[B3:v[0-9]+]], vcc, 0x960, [[BASE]]
; GFX9-DAG: v_add_i32_e32 [[B1:v[0-9]+]], vcc, 0x320, [[BASE]]
; GFX9-DAG: v_add_i32_e32 [[B2:v[0-9]+]], vcc, 0x640, [[BASE]]
; GFX9-DAG: v_add_i32_e32 [[B3:v[0-9]+]], vcc, 0x960, [[BASE]]
; GCN-DAG: ds_read2_b32 v[{{[0-9]+:[0-9]+}}], [[BASE]] offset1:100
; GCN-DAG: ds_read2_b32 v[{{[0-9]+:[0-9]+}}], [[B1]] offset1:100
; GCN-DAG: ds_read2_b32 v[{{[0-9]+:[0-9]+}}], [[B2]] offset1:100
; GCN-DAG: ds_read2_b32 v[{{[0-9]+:[0-9]+}}], [[B3]] offset1:100
define amdgpu_kernel void @ds_read32_combine_stride_400(float addrspace(3)* nocapture readonly %arg, float *nocapture %arg1) {
bb:
  ; Every loaded value feeds the fadd chain whose result is stored to %arg1.
  %tmp = load float, float addrspace(3)* %arg, align 4
  %tmp2 = fadd float %tmp, 0.000000e+00
  %tmp3 = getelementptr inbounds float, float addrspace(3)* %arg, i32 100
  %tmp4 = load float, float addrspace(3)* %tmp3, align 4
  %tmp5 = fadd float %tmp2, %tmp4
  %tmp6 = getelementptr inbounds float, float addrspace(3)* %arg, i32 200
  %tmp7 = load float, float addrspace(3)* %tmp6, align 4
  %tmp8 = fadd float %tmp5, %tmp7
  %tmp9 = getelementptr inbounds float, float addrspace(3)* %arg, i32 300
  %tmp10 = load float, float addrspace(3)* %tmp9, align 4
  %tmp11 = fadd float %tmp8, %tmp10
  %tmp12 = getelementptr inbounds float, float addrspace(3)* %arg, i32 400
  %tmp13 = load float, float addrspace(3)* %tmp12, align 4
  %tmp14 = fadd float %tmp11, %tmp13
  %tmp15 = getelementptr inbounds float, float addrspace(3)* %arg, i32 500
  %tmp16 = load float, float addrspace(3)* %tmp15, align 4
  %tmp17 = fadd float %tmp14, %tmp16
  %tmp18 = getelementptr inbounds float, float addrspace(3)* %arg, i32 600
  %tmp19 = load float, float addrspace(3)* %tmp18, align 4
  %tmp20 = fadd float %tmp17, %tmp19
  %tmp21 = getelementptr inbounds float, float addrspace(3)* %arg, i32 700
  %tmp22 = load float, float addrspace(3)* %tmp21, align 4
  %tmp23 = fadd float %tmp20, %tmp22
  store float %tmp23, float *%arg1, align 4
  ret void
}

; Same eight float loads as the forward 400-byte-stride read test, but issued
; in descending order (element indices 700, 600, ..., 0). The checks expect
; the same four ds_read2_b32 instructions with offset1:100 on the incoming
; pointer and on derived bases at +0x320, +0x640 and +0x960 bytes.
; GCN-LABEL: ds_read32_combine_stride_400_back:
; GCN: s_load_dword [[ARG:s[0-9]+]], s[4:5], 0x0
; GCN: v_mov_b32_e32 [[BASE:v[0-9]+]], [[ARG]]
; GCN-DAG: v_add_i32_e32 [[B1:v[0-9]+]], vcc, 0x320, [[BASE]]
; GCN-DAG: v_add_i32_e32 [[B2:v[0-9]+]], vcc, 0x640, [[BASE]]
; GCN-DAG: v_add_i32_e32 [[B3:v[0-9]+]], vcc, 0x960, [[BASE]]
; GFX9-DAG: v_add_i32_e32 [[B1:v[0-9]+]], vcc, 0x320, [[BASE]]
; GFX9-DAG: v_add_i32_e32 [[B2:v[0-9]+]], vcc, 0x640, [[BASE]]
; GFX9-DAG: v_add_i32_e32 [[B3:v[0-9]+]], vcc, 0x960, [[BASE]]
; GCN-DAG: ds_read2_b32 v[{{[0-9]+:[0-9]+}}], [[BASE]] offset1:100
; GCN-DAG: ds_read2_b32 v[{{[0-9]+:[0-9]+}}], [[B1]] offset1:100
; GCN-DAG: ds_read2_b32 v[{{[0-9]+:[0-9]+}}], [[B2]] offset1:100
; GCN-DAG: ds_read2_b32 v[{{[0-9]+:[0-9]+}}], [[B3]] offset1:100
define amdgpu_kernel void @ds_read32_combine_stride_400_back(float addrspace(3)* nocapture readonly %arg, float *nocapture %arg1) {
bb:
  ; Every loaded value feeds the fadd chain whose result is stored to %arg1.
  %tmp = getelementptr inbounds float, float addrspace(3)* %arg, i32 700
  %tmp2 = load float, float addrspace(3)* %tmp, align 4
  %tmp3 = fadd float %tmp2, 0.000000e+00
  %tmp4 = getelementptr inbounds float, float addrspace(3)* %arg, i32 600
  %tmp5 = load float, float addrspace(3)* %tmp4, align 4
  %tmp6 = fadd float %tmp3, %tmp5
  %tmp7 = getelementptr inbounds float, float addrspace(3)* %arg, i32 500
  %tmp8 = load float, float addrspace(3)* %tmp7, align 4
  %tmp9 = fadd float %tmp6, %tmp8
  %tmp10 = getelementptr inbounds float, float addrspace(3)* %arg, i32 400
  %tmp11 = load float, float addrspace(3)* %tmp10, align 4
  %tmp12 = fadd float %tmp9, %tmp11
  %tmp13 = getelementptr inbounds float, float addrspace(3)* %arg, i32 300
  %tmp14 = load float, float addrspace(3)* %tmp13, align 4
  %tmp15 = fadd float %tmp12, %tmp14
  %tmp16 = getelementptr inbounds float, float addrspace(3)* %arg, i32 200
  %tmp17 = load float, float addrspace(3)* %tmp16, align 4
  %tmp18 = fadd float %tmp15, %tmp17
  %tmp19 = getelementptr inbounds float, float addrspace(3)* %arg, i32 100
  %tmp20 = load float, float addrspace(3)* %tmp19, align 4
  %tmp21 = fadd float %tmp18, %tmp20
  %tmp22 = load float, float addrspace(3)* %arg, align 4
  %tmp23 = fadd float %tmp21, %tmp22
  store float %tmp23, float *%arg1, align 4
  ret void
}

; Eight float loads from addrspace(3) at element indices 0, 2048, ..., 14336,
; i.e. an 8192-byte stride. The checks expect four ds_read2st64_b32
; instructions, all on the incoming pointer, with offset pairs (0, 32),
; (64, 96), (128, 160) and (192, 224); no derived base is expected.
; GCN-LABEL: ds_read32_combine_stride_8192:
; GCN: s_load_dword [[ARG:s[0-9]+]], s[4:5], 0x0
; GCN: v_mov_b32_e32 [[BASE:v[0-9]+]], [[ARG]]
; GCN-DAG: ds_read2st64_b32 v[{{[0-9]+:[0-9]+}}], [[BASE]] offset1:32
; GCN-DAG: ds_read2st64_b32 v[{{[0-9]+:[0-9]+}}], [[BASE]] offset0:64 offset1:96
; GCN-DAG: ds_read2st64_b32 v[{{[0-9]+:[0-9]+}}], [[BASE]] offset0:128 offset1:160
; GCN-DAG: ds_read2st64_b32 v[{{[0-9]+:[0-9]+}}], [[BASE]] offset0:192 offset1:224
define amdgpu_kernel void @ds_read32_combine_stride_8192(float addrspace(3)* nocapture readonly %arg, float *nocapture %arg1) {
bb:
  ; Every loaded value feeds the fadd chain whose result is stored to %arg1.
  %tmp = load float, float addrspace(3)* %arg, align 4
  %tmp2 = fadd float %tmp, 0.000000e+00
  %tmp3 = getelementptr inbounds float, float addrspace(3)* %arg, i32 2048
  %tmp4 = load float, float addrspace(3)* %tmp3, align 4
  %tmp5 = fadd float %tmp2, %tmp4
  %tmp6 = getelementptr inbounds float, float addrspace(3)* %arg, i32 4096
  %tmp7 = load float, float addrspace(3)* %tmp6, align 4
  %tmp8 = fadd float %tmp5, %tmp7
  %tmp9 = getelementptr inbounds float, float addrspace(3)* %arg, i32 6144
  %tmp10 = load float, float addrspace(3)* %tmp9, align 4
  %tmp11 = fadd float %tmp8, %tmp10
  %tmp12 = getelementptr inbounds float, float addrspace(3)* %arg, i32 8192
  %tmp13 = load float, float addrspace(3)* %tmp12, align 4
  %tmp14 = fadd float %tmp11, %tmp13
  %tmp15 = getelementptr inbounds float, float addrspace(3)* %arg, i32 10240
  %tmp16 = load float, float addrspace(3)* %tmp15, align 4
  %tmp17 = fadd float %tmp14, %tmp16
  %tmp18 = getelementptr inbounds float, float addrspace(3)* %arg, i32 12288
  %tmp19 = load float, float addrspace(3)* %tmp18, align 4
  %tmp20 = fadd float %tmp17, %tmp19
  %tmp21 = getelementptr inbounds float, float addrspace(3)* %arg, i32 14336
  %tmp22 = load float, float addrspace(3)* %tmp21, align 4
  %tmp23 = fadd float %tmp20, %tmp22
  store float %tmp23, float *%arg1, align 4
  ret void
}

; Six float loads from addrspace(3) at element indices 2, 2050, ..., 10242:
; an 8192-byte stride whose first access sits 8 bytes past the pointer.
; The checks expect three ds_read2st64_b32 instructions with offset1:32, each
; on a derived base at +8, +0x4008 and +0x8008 bytes respectively.
; GCN-LABEL: ds_read32_combine_stride_8192_shifted:
; GCN: s_load_dword [[ARG:s[0-9]+]], s[4:5], 0x0
; GCN: v_mov_b32_e32 [[BASE:v[0-9]+]], [[ARG]]
; GCN-DAG: v_add_i32_e32 [[B1:v[0-9]+]], vcc, 8, [[BASE]]
; GCN-DAG: v_add_i32_e32 [[B2:v[0-9]+]], vcc, 0x4008, [[BASE]]
; GCN-DAG: v_add_i32_e32 [[B3:v[0-9]+]], vcc, 0x8008, [[BASE]]
; GFX9-DAG: v_add_i32_e32 [[B1:v[0-9]+]], vcc, 8, [[BASE]]
; GFX9-DAG: v_add_i32_e32 [[B2:v[0-9]+]], vcc, 0x4008, [[BASE]]
; GFX9-DAG: v_add_i32_e32 [[B3:v[0-9]+]], vcc, 0x8008, [[BASE]]
; GCN-DAG: ds_read2st64_b32 v[{{[0-9]+:[0-9]+}}], [[B1]] offset1:32
; GCN-DAG: ds_read2st64_b32 v[{{[0-9]+:[0-9]+}}], [[B2]] offset1:32
; GCN-DAG: ds_read2st64_b32 v[{{[0-9]+:[0-9]+}}], [[B3]] offset1:32
define amdgpu_kernel void @ds_read32_combine_stride_8192_shifted(float addrspace(3)* nocapture readonly %arg, float *nocapture %arg1) {
bb:
  ; Every loaded value feeds the fadd chain whose result is stored to %arg1.
  %tmp = getelementptr inbounds float, float addrspace(3)* %arg, i32 2
  %tmp2 = load float, float addrspace(3)* %tmp, align 4
  %tmp3 = fadd float %tmp2, 0.000000e+00
  %tmp4 = getelementptr inbounds float, float addrspace(3)* %arg, i32 2050
  %tmp5 = load float, float addrspace(3)* %tmp4, align 4
  %tmp6 = fadd float %tmp3, %tmp5
  %tmp7 = getelementptr inbounds float, float addrspace(3)* %arg, i32 4098
  %tmp8 = load float, float addrspace(3)* %tmp7, align 4
  %tmp9 = fadd float %tmp6, %tmp8
  %tmp10 = getelementptr inbounds float, float addrspace(3)* %arg, i32 6146
  %tmp11 = load float, float addrspace(3)* %tmp10, align 4
  %tmp12 = fadd float %tmp9, %tmp11
  %tmp13 = getelementptr inbounds float, float addrspace(3)* %arg, i32 8194
  %tmp14 = load float, float addrspace(3)* %tmp13, align 4
  %tmp15 = fadd float %tmp12, %tmp14
  %tmp16 = getelementptr inbounds float, float addrspace(3)* %arg, i32 10242
  %tmp17 = load float, float addrspace(3)* %tmp16, align 4
  %tmp18 = fadd float %tmp15, %tmp17
  store float %tmp18, float *%arg1, align 4
  ret void
}

; Eight double loads from addrspace(3) at element indices 0, 50, ..., 350,
; i.e. a 400-byte stride. The checks expect four ds_read2_b64 instructions:
; three on the incoming pointer with offset pairs (0, 50), (100, 150) and
; (200, 250), and one with offset1:50 on a derived base at +0x960 bytes.
; GCN-LABEL: ds_read64_combine_stride_400:
; GCN: s_load_dword [[ARG:s[0-9]+]], s[4:5], 0x0
; GCN: v_mov_b32_e32 [[BASE:v[0-9]+]], [[ARG]]
; GCN-DAG: v_add_i32_e32 [[B1:v[0-9]+]], vcc, 0x960, [[BASE]]
; GFX9-DAG: v_add_i32_e32 [[B1:v[0-9]+]], vcc, 0x960, [[BASE]]
; GCN-DAG: ds_read2_b64 v[{{[0-9]+:[0-9]+}}], [[BASE]] offset1:50
; GCN-DAG: ds_read2_b64 v[{{[0-9]+:[0-9]+}}], [[BASE]] offset0:100 offset1:150
; GCN-DAG: ds_read2_b64 v[{{[0-9]+:[0-9]+}}], [[BASE]] offset0:200 offset1:250
; GCN-DAG: ds_read2_b64 v[{{[0-9]+:[0-9]+}}], [[B1]] offset1:50
define amdgpu_kernel void @ds_read64_combine_stride_400(double addrspace(3)* nocapture readonly %arg, double *nocapture %arg1) {
bb:
  ; Every loaded value feeds the fadd chain whose result is stored to %arg1.
  %tmp = load double, double addrspace(3)* %arg, align 8
  %tmp2 = fadd double %tmp, 0.000000e+00
  %tmp3 = getelementptr inbounds double, double addrspace(3)* %arg, i32 50
  %tmp4 = load double, double addrspace(3)* %tmp3, align 8
  %tmp5 = fadd double %tmp2, %tmp4
  %tmp6 = getelementptr inbounds double, double addrspace(3)* %arg, i32 100
  %tmp7 = load double, double addrspace(3)* %tmp6, align 8
  %tmp8 = fadd double %tmp5, %tmp7
  %tmp9 = getelementptr inbounds double, double addrspace(3)* %arg, i32 150
  %tmp10 = load double, double addrspace(3)* %tmp9, align 8
  %tmp11 = fadd double %tmp8, %tmp10
  %tmp12 = getelementptr inbounds double, double addrspace(3)* %arg, i32 200
  %tmp13 = load double, double addrspace(3)* %tmp12, align 8
  %tmp14 = fadd double %tmp11, %tmp13
  %tmp15 = getelementptr inbounds double, double addrspace(3)* %arg, i32 250
  %tmp16 = load double, double addrspace(3)* %tmp15, align 8
  %tmp17 = fadd double %tmp14, %tmp16
  %tmp18 = getelementptr inbounds double, double addrspace(3)* %arg, i32 300
  %tmp19 = load double, double addrspace(3)* %tmp18, align 8
  %tmp20 = fadd double %tmp17, %tmp19
  %tmp21 = getelementptr inbounds double, double addrspace(3)* %arg, i32 350
  %tmp22 = load double, double addrspace(3)* %tmp21, align 8
  %tmp23 = fadd double %tmp20, %tmp22
  store double %tmp23, double *%arg1, align 8
  ret void
}

; Six double loads from addrspace(3) at element indices 1, 1025, ..., 5121:
; an 8192-byte stride whose first access sits 8 bytes past the pointer.
; The checks expect three ds_read2st64_b64 instructions with offset1:16, each
; on a derived base at +8, +0x4008 and +0x8008 bytes respectively.
; GCN-LABEL: ds_read64_combine_stride_8192_shifted:
; GCN: s_load_dword [[ARG:s[0-9]+]], s[4:5], 0x0
; GCN: v_mov_b32_e32 [[BASE:v[0-9]+]], [[ARG]]
; GCN-DAG: v_add_i32_e32 [[B1:v[0-9]+]], vcc, 8, [[BASE]]
; GCN-DAG: v_add_i32_e32 [[B2:v[0-9]+]], vcc, 0x4008, [[BASE]]
; GCN-DAG: v_add_i32_e32 [[B3:v[0-9]+]], vcc, 0x8008, [[BASE]]
; GFX9-DAG: v_add_i32_e32 [[B1:v[0-9]+]], vcc, 8, [[BASE]]
; GFX9-DAG: v_add_i32_e32 [[B2:v[0-9]+]], vcc, 0x4008, [[BASE]]
; GFX9-DAG: v_add_i32_e32 [[B3:v[0-9]+]], vcc, 0x8008, [[BASE]]
; GCN-DAG: ds_read2st64_b64 v[{{[0-9]+:[0-9]+}}], [[B1]] offset1:16
; GCN-DAG: ds_read2st64_b64 v[{{[0-9]+:[0-9]+}}], [[B2]] offset1:16
; GCN-DAG: ds_read2st64_b64 v[{{[0-9]+:[0-9]+}}], [[B3]] offset1:16
define amdgpu_kernel void @ds_read64_combine_stride_8192_shifted(double addrspace(3)* nocapture readonly %arg, double *nocapture %arg1) {
bb:
  ; Every loaded value feeds the fadd chain whose result is stored to %arg1.
  %tmp = getelementptr inbounds double, double addrspace(3)* %arg, i32 1
  %tmp2 = load double, double addrspace(3)* %tmp, align 8
  %tmp3 = fadd double %tmp2, 0.000000e+00
  %tmp4 = getelementptr inbounds double, double addrspace(3)* %arg, i32 1025
  %tmp5 = load double, double addrspace(3)* %tmp4, align 8
  %tmp6 = fadd double %tmp3, %tmp5
  %tmp7 = getelementptr inbounds double, double addrspace(3)* %arg, i32 2049
  %tmp8 = load double, double addrspace(3)* %tmp7, align 8
  %tmp9 = fadd double %tmp6, %tmp8
  %tmp10 = getelementptr inbounds double, double addrspace(3)* %arg, i32 3073
  %tmp11 = load double, double addrspace(3)* %tmp10, align 8
  %tmp12 = fadd double %tmp9, %tmp11
  %tmp13 = getelementptr inbounds double, double addrspace(3)* %arg, i32 4097
  %tmp14 = load double, double addrspace(3)* %tmp13, align 8
  %tmp15 = fadd double %tmp12, %tmp14
  %tmp16 = getelementptr inbounds double, double addrspace(3)* %arg, i32 5121
  %tmp17 = load double, double addrspace(3)* %tmp16, align 8
  %tmp18 = fadd double %tmp15, %tmp17
  store double %tmp18, double *%arg1, align 8
  ret void
}

; Eight stores of float 1.0 to addrspace(3) at element indices 0, 100, ...,
; 700, i.e. a 400-byte stride. The checks expect four ds_write2_b32
; instructions with offset1:100: one on the incoming pointer and one on each
; of three derived bases at +0x320, +0x640 and +0x960 bytes.
; GCN-LABEL: ds_write32_combine_stride_400:
; GCN: s_load_dword [[ARG:s[0-9]+]], s[4:5], 0x0
; GCN: v_mov_b32_e32 [[BASE:v[0-9]+]], [[ARG]]
; GCN-DAG: v_add_i32_e32 [[B1:v[0-9]+]], vcc, 0x320, [[BASE]]
; GCN-DAG: v_add_i32_e32 [[B2:v[0-9]+]], vcc, 0x640, [[BASE]]
; GCN-DAG: v_add_i32_e32 [[B3:v[0-9]+]], vcc, 0x960, [[BASE]]
; GFX9-DAG: v_add_i32_e32 [[B1:v[0-9]+]], vcc, 0x320, [[BASE]]
; GFX9-DAG: v_add_i32_e32 [[B2:v[0-9]+]], vcc, 0x640, [[BASE]]
; GFX9-DAG: v_add_i32_e32 [[B3:v[0-9]+]], vcc, 0x960, [[BASE]]
; GCN-DAG: ds_write2_b32 [[BASE]], v{{[0-9]+}}, v{{[0-9]+}} offset1:100
; GCN-DAG: ds_write2_b32 [[B1]], v{{[0-9]+}}, v{{[0-9]+}} offset1:100
; GCN-DAG: ds_write2_b32 [[B2]], v{{[0-9]+}}, v{{[0-9]+}} offset1:100
; GCN-DAG: ds_write2_b32 [[B3]], v{{[0-9]+}}, v{{[0-9]+}} offset1:100
define amdgpu_kernel void @ds_write32_combine_stride_400(float addrspace(3)* nocapture %arg) {
bb:
  ; The same constant is stored to every slot, in ascending address order.
  store float 1.000000e+00, float addrspace(3)* %arg, align 4
  %tmp = getelementptr inbounds float, float addrspace(3)* %arg, i32 100
  store float 1.000000e+00, float addrspace(3)* %tmp, align 4
  %tmp1 = getelementptr inbounds float, float addrspace(3)* %arg, i32 200
  store float 1.000000e+00, float addrspace(3)* %tmp1, align 4
  %tmp2 = getelementptr inbounds float, float addrspace(3)* %arg, i32 300
  store float 1.000000e+00, float addrspace(3)* %tmp2, align 4
  %tmp3 = getelementptr inbounds float, float addrspace(3)* %arg, i32 400
  store float 1.000000e+00, float addrspace(3)* %tmp3, align 4
  %tmp4 = getelementptr inbounds float, float addrspace(3)* %arg, i32 500
  store float 1.000000e+00, float addrspace(3)* %tmp4, align 4
  %tmp5 = getelementptr inbounds float, float addrspace(3)* %arg, i32 600
  store float 1.000000e+00, float addrspace(3)* %tmp5, align 4
  %tmp6 = getelementptr inbounds float, float addrspace(3)* %arg, i32 700
  store float 1.000000e+00, float addrspace(3)* %tmp6, align 4
  ret void
}

; Same eight float stores as the forward 400-byte-stride write test, but
; issued in descending order (element indices 700, 600, ..., 0). The checks
; expect the same four ds_write2_b32 instructions with offset1:100 on the
; incoming pointer and on derived bases at +0x320, +0x640 and +0x960 bytes.
; GCN-LABEL: ds_write32_combine_stride_400_back:
; GCN: s_load_dword [[ARG:s[0-9]+]], s[4:5], 0x0
; GCN: v_mov_b32_e32 [[BASE:v[0-9]+]], [[ARG]]
; GCN-DAG: v_add_i32_e32 [[B1:v[0-9]+]], vcc, 0x320, [[BASE]]
; GCN-DAG: v_add_i32_e32 [[B2:v[0-9]+]], vcc, 0x640, [[BASE]]
; GCN-DAG: v_add_i32_e32 [[B3:v[0-9]+]], vcc, 0x960, [[BASE]]
; GFX9-DAG: v_add_i32_e32 [[B1:v[0-9]+]], vcc, 0x320, [[BASE]]
; GFX9-DAG: v_add_i32_e32 [[B2:v[0-9]+]], vcc, 0x640, [[BASE]]
; GFX9-DAG: v_add_i32_e32 [[B3:v[0-9]+]], vcc, 0x960, [[BASE]]
; GCN-DAG: ds_write2_b32 [[BASE]], v{{[0-9]+}}, v{{[0-9]+}} offset1:100
; GCN-DAG: ds_write2_b32 [[B1]], v{{[0-9]+}}, v{{[0-9]+}} offset1:100
; GCN-DAG: ds_write2_b32 [[B2]], v{{[0-9]+}}, v{{[0-9]+}} offset1:100
; GCN-DAG: ds_write2_b32 [[B3]], v{{[0-9]+}}, v{{[0-9]+}} offset1:100
define amdgpu_kernel void @ds_write32_combine_stride_400_back(float addrspace(3)* nocapture %arg) {
bb:
  ; The same constant is stored to every slot, in descending address order.
  %tmp = getelementptr inbounds float, float addrspace(3)* %arg, i32 700
  store float 1.000000e+00, float addrspace(3)* %tmp, align 4
  %tmp1 = getelementptr inbounds float, float addrspace(3)* %arg, i32 600
  store float 1.000000e+00, float addrspace(3)* %tmp1, align 4
  %tmp2 = getelementptr inbounds float, float addrspace(3)* %arg, i32 500
  store float 1.000000e+00, float addrspace(3)* %tmp2, align 4
  %tmp3 = getelementptr inbounds float, float addrspace(3)* %arg, i32 400
  store float 1.000000e+00, float addrspace(3)* %tmp3, align 4
  %tmp4 = getelementptr inbounds float, float addrspace(3)* %arg, i32 300
  store float 1.000000e+00, float addrspace(3)* %tmp4, align 4
  %tmp5 = getelementptr inbounds float, float addrspace(3)* %arg, i32 200
  store float 1.000000e+00, float addrspace(3)* %tmp5, align 4
  %tmp6 = getelementptr inbounds float, float addrspace(3)* %arg, i32 100
  store float 1.000000e+00, float addrspace(3)* %tmp6, align 4
  store float 1.000000e+00, float addrspace(3)* %arg, align 4
  ret void
}

; Eight stores of float 1.0 to addrspace(3) at element indices 0, 2048, ...,
; 14336, i.e. an 8192-byte stride. The checks expect four ds_write2st64_b32
; instructions, all on the incoming pointer, with offset pairs (0, 32),
; (64, 96), (128, 160) and (192, 224); no derived base is expected.
; GCN-LABEL: ds_write32_combine_stride_8192:
; GCN: s_load_dword [[ARG:s[0-9]+]], s[4:5], 0x0
; GCN: v_mov_b32_e32 [[BASE:v[0-9]+]], [[ARG]]
; GCN-DAG: ds_write2st64_b32 [[BASE]], v{{[0-9]+}}, v{{[0-9]+}} offset1:32
; GCN-DAG: ds_write2st64_b32 [[BASE]], v{{[0-9]+}}, v{{[0-9]+}} offset0:64 offset1:96
; GCN-DAG: ds_write2st64_b32 [[BASE]], v{{[0-9]+}}, v{{[0-9]+}} offset0:128 offset1:160
; GCN-DAG: ds_write2st64_b32 [[BASE]], v{{[0-9]+}}, v{{[0-9]+}} offset0:192 offset1:224
define amdgpu_kernel void @ds_write32_combine_stride_8192(float addrspace(3)* nocapture %arg) {
bb:
  ; The same constant is stored to every slot, in ascending address order.
  store float 1.000000e+00, float addrspace(3)* %arg, align 4
  %tmp = getelementptr inbounds float, float addrspace(3)* %arg, i32 2048
  store float 1.000000e+00, float addrspace(3)* %tmp, align 4
  %tmp1 = getelementptr inbounds float, float addrspace(3)* %arg, i32 4096
  store float 1.000000e+00, float addrspace(3)* %tmp1, align 4
  %tmp2 = getelementptr inbounds float, float addrspace(3)* %arg, i32 6144
  store float 1.000000e+00, float addrspace(3)* %tmp2, align 4
  %tmp3 = getelementptr inbounds float, float addrspace(3)* %arg, i32 8192
  store float 1.000000e+00, float addrspace(3)* %tmp3, align 4
  %tmp4 = getelementptr inbounds float, float addrspace(3)* %arg, i32 10240
  store float 1.000000e+00, float addrspace(3)* %tmp4, align 4
  %tmp5 = getelementptr inbounds float, float addrspace(3)* %arg, i32 12288
  store float 1.000000e+00, float addrspace(3)* %tmp5, align 4
  %tmp6 = getelementptr inbounds float, float addrspace(3)* %arg, i32 14336
  store float 1.000000e+00, float addrspace(3)* %tmp6, align 4
  ret void
}

; Six stores of float 1.0 to addrspace(3) at element indices 1, 2049, ...,
; 10241: an 8192-byte stride whose first access sits 4 bytes past the pointer.
; The checks expect three ds_write2st64_b32 instructions with offset1:32, each
; on a derived base at +4, +0x4004 and +0x8004 bytes respectively.
; GCN-LABEL: ds_write32_combine_stride_8192_shifted:
; GCN: s_load_dword [[ARG:s[0-9]+]], s[4:5], 0x0
; GCN: v_mov_b32_e32 [[BASE:v[0-9]+]], [[ARG]]
; GCN-DAG: v_add_i32_e32 [[B1:v[0-9]+]], vcc, 4, [[BASE]]
; GCN-DAG: v_add_i32_e32 [[B2:v[0-9]+]], vcc, 0x4004, [[BASE]]
; GCN-DAG: v_add_i32_e32 [[B3:v[0-9]+]], vcc, 0x8004, [[BASE]]
; GFX9-DAG: v_add_i32_e32 [[B1:v[0-9]+]], vcc, 4, [[BASE]]
; GFX9-DAG: v_add_i32_e32 [[B2:v[0-9]+]], vcc, 0x4004, [[BASE]]
; GFX9-DAG: v_add_i32_e32 [[B3:v[0-9]+]], vcc, 0x8004, [[BASE]]
; GCN-DAG: ds_write2st64_b32 [[B1]], v{{[0-9]+}}, v{{[0-9]+}} offset1:32
; GCN-DAG: ds_write2st64_b32 [[B2]], v{{[0-9]+}}, v{{[0-9]+}} offset1:32
; GCN-DAG: ds_write2st64_b32 [[B3]], v{{[0-9]+}}, v{{[0-9]+}} offset1:32
define amdgpu_kernel void @ds_write32_combine_stride_8192_shifted(float addrspace(3)* nocapture %arg) {
bb:
  ; The same constant is stored to every slot, in ascending address order.
  %tmp = getelementptr inbounds float, float addrspace(3)* %arg, i32 1
  store float 1.000000e+00, float addrspace(3)* %tmp, align 4
  %tmp1 = getelementptr inbounds float, float addrspace(3)* %arg, i32 2049
  store float 1.000000e+00, float addrspace(3)* %tmp1, align 4
  %tmp2 = getelementptr inbounds float, float addrspace(3)* %arg, i32 4097
  store float 1.000000e+00, float addrspace(3)* %tmp2, align 4
  %tmp3 = getelementptr inbounds float, float addrspace(3)* %arg, i32 6145
  store float 1.000000e+00, float addrspace(3)* %tmp3, align 4
  %tmp4 = getelementptr inbounds float, float addrspace(3)* %arg, i32 8193
  store float 1.000000e+00, float addrspace(3)* %tmp4, align 4
  %tmp5 = getelementptr inbounds float, float addrspace(3)* %arg, i32 10241
  store float 1.000000e+00, float addrspace(3)* %tmp5, align 4
  ret void
}

; Eight stores of double 1.0 to addrspace(3) at element indices 0, 50, ...,
; 350, i.e. a 400-byte stride. The checks expect four ds_write2_b64
; instructions: three on the incoming pointer with offset pairs (0, 50),
; (100, 150) and (200, 250), and one with offset1:50 on a derived base at
; +0x960 bytes.
; GCN-LABEL: ds_write64_combine_stride_400:
; GCN: s_load_dword [[ARG:s[0-9]+]], s[4:5], 0x0
; GCN: v_mov_b32_e32 [[BASE:v[0-9]+]], [[ARG]]
; GCN-DAG: v_add_i32_e32 [[B1:v[0-9]+]], vcc, 0x960, [[BASE]]
; GFX9-DAG: v_add_i32_e32 [[B1:v[0-9]+]], vcc, 0x960, [[BASE]]
; GCN-DAG: ds_write2_b64 [[BASE]], v[{{[0-9]+:[0-9]+}}], v[{{[0-9]+:[0-9]+}}] offset1:50
; GCN-DAG: ds_write2_b64 [[BASE]], v[{{[0-9]+:[0-9]+}}], v[{{[0-9]+:[0-9]+}}] offset0:100 offset1:150
; GCN-DAG: ds_write2_b64 [[BASE]], v[{{[0-9]+:[0-9]+}}], v[{{[0-9]+:[0-9]+}}] offset0:200 offset1:250
; GCN-DAG: ds_write2_b64 [[B1]], v[{{[0-9]+:[0-9]+}}], v[{{[0-9]+:[0-9]+}}] offset1:50
define amdgpu_kernel void @ds_write64_combine_stride_400(double addrspace(3)* nocapture %arg) {
bb:
  ; The same constant is stored to every slot, in ascending address order.
  store double 1.000000e+00, double addrspace(3)* %arg, align 8
  %tmp = getelementptr inbounds double, double addrspace(3)* %arg, i32 50
  store double 1.000000e+00, double addrspace(3)* %tmp, align 8
  %tmp1 = getelementptr inbounds double, double addrspace(3)* %arg, i32 100
  store double 1.000000e+00, double addrspace(3)* %tmp1, align 8
  %tmp2 = getelementptr inbounds double, double addrspace(3)* %arg, i32 150
  store double 1.000000e+00, double addrspace(3)* %tmp2, align 8
  %tmp3 = getelementptr inbounds double, double addrspace(3)* %arg, i32 200
  store double 1.000000e+00, double addrspace(3)* %tmp3, align 8
  %tmp4 = getelementptr inbounds double, double addrspace(3)* %arg, i32 250
  store double 1.000000e+00, double addrspace(3)* %tmp4, align 8
  %tmp5 = getelementptr inbounds double, double addrspace(3)* %arg, i32 300
  store double 1.000000e+00, double addrspace(3)* %tmp5, align 8
  %tmp6 = getelementptr inbounds double, double addrspace(3)* %arg, i32 350
  store double 1.000000e+00, double addrspace(3)* %tmp6, align 8
  ret void
}

; Six stores of double 1.0 to addrspace(3) at element indices 1, 1025, ...,
; 5121: an 8192-byte stride whose first access sits 8 bytes past the pointer.
; The checks expect three ds_write2st64_b64 instructions with offset1:16, each
; on a derived base at +8, +0x4008 and +0x8008 bytes respectively.
; GCN-LABEL: ds_write64_combine_stride_8192_shifted:
; GCN: s_load_dword [[ARG:s[0-9]+]], s[4:5], 0x0
; GCN: v_mov_b32_e32 [[BASE:v[0-9]+]], [[ARG]]
; GCN-DAG: v_add_i32_e32 [[B1:v[0-9]+]], vcc, 8, [[BASE]]
; GCN-DAG: v_add_i32_e32 [[B2:v[0-9]+]], vcc, 0x4008, [[BASE]]
; GCN-DAG: v_add_i32_e32 [[B3:v[0-9]+]], vcc, 0x8008, [[BASE]]
; GFX9-DAG: v_add_i32_e32 [[B1:v[0-9]+]], vcc, 8, [[BASE]]
; GFX9-DAG: v_add_i32_e32 [[B2:v[0-9]+]], vcc, 0x4008, [[BASE]]
; GFX9-DAG: v_add_i32_e32 [[B3:v[0-9]+]], vcc, 0x8008, [[BASE]]
; GCN-DAG: ds_write2st64_b64 [[B1]], v[{{[0-9]+:[0-9]+}}], v[{{[0-9]+:[0-9]+}}] offset1:16
; GCN-DAG: ds_write2st64_b64 [[B2]], v[{{[0-9]+:[0-9]+}}], v[{{[0-9]+:[0-9]+}}] offset1:16
; GCN-DAG: ds_write2st64_b64 [[B3]], v[{{[0-9]+:[0-9]+}}], v[{{[0-9]+:[0-9]+}}] offset1:16
define amdgpu_kernel void @ds_write64_combine_stride_8192_shifted(double addrspace(3)* nocapture %arg) {
bb:
  ; The same constant is stored to every slot, in ascending address order.
  %tmp = getelementptr inbounds double, double addrspace(3)* %arg, i32 1
  store double 1.000000e+00, double addrspace(3)* %tmp, align 8
  %tmp1 = getelementptr inbounds double, double addrspace(3)* %arg, i32 1025
  store double 1.000000e+00, double addrspace(3)* %tmp1, align 8
  %tmp2 = getelementptr inbounds double, double addrspace(3)* %arg, i32 2049
  store double 1.000000e+00, double addrspace(3)* %tmp2, align 8
  %tmp3 = getelementptr inbounds double, double addrspace(3)* %arg, i32 3073
  store double 1.000000e+00, double addrspace(3)* %tmp3, align 8
  %tmp4 = getelementptr inbounds double, double addrspace(3)* %arg, i32 4097
  store double 1.000000e+00, double addrspace(3)* %tmp4, align 8
  %tmp5 = getelementptr inbounds double, double addrspace(3)* %arg, i32 5121
  store double 1.000000e+00, double addrspace(3)* %tmp5, align 8
  ret void
}