blob: fdce4f3a9951734289464718061dfba28b9a71e4 [file] [log] [blame]
Marek Olsak37cd4d02015-02-03 21:53:27 +00001; RUN: llc -march=amdgcn -mcpu=verde -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
2; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
3; RUN: llc -march=r600 -mcpu=redwood < %s | FileCheck -check-prefix=EG -check-prefix=FUNC %s
4; RUN: llc -march=r600 -mcpu=cayman < %s | FileCheck -check-prefix=CM -check-prefix=FUNC %s
Tom Stellard754f80f2013-04-05 23:31:51 +00005
Tom Stellardd3ee8c12013-08-16 01:12:06 +00006;===------------------------------------------------------------------------===;
7; Global Address Space
8;===------------------------------------------------------------------------===;
Tom Stellard79243d92014-10-01 17:15:17 +00009; FUNC-LABEL: {{^}}store_i1:
Marek Olsak37cd4d02015-02-03 21:53:27 +000010; EG: MEM_RAT MSKOR
11; SI: buffer_store_byte
Tom Stellard1c8788e2014-03-07 20:12:33 +000012define void @store_i1(i1 addrspace(1)* %out) {
13entry:
14 store i1 true, i1 addrspace(1)* %out
15 ret void
16}
Tom Stellardd3ee8c12013-08-16 01:12:06 +000017
18; i8 store
Marek Olsak37cd4d02015-02-03 21:53:27 +000019; EG-LABEL: {{^}}store_i8:
20; EG: MEM_RAT MSKOR T[[RW_GPR:[0-9]]].XW, T{{[0-9]}}.X
Matt Arsenault810cb622014-12-12 00:00:24 +000021
Vincent Lejeune7e2c8322013-09-04 19:53:46 +000022; IG 0: Get the byte index and truncate the value
Marek Olsak37cd4d02015-02-03 21:53:27 +000023; EG: AND_INT * T{{[0-9]}}.[[BI_CHAN:[XYZW]]], KC0[2].Y, literal.x
24; EG: LSHL T{{[0-9]}}.[[SHIFT_CHAN:[XYZW]]], PV.[[BI_CHAN]], literal.x
25; EG: AND_INT * T{{[0-9]}}.[[TRUNC_CHAN:[XYZW]]], KC0[2].Z, literal.y
26; EG-NEXT: 3(4.203895e-45), 255(3.573311e-43)
Matt Arsenault810cb622014-12-12 00:00:24 +000027
28
Vincent Lejeune7e2c8322013-09-04 19:53:46 +000029; IG 1: Truncate the calculated shift amount for the mask
Matt Arsenault810cb622014-12-12 00:00:24 +000030
Tom Stellardd3ee8c12013-08-16 01:12:06 +000031; IG 2: Shift the value and the mask
Marek Olsak37cd4d02015-02-03 21:53:27 +000032; EG: LSHL T[[RW_GPR]].X, PS, PV.[[SHIFT_CHAN]]
33; EG: LSHL * T[[RW_GPR]].W, literal.x, PV.[[SHIFT_CHAN]]
34; EG-NEXT: 255
Tom Stellardd3ee8c12013-08-16 01:12:06 +000035; IG 3: Initialize the Y and Z channels to zero
36; XXX: An optimal scheduler should merge this into one of the previous IGs.
Marek Olsak37cd4d02015-02-03 21:53:27 +000037; EG: MOV T[[RW_GPR]].Y, 0.0
38; EG: MOV * T[[RW_GPR]].Z, 0.0
Tom Stellardd3ee8c12013-08-16 01:12:06 +000039
Marek Olsak37cd4d02015-02-03 21:53:27 +000040; SI-LABEL: {{^}}store_i8:
41; SI: buffer_store_byte
Tom Stellardd3ee8c12013-08-16 01:12:06 +000042
43define void @store_i8(i8 addrspace(1)* %out, i8 %in) {
44entry:
45 store i8 %in, i8 addrspace(1)* %out
46 ret void
47}
48
49; i16 store
Marek Olsak37cd4d02015-02-03 21:53:27 +000050; EG-LABEL: {{^}}store_i16:
51; EG: MEM_RAT MSKOR T[[RW_GPR:[0-9]]].XW, T{{[0-9]}}.X
Matt Arsenault810cb622014-12-12 00:00:24 +000052
Vincent Lejeune7e2c8322013-09-04 19:53:46 +000053; IG 0: Get the byte index and truncate the value
Matt Arsenault810cb622014-12-12 00:00:24 +000054
55
Marek Olsak37cd4d02015-02-03 21:53:27 +000056; EG: AND_INT * T{{[0-9]}}.[[BI_CHAN:[XYZW]]], KC0[2].Y, literal.x
57; EG-NEXT: 3(4.203895e-45),
Matt Arsenault810cb622014-12-12 00:00:24 +000058
Marek Olsak37cd4d02015-02-03 21:53:27 +000059; EG: LSHL T{{[0-9]}}.[[SHIFT_CHAN:[XYZW]]], PV.[[BI_CHAN]], literal.x
60; EG: AND_INT * T{{[0-9]}}.[[TRUNC_CHAN:[XYZW]]], KC0[2].Z, literal.y
Matt Arsenault810cb622014-12-12 00:00:24 +000061
Marek Olsak37cd4d02015-02-03 21:53:27 +000062; EG-NEXT: 3(4.203895e-45), 65535(9.183409e-41)
Vincent Lejeune7e2c8322013-09-04 19:53:46 +000063; IG 1: Truncate the calculated shift amount for the mask
Matt Arsenault810cb622014-12-12 00:00:24 +000064
Tom Stellardd3ee8c12013-08-16 01:12:06 +000065; IG 2: Shift the value and the mask
Marek Olsak37cd4d02015-02-03 21:53:27 +000066; EG: LSHL T[[RW_GPR]].X, PS, PV.[[SHIFT_CHAN]]
67; EG: LSHL * T[[RW_GPR]].W, literal.x, PV.[[SHIFT_CHAN]]
68; EG-NEXT: 65535
Tom Stellardd3ee8c12013-08-16 01:12:06 +000069; IG 3: Initialize the Y and Z channels to zero
70; XXX: An optimal scheduler should merge this into one of the previous IGs.
Marek Olsak37cd4d02015-02-03 21:53:27 +000071; EG: MOV T[[RW_GPR]].Y, 0.0
72; EG: MOV * T[[RW_GPR]].Z, 0.0
Tom Stellardd3ee8c12013-08-16 01:12:06 +000073
Marek Olsak37cd4d02015-02-03 21:53:27 +000074; SI-LABEL: {{^}}store_i16:
75; SI: buffer_store_short
Tom Stellardd3ee8c12013-08-16 01:12:06 +000076define void @store_i16(i16 addrspace(1)* %out, i16 %in) {
77entry:
78 store i16 %in, i16 addrspace(1)* %out
79 ret void
80}
81
Marek Olsak37cd4d02015-02-03 21:53:27 +000082; EG-LABEL: {{^}}store_v2i8:
83; EG: MEM_RAT MSKOR
84; EG-NOT: MEM_RAT MSKOR
85; SI-LABEL: {{^}}store_v2i8:
86; SI: buffer_store_byte
87; SI: buffer_store_byte
Tom Stellardfbab8272013-08-16 01:12:11 +000088define void @store_v2i8(<2 x i8> addrspace(1)* %out, <2 x i32> %in) {
89entry:
90 %0 = trunc <2 x i32> %in to <2 x i8>
91 store <2 x i8> %0, <2 x i8> addrspace(1)* %out
92 ret void
93}
94
95
Marek Olsak37cd4d02015-02-03 21:53:27 +000096; EG-LABEL: {{^}}store_v2i16:
97; EG: MEM_RAT_CACHELESS STORE_RAW
98; CM-LABEL: {{^}}store_v2i16:
99; CM: MEM_RAT_CACHELESS STORE_DWORD
100; SI-LABEL: {{^}}store_v2i16:
101; SI: buffer_store_short
102; SI: buffer_store_short
Tom Stellardfbab8272013-08-16 01:12:11 +0000103define void @store_v2i16(<2 x i16> addrspace(1)* %out, <2 x i32> %in) {
104entry:
105 %0 = trunc <2 x i32> %in to <2 x i16>
106 store <2 x i16> %0, <2 x i16> addrspace(1)* %out
107 ret void
108}
109
Marek Olsak37cd4d02015-02-03 21:53:27 +0000110; EG-LABEL: {{^}}store_v4i8:
111; EG: MEM_RAT_CACHELESS STORE_RAW
112; CM-LABEL: {{^}}store_v4i8:
113; CM: MEM_RAT_CACHELESS STORE_DWORD
114; SI-LABEL: {{^}}store_v4i8:
115; SI: buffer_store_byte
116; SI: buffer_store_byte
117; SI: buffer_store_byte
118; SI: buffer_store_byte
Tom Stellardfbab8272013-08-16 01:12:11 +0000119define void @store_v4i8(<4 x i8> addrspace(1)* %out, <4 x i32> %in) {
120entry:
121 %0 = trunc <4 x i32> %in to <4 x i8>
122 store <4 x i8> %0, <4 x i8> addrspace(1)* %out
123 ret void
124}
125
Tom Stellard5a6b0d82013-04-19 02:10:53 +0000126; floating-point store
Marek Olsak37cd4d02015-02-03 21:53:27 +0000127; EG-LABEL: {{^}}store_f32:
128; EG: MEM_RAT_CACHELESS STORE_RAW T{{[0-9]+\.X, T[0-9]+\.X}}, 1
129; CM-LABEL: {{^}}store_f32:
130; CM: MEM_RAT_CACHELESS STORE_DWORD T{{[0-9]+\.X, T[0-9]+\.X}}
131; SI-LABEL: {{^}}store_f32:
132; SI: buffer_store_dword
Tom Stellard754f80f2013-04-05 23:31:51 +0000133
Tom Stellard5a6b0d82013-04-19 02:10:53 +0000134define void @store_f32(float addrspace(1)* %out, float %in) {
Tom Stellard754f80f2013-04-05 23:31:51 +0000135 store float %in, float addrspace(1)* %out
136 ret void
137}
Tom Stellard0125f2a2013-06-25 02:39:35 +0000138
Marek Olsak37cd4d02015-02-03 21:53:27 +0000139; EG-LABEL: {{^}}store_v4i16:
140; EG: MEM_RAT MSKOR
141; EG: MEM_RAT MSKOR
142; EG: MEM_RAT MSKOR
143; EG: MEM_RAT MSKOR
144; EG-NOT: MEM_RAT MSKOR
145; SI-LABEL: {{^}}store_v4i16:
146; SI: buffer_store_short
147; SI: buffer_store_short
148; SI: buffer_store_short
149; SI: buffer_store_short
150; SI-NOT: buffer_store_byte
Tom Stellardfbab8272013-08-16 01:12:11 +0000151define void @store_v4i16(<4 x i16> addrspace(1)* %out, <4 x i32> %in) {
152entry:
153 %0 = trunc <4 x i32> %in to <4 x i16>
154 store <4 x i16> %0, <4 x i16> addrspace(1)* %out
155 ret void
156}
157
Tom Stellarded2f6142013-07-18 21:43:42 +0000158; vec2 floating-point stores
Marek Olsak37cd4d02015-02-03 21:53:27 +0000159; EG-LABEL: {{^}}store_v2f32:
160; EG: MEM_RAT_CACHELESS STORE_RAW
161; CM-LABEL: {{^}}store_v2f32:
162; CM: MEM_RAT_CACHELESS STORE_DWORD
163; SI-LABEL: {{^}}store_v2f32:
164; SI: buffer_store_dwordx2
Tom Stellarded2f6142013-07-18 21:43:42 +0000165
166define void @store_v2f32(<2 x float> addrspace(1)* %out, float %a, float %b) {
167entry:
168 %0 = insertelement <2 x float> <float 0.0, float 0.0>, float %a, i32 0
Tom Stellard8e5da412013-08-14 23:24:32 +0000169 %1 = insertelement <2 x float> %0, float %b, i32 1
Tom Stellarded2f6142013-07-18 21:43:42 +0000170 store <2 x float> %1, <2 x float> addrspace(1)* %out
171 ret void
172}
173
Marek Olsak37cd4d02015-02-03 21:53:27 +0000174; EG-LABEL: {{^}}store_v4i32:
175; EG: MEM_RAT_CACHELESS STORE_RAW
176; EG-NOT: MEM_RAT_CACHELESS STORE_RAW
177; CM-LABEL: {{^}}store_v4i32:
178; CM: MEM_RAT_CACHELESS STORE_DWORD
179; CM-NOT: MEM_RAT_CACHELESS STORE_DWORD
180; SI-LABEL: {{^}}store_v4i32:
181; SI: buffer_store_dwordx4
Tom Stellard6d1379e2013-08-16 01:12:00 +0000182define void @store_v4i32(<4 x i32> addrspace(1)* %out, <4 x i32> %in) {
183entry:
184 store <4 x i32> %in, <4 x i32> addrspace(1)* %out
185 ret void
186}
187
Tom Stellard79243d92014-10-01 17:15:17 +0000188; FUNC-LABEL: {{^}}store_i64_i8:
Marek Olsak37cd4d02015-02-03 21:53:27 +0000189; EG: MEM_RAT MSKOR
190; SI: buffer_store_byte
Tom Stellard605e1162014-05-02 15:41:46 +0000191define void @store_i64_i8(i8 addrspace(1)* %out, i64 %in) {
192entry:
193 %0 = trunc i64 %in to i8
194 store i8 %0, i8 addrspace(1)* %out
195 ret void
196}
197
Tom Stellard79243d92014-10-01 17:15:17 +0000198; FUNC-LABEL: {{^}}store_i64_i16:
Marek Olsak37cd4d02015-02-03 21:53:27 +0000199; EG: MEM_RAT MSKOR
200; SI: buffer_store_short
Tom Stellard605e1162014-05-02 15:41:46 +0000201define void @store_i64_i16(i16 addrspace(1)* %out, i64 %in) {
202entry:
203 %0 = trunc i64 %in to i16
204 store i16 %0, i16 addrspace(1)* %out
205 ret void
206}
207
Tom Stellard2ffc3302013-08-26 15:05:44 +0000208;===------------------------------------------------------------------------===;
209; Local Address Space
210;===------------------------------------------------------------------------===;
211
Tom Stellard79243d92014-10-01 17:15:17 +0000212; FUNC-LABEL: {{^}}store_local_i1:
Marek Olsak37cd4d02015-02-03 21:53:27 +0000213; EG: LDS_BYTE_WRITE
214; SI: ds_write_b8
Tom Stellard1c8788e2014-03-07 20:12:33 +0000215define void @store_local_i1(i1 addrspace(3)* %out) {
216entry:
217 store i1 true, i1 addrspace(3)* %out
218 ret void
219}
220
Marek Olsak37cd4d02015-02-03 21:53:27 +0000221; EG-LABEL: {{^}}store_local_i8:
222; EG: LDS_BYTE_WRITE
223; SI-LABEL: {{^}}store_local_i8:
224; SI: ds_write_b8
Tom Stellardf3d166a2013-08-26 15:05:49 +0000225define void @store_local_i8(i8 addrspace(3)* %out, i8 %in) {
226 store i8 %in, i8 addrspace(3)* %out
227 ret void
228}
229
Marek Olsak37cd4d02015-02-03 21:53:27 +0000230; EG-LABEL: {{^}}store_local_i16:
231; EG: LDS_SHORT_WRITE
232; SI-LABEL: {{^}}store_local_i16:
233; SI: ds_write_b16
Tom Stellardf3d166a2013-08-26 15:05:49 +0000234define void @store_local_i16(i16 addrspace(3)* %out, i16 %in) {
235 store i16 %in, i16 addrspace(3)* %out
236 ret void
237}
238
Marek Olsak37cd4d02015-02-03 21:53:27 +0000239; EG-LABEL: {{^}}store_local_v2i16:
240; EG: LDS_WRITE
241; CM-LABEL: {{^}}store_local_v2i16:
242; CM: LDS_WRITE
243; SI-LABEL: {{^}}store_local_v2i16:
244; SI: ds_write_b16
245; SI: ds_write_b16
Tom Stellard2ffc3302013-08-26 15:05:44 +0000246define void @store_local_v2i16(<2 x i16> addrspace(3)* %out, <2 x i16> %in) {
247entry:
248 store <2 x i16> %in, <2 x i16> addrspace(3)* %out
249 ret void
250}
251
Marek Olsak37cd4d02015-02-03 21:53:27 +0000252; EG-LABEL: {{^}}store_local_v4i8:
253; EG: LDS_WRITE
254; CM-LABEL: {{^}}store_local_v4i8:
255; CM: LDS_WRITE
256; SI-LABEL: {{^}}store_local_v4i8:
257; SI: ds_write_b8
258; SI: ds_write_b8
259; SI: ds_write_b8
260; SI: ds_write_b8
Tom Stellard7da047c2013-08-26 15:05:55 +0000261define void @store_local_v4i8(<4 x i8> addrspace(3)* %out, <4 x i8> %in) {
262entry:
263 store <4 x i8> %in, <4 x i8> addrspace(3)* %out
264 ret void
265}
266
Marek Olsak37cd4d02015-02-03 21:53:27 +0000267; EG-LABEL: {{^}}store_local_v2i32:
268; EG: LDS_WRITE
269; EG: LDS_WRITE
270; CM-LABEL: {{^}}store_local_v2i32:
271; CM: LDS_WRITE
272; CM: LDS_WRITE
273; SI-LABEL: {{^}}store_local_v2i32:
274; SI: ds_write_b64
Tom Stellard2ffc3302013-08-26 15:05:44 +0000275define void @store_local_v2i32(<2 x i32> addrspace(3)* %out, <2 x i32> %in) {
276entry:
277 store <2 x i32> %in, <2 x i32> addrspace(3)* %out
278 ret void
279}
280
Marek Olsak37cd4d02015-02-03 21:53:27 +0000281; EG-LABEL: {{^}}store_local_v4i32:
282; EG: LDS_WRITE
283; EG: LDS_WRITE
284; EG: LDS_WRITE
285; EG: LDS_WRITE
286; CM-LABEL: {{^}}store_local_v4i32:
287; CM: LDS_WRITE
288; CM: LDS_WRITE
289; CM: LDS_WRITE
290; CM: LDS_WRITE
291; SI-LABEL: {{^}}store_local_v4i32:
292; SI: ds_write_b32
293; SI: ds_write_b32
294; SI: ds_write_b32
295; SI: ds_write_b32
Tom Stellard2ffc3302013-08-26 15:05:44 +0000296define void @store_local_v4i32(<4 x i32> addrspace(3)* %out, <4 x i32> %in) {
297entry:
298 store <4 x i32> %in, <4 x i32> addrspace(3)* %out
299 ret void
300}
301
Tom Stellard79243d92014-10-01 17:15:17 +0000302; FUNC-LABEL: {{^}}store_local_i64_i8:
Marek Olsak37cd4d02015-02-03 21:53:27 +0000303; EG: LDS_BYTE_WRITE
304; SI: ds_write_b8
Tom Stellard605e1162014-05-02 15:41:46 +0000305define void @store_local_i64_i8(i8 addrspace(3)* %out, i64 %in) {
306entry:
307 %0 = trunc i64 %in to i8
308 store i8 %0, i8 addrspace(3)* %out
309 ret void
310}
311
Tom Stellard79243d92014-10-01 17:15:17 +0000312; FUNC-LABEL: {{^}}store_local_i64_i16:
Marek Olsak37cd4d02015-02-03 21:53:27 +0000313; EG: LDS_SHORT_WRITE
314; SI: ds_write_b16
Tom Stellard605e1162014-05-02 15:41:46 +0000315define void @store_local_i64_i16(i16 addrspace(3)* %out, i64 %in) {
316entry:
317 %0 = trunc i64 %in to i16
318 store i16 %0, i16 addrspace(3)* %out
319 ret void
320}
321
Tom Stellard0125f2a2013-06-25 02:39:35 +0000322; The stores in this function are combined by the optimizer to create a
323; 64-bit store with 32-bit alignment. This is legal for SI and the legalizer
324; should not try to split the 64-bit store back into 2 32-bit stores.
325;
326; Evergreen / Northern Islands don't support 64-bit stores yet, so there should
327; be two 32-bit stores.
328
Marek Olsak37cd4d02015-02-03 21:53:27 +0000329; EG-LABEL: {{^}}vecload2:
330; EG: MEM_RAT_CACHELESS STORE_RAW
331; CM-LABEL: {{^}}vecload2:
332; CM: MEM_RAT_CACHELESS STORE_DWORD
333; SI-LABEL: {{^}}vecload2:
334; SI: buffer_store_dwordx2
Tom Stellard0125f2a2013-06-25 02:39:35 +0000335define void @vecload2(i32 addrspace(1)* nocapture %out, i32 addrspace(2)* nocapture %mem) #0 {
336entry:
Manman Ren1047fe42013-09-30 18:17:35 +0000337 %0 = load i32 addrspace(2)* %mem, align 4
David Blaikie79e6c742015-02-27 19:29:02 +0000338 %arrayidx1.i = getelementptr inbounds i32, i32 addrspace(2)* %mem, i64 1
Manman Ren1047fe42013-09-30 18:17:35 +0000339 %1 = load i32 addrspace(2)* %arrayidx1.i, align 4
340 store i32 %0, i32 addrspace(1)* %out, align 4
David Blaikie79e6c742015-02-27 19:29:02 +0000341 %arrayidx1 = getelementptr inbounds i32, i32 addrspace(1)* %out, i64 1
Manman Ren1047fe42013-09-30 18:17:35 +0000342 store i32 %1, i32 addrspace(1)* %arrayidx1, align 4
Tom Stellard0125f2a2013-06-25 02:39:35 +0000343 ret void
344}
345
Bill Wendling187d3dd2013-08-22 21:28:54 +0000346attributes #0 = { nounwind "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "unsafe-fp-math"="false" "use-soft-float"="false" }
Tom Stellard868fd922014-04-17 21:00:11 +0000347
348; When i128 was a legal type, this program generated "cannot select" errors:
349
Tom Stellard79243d92014-10-01 17:15:17 +0000350; FUNC-LABEL: {{^}}"i128-const-store":
Tom Stellard868fd922014-04-17 21:00:11 +0000351; FIXME: We should be able to do this with one store instruction
Marek Olsak37cd4d02015-02-03 21:53:27 +0000352; EG: STORE_RAW
353; EG: STORE_RAW
354; EG: STORE_RAW
355; EG: STORE_RAW
356; CM: STORE_DWORD
357; CM: STORE_DWORD
358; CM: STORE_DWORD
359; CM: STORE_DWORD
Tom Stellard326d6ec2014-11-05 14:50:53 +0000360; SI: buffer_store_dwordx2
361; SI: buffer_store_dwordx2
Tom Stellard868fd922014-04-17 21:00:11 +0000362define void @i128-const-store(i32 addrspace(1)* %out) {
363entry:
364 store i32 1, i32 addrspace(1)* %out, align 4
David Blaikie79e6c742015-02-27 19:29:02 +0000365 %arrayidx2 = getelementptr inbounds i32, i32 addrspace(1)* %out, i64 1
Tom Stellard868fd922014-04-17 21:00:11 +0000366 store i32 1, i32 addrspace(1)* %arrayidx2, align 4
David Blaikie79e6c742015-02-27 19:29:02 +0000367 %arrayidx4 = getelementptr inbounds i32, i32 addrspace(1)* %out, i64 2
Tom Stellard868fd922014-04-17 21:00:11 +0000368 store i32 2, i32 addrspace(1)* %arrayidx4, align 4
David Blaikie79e6c742015-02-27 19:29:02 +0000369 %arrayidx6 = getelementptr inbounds i32, i32 addrspace(1)* %out, i64 3
Tom Stellard868fd922014-04-17 21:00:11 +0000370 store i32 2, i32 addrspace(1)* %arrayidx6, align 4
371 ret void
372}