; RUN: llc -march=amdgcn -mcpu=verde -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
; RUN: llc -march=r600 -mcpu=redwood -verify-machineinstrs < %s | FileCheck -check-prefix=EG -check-prefix=FUNC %s
; RUN: llc -march=r600 -mcpu=cayman -verify-machineinstrs < %s | FileCheck -check-prefix=CM -check-prefix=FUNC %s

; FUNC-LABEL: {{^}}store_i1:
; EG: MOVA_INT
; EG: MOV {{[\* ]*}}{{T[0-9]+\.[XYZW]}}, T(0 + AR.x).X+,
; EG: MOVA_INT
; EG: MOV {{[\* ]*}}T(0 + AR.x).X+,

; CM: MOVA_INT
; CM: MOV {{[\* ]*}}{{T[0-9]+\.[XYZW]}}, T(0 + AR.x).X+,
; CM: MOVA_INT
; CM: MOV {{[\* ]*}}T(0 + AR.x).X+,

; SI: buffer_store_byte
define amdgpu_kernel void @store_i1(i1 addrspace(0)* %out) {
entry:
  store i1 true, i1 addrspace(0)* %out
  ret void
}

; i8 store
; FUNC-LABEL: {{^}}store_i8:
; EG: LSHR * [[ADDRESS:T[0-9]\.[XYZW]]], KC0[2].Y, literal.x
; EG-NEXT: 2
; EG: MOVA_INT * AR.x (MASKED)
; EG: MOV [[OLD:T[0-9]\.[XYZW]]], {{.*}}AR.x

; IG 0: Get the byte index and truncate the value
; EG: AND_INT * T{{[0-9]}}.[[BI_CHAN:[XYZW]]], KC0[2].Y, literal.x
; EG: LSHL * T{{[0-9]}}.[[SHIFT_CHAN:[XYZW]]], PV.[[BI_CHAN]], literal.x
; EG-NEXT: 3(4.203895e-45)
; EG: AND_INT * T{{[0-9]}}.[[TRUNC_CHAN:[XYZW]]], KC0[2].Z, literal.x
; EG-NEXT: 255(3.573311e-43)

; EG: NOT_INT
; EG: AND_INT {{[\* ]*}}[[CLR_CHAN:T[0-9]\.[XYZW]]], {{.*}}[[OLD]]
; EG: OR_INT * [[RES:T[0-9]\.[XYZW]]]
; TODO: Is the reload necessary?
; EG: MOVA_INT * AR.x (MASKED), [[ADDRESS]]
; EG: MOV * T(0 + AR.x).X+, [[RES]]

; SI: buffer_store_byte

define amdgpu_kernel void @store_i8(i8 addrspace(0)* %out, i8 %in) {
entry:
  store i8 %in, i8 addrspace(0)* %out
  ret void
}

; i16 store
; FUNC-LABEL: {{^}}store_i16:
; EG: LSHR * [[ADDRESS:T[0-9]\.[XYZW]]], KC0[2].Y, literal.x
; EG-NEXT: 2
; EG: MOVA_INT * AR.x (MASKED)
; EG: MOV [[OLD:T[0-9]\.[XYZW]]], {{.*}}AR.x

; IG 0: Get the byte index and truncate the value
; EG: AND_INT * T{{[0-9]}}.[[BI_CHAN:[XYZW]]], KC0[2].Y, literal.x
; EG: LSHL * T{{[0-9]}}.[[SHIFT_CHAN:[XYZW]]], PV.[[BI_CHAN]], literal.x
; EG-NEXT: 3(4.203895e-45)
; EG: AND_INT * T{{[0-9]}}.[[TRUNC_CHAN:[XYZW]]], KC0[2].Z, literal.x
; EG-NEXT: 65535(9.183409e-41)

; EG: NOT_INT
; EG: AND_INT {{[\* ]*}}[[CLR_CHAN:T[0-9]\.[XYZW]]], {{.*}}[[OLD]]
; EG: OR_INT * [[RES:T[0-9]\.[XYZW]]]
; TODO: Is the reload necessary?
; EG: MOVA_INT * AR.x (MASKED), [[ADDRESS]]
; EG: MOV * T(0 + AR.x).X+, [[RES]]

; SI: buffer_store_short
define amdgpu_kernel void @store_i16(i16 addrspace(0)* %out, i16 %in) {
entry:
  store i16 %in, i16 addrspace(0)* %out
  ret void
}

; FUNC-LABEL: {{^}}store_i24:
; SI: s_lshr_b32 s{{[0-9]+}}, s{{[0-9]+}}, 16
; SI-DAG: buffer_store_byte
; SI-DAG: buffer_store_short

; EG: MOVA_INT
; EG: MOV {{[\* ]*}}{{T[0-9]+\.[XYZW]}}, T(0 + AR.x).X+,
; EG: MOVA_INT
; EG: MOV {{[\* ]*}}T(0 + AR.x).X+,
; TODO: This load and store can be eliminated
; EG: MOVA_INT
; EG: MOV {{[\* ]*}}{{T[0-9]+\.[XYZW]}}, T(0 + AR.x).X+,
; EG: MOVA_INT
; EG: MOV {{[\* ]*}}T(0 + AR.x).X+,

; CM: MOVA_INT
; CM: MOV {{[\* ]*}}{{T[0-9]+\.[XYZW]}}, T(0 + AR.x).X+,
; CM: MOVA_INT
; CM: MOV {{[\* ]*}}T(0 + AR.x).X+,
; TODO: This load and store can be eliminated
; CM: MOVA_INT
; CM: MOV {{[\* ]*}}{{T[0-9]+\.[XYZW]}}, T(0 + AR.x).X+,
; CM: MOVA_INT
; CM: MOV {{[\* ]*}}T(0 + AR.x).X+,
define amdgpu_kernel void @store_i24(i24 addrspace(0)* %out, i24 %in) {
entry:
  store i24 %in, i24 addrspace(0)* %out
  ret void
}

; FUNC-LABEL: {{^}}store_i25:
; SI: s_and_b32 [[AND:s[0-9]+]], s{{[0-9]+}}, 0x1ffffff{{$}}
; SI: v_mov_b32_e32 [[VAND:v[0-9]+]], [[AND]]
; SI: buffer_store_dword [[VAND]]

; EG: MOVA_INT
; EG: MOV {{[\* ]*}}T(0 + AR.x).X+,
; EG-NOT: MOVA_INT

; CM: MOVA_INT
; CM: MOV {{[\* ]*}}T(0 + AR.x).X+,
; CM-NOT: MOVA_INT
define amdgpu_kernel void @store_i25(i25 addrspace(0)* %out, i25 %in) {
entry:
  store i25 %in, i25 addrspace(0)* %out
  ret void
}

; FUNC-LABEL: {{^}}store_v2i8:
; v2i8 is naturally 2B aligned, treat as i16
; EG: MOVA_INT
; EG: MOV {{[\* ]*}}{{T[0-9]+\.[XYZW]}}, T(0 + AR.x).X+,
; EG: MOVA_INT
; EG: MOV {{[\* ]*}}T(0 + AR.x).X+,
; EG-NOT: MOVA_INT

; CM: MOVA_INT
; CM: MOV {{[\* ]*}}{{T[0-9]+\.[XYZW]}}, T(0 + AR.x).X+,
; CM: MOVA_INT
; CM: MOV {{[\* ]*}}T(0 + AR.x).X+,
; CM-NOT: MOVA_INT

; SI: buffer_store_short
define amdgpu_kernel void @store_v2i8(<2 x i8> addrspace(0)* %out, <2 x i32> %in) {
entry:
  %0 = trunc <2 x i32> %in to <2 x i8>
  store <2 x i8> %0, <2 x i8> addrspace(0)* %out
  ret void
}

; FUNC-LABEL: {{^}}store_v2i8_unaligned:
; EG: MOVA_INT
; EG: MOV {{[\* ]*}}{{T[0-9]+\.[XYZW]}}, T(0 + AR.x).X+,
; EG: MOVA_INT
; EG: MOV {{[\* ]*}}T(0 + AR.x).X+,
; TODO: This load and store cannot be eliminated,
; they might be different locations
; EG: MOVA_INT
; EG: MOV {{[\* ]*}}{{T[0-9]+\.[XYZW]}}, T(0 + AR.x).X+,
; EG: MOVA_INT
; EG: MOV {{[\* ]*}}T(0 + AR.x).X+,

; CM: MOVA_INT
; CM: MOV {{[\* ]*}}{{T[0-9]+\.[XYZW]}}, T(0 + AR.x).X+,
; CM: MOVA_INT
; CM: MOV {{[\* ]*}}T(0 + AR.x).X+,
; TODO: This load and store cannot be eliminated,
; they might be different locations
; CM: MOVA_INT
; CM: MOV {{[\* ]*}}{{T[0-9]+\.[XYZW]}}, T(0 + AR.x).X+,
; CM: MOVA_INT
; CM: MOV {{[\* ]*}}T(0 + AR.x).X+,

; SI: buffer_store_byte
define amdgpu_kernel void @store_v2i8_unaligned(<2 x i8> addrspace(0)* %out, <2 x i32> %in) {
entry:
  %0 = trunc <2 x i32> %in to <2 x i8>
  store <2 x i8> %0, <2 x i8> addrspace(0)* %out, align 1
  ret void
}


; FUNC-LABEL: {{^}}store_v2i16:
; v2i16 is naturally 4B aligned, treat as i32
; EG: MOVA_INT
; EG: MOV {{[\* ]*}}T(0 + AR.x).X+,
; EG-NOT: MOVA_INT

; CM: MOVA_INT
; CM: MOV {{[\* ]*}}T(0 + AR.x).X+,
; CM-NOT: MOVA_INT

; SI: buffer_store_dword
define amdgpu_kernel void @store_v2i16(<2 x i16> addrspace(0)* %out, <2 x i32> %in) {
entry:
  %0 = trunc <2 x i32> %in to <2 x i16>
  store <2 x i16> %0, <2 x i16> addrspace(0)* %out
  ret void
}

; FUNC-LABEL: {{^}}store_v2i16_unaligned:
; EG: MOVA_INT
; EG: MOV {{[\* ]*}}{{T[0-9]+\.[XYZW]}}, T(0 + AR.x).X+,
; EG: MOVA_INT
; EG: MOV {{[\* ]*}}T(0 + AR.x).X+,
; TODO: This load and store cannot be eliminated,
; they might be different locations
; EG: MOVA_INT
; EG: MOV {{[\* ]*}}{{T[0-9]+\.[XYZW]}}, T(0 + AR.x).X+,
; EG: MOVA_INT
; EG: MOV {{[\* ]*}}T(0 + AR.x).X+,

; CM: MOVA_INT
; CM: MOV {{[\* ]*}}{{T[0-9]+\.[XYZW]}}, T(0 + AR.x).X+,
; CM: MOVA_INT
; CM: MOV {{[\* ]*}}T(0 + AR.x).X+,
; TODO: This load and store cannot be eliminated,
; they might be different locations
; CM: MOVA_INT
; CM: MOV {{[\* ]*}}{{T[0-9]+\.[XYZW]}}, T(0 + AR.x).X+,
; CM: MOVA_INT
; CM: MOV {{[\* ]*}}T(0 + AR.x).X+,

; SI: buffer_store_short
; SI: buffer_store_short
define amdgpu_kernel void @store_v2i16_unaligned(<2 x i16> addrspace(0)* %out, <2 x i32> %in) {
entry:
  %0 = trunc <2 x i32> %in to <2 x i16>
  store <2 x i16> %0, <2 x i16> addrspace(0)* %out, align 2
  ret void
}

; FUNC-LABEL: {{^}}store_v4i8:
; EG: MOVA_INT
; EG: MOV {{[\* ]*}}T(0 + AR.x).X+,
; EG-NOT: MOVA_INT

; CM: MOVA_INT
; CM: MOV {{[\* ]*}}T(0 + AR.x).X+,
; CM-NOT: MOVA_INT

; SI: buffer_store_dword
define amdgpu_kernel void @store_v4i8(<4 x i8> addrspace(0)* %out, <4 x i32> %in) {
entry:
  %0 = trunc <4 x i32> %in to <4 x i8>
  store <4 x i8> %0, <4 x i8> addrspace(0)* %out
  ret void
}

; FUNC-LABEL: {{^}}store_v4i8_unaligned:
; EG: MOVA_INT
; EG: MOV {{[\* ]*}}{{T[0-9]+\.[XYZW]}}, T(0 + AR.x).X+,
; EG: MOVA_INT
; EG: MOV {{[\* ]*}}T(0 + AR.x).X+,
; TODO: This load and store cannot be eliminated,
; they might be different locations
; EG: MOVA_INT
; EG: MOV {{[\* ]*}}{{T[0-9]+\.[XYZW]}}, T(0 + AR.x).X+,
; EG: MOVA_INT
; EG: MOV {{[\* ]*}}T(0 + AR.x).X+,
; TODO: This load and store cannot be eliminated,
; they might be different locations
; EG: MOVA_INT
; EG: MOV {{[\* ]*}}{{T[0-9]+\.[XYZW]}}, T(0 + AR.x).X+,
; EG: MOVA_INT
; EG: MOV {{[\* ]*}}T(0 + AR.x).X+,
; TODO: This load and store cannot be eliminated,
; they might be different locations
; EG: MOVA_INT
; EG: MOV {{[\* ]*}}{{T[0-9]+\.[XYZW]}}, T(0 + AR.x).X+,
; EG: MOVA_INT
; EG: MOV {{[\* ]*}}T(0 + AR.x).X+,

; CM: MOVA_INT
; CM: MOV {{[\* ]*}}{{T[0-9]+\.[XYZW]}}, T(0 + AR.x).X+,
; CM: MOVA_INT
; CM: MOV {{[\* ]*}}T(0 + AR.x).X+,
; TODO: This load and store cannot be eliminated,
; they might be different locations
; CM: MOVA_INT
; CM: MOV {{[\* ]*}}{{T[0-9]+\.[XYZW]}}, T(0 + AR.x).X+,
; CM: MOVA_INT
; CM: MOV {{[\* ]*}}T(0 + AR.x).X+,
; TODO: This load and store cannot be eliminated,
; they might be different locations
; CM: MOVA_INT
; CM: MOV {{[\* ]*}}{{T[0-9]+\.[XYZW]}}, T(0 + AR.x).X+,
; CM: MOVA_INT
; CM: MOV {{[\* ]*}}T(0 + AR.x).X+,
; TODO: This load and store cannot be eliminated,
; they might be different locations
; CM: MOVA_INT
; CM: MOV {{[\* ]*}}{{T[0-9]+\.[XYZW]}}, T(0 + AR.x).X+,
; CM: MOVA_INT
; CM: MOV {{[\* ]*}}T(0 + AR.x).X+,

; SI: buffer_store_byte
; SI: buffer_store_byte
; SI: buffer_store_byte
; SI: buffer_store_byte
; SI-NOT: buffer_store_dword
define amdgpu_kernel void @store_v4i8_unaligned(<4 x i8> addrspace(0)* %out, <4 x i32> %in) {
entry:
  %0 = trunc <4 x i32> %in to <4 x i8>
  store <4 x i8> %0, <4 x i8> addrspace(0)* %out, align 1
  ret void
}

; FUNC-LABEL: {{^}}store_v8i8_unaligned:
; EG: MOVA_INT
; EG: MOV {{[\* ]*}}{{T[0-9]+\.[XYZW]}}, T(0 + AR.x).X+,
; EG: MOVA_INT
; EG: MOV {{[\* ]*}}T(0 + AR.x).X+,
; TODO: This load and store cannot be eliminated,
; they might be different locations
; EG: MOVA_INT
; EG: MOV {{[\* ]*}}{{T[0-9]+\.[XYZW]}}, T(0 + AR.x).X+,
; EG: MOVA_INT
; EG: MOV {{[\* ]*}}T(0 + AR.x).X+,
; TODO: This load and store cannot be eliminated,
; they might be different locations
; EG: MOVA_INT
; EG: MOV {{[\* ]*}}{{T[0-9]+\.[XYZW]}}, T(0 + AR.x).X+,
; EG: MOVA_INT
; EG: MOV {{[\* ]*}}T(0 + AR.x).X+,
; TODO: This load and store cannot be eliminated,
; they might be different locations
; EG: MOVA_INT
; EG: MOV {{[\* ]*}}{{T[0-9]+\.[XYZW]}}, T(0 + AR.x).X+,
; EG: MOVA_INT
; EG: MOV {{[\* ]*}}T(0 + AR.x).X+,
; TODO: This load and store cannot be eliminated,
; they might be different locations
; EG: MOVA_INT
; EG: MOV {{[\* ]*}}{{T[0-9]+\.[XYZW]}}, T(0 + AR.x).X+,
; EG: MOVA_INT
; EG: MOV {{[\* ]*}}T(0 + AR.x).X+,
; TODO: This load and store cannot be eliminated,
; they might be different locations
; EG: MOVA_INT
; EG: MOV {{[\* ]*}}{{T[0-9]+\.[XYZW]}}, T(0 + AR.x).X+,
; EG: MOVA_INT
; EG: MOV {{[\* ]*}}T(0 + AR.x).X+,
; TODO: This load and store cannot be eliminated,
; they might be different locations
; EG: MOVA_INT
; EG: MOV {{[\* ]*}}{{T[0-9]+\.[XYZW]}}, T(0 + AR.x).X+,
; EG: MOVA_INT
; EG: MOV {{[\* ]*}}T(0 + AR.x).X+,
; TODO: This load and store cannot be eliminated,
; they might be different locations
; EG: MOVA_INT
; EG: MOV {{[\* ]*}}{{T[0-9]+\.[XYZW]}}, T(0 + AR.x).X+,
; EG: MOVA_INT
; EG: MOV {{[\* ]*}}T(0 + AR.x).X+,

; CM: MOVA_INT
; CM: MOV {{[\* ]*}}{{T[0-9]+\.[XYZW]}}, T(0 + AR.x).X+,
; CM: MOVA_INT
; CM: MOV {{[\* ]*}}T(0 + AR.x).X+,
; TODO: This load and store cannot be eliminated,
; they might be different locations
; CM: MOVA_INT
; CM: MOV {{[\* ]*}}{{T[0-9]+\.[XYZW]}}, T(0 + AR.x).X+,
; CM: MOVA_INT
; CM: MOV {{[\* ]*}}T(0 + AR.x).X+,
; TODO: This load and store cannot be eliminated,
; they might be different locations
; CM: MOVA_INT
; CM: MOV {{[\* ]*}}{{T[0-9]+\.[XYZW]}}, T(0 + AR.x).X+,
; CM: MOVA_INT
; CM: MOV {{[\* ]*}}T(0 + AR.x).X+,
; TODO: This load and store cannot be eliminated,
; they might be different locations
; CM: MOVA_INT
; CM: MOV {{[\* ]*}}{{T[0-9]+\.[XYZW]}}, T(0 + AR.x).X+,
; CM: MOVA_INT
; CM: MOV {{[\* ]*}}T(0 + AR.x).X+,
; TODO: This load and store cannot be eliminated,
; they might be different locations
; CM: MOVA_INT
; CM: MOV {{[\* ]*}}{{T[0-9]+\.[XYZW]}}, T(0 + AR.x).X+,
; CM: MOVA_INT
; CM: MOV {{[\* ]*}}T(0 + AR.x).X+,
; TODO: This load and store cannot be eliminated,
; they might be different locations
; CM: MOVA_INT
; CM: MOV {{[\* ]*}}{{T[0-9]+\.[XYZW]}}, T(0 + AR.x).X+,
; CM: MOVA_INT
; CM: MOV {{[\* ]*}}T(0 + AR.x).X+,
; TODO: This load and store cannot be eliminated,
; they might be different locations
; CM: MOVA_INT
; CM: MOV {{[\* ]*}}{{T[0-9]+\.[XYZW]}}, T(0 + AR.x).X+,
; CM: MOVA_INT
; CM: MOV {{[\* ]*}}T(0 + AR.x).X+,
; TODO: This load and store cannot be eliminated,
; they might be different locations
; CM: MOVA_INT
; CM: MOV {{[\* ]*}}{{T[0-9]+\.[XYZW]}}, T(0 + AR.x).X+,
; CM: MOVA_INT
; CM: MOV {{[\* ]*}}T(0 + AR.x).X+,

; SI: buffer_store_byte
; SI: buffer_store_byte
; SI: buffer_store_byte
; SI: buffer_store_byte
; SI: buffer_store_byte
; SI: buffer_store_byte
; SI: buffer_store_byte
; SI: buffer_store_byte
; SI-NOT: buffer_store_dword
define amdgpu_kernel void @store_v8i8_unaligned(<8 x i8> addrspace(0)* %out, <8 x i32> %in) {
entry:
  %0 = trunc <8 x i32> %in to <8 x i8>
  store <8 x i8> %0, <8 x i8> addrspace(0)* %out, align 1
  ret void
}

; FUNC-LABEL: {{^}}store_v4i8_halfaligned:
; EG: MOVA_INT
; EG: MOV {{[\* ]*}}{{T[0-9]+\.[XYZW]}}, T(0 + AR.x).X+,
; EG: MOVA_INT
; EG: MOV {{[\* ]*}}T(0 + AR.x).X+,
; TODO: This load and store cannot be eliminated,
; they might be different locations
; EG: MOVA_INT
; EG: MOV {{[\* ]*}}{{T[0-9]+\.[XYZW]}}, T(0 + AR.x).X+,
; EG: MOVA_INT
; EG: MOV {{[\* ]*}}T(0 + AR.x).X+,

; CM: MOVA_INT
; CM: MOV {{[\* ]*}}{{T[0-9]+\.[XYZW]}}, T(0 + AR.x).X+,
; CM: MOVA_INT
; CM: MOV {{[\* ]*}}T(0 + AR.x).X+,
; TODO: This load and store cannot be eliminated,
; they might be different locations
; CM: MOVA_INT
; CM: MOV {{[\* ]*}}{{T[0-9]+\.[XYZW]}}, T(0 + AR.x).X+,
; CM: MOVA_INT
; CM: MOV {{[\* ]*}}T(0 + AR.x).X+,

; SI: buffer_store_short
; SI: buffer_store_short
; SI-NOT: buffer_store_dword
define amdgpu_kernel void @store_v4i8_halfaligned(<4 x i8> addrspace(0)* %out, <4 x i32> %in) {
entry:
  %0 = trunc <4 x i32> %in to <4 x i8>
  store <4 x i8> %0, <4 x i8> addrspace(0)* %out, align 2
  ret void
}

; floating-point store
; FUNC-LABEL: {{^}}store_f32:
; EG: MOVA_INT
; EG: MOV {{[\* ]*}}T(0 + AR.x).X+,

; CM: MOVA_INT
; CM: MOV {{[\* ]*}}T(0 + AR.x).X+,

; SI: buffer_store_dword

define amdgpu_kernel void @store_f32(float addrspace(0)* %out, float %in) {
  store float %in, float addrspace(0)* %out
  ret void
}

; FUNC-LABEL: {{^}}store_v4i16:
; EG: MOVA_INT
; EG: MOV {{[\* ]*}}T(0 + AR.x).X+,
; EG: MOVA_INT
; EG: MOV {{[\* ]*}}T(0 + AR.x).X+,

; CM: MOVA_INT
; CM: MOV {{[\* ]*}}T(0 + AR.x).X+,
; CM: MOVA_INT
; CM: MOV {{[\* ]*}}T(0 + AR.x).X+,

;TODO: why not x2?
; XSI: buffer_store_dwordx2
; SI: buffer_store_dword
; SI: buffer_store_dword
define amdgpu_kernel void @store_v4i16(<4 x i16> addrspace(0)* %out, <4 x i32> %in) {
entry:
  %0 = trunc <4 x i32> %in to <4 x i16>
  store <4 x i16> %0, <4 x i16> addrspace(0)* %out
  ret void
}

; vec2 floating-point stores
; FUNC-LABEL: {{^}}store_v2f32:
; EG: MOVA_INT
; EG: MOV {{[\* ]*}}T(0 + AR.x).X+,
; EG: MOVA_INT
; EG: MOV {{[\* ]*}}T(0 + AR.x).X+,

; CM: MOVA_INT
; CM: MOV {{[\* ]*}}T(0 + AR.x).X+,
; CM: MOVA_INT
; CM: MOV {{[\* ]*}}T(0 + AR.x).X+,

;TODO: why not x2?
; XSI: buffer_store_dwordx2
; SI: buffer_store_dword
; SI: buffer_store_dword

define amdgpu_kernel void @store_v2f32(<2 x float> addrspace(0)* %out, float %a, float %b) {
entry:
  %0 = insertelement <2 x float> <float 0.0, float 0.0>, float %a, i32 0
  %1 = insertelement <2 x float> %0, float %b, i32 1
  store <2 x float> %1, <2 x float> addrspace(0)* %out
  ret void
}

; FUNC-LABEL: {{^}}store_v3i32:
; EG: MOVA_INT
; EG: MOV {{[\* ]*}}T(0 + AR.x).X+,
; EG: MOVA_INT
; EG: MOV {{[\* ]*}}T(0 + AR.x).X+,
; EG: MOVA_INT
; EG: MOV {{[\* ]*}}T(0 + AR.x).X+,

; CM: MOVA_INT
; CM: MOV {{[\* ]*}}T(0 + AR.x).X+,
; CM: MOVA_INT
; CM: MOV {{[\* ]*}}T(0 + AR.x).X+,
; CM: MOVA_INT
; CM: MOV {{[\* ]*}}T(0 + AR.x).X+,

;TODO: why not x2?
; XSI-DAG: buffer_store_dwordx2
; SI: buffer_store_dword
; SI: buffer_store_dword
; SI: buffer_store_dword

define amdgpu_kernel void @store_v3i32(<3 x i32> addrspace(0)* %out, <3 x i32> %a) nounwind {
  store <3 x i32> %a, <3 x i32> addrspace(0)* %out, align 16
  ret void
}

; FUNC-LABEL: {{^}}store_v4i32:
; EG: MOVA_INT
; EG: MOV {{[\* ]*}}T(0 + AR.x).X+,
; EG: MOVA_INT
; EG: MOV {{[\* ]*}}T(0 + AR.x).X+,
; EG: MOVA_INT
; EG: MOV {{[\* ]*}}T(0 + AR.x).X+,
; EG: MOVA_INT
; EG: MOV {{[\* ]*}}T(0 + AR.x).X+,

; CM: MOVA_INT
; CM: MOV {{[\* ]*}}T(0 + AR.x).X+,
; CM: MOVA_INT
; CM: MOV {{[\* ]*}}T(0 + AR.x).X+,
; CM: MOVA_INT
; CM: MOV {{[\* ]*}}T(0 + AR.x).X+,
; CM: MOVA_INT
; CM: MOV {{[\* ]*}}T(0 + AR.x).X+,

;TODO: why not x4?
; XSI: buffer_store_dwordx4
; SI: buffer_store_dword
; SI: buffer_store_dword
; SI: buffer_store_dword
; SI: buffer_store_dword
define amdgpu_kernel void @store_v4i32(<4 x i32> addrspace(0)* %out, <4 x i32> %in) {
entry:
  store <4 x i32> %in, <4 x i32> addrspace(0)* %out
  ret void
}

; FUNC-LABEL: {{^}}store_v4i32_unaligned:
; EG: MOVA_INT
; EG: MOV {{[\* ]*}}T(0 + AR.x).X+,
; EG: MOVA_INT
; EG: MOV {{[\* ]*}}T(0 + AR.x).X+,
; EG: MOVA_INT
; EG: MOV {{[\* ]*}}T(0 + AR.x).X+,
; EG: MOVA_INT
; EG: MOV {{[\* ]*}}T(0 + AR.x).X+,

; CM: MOVA_INT
; CM: MOV {{[\* ]*}}T(0 + AR.x).X+,
; CM: MOVA_INT
; CM: MOV {{[\* ]*}}T(0 + AR.x).X+,
; CM: MOVA_INT
; CM: MOV {{[\* ]*}}T(0 + AR.x).X+,
; CM: MOVA_INT
; CM: MOV {{[\* ]*}}T(0 + AR.x).X+,

;TODO: why not x4?
; XSI: buffer_store_dwordx4
; SI: buffer_store_dword
; SI: buffer_store_dword
; SI: buffer_store_dword
; SI: buffer_store_dword
define amdgpu_kernel void @store_v4i32_unaligned(<4 x i32> addrspace(0)* %out, <4 x i32> %in) {
entry:
  store <4 x i32> %in, <4 x i32> addrspace(0)* %out, align 4
  ret void
}

; v4f32 store
; FUNC-LABEL: {{^}}store_v4f32:
; EG: MOVA_INT
; EG: MOV {{[\* ]*}}T(0 + AR.x).X+,
; EG: MOVA_INT
; EG: MOV {{[\* ]*}}T(0 + AR.x).X+,
; EG: MOVA_INT
; EG: MOV {{[\* ]*}}T(0 + AR.x).X+,
; EG: MOVA_INT
; EG: MOV {{[\* ]*}}T(0 + AR.x).X+,

; CM: MOVA_INT
; CM: MOV {{[\* ]*}}T(0 + AR.x).X+,
; CM: MOVA_INT
; CM: MOV {{[\* ]*}}T(0 + AR.x).X+,
; CM: MOVA_INT
; CM: MOV {{[\* ]*}}T(0 + AR.x).X+,
; CM: MOVA_INT
; CM: MOV {{[\* ]*}}T(0 + AR.x).X+,

;TODO: why not x4?
; XSI: buffer_store_dwordx4
; SI: buffer_store_dword
; SI: buffer_store_dword
; SI: buffer_store_dword
; SI: buffer_store_dword
define amdgpu_kernel void @store_v4f32(<4 x float> addrspace(0)* %out, <4 x float> addrspace(0)* %in) {
  %1 = load <4 x float>, <4 x float> addrspace(0) * %in
  store <4 x float> %1, <4 x float> addrspace(0)* %out
  ret void
}

; FUNC-LABEL: {{^}}store_i64_i8:
; EG: MOVA_INT
; EG: MOV {{[\* ]*}}{{T[0-9]+\.[XYZW]}}, T(0 + AR.x).X+,
; EG: MOVA_INT
; EG: MOV {{[\* ]*}}T(0 + AR.x).X+,

; CM: MOVA_INT
; CM: MOV {{[\* ]*}}{{T[0-9]+\.[XYZW]}}, T(0 + AR.x).X+,
; CM: MOVA_INT
; CM: MOV {{[\* ]*}}T(0 + AR.x).X+,

; SI: buffer_store_byte
define amdgpu_kernel void @store_i64_i8(i8 addrspace(0)* %out, i64 %in) {
entry:
  %0 = trunc i64 %in to i8
  store i8 %0, i8 addrspace(0)* %out
  ret void
}

; FUNC-LABEL: {{^}}store_i64_i16:
; EG: MOVA_INT
; EG: MOV {{[\* ]*}}{{T[0-9]+\.[XYZW]}}, T(0 + AR.x).X+,
; EG: MOVA_INT
; EG: MOV {{[\* ]*}}T(0 + AR.x).X+,

; CM: MOVA_INT
; CM: MOV {{[\* ]*}}{{T[0-9]+\.[XYZW]}}, T(0 + AR.x).X+,
; CM: MOVA_INT
; CM: MOV {{[\* ]*}}T(0 + AR.x).X+,

; SI: buffer_store_short
define amdgpu_kernel void @store_i64_i16(i16 addrspace(0)* %out, i64 %in) {
entry:
  %0 = trunc i64 %in to i16
  store i16 %0, i16 addrspace(0)* %out
  ret void
}

; The stores in this function are combined by the optimizer to create a
; 64-bit store with 32-bit alignment. This is legal and the legalizer
; should not try to split the 64-bit store back into 2 32-bit stores.

; FUNC-LABEL: {{^}}vecload2:
; EG: MOVA_INT
; EG: MOV {{[\* ]*}}T(0 + AR.x).X+,
; EG: MOVA_INT
; EG: MOV {{[\* ]*}}T(0 + AR.x).X+,

; CM: MOVA_INT
; CM: MOV {{[\* ]*}}T(0 + AR.x).X+,
; CM: MOVA_INT
; CM: MOV {{[\* ]*}}T(0 + AR.x).X+,

;TODO: why not x2?
; XSI: buffer_store_dwordx2
; SI: buffer_store_dword
; SI: buffer_store_dword
define amdgpu_kernel void @vecload2(i32 addrspace(0)* nocapture %out, i32 addrspace(2)* nocapture %mem) #0 {
entry:
  %0 = load i32, i32 addrspace(2)* %mem, align 4
  %arrayidx1.i = getelementptr inbounds i32, i32 addrspace(2)* %mem, i64 1
  %1 = load i32, i32 addrspace(2)* %arrayidx1.i, align 4
  store i32 %0, i32 addrspace(0)* %out, align 4
  %arrayidx1 = getelementptr inbounds i32, i32 addrspace(0)* %out, i64 1
  store i32 %1, i32 addrspace(0)* %arrayidx1, align 4
  ret void
}

; When i128 was a legal type this program generated cannot select errors:

; FUNC-LABEL: {{^}}"i128-const-store":
; EG: MOVA_INT
; EG: MOV {{[\* ]*}}T(0 + AR.x).X+,
; EG: MOVA_INT
; EG: MOV {{[\* ]*}}T(0 + AR.x).X+,
; EG: MOVA_INT
; EG: MOV {{[\* ]*}}T(0 + AR.x).X+,
; EG: MOVA_INT
; EG: MOV {{[\* ]*}}T(0 + AR.x).X+,

; CM: MOVA_INT
; CM: MOV {{[\* ]*}}T(0 + AR.x).X+,
; CM: MOVA_INT
; CM: MOV {{[\* ]*}}T(0 + AR.x).X+,
; CM: MOVA_INT
; CM: MOV {{[\* ]*}}T(0 + AR.x).X+,
; CM: MOVA_INT
; CM: MOV {{[\* ]*}}T(0 + AR.x).X+,

;TODO: why not x4?
; XSI: buffer_store_dwordx4
; SI: buffer_store_dword
; SI: buffer_store_dword
; SI: buffer_store_dword
; SI: buffer_store_dword
define amdgpu_kernel void @i128-const-store(i32 addrspace(0)* %out) {
entry:
  store i32 1, i32 addrspace(0)* %out, align 4
  %arrayidx2 = getelementptr inbounds i32, i32 addrspace(0)* %out, i64 1
  store i32 1, i32 addrspace(0)* %arrayidx2, align 4
  %arrayidx4 = getelementptr inbounds i32, i32 addrspace(0)* %out, i64 2
  store i32 2, i32 addrspace(0)* %arrayidx4, align 4
  %arrayidx6 = getelementptr inbounds i32, i32 addrspace(0)* %out, i64 3
  store i32 2, i32 addrspace(0)* %arrayidx6, align 4
  ret void
}


attributes #0 = { nounwind }