blob: 5e51d56917476325f8a344dd94ce7e56be8b7fe9 [file] [log] [blame]
Tom Stellard754f80f2013-04-05 23:31:51 +00001; RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck --check-prefix=EG-CHECK %s
Tom Stellard6aa0d552013-06-14 22:12:24 +00002; RUN: llc < %s -march=r600 -mcpu=cayman | FileCheck --check-prefix=CM-CHECK %s
Tom Stellard70f13db2013-10-10 17:11:46 +00003; RUN: llc < %s -march=r600 -mcpu=verde -verify-machineinstrs | FileCheck --check-prefix=SI-CHECK %s
Tom Stellard754f80f2013-04-05 23:31:51 +00004
Tom Stellardd3ee8c12013-08-16 01:12:06 +00005;===------------------------------------------------------------------------===;
6; Global Address Space
7;===------------------------------------------------------------------------===;
8
9; i8 store
Tom Stellardaf775432013-10-23 00:44:32 +000010; EG-CHECK-LABEL: @store_i8
Tom Stellardd3ee8c12013-08-16 01:12:06 +000011; EG-CHECK: MEM_RAT MSKOR T[[RW_GPR:[0-9]]].XW, T{{[0-9]}}.X
12; EG-CHECK: VTX_READ_8 [[VAL:T[0-9]\.X]], [[VAL]]
Vincent Lejeune7e2c8322013-09-04 19:53:46 +000013; IG 0: Get the byte index and truncate the value
14; EG-CHECK: AND_INT T{{[0-9]}}.[[BI_CHAN:[XYZW]]], KC0[2].Y, literal.x
15; EG-CHECK-NEXT: AND_INT * T{{[0-9]}}.[[TRUNC_CHAN:[XYZW]]], [[VAL]], literal.y
16; EG-CHECK-NEXT: 3(4.203895e-45), 255(3.573311e-43)
; IG 1: Truncate the calculated shift amount for the mask
18; EG-CHECK: LSHL * T{{[0-9]}}.[[SHIFT_CHAN:[XYZW]]], PV.[[BI_CHAN]], literal.x
Tom Stellardd3ee8c12013-08-16 01:12:06 +000019; EG-CHECK-NEXT: 3
Tom Stellardd3ee8c12013-08-16 01:12:06 +000020; IG 2: Shift the value and the mask
Vincent Lejeune7e2c8322013-09-04 19:53:46 +000021; EG-CHECK: LSHL T[[RW_GPR]].X, T{{[0-9]}}.[[TRUNC_CHAN]], PV.[[SHIFT_CHAN]]
Tom Stellardd3ee8c12013-08-16 01:12:06 +000022; EG-CHECK: LSHL * T[[RW_GPR]].W, literal.x, PV.[[SHIFT_CHAN]]
23; EG-CHECK-NEXT: 255
24; IG 3: Initialize the Y and Z channels to zero
; XXX: An optimal scheduler should merge this into one of the previous IGs.
26; EG-CHECK: MOV T[[RW_GPR]].Y, 0.0
27; EG-CHECK: MOV * T[[RW_GPR]].Z, 0.0
28
Tom Stellardaf775432013-10-23 00:44:32 +000029; SI-CHECK-LABEL: @store_i8
Tom Stellardd3ee8c12013-08-16 01:12:06 +000030; SI-CHECK: BUFFER_STORE_BYTE
31
; Store the i8 argument %in through the global (addrspace 1) pointer %out.
define void @store_i8(i8 addrspace(1)* %out, i8 %in) {
entry:
  store i8 %in, i8 addrspace(1)* %out
  ret void
}
37
38; i16 store
Tom Stellardaf775432013-10-23 00:44:32 +000039; EG-CHECK-LABEL: @store_i16
Tom Stellardd3ee8c12013-08-16 01:12:06 +000040; EG-CHECK: MEM_RAT MSKOR T[[RW_GPR:[0-9]]].XW, T{{[0-9]}}.X
41; EG-CHECK: VTX_READ_16 [[VAL:T[0-9]\.X]], [[VAL]]
Vincent Lejeune7e2c8322013-09-04 19:53:46 +000042; IG 0: Get the byte index and truncate the value
43; EG-CHECK: AND_INT T{{[0-9]}}.[[BI_CHAN:[XYZW]]], KC0[2].Y, literal.x
44; EG-CHECK: AND_INT * T{{[0-9]}}.[[TRUNC_CHAN:[XYZW]]], [[VAL]], literal.y
45; EG-CHECK-NEXT: 3(4.203895e-45), 65535(9.183409e-41)
; IG 1: Truncate the calculated shift amount for the mask
47; EG-CHECK: LSHL * T{{[0-9]}}.[[SHIFT_CHAN:[XYZW]]], PV.[[BI_CHAN]], literal.x
48; EG-CHECK: 3
Tom Stellardd3ee8c12013-08-16 01:12:06 +000049; IG 2: Shift the value and the mask
Vincent Lejeune7e2c8322013-09-04 19:53:46 +000050; EG-CHECK: LSHL T[[RW_GPR]].X, T{{[0-9]}}.[[TRUNC_CHAN]], PV.[[SHIFT_CHAN]]
Tom Stellardd3ee8c12013-08-16 01:12:06 +000051; EG-CHECK: LSHL * T[[RW_GPR]].W, literal.x, PV.[[SHIFT_CHAN]]
52; EG-CHECK-NEXT: 65535
53; IG 3: Initialize the Y and Z channels to zero
; XXX: An optimal scheduler should merge this into one of the previous IGs.
55; EG-CHECK: MOV T[[RW_GPR]].Y, 0.0
56; EG-CHECK: MOV * T[[RW_GPR]].Z, 0.0
57
Tom Stellardaf775432013-10-23 00:44:32 +000058; SI-CHECK-LABEL: @store_i16
Tom Stellardd3ee8c12013-08-16 01:12:06 +000059; SI-CHECK: BUFFER_STORE_SHORT
; Store the i16 argument %in through the global (addrspace 1) pointer %out.
define void @store_i16(i16 addrspace(1)* %out, i16 %in) {
entry:
  store i16 %in, i16 addrspace(1)* %out
  ret void
}
65
Tom Stellardaf775432013-10-23 00:44:32 +000066; EG-CHECK-LABEL: @store_v2i8
Tom Stellardfbab8272013-08-16 01:12:11 +000067; EG-CHECK: MEM_RAT MSKOR
68; EG-CHECK-NOT: MEM_RAT MSKOR
Tom Stellardaf775432013-10-23 00:44:32 +000069; SI-CHECK-LABEL: @store_v2i8
Tom Stellardfbab8272013-08-16 01:12:11 +000070; SI-CHECK: BUFFER_STORE_BYTE
71; SI-CHECK: BUFFER_STORE_BYTE
; Truncate the <2 x i32> argument %in to <2 x i8> and store the result
; through the global (addrspace 1) pointer %out.
define void @store_v2i8(<2 x i8> addrspace(1)* %out, <2 x i32> %in) {
entry:
  %0 = trunc <2 x i32> %in to <2 x i8>
  store <2 x i8> %0, <2 x i8> addrspace(1)* %out
  ret void
}
78
79
Tom Stellardaf775432013-10-23 00:44:32 +000080; EG-CHECK-LABEL: @store_v2i16
Tom Stellardfbab8272013-08-16 01:12:11 +000081; EG-CHECK: MEM_RAT_CACHELESS STORE_RAW
Tom Stellardaf775432013-10-23 00:44:32 +000082; CM-CHECK-LABEL: @store_v2i16
Tom Stellardfbab8272013-08-16 01:12:11 +000083; CM-CHECK: MEM_RAT_CACHELESS STORE_DWORD
Tom Stellardaf775432013-10-23 00:44:32 +000084; SI-CHECK-LABEL: @store_v2i16
85; SI-CHECK: BUFFER_STORE_SHORT
86; SI-CHECK: BUFFER_STORE_SHORT
; Truncate the <2 x i32> argument %in to <2 x i16> and store the result
; through the global (addrspace 1) pointer %out.
define void @store_v2i16(<2 x i16> addrspace(1)* %out, <2 x i32> %in) {
entry:
  %0 = trunc <2 x i32> %in to <2 x i16>
  store <2 x i16> %0, <2 x i16> addrspace(1)* %out
  ret void
}
93
Tom Stellardaf775432013-10-23 00:44:32 +000094; EG-CHECK-LABEL: @store_v4i8
Tom Stellardfbab8272013-08-16 01:12:11 +000095; EG-CHECK: MEM_RAT_CACHELESS STORE_RAW
Tom Stellardaf775432013-10-23 00:44:32 +000096; CM-CHECK-LABEL: @store_v4i8
Tom Stellardfbab8272013-08-16 01:12:11 +000097; CM-CHECK: MEM_RAT_CACHELESS STORE_DWORD
Tom Stellardaf775432013-10-23 00:44:32 +000098; SI-CHECK-LABEL: @store_v4i8
Tom Stellardfbab8272013-08-16 01:12:11 +000099; SI-CHECK: BUFFER_STORE_BYTE
100; SI-CHECK: BUFFER_STORE_BYTE
101; SI-CHECK: BUFFER_STORE_BYTE
102; SI-CHECK: BUFFER_STORE_BYTE
; Truncate the <4 x i32> argument %in to <4 x i8> and store the result
; through the global (addrspace 1) pointer %out.
define void @store_v4i8(<4 x i8> addrspace(1)* %out, <4 x i32> %in) {
entry:
  %0 = trunc <4 x i32> %in to <4 x i8>
  store <4 x i8> %0, <4 x i8> addrspace(1)* %out
  ret void
}
109
Tom Stellard5a6b0d82013-04-19 02:10:53 +0000110; floating-point store
Tom Stellardaf775432013-10-23 00:44:32 +0000111; EG-CHECK-LABEL: @store_f32
Tom Stellardac00f9d2013-08-16 01:11:46 +0000112; EG-CHECK: MEM_RAT_CACHELESS STORE_RAW T{{[0-9]+\.X, T[0-9]+\.X}}, 1
Tom Stellardaf775432013-10-23 00:44:32 +0000113; CM-CHECK-LABEL: @store_f32
Tom Stellardac00f9d2013-08-16 01:11:46 +0000114; CM-CHECK: MEM_RAT_CACHELESS STORE_DWORD T{{[0-9]+\.X, T[0-9]+\.X}}
Tom Stellardaf775432013-10-23 00:44:32 +0000115; SI-CHECK-LABEL: @store_f32
Tom Stellard754f80f2013-04-05 23:31:51 +0000116; SI-CHECK: BUFFER_STORE_DWORD
117
; Store the float argument %in through the global (addrspace 1) pointer %out.
define void @store_f32(float addrspace(1)* %out, float %in) {
  store float %in, float addrspace(1)* %out
  ret void
}
Tom Stellard0125f2a2013-06-25 02:39:35 +0000122
Tom Stellardaf775432013-10-23 00:44:32 +0000123; EG-CHECK-LABEL: @store_v4i16
Tom Stellardfbab8272013-08-16 01:12:11 +0000124; EG-CHECK: MEM_RAT MSKOR
125; EG-CHECK: MEM_RAT MSKOR
126; EG-CHECK: MEM_RAT MSKOR
127; EG-CHECK: MEM_RAT MSKOR
128; EG-CHECK-NOT: MEM_RAT MSKOR
Tom Stellardaf775432013-10-23 00:44:32 +0000129; SI-CHECK-LABEL: @store_v4i16
Tom Stellardfbab8272013-08-16 01:12:11 +0000130; SI-CHECK: BUFFER_STORE_SHORT
131; SI-CHECK: BUFFER_STORE_SHORT
132; SI-CHECK: BUFFER_STORE_SHORT
133; SI-CHECK: BUFFER_STORE_SHORT
134; SI-CHECK-NOT: BUFFER_STORE_BYTE
; Truncate the <4 x i32> argument %in to <4 x i16> and store the result
; through the global (addrspace 1) pointer %out.
define void @store_v4i16(<4 x i16> addrspace(1)* %out, <4 x i32> %in) {
entry:
  %0 = trunc <4 x i32> %in to <4 x i16>
  store <4 x i16> %0, <4 x i16> addrspace(1)* %out
  ret void
}
141
Tom Stellarded2f6142013-07-18 21:43:42 +0000142; vec2 floating-point stores
Tom Stellardaf775432013-10-23 00:44:32 +0000143; EG-CHECK-LABEL: @store_v2f32
Tom Stellardac00f9d2013-08-16 01:11:46 +0000144; EG-CHECK: MEM_RAT_CACHELESS STORE_RAW
Tom Stellardaf775432013-10-23 00:44:32 +0000145; CM-CHECK-LABEL: @store_v2f32
Tom Stellardac00f9d2013-08-16 01:11:46 +0000146; CM-CHECK: MEM_RAT_CACHELESS STORE_DWORD
Tom Stellardaf775432013-10-23 00:44:32 +0000147; SI-CHECK-LABEL: @store_v2f32
Tom Stellarded2f6142013-07-18 21:43:42 +0000148; SI-CHECK: BUFFER_STORE_DWORDX2
149
; Build a <2 x float> from the scalar arguments (%a in element 0, %b in
; element 1) and store it through the global (addrspace 1) pointer %out.
define void @store_v2f32(<2 x float> addrspace(1)* %out, float %a, float %b) {
entry:
  ; Both elements of the zero initializer are overwritten by the two inserts.
  %0 = insertelement <2 x float> <float 0.0, float 0.0>, float %a, i32 0
  %1 = insertelement <2 x float> %0, float %b, i32 1
  store <2 x float> %1, <2 x float> addrspace(1)* %out
  ret void
}
157
Tom Stellardaf775432013-10-23 00:44:32 +0000158; EG-CHECK-LABEL: @store_v4i32
Tom Stellard6d1379e2013-08-16 01:12:00 +0000159; EG-CHECK: MEM_RAT_CACHELESS STORE_RAW
160; EG-CHECK-NOT: MEM_RAT_CACHELESS STORE_RAW
Tom Stellardaf775432013-10-23 00:44:32 +0000161; CM-CHECK-LABEL: @store_v4i32
Tom Stellard6d1379e2013-08-16 01:12:00 +0000162; CM-CHECK: MEM_RAT_CACHELESS STORE_DWORD
163; CM-CHECK-NOT: MEM_RAT_CACHELESS STORE_DWORD
Tom Stellardaf775432013-10-23 00:44:32 +0000164; SI-CHECK-LABEL: @store_v4i32
Tom Stellard6d1379e2013-08-16 01:12:00 +0000165; SI-CHECK: BUFFER_STORE_DWORDX4
; Store the <4 x i32> argument %in, without any conversion, through the
; global (addrspace 1) pointer %out.
define void @store_v4i32(<4 x i32> addrspace(1)* %out, <4 x i32> %in) {
entry:
  store <4 x i32> %in, <4 x i32> addrspace(1)* %out
  ret void
}
171
Tom Stellard2ffc3302013-08-26 15:05:44 +0000172;===------------------------------------------------------------------------===;
173; Local Address Space
174;===------------------------------------------------------------------------===;
175
Tom Stellardaf775432013-10-23 00:44:32 +0000176; EG-CHECK-LABEL: @store_local_i8
Tom Stellardf3d166a2013-08-26 15:05:49 +0000177; EG-CHECK: LDS_BYTE_WRITE
Tom Stellardaf775432013-10-23 00:44:32 +0000178; SI-CHECK-LABEL: @store_local_i8
Tom Stellardf3d166a2013-08-26 15:05:49 +0000179; SI-CHECK: DS_WRITE_B8
; Store the i8 argument %in through the local (addrspace 3) pointer %out.
define void @store_local_i8(i8 addrspace(3)* %out, i8 %in) {
  store i8 %in, i8 addrspace(3)* %out
  ret void
}
184
Tom Stellardaf775432013-10-23 00:44:32 +0000185; EG-CHECK-LABEL: @store_local_i16
Tom Stellardf3d166a2013-08-26 15:05:49 +0000186; EG-CHECK: LDS_SHORT_WRITE
Tom Stellardaf775432013-10-23 00:44:32 +0000187; SI-CHECK-LABEL: @store_local_i16
Tom Stellardf3d166a2013-08-26 15:05:49 +0000188; SI-CHECK: DS_WRITE_B16
; Store the i16 argument %in through the local (addrspace 3) pointer %out.
define void @store_local_i16(i16 addrspace(3)* %out, i16 %in) {
  store i16 %in, i16 addrspace(3)* %out
  ret void
}
193
Tom Stellardaf775432013-10-23 00:44:32 +0000194; EG-CHECK-LABEL: @store_local_v2i16
Tom Stellard2ffc3302013-08-26 15:05:44 +0000195; EG-CHECK: LDS_WRITE
Tom Stellardaf775432013-10-23 00:44:32 +0000196; CM-CHECK-LABEL: @store_local_v2i16
Tom Stellard2ffc3302013-08-26 15:05:44 +0000197; CM-CHECK: LDS_WRITE
Tom Stellardaf775432013-10-23 00:44:32 +0000198; SI-CHECK-LABEL: @store_local_v2i16
199; SI-CHECK: DS_WRITE_B16
200; SI-CHECK: DS_WRITE_B16
; Store the <2 x i16> argument %in through the local (addrspace 3) pointer
; %out.
define void @store_local_v2i16(<2 x i16> addrspace(3)* %out, <2 x i16> %in) {
entry:
  store <2 x i16> %in, <2 x i16> addrspace(3)* %out
  ret void
}
206
Tom Stellardaf775432013-10-23 00:44:32 +0000207; EG-CHECK-LABEL: @store_local_v4i8
Tom Stellard7da047c2013-08-26 15:05:55 +0000208; EG-CHECK: LDS_WRITE
Tom Stellardaf775432013-10-23 00:44:32 +0000209; CM-CHECK-LABEL: @store_local_v4i8
Tom Stellard7da047c2013-08-26 15:05:55 +0000210; CM-CHECK: LDS_WRITE
Tom Stellardaf775432013-10-23 00:44:32 +0000211; SI-CHECK-LABEL: @store_local_v4i8
Tom Stellard7da047c2013-08-26 15:05:55 +0000212; SI-CHECK: DS_WRITE_B8
213; SI-CHECK: DS_WRITE_B8
214; SI-CHECK: DS_WRITE_B8
215; SI-CHECK: DS_WRITE_B8
; Store the <4 x i8> argument %in through the local (addrspace 3) pointer
; %out.
define void @store_local_v4i8(<4 x i8> addrspace(3)* %out, <4 x i8> %in) {
entry:
  store <4 x i8> %in, <4 x i8> addrspace(3)* %out
  ret void
}
221
Tom Stellardaf775432013-10-23 00:44:32 +0000222; EG-CHECK-LABEL: @store_local_v2i32
Tom Stellard2ffc3302013-08-26 15:05:44 +0000223; EG-CHECK: LDS_WRITE
224; EG-CHECK: LDS_WRITE
Tom Stellardaf775432013-10-23 00:44:32 +0000225; CM-CHECK-LABEL: @store_local_v2i32
Tom Stellard2ffc3302013-08-26 15:05:44 +0000226; CM-CHECK: LDS_WRITE
227; CM-CHECK: LDS_WRITE
Tom Stellardaf775432013-10-23 00:44:32 +0000228; SI-CHECK-LABEL: @store_local_v2i32
Tom Stellard2ffc3302013-08-26 15:05:44 +0000229; SI-CHECK: DS_WRITE_B32
230; SI-CHECK: DS_WRITE_B32
; Store the <2 x i32> argument %in through the local (addrspace 3) pointer
; %out.
define void @store_local_v2i32(<2 x i32> addrspace(3)* %out, <2 x i32> %in) {
entry:
  store <2 x i32> %in, <2 x i32> addrspace(3)* %out
  ret void
}
236
Tom Stellardaf775432013-10-23 00:44:32 +0000237; EG-CHECK-LABEL: @store_local_v4i32
Tom Stellard2ffc3302013-08-26 15:05:44 +0000238; EG-CHECK: LDS_WRITE
239; EG-CHECK: LDS_WRITE
240; EG-CHECK: LDS_WRITE
241; EG-CHECK: LDS_WRITE
Tom Stellardaf775432013-10-23 00:44:32 +0000242; CM-CHECK-LABEL: @store_local_v4i32
Tom Stellard2ffc3302013-08-26 15:05:44 +0000243; CM-CHECK: LDS_WRITE
244; CM-CHECK: LDS_WRITE
245; CM-CHECK: LDS_WRITE
246; CM-CHECK: LDS_WRITE
Tom Stellardaf775432013-10-23 00:44:32 +0000247; SI-CHECK-LABEL: @store_local_v4i32
Tom Stellard2ffc3302013-08-26 15:05:44 +0000248; SI-CHECK: DS_WRITE_B32
249; SI-CHECK: DS_WRITE_B32
250; SI-CHECK: DS_WRITE_B32
251; SI-CHECK: DS_WRITE_B32
; Store the <4 x i32> argument %in through the local (addrspace 3) pointer
; %out.
define void @store_local_v4i32(<4 x i32> addrspace(3)* %out, <4 x i32> %in) {
entry:
  store <4 x i32> %in, <4 x i32> addrspace(3)* %out
  ret void
}
257
Tom Stellard0125f2a2013-06-25 02:39:35 +0000258; The stores in this function are combined by the optimizer to create a
259; 64-bit store with 32-bit alignment. This is legal for SI and the legalizer
260; should not try to split the 64-bit store back into 2 32-bit stores.
261;
262; Evergreen / Northern Islands don't support 64-bit stores yet, so there should
263; be two 32-bit stores.
264
Tom Stellardaf775432013-10-23 00:44:32 +0000265; EG-CHECK-LABEL: @vecload2
Tom Stellardac00f9d2013-08-16 01:11:46 +0000266; EG-CHECK: MEM_RAT_CACHELESS STORE_RAW
Tom Stellardaf775432013-10-23 00:44:32 +0000267; CM-CHECK-LABEL: @vecload2
Tom Stellardac00f9d2013-08-16 01:11:46 +0000268; CM-CHECK: MEM_RAT_CACHELESS STORE_DWORD
Tom Stellardaf775432013-10-23 00:44:32 +0000269; SI-CHECK-LABEL: @vecload2
Tom Stellard0125f2a2013-06-25 02:39:35 +0000270; SI-CHECK: BUFFER_STORE_DWORDX2
; Load two consecutive i32 values from the addrspace(2) pointer %mem and
; store them to two consecutive i32 slots behind the global (addrspace 1)
; pointer %out. Every access is only 4-byte aligned.
define void @vecload2(i32 addrspace(1)* nocapture %out, i32 addrspace(2)* nocapture %mem) #0 {
entry:
  ; Element 0 and element 1 of %mem.
  %0 = load i32 addrspace(2)* %mem, align 4
  %arrayidx1.i = getelementptr inbounds i32 addrspace(2)* %mem, i64 1
  %1 = load i32 addrspace(2)* %arrayidx1.i, align 4
  ; Two adjacent 32-bit stores to %out[0] and %out[1].
  store i32 %0, i32 addrspace(1)* %out, align 4
  %arrayidx1 = getelementptr inbounds i32 addrspace(1)* %out, i64 1
  store i32 %1, i32 addrspace(1)* %arrayidx1, align 4
  ret void
}
281
; Attribute group #0, referenced by @vecload2.
attributes #0 = { nounwind "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "unsafe-fp-math"="false" "use-soft-float"="false" }