; Return-value tests for amdgpu_vs and amdgpu_ps functions.
; Both llc invocations below (tahiti and tonga) feed the same FileCheck prefix,
; so every expectation in this file has to hold for both subtargets.
; RUN: llc -march=amdgcn -mcpu=tahiti -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s
; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s

; Export intrinsic called by several of the tests in this file before they return.
declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float)

; Test @vgpr: an amdgpu_vs function that passes its float argument %3 four
; times to llvm.SI.export and then returns {%3 + 1.0, %3}.
; Expected output: v0 is copied to v1, the sum is written to v0, the export
; reads v1, an s_waitcnt expcnt(0) follows, and no s_endpgm is emitted.
; GCN-LABEL: {{^}}vgpr:
; GCN: v_mov_b32_e32 v1, v0
; GCN-DAG: v_add_f32_e32 v0, 1.0, v1
; GCN-DAG: exp mrt0 v1, v1, v1, v1 done compr vm
; GCN: s_waitcnt expcnt(0)
; GCN-NOT: s_endpgm
define amdgpu_vs {float, float} @vgpr([9 x <16 x i8>] addrspace(2)* byval, i32 inreg, i32 inreg, float) {
  call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %3, float %3, float %3, float %3)
  %x = fadd float %3, 1.0
  %a = insertvalue {float, float} undef, float %x, 0
  %b = insertvalue {float, float} %a, float %3, 1
  ret {float, float} %b
}

; Test @vgpr_literal: an amdgpu_vs function that exports its float argument %3
; and then returns the constant aggregate {1.0, 2.0, 4.0, -1.0}.
; Expected output: v0 is copied to v4 and the export reads v4; the four
; constants are then materialised in v0..v3 (in any order), followed by
; s_waitcnt expcnt(0), with no s_endpgm.
; GCN-LABEL: {{^}}vgpr_literal:
; GCN: v_mov_b32_e32 v4, v0
; GCN: exp mrt0 v4, v4, v4, v4 done compr vm

; GCN-DAG: v_mov_b32_e32 v0, 1.0
; GCN-DAG: v_mov_b32_e32 v1, 2.0
; GCN-DAG: v_mov_b32_e32 v2, 4.0
; GCN-DAG: v_mov_b32_e32 v3, -1.0
; GCN: s_waitcnt expcnt(0)
; GCN-NOT: s_endpgm
define amdgpu_vs {float, float, float, float} @vgpr_literal([9 x <16 x i8>] addrspace(2)* byval, i32 inreg, i32 inreg, float) {
  call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %3, float %3, float %3, float %3)
  ret {float, float, float, float} {float 1.0, float 2.0, float 4.0, float -1.0}
}


; Test @vgpr_ps_addr0: an amdgpu_ps function using attribute group #0 that
; returns five floats taken from its arguments: both lanes of %4, lane 0 of %7,
; lane 0 of %8 (all bitcast from i32), and the float argument %12.
; The four .long directives are checked ahead of the label line. 165580 and
; 165584 are presumably the SPI_PS_INPUT_ENA / SPI_PS_INPUT_ADDR register ids
; (TODO confirm against the backend); each is followed by its expected value,
; 562 in both cases here.
; Expected code: nothing is moved into v0..v2, v3 is loaded from v4, v4 is
; loaded from v6, and no s_endpgm is emitted.
; GCN: .long 165580
; GCN-NEXT: .long 562
; GCN-NEXT: .long 165584
; GCN-NEXT: .long 562
; GCN-LABEL: {{^}}vgpr_ps_addr0:
; GCN-NOT: v_mov_b32_e32 v0
; GCN-NOT: v_mov_b32_e32 v1
; GCN-NOT: v_mov_b32_e32 v2
; GCN: v_mov_b32_e32 v3, v4
; GCN: v_mov_b32_e32 v4, v6
; GCN-NOT: s_endpgm
define amdgpu_ps {float, float, float, float, float} @vgpr_ps_addr0([9 x <16 x i8>] addrspace(2)* byval, i32 inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, float, float, float) #0 {
  %i0 = extractelement <2 x i32> %4, i32 0
  %i1 = extractelement <2 x i32> %4, i32 1
  %i2 = extractelement <2 x i32> %7, i32 0
  %i3 = extractelement <2 x i32> %8, i32 0
  %f0 = bitcast i32 %i0 to float
  %f1 = bitcast i32 %i1 to float
  %f2 = bitcast i32 %i2 to float
  %f3 = bitcast i32 %i3 to float
  %r0 = insertvalue {float, float, float, float, float} undef, float %f0, 0
  %r1 = insertvalue {float, float, float, float, float} %r0, float %f1, 1
  %r2 = insertvalue {float, float, float, float, float} %r1, float %f2, 2
  %r3 = insertvalue {float, float, float, float, float} %r2, float %f3, 3
  %r4 = insertvalue {float, float, float, float, float} %r3, float %12, 4
  ret {float, float, float, float, float} %r4
}


; Test @ps_input_ena_no_inputs: an amdgpu_ps function using attribute group #0
; that reads none of its arguments and returns the constant 1.0.
; The .long directives are checked ahead of the label line and expect the value
; 1 after both 165580 and 165584 (presumably SPI_PS_INPUT_ENA / SPI_PS_INPUT_ADDR
; - TODO confirm).
; Expected code: 1.0 is materialised in v0 and no s_endpgm is emitted.
; GCN: .long 165580
; GCN-NEXT: .long 1
; GCN-NEXT: .long 165584
; GCN-NEXT: .long 1
; GCN-LABEL: {{^}}ps_input_ena_no_inputs:
; GCN: v_mov_b32_e32 v0, 1.0
; GCN-NOT: s_endpgm
define amdgpu_ps float @ps_input_ena_no_inputs([9 x <16 x i8>] addrspace(2)* byval, i32 inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, float, float, float) #0 {
  ret float 1.0
}


; Test @ps_input_ena_pos_w: an amdgpu_ps function using attribute group #0 that
; returns {%14, %8 reinterpreted as <2 x float>}.
; The .long directives are checked ahead of the label line and expect the value
; 2081 after both 165580 and 165584 (presumably SPI_PS_INPUT_ENA /
; SPI_PS_INPUT_ADDR - TODO confirm).
; NOTE(review): the name suggests %14 is the POS_W input - confirm against the
; pixel-shader argument layout.
; Expected code: v0 <- v4 and v1 <- v2 (either order), then v2 <- v3, with no
; s_endpgm.
; GCN: .long 165580
; GCN-NEXT: .long 2081
; GCN-NEXT: .long 165584
; GCN-NEXT: .long 2081
; GCN-LABEL: {{^}}ps_input_ena_pos_w:
; GCN-DAG: v_mov_b32_e32 v0, v4
; GCN-DAG: v_mov_b32_e32 v1, v2
; GCN: v_mov_b32_e32 v2, v3
; GCN-NOT: s_endpgm
define amdgpu_ps {float, <2 x float>} @ps_input_ena_pos_w([9 x <16 x i8>] addrspace(2)* byval, i32 inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, float, float, float) #0 {
  %f = bitcast <2 x i32> %8 to <2 x float>
  %s = insertvalue {float, <2 x float>} undef, float %14, 0
  %s1 = insertvalue {float, <2 x float>} %s, <2 x float> %f, 1
  ret {float, <2 x float>} %s1
}


; Test @vgpr_ps_addr1: same body as the other vgpr_ps_addr* tests (returns both
; lanes of %4, lane 0 of %7, lane 0 of %8 and the float %12), but compiled with
; attribute group #1, i.e. "InitialPSInputAddr"="1".
; The .long directives are checked ahead of the label line: 562 is expected
; after 165580 and 563 after 165584. Note that 563 == 562 | 1. The two ids are
; presumably SPI_PS_INPUT_ENA / SPI_PS_INPUT_ADDR (TODO confirm).
; Expected code: v0 <- v2, v1 <- v3, v2 <- v4, v3 <- v6, v4 <- v8, and no
; s_endpgm.
; GCN: .long 165580
; GCN-NEXT: .long 562
; GCN-NEXT: .long 165584
; GCN-NEXT: .long 563
; GCN-LABEL: {{^}}vgpr_ps_addr1:
; GCN-DAG: v_mov_b32_e32 v0, v2
; GCN-DAG: v_mov_b32_e32 v1, v3
; GCN: v_mov_b32_e32 v2, v4
; GCN-DAG: v_mov_b32_e32 v3, v6
; GCN-DAG: v_mov_b32_e32 v4, v8
; GCN-NOT: s_endpgm
attributes #1 = { "InitialPSInputAddr"="1" }
define amdgpu_ps {float, float, float, float, float} @vgpr_ps_addr1([9 x <16 x i8>] addrspace(2)* byval, i32 inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, float, float, float) #1 {
  %i0 = extractelement <2 x i32> %4, i32 0
  %i1 = extractelement <2 x i32> %4, i32 1
  %i2 = extractelement <2 x i32> %7, i32 0
  %i3 = extractelement <2 x i32> %8, i32 0
  %f0 = bitcast i32 %i0 to float
  %f1 = bitcast i32 %i1 to float
  %f2 = bitcast i32 %i2 to float
  %f3 = bitcast i32 %i3 to float
  %r0 = insertvalue {float, float, float, float, float} undef, float %f0, 0
  %r1 = insertvalue {float, float, float, float, float} %r0, float %f1, 1
  %r2 = insertvalue {float, float, float, float, float} %r1, float %f2, 2
  %r3 = insertvalue {float, float, float, float, float} %r2, float %f3, 3
  %r4 = insertvalue {float, float, float, float, float} %r3, float %12, 4
  ret {float, float, float, float, float} %r4
}


; Test @vgpr_ps_addr119: same body as the other vgpr_ps_addr* tests (returns
; both lanes of %4, lane 0 of %7, lane 0 of %8 and the float %12), but compiled
; with attribute group #2, i.e. "InitialPSInputAddr"="119".
; The .long directives are checked ahead of the label line: 562 is expected
; after 165580 and 631 after 165584. Note that 631 == 562 | 119. The two ids
; are presumably SPI_PS_INPUT_ENA / SPI_PS_INPUT_ADDR (TODO confirm).
; Expected code: v0 <- v2, v1 <- v3, v2 <- v6, v3 <- v8, v4 <- v12, and no
; s_endpgm.
; GCN: .long 165580
; GCN-NEXT: .long 562
; GCN-NEXT: .long 165584
; GCN-NEXT: .long 631
; GCN-LABEL: {{^}}vgpr_ps_addr119:
; GCN-DAG: v_mov_b32_e32 v0, v2
; GCN-DAG: v_mov_b32_e32 v1, v3
; GCN: v_mov_b32_e32 v2, v6
; GCN: v_mov_b32_e32 v3, v8
; GCN: v_mov_b32_e32 v4, v12
; GCN-NOT: s_endpgm
attributes #2 = { "InitialPSInputAddr"="119" }
define amdgpu_ps {float, float, float, float, float} @vgpr_ps_addr119([9 x <16 x i8>] addrspace(2)* byval, i32 inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, float, float, float) #2 {
  %i0 = extractelement <2 x i32> %4, i32 0
  %i1 = extractelement <2 x i32> %4, i32 1
  %i2 = extractelement <2 x i32> %7, i32 0
  %i3 = extractelement <2 x i32> %8, i32 0
  %f0 = bitcast i32 %i0 to float
  %f1 = bitcast i32 %i1 to float
  %f2 = bitcast i32 %i2 to float
  %f3 = bitcast i32 %i3 to float
  %r0 = insertvalue {float, float, float, float, float} undef, float %f0, 0
  %r1 = insertvalue {float, float, float, float, float} %r0, float %f1, 1
  %r2 = insertvalue {float, float, float, float, float} %r1, float %f2, 2
  %r3 = insertvalue {float, float, float, float, float} %r2, float %f3, 3
  %r4 = insertvalue {float, float, float, float, float} %r3, float %12, 4
  ret {float, float, float, float, float} %r4
}


; Test @vgpr_ps_addr418: same body as the other vgpr_ps_addr* tests (returns
; both lanes of %4, lane 0 of %7, lane 0 of %8 and the float %12), but compiled
; with attribute group #3, i.e. "InitialPSInputAddr"="418".
; The .long directives are checked ahead of the label line: 562 is expected
; after 165580 and 946 after 165584. Note that 946 == 562 | 418. The two ids
; are presumably SPI_PS_INPUT_ENA / SPI_PS_INPUT_ADDR (TODO confirm).
; Expected code: nothing is moved into v0..v2, v3 is loaded from v4, v4 is
; loaded from v8, and no s_endpgm is emitted.
; GCN: .long 165580
; GCN-NEXT: .long 562
; GCN-NEXT: .long 165584
; GCN-NEXT: .long 946
; GCN-LABEL: {{^}}vgpr_ps_addr418:
; GCN-NOT: v_mov_b32_e32 v0
; GCN-NOT: v_mov_b32_e32 v1
; GCN-NOT: v_mov_b32_e32 v2
; GCN: v_mov_b32_e32 v3, v4
; GCN: v_mov_b32_e32 v4, v8
; GCN-NOT: s_endpgm
attributes #3 = { "InitialPSInputAddr"="418" }
define amdgpu_ps {float, float, float, float, float} @vgpr_ps_addr418([9 x <16 x i8>] addrspace(2)* byval, i32 inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, float, float, float) #3 {
  %i0 = extractelement <2 x i32> %4, i32 0
  %i1 = extractelement <2 x i32> %4, i32 1
  %i2 = extractelement <2 x i32> %7, i32 0
  %i3 = extractelement <2 x i32> %8, i32 0
  %f0 = bitcast i32 %i0 to float
  %f1 = bitcast i32 %i1 to float
  %f2 = bitcast i32 %i2 to float
  %f3 = bitcast i32 %i3 to float
  %r0 = insertvalue {float, float, float, float, float} undef, float %f0, 0
  %r1 = insertvalue {float, float, float, float, float} %r0, float %f1, 1
  %r2 = insertvalue {float, float, float, float, float} %r1, float %f2, 2
  %r3 = insertvalue {float, float, float, float, float} %r2, float %f3, 3
  %r4 = insertvalue {float, float, float, float, float} %r3, float %12, 4
  ret {float, float, float, float, float} %r4
}


; Test @sgpr: an amdgpu_vs function that returns three i32 values built from
; its inreg arguments: {%2 + 2, %1, %2}.
; The aggregate is chained %a -> %b -> %c so that all three elements are
; defined. Building %c from %a would leave %b dead and element 1 undef.
; Expected code, matching the scalar expectations of the @both test in this
; file (same argument list): s0 <- s3 + 2 and s1 <- s2 (either order), then
; s2 <- s3, with no s_endpgm.
; NOTE(review): the s1 expectation is inferred from @both - confirm by running
; the test.
; GCN-LABEL: {{^}}sgpr:
; GCN-DAG: s_add_i32 s0, s3, 2
; GCN-DAG: s_mov_b32 s1, s2
; GCN: s_mov_b32 s2, s3
; GCN-NOT: s_endpgm
define amdgpu_vs {i32, i32, i32} @sgpr([9 x <16 x i8>] addrspace(2)* byval, i32 inreg, i32 inreg, float) {
  %x = add i32 %2, 2
  %a = insertvalue {i32, i32, i32} undef, i32 %x, 0
  %b = insertvalue {i32, i32, i32} %a, i32 %1, 1
  %c = insertvalue {i32, i32, i32} %b, i32 %2, 2
  ret {i32, i32, i32} %c
}


; Test @sgpr_literal: an amdgpu_vs function that returns the constant aggregate
; {5, 6, 7, 8}.
; Expected code: s0 is set to 5 and is not followed by a self-move of s0; s1,
; s2 and s3 are set to 6, 7 and 8 (in any order); no s_endpgm is emitted.
; GCN-LABEL: {{^}}sgpr_literal:
; GCN: s_mov_b32 s0, 5
; GCN-NOT: s_mov_b32 s0, s0
; GCN-DAG: s_mov_b32 s1, 6
; GCN-DAG: s_mov_b32 s2, 7
; GCN-DAG: s_mov_b32 s3, 8
; GCN-NOT: s_endpgm
define amdgpu_vs {i32, i32, i32, i32} @sgpr_literal([9 x <16 x i8>] addrspace(2)* byval, i32 inreg, i32 inreg, float) {
  ; %x is not used by the return below; the returned aggregate is all constants.
  %x = add i32 %2, 2
  ret {i32, i32, i32, i32} {i32 5, i32 6, i32 7, i32 8}
}


; Test @both: an amdgpu_vs function that exports its float argument %3 and then
; returns a mix of float and i32 elements: {%3 + 1.0, %2 + 2, %3, %1, %2}.
; Expected code: v0 is copied to v1; then, in any order, the export reads v1,
; the float sum is written to v0, s0 <- s3 + 2 and s1 <- s2; afterwards
; s2 <- s3 and s_waitcnt expcnt(0), with no s_endpgm.
; GCN-LABEL: {{^}}both:
; GCN: v_mov_b32_e32 v1, v0
; GCN-DAG: exp mrt0 v1, v1, v1, v1 done compr vm
; GCN-DAG: v_add_f32_e32 v0, 1.0, v1
; GCN-DAG: s_add_i32 s0, s3, 2
; GCN-DAG: s_mov_b32 s1, s2
; GCN: s_mov_b32 s2, s3
; GCN: s_waitcnt expcnt(0)
; GCN-NOT: s_endpgm
define amdgpu_vs {float, i32, float, i32, i32} @both([9 x <16 x i8>] addrspace(2)* byval, i32 inreg, i32 inreg, float) {
  call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %3, float %3, float %3, float %3)
  %v = fadd float %3, 1.0
  %s = add i32 %2, 2
  %a0 = insertvalue {float, i32, float, i32, i32} undef, float %v, 0
  %a1 = insertvalue {float, i32, float, i32, i32} %a0, i32 %s, 1
  %a2 = insertvalue {float, i32, float, i32, i32} %a1, float %3, 2
  %a3 = insertvalue {float, i32, float, i32, i32} %a2, i32 %1, 3
  %a4 = insertvalue {float, i32, float, i32, i32} %a3, i32 %2, 4
  ret {float, i32, float, i32, i32} %a4
}


; Test @structure_literal: an amdgpu_vs function that exports its float
; argument %3 and then returns a nested constant aggregate
; {{1.0, 2}, {3, <2.0, 4.0>}}.
; Expected code: v0 is copied to v3 and the export reads v3; then, in any
; order, v0 <- 1.0, s0 <- 2, s1 <- 3, v1 <- 2.0 and v2 <- 4.0; finally
; s_waitcnt expcnt(0).
; GCN-LABEL: {{^}}structure_literal:
; GCN: v_mov_b32_e32 v3, v0
; GCN: exp mrt0 v3, v3, v3, v3 done compr vm

; GCN-DAG: v_mov_b32_e32 v0, 1.0
; GCN-DAG: s_mov_b32 s0, 2
; GCN-DAG: s_mov_b32 s1, 3
; GCN-DAG: v_mov_b32_e32 v1, 2.0
; GCN-DAG: v_mov_b32_e32 v2, 4.0
; GCN: s_waitcnt expcnt(0)
define amdgpu_vs {{float, i32}, {i32, <2 x float>}} @structure_literal([9 x <16 x i8>] addrspace(2)* byval, i32 inreg, i32 inreg, float) {
  call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %3, float %3, float %3, float %3)
  ret {{float, i32}, {i32, <2 x float>}} {{float, i32} {float 1.0, i32 2}, {i32, <2 x float>} {i32 3, <2 x float> <float 2.0, float 4.0>}}
}

; Attribute group #0, used by @vgpr_ps_addr0, @ps_input_ena_no_inputs and
; @ps_input_ena_pos_w: nounwind with "InitialPSInputAddr" set to "0".
attributes #0 = { nounwind "InitialPSInputAddr"="0" }