; RUN: llc -march=amdgcn -mcpu=tahiti -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s
; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s

; Export intrinsic; several of the shaders in this file call it before
; returning their values.
declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float)

; Vertex shader that exports its float argument and then returns
; { %3 + 1.0, %3 }. The checks require the export to read the copied input,
; the sum to land in v0, and no s_endpgm to be emitted.
; GCN-LABEL: {{^}}vgpr:
; GCN: v_mov_b32_e32 v1, v0
; GCN-DAG: v_add_f32_e32 v0, 1.0, v1
; GCN-DAG: exp 15, 0, 1, 1, 1, v1, v1, v1, v1
; GCN: s_waitcnt expcnt(0)
; GCN-NOT: s_endpgm
define amdgpu_vs {float, float} @vgpr([9 x <16 x i8>] addrspace(2)* byval, i32 inreg, i32 inreg, float) {
  call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %3, float %3, float %3, float %3)
  %x = fadd float %3, 1.0
  %a = insertvalue {float, float} undef, float %x, 0
  %b = insertvalue {float, float} %a, float %3, 1
  ret {float, float} %b
}

; Vertex shader that exports its float argument and returns a constant
; four-float struct. The checks expect the export (still reading v0) and the
; export-count wait to come before the constants are written to v0..v3.
; GCN-LABEL: {{^}}vgpr_literal:
; GCN: exp 15, 0, 1, 1, 1, v0, v0, v0, v0
; GCN: s_waitcnt expcnt(0)
; GCN-DAG: v_mov_b32_e32 v0, 1.0
; GCN-DAG: v_mov_b32_e32 v1, 2.0
; GCN-DAG: v_mov_b32_e32 v2, 4.0
; GCN-DAG: v_mov_b32_e32 v3, -1.0
; GCN-NOT: s_endpgm
define amdgpu_vs {float, float, float, float} @vgpr_literal([9 x <16 x i8>] addrspace(2)* byval, i32 inreg, i32 inreg, float) {
  call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %3, float %3, float %3, float %3)
  ret {float, float, float, float} {float 1.0, float 2.0, float 4.0, float -1.0}
}


; Pixel shader with "InitialPSInputAddr"="0" that reads arguments %4, %7, %8
; and %12 and returns them as five floats.
;
; The four .long checks are placed ahead of the label check on purpose: they
; are matched in the output that precedes this function's label. They expect
; two (register, value) pairs, both with value 562 (0x232).
; NOTE(review): 165580 (0x286CC) and 165584 (0x286D0) are presumably the
; SPI_PS_INPUT_ENA and SPI_PS_INPUT_ADDR register offsets - confirm against
; the backend's register definitions.
;
; The first three return values are expected to need no copy into v0..v2;
; the last two are expected to be copied from v4 and v6.
; GCN: .long 165580
; GCN-NEXT: .long 562
; GCN-NEXT: .long 165584
; GCN-NEXT: .long 562
; GCN-LABEL: {{^}}vgpr_ps_addr0:
; GCN-NOT: v_mov_b32_e32 v0
; GCN-NOT: v_mov_b32_e32 v1
; GCN-NOT: v_mov_b32_e32 v2
; GCN: v_mov_b32_e32 v3, v4
; GCN: v_mov_b32_e32 v4, v6
; GCN-NOT: s_endpgm
attributes #0 = { "InitialPSInputAddr"="0" }
define amdgpu_ps {float, float, float, float, float} @vgpr_ps_addr0([9 x <16 x i8>] addrspace(2)* byval, i32 inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, float, float, float) #0 {
  %i0 = extractelement <2 x i32> %4, i32 0
  %i1 = extractelement <2 x i32> %4, i32 1
  %i2 = extractelement <2 x i32> %7, i32 0
  %i3 = extractelement <2 x i32> %8, i32 0
  %f0 = bitcast i32 %i0 to float
  %f1 = bitcast i32 %i1 to float
  %f2 = bitcast i32 %i2 to float
  %f3 = bitcast i32 %i3 to float
  %r0 = insertvalue {float, float, float, float, float} undef, float %f0, 0
  %r1 = insertvalue {float, float, float, float, float} %r0, float %f1, 1
  %r2 = insertvalue {float, float, float, float, float} %r1, float %f2, 2
  %r3 = insertvalue {float, float, float, float, float} %r2, float %f3, 3
  %r4 = insertvalue {float, float, float, float, float} %r3, float %12, 4
  ret {float, float, float, float, float} %r4
}


; Pixel shader (attribute group #0) that uses none of its arguments and
; returns the constant 1.0. Both expected register values are 1 rather than 0.
; NOTE(review): presumably at least one input must stay enabled even when
; nothing is read - confirm against the backend.
; The .long checks are matched in the output preceding this function's label.
; GCN: .long 165580
; GCN-NEXT: .long 1
; GCN-NEXT: .long 165584
; GCN-NEXT: .long 1
; GCN-LABEL: {{^}}ps_input_ena_no_inputs:
; GCN: v_mov_b32_e32 v0, 1.0
; GCN-NOT: s_endpgm
define amdgpu_ps float @ps_input_ena_no_inputs([9 x <16 x i8>] addrspace(2)* byval, i32 inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, float, float, float) #0 {
  ret float 1.0
}


; Pixel shader (attribute group #0) that reads only %8 and %14 and returns
; { %14, bitcast(%8) }. Both expected register values are 2081 (0x821).
; NOTE(review): 0x821 has one more bit set than the two inputs read here
; would suggest; presumably reading %14 forces an additional input on -
; confirm against the backend.
; The .long checks are matched in the output preceding this function's label.
; GCN: .long 165580
; GCN-NEXT: .long 2081
; GCN-NEXT: .long 165584
; GCN-NEXT: .long 2081
; GCN-LABEL: {{^}}ps_input_ena_pos_w:
; GCN-DAG: v_mov_b32_e32 v0, v4
; GCN-DAG: v_mov_b32_e32 v1, v2
; GCN: v_mov_b32_e32 v2, v3
; GCN-NOT: s_endpgm
define amdgpu_ps {float, <2 x float>} @ps_input_ena_pos_w([9 x <16 x i8>] addrspace(2)* byval, i32 inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, float, float, float) #0 {
  %f = bitcast <2 x i32> %8 to <2 x float>
  %s = insertvalue {float, <2 x float>} undef, float %14, 0
  %s1 = insertvalue {float, <2 x float>} %s, <2 x float> %f, 1
  ret {float, <2 x float>} %s1
}


; Same body as @vgpr_ps_addr0, but with "InitialPSInputAddr"="1".
; The first expected register value stays 562; the second becomes
; 563 = 562 | 1, i.e. the first value OR'ed with the attribute value.
; With that extra input present, the returned values are expected to be
; copied from v2, v3, v4, v6 and v8.
; The .long checks are matched in the output preceding this function's label.
; GCN: .long 165580
; GCN-NEXT: .long 562
; GCN-NEXT: .long 165584
; GCN-NEXT: .long 563
; GCN-LABEL: {{^}}vgpr_ps_addr1:
; GCN-DAG: v_mov_b32_e32 v0, v2
; GCN-DAG: v_mov_b32_e32 v1, v3
; GCN: v_mov_b32_e32 v2, v4
; GCN-DAG: v_mov_b32_e32 v3, v6
; GCN-DAG: v_mov_b32_e32 v4, v8
; GCN-NOT: s_endpgm
attributes #1 = { "InitialPSInputAddr"="1" }
define amdgpu_ps {float, float, float, float, float} @vgpr_ps_addr1([9 x <16 x i8>] addrspace(2)* byval, i32 inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, float, float, float) #1 {
  %i0 = extractelement <2 x i32> %4, i32 0
  %i1 = extractelement <2 x i32> %4, i32 1
  %i2 = extractelement <2 x i32> %7, i32 0
  %i3 = extractelement <2 x i32> %8, i32 0
  %f0 = bitcast i32 %i0 to float
  %f1 = bitcast i32 %i1 to float
  %f2 = bitcast i32 %i2 to float
  %f3 = bitcast i32 %i3 to float
  %r0 = insertvalue {float, float, float, float, float} undef, float %f0, 0
  %r1 = insertvalue {float, float, float, float, float} %r0, float %f1, 1
  %r2 = insertvalue {float, float, float, float, float} %r1, float %f2, 2
  %r3 = insertvalue {float, float, float, float, float} %r2, float %f3, 3
  %r4 = insertvalue {float, float, float, float, float} %r3, float %12, 4
  ret {float, float, float, float, float} %r4
}


; Same body as @vgpr_ps_addr0, but with "InitialPSInputAddr"="119".
; The first expected register value stays 562; the second becomes
; 631 = 562 | 119, i.e. the first value OR'ed with the attribute value.
; The returned values are expected to be copied from v2, v3, v6, v8 and v12.
; The .long checks are matched in the output preceding this function's label.
; GCN: .long 165580
; GCN-NEXT: .long 562
; GCN-NEXT: .long 165584
; GCN-NEXT: .long 631
; GCN-LABEL: {{^}}vgpr_ps_addr119:
; GCN-DAG: v_mov_b32_e32 v0, v2
; GCN-DAG: v_mov_b32_e32 v1, v3
; GCN: v_mov_b32_e32 v2, v6
; GCN: v_mov_b32_e32 v3, v8
; GCN: v_mov_b32_e32 v4, v12
; GCN-NOT: s_endpgm
attributes #2 = { "InitialPSInputAddr"="119" }
define amdgpu_ps {float, float, float, float, float} @vgpr_ps_addr119([9 x <16 x i8>] addrspace(2)* byval, i32 inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, float, float, float) #2 {
  %i0 = extractelement <2 x i32> %4, i32 0
  %i1 = extractelement <2 x i32> %4, i32 1
  %i2 = extractelement <2 x i32> %7, i32 0
  %i3 = extractelement <2 x i32> %8, i32 0
  %f0 = bitcast i32 %i0 to float
  %f1 = bitcast i32 %i1 to float
  %f2 = bitcast i32 %i2 to float
  %f3 = bitcast i32 %i3 to float
  %r0 = insertvalue {float, float, float, float, float} undef, float %f0, 0
  %r1 = insertvalue {float, float, float, float, float} %r0, float %f1, 1
  %r2 = insertvalue {float, float, float, float, float} %r1, float %f2, 2
  %r3 = insertvalue {float, float, float, float, float} %r2, float %f3, 3
  %r4 = insertvalue {float, float, float, float, float} %r3, float %12, 4
  ret {float, float, float, float, float} %r4
}


; Same body as @vgpr_ps_addr0, but with "InitialPSInputAddr"="418".
; The first expected register value stays 562; the second becomes
; 946 = 562 | 418, i.e. the first value OR'ed with the attribute value.
; The first three return values are expected to need no copy into v0..v2;
; the last two are expected to be copied from v4 and v8.
; The .long checks are matched in the output preceding this function's label.
; GCN: .long 165580
; GCN-NEXT: .long 562
; GCN-NEXT: .long 165584
; GCN-NEXT: .long 946
; GCN-LABEL: {{^}}vgpr_ps_addr418:
; GCN-NOT: v_mov_b32_e32 v0
; GCN-NOT: v_mov_b32_e32 v1
; GCN-NOT: v_mov_b32_e32 v2
; GCN: v_mov_b32_e32 v3, v4
; GCN: v_mov_b32_e32 v4, v8
; GCN-NOT: s_endpgm
attributes #3 = { "InitialPSInputAddr"="418" }
define amdgpu_ps {float, float, float, float, float} @vgpr_ps_addr418([9 x <16 x i8>] addrspace(2)* byval, i32 inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, float, float, float) #3 {
  %i0 = extractelement <2 x i32> %4, i32 0
  %i1 = extractelement <2 x i32> %4, i32 1
  %i2 = extractelement <2 x i32> %7, i32 0
  %i3 = extractelement <2 x i32> %8, i32 0
  %f0 = bitcast i32 %i0 to float
  %f1 = bitcast i32 %i1 to float
  %f2 = bitcast i32 %i2 to float
  %f3 = bitcast i32 %i3 to float
  %r0 = insertvalue {float, float, float, float, float} undef, float %f0, 0
  %r1 = insertvalue {float, float, float, float, float} %r0, float %f1, 1
  %r2 = insertvalue {float, float, float, float, float} %r1, float %f2, 2
  %r3 = insertvalue {float, float, float, float, float} %r2, float %f3, 3
  %r4 = insertvalue {float, float, float, float, float} %r3, float %12, 4
  ret {float, float, float, float, float} %r4
}


; Vertex shader returning three i32 values: { %2 + 2, %1, %2 }.
;
; The final insertvalue chains through %b so that element 1 (%1) is really
; part of the returned struct. Building %c from %a instead would leave %b dead
; and element 1 undefined, so the s1 copy would never be exercised. The
; expected s1 copy is the same one @both checks for the same argument layout.
; The three checks are order-independent because the instructions may be
; scheduled in any order relative to each other.
; GCN-LABEL: {{^}}sgpr:
; GCN-DAG: s_add_i32 s0, s3, 2
; GCN-DAG: s_mov_b32 s1, s2
; GCN-DAG: s_mov_b32 s2, s3
; GCN-NOT: s_endpgm
define amdgpu_vs {i32, i32, i32} @sgpr([9 x <16 x i8>] addrspace(2)* byval, i32 inreg, i32 inreg, float) {
  %x = add i32 %2, 2
  %a = insertvalue {i32, i32, i32} undef, i32 %x, 0
  %b = insertvalue {i32, i32, i32} %a, i32 %1, 1
  %c = insertvalue {i32, i32, i32} %b, i32 %2, 2
  ret {i32, i32, i32} %c
}


; Vertex shader returning a constant four-i32 struct. The checks expect the
; constants to be written straight into s0..s3, with no redundant self-copy
; of s0.
; GCN-LABEL: {{^}}sgpr_literal:
; GCN: s_mov_b32 s0, 5
; GCN-NOT: s_mov_b32 s0, s0
; GCN-DAG: s_mov_b32 s1, 6
; GCN-DAG: s_mov_b32 s2, 7
; GCN-DAG: s_mov_b32 s3, 8
; GCN-NOT: s_endpgm
define amdgpu_vs {i32, i32, i32, i32} @sgpr_literal([9 x <16 x i8>] addrspace(2)* byval, i32 inreg, i32 inreg, float) {
  ; NOTE(review): %x is never used by the return value; presumably it is dead
  ; on purpose - confirm before removing.
  %x = add i32 %2, 2
  ret {i32, i32, i32, i32} {i32 5, i32 6, i32 7, i32 8}
}


; Vertex shader that exports its float argument and returns a struct mixing
; float and i32 members: { %3 + 1.0, %2 + 2, %3, %1, %2 }. The checks expect
; the float members in v0/v1 and the i32 members in s0..s2.
; GCN-LABEL: {{^}}both:
; GCN: v_mov_b32_e32 v1, v0
; GCN-DAG: exp 15, 0, 1, 1, 1, v1, v1, v1, v1
; GCN-DAG: v_add_f32_e32 v0, 1.0, v1
; GCN-DAG: s_add_i32 s0, s3, 2
; GCN-DAG: s_mov_b32 s1, s2
; GCN: s_mov_b32 s2, s3
; GCN: s_waitcnt expcnt(0)
; GCN-NOT: s_endpgm
define amdgpu_vs {float, i32, float, i32, i32} @both([9 x <16 x i8>] addrspace(2)* byval, i32 inreg, i32 inreg, float) {
  call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %3, float %3, float %3, float %3)
  %v = fadd float %3, 1.0
  %s = add i32 %2, 2
  %a0 = insertvalue {float, i32, float, i32, i32} undef, float %v, 0
  %a1 = insertvalue {float, i32, float, i32, i32} %a0, i32 %s, 1
  %a2 = insertvalue {float, i32, float, i32, i32} %a1, float %3, 2
  %a3 = insertvalue {float, i32, float, i32, i32} %a2, i32 %1, 3
  %a4 = insertvalue {float, i32, float, i32, i32} %a3, i32 %2, 4
  ret {float, i32, float, i32, i32} %a4
}


; Vertex shader that exports its float argument and returns a constant nested
; struct { {float, i32}, {i32, <2 x float>} }. The checks expect the nested
; members to be flattened: floats into v0..v2, integers into s0..s1.
; Like every other return-value shader in this file, it must not emit
; s_endpgm.
; GCN-LABEL: {{^}}structure_literal:
; GCN: exp 15, 0, 1, 1, 1, v0, v0, v0, v0
; GCN: s_waitcnt expcnt(0)
; GCN-DAG: v_mov_b32_e32 v0, 1.0
; GCN-DAG: s_mov_b32 s0, 2
; GCN-DAG: s_mov_b32 s1, 3
; GCN-DAG: v_mov_b32_e32 v1, 2.0
; GCN-DAG: v_mov_b32_e32 v2, 4.0
; GCN-NOT: s_endpgm
define amdgpu_vs {{float, i32}, {i32, <2 x float>}} @structure_literal([9 x <16 x i8>] addrspace(2)* byval, i32 inreg, i32 inreg, float) {
  call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %3, float %3, float %3, float %3)
  ret {{float, i32}, {i32, <2 x float>}} {{float, i32} {float 1.0, i32 2}, {i32, <2 x float>} {i32 3, <2 x float> <float 2.0, float 4.0>}}
}