; RUN: llc -march=amdgcn -mcpu=tahiti -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s
; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s

; Vertex shader that returns two floats: %arg3 + 1.0 and %arg3 itself.
; %arg3 is exported before the return, so the checks expect the incoming v0 to
; be copied to v1 first, v0 to receive the sum, and an export-counter wait to
; appear. The shader must not end with s_endpgm.
; GCN-LABEL: {{^}}vgpr:
; GCN: v_mov_b32_e32 v1, v0
; GCN-DAG: v_add_f32_e32 v0, 1.0, v1
; GCN-DAG: exp mrt0 v1, v1, v1, v1 done vm
; GCN: s_waitcnt expcnt(0)
; GCN-NOT: s_endpgm
define amdgpu_vs { float, float } @vgpr([9 x <16 x i8>] addrspace(2)* byval %arg, i32 inreg %arg1, i32 inreg %arg2, float %arg3) #0 {
bb:
  call void @llvm.amdgcn.exp.f32(i32 0, i32 15, float %arg3, float %arg3, float %arg3, float %arg3, i1 true, i1 true) #0
  %sum = fadd float %arg3, 1.000000e+00
  ; Element 0 carries the sum, element 1 the unmodified input.
  %ret0 = insertvalue { float, float } undef, float %sum, 0
  %ret1 = insertvalue { float, float } %ret0, float %arg3, 1
  ret { float, float } %ret1
}

; Vertex shader that exports %arg3 and then returns the constant aggregate
; (1.0, 2.0, 4.0, -1.0). The checks expect the incoming v0 to be moved to v4
; for the export, and the four constants to be materialised in v0-v3.
; GCN-LABEL: {{^}}vgpr_literal:
; GCN: v_mov_b32_e32 v4, v0
; GCN: exp mrt0 v4, v4, v4, v4 done vm

; GCN-DAG: v_mov_b32_e32 v0, 1.0
; GCN-DAG: v_mov_b32_e32 v1, 2.0
; GCN-DAG: v_mov_b32_e32 v2, 4.0
; GCN-DAG: v_mov_b32_e32 v3, -1.0
; GCN: s_waitcnt expcnt(0)
; GCN-NOT: s_endpgm
define amdgpu_vs { float, float, float, float } @vgpr_literal([9 x <16 x i8>] addrspace(2)* byval %arg, i32 inreg %arg1, i32 inreg %arg2, float %arg3) #0 {
bb:
  call void @llvm.amdgcn.exp.f32(i32 0, i32 15, float %arg3, float %arg3, float %arg3, float %arg3, i1 true, i1 true) #0
  ; The return value is a literal, so no computation feeds it.
  ret { float, float, float, float } { float 1.000000e+00, float 2.000000e+00, float 4.000000e+00, float -1.000000e+00 }
}

; Pixel shader using attribute group #1 ("InitialPSInputAddr"="0").
; Returns both elements of %arg4, element 0 of %arg7, element 0 of %arg8 and
; %arg12, all as floats.
; The .long pairs are matched ahead of the function label. 165580 / 165584 are
; presumably the PS input-enable / input-address config registers (confirm
; against the backend); both are expected to hold 562 (0x232).
; GCN: .long 165580
; GCN-NEXT: .long 562
; GCN-NEXT: .long 165584
; GCN-NEXT: .long 562
; GCN-LABEL: {{^}}vgpr_ps_addr0:
; GCN-NOT: v_mov_b32_e32 v0
; GCN-NOT: v_mov_b32_e32 v1
; GCN-NOT: v_mov_b32_e32 v2
; GCN: v_mov_b32_e32 v3, v4
; GCN: v_mov_b32_e32 v4, v6
; GCN-NOT: s_endpgm
define amdgpu_ps { float, float, float, float, float } @vgpr_ps_addr0([9 x <16 x i8>] addrspace(2)* byval %arg, i32 inreg %arg1, i32 inreg %arg2, <2 x i32> %arg3, <2 x i32> %arg4, <2 x i32> %arg5, <3 x i32> %arg6, <2 x i32> %arg7, <2 x i32> %arg8, <2 x i32> %arg9, float %arg10, float %arg11, float %arg12, float %arg13, float %arg14, float %arg15, float %arg16, float %arg17, float %arg18) #1 {
bb:
  ; Pull the integer lanes that are going to be returned.
  %x0 = extractelement <2 x i32> %arg4, i32 0
  %x1 = extractelement <2 x i32> %arg4, i32 1
  %x2 = extractelement <2 x i32> %arg7, i32 0
  %x3 = extractelement <2 x i32> %arg8, i32 0
  ; Reinterpret them as floats without changing the bits.
  %y0 = bitcast i32 %x0 to float
  %y1 = bitcast i32 %x1 to float
  %y2 = bitcast i32 %x2 to float
  %y3 = bitcast i32 %x3 to float
  ; Assemble the five-element return aggregate.
  %agg0 = insertvalue { float, float, float, float, float } undef, float %y0, 0
  %agg1 = insertvalue { float, float, float, float, float } %agg0, float %y1, 1
  %agg2 = insertvalue { float, float, float, float, float } %agg1, float %y2, 2
  %agg3 = insertvalue { float, float, float, float, float } %agg2, float %y3, 3
  %agg4 = insertvalue { float, float, float, float, float } %agg3, float %arg12, 4
  ret { float, float, float, float, float } %agg4
}

; Pixel shader using attribute group #1 ("InitialPSInputAddr"="0") that reads
; none of its arguments and returns the constant 1.0.
; Both config values checked ahead of the label are expected to be 1 even
; though no input is used. NOTE(review): presumably the backend force-enables
; one input in this case - confirm.
; GCN: .long 165580
; GCN-NEXT: .long 1
; GCN-NEXT: .long 165584
; GCN-NEXT: .long 1
; GCN-LABEL: {{^}}ps_input_ena_no_inputs:
; GCN: v_mov_b32_e32 v0, 1.0
; GCN-NOT: s_endpgm
define amdgpu_ps float @ps_input_ena_no_inputs([9 x <16 x i8>] addrspace(2)* byval %arg, i32 inreg %arg1, i32 inreg %arg2, <2 x i32> %arg3, <2 x i32> %arg4, <2 x i32> %arg5, <3 x i32> %arg6, <2 x i32> %arg7, <2 x i32> %arg8, <2 x i32> %arg9, float %arg10, float %arg11, float %arg12, float %arg13, float %arg14, float %arg15, float %arg16, float %arg17, float %arg18) #1 {
bb:
  ret float 1.000000e+00
}

; Pixel shader using attribute group #1 ("InitialPSInputAddr"="0") that
; returns %arg14 followed by %arg8 reinterpreted as <2 x float>.
; Both config values checked ahead of the label are expected to be 2081
; (0x821). NOTE(review): going by the function name this covers the POS_W
; input - confirm the bit assignment against the backend.
; GCN: .long 165580
; GCN-NEXT: .long 2081
; GCN-NEXT: .long 165584
; GCN-NEXT: .long 2081
; GCN-LABEL: {{^}}ps_input_ena_pos_w:
; GCN-DAG: v_mov_b32_e32 v0, v4
; GCN-DAG: v_mov_b32_e32 v1, v2
; GCN: v_mov_b32_e32 v2, v3
; GCN-NOT: s_endpgm
define amdgpu_ps { float, <2 x float> } @ps_input_ena_pos_w([9 x <16 x i8>] addrspace(2)* byval %arg, i32 inreg %arg1, i32 inreg %arg2, <2 x i32> %arg3, <2 x i32> %arg4, <2 x i32> %arg5, <3 x i32> %arg6, <2 x i32> %arg7, <2 x i32> %arg8, <2 x i32> %arg9, float %arg10, float %arg11, float %arg12, float %arg13, float %arg14, float %arg15, float %arg16, float %arg17, float %arg18) #1 {
bb:
  %vec = bitcast <2 x i32> %arg8 to <2 x float>
  ; Scalar first, then the two-element vector.
  %agg0 = insertvalue { float, <2 x float> } undef, float %arg14, 0
  %agg1 = insertvalue { float, <2 x float> } %agg0, <2 x float> %vec, 1
  ret { float, <2 x float> } %agg1
}

; Pixel shader using attribute group #2 ("InitialPSInputAddr"="1").
; Returns both elements of %arg4, element 0 of %arg7, element 0 of %arg8 and
; %arg12, all as floats.
; Ahead of the label the first config value is expected to be 562 (0x232) and
; the second 563, i.e. 562 with the attribute's value 1 OR'ed in.
; GCN: .long 165580
; GCN-NEXT: .long 562
; GCN-NEXT: .long 165584
; GCN-NEXT: .long 563
; GCN-LABEL: {{^}}vgpr_ps_addr1:
; GCN-DAG: v_mov_b32_e32 v0, v2
; GCN-DAG: v_mov_b32_e32 v1, v3
; GCN: v_mov_b32_e32 v2, v4
; GCN-DAG: v_mov_b32_e32 v3, v6
; GCN-DAG: v_mov_b32_e32 v4, v8
; GCN-NOT: s_endpgm
define amdgpu_ps { float, float, float, float, float } @vgpr_ps_addr1([9 x <16 x i8>] addrspace(2)* byval %arg, i32 inreg %arg1, i32 inreg %arg2, <2 x i32> %arg3, <2 x i32> %arg4, <2 x i32> %arg5, <3 x i32> %arg6, <2 x i32> %arg7, <2 x i32> %arg8, <2 x i32> %arg9, float %arg10, float %arg11, float %arg12, float %arg13, float %arg14, float %arg15, float %arg16, float %arg17, float %arg18) #2 {
bb:
  ; Pull the integer lanes that are going to be returned.
  %x0 = extractelement <2 x i32> %arg4, i32 0
  %x1 = extractelement <2 x i32> %arg4, i32 1
  %x2 = extractelement <2 x i32> %arg7, i32 0
  %x3 = extractelement <2 x i32> %arg8, i32 0
  ; Reinterpret them as floats without changing the bits.
  %y0 = bitcast i32 %x0 to float
  %y1 = bitcast i32 %x1 to float
  %y2 = bitcast i32 %x2 to float
  %y3 = bitcast i32 %x3 to float
  ; Assemble the five-element return aggregate.
  %agg0 = insertvalue { float, float, float, float, float } undef, float %y0, 0
  %agg1 = insertvalue { float, float, float, float, float } %agg0, float %y1, 1
  %agg2 = insertvalue { float, float, float, float, float } %agg1, float %y2, 2
  %agg3 = insertvalue { float, float, float, float, float } %agg2, float %y3, 3
  %agg4 = insertvalue { float, float, float, float, float } %agg3, float %arg12, 4
  ret { float, float, float, float, float } %agg4
}

; Pixel shader using attribute group #3 ("InitialPSInputAddr"="119").
; Returns both elements of %arg4, element 0 of %arg7, element 0 of %arg8 and
; %arg12, all as floats.
; Ahead of the label the first config value is expected to be 562 (0x232) and
; the second 631 (0x277), i.e. 562 with the attribute's value 119 OR'ed in.
; GCN: .long 165580
; GCN-NEXT: .long 562
; GCN-NEXT: .long 165584
; GCN-NEXT: .long 631
; GCN-LABEL: {{^}}vgpr_ps_addr119:
; GCN-DAG: v_mov_b32_e32 v0, v2
; GCN-DAG: v_mov_b32_e32 v1, v3
; GCN: v_mov_b32_e32 v2, v6
; GCN: v_mov_b32_e32 v3, v8
; GCN: v_mov_b32_e32 v4, v12
; GCN-NOT: s_endpgm
define amdgpu_ps { float, float, float, float, float } @vgpr_ps_addr119([9 x <16 x i8>] addrspace(2)* byval %arg, i32 inreg %arg1, i32 inreg %arg2, <2 x i32> %arg3, <2 x i32> %arg4, <2 x i32> %arg5, <3 x i32> %arg6, <2 x i32> %arg7, <2 x i32> %arg8, <2 x i32> %arg9, float %arg10, float %arg11, float %arg12, float %arg13, float %arg14, float %arg15, float %arg16, float %arg17, float %arg18) #3 {
bb:
  ; Pull the integer lanes that are going to be returned.
  %x0 = extractelement <2 x i32> %arg4, i32 0
  %x1 = extractelement <2 x i32> %arg4, i32 1
  %x2 = extractelement <2 x i32> %arg7, i32 0
  %x3 = extractelement <2 x i32> %arg8, i32 0
  ; Reinterpret them as floats without changing the bits.
  %y0 = bitcast i32 %x0 to float
  %y1 = bitcast i32 %x1 to float
  %y2 = bitcast i32 %x2 to float
  %y3 = bitcast i32 %x3 to float
  ; Assemble the five-element return aggregate.
  %agg0 = insertvalue { float, float, float, float, float } undef, float %y0, 0
  %agg1 = insertvalue { float, float, float, float, float } %agg0, float %y1, 1
  %agg2 = insertvalue { float, float, float, float, float } %agg1, float %y2, 2
  %agg3 = insertvalue { float, float, float, float, float } %agg2, float %y3, 3
  %agg4 = insertvalue { float, float, float, float, float } %agg3, float %arg12, 4
  ret { float, float, float, float, float } %agg4
}

; Pixel shader using attribute group #4 ("InitialPSInputAddr"="418").
; Returns both elements of %arg4, element 0 of %arg7, element 0 of %arg8 and
; %arg12, all as floats.
; Ahead of the label the first config value is expected to be 562 (0x232) and
; the second 946 (0x3B2), i.e. 562 with the attribute's value 418 OR'ed in.
; GCN: .long 165580
; GCN-NEXT: .long 562
; GCN-NEXT: .long 165584
; GCN-NEXT: .long 946
; GCN-LABEL: {{^}}vgpr_ps_addr418:
; GCN-NOT: v_mov_b32_e32 v0
; GCN-NOT: v_mov_b32_e32 v1
; GCN-NOT: v_mov_b32_e32 v2
; GCN: v_mov_b32_e32 v3, v4
; GCN: v_mov_b32_e32 v4, v8
; GCN-NOT: s_endpgm
define amdgpu_ps { float, float, float, float, float } @vgpr_ps_addr418([9 x <16 x i8>] addrspace(2)* byval %arg, i32 inreg %arg1, i32 inreg %arg2, <2 x i32> %arg3, <2 x i32> %arg4, <2 x i32> %arg5, <3 x i32> %arg6, <2 x i32> %arg7, <2 x i32> %arg8, <2 x i32> %arg9, float %arg10, float %arg11, float %arg12, float %arg13, float %arg14, float %arg15, float %arg16, float %arg17, float %arg18) #4 {
bb:
  ; Pull the integer lanes that are going to be returned.
  %x0 = extractelement <2 x i32> %arg4, i32 0
  %x1 = extractelement <2 x i32> %arg4, i32 1
  %x2 = extractelement <2 x i32> %arg7, i32 0
  %x3 = extractelement <2 x i32> %arg8, i32 0
  ; Reinterpret them as floats without changing the bits.
  %y0 = bitcast i32 %x0 to float
  %y1 = bitcast i32 %x1 to float
  %y2 = bitcast i32 %x2 to float
  %y3 = bitcast i32 %x3 to float
  ; Assemble the five-element return aggregate.
  %agg0 = insertvalue { float, float, float, float, float } undef, float %y0, 0
  %agg1 = insertvalue { float, float, float, float, float } %agg0, float %y1, 1
  %agg2 = insertvalue { float, float, float, float, float } %agg1, float %y2, 2
  %agg3 = insertvalue { float, float, float, float, float } %agg2, float %y3, 3
  %agg4 = insertvalue { float, float, float, float, float } %agg3, float %arg12, 4
  ret { float, float, float, float, float } %agg4
}

; Vertex shader that returns three i32 values: %arg2 + 2, %arg1 and %arg2.
; The aggregate is built as one chain (%a -> %b -> %c), so every element is
; defined. The last insert must start from %b; starting it from %a would leave
; the %arg1 insert dead and element 1 undef.
; The checks follow the scalar part of the "both" test in this file: the sum
; goes to s0, the incoming s2 is copied to s1, and only then is s2 overwritten
; with s3.
; GCN-LABEL: {{^}}sgpr:
; GCN-DAG: s_add_i32 s0, s3, 2
; GCN-DAG: s_mov_b32 s1, s2
; GCN: s_mov_b32 s2, s3
; GCN-NOT: s_endpgm
define amdgpu_vs { i32, i32, i32 } @sgpr([9 x <16 x i8>] addrspace(2)* byval %arg, i32 inreg %arg1, i32 inreg %arg2, float %arg3) #0 {
bb:
  %x = add i32 %arg2, 2
  %a = insertvalue { i32, i32, i32 } undef, i32 %x, 0
  %b = insertvalue { i32, i32, i32 } %a, i32 %arg1, 1
  %c = insertvalue { i32, i32, i32 } %b, i32 %arg2, 2
  ret { i32, i32, i32 } %c
}

; Vertex shader that returns the constant aggregate (5, 6, 7, 8).
; The checks expect each constant to be moved straight into s0-s3 and no
; redundant self-copy of s0. The shader must not end with s_endpgm.
; The function body is just the return: none of the arguments is read.
; GCN-LABEL: {{^}}sgpr_literal:
; GCN: s_mov_b32 s0, 5
; GCN-NOT: s_mov_b32 s0, s0
; GCN-DAG: s_mov_b32 s1, 6
; GCN-DAG: s_mov_b32 s2, 7
; GCN-DAG: s_mov_b32 s3, 8
; GCN-NOT: s_endpgm
define amdgpu_vs { i32, i32, i32, i32 } @sgpr_literal([9 x <16 x i8>] addrspace(2)* byval %arg, i32 inreg %arg1, i32 inreg %arg2, float %arg3) #0 {
bb:
  ret { i32, i32, i32, i32 } { i32 5, i32 6, i32 7, i32 8 }
}

; Vertex shader returning a mix of float and i32 values:
; (%arg3 + 1.0, %arg2 + 2, %arg3, %arg1, %arg2). %arg3 is exported first.
; The checks expect the floats in v0/v1 and the integers in s0-s2, with the
; incoming v0 saved to v1 before it is overwritten and s2 copied to s1 before
; s2 is overwritten, followed by an export-counter wait and no s_endpgm.
; GCN-LABEL: {{^}}both:
; GCN: v_mov_b32_e32 v1, v0
; GCN-DAG: exp mrt0 v1, v1, v1, v1 done vm
; GCN-DAG: v_add_f32_e32 v0, 1.0, v1
; GCN-DAG: s_add_i32 s0, s3, 2
; GCN-DAG: s_mov_b32 s1, s2
; GCN: s_mov_b32 s2, s3
; GCN: s_waitcnt expcnt(0)
; GCN-NOT: s_endpgm
define amdgpu_vs { float, i32, float, i32, i32 } @both([9 x <16 x i8>] addrspace(2)* byval %arg, i32 inreg %arg1, i32 inreg %arg2, float %arg3) #0 {
bb:
  call void @llvm.amdgcn.exp.f32(i32 0, i32 15, float %arg3, float %arg3, float %arg3, float %arg3, i1 true, i1 true) #0
  %fsum = fadd float %arg3, 1.000000e+00
  %isum = add i32 %arg2, 2
  ; Float and integer members alternate in the aggregate.
  %agg0 = insertvalue { float, i32, float, i32, i32 } undef, float %fsum, 0
  %agg1 = insertvalue { float, i32, float, i32, i32 } %agg0, i32 %isum, 1
  %agg2 = insertvalue { float, i32, float, i32, i32 } %agg1, float %arg3, 2
  %agg3 = insertvalue { float, i32, float, i32, i32 } %agg2, i32 %arg1, 3
  %agg4 = insertvalue { float, i32, float, i32, i32 } %agg3, i32 %arg2, 4
  ret { float, i32, float, i32, i32 } %agg4
}

; Vertex shader that exports %arg3 and returns a constant nested aggregate:
; { { 1.0, 2 }, { 3, <2.0, 4.0> } }.
; The checks expect the incoming v0 to be moved to v3 for the export, the
; float members to land in v0-v2 and the integer members in s0-s1.
; Like every other test in this file, the shader must not end with s_endpgm.
; GCN-LABEL: {{^}}structure_literal:
; GCN: v_mov_b32_e32 v3, v0
; GCN: exp mrt0 v3, v3, v3, v3 done vm

; GCN-DAG: v_mov_b32_e32 v0, 1.0
; GCN-DAG: s_mov_b32 s0, 2
; GCN-DAG: s_mov_b32 s1, 3
; GCN-DAG: v_mov_b32_e32 v1, 2.0
; GCN-DAG: v_mov_b32_e32 v2, 4.0
; GCN: s_waitcnt expcnt(0)
; GCN-NOT: s_endpgm
define amdgpu_vs { { float, i32 }, { i32, <2 x float> } } @structure_literal([9 x <16 x i8>] addrspace(2)* byval %arg, i32 inreg %arg1, i32 inreg %arg2, float %arg3) #0 {
bb:
  call void @llvm.amdgcn.exp.f32(i32 0, i32 15, float %arg3, float %arg3, float %arg3, float %arg3, i1 true, i1 true) #0
  ret { { float, i32 }, { i32, <2 x float> } } { { float, i32 } { float 1.000000e+00, i32 2 }, { i32, <2 x float> } { i32 3, <2 x float> <float 2.000000e+00, float 4.000000e+00> } }
}

; Export intrinsic called by the vertex-shader tests in this file that export
; %arg3 before returning.
declare void @llvm.amdgcn.exp.f32(i32, i32, float, float, float, float, i1, i1) #0

; Attribute groups referenced by the tests in this file.
; #0 is plain nounwind. #1-#4 additionally set "InitialPSInputAddr" to 0, 1,
; 119 and 418 respectively, and are used by the pixel-shader tests.
attributes #0 = { nounwind }
attributes #1 = { nounwind "InitialPSInputAddr"="0" }
attributes #2 = { nounwind "InitialPSInputAddr"="1" }
attributes #3 = { nounwind "InitialPSInputAddr"="119" }
attributes #4 = { nounwind "InitialPSInputAddr"="418" }