; RUN: llc -march=amdgcn -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=GCN %s
; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=VI -check-prefix=GCN %s

; Each kernel below feeds one scalar kernel argument, or one literal constant,
; into several operand slots of a single VALU instruction. The checks pin
; which operands are expected to stay in SGPRs and which are expected to be
; moved into a VGPR first.

; Intrinsics called by the tests. llvm.fmuladd.f32 is declared but not called
; by any test visible in this file.
declare float @llvm.fma.f32(float, float, float) #1
declare double @llvm.fma.f64(double, double, double) #1
declare float @llvm.fmuladd.f32(float, float, float) #1
declare i32 @llvm.AMDGPU.imad24(i32, i32, i32) #1


; %a + %a: the checks expect the one loaded SGPR to be used directly as both
; sources of a single v_add_f32_e64, with no copy to a VGPR.
; GCN-LABEL: {{^}}test_sgpr_use_twice_binop:
; GCN: s_load_dword [[SGPR:s[0-9]+]],
; GCN: v_add_f32_e64 [[RESULT:v[0-9]+]], [[SGPR]], [[SGPR]]
; GCN: buffer_store_dword [[RESULT]]
define void @test_sgpr_use_twice_binop(float addrspace(1)* %out, float %a) #0 {
  %dbl = fadd float %a, %a
  store float %dbl, float addrspace(1)* %out, align 4
  ret void
}

; fma(%a, %a, %a): the checks expect the same SGPR in all three source slots
; of v_fma_f32.
; GCN-LABEL: {{^}}test_sgpr_use_three_ternary_op:
; GCN: s_load_dword [[SGPR:s[0-9]+]],
; GCN: v_fma_f32 [[RESULT:v[0-9]+]], [[SGPR]], [[SGPR]], [[SGPR]]
; GCN: buffer_store_dword [[RESULT]]
define void @test_sgpr_use_three_ternary_op(float addrspace(1)* %out, float %a) #0 {
  %fma = call float @llvm.fma.f32(float %a, float %a, float %a) #1
  store float %fma, float addrspace(1)* %out, align 4
  ret void
}

; fma(%a, %a, %b): two distinct scalar arguments. The checks expect %b (the
; second load) to be copied into a VGPR while %a stays in its SGPR for both
; multiplicand slots. The load offsets differ per target (0xb/0xc vs
; 0x2c/0x30), hence the target-specific prefixes on the loads.
; GCN-LABEL: {{^}}test_sgpr_use_twice_ternary_op_a_a_b:
; SI: s_load_dword [[SGPR0:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0xb
; SI: s_load_dword [[SGPR1:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0xc
; VI: s_load_dword [[SGPR0:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0x2c
; VI: s_load_dword [[SGPR1:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0x30
; GCN: v_mov_b32_e32 [[VGPR1:v[0-9]+]], [[SGPR1]]
; GCN: v_fma_f32 [[RESULT:v[0-9]+]], [[SGPR0]], [[SGPR0]], [[VGPR1]]
; GCN: buffer_store_dword [[RESULT]]
define void @test_sgpr_use_twice_ternary_op_a_a_b(float addrspace(1)* %out, float %a, float %b) #0 {
  %fma = call float @llvm.fma.f32(float %a, float %a, float %b) #1
  store float %fma, float addrspace(1)* %out, align 4
  ret void
}

; fma(%a, %b, %a): the checks expect %b to be copied into a VGPR and emitted
; as src0, with the %a SGPR in src1 and src2 (the multiplicands appear
; swapped relative to the IR).
; GCN-LABEL: {{^}}test_sgpr_use_twice_ternary_op_a_b_a:
; SI: s_load_dword [[SGPR0:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0xb
; SI: s_load_dword [[SGPR1:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0xc
; VI: s_load_dword [[SGPR0:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0x2c
; VI: s_load_dword [[SGPR1:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0x30
; GCN: v_mov_b32_e32 [[VGPR1:v[0-9]+]], [[SGPR1]]
; GCN: v_fma_f32 [[RESULT:v[0-9]+]], [[VGPR1]], [[SGPR0]], [[SGPR0]]
; GCN: buffer_store_dword [[RESULT]]
define void @test_sgpr_use_twice_ternary_op_a_b_a(float addrspace(1)* %out, float %a, float %b) #0 {
  %fma = call float @llvm.fma.f32(float %a, float %b, float %a) #1
  store float %fma, float addrspace(1)* %out, align 4
  ret void
}

; fma(%b, %a, %a): the checks expect %b to be copied into a VGPR and emitted
; as src1, with the %a SGPR in src0 and src2.
; GCN-LABEL: {{^}}test_sgpr_use_twice_ternary_op_b_a_a:
; SI: s_load_dword [[SGPR0:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0xb
; SI: s_load_dword [[SGPR1:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0xc
; VI: s_load_dword [[SGPR0:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0x2c
; VI: s_load_dword [[SGPR1:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0x30
; GCN: v_mov_b32_e32 [[VGPR1:v[0-9]+]], [[SGPR1]]
; GCN: v_fma_f32 [[RESULT:v[0-9]+]], [[SGPR0]], [[VGPR1]], [[SGPR0]]
; GCN: buffer_store_dword [[RESULT]]
define void @test_sgpr_use_twice_ternary_op_b_a_a(float addrspace(1)* %out, float %a, float %b) #0 {
  %fma = call float @llvm.fma.f32(float %b, float %a, float %a) #1
  store float %fma, float addrspace(1)* %out, align 4
  ret void
}

; fma(%a, %a, 2.0): the checks expect the SGPR in both multiplicand slots and
; the constant 2.0 written directly as the addend operand.
; GCN-LABEL: {{^}}test_sgpr_use_twice_ternary_op_a_a_imm:
; GCN: s_load_dword [[SGPR:s[0-9]+]]
; GCN: v_fma_f32 [[RESULT:v[0-9]+]], [[SGPR]], [[SGPR]], 2.0
; GCN: buffer_store_dword [[RESULT]]
define void @test_sgpr_use_twice_ternary_op_a_a_imm(float addrspace(1)* %out, float %a) #0 {
  %fma = call float @llvm.fma.f32(float %a, float %a, float 2.0) #1
  store float %fma, float addrspace(1)* %out, align 4
  ret void
}

; fma(%a, 2.0, %a): the checks expect the constant 2.0 as src0 and the SGPR as
; src1 and src2 (the multiplicands appear swapped relative to the IR).
; GCN-LABEL: {{^}}test_sgpr_use_twice_ternary_op_a_imm_a:
; GCN: s_load_dword [[SGPR:s[0-9]+]]
; GCN: v_fma_f32 [[RESULT:v[0-9]+]], 2.0, [[SGPR]], [[SGPR]]
; GCN: buffer_store_dword [[RESULT]]
define void @test_sgpr_use_twice_ternary_op_a_imm_a(float addrspace(1)* %out, float %a) #0 {
  %fma = call float @llvm.fma.f32(float %a, float 2.0, float %a) #1
  store float %fma, float addrspace(1)* %out, align 4
  ret void
}

; Don't use fma since fma c, x, y is canonicalized to fma x, c, y
; The integer imad24 intrinsic is used instead, with the constant 2 as its
; first argument; the checks expect v_mad_i32_i24 with 2 as src0 and the SGPR
; as src1 and src2.
; GCN-LABEL: {{^}}test_sgpr_use_twice_ternary_op_imm_a_a:
; GCN: s_load_dword [[SGPR:s[0-9]+]]
; GCN: v_mad_i32_i24 [[RESULT:v[0-9]+]], 2, [[SGPR]], [[SGPR]]
; GCN: buffer_store_dword [[RESULT]]
define void @test_sgpr_use_twice_ternary_op_imm_a_a(i32 addrspace(1)* %out, i32 %a) #0 {
  %fma = call i32 @llvm.AMDGPU.imad24(i32 2, i32 %a, i32 %a) #1
  store i32 %fma, i32 addrspace(1)* %out, align 4
  ret void
}

; fma(%a, %a, 1024.0): 0x44800000 is the IEEE-754 single-precision bit pattern
; of 1024.0. The checks expect that literal to be moved into a VGPR and used
; as the addend, while the SGPR fills both multiplicand slots. The load and
; the literal move may appear in either order (DAG checks).
; GCN-LABEL: {{^}}test_sgpr_use_twice_ternary_op_a_a_kimm:
; GCN-DAG: s_load_dword [[SGPR:s[0-9]+]]
; GCN-DAG: v_mov_b32_e32 [[VK:v[0-9]+]], 0x44800000
; GCN: v_fma_f32 [[RESULT:v[0-9]+]], [[SGPR]], [[SGPR]], [[VK]]
; GCN: buffer_store_dword [[RESULT]]
define void @test_sgpr_use_twice_ternary_op_a_a_kimm(float addrspace(1)* %out, float %a) #0 {
  %fma = call float @llvm.fma.f32(float %a, float %a, float 1024.0) #1
  store float %fma, float addrspace(1)* %out, align 4
  ret void
}

; fma(1024.0, 1024.0, %a): the checks expect the literal (0x44800000) to be
; moved into one VGPR that is used for both multiplicands, with the SGPR as
; the addend.
; GCN-LABEL: {{^}}test_literal_use_twice_ternary_op_k_k_s:
; GCN-DAG: s_load_dword [[SGPR:s[0-9]+]]
; GCN-DAG: v_mov_b32_e32 [[VK:v[0-9]+]], 0x44800000
; GCN: v_fma_f32 [[RESULT0:v[0-9]+]], [[VK]], [[VK]], [[SGPR]]
; GCN: buffer_store_dword [[RESULT0]]
define void @test_literal_use_twice_ternary_op_k_k_s(float addrspace(1)* %out, float %a) #0 {
  %fma = call float @llvm.fma.f32(float 1024.0, float 1024.0, float %a) #1
  store float %fma, float addrspace(1)* %out
  ret void
}

; Two fma(1024.0, 1024.0, x) calls with different scalar addends. The checks
; expect a single VGPR holding the literal to be shared by both v_fma_f32
; instructions. Both stores are volatile and go to the same address, and the
; checks expect both buffer stores in order.
; GCN-LABEL: {{^}}test_literal_use_twice_ternary_op_k_k_s_x2:
; GCN-DAG: s_load_dword [[SGPR0:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, {{0xb|0x2c}}
; GCN-DAG: s_load_dword [[SGPR1:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, {{0xc|0x30}}
; GCN-DAG: v_mov_b32_e32 [[VK:v[0-9]+]], 0x44800000
; GCN-DAG: v_fma_f32 [[RESULT0:v[0-9]+]], [[VK]], [[VK]], [[SGPR0]]
; GCN-DAG: v_fma_f32 [[RESULT1:v[0-9]+]], [[VK]], [[VK]], [[SGPR1]]
; GCN: buffer_store_dword [[RESULT0]]
; GCN: buffer_store_dword [[RESULT1]]
; GCN: s_endpgm
define void @test_literal_use_twice_ternary_op_k_k_s_x2(float addrspace(1)* %out, float %a, float %b) #0 {
  %fma0 = call float @llvm.fma.f32(float 1024.0, float 1024.0, float %a) #1
  %fma1 = call float @llvm.fma.f32(float 1024.0, float 1024.0, float %b) #1
  store volatile float %fma0, float addrspace(1)* %out
  store volatile float %fma1, float addrspace(1)* %out
  ret void
}

; fma(1024.0, %a, 1024.0): the checks expect the SGPR as src0 and the VGPR
; holding the literal as src1 and src2 (the multiplicands appear swapped
; relative to the IR).
; GCN-LABEL: {{^}}test_literal_use_twice_ternary_op_k_s_k:
; GCN-DAG: s_load_dword [[SGPR:s[0-9]+]]
; GCN-DAG: v_mov_b32_e32 [[VK:v[0-9]+]], 0x44800000
; GCN: v_fma_f32 [[RESULT:v[0-9]+]], [[SGPR]], [[VK]], [[VK]]
; GCN: buffer_store_dword [[RESULT]]
define void @test_literal_use_twice_ternary_op_k_s_k(float addrspace(1)* %out, float %a) #0 {
  %fma = call float @llvm.fma.f32(float 1024.0, float %a, float 1024.0) #1
  store float %fma, float addrspace(1)* %out
  ret void
}

; Two fma(1024.0, x, 1024.0) calls with different scalar multiplicands. The
; checks expect each SGPR as src0 and a single shared VGPR holding the literal
; as src1 and src2 of both instructions. Both volatile stores must appear.
; GCN-LABEL: {{^}}test_literal_use_twice_ternary_op_k_s_k_x2:
; GCN-DAG: s_load_dword [[SGPR0:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, {{0xb|0x2c}}
; GCN-DAG: s_load_dword [[SGPR1:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, {{0xc|0x30}}
; GCN-DAG: v_mov_b32_e32 [[VK:v[0-9]+]], 0x44800000
; GCN-DAG: v_fma_f32 [[RESULT0:v[0-9]+]], [[SGPR0]], [[VK]], [[VK]]
; GCN-DAG: v_fma_f32 [[RESULT1:v[0-9]+]], [[SGPR1]], [[VK]], [[VK]]
; GCN: buffer_store_dword [[RESULT0]]
; GCN: buffer_store_dword [[RESULT1]]
; GCN: s_endpgm
define void @test_literal_use_twice_ternary_op_k_s_k_x2(float addrspace(1)* %out, float %a, float %b) #0 {
  %fma0 = call float @llvm.fma.f32(float 1024.0, float %a, float 1024.0) #1
  %fma1 = call float @llvm.fma.f32(float 1024.0, float %b, float 1024.0) #1
  store volatile float %fma0, float addrspace(1)* %out
  store volatile float %fma1, float addrspace(1)* %out
  ret void
}

; fma(%a, 1024.0, 1024.0): the checks expect the SGPR as src0 and the VGPR
; holding the literal as src1 and src2, matching the IR operand order.
; GCN-LABEL: {{^}}test_literal_use_twice_ternary_op_s_k_k:
; GCN-DAG: s_load_dword [[SGPR:s[0-9]+]]
; GCN-DAG: v_mov_b32_e32 [[VK:v[0-9]+]], 0x44800000
; GCN: v_fma_f32 [[RESULT:v[0-9]+]], [[SGPR]], [[VK]], [[VK]]
; GCN: buffer_store_dword [[RESULT]]
define void @test_literal_use_twice_ternary_op_s_k_k(float addrspace(1)* %out, float %a) #0 {
  %fma = call float @llvm.fma.f32(float %a, float 1024.0, float 1024.0) #1
  store float %fma, float addrspace(1)* %out
  ret void
}

; Two fma(x, 1024.0, 1024.0) calls with different scalar first operands. The
; checks expect each SGPR as src0 and a single shared VGPR holding the literal
; as src1 and src2 of both instructions. Both volatile stores must appear.
; GCN-LABEL: {{^}}test_literal_use_twice_ternary_op_s_k_k_x2:
; GCN-DAG: s_load_dword [[SGPR0:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, {{0xb|0x2c}}
; GCN-DAG: s_load_dword [[SGPR1:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, {{0xc|0x30}}
; GCN-DAG: v_mov_b32_e32 [[VK:v[0-9]+]], 0x44800000
; GCN-DAG: v_fma_f32 [[RESULT0:v[0-9]+]], [[SGPR0]], [[VK]], [[VK]]
; GCN-DAG: v_fma_f32 [[RESULT1:v[0-9]+]], [[SGPR1]], [[VK]], [[VK]]
; GCN: buffer_store_dword [[RESULT0]]
; GCN: buffer_store_dword [[RESULT1]]
; GCN: s_endpgm
define void @test_literal_use_twice_ternary_op_s_k_k_x2(float addrspace(1)* %out, float %a, float %b) #0 {
  %fma0 = call float @llvm.fma.f32(float %a, float 1024.0, float 1024.0) #1
  %fma1 = call float @llvm.fma.f32(float %b, float 1024.0, float 1024.0) #1
  store volatile float %fma0, float addrspace(1)* %out
  store volatile float %fma1, float addrspace(1)* %out
  ret void
}

; Two fma(%a, %b, K) calls that differ only in the literal addend: 1024.0
; (0x44800000) and 4096.0 (0x45800000). The checks expect %a to stay in its
; SGPR as src0, %b to be copied into a VGPR for src1, and each literal to be
; moved into its own VGPR for src2.
; GCN-LABEL: {{^}}test_s0_s1_k_f32:
; GCN-DAG: s_load_dword [[SGPR0:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, {{0xb|0x2c}}
; GCN-DAG: s_load_dword [[SGPR1:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, {{0xc|0x30}}
; GCN-DAG: v_mov_b32_e32 [[VK0:v[0-9]+]], 0x44800000

; FIXME: Why do we end up with 2 copies of the same SGPR? These should be CSE'd
; GCN: v_mov_b32_e32 [[VS1_1:v[0-9]+]], [[SGPR1]]
; GCN: v_mov_b32_e32 [[VS1_0:v[0-9]+]], [[SGPR1]]

; GCN-DAG: v_fma_f32 [[RESULT0:v[0-9]+]], [[SGPR0]], [[VS1_0]], [[VK0]]
; GCN-DAG: v_mov_b32_e32 [[VK1:v[0-9]+]], 0x45800000
; GCN-DAG: v_fma_f32 [[RESULT1:v[0-9]+]], [[SGPR0]], [[VS1_1]], [[VK1]]

; GCN: buffer_store_dword [[RESULT0]]
; GCN: buffer_store_dword [[RESULT1]]
define void @test_s0_s1_k_f32(float addrspace(1)* %out, float %a, float %b) #0 {
  %fma0 = call float @llvm.fma.f32(float %a, float %b, float 1024.0) #1
  %fma1 = call float @llvm.fma.f32(float %a, float %b, float 4096.0) #1
  store volatile float %fma0, float addrspace(1)* %out
  store volatile float %fma1, float addrspace(1)* %out
  ret void
}

; Double-precision variant of test_s0_s1_k_f32: two fma(%a, %b, K) calls with
; K = 1024.0 and K = 4096.0. 0x40900000 and 0x40b00000 are the upper 32 bits
; of the IEEE-754 double encodings of 1024.0 and 4096.0; the lower 32 bits of
; both are zero, which is why one zero SGPR feeds the low half of both
; constants in the checks below.
; FIXME: Immediate in SGPRs just copied to VGPRs
; GCN-LABEL: {{^}}test_s0_s1_k_f64:
; GCN-DAG: s_load_dwordx2 [[SGPR0:s\[[0-9]+:[0-9]+\]]], s{{\[[0-9]+:[0-9]+\]}}, {{0xb|0x2c}}
; GCN-DAG: s_load_dwordx2 s{{\[}}[[SGPR1_SUB0:[0-9]+]]:[[SGPR1_SUB1:[0-9]+]]{{\]}}, s{{\[[0-9]+:[0-9]+\]}}, {{0xd|0x34}}
; GCN-DAG: s_mov_b32 s[[SK0_SUB1:[0-9]+]], 0x40900000
; GCN-DAG: s_mov_b32 s[[SZERO:[0-9]+]], 0{{$}}
; GCN-DAG: v_mov_b32_e32 v[[VK0_SUB0:[0-9]+]], s[[SZERO]]
; GCN-DAG: v_mov_b32_e32 v[[VK0_SUB1:[0-9]+]], s[[SK0_SUB1]]

; GCN-DAG: s_mov_b32 s[[SK1_SUB0:[0-9]+]], 0x40b00000{{$}}

; FIXME: Redundant copies
; GCN: v_mov_b32_e32 v[[VS1_1_SUB0:[0-9]+]], s[[SGPR1_SUB0]]
; GCN: v_mov_b32_e32 v[[VS1_1_SUB1:[0-9]+]], s[[SGPR1_SUB1]]
; GCN: v_mov_b32_e32 v[[VS1_0_SUB0:[0-9]+]], s[[SGPR1_SUB0]]
; GCN: v_mov_b32_e32 v[[VS1_0_SUB1:[0-9]+]], s[[SGPR1_SUB1]]


; GCN-DAG: v_fma_f64 [[RESULT0:v\[[0-9]+:[0-9]+\]]], [[SGPR0]], v{{\[}}[[VS1_0_SUB0]]:[[VS1_0_SUB1]]{{\]}}, v{{\[}}[[VK0_SUB0]]:[[VK0_SUB1]]{{\]}}

; GCN-DAG: v_mov_b32_e32 v[[VK1_SUB0:[0-9]+]], s[[SZERO]]
; GCN-DAG: v_mov_b32_e32 v[[VK1_SUB1:[0-9]+]], s[[SK1_SUB0]]

; GCN-DAG: v_fma_f64 [[RESULT1:v\[[0-9]+:[0-9]+\]]], [[SGPR0]], v{{\[}}[[VS1_1_SUB0]]:[[VS1_1_SUB1]]{{\]}}, v{{\[}}[[VK1_SUB0]]:[[VK1_SUB1]]{{\]}}

; GCN: buffer_store_dwordx2 [[RESULT0]]
; GCN: buffer_store_dwordx2 [[RESULT1]]
define void @test_s0_s1_k_f64(double addrspace(1)* %out, double %a, double %b) #0 {
  %fma0 = call double @llvm.fma.f64(double %a, double %b, double 1024.0) #1
  %fma1 = call double @llvm.fma.f64(double %a, double %b, double 4096.0) #1
  store volatile double %fma0, double addrspace(1)* %out
  store volatile double %fma1, double addrspace(1)* %out
  ret void
}

; #0 is attached to every kernel definition in this file; #1 is attached to
; the intrinsic declarations and their call sites.
attributes #0 = { nounwind }
attributes #1 = { nounwind readnone }