; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=GCN %s
; RUN: llc -march=amdgcn -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=VI -check-prefix=GCN %s

; Each kernel below passes the same scalar kernel argument or the same
; floating-point literal to more than one operand of a single operation; the
; check lines pin which operands stay scalar/immediate and which are first
; copied into a vector register with v_mov_b32.

; Intrinsics used by the kernels in this file.
; NOTE(review): no call to llvm.fmuladd.f32 is visible in this file; the
; declaration is kept as-is.
declare float @llvm.fma.f32(float, float, float) #1
declare double @llvm.fma.f64(double, double, double) #1
declare float @llvm.fmuladd.f32(float, float, float) #1
declare float @llvm.amdgcn.div.fixup.f32(float, float, float) #1


; fadd of kernel argument %a with itself. The checks expect one s_load_dword
; and the _e64 form of v_add_f32 reading that same SGPR in both source slots.
; GCN-LABEL: {{^}}test_sgpr_use_twice_binop:
; GCN: s_load_dword [[SGPR:s[0-9]+]],
; GCN: v_add_f32_e64 [[RESULT:v[0-9]+]], [[SGPR]], [[SGPR]]
; GCN: buffer_store_dword [[RESULT]]
define amdgpu_kernel void @test_sgpr_use_twice_binop(float addrspace(1)* %out, float %a) #0 {
  %dbl = fadd float %a, %a
  store float %dbl, float addrspace(1)* %out, align 4
  ret void
}

; fma(%a, %a, %a): all three operands are the same kernel argument. The checks
; expect v_fma_f32 to read the one loaded SGPR in every source slot.
; GCN-LABEL: {{^}}test_sgpr_use_three_ternary_op:
; GCN: s_load_dword [[SGPR:s[0-9]+]],
; GCN: v_fma_f32 [[RESULT:v[0-9]+]], [[SGPR]], [[SGPR]], [[SGPR]]
; GCN: buffer_store_dword [[RESULT]]
define amdgpu_kernel void @test_sgpr_use_three_ternary_op(float addrspace(1)* %out, float %a) #0 {
  %fma = call float @llvm.fma.f32(float %a, float %a, float %a) #1
  store float %fma, float addrspace(1)* %out, align 4
  ret void
}

; fma(%a, %a, %b) with two distinct kernel arguments. The checks expect %b's
; SGPR to be copied into a VGPR while %a's SGPR is read directly, twice.
; The s_load_dword offsets differ between the two RUN configurations.
; GCN-LABEL: {{^}}test_sgpr_use_twice_ternary_op_a_a_b:
; SI-DAG: s_load_dword [[SGPR0:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0xb
; SI-DAG: s_load_dword [[SGPR1:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0xc
; VI-DAG: s_load_dword [[SGPR0:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0x2c
; VI-DAG: s_load_dword [[SGPR1:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0x30
; GCN: v_mov_b32_e32 [[VGPR1:v[0-9]+]], [[SGPR1]]
; GCN: v_fma_f32 [[RESULT:v[0-9]+]], [[SGPR0]], [[SGPR0]], [[VGPR1]]
; GCN: buffer_store_dword [[RESULT]]
define amdgpu_kernel void @test_sgpr_use_twice_ternary_op_a_a_b(float addrspace(1)* %out, float %a, float %b) #0 {
  %fma = call float @llvm.fma.f32(float %a, float %a, float %b) #1
  store float %fma, float addrspace(1)* %out, align 4
  ret void
}

; Two fma calls share kernel arguments %a and %b; the middle operand of each
; comes from a separate volatile load of %in. The checks allow exactly one
; v_mov_b32 (the copy of %b's SGPR into a VGPR) and expect both v_fma_f32
; instructions to read %a's SGPR directly and to share that one VGPR copy.
; GCN-LABEL: {{^}}test_use_s_v_s:
; GCN-DAG: s_load_dword [[SA:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, {{0xb|0x2c}}
; GCN-DAG: s_load_dword [[SB:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, {{0xc|0x30}}
; SI: buffer_load_dword [[VA0:v[0-9]+]]
; SI-NEXT: buffer_load_dword [[VA1:v[0-9]+]]

; GCN-NOT: v_mov_b32

; VI: buffer_load_dword [[VA0:v[0-9]+]]
; VI-NEXT: buffer_load_dword [[VA1:v[0-9]+]]

; GCN-NOT: v_mov_b32
; GCN: v_mov_b32_e32 [[VB:v[0-9]+]], [[SB]]
; GCN-NOT: v_mov_b32

; GCN-DAG: v_fma_f32 [[RESULT0:v[0-9]+]], [[SA]], [[VA0]], [[VB]]
; GCN-DAG: v_fma_f32 [[RESULT1:v[0-9]+]], [[SA]], [[VA1]], [[VB]]
; GCN: buffer_store_dword [[RESULT0]]
; GCN: buffer_store_dword [[RESULT1]]
define amdgpu_kernel void @test_use_s_v_s(float addrspace(1)* %out, float %a, float %b, float addrspace(1)* %in) #0 {
  %va0 = load volatile float, float addrspace(1)* %in
  %va1 = load volatile float, float addrspace(1)* %in
  %fma0 = call float @llvm.fma.f32(float %a, float %va0, float %b) #1
  %fma1 = call float @llvm.fma.f32(float %a, float %va1, float %b) #1
  store volatile float %fma0, float addrspace(1)* %out
  store volatile float %fma1, float addrspace(1)* %out
  ret void
}

; fma(%a, %b, %a): the repeated argument sits in the first and third operand.
; The checks expect %b's SGPR to be copied into a VGPR for the middle slot and
; %a's SGPR to be read directly in the other two.
; GCN-LABEL: {{^}}test_sgpr_use_twice_ternary_op_a_b_a:
; SI-DAG: s_load_dword [[SGPR0:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0xb
; SI-DAG: s_load_dword [[SGPR1:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0xc
; VI-DAG: s_load_dword [[SGPR0:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0x2c
; VI-DAG: s_load_dword [[SGPR1:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0x30
; GCN: v_mov_b32_e32 [[VGPR1:v[0-9]+]], [[SGPR1]]
; GCN: v_fma_f32 [[RESULT:v[0-9]+]], [[SGPR0]], [[VGPR1]], [[SGPR0]]
; GCN: buffer_store_dword [[RESULT]]
define amdgpu_kernel void @test_sgpr_use_twice_ternary_op_a_b_a(float addrspace(1)* %out, float %a, float %b) #0 {
  %fma = call float @llvm.fma.f32(float %a, float %b, float %a) #1
  store float %fma, float addrspace(1)* %out, align 4
  ret void
}

; fma(%b, %a, %a): the repeated argument sits in the second and third operand.
; The checks expect %b's SGPR to be copied into a VGPR for the first slot and
; %a's SGPR to be read directly in the other two.
; GCN-LABEL: {{^}}test_sgpr_use_twice_ternary_op_b_a_a:
; SI-DAG: s_load_dword [[SGPR0:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0xb
; SI-DAG: s_load_dword [[SGPR1:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0xc
; VI-DAG: s_load_dword [[SGPR0:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0x2c
; VI-DAG: s_load_dword [[SGPR1:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0x30
; GCN: v_mov_b32_e32 [[VGPR1:v[0-9]+]], [[SGPR1]]
; GCN: v_fma_f32 [[RESULT:v[0-9]+]], [[VGPR1]], [[SGPR0]], [[SGPR0]]
; GCN: buffer_store_dword [[RESULT]]
define amdgpu_kernel void @test_sgpr_use_twice_ternary_op_b_a_a(float addrspace(1)* %out, float %a, float %b) #0 {
  %fma = call float @llvm.fma.f32(float %b, float %a, float %a) #1
  store float %fma, float addrspace(1)* %out, align 4
  ret void
}

; fma(%a, %a, 2.0): the checks expect the constant 2.0 to appear directly as
; the third operand of v_fma_f32, next to two reads of the same SGPR.
; GCN-LABEL: {{^}}test_sgpr_use_twice_ternary_op_a_a_imm:
; GCN: s_load_dword [[SGPR:s[0-9]+]]
; GCN: v_fma_f32 [[RESULT:v[0-9]+]], [[SGPR]], [[SGPR]], 2.0
; GCN: buffer_store_dword [[RESULT]]
define amdgpu_kernel void @test_sgpr_use_twice_ternary_op_a_a_imm(float addrspace(1)* %out, float %a) #0 {
  %fma = call float @llvm.fma.f32(float %a, float %a, float 2.0) #1
  store float %fma, float addrspace(1)* %out, align 4
  ret void
}

; fma(%a, 2.0, %a): the checks expect the constant 2.0 to appear directly as
; the middle operand of v_fma_f32, between two reads of the same SGPR.
; GCN-LABEL: {{^}}test_sgpr_use_twice_ternary_op_a_imm_a:
; GCN: s_load_dword [[SGPR:s[0-9]+]]
; GCN: v_fma_f32 [[RESULT:v[0-9]+]], [[SGPR]], 2.0, [[SGPR]]
; GCN: buffer_store_dword [[RESULT]]
define amdgpu_kernel void @test_sgpr_use_twice_ternary_op_a_imm_a(float addrspace(1)* %out, float %a) #0 {
  %fma = call float @llvm.fma.f32(float %a, float 2.0, float %a) #1
  store float %fma, float addrspace(1)* %out, align 4
  ret void
}

; Don't use fma since fma c, x, y is canonicalized to fma x, c, y
; llvm.amdgcn.div.fixup.f32 is called instead, with 2.0 as the first operand;
; the checks expect v_div_fixup_f32 to keep 2.0 in that slot and to read the
; same SGPR in the remaining two.
; GCN-LABEL: {{^}}test_sgpr_use_twice_ternary_op_imm_a_a:
; GCN: s_load_dword [[SGPR:s[0-9]+]]
; GCN: v_div_fixup_f32 [[RESULT:v[0-9]+]], 2.0, [[SGPR]], [[SGPR]]
; GCN: buffer_store_dword [[RESULT]]
define amdgpu_kernel void @test_sgpr_use_twice_ternary_op_imm_a_a(float addrspace(1)* %out, float %a) #0 {
  %val = call float @llvm.amdgcn.div.fixup.f32(float 2.0, float %a, float %a) #1
  store float %val, float addrspace(1)* %out, align 4
  ret void
}

; fma(%a, %a, 1024.0): the checks expect the literal (bit pattern 0x44800000)
; to be moved into a VGPR first, with the SGPR read directly in the first two
; source slots.
; GCN-LABEL: {{^}}test_sgpr_use_twice_ternary_op_a_a_kimm:
; GCN-DAG: s_load_dword [[SGPR:s[0-9]+]]
; GCN-DAG: v_mov_b32_e32 [[VK:v[0-9]+]], 0x44800000
; GCN: v_fma_f32 [[RESULT:v[0-9]+]], [[SGPR]], [[SGPR]], [[VK]]
; GCN: buffer_store_dword [[RESULT]]
define amdgpu_kernel void @test_sgpr_use_twice_ternary_op_a_a_kimm(float addrspace(1)* %out, float %a) #0 {
  %fma = call float @llvm.fma.f32(float %a, float %a, float 1024.0) #1
  store float %fma, float addrspace(1)* %out, align 4
  ret void
}

; fma(1024.0, 1024.0, %a): the checks expect the literal to be moved into one
; VGPR that feeds the first two source slots, with the SGPR read directly as
; the third.
; GCN-LABEL: {{^}}test_literal_use_twice_ternary_op_k_k_s:
; GCN-DAG: s_load_dword [[SGPR:s[0-9]+]]
; GCN-DAG: v_mov_b32_e32 [[VK:v[0-9]+]], 0x44800000
; GCN: v_fma_f32 [[RESULT0:v[0-9]+]], [[VK]], [[VK]], [[SGPR]]
; GCN: buffer_store_dword [[RESULT0]]
define amdgpu_kernel void @test_literal_use_twice_ternary_op_k_k_s(float addrspace(1)* %out, float %a) #0 {
  %fma = call float @llvm.fma.f32(float 1024.0, float 1024.0, float %a) #1
  store float %fma, float addrspace(1)* %out
  ret void
}

; Two fma(1024.0, 1024.0, x) calls, one per kernel argument. The checks expect
; both v_fma_f32 instructions to share a single VGPR holding the literal and
; to read their respective SGPR directly as the third operand.
; GCN-LABEL: {{^}}test_literal_use_twice_ternary_op_k_k_s_x2:
; GCN-DAG: s_load_dword [[SGPR0:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, {{0xb|0x2c}}
; GCN-DAG: s_load_dword [[SGPR1:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, {{0xc|0x30}}
; GCN-DAG: v_mov_b32_e32 [[VK:v[0-9]+]], 0x44800000
; GCN-DAG: v_fma_f32 [[RESULT0:v[0-9]+]], [[VK]], [[VK]], [[SGPR0]]
; GCN-DAG: v_fma_f32 [[RESULT1:v[0-9]+]], [[VK]], [[VK]], [[SGPR1]]
; GCN: buffer_store_dword [[RESULT0]]
; GCN: buffer_store_dword [[RESULT1]]
; GCN: s_endpgm
define amdgpu_kernel void @test_literal_use_twice_ternary_op_k_k_s_x2(float addrspace(1)* %out, float %a, float %b) #0 {
  %fma0 = call float @llvm.fma.f32(float 1024.0, float 1024.0, float %a) #1
  %fma1 = call float @llvm.fma.f32(float 1024.0, float 1024.0, float %b) #1
  store volatile float %fma0, float addrspace(1)* %out
  store volatile float %fma1, float addrspace(1)* %out
  ret void
}

; fma(1024.0, %a, 1024.0): the checks expect the SGPR in the first source slot
; of v_fma_f32 and one VGPR holding the literal in the other two.
; GCN-LABEL: {{^}}test_literal_use_twice_ternary_op_k_s_k:
; GCN-DAG: s_load_dword [[SGPR:s[0-9]+]]
; GCN-DAG: v_mov_b32_e32 [[VK:v[0-9]+]], 0x44800000
; GCN: v_fma_f32 [[RESULT:v[0-9]+]], [[SGPR]], [[VK]], [[VK]]
; GCN: buffer_store_dword [[RESULT]]
define amdgpu_kernel void @test_literal_use_twice_ternary_op_k_s_k(float addrspace(1)* %out, float %a) #0 {
  %fma = call float @llvm.fma.f32(float 1024.0, float %a, float 1024.0) #1
  store float %fma, float addrspace(1)* %out
  ret void
}

; Two fma(1024.0, x, 1024.0) calls, one per kernel argument. The checks expect
; each v_fma_f32 to read its SGPR in the first source slot and both to share a
; single VGPR holding the literal in the other two.
; GCN-LABEL: {{^}}test_literal_use_twice_ternary_op_k_s_k_x2:
; GCN-DAG: s_load_dword [[SGPR0:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, {{0xb|0x2c}}
; GCN-DAG: s_load_dword [[SGPR1:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, {{0xc|0x30}}
; GCN-DAG: v_mov_b32_e32 [[VK:v[0-9]+]], 0x44800000
; GCN-DAG: v_fma_f32 [[RESULT0:v[0-9]+]], [[SGPR0]], [[VK]], [[VK]]
; GCN-DAG: v_fma_f32 [[RESULT1:v[0-9]+]], [[SGPR1]], [[VK]], [[VK]]
; GCN: buffer_store_dword [[RESULT0]]
; GCN: buffer_store_dword [[RESULT1]]
; GCN: s_endpgm
define amdgpu_kernel void @test_literal_use_twice_ternary_op_k_s_k_x2(float addrspace(1)* %out, float %a, float %b) #0 {
  %fma0 = call float @llvm.fma.f32(float 1024.0, float %a, float 1024.0) #1
  %fma1 = call float @llvm.fma.f32(float 1024.0, float %b, float 1024.0) #1
  store volatile float %fma0, float addrspace(1)* %out
  store volatile float %fma1, float addrspace(1)* %out
  ret void
}

; fma(%a, 1024.0, 1024.0): the checks expect the SGPR in the first source slot
; of v_fma_f32 and one VGPR holding the literal in the other two.
; GCN-LABEL: {{^}}test_literal_use_twice_ternary_op_s_k_k:
; GCN-DAG: s_load_dword [[SGPR:s[0-9]+]]
; GCN-DAG: v_mov_b32_e32 [[VK:v[0-9]+]], 0x44800000
; GCN: v_fma_f32 [[RESULT:v[0-9]+]], [[SGPR]], [[VK]], [[VK]]
; GCN: buffer_store_dword [[RESULT]]
define amdgpu_kernel void @test_literal_use_twice_ternary_op_s_k_k(float addrspace(1)* %out, float %a) #0 {
  %fma = call float @llvm.fma.f32(float %a, float 1024.0, float 1024.0) #1
  store float %fma, float addrspace(1)* %out
  ret void
}

; Two fma(x, 1024.0, 1024.0) calls, one per kernel argument. The checks expect
; each v_fma_f32 to read its SGPR in the first source slot and both to share a
; single VGPR holding the literal in the other two.
; GCN-LABEL: {{^}}test_literal_use_twice_ternary_op_s_k_k_x2:
; GCN-DAG: s_load_dword [[SGPR0:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, {{0xb|0x2c}}
; GCN-DAG: s_load_dword [[SGPR1:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, {{0xc|0x30}}
; GCN-DAG: v_mov_b32_e32 [[VK:v[0-9]+]], 0x44800000
; GCN-DAG: v_fma_f32 [[RESULT0:v[0-9]+]], [[SGPR0]], [[VK]], [[VK]]
; GCN-DAG: v_fma_f32 [[RESULT1:v[0-9]+]], [[SGPR1]], [[VK]], [[VK]]
; GCN: buffer_store_dword [[RESULT0]]
; GCN: buffer_store_dword [[RESULT1]]
; GCN: s_endpgm
define amdgpu_kernel void @test_literal_use_twice_ternary_op_s_k_k_x2(float addrspace(1)* %out, float %a, float %b) #0 {
  %fma0 = call float @llvm.fma.f32(float %a, float 1024.0, float 1024.0) #1
  %fma1 = call float @llvm.fma.f32(float %b, float 1024.0, float 1024.0) #1
  store volatile float %fma0, float addrspace(1)* %out
  store volatile float %fma1, float addrspace(1)* %out
  ret void
}

; Two fma(%a, %b, K) calls that differ only in the literal K (1024.0, then
; 4096.0). The checks expect %b's SGPR to be copied into one VGPR shared by
; both v_fma_f32 instructions, %a's SGPR to be read directly, and each literal
; to be moved into its own VGPR (0x44800000 and 0x45800000).
; GCN-LABEL: {{^}}test_s0_s1_k_f32:
; GCN-DAG: s_load_dword [[SGPR0:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, {{0xb|0x2c}}
; GCN-DAG: s_load_dword [[SGPR1:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, {{0xc|0x30}}
; GCN-DAG: v_mov_b32_e32 [[VK0:v[0-9]+]], 0x44800000
; GCN-DAG: v_mov_b32_e32 [[VS1:v[0-9]+]], [[SGPR1]]

; GCN-DAG: v_fma_f32 [[RESULT0:v[0-9]+]], [[SGPR0]], [[VS1]], [[VK0]]
; GCN-DAG: v_mov_b32_e32 [[VK1:v[0-9]+]], 0x45800000
; GCN-DAG: v_fma_f32 [[RESULT1:v[0-9]+]], [[SGPR0]], [[VS1]], [[VK1]]

; GCN: buffer_store_dword [[RESULT0]]
; GCN: buffer_store_dword [[RESULT1]]
define amdgpu_kernel void @test_s0_s1_k_f32(float addrspace(1)* %out, float %a, float %b) #0 {
  %fma0 = call float @llvm.fma.f32(float %a, float %b, float 1024.0) #1
  %fma1 = call float @llvm.fma.f32(float %a, float %b, float 4096.0) #1
  store volatile float %fma0, float addrspace(1)* %out
  store volatile float %fma1, float addrspace(1)* %out
  ret void
}

; Double-precision variant: two fma(%a, %b, K) calls with K = 1024.0, then
; 4096.0. The checks expect both halves of %b to be copied into VGPRs, %a's
; SGPR pair to be read directly, and each 64-bit literal to be formed from a
; zero VGPR plus a VGPR holding the other 32 bits (0x40900000 / 0x40b00000).
; FIXME: Immediate in SGPRs just copied to VGPRs
; GCN-LABEL: {{^}}test_s0_s1_k_f64:
; GCN-DAG: s_load_dwordx2 [[SGPR0:s\[[0-9]+:[0-9]+\]]], s{{\[[0-9]+:[0-9]+\]}}, {{0xb|0x2c}}
; GCN-DAG: s_load_dwordx2 s{{\[}}[[SGPR1_SUB0:[0-9]+]]:[[SGPR1_SUB1:[0-9]+]]{{\]}}, s{{\[[0-9]+:[0-9]+\]}}, {{0xd|0x34}}
; GCN-DAG: v_mov_b32_e32 v[[VK0_SUB1:[0-9]+]], 0x40900000
; GCN-DAG: v_mov_b32_e32 v[[VZERO:[0-9]+]], 0{{$}}

; GCN-DAG: v_mov_b32_e32 v[[VS1_SUB0:[0-9]+]], s[[SGPR1_SUB0]]
; GCN-DAG: v_mov_b32_e32 v[[VS1_SUB1:[0-9]+]], s[[SGPR1_SUB1]]
; GCN: v_fma_f64 [[RESULT0:v\[[0-9]+:[0-9]+\]]], [[SGPR0]], v{{\[}}[[VS1_SUB0]]:[[VS1_SUB1]]{{\]}}, v{{\[}}[[VZERO]]:[[VK0_SUB1]]{{\]}}

; Same zero component is re-used for half of each immediate.
; GCN: v_mov_b32_e32 v[[VK1_SUB1:[0-9]+]], 0x40b00000
; GCN: v_fma_f64 [[RESULT1:v\[[0-9]+:[0-9]+\]]], [[SGPR0]], v{{\[}}[[VS1_SUB0]]:[[VS1_SUB1]]{{\]}}, v{{\[}}[[VZERO]]:[[VK1_SUB1]]{{\]}}

; GCN: buffer_store_dwordx2 [[RESULT0]]
; GCN: buffer_store_dwordx2 [[RESULT1]]
define amdgpu_kernel void @test_s0_s1_k_f64(double addrspace(1)* %out, double %a, double %b) #0 {
  %fma0 = call double @llvm.fma.f64(double %a, double %b, double 1024.0) #1
  %fma1 = call double @llvm.fma.f64(double %a, double %b, double 4096.0) #1
  store volatile double %fma0, double addrspace(1)* %out
  store volatile double %fma1, double addrspace(1)* %out
  ret void
}

; #0 is attached to every kernel definition in this file; #1 is attached to
; the intrinsic declarations and to their call sites.
attributes #0 = { nounwind }
attributes #1 = { nounwind readnone }