; RUN: llc -march=amdgcn -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI %s

; Intrinsics exercised by the tests in this file.
declare i32 @llvm.r600.read.tidig.x() nounwind readnone
declare { float, i1 } @llvm.AMDGPU.div.scale.f32(float, float, i1) nounwind readnone
declare { double, i1 } @llvm.AMDGPU.div.scale.f64(double, double, i1) nounwind readnone
declare float @llvm.fabs.f32(float) nounwind readnone

; f32 div.scale with both operands loaded per-thread and the i1 operand false.
; The checks expect the second operand (%b) in src0/src1 and %a in src2.
; SI-LABEL: {{^}}test_div_scale_f32_1:
; SI-DAG: buffer_load_dword [[A:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64
; SI-DAG: buffer_load_dword [[B:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:4
; SI: v_div_scale_f32 [[RESULT0:v[0-9]+]], [[RESULT1:s\[[0-9]+:[0-9]+\]]], [[B]], [[B]], [[A]]
; SI: buffer_store_dword [[RESULT0]]
; SI: s_endpgm
define void @test_div_scale_f32_1(float addrspace(1)* %out, float addrspace(1)* %in) nounwind {
  %tid = call i32 @llvm.r600.read.tidig.x() nounwind readnone
  %gep.0 = getelementptr float addrspace(1)* %in, i32 %tid
  %gep.1 = getelementptr float addrspace(1)* %gep.0, i32 1

  %a = load float addrspace(1)* %gep.0, align 4
  %b = load float addrspace(1)* %gep.1, align 4

  %result = call { float, i1 } @llvm.AMDGPU.div.scale.f32(float %a, float %b, i1 false) nounwind readnone
  %result0 = extractvalue { float, i1 } %result, 0
  store float %result0, float addrspace(1)* %out, align 4
  ret void
}

; f32 div.scale with both operands loaded per-thread and the i1 operand true.
; The checks expect %a in src0 and src2, and %b in src1.
; SI-LABEL: {{^}}test_div_scale_f32_2:
; SI-DAG: buffer_load_dword [[A:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64
; SI-DAG: buffer_load_dword [[B:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:4
; SI: v_div_scale_f32 [[RESULT0:v[0-9]+]], [[RESULT1:s\[[0-9]+:[0-9]+\]]], [[A]], [[B]], [[A]]
; SI: buffer_store_dword [[RESULT0]]
; SI: s_endpgm
define void @test_div_scale_f32_2(float addrspace(1)* %out, float addrspace(1)* %in) nounwind {
  %tid = call i32 @llvm.r600.read.tidig.x() nounwind readnone
  %gep.0 = getelementptr float addrspace(1)* %in, i32 %tid
  %gep.1 = getelementptr float addrspace(1)* %gep.0, i32 1

  %a = load float addrspace(1)* %gep.0, align 4
  %b = load float addrspace(1)* %gep.1, align 4

  %result = call { float, i1 } @llvm.AMDGPU.div.scale.f32(float %a, float %b, i1 true) nounwind readnone
  %result0 = extractvalue { float, i1 } %result, 0
  store float %result0, float addrspace(1)* %out, align 4
  ret void
}

; f64 div.scale with both operands loaded per-thread and the i1 operand false.
; The checks expect %b in src0/src1 and %a in src2. %aptr is unused.
; SI-LABEL: {{^}}test_div_scale_f64_1:
; SI-DAG: buffer_load_dwordx2 [[A:v\[[0-9]+:[0-9]+\]]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64
; SI-DAG: buffer_load_dwordx2 [[B:v\[[0-9]+:[0-9]+\]]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:8
; SI: v_div_scale_f64 [[RESULT0:v\[[0-9]+:[0-9]+\]]], [[RESULT1:s\[[0-9]+:[0-9]+\]]], [[B]], [[B]], [[A]]
; SI: buffer_store_dwordx2 [[RESULT0]]
; SI: s_endpgm
define void @test_div_scale_f64_1(double addrspace(1)* %out, double addrspace(1)* %aptr, double addrspace(1)* %in) nounwind {
  %tid = call i32 @llvm.r600.read.tidig.x() nounwind readnone
  %gep.0 = getelementptr double addrspace(1)* %in, i32 %tid
  %gep.1 = getelementptr double addrspace(1)* %gep.0, i32 1

  %a = load double addrspace(1)* %gep.0, align 8
  %b = load double addrspace(1)* %gep.1, align 8

  %result = call { double, i1 } @llvm.AMDGPU.div.scale.f64(double %a, double %b, i1 false) nounwind readnone
  %result0 = extractvalue { double, i1 } %result, 0
  store double %result0, double addrspace(1)* %out, align 8
  ret void
}

; f64 div.scale with both operands loaded per-thread and the i1 operand true.
; The checks expect %a in src0 and src2, and %b in src1. %aptr is unused.
; SI-LABEL: {{^}}test_div_scale_f64_2:
; SI-DAG: buffer_load_dwordx2 [[A:v\[[0-9]+:[0-9]+\]]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64
; SI-DAG: buffer_load_dwordx2 [[B:v\[[0-9]+:[0-9]+\]]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:8
; SI: v_div_scale_f64 [[RESULT0:v\[[0-9]+:[0-9]+\]]], [[RESULT1:s\[[0-9]+:[0-9]+\]]], [[A]], [[B]], [[A]]
; SI: buffer_store_dwordx2 [[RESULT0]]
; SI: s_endpgm
define void @test_div_scale_f64_2(double addrspace(1)* %out, double addrspace(1)* %aptr, double addrspace(1)* %in) nounwind {
  %tid = call i32 @llvm.r600.read.tidig.x() nounwind readnone
  %gep.0 = getelementptr double addrspace(1)* %in, i32 %tid
  %gep.1 = getelementptr double addrspace(1)* %gep.0, i32 1

  %a = load double addrspace(1)* %gep.0, align 8
  %b = load double addrspace(1)* %gep.1, align 8

  %result = call { double, i1 } @llvm.AMDGPU.div.scale.f64(double %a, double %b, i1 true) nounwind readnone
  %result0 = extractvalue { double, i1 } %result, 0
  store double %result0, double addrspace(1)* %out, align 8
  ret void
}

; f32 div.scale where the first operand (%a) is a kernel argument and %b is
; loaded per-thread; i1 operand false. The checks expect the loaded VGPR in
; src0/src1 and the s_load'ed SGPR in src2.
; SI-LABEL: {{^}}test_div_scale_f32_scalar_num_1:
; SI-DAG: buffer_load_dword [[B:v[0-9]+]]
; SI-DAG: s_load_dword [[A:s[0-9]+]]
; SI: v_div_scale_f32 [[RESULT0:v[0-9]+]], [[RESULT1:s\[[0-9]+:[0-9]+\]]], [[B]], [[B]], [[A]]
; SI: buffer_store_dword [[RESULT0]]
; SI: s_endpgm
define void @test_div_scale_f32_scalar_num_1(float addrspace(1)* %out, float addrspace(1)* %in, float %a) nounwind {
  %tid = call i32 @llvm.r600.read.tidig.x() nounwind readnone
  %gep = getelementptr float addrspace(1)* %in, i32 %tid

  %b = load float addrspace(1)* %gep, align 4

  %result = call { float, i1 } @llvm.AMDGPU.div.scale.f32(float %a, float %b, i1 false) nounwind readnone
  %result0 = extractvalue { float, i1 } %result, 0
  store float %result0, float addrspace(1)* %out, align 4
  ret void
}

; f32 div.scale where the first operand (%a) is a kernel argument and %b is
; loaded per-thread; i1 operand true. The checks expect the SGPR in src0 and
; src2, and the loaded VGPR in src1.
; SI-LABEL: {{^}}test_div_scale_f32_scalar_num_2:
; SI-DAG: buffer_load_dword [[B:v[0-9]+]]
; SI-DAG: s_load_dword [[A:s[0-9]+]]
; SI: v_div_scale_f32 [[RESULT0:v[0-9]+]], [[RESULT1:s\[[0-9]+:[0-9]+\]]], [[A]], [[B]], [[A]]
; SI: buffer_store_dword [[RESULT0]]
; SI: s_endpgm
define void @test_div_scale_f32_scalar_num_2(float addrspace(1)* %out, float addrspace(1)* %in, float %a) nounwind {
  %tid = call i32 @llvm.r600.read.tidig.x() nounwind readnone
  %gep = getelementptr float addrspace(1)* %in, i32 %tid

  %b = load float addrspace(1)* %gep, align 4

  %result = call { float, i1 } @llvm.AMDGPU.div.scale.f32(float %a, float %b, i1 true) nounwind readnone
  %result0 = extractvalue { float, i1 } %result, 0
  store float %result0, float addrspace(1)* %out, align 4
  ret void
}

; f32 div.scale where the second operand (%b) is a kernel argument and %a is
; loaded per-thread; i1 operand false. The checks expect the SGPR in src0/src1
; and the loaded VGPR in src2.
; SI-LABEL: {{^}}test_div_scale_f32_scalar_den_1:
; SI-DAG: buffer_load_dword [[A:v[0-9]+]]
; SI-DAG: s_load_dword [[B:s[0-9]+]]
; SI: v_div_scale_f32 [[RESULT0:v[0-9]+]], [[RESULT1:s\[[0-9]+:[0-9]+\]]], [[B]], [[B]], [[A]]
; SI: buffer_store_dword [[RESULT0]]
; SI: s_endpgm
define void @test_div_scale_f32_scalar_den_1(float addrspace(1)* %out, float addrspace(1)* %in, float %b) nounwind {
  %tid = call i32 @llvm.r600.read.tidig.x() nounwind readnone
  %gep = getelementptr float addrspace(1)* %in, i32 %tid

  %a = load float addrspace(1)* %gep, align 4

  %result = call { float, i1 } @llvm.AMDGPU.div.scale.f32(float %a, float %b, i1 false) nounwind readnone
  %result0 = extractvalue { float, i1 } %result, 0
  store float %result0, float addrspace(1)* %out, align 4
  ret void
}

; f32 div.scale where the second operand (%b) is a kernel argument and %a is
; loaded per-thread; i1 operand true. The checks expect the loaded VGPR in
; src0 and src2, and the SGPR in src1.
; SI-LABEL: {{^}}test_div_scale_f32_scalar_den_2:
; SI-DAG: buffer_load_dword [[A:v[0-9]+]]
; SI-DAG: s_load_dword [[B:s[0-9]+]]
; SI: v_div_scale_f32 [[RESULT0:v[0-9]+]], [[RESULT1:s\[[0-9]+:[0-9]+\]]], [[A]], [[B]], [[A]]
; SI: buffer_store_dword [[RESULT0]]
; SI: s_endpgm
define void @test_div_scale_f32_scalar_den_2(float addrspace(1)* %out, float addrspace(1)* %in, float %b) nounwind {
  %tid = call i32 @llvm.r600.read.tidig.x() nounwind readnone
  %gep = getelementptr float addrspace(1)* %in, i32 %tid

  %a = load float addrspace(1)* %gep, align 4

  %result = call { float, i1 } @llvm.AMDGPU.div.scale.f32(float %a, float %b, i1 true) nounwind readnone
  %result0 = extractvalue { float, i1 } %result, 0
  store float %result0, float addrspace(1)* %out, align 4
  ret void
}

; f64 div.scale where the first operand (%a) is a kernel argument and %b is
; loaded per-thread; i1 operand false. The checks expect the loaded VGPR pair
; in src0/src1 and the SGPR pair (s_load at offset 0xd) in src2.
; SI-LABEL: {{^}}test_div_scale_f64_scalar_num_1:
; SI-DAG: buffer_load_dwordx2 [[B:v\[[0-9]+:[0-9]+\]]]
; SI-DAG: s_load_dwordx2 [[A:s\[[0-9]+:[0-9]+\]]], {{s\[[0-9]+:[0-9]+\]}}, 0xd
; SI: v_div_scale_f64 [[RESULT0:v\[[0-9]+:[0-9]+\]]], [[RESULT1:s\[[0-9]+:[0-9]+\]]], [[B]], [[B]], [[A]]
; SI: buffer_store_dwordx2 [[RESULT0]]
; SI: s_endpgm
define void @test_div_scale_f64_scalar_num_1(double addrspace(1)* %out, double addrspace(1)* %in, double %a) nounwind {
  %tid = call i32 @llvm.r600.read.tidig.x() nounwind readnone
  %gep = getelementptr double addrspace(1)* %in, i32 %tid

  %b = load double addrspace(1)* %gep, align 8

  %result = call { double, i1 } @llvm.AMDGPU.div.scale.f64(double %a, double %b, i1 false) nounwind readnone
  %result0 = extractvalue { double, i1 } %result, 0
  store double %result0, double addrspace(1)* %out, align 8
  ret void
}

; f64 div.scale where the first operand (%a) is a kernel argument and %b is
; loaded per-thread; i1 operand true. The checks expect the SGPR pair in src0
; and src2, and the loaded VGPR pair in src1.
; SI-LABEL: {{^}}test_div_scale_f64_scalar_num_2:
; SI-DAG: s_load_dwordx2 [[A:s\[[0-9]+:[0-9]+\]]], {{s\[[0-9]+:[0-9]+\]}}, 0xd
; SI-DAG: buffer_load_dwordx2 [[B:v\[[0-9]+:[0-9]+\]]]
; SI: v_div_scale_f64 [[RESULT0:v\[[0-9]+:[0-9]+\]]], [[RESULT1:s\[[0-9]+:[0-9]+\]]], [[A]], [[B]], [[A]]
; SI: buffer_store_dwordx2 [[RESULT0]]
; SI: s_endpgm
define void @test_div_scale_f64_scalar_num_2(double addrspace(1)* %out, double addrspace(1)* %in, double %a) nounwind {
  %tid = call i32 @llvm.r600.read.tidig.x() nounwind readnone
  %gep = getelementptr double addrspace(1)* %in, i32 %tid

  %b = load double addrspace(1)* %gep, align 8

  %result = call { double, i1 } @llvm.AMDGPU.div.scale.f64(double %a, double %b, i1 true) nounwind readnone
  %result0 = extractvalue { double, i1 } %result, 0
  store double %result0, double addrspace(1)* %out, align 8
  ret void
}

; f64 div.scale where the second operand (%b) is a kernel argument and %a is
; loaded per-thread; i1 operand false. The checks expect the SGPR pair in
; src0/src1 and the loaded VGPR pair in src2.
; SI-LABEL: {{^}}test_div_scale_f64_scalar_den_1:
; SI-DAG: buffer_load_dwordx2 [[A:v\[[0-9]+:[0-9]+\]]]
; SI-DAG: s_load_dwordx2 [[B:s\[[0-9]+:[0-9]+\]]], {{s\[[0-9]+:[0-9]+\]}}, 0xd
; SI: v_div_scale_f64 [[RESULT0:v\[[0-9]+:[0-9]+\]]], [[RESULT1:s\[[0-9]+:[0-9]+\]]], [[B]], [[B]], [[A]]
; SI: buffer_store_dwordx2 [[RESULT0]]
; SI: s_endpgm
define void @test_div_scale_f64_scalar_den_1(double addrspace(1)* %out, double addrspace(1)* %in, double %b) nounwind {
  %tid = call i32 @llvm.r600.read.tidig.x() nounwind readnone
  %gep = getelementptr double addrspace(1)* %in, i32 %tid

  %a = load double addrspace(1)* %gep, align 8

  %result = call { double, i1 } @llvm.AMDGPU.div.scale.f64(double %a, double %b, i1 false) nounwind readnone
  %result0 = extractvalue { double, i1 } %result, 0
  store double %result0, double addrspace(1)* %out, align 8
  ret void
}

; f64 div.scale where the second operand (%b) is a kernel argument and %a is
; loaded per-thread; i1 operand true. The checks expect the loaded VGPR pair
; in src0 and src2, and the SGPR pair in src1.
; SI-LABEL: {{^}}test_div_scale_f64_scalar_den_2:
; SI-DAG: buffer_load_dwordx2 [[A:v\[[0-9]+:[0-9]+\]]]
; SI-DAG: s_load_dwordx2 [[B:s\[[0-9]+:[0-9]+\]]], {{s\[[0-9]+:[0-9]+\]}}, 0xd
; SI: v_div_scale_f64 [[RESULT0:v\[[0-9]+:[0-9]+\]]], [[RESULT1:s\[[0-9]+:[0-9]+\]]], [[A]], [[B]], [[A]]
; SI: buffer_store_dwordx2 [[RESULT0]]
; SI: s_endpgm
define void @test_div_scale_f64_scalar_den_2(double addrspace(1)* %out, double addrspace(1)* %in, double %b) nounwind {
  %tid = call i32 @llvm.r600.read.tidig.x() nounwind readnone
  %gep = getelementptr double addrspace(1)* %in, i32 %tid

  %a = load double addrspace(1)* %gep, align 8

  %result = call { double, i1 } @llvm.AMDGPU.div.scale.f64(double %a, double %b, i1 true) nounwind readnone
  %result0 = extractvalue { double, i1 } %result, 0
  store double %result0, double addrspace(1)* %out, align 8
  ret void
}

; f32 div.scale with both operands passed as kernel arguments; i1 operand
; false. The checks expect %a to be copied into a VGPR with v_mov_b32 and used
; as src2, while %b stays in its SGPR for src0/src1.
; SI-LABEL: {{^}}test_div_scale_f32_all_scalar_1:
; SI-DAG: s_load_dword [[A:s[0-9]+]], {{s\[[0-9]+:[0-9]+\]}}, 0xb
; SI-DAG: s_load_dword [[B:s[0-9]+]], {{s\[[0-9]+:[0-9]+\]}}, 0xc
; SI: v_mov_b32_e32 [[VA:v[0-9]+]], [[A]]
; SI: v_div_scale_f32 [[RESULT0:v[0-9]+]], [[RESULT1:s\[[0-9]+:[0-9]+\]]], [[B]], [[B]], [[VA]]
; SI: buffer_store_dword [[RESULT0]]
; SI: s_endpgm
define void @test_div_scale_f32_all_scalar_1(float addrspace(1)* %out, float %a, float %b) nounwind {
  %result = call { float, i1 } @llvm.AMDGPU.div.scale.f32(float %a, float %b, i1 false) nounwind readnone
  %result0 = extractvalue { float, i1 } %result, 0
  store float %result0, float addrspace(1)* %out, align 4
  ret void
}

; f32 div.scale with both operands passed as kernel arguments; i1 operand
; true. The checks expect %b to be copied into a VGPR with v_mov_b32 and used
; as src1, while %a stays in its SGPR for src0 and src2.
; SI-LABEL: {{^}}test_div_scale_f32_all_scalar_2:
; SI-DAG: s_load_dword [[A:s[0-9]+]], {{s\[[0-9]+:[0-9]+\]}}, 0xb
; SI-DAG: s_load_dword [[B:s[0-9]+]], {{s\[[0-9]+:[0-9]+\]}}, 0xc
; SI: v_mov_b32_e32 [[VB:v[0-9]+]], [[B]]
; SI: v_div_scale_f32 [[RESULT0:v[0-9]+]], [[RESULT1:s\[[0-9]+:[0-9]+\]]], [[A]], [[VB]], [[A]]
; SI: buffer_store_dword [[RESULT0]]
; SI: s_endpgm
define void @test_div_scale_f32_all_scalar_2(float addrspace(1)* %out, float %a, float %b) nounwind {
  %result = call { float, i1 } @llvm.AMDGPU.div.scale.f32(float %a, float %b, i1 true) nounwind readnone
  %result0 = extractvalue { float, i1 } %result, 0
  store float %result0, float addrspace(1)* %out, align 4
  ret void
}

; f64 div.scale with both operands passed as kernel arguments; i1 operand
; false. The checks expect both halves of %a to be copied into VGPRs and the
; resulting pair used as src2, while %b stays in its SGPR pair for src0/src1.
; SI-LABEL: {{^}}test_div_scale_f64_all_scalar_1:
; SI-DAG: s_load_dwordx2 s{{\[}}[[A_LO:[0-9]+]]:[[A_HI:[0-9]+]]{{\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0xb
; SI-DAG: s_load_dwordx2 [[B:s\[[0-9]+:[0-9]+\]]], {{s\[[0-9]+:[0-9]+\]}}, 0xd
; SI-DAG: v_mov_b32_e32 v[[VA_LO:[0-9]+]], s[[A_LO]]
; SI-DAG: v_mov_b32_e32 v[[VA_HI:[0-9]+]], s[[A_HI]]
; SI: v_div_scale_f64 [[RESULT0:v\[[0-9]+:[0-9]+\]]], [[RESULT1:s\[[0-9]+:[0-9]+\]]], [[B]], [[B]], v{{\[}}[[VA_LO]]:[[VA_HI]]{{\]}}
; SI: buffer_store_dwordx2 [[RESULT0]]
; SI: s_endpgm
define void @test_div_scale_f64_all_scalar_1(double addrspace(1)* %out, double %a, double %b) nounwind {
  %result = call { double, i1 } @llvm.AMDGPU.div.scale.f64(double %a, double %b, i1 false) nounwind readnone
  %result0 = extractvalue { double, i1 } %result, 0
  store double %result0, double addrspace(1)* %out, align 8
  ret void
}

; f64 div.scale with both operands passed as kernel arguments; i1 operand
; true. The checks expect both halves of %b to be copied into VGPRs and the
; resulting pair used as src1, while %a stays in its SGPR pair for src0/src2.
; SI-LABEL: {{^}}test_div_scale_f64_all_scalar_2:
; SI-DAG: s_load_dwordx2 [[A:s\[[0-9]+:[0-9]+\]]], {{s\[[0-9]+:[0-9]+\]}}, 0xb
; SI-DAG: s_load_dwordx2 s{{\[}}[[B_LO:[0-9]+]]:[[B_HI:[0-9]+]]{{\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0xd
; SI-DAG: v_mov_b32_e32 v[[VB_LO:[0-9]+]], s[[B_LO]]
; SI-DAG: v_mov_b32_e32 v[[VB_HI:[0-9]+]], s[[B_HI]]
; SI: v_div_scale_f64 [[RESULT0:v\[[0-9]+:[0-9]+\]]], [[RESULT1:s\[[0-9]+:[0-9]+\]]], [[A]], v{{\[}}[[VB_LO]]:[[VB_HI]]{{\]}}, [[A]]
; SI: buffer_store_dwordx2 [[RESULT0]]
; SI: s_endpgm
define void @test_div_scale_f64_all_scalar_2(double addrspace(1)* %out, double %a, double %b) nounwind {
  %result = call { double, i1 } @llvm.AMDGPU.div.scale.f64(double %a, double %b, i1 true) nounwind readnone
  %result0 = extractvalue { double, i1 } %result, 0
  store double %result0, double addrspace(1)* %out, align 8
  ret void
}

; f32 div.scale with the constant 1.0 as the first operand and a loaded value
; as the second; i1 operand false. The checks expect 1.0 to appear directly as
; the src2 operand of v_div_scale_f32.
; SI-LABEL: {{^}}test_div_scale_f32_inline_imm_num:
; SI-DAG: buffer_load_dword [[A:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64{{$}}
; SI: v_div_scale_f32 [[RESULT0:v[0-9]+]], [[RESULT1:s\[[0-9]+:[0-9]+\]]], [[A]], [[A]], 1.0
; SI: buffer_store_dword [[RESULT0]]
; SI: s_endpgm
define void @test_div_scale_f32_inline_imm_num(float addrspace(1)* %out, float addrspace(1)* %in) nounwind {
  %tid = call i32 @llvm.r600.read.tidig.x() nounwind readnone
  %gep.0 = getelementptr float addrspace(1)* %in, i32 %tid
  %a = load float addrspace(1)* %gep.0, align 4

  %result = call { float, i1 } @llvm.AMDGPU.div.scale.f32(float 1.0, float %a, i1 false) nounwind readnone
  %result0 = extractvalue { float, i1 } %result, 0
  store float %result0, float addrspace(1)* %out, align 4
  ret void
}

; f32 div.scale with a loaded value as the first operand and the constant 2.0
; as the second; i1 operand false. The checks expect 2.0 to appear directly as
; both src0 and src1 of v_div_scale_f32.
; SI-LABEL: {{^}}test_div_scale_f32_inline_imm_den:
; SI-DAG: buffer_load_dword [[A:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64{{$}}
; SI: v_div_scale_f32 [[RESULT0:v[0-9]+]], [[RESULT1:s\[[0-9]+:[0-9]+\]]], 2.0, 2.0, [[A]]
; SI: buffer_store_dword [[RESULT0]]
; SI: s_endpgm
define void @test_div_scale_f32_inline_imm_den(float addrspace(1)* %out, float addrspace(1)* %in) nounwind {
  %tid = call i32 @llvm.r600.read.tidig.x() nounwind readnone
  %gep.0 = getelementptr float addrspace(1)* %in, i32 %tid
  %a = load float addrspace(1)* %gep.0, align 4

  %result = call { float, i1 } @llvm.AMDGPU.div.scale.f32(float %a, float 2.0, i1 false) nounwind readnone
  %result0 = extractvalue { float, i1 } %result, 0
  store float %result0, float addrspace(1)* %out, align 4
  ret void
}

; f32 div.scale whose first operand is fabs(%a); i1 operand false. The checks
; expect the fabs to appear as an |...| modifier on src2 of v_div_scale_f32.
; SI-LABEL: {{^}}test_div_scale_f32_fabs_num:
; SI-DAG: buffer_load_dword [[A:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64
; SI-DAG: buffer_load_dword [[B:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:4
; SI: v_div_scale_f32 [[RESULT0:v[0-9]+]], [[RESULT1:s\[[0-9]+:[0-9]+\]]], [[B]], [[B]], |[[A]]|
; SI: buffer_store_dword [[RESULT0]]
; SI: s_endpgm
define void @test_div_scale_f32_fabs_num(float addrspace(1)* %out, float addrspace(1)* %in) nounwind {
  %tid = call i32 @llvm.r600.read.tidig.x() nounwind readnone
  %gep.0 = getelementptr float addrspace(1)* %in, i32 %tid
  %gep.1 = getelementptr float addrspace(1)* %gep.0, i32 1

  %a = load float addrspace(1)* %gep.0, align 4
  %b = load float addrspace(1)* %gep.1, align 4

  %a.fabs = call float @llvm.fabs.f32(float %a) nounwind readnone

  %result = call { float, i1 } @llvm.AMDGPU.div.scale.f32(float %a.fabs, float %b, i1 false) nounwind readnone
  %result0 = extractvalue { float, i1 } %result, 0
  store float %result0, float addrspace(1)* %out, align 4
  ret void
}

; f32 div.scale whose second operand is fabs(%b); i1 operand false. The checks
; expect the fabs to appear as an |...| modifier on both src0 and src1 of
; v_div_scale_f32.
; SI-LABEL: {{^}}test_div_scale_f32_fabs_den:
; SI-DAG: buffer_load_dword [[A:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64
; SI-DAG: buffer_load_dword [[B:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:4
; SI: v_div_scale_f32 [[RESULT0:v[0-9]+]], [[RESULT1:s\[[0-9]+:[0-9]+\]]], |[[B]]|, |[[B]]|, [[A]]
; SI: buffer_store_dword [[RESULT0]]
; SI: s_endpgm
define void @test_div_scale_f32_fabs_den(float addrspace(1)* %out, float addrspace(1)* %in) nounwind {
  %tid = call i32 @llvm.r600.read.tidig.x() nounwind readnone
  %gep.0 = getelementptr float addrspace(1)* %in, i32 %tid
  %gep.1 = getelementptr float addrspace(1)* %gep.0, i32 1

  %a = load float addrspace(1)* %gep.0, align 4
  %b = load float addrspace(1)* %gep.1, align 4

  %b.fabs = call float @llvm.fabs.f32(float %b) nounwind readnone

  %result = call { float, i1 } @llvm.AMDGPU.div.scale.f32(float %a, float %b.fabs, i1 false) nounwind readnone
  %result0 = extractvalue { float, i1 } %result, 0
  store float %result0, float addrspace(1)* %out, align 4
  ret void
}