blob: bc489454341a06a50961ce51ecfe308299e6e7d2 [file] [log] [blame]
Tom Stellard8485fa02016-12-07 02:42:15 +00001; RUN: llc -march=amdgcn -mcpu=tahiti -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=FUNC %s
2; RUN: llc -march=amdgcn -mcpu=hawaii -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=FUNC %s
3; RUN: llc -march=amdgcn -mcpu=fiji -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=FUNC %s
Matt Arsenaultdfec5ce2016-07-09 07:48:11 +00004; RUN: llc -march=r600 -mcpu=redwood < %s | FileCheck -check-prefix=R600 -check-prefix=FUNC %s
Tom Stellard75aadc22012-12-11 21:25:42 +00005
Tom Stellard0344cdf2013-08-01 15:23:42 +00006; These tests check that fdiv is expanded correctly and also test that the
7; scheduler is scheduling the RECIP_IEEE and MUL_IEEE instructions in separate
8; instruction groups.
Tom Stellard75aadc22012-12-11 21:25:42 +00009
Wei Dinged0f97f2016-06-09 19:17:15 +000010; These tests check fdiv when using unsafe_fp_math, coarse fp div, and IEEE754 fp div.
11
Tom Stellard79243d92014-10-01 17:15:17 +000012; FUNC-LABEL: {{^}}fdiv_f32:
Matt Arsenaultdfec5ce2016-07-09 07:48:11 +000013; R600-DAG: RECIP_IEEE * T{{[0-9]+\.[XYZW]}}, KC0[2].W
14; R600-DAG: MUL_IEEE {{\** *}}T{{[0-9]+\.[XYZW]}}, KC0[2].Z, PS
Matt Arsenaulte9fa3b82014-07-15 20:18:31 +000015
Tom Stellard8485fa02016-12-07 02:42:15 +000016; GCN: v_div_scale_f32 [[NUM_SCALE:v[0-9]+]]
17; GCN-DAG: v_div_scale_f32 [[DEN_SCALE:v[0-9]+]]
18; GCN-DAG: v_rcp_f32_e32 [[NUM_RCP:v[0-9]+]], [[NUM_SCALE]]
Wei Dinged0f97f2016-06-09 19:17:15 +000019
Tom Stellard8485fa02016-12-07 02:42:15 +000020; GCN: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 4, 2), 3
21; GCN: v_fma_f32 [[A:v[0-9]+]], -[[NUM_SCALE]], [[NUM_RCP]], 1.0
22; GCN: v_fma_f32 [[B:v[0-9]+]], [[A]], [[NUM_RCP]], [[NUM_RCP]]
Matt Arsenault6c29c5a2017-07-10 19:53:57 +000023; GCN: v_mul_f32_e32 [[C:v[0-9]+]], [[DEN_SCALE]], [[B]]
Tom Stellard8485fa02016-12-07 02:42:15 +000024; GCN: v_fma_f32 [[D:v[0-9]+]], -[[NUM_SCALE]], [[C]], [[DEN_SCALE]]
25; GCN: v_fma_f32 [[E:v[0-9]+]], [[D]], [[B]], [[C]]
26; GCN: v_fma_f32 [[F:v[0-9]+]], -[[NUM_SCALE]], [[E]], [[DEN_SCALE]]
27; GCN: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 4, 2), 0
28; GCN: v_div_fmas_f32 [[FMAS:v[0-9]+]], [[F]], [[B]], [[E]]
29; GCN: v_div_fixup_f32 v{{[0-9]+}}, [[FMAS]],
; Kernel under test: scalar float division of the two kernel arguments with no
; fast-math flags and no !fpmath annotation; the quotient is stored to %out.
; Uses attribute group #0. The GCN checks above expect s_setreg_imm32_b32 on
; HW_REG_MODE both before and after the v_fma_f32 sequence.
Matt Arsenault3dbeefa2017-03-21 21:39:51 +000030define amdgpu_kernel void @fdiv_f32(float addrspace(1)* %out, float %a, float %b) #0 {
Matt Arsenaulte9fa3b82014-07-15 20:18:31 +000031entry:
Matt Arsenaulta1fe17c2016-07-19 23:16:53 +000032 %fdiv = fdiv float %a, %b
33 store float %fdiv, float addrspace(1)* %out
34 ret void
35}
36
Tom Stellard8485fa02016-12-07 02:42:15 +000037; FUNC-LABEL: {{^}}fdiv_f32_denormals:
38; R600-DAG: RECIP_IEEE * T{{[0-9]+\.[XYZW]}}, KC0[2].W
39; R600-DAG: MUL_IEEE {{\** *}}T{{[0-9]+\.[XYZW]}}, KC0[2].Z, PS
40
41; GCN: v_div_scale_f32 [[NUM_SCALE:v[0-9]+]]
42; GCN-DAG: v_div_scale_f32 [[DEN_SCALE:v[0-9]+]]
43; GCN-DAG: v_rcp_f32_e32 [[NUM_RCP:v[0-9]+]], [[NUM_SCALE]]
44
45; GCN-NOT: s_setreg
46; GCN: v_fma_f32 [[A:v[0-9]+]], -[[NUM_SCALE]], [[NUM_RCP]], 1.0
47; GCN: v_fma_f32 [[B:v[0-9]+]], [[A]], [[NUM_RCP]], [[NUM_RCP]]
Matt Arsenault6c29c5a2017-07-10 19:53:57 +000048; GCN: v_mul_f32_e32 [[C:v[0-9]+]], [[DEN_SCALE]], [[B]]
Tom Stellard8485fa02016-12-07 02:42:15 +000049; GCN: v_fma_f32 [[D:v[0-9]+]], -[[NUM_SCALE]], [[C]], [[DEN_SCALE]]
50; GCN: v_fma_f32 [[E:v[0-9]+]], [[D]], [[B]], [[C]]
51; GCN: v_fma_f32 [[F:v[0-9]+]], -[[NUM_SCALE]], [[E]], [[DEN_SCALE]]
52; GCN-NOT: s_setreg
53; GCN: v_div_fmas_f32 [[FMAS:v[0-9]+]], [[F]], [[B]], [[E]]
54; GCN: v_div_fixup_f32 v{{[0-9]+}}, [[FMAS]],
; Kernel under test: same plain scalar float division as @fdiv_f32, but with
; attribute group #2 ("+fp32-denormals"). The GCN checks above expect the same
; v_fma_f32 sequence with no s_setreg around it.
Matt Arsenault3dbeefa2017-03-21 21:39:51 +000055define amdgpu_kernel void @fdiv_f32_denormals(float addrspace(1)* %out, float %a, float %b) #2 {
Tom Stellard8485fa02016-12-07 02:42:15 +000056entry:
57 %fdiv = fdiv float %a, %b
58 store float %fdiv, float addrspace(1)* %out
59 ret void
60}
61
Matt Arsenaulta1fe17c2016-07-19 23:16:53 +000062; FUNC-LABEL: {{^}}fdiv_25ulp_f32:
Tom Stellard8485fa02016-12-07 02:42:15 +000063; GCN: v_cndmask_b32
64; GCN: v_mul_f32
65; GCN: v_rcp_f32
66; GCN: v_mul_f32
67; GCN: v_mul_f32
; Kernel under test: scalar float division annotated with !fpmath !0 (the
; float 2.5 metadata node defined at the end of this file), no fast-math
; flags, attribute group #0. The GCN checks above expect v_rcp_f32 and
; v_mul_f32 instructions preceded by a v_cndmask_b32.
Matt Arsenault3dbeefa2017-03-21 21:39:51 +000068define amdgpu_kernel void @fdiv_25ulp_f32(float addrspace(1)* %out, float %a, float %b) #0 {
Matt Arsenaulta1fe17c2016-07-19 23:16:53 +000069entry:
70 %fdiv = fdiv float %a, %b, !fpmath !0
71 store float %fdiv, float addrspace(1)* %out
72 ret void
73}
74
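75; With fp32 denormals enabled (attribute group #2), the full fdiv expansion is expected even though the fdiv carries !fpmath.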
76; FUNC-LABEL: {{^}}fdiv_25ulp_denormals_f32:
Tom Stellard8485fa02016-12-07 02:42:15 +000077; GCN: v_fma_f32
78; GCN: v_div_fmas_f32
79; GCN: v_div_fixup_f32
; Kernel under test: scalar float division annotated with !fpmath !0, no
; fast-math flags, attribute group #2 ("+fp32-denormals"). The GCN checks
; above expect v_fma_f32, v_div_fmas_f32 and v_div_fixup_f32.
Matt Arsenault3dbeefa2017-03-21 21:39:51 +000080define amdgpu_kernel void @fdiv_25ulp_denormals_f32(float addrspace(1)* %out, float %a, float %b) #2 {
Matt Arsenaulta1fe17c2016-07-19 23:16:53 +000081entry:
82 %fdiv = fdiv float %a, %b, !fpmath !0
83 store float %fdiv, float addrspace(1)* %out
84 ret void
85}
86
87; FUNC-LABEL: {{^}}fdiv_fast_denormals_f32:
Stanislav Mekhanoshin9d7b1c92017-07-06 20:34:21 +000088; GCN: v_rcp_f32_e32 [[RCP:v[0-9]+]], s{{[0-9]+}}
89; GCN: v_mul_f32_e32 [[RESULT:v[0-9]+]], s{{[0-9]+}}, [[RCP]]
90; GCN-NOT: [[RESULT]]
Konstantin Zhuravlyovc4b18e72017-04-21 19:25:33 +000091; GCN-NOT: s_setreg
Stanislav Mekhanoshin9d7b1c92017-07-06 20:34:21 +000092; GCN: buffer_store_dword [[RESULT]]
; Kernel under test: scalar float division carrying the "fast" flag, with
; attribute group #2 ("+fp32-denormals"). The GCN checks above expect a
; v_rcp_f32_e32 feeding a v_mul_f32_e32 whose result is stored directly, with
; no s_setreg before the store.
Matt Arsenault3dbeefa2017-03-21 21:39:51 +000093define amdgpu_kernel void @fdiv_fast_denormals_f32(float addrspace(1)* %out, float %a, float %b) #2 {
Matt Arsenaulta1fe17c2016-07-19 23:16:53 +000094entry:
95 %fdiv = fdiv fast float %a, %b
96 store float %fdiv, float addrspace(1)* %out
Matt Arsenaulte9fa3b82014-07-15 20:18:31 +000097 ret void
98}
99
Wei Dinged0f97f2016-06-09 19:17:15 +0000100; FUNC-LABEL: {{^}}fdiv_f32_fast_math:
Matt Arsenaultdfec5ce2016-07-09 07:48:11 +0000101; R600-DAG: RECIP_IEEE * T{{[0-9]+\.[XYZW]}}, KC0[2].W
102; R600-DAG: MUL_IEEE {{\** *}}T{{[0-9]+\.[XYZW]}}, KC0[2].Z, PS
Matt Arsenaulte9fa3b82014-07-15 20:18:31 +0000103
Tom Stellard8485fa02016-12-07 02:42:15 +0000104; GCN: v_rcp_f32_e32 [[RCP:v[0-9]+]], s{{[0-9]+}}
105; GCN: v_mul_f32_e32 [[RESULT:v[0-9]+]], s{{[0-9]+}}, [[RCP]]
106; GCN-NOT: [[RESULT]]
107; GCN: buffer_store_dword [[RESULT]]
; Kernel under test: scalar float division carrying the "fast" flag, with
; attribute group #0 ("-fp32-denormals"). The GCN checks above expect a
; v_rcp_f32_e32 feeding a v_mul_f32_e32 whose result is stored directly.
Matt Arsenault3dbeefa2017-03-21 21:39:51 +0000108define amdgpu_kernel void @fdiv_f32_fast_math(float addrspace(1)* %out, float %a, float %b) #0 {
Wei Dinged0f97f2016-06-09 19:17:15 +0000109entry:
Matt Arsenaulta1fe17c2016-07-19 23:16:53 +0000110 %fdiv = fdiv fast float %a, %b
111 store float %fdiv, float addrspace(1)* %out
Wei Dinged0f97f2016-06-09 19:17:15 +0000112 ret void
113}
114
Stanislav Mekhanoshin9d7b1c92017-07-06 20:34:21 +0000115; FUNC-LABEL: {{^}}fdiv_ulp25_f32_fast_math:
116; R600-DAG: RECIP_IEEE * T{{[0-9]+\.[XYZW]}}, KC0[2].W
117; R600-DAG: MUL_IEEE {{\** *}}T{{[0-9]+\.[XYZW]}}, KC0[2].Z, PS
118
119; GCN: v_rcp_f32_e32 [[RCP:v[0-9]+]], s{{[0-9]+}}
120; GCN: v_mul_f32_e32 [[RESULT:v[0-9]+]], s{{[0-9]+}}, [[RCP]]
121; GCN-NOT: [[RESULT]]
122; GCN: buffer_store_dword [[RESULT]]
; Kernel under test: scalar float division carrying both the "fast" flag and
; the !fpmath !0 annotation, with attribute group #0. The GCN checks above
; expect the same v_rcp_f32_e32 + v_mul_f32_e32 pair as @fdiv_f32_fast_math.
123define amdgpu_kernel void @fdiv_ulp25_f32_fast_math(float addrspace(1)* %out, float %a, float %b) #0 {
124entry:
125 %fdiv = fdiv fast float %a, %b, !fpmath !0
126 store float %fdiv, float addrspace(1)* %out
127 ret void
128}
129
Wei Dinged0f97f2016-06-09 19:17:15 +0000130; FUNC-LABEL: {{^}}fdiv_f32_arcp_math:
Matt Arsenaultdfec5ce2016-07-09 07:48:11 +0000131; R600-DAG: RECIP_IEEE * T{{[0-9]+\.[XYZW]}}, KC0[2].W
132; R600-DAG: MUL_IEEE {{\** *}}T{{[0-9]+\.[XYZW]}}, KC0[2].Z, PS
Wei Dinged0f97f2016-06-09 19:17:15 +0000133
Tom Stellard8485fa02016-12-07 02:42:15 +0000134; GCN: v_rcp_f32_e32 [[RCP:v[0-9]+]], s{{[0-9]+}}
135; GCN: v_mul_f32_e32 [[RESULT:v[0-9]+]], s{{[0-9]+}}, [[RCP]]
136; GCN-NOT: [[RESULT]]
137; GCN: buffer_store_dword [[RESULT]]
; Kernel under test: scalar float division carrying only the "arcp" flag,
; with attribute group #0. The GCN checks above expect a v_rcp_f32_e32
; feeding a v_mul_f32_e32 whose result is stored directly.
Matt Arsenault3dbeefa2017-03-21 21:39:51 +0000138define amdgpu_kernel void @fdiv_f32_arcp_math(float addrspace(1)* %out, float %a, float %b) #0 {
Wei Dinged0f97f2016-06-09 19:17:15 +0000139entry:
Matt Arsenaulta1fe17c2016-07-19 23:16:53 +0000140 %fdiv = fdiv arcp float %a, %b
141 store float %fdiv, float addrspace(1)* %out
Wei Dinged0f97f2016-06-09 19:17:15 +0000142 ret void
143}
Matt Arsenaulte9fa3b82014-07-15 20:18:31 +0000144
Tom Stellard79243d92014-10-01 17:15:17 +0000145; FUNC-LABEL: {{^}}fdiv_v2f32:
Matt Arsenaulte9fa3b82014-07-15 20:18:31 +0000146; R600-DAG: RECIP_IEEE * T{{[0-9]+\.[XYZW]}}, KC0[3].Z
147; R600-DAG: RECIP_IEEE * T{{[0-9]+\.[XYZW]}}, KC0[3].Y
148; R600-DAG: MUL_IEEE {{\** *}}T{{[0-9]+\.[XYZW]}}, KC0[3].X, PS
149; R600-DAG: MUL_IEEE {{\** *}}T{{[0-9]+\.[XYZW]}}, KC0[2].W, PS
150
Tom Stellard8485fa02016-12-07 02:42:15 +0000151; GCN: v_div_scale_f32
152; GCN: v_div_scale_f32
153; GCN: v_div_scale_f32
154; GCN: v_div_scale_f32
; Kernel under test: <2 x float> division of the two kernel arguments with no
; fast-math flags, attribute group #0; the result vector is stored to %out.
; The GCN checks above expect four v_div_scale_f32 instructions.
Matt Arsenault3dbeefa2017-03-21 21:39:51 +0000155define amdgpu_kernel void @fdiv_v2f32(<2 x float> addrspace(1)* %out, <2 x float> %a, <2 x float> %b) #0 {
Tom Stellard1e803092013-07-23 01:48:18 +0000156entry:
Matt Arsenaulta1fe17c2016-07-19 23:16:53 +0000157 %fdiv = fdiv <2 x float> %a, %b
158 store <2 x float> %fdiv, <2 x float> addrspace(1)* %out
159 ret void
160}
161
162; FUNC-LABEL: {{^}}fdiv_ulp25_v2f32:
Stanislav Mekhanoshin9d7b1c92017-07-06 20:34:21 +0000163; GCN: v_rcp_f32
164; GCN: v_rcp_f32
165; GCN-NOT: v_cmp_gt_f32
; Kernel under test: <2 x float> division carrying the "arcp" flag and the
; !fpmath !0 annotation, attribute group #0. The GCN checks above expect two
; v_rcp_f32 instructions and no v_cmp_gt_f32 after them.
Matt Arsenault3dbeefa2017-03-21 21:39:51 +0000166define amdgpu_kernel void @fdiv_ulp25_v2f32(<2 x float> addrspace(1)* %out, <2 x float> %a, <2 x float> %b) #0 {
Matt Arsenaulta1fe17c2016-07-19 23:16:53 +0000167entry:
168 %fdiv = fdiv arcp <2 x float> %a, %b, !fpmath !0
169 store <2 x float> %fdiv, <2 x float> addrspace(1)* %out
Tom Stellard0344cdf2013-08-01 15:23:42 +0000170 ret void
171}
172
Wei Dinged0f97f2016-06-09 19:17:15 +0000173; FUNC-LABEL: {{^}}fdiv_v2f32_fast_math:
174; R600-DAG: RECIP_IEEE * T{{[0-9]+\.[XYZW]}}, KC0[3].Z
175; R600-DAG: RECIP_IEEE * T{{[0-9]+\.[XYZW]}}, KC0[3].Y
176; R600-DAG: MUL_IEEE {{\** *}}T{{[0-9]+\.[XYZW]}}, KC0[3].X, PS
177; R600-DAG: MUL_IEEE {{\** *}}T{{[0-9]+\.[XYZW]}}, KC0[2].W, PS
178
Tom Stellard8485fa02016-12-07 02:42:15 +0000179; GCN: v_rcp_f32
180; GCN: v_rcp_f32
; Kernel under test: <2 x float> division carrying the "fast" flag,
; attribute group #0. The GCN checks above expect two v_rcp_f32 instructions.
Matt Arsenault3dbeefa2017-03-21 21:39:51 +0000181define amdgpu_kernel void @fdiv_v2f32_fast_math(<2 x float> addrspace(1)* %out, <2 x float> %a, <2 x float> %b) #0 {
Wei Dinged0f97f2016-06-09 19:17:15 +0000182entry:
Matt Arsenaulta1fe17c2016-07-19 23:16:53 +0000183 %fdiv = fdiv fast <2 x float> %a, %b
184 store <2 x float> %fdiv, <2 x float> addrspace(1)* %out
Wei Dinged0f97f2016-06-09 19:17:15 +0000185 ret void
186}
187
188; FUNC-LABEL: {{^}}fdiv_v2f32_arcp_math:
189; R600-DAG: RECIP_IEEE * T{{[0-9]+\.[XYZW]}}, KC0[3].Z
190; R600-DAG: RECIP_IEEE * T{{[0-9]+\.[XYZW]}}, KC0[3].Y
191; R600-DAG: MUL_IEEE {{\** *}}T{{[0-9]+\.[XYZW]}}, KC0[3].X, PS
192; R600-DAG: MUL_IEEE {{\** *}}T{{[0-9]+\.[XYZW]}}, KC0[2].W, PS
193
Tom Stellard8485fa02016-12-07 02:42:15 +0000194; GCN: v_rcp_f32
195; GCN: v_rcp_f32
; Kernel under test: <2 x float> division carrying only the "arcp" flag,
; attribute group #0. The GCN checks above expect two v_rcp_f32 instructions.
Matt Arsenault3dbeefa2017-03-21 21:39:51 +0000196define amdgpu_kernel void @fdiv_v2f32_arcp_math(<2 x float> addrspace(1)* %out, <2 x float> %a, <2 x float> %b) #0 {
Wei Dinged0f97f2016-06-09 19:17:15 +0000197entry:
Matt Arsenaulta1fe17c2016-07-19 23:16:53 +0000198 %fdiv = fdiv arcp <2 x float> %a, %b
199 store <2 x float> %fdiv, <2 x float> addrspace(1)* %out
Wei Dinged0f97f2016-06-09 19:17:15 +0000200 ret void
201}
202
Tom Stellard79243d92014-10-01 17:15:17 +0000203; FUNC-LABEL: {{^}}fdiv_v4f32:
Matt Arsenaulte9fa3b82014-07-15 20:18:31 +0000204; R600-DAG: RECIP_IEEE * T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
205; R600-DAG: RECIP_IEEE * T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
206; R600-DAG: RECIP_IEEE * T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
207; R600-DAG: RECIP_IEEE * T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
208; R600-DAG: MUL_IEEE {{\** *}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}, PS
209; R600-DAG: MUL_IEEE {{\** *}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}, PS
210; R600-DAG: MUL_IEEE {{\** *}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}, PS
211; R600-DAG: MUL_IEEE {{\** *}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}, PS
212
Tom Stellard8485fa02016-12-07 02:42:15 +0000213; GCN: v_div_fixup_f32
214; GCN: v_div_fixup_f32
215; GCN: v_div_fixup_f32
216; GCN: v_div_fixup_f32
; Kernel under test: loads two <4 x float> values from global memory (%a from
; %in, %b from the next <4 x float> element after %in), divides them with no
; fast-math flags, and stores the result to %out. Attribute group #0.
; The GCN checks above expect four v_div_fixup_f32 instructions.
Matt Arsenault3dbeefa2017-03-21 21:39:51 +0000217define amdgpu_kernel void @fdiv_v4f32(<4 x float> addrspace(1)* %out, <4 x float> addrspace(1)* %in) #0 {
David Blaikie79e6c742015-02-27 19:29:02 +0000218 %b_ptr = getelementptr <4 x float>, <4 x float> addrspace(1)* %in, i32 1
David Blaikiea79ac142015-02-27 21:17:42 +0000219 %a = load <4 x float>, <4 x float> addrspace(1) * %in
220 %b = load <4 x float>, <4 x float> addrspace(1) * %b_ptr
Tom Stellard0344cdf2013-08-01 15:23:42 +0000221 %result = fdiv <4 x float> %a, %b
222 store <4 x float> %result, <4 x float> addrspace(1)* %out
Tom Stellard75aadc22012-12-11 21:25:42 +0000223 ret void
224}
Wei Dinged0f97f2016-06-09 19:17:15 +0000225
226; FUNC-LABEL: {{^}}fdiv_v4f32_fast_math:
227; R600-DAG: RECIP_IEEE * T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
228; R600-DAG: RECIP_IEEE * T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
229; R600-DAG: RECIP_IEEE * T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
230; R600-DAG: RECIP_IEEE * T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
231; R600-DAG: MUL_IEEE {{\** *}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}, PS
232; R600-DAG: MUL_IEEE {{\** *}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}, PS
233; R600-DAG: MUL_IEEE {{\** *}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}, PS
234; R600-DAG: MUL_IEEE {{\** *}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}, PS
235
Tom Stellard8485fa02016-12-07 02:42:15 +0000236; GCN: v_rcp_f32
237; GCN: v_rcp_f32
238; GCN: v_rcp_f32
239; GCN: v_rcp_f32
; Kernel under test: loads two <4 x float> values from global memory (%a from
; %in, %b from the next <4 x float> element after %in), divides them with the
; "fast" flag, and stores the result to %out. Attribute group #0.
; The GCN checks above expect four v_rcp_f32 instructions.
Matt Arsenault3dbeefa2017-03-21 21:39:51 +0000240define amdgpu_kernel void @fdiv_v4f32_fast_math(<4 x float> addrspace(1)* %out, <4 x float> addrspace(1)* %in) #0 {
Wei Dinged0f97f2016-06-09 19:17:15 +0000241 %b_ptr = getelementptr <4 x float>, <4 x float> addrspace(1)* %in, i32 1
242 %a = load <4 x float>, <4 x float> addrspace(1) * %in
243 %b = load <4 x float>, <4 x float> addrspace(1) * %b_ptr
244 %result = fdiv fast <4 x float> %a, %b
245 store <4 x float> %result, <4 x float> addrspace(1)* %out
246 ret void
247}
248
249; FUNC-LABEL: {{^}}fdiv_v4f32_arcp_math:
250; R600-DAG: RECIP_IEEE * T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
251; R600-DAG: RECIP_IEEE * T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
252; R600-DAG: RECIP_IEEE * T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
253; R600-DAG: RECIP_IEEE * T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
254; R600-DAG: MUL_IEEE {{\** *}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}, PS
255; R600-DAG: MUL_IEEE {{\** *}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}, PS
256; R600-DAG: MUL_IEEE {{\** *}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}, PS
257; R600-DAG: MUL_IEEE {{\** *}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}, PS
258
Tom Stellard8485fa02016-12-07 02:42:15 +0000259; GCN: v_rcp_f32
260; GCN: v_rcp_f32
261; GCN: v_rcp_f32
262; GCN: v_rcp_f32
; Kernel under test: loads two <4 x float> values from global memory (%a from
; %in, %b from the next <4 x float> element after %in), divides them with only
; the "arcp" flag, and stores the result to %out. Attribute group #0.
; The GCN checks above expect four v_rcp_f32 instructions.
Matt Arsenault3dbeefa2017-03-21 21:39:51 +0000263define amdgpu_kernel void @fdiv_v4f32_arcp_math(<4 x float> addrspace(1)* %out, <4 x float> addrspace(1)* %in) #0 {
Wei Dinged0f97f2016-06-09 19:17:15 +0000264 %b_ptr = getelementptr <4 x float>, <4 x float> addrspace(1)* %in, i32 1
265 %a = load <4 x float>, <4 x float> addrspace(1) * %in
266 %b = load <4 x float>, <4 x float> addrspace(1) * %b_ptr
267 %result = fdiv arcp <4 x float> %a, %b
268 store <4 x float> %result, <4 x float> addrspace(1)* %out
269 ret void
270}
Matt Arsenaulta1fe17c2016-07-19 23:16:53 +0000271
; Attribute groups for the kernels in this file. All three are nounwind and
; pass "-flat-for-global"; they differ as follows:
;   #0 - "enable-unsafe-fp-math"="false", "-fp32-denormals"
;   #1 - "enable-unsafe-fp-math"="true",  "-fp32-denormals"
;        NOTE(review): no kernel visible in this file references #1 - confirm
;        whether it is still needed.
;   #2 - "enable-unsafe-fp-math"="false", "+fp32-denormals"
Matt Arsenault7aad8fd2017-01-24 22:02:15 +0000272attributes #0 = { nounwind "enable-unsafe-fp-math"="false" "target-features"="-fp32-denormals,-flat-for-global" }
273attributes #1 = { nounwind "enable-unsafe-fp-math"="true" "target-features"="-fp32-denormals,-flat-for-global" }
274attributes #2 = { nounwind "enable-unsafe-fp-math"="false" "target-features"="+fp32-denormals,-flat-for-global" }
Matt Arsenaulta1fe17c2016-07-19 23:16:53 +0000275
; Operand of the !fpmath annotations used by the *25ulp* / *ulp25* kernels:
; a single float with value 2.5.
276!0 = !{float 2.500000e+00}