; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s

; Intrinsics exercised by the kernels below: the AMDGPU reciprocal intrinsic
; and the generic square-root intrinsic, each in f32 and f64 form. All four
; use attribute group #0.
declare float @llvm.amdgcn.rcp.f32(float) #0
declare double @llvm.amdgcn.rcp.f64(double) #0

declare double @llvm.sqrt.f64(double) #0
declare float @llvm.sqrt.f32(float) #0

; Reciprocal intrinsic applied to an undef operand; the result is stored to
; %out. The directives below require that no v_rcp_f32 instruction is emitted
; for this kernel.
; FUNC-LABEL: {{^}}rcp_undef_f32:
; SI-NOT: v_rcp_f32
define amdgpu_kernel void @rcp_undef_f32(float addrspace(1)* %out) #1 {
  %rcp = call float @llvm.amdgcn.rcp.f32(float undef)
  store float %rcp, float addrspace(1)* %out, align 4
  ret void
}

; Reciprocal intrinsic applied to the constant 2.0. The directives below
; require that no v_rcp_f32 is emitted and that the value 0.5 (= 1/2) is
; materialized with a v_mov_b32 instead.
; FUNC-LABEL: {{^}}rcp_2_f32:
; SI-NOT: v_rcp_f32
; SI: v_mov_b32_e32 v{{[0-9]+}}, 0.5
define amdgpu_kernel void @rcp_2_f32(float addrspace(1)* %out) #1 {
  %rcp = call float @llvm.amdgcn.rcp.f32(float 2.0)
  store float %rcp, float addrspace(1)* %out, align 4
  ret void
}

; Reciprocal intrinsic applied to the constant 10.0. The directives below
; require that no v_rcp_f32 is emitted and that the literal 0x3dcccccd (the
; single-precision bit pattern of 0.1) is moved into a VGPR instead.
; FUNC-LABEL: {{^}}rcp_10_f32:
; SI-NOT: v_rcp_f32
; SI: v_mov_b32_e32 v{{[0-9]+}}, 0x3dcccccd
define amdgpu_kernel void @rcp_10_f32(float addrspace(1)* %out) #1 {
  %rcp = call float @llvm.amdgcn.rcp.f32(float 10.0)
  store float %rcp, float addrspace(1)* %out, align 4
  ret void
}

; "fdiv 1.0, %src" on f32 with attribute group #1 (unsafe-fp-math disabled,
; fp32 denormals disabled). The directives below expect a single v_rcp_f32
; reading an SGPR, whose result register is not mentioned again until it is
; written out by buffer_store_dword.
; FUNC-LABEL: {{^}}safe_no_fp32_denormals_rcp_f32:
; SI: v_rcp_f32_e32 [[RESULT:v[0-9]+]], s{{[0-9]+}}
; SI-NOT: [[RESULT]]
; SI: buffer_store_dword [[RESULT]]
define amdgpu_kernel void @safe_no_fp32_denormals_rcp_f32(float addrspace(1)* %out, float %src) #1 {
  %rcp = fdiv float 1.0, %src
  store float %rcp, float addrspace(1)* %out, align 4
  ret void
}

; "fdiv 1.0, %src" on f32 with attribute group #4 (unsafe-fp-math enabled,
; fp32 denormals enabled). The directives below expect a single v_rcp_f32
; reading an SGPR, whose result register is not mentioned again until it is
; written out by buffer_store_dword.
; NOTE(review): the kernel name says "safe", yet group #4 sets
; "unsafe-fp-math"="true" - the name and the attribute group look mismatched;
; confirm which one is intended before relying on the name.
; FUNC-LABEL: {{^}}safe_f32_denormals_rcp_pat_f32:
; SI: v_rcp_f32_e32 [[RESULT:v[0-9]+]], s{{[0-9]+}}
; SI-NOT: [[RESULT]]
; SI: buffer_store_dword [[RESULT]]
define amdgpu_kernel void @safe_f32_denormals_rcp_pat_f32(float addrspace(1)* %out, float %src) #4 {
  %rcp = fdiv float 1.0, %src
  store float %rcp, float addrspace(1)* %out, align 4
  ret void
}

; "fdiv 1.0, %src" on f32 with attribute group #3 (unsafe-fp-math disabled,
; fp32 denormals enabled). The directive below expects v_div_scale_f32 to
; appear, i.e. the division is not reduced to a bare reciprocal.
; NOTE(review): the kernel name says "unsafe", yet group #3 sets
; "unsafe-fp-math"="false" - the name and the attribute group look mismatched;
; confirm which one is intended before relying on the name.
; FUNC-LABEL: {{^}}unsafe_f32_denormals_rcp_pat_f32:
; SI: v_div_scale_f32
define amdgpu_kernel void @unsafe_f32_denormals_rcp_pat_f32(float addrspace(1)* %out, float %src) #3 {
  %rcp = fdiv float 1.0, %src
  store float %rcp, float addrspace(1)* %out, align 4
  ret void
}

; rcp(sqrt(%src)) on f32 with attribute group #1 (unsafe-fp-math disabled).
; The directives below expect the two operations to stay separate: a
; v_sqrt_f32 followed by a v_rcp_f32.
; FUNC-LABEL: {{^}}safe_rsq_rcp_pat_f32:
; SI: v_sqrt_f32_e32
; SI: v_rcp_f32_e32
define amdgpu_kernel void @safe_rsq_rcp_pat_f32(float addrspace(1)* %out, float %src) #1 {
  %sqrt = call float @llvm.sqrt.f32(float %src)
  %rcp = call float @llvm.amdgcn.rcp.f32(float %sqrt)
  store float %rcp, float addrspace(1)* %out, align 4
  ret void
}

; rcp(sqrt(%src)) on f32 with attribute group #2 (unsafe-fp-math enabled).
; The directive below expects a v_rsq_f32 instruction to be emitted for this
; kernel.
; FUNC-LABEL: {{^}}unsafe_rsq_rcp_pat_f32:
; SI: v_rsq_f32_e32
define amdgpu_kernel void @unsafe_rsq_rcp_pat_f32(float addrspace(1)* %out, float %src) #2 {
  %sqrt = call float @llvm.sqrt.f32(float %src)
  %rcp = call float @llvm.amdgcn.rcp.f32(float %sqrt)
  store float %rcp, float addrspace(1)* %out, align 4
  ret void
}

; Direct call to the f64 reciprocal intrinsic with attribute group #1
; (unsafe-fp-math disabled). The directives below expect a v_rcp_f64 reading
; an SGPR pair, whose result register pair is not mentioned again until it is
; written out by buffer_store_dwordx2.
; FUNC-LABEL: {{^}}rcp_f64:
; SI: v_rcp_f64_e32 [[RESULT:v\[[0-9]+:[0-9]+\]]], s{{\[[0-9]+:[0-9]+\]}}
; SI-NOT: [[RESULT]]
; SI: buffer_store_dwordx2 [[RESULT]]
define amdgpu_kernel void @rcp_f64(double addrspace(1)* %out, double %src) #1 {
  %rcp = call double @llvm.amdgcn.rcp.f64(double %src)
  store double %rcp, double addrspace(1)* %out, align 8
  ret void
}

; Direct call to the f64 reciprocal intrinsic with attribute group #2
; (unsafe-fp-math enabled). The expected output is the same as for rcp_f64:
; a v_rcp_f64 reading an SGPR pair, with the result pair untouched until the
; buffer_store_dwordx2.
; FUNC-LABEL: {{^}}unsafe_rcp_f64:
; SI: v_rcp_f64_e32 [[RESULT:v\[[0-9]+:[0-9]+\]]], s{{\[[0-9]+:[0-9]+\]}}
; SI-NOT: [[RESULT]]
; SI: buffer_store_dwordx2 [[RESULT]]
define amdgpu_kernel void @unsafe_rcp_f64(double addrspace(1)* %out, double %src) #2 {
  %rcp = call double @llvm.amdgcn.rcp.f64(double %src)
  store double %rcp, double addrspace(1)* %out, align 8
  ret void
}

; "fdiv 1.0, %src" on f64 with attribute group #1 (unsafe-fp-math disabled).
; The directive below expects v_div_scale_f64 to appear, i.e. the division is
; not reduced to a bare reciprocal.
; FUNC-LABEL: {{^}}rcp_pat_f64:
; SI: v_div_scale_f64
define amdgpu_kernel void @rcp_pat_f64(double addrspace(1)* %out, double %src) #1 {
  %rcp = fdiv double 1.0, %src
  store double %rcp, double addrspace(1)* %out, align 8
  ret void
}

; "fdiv 1.0, %src" on f64 with attribute group #2 (unsafe-fp-math enabled).
; The directives below expect a v_rcp_f64 reading an SGPR pair, whose result
; register pair is not mentioned again until it is written out by
; buffer_store_dwordx2.
; FUNC-LABEL: {{^}}unsafe_rcp_pat_f64:
; SI: v_rcp_f64_e32 [[RESULT:v\[[0-9]+:[0-9]+\]]], s{{\[[0-9]+:[0-9]+\]}}
; SI-NOT: [[RESULT]]
; SI: buffer_store_dwordx2 [[RESULT]]
define amdgpu_kernel void @unsafe_rcp_pat_f64(double addrspace(1)* %out, double %src) #2 {
  %rcp = fdiv double 1.0, %src
  store double %rcp, double addrspace(1)* %out, align 8
  ret void
}

; rcp(sqrt(%src)) on f64 with attribute group #1 (unsafe-fp-math disabled).
; The directives below forbid v_rsq_f64 ahead of the square root and expect
; the two operations to stay separate: a v_sqrt_f64 followed by a v_rcp_f64.
; FUNC-LABEL: {{^}}safe_rsq_rcp_pat_f64:
; SI-NOT: v_rsq_f64_e32
; SI: v_sqrt_f64
; SI: v_rcp_f64
define amdgpu_kernel void @safe_rsq_rcp_pat_f64(double addrspace(1)* %out, double %src) #1 {
  %sqrt = call double @llvm.sqrt.f64(double %src)
  %rcp = call double @llvm.amdgcn.rcp.f64(double %sqrt)
  store double %rcp, double addrspace(1)* %out, align 8
  ret void
}

; rcp(sqrt(%src)) on f64 with attribute group #2 (unsafe-fp-math enabled).
; The directives below expect a single v_rsq_f64 reading an SGPR pair, whose
; result register pair is not mentioned again until it is written out by
; buffer_store_dwordx2.
; FUNC-LABEL: {{^}}unsafe_rsq_rcp_pat_f64:
; SI: v_rsq_f64_e32 [[RESULT:v\[[0-9]+:[0-9]+\]]], s{{\[[0-9]+:[0-9]+\]}}
; SI-NOT: [[RESULT]]
; SI: buffer_store_dwordx2 [[RESULT]]
define amdgpu_kernel void @unsafe_rsq_rcp_pat_f64(double addrspace(1)* %out, double %src) #2 {
  %sqrt = call double @llvm.sqrt.f64(double %src)
  %rcp = call double @llvm.amdgcn.rcp.f64(double %sqrt)
  store double %rcp, double addrspace(1)* %out, align 8
  ret void
}

; Attribute groups referenced by the declarations and kernels in this file:
;   #0 - intrinsic declarations (nounwind readnone)
;   #1 - unsafe-fp-math disabled, fp32 denormals disabled
;   #2 - unsafe-fp-math enabled,  fp32 denormals disabled
;   #3 - unsafe-fp-math disabled, fp32 denormals enabled
;   #4 - unsafe-fp-math enabled,  fp32 denormals enabled
attributes #0 = { nounwind readnone }
attributes #1 = { nounwind "unsafe-fp-math"="false" "target-features"="-fp32-denormals" }
attributes #2 = { nounwind "unsafe-fp-math"="true" "target-features"="-fp32-denormals" }
attributes #3 = { nounwind "unsafe-fp-math"="false" "target-features"="+fp32-denormals" }
attributes #4 = { nounwind "unsafe-fp-math"="true" "target-features"="+fp32-denormals" }