; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mattr=-fp32-denormals -verify-machineinstrs -enable-unsafe-fp-math < %s | FileCheck -check-prefix=SI-UNSAFE -check-prefix=SI %s
; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mattr=-fp32-denormals -verify-machineinstrs < %s | FileCheck -check-prefix=SI-SAFE -check-prefix=SI %s

; Checks which instructions llc emits on amdgcn when a value is divided by the
; result of llvm.sqrt (reciprocal square root patterns). The file is compiled
; twice: once with -enable-unsafe-fp-math and once without. Both invocations
; pass -mattr=-fp32-denormals and share the common check prefix.

; Intrinsics used by the kernels below.
declare i32 @llvm.amdgcn.workitem.id.x() nounwind readnone
declare float @llvm.sqrt.f32(float) nounwind readnone
declare double @llvm.sqrt.f64(double) nounwind readnone

; rsq_f32: stores 1.0 / sqrt(x) for a float x loaded from global memory.
; Both configurations expect v_rsq_f32_e32 to appear before the program end.
; SI-LABEL: {{^}}rsq_f32:
; SI: v_rsq_f32_e32
; SI: s_endpgm
define amdgpu_kernel void @rsq_f32(float addrspace(1)* noalias %out, float addrspace(1)* noalias %in) nounwind {
  %val = load float, float addrspace(1)* %in, align 4
  %sqrt = call float @llvm.sqrt.f32(float %val) nounwind readnone
  ; Reciprocal of the square root; this is the pattern under test.
  %div = fdiv float 1.0, %sqrt
  store float %div, float addrspace(1)* %out, align 4
  ret void
}

; rsq_f64: stores 1.0 / sqrt(x) for a double x loaded from global memory.
; With unsafe math enabled v_rsq_f64_e32 is expected; without it the check
; only requires that v_sqrt_f64_e32 is emitted.
; NOTE(review): the double load and store use align 4, which is below the
; 8-byte size of the accessed type - confirm this is intentional.
; SI-LABEL: {{^}}rsq_f64:
; SI-UNSAFE: v_rsq_f64_e32
; SI-SAFE: v_sqrt_f64_e32
; SI: s_endpgm
define amdgpu_kernel void @rsq_f64(double addrspace(1)* noalias %out, double addrspace(1)* noalias %in) nounwind {
  %val = load double, double addrspace(1)* %in, align 4
  %sqrt = call double @llvm.sqrt.f64(double %val) nounwind readnone
  ; Reciprocal of the square root; this is the pattern under test.
  %div = fdiv double 1.0, %sqrt
  store double %div, double addrspace(1)* %out, align 4
  ret void
}

; rsq_f32_sgpr: same 1.0 / sqrt(x) pattern as rsq_f32, but x is passed by
; value as a kernel argument instead of being loaded through a pointer. The
; check requires v_rsq_f32_e32 to take an s-register as its source operand.
; SI-LABEL: {{^}}rsq_f32_sgpr:
; SI: v_rsq_f32_e32 {{v[0-9]+}}, {{s[0-9]+}}
; SI: s_endpgm
define amdgpu_kernel void @rsq_f32_sgpr(float addrspace(1)* noalias %out, float %val) nounwind {
  %sqrt = call float @llvm.sqrt.f32(float %val) nounwind readnone
  %div = fdiv float 1.0, %sqrt
  store float %div, float addrspace(1)* %out, align 4
  ret void
}

; Recognize that this is rsqrt(a) * rcp(b) * c,
; not 1 / ( 1 / sqrt(a)) * rcp(b) * c.
;
; The kernel computes c / (sqrt(a) * b), where a, b and c are three
; consecutive floats starting at in[workitem id]; the expected loads use
; offsets 0, 4 and 8 accordingly. With unsafe math enabled the expected
; sequence is rsq(a), rcp(b), their product, that product times c, and a
; store of the result. Without unsafe math no v_rsq_f32 may appear.

; SI-LABEL: {{^}}rsqrt_fmul:
; SI-DAG: buffer_load_dword [[A:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64{{$}}
; SI-DAG: buffer_load_dword [[B:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:4
; SI-DAG: buffer_load_dword [[C:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:8

; SI-UNSAFE-DAG: v_rsq_f32_e32 [[RSQA:v[0-9]+]], [[A]]
; SI-UNSAFE-DAG: v_rcp_f32_e32 [[RCPB:v[0-9]+]], [[B]]
; SI-UNSAFE-DAG: v_mul_f32_e32 [[TMP:v[0-9]+]], [[RSQA]], [[RCPB]]
; SI-UNSAFE: v_mul_f32_e32 [[RESULT:v[0-9]+]], [[C]], [[TMP]]
; SI-UNSAFE: buffer_store_dword [[RESULT]]

; SI-SAFE-NOT: v_rsq_f32

; SI: s_endpgm
define amdgpu_kernel void @rsqrt_fmul(float addrspace(1)* %out, float addrspace(1)* %in) {
  ; Index both buffers by the workitem id.
  %tid = call i32 @llvm.amdgcn.workitem.id.x() nounwind readnone
  %out.gep = getelementptr float, float addrspace(1)* %out, i32 %tid
  %gep.0 = getelementptr float, float addrspace(1)* %in, i32 %tid
  %gep.1 = getelementptr float, float addrspace(1)* %gep.0, i32 1
  %gep.2 = getelementptr float, float addrspace(1)* %gep.0, i32 2

  ; NOTE(review): presumably volatile so the three dword loads stay separate
  ; and match the individual load checks above - confirm.
  %a = load volatile float, float addrspace(1)* %gep.0
  %b = load volatile float, float addrspace(1)* %gep.1
  %c = load volatile float, float addrspace(1)* %gep.2

  ; z = c / (sqrt(a) * b)
  %x = call float @llvm.sqrt.f32(float %a)
  %y = fmul float %x, %b
  %z = fdiv float %c, %y
  store float %z, float addrspace(1)* %out.gep
  ret void
}

; neg_rsq_f32: stores -1.0 / sqrt(x) for a float x loaded from global memory.
; Without unsafe math the expected code is v_sqrt_f32 followed by v_rcp_f32
; with a negated source operand. With unsafe math it is v_rsq_f32 followed by
; an xor with 0x80000000 (the f32 sign bit), and the xor result is stored.
; SI-LABEL: {{^}}neg_rsq_f32:
; SI-SAFE: v_sqrt_f32_e32 [[SQRT:v[0-9]+]], v{{[0-9]+}}
; SI-SAFE: v_rcp_f32_e64 [[RSQ:v[0-9]+]], -[[SQRT]]
; SI-SAFE: buffer_store_dword [[RSQ]]

; SI-UNSAFE: v_rsq_f32_e32 [[RSQ:v[0-9]+]], v{{[0-9]+}}
; SI-UNSAFE: v_xor_b32_e32 [[NEG_RSQ:v[0-9]+]], 0x80000000, [[RSQ]]
; SI-UNSAFE: buffer_store_dword [[NEG_RSQ]]
define amdgpu_kernel void @neg_rsq_f32(float addrspace(1)* noalias %out, float addrspace(1)* noalias %in) nounwind {
  %val = load float, float addrspace(1)* %in, align 4
  %sqrt = call float @llvm.sqrt.f32(float %val)
  ; Negative numerator: the result is the negated reciprocal square root.
  %div = fdiv float -1.0, %sqrt
  store float %div, float addrspace(1)* %out, align 4
  ret void
}

; neg_rsq_f64: stores -1.0 / sqrt(x) for a double x loaded from global memory.
; Without unsafe math the checks require v_sqrt_f64 followed by
; v_div_scale_f64. With unsafe math they require v_sqrt_f64 followed by
; v_rcp_f64 with a negated source operand, whose result is stored.
; SI-LABEL: {{^}}neg_rsq_f64:
; SI-SAFE: v_sqrt_f64_e32
; SI-SAFE: v_div_scale_f64

; SI-UNSAFE: v_sqrt_f64_e32 [[SQRT:v\[[0-9]+:[0-9]+\]]], v{{\[[0-9]+:[0-9]+\]}}
; SI-UNSAFE: v_rcp_f64_e64 [[RCP:v\[[0-9]+:[0-9]+\]]], -[[SQRT]]
; SI-UNSAFE: buffer_store_dwordx2 [[RCP]]
define amdgpu_kernel void @neg_rsq_f64(double addrspace(1)* noalias %out, double addrspace(1)* noalias %in) nounwind {
  %val = load double, double addrspace(1)* %in, align 4
  %sqrt = call double @llvm.sqrt.f64(double %val)
  ; Negative numerator: the result is the negated reciprocal square root.
  %div = fdiv double -1.0, %sqrt
  store double %div, double addrspace(1)* %out, align 4
  ret void
}

; neg_rsq_neg_f32: stores -1.0 / sqrt(-x) for a float x loaded from global
; memory. In both configurations the negation of x is expected as a negated
; source operand on the e64 form of the first instruction (v_sqrt_f32 without
; unsafe math, v_rsq_f32 with it). The remaining checks match neg_rsq_f32.
; SI-LABEL: {{^}}neg_rsq_neg_f32:
; SI-SAFE: v_sqrt_f32_e64 [[SQRT:v[0-9]+]], -v{{[0-9]+}}
; SI-SAFE: v_rcp_f32_e64 [[RSQ:v[0-9]+]], -[[SQRT]]
; SI-SAFE: buffer_store_dword [[RSQ]]

; SI-UNSAFE: v_rsq_f32_e64 [[RSQ:v[0-9]+]], -v{{[0-9]+}}
; SI-UNSAFE: v_xor_b32_e32 [[NEG_RSQ:v[0-9]+]], 0x80000000, [[RSQ]]
; SI-UNSAFE: buffer_store_dword [[NEG_RSQ]]
define amdgpu_kernel void @neg_rsq_neg_f32(float addrspace(1)* noalias %out, float addrspace(1)* noalias %in) nounwind {
  %val = load float, float addrspace(1)* %in, align 4
  ; Negation written as -0.0 - x.
  %val.fneg = fsub float -0.0, %val
  %sqrt = call float @llvm.sqrt.f32(float %val.fneg)
  %div = fdiv float -1.0, %sqrt
  store float %div, float addrspace(1)* %out, align 4
  ret void
}

; neg_rsq_neg_f64: stores -1.0 / sqrt(-x) for a double x loaded from global
; memory. In both configurations the negation of x is expected as a negated
; source operand on v_sqrt_f64_e64. Without unsafe math v_div_scale_f64 must
; follow; with unsafe math v_rcp_f64 with a negated source operand must
; follow and its result is stored.
; SI-LABEL: {{^}}neg_rsq_neg_f64:
; SI-SAFE: v_sqrt_f64_e64 v{{\[[0-9]+:[0-9]+\]}}, -v{{\[[0-9]+:[0-9]+\]}}
; SI-SAFE: v_div_scale_f64

; SI-UNSAFE: v_sqrt_f64_e64 [[SQRT:v\[[0-9]+:[0-9]+\]]], -v{{\[[0-9]+:[0-9]+\]}}
; SI-UNSAFE: v_rcp_f64_e64 [[RCP:v\[[0-9]+:[0-9]+\]]], -[[SQRT]]
; SI-UNSAFE: buffer_store_dwordx2 [[RCP]]
define amdgpu_kernel void @neg_rsq_neg_f64(double addrspace(1)* noalias %out, double addrspace(1)* noalias %in) nounwind {
  %val = load double, double addrspace(1)* %in, align 4
  ; Negation written as -0.0 - x.
  %val.fneg = fsub double -0.0, %val
  %sqrt = call double @llvm.sqrt.f64(double %val.fneg)
  %div = fdiv double -1.0, %sqrt
  store double %div, double addrspace(1)* %out, align 4
  ret void
}