; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=NOSNAN -check-prefix=GCN %s
; RUN: llc -march=amdgcn -mattr=+fp-exceptions -verify-machineinstrs < %s | FileCheck -check-prefix=SNAN -check-prefix=GCN %s

; Intrinsics shared by every test in this file. The value returned by
; read.tidig.x is used as the element index into each test's input and output
; buffers; minnum/maxnum are the operations whose combination is under test.
; All of them carry attribute group #0.
declare i32 @llvm.r600.read.tidig.x() #0
declare float @llvm.minnum.f32(float, float) #0
declare float @llvm.maxnum.f32(float, float) #0
declare double @llvm.minnum.f64(double, double) #0
declare double @llvm.maxnum.f64(double, double) #0

; Baseline pattern: minnum(maxnum(a, 2.0), 4.0) on a value loaded from
; %aptr[tid], with the variable as the first operand of both calls. The result
; is stored to %out[tid].
; The default run (NOSNAN prefix) expects the pair to be selected as a single
; v_med3_f32 with the constants 2.0 and 4.0. The run with
; -mattr=+fp-exceptions (SNAN prefix) expects a separate v_max_f32 followed by
; v_min_f32 instead.
; GCN-LABEL: {{^}}v_test_fmed3_r_i_i_f32:
; NOSNAN: v_med3_f32 v{{[0-9]+}}, v{{[0-9]+}}, 2.0, 4.0

; SNAN: v_max_f32_e32 v{{[0-9]+}}, 2.0, v{{[0-9]+}}
; SNAN: v_min_f32_e32 v{{[0-9]+}}, 4.0, v{{[0-9]+}}
define void @v_test_fmed3_r_i_i_f32(float addrspace(1)* %out, float addrspace(1)* %aptr) #1 {
  %tid = call i32 @llvm.r600.read.tidig.x()
  %gep0 = getelementptr float, float addrspace(1)* %aptr, i32 %tid
  %outgep = getelementptr float, float addrspace(1)* %out, i32 %tid
  %a = load float, float addrspace(1)* %gep0

  %max = call float @llvm.maxnum.f32(float %a, float 2.0)
  %med = call float @llvm.minnum.f32(float %max, float 4.0)

  store float %med, float addrspace(1)* %outgep
  ret void
}

; Same computation as the baseline test, but with the operands of BOTH calls
; commuted: the constant is the first operand of maxnum and of minnum.
; The expected output is identical to the baseline: one v_med3_f32 in the
; default run, separate v_max_f32 / v_min_f32 in the fp-exceptions run.
; GCN-LABEL: {{^}}v_test_fmed3_r_i_i_commute0_f32:
; NOSNAN: v_med3_f32 v{{[0-9]+}}, v{{[0-9]+}}, 2.0, 4.0

; SNAN: v_max_f32_e32 v{{[0-9]+}}, 2.0, v{{[0-9]+}}
; SNAN: v_min_f32_e32 v{{[0-9]+}}, 4.0, v{{[0-9]+}}
define void @v_test_fmed3_r_i_i_commute0_f32(float addrspace(1)* %out, float addrspace(1)* %aptr) #1 {
  %tid = call i32 @llvm.r600.read.tidig.x()
  %gep0 = getelementptr float, float addrspace(1)* %aptr, i32 %tid
  %outgep = getelementptr float, float addrspace(1)* %out, i32 %tid
  %a = load float, float addrspace(1)* %gep0

  %max = call float @llvm.maxnum.f32(float 2.0, float %a)
  %med = call float @llvm.minnum.f32(float 4.0, float %max)

  store float %med, float addrspace(1)* %outgep
  ret void
}

; Same computation as the baseline test, but with only the OUTER call
; commuted: maxnum keeps the variable first, minnum takes the constant first.
; The expected output is identical to the baseline: one v_med3_f32 in the
; default run, separate v_max_f32 / v_min_f32 in the fp-exceptions run.
; GCN-LABEL: {{^}}v_test_fmed3_r_i_i_commute1_f32:
; NOSNAN: v_med3_f32 v{{[0-9]+}}, v{{[0-9]+}}, 2.0, 4.0

; SNAN: v_max_f32_e32 v{{[0-9]+}}, 2.0, v{{[0-9]+}}
; SNAN: v_min_f32_e32 v{{[0-9]+}}, 4.0, v{{[0-9]+}}
define void @v_test_fmed3_r_i_i_commute1_f32(float addrspace(1)* %out, float addrspace(1)* %aptr) #1 {
  %tid = call i32 @llvm.r600.read.tidig.x()
  %gep0 = getelementptr float, float addrspace(1)* %aptr, i32 %tid
  %outgep = getelementptr float, float addrspace(1)* %out, i32 %tid
  %a = load float, float addrspace(1)* %gep0

  %max = call float @llvm.maxnum.f32(float %a, float 2.0)
  %med = call float @llvm.minnum.f32(float 4.0, float %max)

  store float %med, float addrspace(1)* %outgep
  ret void
}

; Negative test: the constants are in the opposite order from the baseline.
; The maxnum bound (4.0) is larger than the minnum bound (2.0), so this is
; minnum(maxnum(a, 4.0), 2.0) rather than a clamp to an ordered range.
; The checks use the shared prefix, so BOTH runs expect the operations to stay
; as a separate v_max_f32 and v_min_f32 (no v_med3_f32).
; GCN-LABEL: {{^}}v_test_fmed3_r_i_i_constant_order_f32:
; GCN: v_max_f32_e32 v{{[0-9]+}}, 4.0, v{{[0-9]+}}
; GCN: v_min_f32_e32 v{{[0-9]+}}, 2.0, v{{[0-9]+}}
define void @v_test_fmed3_r_i_i_constant_order_f32(float addrspace(1)* %out, float addrspace(1)* %aptr) #1 {
  %tid = call i32 @llvm.r600.read.tidig.x()
  %gep0 = getelementptr float, float addrspace(1)* %aptr, i32 %tid
  %outgep = getelementptr float, float addrspace(1)* %out, i32 %tid
  %a = load float, float addrspace(1)* %gep0

  %max = call float @llvm.maxnum.f32(float %a, float 4.0)
  %med = call float @llvm.minnum.f32(float %max, float 2.0)

  store float %med, float addrspace(1)* %outgep
  ret void
}


; Negative test: the intermediate maxnum result has a second use. Both %med
; and %max are written to %out[tid] with volatile stores, so %max must be
; materialised on its own.
; The checks use the shared prefix, so BOTH runs expect a separate v_max_f32
; and v_min_f32 (no v_med3_f32).
; GCN-LABEL: {{^}}v_test_fmed3_r_i_i_multi_use_f32:
; GCN: v_max_f32_e32 v{{[0-9]+}}, 2.0, v{{[0-9]+}}
; GCN: v_min_f32_e32 v{{[0-9]+}}, 4.0, v{{[0-9]+}}
define void @v_test_fmed3_r_i_i_multi_use_f32(float addrspace(1)* %out, float addrspace(1)* %aptr) #1 {
  %tid = call i32 @llvm.r600.read.tidig.x()
  %gep0 = getelementptr float, float addrspace(1)* %aptr, i32 %tid
  %outgep = getelementptr float, float addrspace(1)* %out, i32 %tid
  %a = load float, float addrspace(1)* %gep0

  %max = call float @llvm.maxnum.f32(float %a, float 2.0)
  %med = call float @llvm.minnum.f32(float %max, float 4.0)

  store volatile float %med, float addrspace(1)* %outgep
  store volatile float %max, float addrspace(1)* %outgep
  ret void
}

; Double-precision version of the baseline pattern:
; minnum.f64(maxnum.f64(a, 2.0), 4.0).
; The checks use the shared prefix, so BOTH runs expect a separate v_max_f64
; and v_min_f64 on 64-bit register pairs; no med3 form is expected for f64.
; GCN-LABEL: {{^}}v_test_fmed3_r_i_i_f64:
; GCN: v_max_f64 {{v\[[0-9]+:[0-9]+\]}}, 2.0, {{v\[[0-9]+:[0-9]+\]}}
; GCN: v_min_f64 {{v\[[0-9]+:[0-9]+\]}}, 4.0, {{v\[[0-9]+:[0-9]+\]}}
define void @v_test_fmed3_r_i_i_f64(double addrspace(1)* %out, double addrspace(1)* %aptr) #1 {
  %tid = call i32 @llvm.r600.read.tidig.x()
  %gep0 = getelementptr double, double addrspace(1)* %aptr, i32 %tid
  %outgep = getelementptr double, double addrspace(1)* %out, i32 %tid
  %a = load double, double addrspace(1)* %gep0

  %max = call double @llvm.maxnum.f64(double %a, double 2.0)
  %med = call double @llvm.minnum.f64(double %max, double 4.0)

  store double %med, double addrspace(1)* %outgep
  ret void
}

; Same IR body as the baseline test, but the function uses attribute group #2,
; which sets "no-nans-fp-math"="true".
; The check uses the shared prefix, so BOTH runs (including the one with
; -mattr=+fp-exceptions) expect a single v_med3_f32.
; GCN-LABEL: {{^}}v_test_fmed3_r_i_i_no_nans_f32:
; GCN: v_med3_f32 v{{[0-9]+}}, v{{[0-9]+}}, 2.0, 4.0
define void @v_test_fmed3_r_i_i_no_nans_f32(float addrspace(1)* %out, float addrspace(1)* %aptr) #2 {
  %tid = call i32 @llvm.r600.read.tidig.x()
  %gep0 = getelementptr float, float addrspace(1)* %aptr, i32 %tid
  %outgep = getelementptr float, float addrspace(1)* %out, i32 %tid
  %a = load float, float addrspace(1)* %gep0

  %max = call float @llvm.maxnum.f32(float %a, float 2.0)
  %med = call float @llvm.minnum.f32(float %max, float 4.0)

  store float %med, float addrspace(1)* %outgep
  ret void
}

; Same clamp as the baseline test, but written with fcmp + select instead of
; the minnum/maxnum intrinsics. Both compares are unordered predicates
; (ule / uge), so an unordered compare selects the constant operand.
; The expected output matches the baseline: one v_med3_f32 in the default run,
; separate v_max_f32 / v_min_f32 in the fp-exceptions run.
; GCN-LABEL: {{^}}v_test_legacy_fmed3_r_i_i_f32:
; NOSNAN: v_med3_f32 v{{[0-9]+}}, v{{[0-9]+}}, 2.0, 4.0

; SNAN: v_max_f32_e32 v{{[0-9]+}}, 2.0, v{{[0-9]+}}
; SNAN: v_min_f32_e32 v{{[0-9]+}}, 4.0, v{{[0-9]+}}
define void @v_test_legacy_fmed3_r_i_i_f32(float addrspace(1)* %out, float addrspace(1)* %aptr) #1 {
  %tid = call i32 @llvm.r600.read.tidig.x()
  %gep0 = getelementptr float, float addrspace(1)* %aptr, i32 %tid
  %outgep = getelementptr float, float addrspace(1)* %out, i32 %tid
  %a = load float, float addrspace(1)* %gep0

  ; fmax_legacy
  ; Yields 2.0 when %a <= 2.0 or the compare is unordered, otherwise %a.
  %cmp0 = fcmp ule float %a, 2.0
  %max = select i1 %cmp0, float 2.0, float %a

  ; fmin_legacy
  ; Yields 4.0 when %max >= 4.0 or the compare is unordered, otherwise %max.
  %cmp1 = fcmp uge float %max, 4.0
  %med = select i1 %cmp1, float 4.0, float %max

  store float %med, float addrspace(1)* %outgep
  ret void
}

; Attribute groups referenced above:
;   #0 - applied to the intrinsic declarations.
;   #1 - test functions where NaNs are not assumed absent
;        ("no-nans-fp-math"="false").
;   #2 - test function where NaNs are assumed absent
;        ("no-nans-fp-math"="true").
attributes #0 = { nounwind readnone }
attributes #1 = { nounwind "unsafe-fp-math"="false" "no-nans-fp-math"="false" }
attributes #2 = { nounwind "unsafe-fp-math"="false" "no-nans-fp-math"="true" }