Matt Arsenault | fcb345f | 2016-02-11 06:15:39 +0000 | [diff] [blame] | 1 | ; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=SI-SAFE -check-prefix=SI -check-prefix=FUNC %s |
| 2 | ; RUN: llc -enable-no-nans-fp-math -enable-unsafe-fp-math -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=SI-NONAN -check-prefix=SI -check-prefix=FUNC %s |
Matt Arsenault | da59f3d | 2014-11-13 23:03:09 +0000 | [diff] [blame] | 3 | ; RUN: llc -march=r600 -mcpu=redwood < %s | FileCheck -check-prefix=EG -check-prefix=FUNC %s |
| 4 | |
Matt Arsenault | a982e4f | 2015-01-13 00:43:00 +0000 | [diff] [blame] | 5 | ; FIXME: Should replace unsafe-fp-math with no signed zeros. |
| 6 | |
Matt Arsenault | 36094d7 | 2014-11-15 05:02:57 +0000 | [diff] [blame] | 7 | declare i32 @llvm.r600.read.tidig.x() #1 |
| 8 | |
; The two inputs to the instruction are different SGPRs taken from the same
; super register, so we can't fold both SGPR operands into the instruction
; even though they both come from that one super register.
| 12 | |
; Min pattern (fcmp uge + select with swapped operands) on elements 0 and 1
; of a single inreg <4 x float> argument.
; FUNC-LABEL: {{^}}s_test_fmin_legacy_subreg_inputs_f32:
; EG: MIN *
; SI-SAFE: v_min_legacy_f32_e32 v{{[0-9]+}}, s{{[0-9]+}}, v{{[0-9]+}}
; SI-NONAN: v_min_f32_e32 v{{[0-9]+}}, s{{[0-9]+}}, v{{[0-9]+}}
define amdgpu_kernel void @s_test_fmin_legacy_subreg_inputs_f32(<4 x float> addrspace(1)* %out, <4 x float> inreg %reg0) #0 {
  ; Both compare operands are lanes of the same vector argument.
  %elt0 = extractelement <4 x float> %reg0, i32 0
  %elt1 = extractelement <4 x float> %reg0, i32 1
  ; select(elt0 uge elt1, elt1, elt0): picks the smaller lane.
  %is.uge = fcmp uge float %elt0, %elt1
  %min = select i1 %is.uge, float %elt1, float %elt0
  ; Only lane 0 of the stored vector is defined; the rest stays undef.
  %result = insertelement <4 x float> undef, float %min, i32 0
  store <4 x float> %result, <4 x float> addrspace(1)* %out, align 16
  ret void
}
| 26 | |
; Min pattern (fcmp ule + select) on two scalar kernel arguments. The checks
; capture both arguments as SGPR loads and expect exactly one of them to be
; copied to a VGPR before the min instruction.
; FUNC-LABEL: {{^}}s_test_fmin_legacy_ule_f32:
; SI-DAG: s_load_dword [[A:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0xb
; SI-DAG: s_load_dword [[B:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0xc

; SI-SAFE-DAG: v_mov_b32_e32 [[VA:v[0-9]+]], [[A]]
; SI-NONAN-DAG: v_mov_b32_e32 [[VB:v[0-9]+]], [[B]]

; SI-SAFE: v_min_legacy_f32_e32 {{v[0-9]+}}, [[B]], [[VA]]
; SI-NONAN: v_min_f32_e32 {{v[0-9]+}}, [[A]], [[VB]]

define amdgpu_kernel void @s_test_fmin_legacy_ule_f32(float addrspace(1)* %out, float %a, float %b) #0 {
  ; select(a ule b, a, b)
  %a.ule.b = fcmp ule float %a, %b
  %min = select i1 %a.ule.b, float %a, float %b
  store float %min, float addrspace(1)* %out, align 4
  ret void
}
| 43 | |
; Min pattern (fcmp ule + select) on two floats loaded from adjacent
; addresses. The safe-math run is expected to select v_min_legacy_f32 and
; the no-NaNs run v_min_f32.
; The label check is anchored ({{^}}name:) like the other labels in this
; file, so it matches only this function's own label line rather than any
; text that merely contains the name.
; FUNC-LABEL: {{^}}test_fmin_legacy_ule_f32:
; SI: buffer_load_dword [[A:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64{{$}}
; SI: buffer_load_dword [[B:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:4
; SI-SAFE: v_min_legacy_f32_e32 {{v[0-9]+}}, [[B]], [[A]]
; SI-NONAN: v_min_f32_e32 {{v[0-9]+}}, [[B]], [[A]]
define amdgpu_kernel void @test_fmin_legacy_ule_f32(float addrspace(1)* %out, float addrspace(1)* %in) #0 {
  ; %gep.0 indexes %in by the intrinsic's result; %gep.1 is the next float.
  %tid = call i32 @llvm.r600.read.tidig.x() #1
  %gep.0 = getelementptr float, float addrspace(1)* %in, i32 %tid
  %gep.1 = getelementptr float, float addrspace(1)* %gep.0, i32 1

  ; NOTE(review): volatile presumably keeps these as two separate, ordered
  ; dword loads so the A/B captures above stay stable; confirm.
  %a = load volatile float, float addrspace(1)* %gep.0, align 4
  %b = load volatile float, float addrspace(1)* %gep.1, align 4

  ; select(a ule b, a, b)
  %cmp = fcmp ule float %a, %b
  %val = select i1 %cmp, float %a, float %b
  store float %val, float addrspace(1)* %out, align 4
  ret void
}
| 62 | |
; Min pattern (fcmp ole + select) on two floats loaded from adjacent
; addresses. The safe-math run is expected to select v_min_legacy_f32 and
; the no-NaNs run v_min_f32.
; The label check is anchored ({{^}}name:) so it cannot also match the
; longer name test_fmin_legacy_ole_f32_multi_use, which the previous
; unanchored "@name" pattern was a prefix of.
; FUNC-LABEL: {{^}}test_fmin_legacy_ole_f32:
; SI: buffer_load_dword [[A:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64{{$}}
; SI: buffer_load_dword [[B:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:4
; SI-SAFE: v_min_legacy_f32_e32 {{v[0-9]+}}, [[A]], [[B]]
; SI-NONAN: v_min_f32_e32 {{v[0-9]+}}, [[B]], [[A]]
define amdgpu_kernel void @test_fmin_legacy_ole_f32(float addrspace(1)* %out, float addrspace(1)* %in) #0 {
  ; %gep.0 indexes %in by the intrinsic's result; %gep.1 is the next float.
  %tid = call i32 @llvm.r600.read.tidig.x() #1
  %gep.0 = getelementptr float, float addrspace(1)* %in, i32 %tid
  %gep.1 = getelementptr float, float addrspace(1)* %gep.0, i32 1

  ; NOTE(review): volatile presumably keeps these as two separate, ordered
  ; dword loads so the A/B captures above stay stable; confirm.
  %a = load volatile float, float addrspace(1)* %gep.0, align 4
  %b = load volatile float, float addrspace(1)* %gep.1, align 4

  ; select(a ole b, a, b)
  %cmp = fcmp ole float %a, %b
  %val = select i1 %cmp, float %a, float %b
  store float %val, float addrspace(1)* %out, align 4
  ret void
}
| 81 | |
; Min pattern (fcmp olt + select) on two floats loaded from adjacent
; addresses. The safe-math run is expected to select v_min_legacy_f32 and
; the no-NaNs run v_min_f32.
; The label check is anchored ({{^}}name:) like the other labels in this
; file, so it matches only this function's own label line.
; FUNC-LABEL: {{^}}test_fmin_legacy_olt_f32:
; SI: buffer_load_dword [[A:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64{{$}}
; SI: buffer_load_dword [[B:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:4
; SI-SAFE: v_min_legacy_f32_e32 {{v[0-9]+}}, [[A]], [[B]]
; SI-NONAN: v_min_f32_e32 {{v[0-9]+}}, [[B]], [[A]]
define amdgpu_kernel void @test_fmin_legacy_olt_f32(float addrspace(1)* %out, float addrspace(1)* %in) #0 {
  ; %gep.0 indexes %in by the intrinsic's result; %gep.1 is the next float.
  %tid = call i32 @llvm.r600.read.tidig.x() #1
  %gep.0 = getelementptr float, float addrspace(1)* %in, i32 %tid
  %gep.1 = getelementptr float, float addrspace(1)* %gep.0, i32 1

  ; NOTE(review): volatile presumably keeps these as two separate, ordered
  ; dword loads so the A/B captures above stay stable; confirm.
  %a = load volatile float, float addrspace(1)* %gep.0, align 4
  %b = load volatile float, float addrspace(1)* %gep.1, align 4

  ; select(a olt b, a, b)
  %cmp = fcmp olt float %a, %b
  %val = select i1 %cmp, float %a, float %b
  store float %val, float addrspace(1)* %out, align 4
  ret void
}
| 100 | |
; Min pattern (fcmp ult + select) on two floats loaded from adjacent
; addresses. The safe-math run is expected to select v_min_legacy_f32 and
; the no-NaNs run v_min_f32.
; The label check is anchored ({{^}}name:) so it cannot also match the
; longer test_fmin_legacy_ult_v*f32 names, which the previous unanchored
; "@name" pattern was a prefix of.
; FUNC-LABEL: {{^}}test_fmin_legacy_ult_f32:
; SI: buffer_load_dword [[A:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64{{$}}
; SI: buffer_load_dword [[B:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:4
; SI-SAFE: v_min_legacy_f32_e32 {{v[0-9]+}}, [[B]], [[A]]
; SI-NONAN: v_min_f32_e32 {{v[0-9]+}}, [[B]], [[A]]
define amdgpu_kernel void @test_fmin_legacy_ult_f32(float addrspace(1)* %out, float addrspace(1)* %in) #0 {
  ; %gep.0 indexes %in by the intrinsic's result; %gep.1 is the next float.
  %tid = call i32 @llvm.r600.read.tidig.x() #1
  %gep.0 = getelementptr float, float addrspace(1)* %in, i32 %tid
  %gep.1 = getelementptr float, float addrspace(1)* %gep.0, i32 1

  ; NOTE(review): volatile presumably keeps these as two separate, ordered
  ; dword loads so the A/B captures above stay stable; confirm.
  %a = load volatile float, float addrspace(1)* %gep.0, align 4
  %b = load volatile float, float addrspace(1)* %gep.1, align 4

  ; select(a ult b, a, b)
  %cmp = fcmp ult float %a, %b
  %val = select i1 %cmp, float %a, float %b
  store float %val, float addrspace(1)* %out, align 4
  ret void
}
Matt Arsenault | 36094d7 | 2014-11-15 05:02:57 +0000 | [diff] [blame] | 119 | |
; Single-element vector form of the ult min pattern. The checks expect the
; same dword loads and min instructions as the scalar float case.
; FUNC-LABEL: {{^}}test_fmin_legacy_ult_v1f32:
; SI: buffer_load_dword [[A:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64{{$}}
; SI: buffer_load_dword [[B:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:4
; SI-SAFE: v_min_legacy_f32_e32 {{v[0-9]+}}, [[B]], [[A]]
; SI-NONAN: v_min_f32_e32 {{v[0-9]+}}, [[B]], [[A]]
define amdgpu_kernel void @test_fmin_legacy_ult_v1f32(<1 x float> addrspace(1)* %out, <1 x float> addrspace(1)* %in) #0 {
  ; %ptr.a indexes %in by the intrinsic's result; %ptr.b is the next element.
  %idx = call i32 @llvm.r600.read.tidig.x() #1
  %ptr.a = getelementptr <1 x float>, <1 x float> addrspace(1)* %in, i32 %idx
  %ptr.b = getelementptr <1 x float>, <1 x float> addrspace(1)* %ptr.a, i32 1

  %a = load <1 x float>, <1 x float> addrspace(1)* %ptr.a
  %b = load <1 x float>, <1 x float> addrspace(1)* %ptr.b

  ; select(a ult b, a, b)
  %a.ult.b = fcmp ult <1 x float> %a, %b
  %min = select <1 x i1> %a.ult.b, <1 x float> %a, <1 x float> %b
  store <1 x float> %min, <1 x float> addrspace(1)* %out
  ret void
}
| 138 | |
; Two-element vector form of the ult min pattern. The checks expect two
; dwordx2 loads followed by one min instruction per element.
; FUNC-LABEL: {{^}}test_fmin_legacy_ult_v2f32:
; SI: buffer_load_dwordx2
; SI: buffer_load_dwordx2
; SI-SAFE: v_min_legacy_f32_e32
; SI-SAFE: v_min_legacy_f32_e32

; SI-NONAN: v_min_f32_e32
; SI-NONAN: v_min_f32_e32
define amdgpu_kernel void @test_fmin_legacy_ult_v2f32(<2 x float> addrspace(1)* %out, <2 x float> addrspace(1)* %in) #0 {
  ; %ptr.a indexes %in by the intrinsic's result; %ptr.b is the next element.
  %idx = call i32 @llvm.r600.read.tidig.x() #1
  %ptr.a = getelementptr <2 x float>, <2 x float> addrspace(1)* %in, i32 %idx
  %ptr.b = getelementptr <2 x float>, <2 x float> addrspace(1)* %ptr.a, i32 1

  %a = load <2 x float>, <2 x float> addrspace(1)* %ptr.a
  %b = load <2 x float>, <2 x float> addrspace(1)* %ptr.b

  ; Element-wise select(a ult b, a, b)
  %a.ult.b = fcmp ult <2 x float> %a, %b
  %min = select <2 x i1> %a.ult.b, <2 x float> %a, <2 x float> %b
  store <2 x float> %min, <2 x float> addrspace(1)* %out
  ret void
}
| 160 | |
; Three-element vector form of the ult min pattern. The checks expect one
; min instruction per element.
; FUNC-LABEL: {{^}}test_fmin_legacy_ult_v3f32:
; SI-SAFE: v_min_legacy_f32_e32
; SI-SAFE: v_min_legacy_f32_e32
; SI-SAFE: v_min_legacy_f32_e32

; SI-NONAN: v_min_f32_e32
; SI-NONAN: v_min_f32_e32
; SI-NONAN: v_min_f32_e32
define amdgpu_kernel void @test_fmin_legacy_ult_v3f32(<3 x float> addrspace(1)* %out, <3 x float> addrspace(1)* %in) #0 {
  ; %ptr.a indexes %in by the intrinsic's result; %ptr.b is the next element.
  %idx = call i32 @llvm.r600.read.tidig.x() #1
  %ptr.a = getelementptr <3 x float>, <3 x float> addrspace(1)* %in, i32 %idx
  %ptr.b = getelementptr <3 x float>, <3 x float> addrspace(1)* %ptr.a, i32 1

  %a = load <3 x float>, <3 x float> addrspace(1)* %ptr.a
  %b = load <3 x float>, <3 x float> addrspace(1)* %ptr.b

  ; Element-wise select(a ult b, a, b)
  %a.ult.b = fcmp ult <3 x float> %a, %b
  %min = select <3 x i1> %a.ult.b, <3 x float> %a, <3 x float> %b
  store <3 x float> %min, <3 x float> addrspace(1)* %out
  ret void
}
| 182 | |
; Same ole compare + select as test_fmin_legacy_ole_f32, but the compare
; result has a second use (it is also stored to %out1). The checks expect
; no v_min instruction at all, only a v_cmp_le_f32 immediately followed by
; v_cndmask_b32.
; The label check is anchored ({{^}}name:) like the other labels in this
; file, so it matches only this function's own label line.
; FUNC-LABEL: {{^}}test_fmin_legacy_ole_f32_multi_use:
; SI: buffer_load_dword [[A:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64{{$}}
; SI: buffer_load_dword [[B:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:4
; SI-NOT: v_min
; SI: v_cmp_le_f32
; SI-NEXT: v_cndmask_b32
; SI-NOT: v_min
; SI: s_endpgm
define amdgpu_kernel void @test_fmin_legacy_ole_f32_multi_use(float addrspace(1)* %out0, i1 addrspace(1)* %out1, float addrspace(1)* %in) #0 {
  ; %gep.0 indexes %in by the intrinsic's result; %gep.1 is the next float.
  %tid = call i32 @llvm.r600.read.tidig.x() #1
  %gep.0 = getelementptr float, float addrspace(1)* %in, i32 %tid
  %gep.1 = getelementptr float, float addrspace(1)* %gep.0, i32 1

  ; NOTE(review): volatile presumably keeps these as two separate, ordered
  ; dword loads so the A/B captures above stay stable; confirm.
  %a = load volatile float, float addrspace(1)* %gep.0, align 4
  %b = load volatile float, float addrspace(1)* %gep.1, align 4

  %cmp = fcmp ole float %a, %b
  %val0 = select i1 %cmp, float %a, float %b
  store float %val0, float addrspace(1)* %out0, align 4
  ; Second use of %cmp: the raw i1 compare result is stored as well.
  store i1 %cmp, i1 addrspace(1)* %out1
  ret void
}
| 205 | |
; Attribute groups referenced above: #0 is attached to the kernel
; definitions, #1 to the @llvm.r600.read.tidig.x declaration and its calls.
attributes #0 = { nounwind }
attributes #1 = { nounwind readnone }