| Stanislav Mekhanoshin | 20279dc | 2018-06-20 20:24:20 +0000 | [diff] [blame] | 1 | ; RUN: llc -march=amdgcn -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefix=GCN %s |
| 2 | |
; and(%y, select(%x < 11, 0, -1)) with the select as the right-hand operand.
; The mask is 0 when %x < 11 and all-ones otherwise, so the result is either
; 0 or %y. The checks expect one v_cndmask between 0 and a v-register, no
; v_and_b32 before the store, and the selected register stored directly.
; GCN-LABEL: {{^}}select_and1:
; GCN: v_cndmask_b32_e32 [[SEL:v[0-9]+]], 0, v{{[0-9]+}},
; GCN-NOT: v_and_b32
; GCN: store_dword v[{{[0-9:]+}}], [[SEL]],
define amdgpu_kernel void @select_and1(i32 addrspace(1)* %p, i32 %x, i32 %y) {
  %cmp = icmp slt i32 %x, 11
  %mask = select i1 %cmp, i32 0, i32 -1
  %masked = and i32 %y, %mask
  store i32 %masked, i32 addrspace(1)* %p, align 4
  ret void
}
| 14 | |
; Same 0 / all-ones mask as a plain and-of-select, but with the select as the
; left-hand operand of the and (commuted form). The result is 0 when %x < 11
; and %y otherwise; the checks expect a single v_cndmask and no v_and_b32.
; GCN-LABEL: {{^}}select_and2:
; GCN: v_cndmask_b32_e32 [[SEL:v[0-9]+]], 0, v{{[0-9]+}},
; GCN-NOT: v_and_b32
; GCN: store_dword v[{{[0-9:]+}}], [[SEL]],
define amdgpu_kernel void @select_and2(i32 addrspace(1)* %p, i32 %x, i32 %y) {
  %cmp = icmp slt i32 %x, 11
  %mask = select i1 %cmp, i32 0, i32 -1
  %masked = and i32 %mask, %y
  store i32 %masked, i32 addrspace(1)* %p, align 4
  ret void
}
| 26 | |
; and-of-select with the select arms swapped: the mask is all-ones when
; %x < 11 and 0 otherwise, so the result is %y or 0 respectively. The checks
; still expect one v_cndmask between 0 and a v-register and no v_and_b32.
; GCN-LABEL: {{^}}select_and3:
; GCN: v_cndmask_b32_e32 [[SEL:v[0-9]+]], 0, v{{[0-9]+}},
; GCN-NOT: v_and_b32
; GCN: store_dword v[{{[0-9:]+}}], [[SEL]],
define amdgpu_kernel void @select_and3(i32 addrspace(1)* %p, i32 %x, i32 %y) {
  %cmp = icmp slt i32 %x, 11
  %mask = select i1 %cmp, i32 -1, i32 0
  %masked = and i32 %y, %mask
  store i32 %masked, i32 addrspace(1)* %p, align 4
  ret void
}
| 38 | |
; Vector form of the and-of-select fold: a scalar condition picks between a
; zero vector and an all-ones vector, which is then and-ed with %y. The checks
; expect four v_cndmask instructions (one per element, each choosing between
; 0 and a v-register) and no v_and_b32 before the store.
; The destination registers are matched anonymously: nothing below refers
; back to them, so no FileCheck variable is captured.
; GCN-LABEL: {{^}}select_and_v4:
; GCN: v_cndmask_b32_e32 v{{[0-9]+}}, 0, v{{[0-9]+}},
; GCN: v_cndmask_b32_e32 v{{[0-9]+}}, 0, v{{[0-9]+}},
; GCN: v_cndmask_b32_e32 v{{[0-9]+}}, 0, v{{[0-9]+}},
; GCN: v_cndmask_b32_e32 v{{[0-9]+}}, 0, v{{[0-9]+}},
; GCN-NOT: v_and_b32
; GCN: store_dword
define amdgpu_kernel void @select_and_v4(<4 x i32> addrspace(1)* %p, i32 %x, <4 x i32> %y) {
  %cmp = icmp slt i32 %x, 11
  %mask = select i1 %cmp, <4 x i32> zeroinitializer, <4 x i32> <i32 -1, i32 -1, i32 -1, i32 -1>
  %masked = and <4 x i32> %mask, %y
  store <4 x i32> %masked, <4 x i32> addrspace(1)* %p, align 32
  ret void
}
| 53 | |
; or(%y, select(%x < 11, 0, -1)) with the select as the right-hand operand.
; The mask is 0 when %x < 11 (result %y) and all-ones otherwise (result -1).
; The checks expect one v_cndmask between -1 and a v-register, no v_or_b32
; before the store, and the selected register stored directly.
; GCN-LABEL: {{^}}select_or1:
; GCN: v_cndmask_b32_e32 [[SEL:v[0-9]+]], -1, v{{[0-9]+}},
; GCN-NOT: v_or_b32
; GCN: store_dword v[{{[0-9:]+}}], [[SEL]],
define amdgpu_kernel void @select_or1(i32 addrspace(1)* %p, i32 %x, i32 %y) {
  %cmp = icmp slt i32 %x, 11
  %mask = select i1 %cmp, i32 0, i32 -1
  %merged = or i32 %y, %mask
  store i32 %merged, i32 addrspace(1)* %p, align 4
  ret void
}
| 65 | |
; Commuted or-of-select: the 0 / all-ones select is the left-hand operand of
; the or. The result is %y when %x < 11 and -1 otherwise; the checks expect a
; single v_cndmask and no v_or_b32.
; GCN-LABEL: {{^}}select_or2:
; GCN: v_cndmask_b32_e32 [[SEL:v[0-9]+]], -1, v{{[0-9]+}},
; GCN-NOT: v_or_b32
; GCN: store_dword v[{{[0-9:]+}}], [[SEL]],
define amdgpu_kernel void @select_or2(i32 addrspace(1)* %p, i32 %x, i32 %y) {
  %cmp = icmp slt i32 %x, 11
  %mask = select i1 %cmp, i32 0, i32 -1
  %merged = or i32 %mask, %y
  store i32 %merged, i32 addrspace(1)* %p, align 4
  ret void
}
| 77 | |
; or-of-select with the select arms swapped: the mask is all-ones when
; %x < 11 (result -1) and 0 otherwise (result %y). The checks still expect
; one v_cndmask between -1 and a v-register and no v_or_b32.
; GCN-LABEL: {{^}}select_or3:
; GCN: v_cndmask_b32_e32 [[SEL:v[0-9]+]], -1, v{{[0-9]+}},
; GCN-NOT: v_or_b32
; GCN: store_dword v[{{[0-9:]+}}], [[SEL]],
define amdgpu_kernel void @select_or3(i32 addrspace(1)* %p, i32 %x, i32 %y) {
  %cmp = icmp slt i32 %x, 11
  %mask = select i1 %cmp, i32 -1, i32 0
  %merged = or i32 %y, %mask
  store i32 %merged, i32 addrspace(1)* %p, align 4
  ret void
}
| 89 | |
; Vector form of the or-of-select fold: a scalar condition picks between a
; zero vector and an all-ones vector, which is then or-ed with %y. The checks
; expect four v_cndmask instructions (one per element, each choosing between
; -1 and a v-register) and no v_or_b32 before the store.
; The destination registers are matched anonymously: nothing below refers
; back to them, so no FileCheck variable is captured.
; GCN-LABEL: {{^}}select_or_v4:
; GCN: v_cndmask_b32_e32 v{{[0-9]+}}, -1, v{{[0-9]+}},
; GCN: v_cndmask_b32_e32 v{{[0-9]+}}, -1, v{{[0-9]+}},
; GCN: v_cndmask_b32_e32 v{{[0-9]+}}, -1, v{{[0-9]+}},
; GCN: v_cndmask_b32_e32 v{{[0-9]+}}, -1, v{{[0-9]+}},
; GCN-NOT: v_or_b32
; GCN: store_dword
define amdgpu_kernel void @select_or_v4(<4 x i32> addrspace(1)* %p, i32 %x, <4 x i32> %y) {
  %cmp = icmp slt i32 %x, 11
  %mask = select i1 %cmp, <4 x i32> zeroinitializer, <4 x i32> <i32 -1, i32 -1, i32 -1, i32 -1>
  %merged = or <4 x i32> %mask, %y
  store <4 x i32> %merged, <4 x i32> addrspace(1)* %p, align 32
  ret void
}
| 104 | |
; Constant minus a select of constants (i32). Folding the sub into both arms
; gives 5 - (-4) = 9 when %cond is true and 5 - 3 = 2 otherwise; the check
; expects a single v_cndmask with the immediates 2 and 9.
; GCN-LABEL: {{^}}sel_constants_sub_constant_sel_constants:
; GCN: v_cndmask_b32_e64 v{{[0-9]+}}, 2, 9,
define amdgpu_kernel void @sel_constants_sub_constant_sel_constants(i32 addrspace(1)* %p, i1 %cond) {
  %pick = select i1 %cond, i32 -4, i32 3
  %folded = sub i32 5, %pick
  store i32 %folded, i32 addrspace(1)* %p, align 4
  ret void
}
| 113 | |
; i16 variant of the constant-minus-select fold: 5 - (-4) = 9 when %cond is
; true and 5 - 3 = 2 otherwise. The checks currently expect the 9 to be
; materialized by a v_mov of 0xffff0009 rather than used as an immediate.
; GCN-LABEL: {{^}}sel_constants_sub_constant_sel_constants_i16:
; TODO: shrink i16 constant. This is correct but suboptimal.
; GCN: v_mov_b32_e32 [[T:v[0-9]+]], 0xffff0009
; GCN: v_cndmask_b32_e32 v{{[0-9]+}}, 2, [[T]],
define amdgpu_kernel void @sel_constants_sub_constant_sel_constants_i16(i16 addrspace(1)* %p, i1 %cond) {
  %pick = select i1 %cond, i16 -4, i16 3
  %folded = sub i16 5, %pick
  store i16 %folded, i16 addrspace(1)* %p, align 2
  ret void
}
| 124 | |
; i16 constant-minus-select where both folded results are negative:
; 1 - 4 = -3 when %cond is true and 1 - 3000 = -2999 otherwise. -2999 is
; 0xfffff449 as a 32-bit two's-complement value, which the checks expect to
; be moved into a register, while -3 is used as an immediate.
; GCN-LABEL: {{^}}sel_constants_sub_constant_sel_constants_i16_neg:
; GCN: v_mov_b32_e32 [[F:v[0-9]+]], 0xfffff449
; GCN: v_cndmask_b32_e64 v{{[0-9]+}}, [[F]], -3,
define amdgpu_kernel void @sel_constants_sub_constant_sel_constants_i16_neg(i16 addrspace(1)* %p, i1 %cond) {
  %pick = select i1 %cond, i16 4, i16 3000
  %folded = sub i16 1, %pick
  store i16 %folded, i16 addrspace(1)* %p, align 2
  ret void
}
| 134 | |
; <2 x i16> constant-minus-select. Element-wise folding gives <9, 5> when
; %cond is true and <2, 6> otherwise. The checks expect each pair as one
; 32-bit constant: 0x50009 (9 in the low half, 5 in the high half) and
; 0x60002 (2 low, 6 high), combined by a single v_cndmask.
; GCN-LABEL: {{^}}sel_constants_sub_constant_sel_constants_v2i16:
; GCN-DAG: v_mov_b32_e32 [[F:v[0-9]+]], 0x60002
; GCN-DAG: v_mov_b32_e32 [[T:v[0-9]+]], 0x50009
; GCN: v_cndmask_b32_e32 v{{[0-9]+}}, [[F]], [[T]],
define amdgpu_kernel void @sel_constants_sub_constant_sel_constants_v2i16(<2 x i16> addrspace(1)* %p, i1 %cond) {
  %pick = select i1 %cond, <2 x i16> <i16 -4, i16 2>, <2 x i16> <i16 3, i16 1>
  %folded = sub <2 x i16> <i16 5, i16 7>, %pick
  store <2 x i16> %folded, <2 x i16> addrspace(1)* %p, align 4
  ret void
}
| 145 | |
; <4 x i32> constant-minus-select. Element-wise folding gives <9, 5, 6, 7>
; when %cond is true and <2, 6, 10, 14> otherwise; the checks expect one
; v_cndmask per element with both values as immediates, in any order.
; GCN-LABEL: {{^}}sel_constants_sub_constant_sel_constants_v4i32:
; GCN-DAG: v_cndmask_b32_e64 v{{[0-9]+}}, 2, 9,
; GCN-DAG: v_cndmask_b32_e64 v{{[0-9]+}}, 6, 5,
; GCN-DAG: v_cndmask_b32_e64 v{{[0-9]+}}, 10, 6,
; GCN-DAG: v_cndmask_b32_e64 v{{[0-9]+}}, 14, 7,
define amdgpu_kernel void @sel_constants_sub_constant_sel_constants_v4i32(<4 x i32> addrspace(1)* %p, i1 %cond) {
  %pick = select i1 %cond, <4 x i32> <i32 -4, i32 2, i32 3, i32 4>, <4 x i32> <i32 3, i32 1, i32 -1, i32 -3>
  %folded = sub <4 x i32> <i32 5, i32 7, i32 9, i32 11>, %pick
  store <4 x i32> %folded, <4 x i32> addrspace(1)* %p, align 32
  ret void
}
| 157 | |
; Signed division of a constant by a select of constants (i64).
; 120 sdiv 121 = 0 when %cond is true and 120 sdiv 23 = 5 otherwise; the
; check looks for a v_cndmask with the immediates 5 and 0.
; GCN-LABEL: {{^}}sdiv_constant_sel_constants:
; GCN: v_cndmask_b32_e64 v{{[0-9]+}}, 5, 0,
define amdgpu_kernel void @sdiv_constant_sel_constants(i64 addrspace(1)* %p, i1 %cond) {
  %pick = select i1 %cond, i64 121, i64 23
  %folded = sdiv i64 120, %pick
  store i64 %folded, i64 addrspace(1)* %p, align 8
  ret void
}
| 166 | |
; Unsigned division of a constant by a select of constants (i64).
; The i64 constant -4 is a very large value when read as unsigned, so
; 120 udiv it is 0 when %cond is true; 120 udiv 23 = 5 otherwise. The check
; looks for a v_cndmask with the immediates 5 and 0.
; GCN-LABEL: {{^}}udiv_constant_sel_constants:
; GCN: v_cndmask_b32_e64 v{{[0-9]+}}, 5, 0,
define amdgpu_kernel void @udiv_constant_sel_constants(i64 addrspace(1)* %p, i1 %cond) {
  %pick = select i1 %cond, i64 -4, i64 23
  %folded = udiv i64 120, %pick
  store i64 %folded, i64 addrspace(1)* %p, align 8
  ret void
}
| 175 | |
; Signed remainder of a constant by a select of constants (i64).
; 33 srem 34 = 33 when %cond is true and 33 srem 15 = 3 otherwise; the check
; looks for a v_cndmask with the immediates 3 and 33.
; GCN-LABEL: {{^}}srem_constant_sel_constants:
; GCN: v_cndmask_b32_e64 v{{[0-9]+}}, 3, 33,
define amdgpu_kernel void @srem_constant_sel_constants(i64 addrspace(1)* %p, i1 %cond) {
  %pick = select i1 %cond, i64 34, i64 15
  %folded = srem i64 33, %pick
  store i64 %folded, i64 addrspace(1)* %p, align 8
  ret void
}
| 184 | |
; Unsigned remainder of a constant by a select of constants (i64).
; 33 urem 34 = 33 when %cond is true and 33 urem 15 = 3 otherwise; the check
; looks for a v_cndmask with the immediates 3 and 33.
; NOTE(review): every constant here is positive and matches the ones in
; srem_constant_sel_constants, so this test cannot tell an unsigned
; remainder from a signed one - confirm whether that is intended.
; GCN-LABEL: {{^}}urem_constant_sel_constants:
; GCN: v_cndmask_b32_e64 v{{[0-9]+}}, 3, 33,
define amdgpu_kernel void @urem_constant_sel_constants(i64 addrspace(1)* %p, i1 %cond) {
  %pick = select i1 %cond, i64 34, i64 15
  %folded = urem i64 33, %pick
  store i64 %folded, i64 addrspace(1)* %p, align 8
  ret void
}
| 193 | |
; Constant shifted left by a select of constant amounts.
; 1 shl 2 = 4 when %cond is true and 1 shl 3 = 8 otherwise; the check expects
; a single v_cndmask with the immediates 8 and 4.
; GCN-LABEL: {{^}}shl_constant_sel_constants:
; GCN: v_cndmask_b32_e64 v{{[0-9]+}}, 8, 4,
define amdgpu_kernel void @shl_constant_sel_constants(i32 addrspace(1)* %p, i1 %cond) {
  %amount = select i1 %cond, i32 2, i32 3
  %folded = shl i32 1, %amount
  store i32 %folded, i32 addrspace(1)* %p, align 4
  ret void
}
| 202 | |
; Constant logically shifted right by a select of constant amounts.
; 64 lshr 2 = 16 when %cond is true and 64 lshr 3 = 8 otherwise; the check
; expects a single v_cndmask with the immediates 8 and 16.
; GCN-LABEL: {{^}}lshr_constant_sel_constants:
; GCN: v_cndmask_b32_e64 v{{[0-9]+}}, 8, 16,
define amdgpu_kernel void @lshr_constant_sel_constants(i32 addrspace(1)* %p, i1 %cond) {
  %amount = select i1 %cond, i32 2, i32 3
  %folded = lshr i32 64, %amount
  store i32 %folded, i32 addrspace(1)* %p, align 4
  ret void
}
| 211 | |
; Constant arithmetically shifted right by a select of constant amounts.
; 128 ashr 2 = 32 when %cond is true and 128 ashr 3 = 16 otherwise; the check
; expects a single v_cndmask with the immediates 16 and 32.
; GCN-LABEL: {{^}}ashr_constant_sel_constants:
; GCN: v_cndmask_b32_e64 v{{[0-9]+}}, 16, 32,
define amdgpu_kernel void @ashr_constant_sel_constants(i32 addrspace(1)* %p, i1 %cond) {
  %amount = select i1 %cond, i32 2, i32 3
  %folded = ashr i32 128, %amount
  store i32 %folded, i32 addrspace(1)* %p, align 4
  ret void
}
| 220 | |
; Float constant minus a select of float constants.
; -1.0 - (-2.0) = 1.0 when %cond is true and -1.0 - 3.0 = -4.0 otherwise; the
; check expects a single v_cndmask with the immediates -4.0 and 1.0.
; GCN-LABEL: {{^}}fsub_constant_sel_constants:
; GCN: v_cndmask_b32_e64 v{{[0-9]+}}, -4.0, 1.0,
define amdgpu_kernel void @fsub_constant_sel_constants(float addrspace(1)* %p, i1 %cond) {
  %pick = select i1 %cond, float -2.0, float 3.0
  %folded = fsub float -1.0, %pick
  store float %folded, float addrspace(1)* %p, align 4
  ret void
}
| 229 | |
; Half-precision variant of the constant-minus-select fold. The folded values
; are 1.0 (%cond true) and -4.0 (%cond false), whose half encodings are
; 0x3c00 and 0xc400. The checks currently expect both to be moved into
; registers before the v_cndmask.
; GCN-LABEL: {{^}}fsub_constant_sel_constants_f16:
; TODO: it shall be possible to fold constants with OpSel
; GCN-DAG: v_mov_b32_e32 [[T:v[0-9]+]], 0x3c00
; GCN-DAG: v_mov_b32_e32 [[F:v[0-9]+]], 0xc400
; GCN: v_cndmask_b32_e32 v{{[0-9]+}}, [[F]], [[T]],
define amdgpu_kernel void @fsub_constant_sel_constants_f16(half addrspace(1)* %p, i1 %cond) {
  %pick = select i1 %cond, half -2.0, half 3.0
  %folded = fsub half -1.0, %pick
  store half %folded, half addrspace(1)* %p, align 2
  ret void
}
| 241 | |
; <2 x half> constant-minus-select. Element-wise folding gives <1.0, 5.0>
; when %cond is true and <0.0, -2.0> otherwise. The checks expect the true
; pair as the 32-bit constant 0x45003c00 (half 1.0 = 0x3c00 in the low half,
; half 5.0 = 0x4500 in the high half). The false pair has the bit pattern
; 0xc0000000, which is what the -2.0 immediate in the v_cndmask check
; encodes when read as a 32-bit float.
; GCN-LABEL: {{^}}fsub_constant_sel_constants_v2f16:
; GCN-DAG: v_mov_b32_e32 [[T:v[0-9]+]], 0x45003c00
; GCN: v_cndmask_b32_e32 v{{[0-9]+}}, -2.0, [[T]],
define amdgpu_kernel void @fsub_constant_sel_constants_v2f16(<2 x half> addrspace(1)* %p, i1 %cond) {
  %pick = select i1 %cond, <2 x half> <half -2.0, half -3.0>, <2 x half> <half -1.0, half 4.0>
  %folded = fsub <2 x half> <half -1.0, half 2.0>, %pick
  store <2 x half> %folded, <2 x half> addrspace(1)* %p, align 4
  ret void
}
| 251 | |
; <4 x float> constant-minus-select. Element-wise folding gives
; <1.0, 5.0, 9.0, 13.0> when %cond is true and <0.0, 2.0, 4.0, 6.0> otherwise.
; The hex constants in the checks are the float encodings of 5.0
; (0x40a00000), 9.0 (0x41100000), 13.0 (0x41500000) and 6.0 (0x40c00000);
; the remaining values appear directly as v_cndmask immediates.
; GCN-LABEL: {{^}}fsub_constant_sel_constants_v4f32:
; GCN-DAG: v_mov_b32_e32 [[T2:v[0-9]+]], 0x40a00000
; GCN-DAG: v_mov_b32_e32 [[T3:v[0-9]+]], 0x41100000
; GCN-DAG: v_mov_b32_e32 [[T4:v[0-9]+]], 0x41500000
; GCN-DAG: v_mov_b32_e32 [[F4:v[0-9]+]], 0x40c00000
; GCN-DAG: v_cndmask_b32_e64 v{{[0-9]+}}, 0, 1.0,
; GCN-DAG: v_cndmask_b32_e32 v{{[0-9]+}}, 2.0, [[T2]],
; GCN-DAG: v_cndmask_b32_e32 v{{[0-9]+}}, 4.0, [[T3]],
; GCN-DAG: v_cndmask_b32_e32 v{{[0-9]+}}, [[F4]], [[T4]],
define amdgpu_kernel void @fsub_constant_sel_constants_v4f32(<4 x float> addrspace(1)* %p, i1 %cond) {
  %pick = select i1 %cond, <4 x float> <float -2.0, float -3.0, float -4.0, float -5.0>, <4 x float> <float -1.0, float 0.0, float 1.0, float 2.0>
  %folded = fsub <4 x float> <float -1.0, float 2.0, float 5.0, float 8.0>, %pick
  store <4 x float> %folded, <4 x float> addrspace(1)* %p, align 32
  ret void
}
| 267 | |
; Float constant divided by a select of float constants.
; 8.0 / -4.0 = -2.0 when %cond is true and 8.0 / 2.0 = 4.0 otherwise; the
; check expects a single v_cndmask with the immediates 4.0 and -2.0.
; GCN-LABEL: {{^}}fdiv_constant_sel_constants:
; GCN: v_cndmask_b32_e64 v{{[0-9]+}}, 4.0, -2.0,
define amdgpu_kernel void @fdiv_constant_sel_constants(float addrspace(1)* %p, i1 %cond) {
  %pick = select i1 %cond, float -4.0, float 2.0
  %folded = fdiv float 8.0, %pick
  store float %folded, float addrspace(1)* %p, align 4
  ret void
}
| 276 | |
; Float remainder of a constant by a select of float constants.
; 5.0 frem -4.0 = 1.0 when %cond is true (the result takes the sign of the
; dividend) and 5.0 frem 3.0 = 2.0 otherwise; the check expects a single
; v_cndmask with the immediates 2.0 and 1.0.
; GCN-LABEL: {{^}}frem_constant_sel_constants:
; GCN: v_cndmask_b32_e64 v{{[0-9]+}}, 2.0, 1.0,
define amdgpu_kernel void @frem_constant_sel_constants(float addrspace(1)* %p, i1 %cond) {
  %pick = select i1 %cond, float -4.0, float 3.0
  %folded = frem float 5.0, %pick
  store float %folded, float addrspace(1)* %p, align 4
  ret void
}