; RUN: opt -mtriple=thumbv8.1m.main-arm-none-eabi -hardware-loops %s -S -o - | FileCheck %s --check-prefix=CHECK --check-prefix=CHECK-MAIN
; RUN: opt -mtriple=thumbv8.1m.main-arm-none-eabi -mattr=+fullfp16 -hardware-loops %s -S -o - | FileCheck %s --check-prefix=CHECK --check-prefix=CHECK-FP
; RUN: opt -mtriple=thumbv8.1m.main-arm-none-eabi -mattr=+fp-armv8,+fullfp16 -hardware-loops %s -S -o - | FileCheck %s --check-prefix=CHECK --check-prefix=CHECK-FP64
; RUN: opt -mtriple=thumbv8.1m.main-arm-none-eabi -mattr=+mve -hardware-loops %s -S -o - | FileCheck %s --check-prefix=CHECK --check-prefix=CHECK-MVE
; RUN: opt -mtriple=thumbv8.1m.main-arm-none-eabi -mattr=+mve.fp -hardware-loops %s -S -o - | FileCheck %s --check-prefix=CHECK --check-prefix=CHECK-MVEFP
; RUN: opt -mtriple=thumbv8.1m.main-arm-none-eabi -hardware-loops -disable-arm-loloops=true %s -S -o - | FileCheck %s --check-prefix=DISABLED

; DISABLED-NOT: call i32 @llvm.loop.decrement

; Counted loop (guarded against %n == 0) whose body calls the external
; function @bar through a bitcast and sums the returned values. The
; directives below require that neither hardware-loop intrinsic is emitted.
; CHECK-LABEL: skip_call
; CHECK-NOT: call void @llvm.set.loop.iterations
; CHECK-NOT: call i32 @llvm.loop.decrement

define i32 @skip_call(i32 %n) {
entry:
  %cmp6 = icmp eq i32 %n, 0
  br i1 %cmp6, label %while.end, label %while.body.preheader

while.body.preheader:
  br label %while.body

while.body:
  %i.08 = phi i32 [ %inc1, %while.body ], [ 0, %while.body.preheader ]
  %res.07 = phi i32 [ %add, %while.body ], [ 0, %while.body.preheader ]
  %call = tail call i32 bitcast (i32 (...)* @bar to i32 ()*)() #2
  %add = add nsw i32 %call, %res.07
  %inc1 = add nuw i32 %i.08, 1
  %exitcond = icmp eq i32 %inc1, %n
  br i1 %exitcond, label %while.end.loopexit, label %while.body

while.end.loopexit:
  br label %while.end

while.end:
  %res.0.lcssa = phi i32 [ 0, %entry ], [ %add, %while.end.loopexit ]
  ret i32 %res.0.lcssa
}

; Loop whose body calls the target intrinsic @llvm.arm.smlad. The induction
; variable advances by 2 and exits at 100, so the directives expect an
; iteration count of 50 and a decrement-by-1 counter compared against zero.
; CHECK-LABEL: test_target_specific
; CHECK: call void @llvm.set.loop.iterations.i32(i32 50)
; CHECK: [[COUNT:%[^ ]+]] = phi i32 [ 50, %entry ], [ [[LOOP_DEC:%[^ ]+]], %loop ]
; CHECK: [[LOOP_DEC]] = call i32 @llvm.loop.decrement.reg.i32.i32.i32(i32 [[COUNT]], i32 1)
; CHECK: [[CMP:%[^ ]+]] = icmp ne i32 [[LOOP_DEC]], 0
; CHECK: br i1 [[CMP]], label %loop, label %exit

define i32 @test_target_specific(i32* %a, i32* %b) {
entry:
  br label %loop
loop:
  %acc = phi i32 [ 0, %entry ], [ %res, %loop ]
  %count = phi i32 [ 0, %entry ], [ %count.next, %loop ]
  %addr.a = getelementptr i32, i32* %a, i32 %count
  %addr.b = getelementptr i32, i32* %b, i32 %count
  %load.a = load i32, i32* %addr.a
  %load.b = load i32, i32* %addr.b
  %res = call i32 @llvm.arm.smlad(i32 %load.a, i32 %load.b, i32 %acc)
  %count.next = add nuw i32 %count, 2
  %cmp = icmp ne i32 %count.next, 100
  br i1 %cmp, label %loop, label %exit
exit:
  ret i32 %res
}

; 100-iteration loop applying @llvm.fabs.f16 to each element of %a and
; storing the result to %b. The directives expect no hardware loop for the
; base and +mve configurations, and one for +fullfp16 and +mve.fp.
; CHECK-LABEL: test_fabs_f16
; CHECK-MAIN-NOT: call void @llvm.set.loop.iterations
; CHECK-MVE-NOT: call void @llvm.set.loop.iterations
; CHECK-FP: call void @llvm.set.loop.iterations.i32(i32 100)
; CHECK-MVEFP: call void @llvm.set.loop.iterations.i32(i32 100)
define void @test_fabs_f16(half* %a, half* %b) {
entry:
  br label %loop
loop:
  %count = phi i32 [ 0, %entry ], [ %count.next, %loop ]
  %addr.a = getelementptr half, half* %a, i32 %count
  %load.a = load half, half* %addr.a
  %abs = call half @llvm.fabs.f16(half %load.a)
  %addr.b = getelementptr half, half* %b, i32 %count
  store half %abs, half* %addr.b
  %count.next = add nuw i32 %count, 1
  %cmp = icmp ne i32 %count.next, 100
  br i1 %cmp, label %loop, label %exit
exit:
  ret void
}

; 100-iteration loop accumulating @llvm.fabs.f32 of each element of %a.
; The directives expect no hardware loop for the base and +mve
; configurations, and one for +fullfp16 and +mve.fp.
; CHECK-LABEL: test_fabs
; CHECK-MAIN-NOT: call void @llvm.set.loop.iterations
; CHECK-MVE-NOT: call void @llvm.set.loop.iterations
; CHECK-FP: call void @llvm.set.loop.iterations.i32(i32 100)
; CHECK-MVEFP: call void @llvm.set.loop.iterations.i32(i32 100)

define float @test_fabs(float* %a) {
entry:
  br label %loop
loop:
  %acc = phi float [ 0.0, %entry ], [ %res, %loop ]
  %count = phi i32 [ 0, %entry ], [ %count.next, %loop ]
  %addr.a = getelementptr float, float* %a, i32 %count
  %load.a = load float, float* %addr.a
  %abs = call float @llvm.fabs.f32(float %load.a)
  %res = fadd float %abs, %acc
  %count.next = add nuw i32 %count, 1
  %cmp = icmp ne i32 %count.next, 100
  br i1 %cmp, label %loop, label %exit
exit:
  ret float %res
}

; 100-iteration loop applying @llvm.fabs.f64 to each element of %a and
; storing the result to %b. Only the +fp-armv8,+fullfp16 configuration is
; expected to produce a hardware loop.
; CHECK-LABEL: test_fabs_64
; CHECK-MAIN-NOT: call void @llvm.set.loop.iterations
; CHECK-MVE-NOT: call void @llvm.set.loop.iterations
; CHECK-FP-NOT: call void @llvm.set.loop.iterations.i32(i32 100)
; CHECK-FP64: call void @llvm.set.loop.iterations.i32(i32 100)
; CHECK-MVEFP-NOT: call void @llvm.set.loop.iterations.i32(i32 100)
define void @test_fabs_64(double* %a, double* %b) {
entry:
  br label %loop
loop:
  %count = phi i32 [ 0, %entry ], [ %count.next, %loop ]
  %addr.a = getelementptr double, double* %a, i32 %count
  %load.a = load double, double* %addr.a
  %abs = call double @llvm.fabs.f64(double %load.a)
  %addr.b = getelementptr double, double* %b, i32 %count
  store double %abs, double* %addr.b
  %count.next = add nuw i32 %count, 1
  %cmp = icmp ne i32 %count.next, 100
  br i1 %cmp, label %loop, label %exit
exit:
  ret void
}

; 100-iteration loop accumulating @llvm.fabs.v4f32 of each vector in %a.
; Directives exist only for the two MVE configurations: no hardware loop
; with +mve, and the full set/decrement/compare/branch sequence with +mve.fp.
; CHECK-LABEL: test_fabs_vec
; CHECK-MVE-NOT: call void @llvm.set.loop.iterations
; CHECK-MVEFP: call void @llvm.set.loop.iterations.i32(i32 100)
; CHECK-MVEFP: [[COUNT:%[^ ]+]] = phi i32 [ 100, %entry ], [ [[LOOP_DEC:%[^ ]+]], %loop ]
; CHECK-MVEFP: [[LOOP_DEC]] = call i32 @llvm.loop.decrement.reg.i32.i32.i32(i32 [[COUNT]], i32 1)
; CHECK-MVEFP: [[CMP:%[^ ]+]] = icmp ne i32 [[LOOP_DEC]], 0
; CHECK-MVEFP: br i1 [[CMP]], label %loop, label %exit
define <4 x float> @test_fabs_vec(<4 x float>* %a) {
entry:
  br label %loop
loop:
  %acc = phi <4 x float> [ zeroinitializer, %entry ], [ %res, %loop ]
  %count = phi i32 [ 0, %entry ], [ %count.next, %loop ]
  %addr.a = getelementptr <4 x float>, <4 x float>* %a, i32 %count
  %load.a = load <4 x float>, <4 x float>* %addr.a
  %abs = call <4 x float> @llvm.fabs.v4f32(<4 x float> %load.a)
  %res = fadd <4 x float> %abs, %acc
  %count.next = add nuw i32 %count, 1
  %cmp = icmp ne i32 %count.next, 100
  br i1 %cmp, label %loop, label %exit
exit:
  ret <4 x float> %res
}

; 100-iteration loop accumulating @llvm.log.f32 of each element of %a.
; The directives require that no hardware-loop intrinsics appear in any of
; the configurations sharing the common prefix (presumably because the
; intrinsic ends up as a real call -- confirm against the ARM backend).
; CHECK-LABEL: test_log
; CHECK-NOT: call void @llvm.set.loop.iterations
; CHECK-NOT: llvm.loop.decrement
define float @test_log(float* %a) {
entry:
  br label %loop
loop:
  %acc = phi float [ 0.0, %entry ], [ %res, %loop ]
  %count = phi i32 [ 0, %entry ], [ %count.next, %loop ]
  %addr.a = getelementptr float, float* %a, i32 %count
  %load.a = load float, float* %addr.a
  %log = call float @llvm.log.f32(float %load.a)
  %res = fadd float %log, %acc
  %count.next = add nuw i32 %count, 1
  %cmp = icmp ne i32 %count.next, 100
  br i1 %cmp, label %loop, label %exit
exit:
  ret float %res
}

; 100-iteration loop applying @llvm.sqrt.f16 to each element of %a and
; storing the result to %b. The directives expect no hardware loop for the
; base and +mve configurations, and one for every configuration that
; enables +fullfp16 or +mve.fp.
; CHECK-LABEL: test_sqrt_16
; CHECK-MAIN-NOT: call void @llvm.set.loop.iterations
; CHECK-MVE-NOT: call void @llvm.set.loop.iterations
; CHECK-FP: call void @llvm.set.loop.iterations.i32(i32 100)
; CHECK-MVEFP: call void @llvm.set.loop.iterations.i32(i32 100)
; CHECK-FP64: call void @llvm.set.loop.iterations.i32(i32 100)
define void @test_sqrt_16(half* %a, half* %b) {
entry:
  br label %loop
loop:
  %count = phi i32 [ 0, %entry ], [ %count.next, %loop ]
  %addr.a = getelementptr half, half* %a, i32 %count
  %load.a = load half, half* %addr.a
  %sqrt = call half @llvm.sqrt.f16(half %load.a)
  %addr.b = getelementptr half, half* %b, i32 %count
  store half %sqrt, half* %addr.b
  %count.next = add nuw i32 %count, 1
  %cmp = icmp ne i32 %count.next, 100
  br i1 %cmp, label %loop, label %exit
exit:
  ret void
}
; 100-iteration loop applying @llvm.sqrt.f32 to each element of %a and
; storing the result to %b. No hardware loop is expected for the base and
; +mve configurations; +fullfp16 expects one, and +mve.fp additionally pins
; the full decrement/compare/branch sequence.
; CHECK-LABEL: test_sqrt
; CHECK-MAIN-NOT: call void @llvm.set.loop.iterations
; CHECK-MVE-NOT: call void @llvm.set.loop.iterations
; CHECK-FP: call void @llvm.set.loop.iterations
; CHECK-MVEFP: call void @llvm.set.loop.iterations.i32(i32 100)
; CHECK-MVEFP: [[COUNT:%[^ ]+]] = phi i32 [ 100, %entry ], [ [[LOOP_DEC:%[^ ]+]], %loop ]
; CHECK-MVEFP: [[LOOP_DEC]] = call i32 @llvm.loop.decrement.reg.i32.i32.i32(i32 [[COUNT]], i32 1)
; CHECK-MVEFP: [[CMP:%[^ ]+]] = icmp ne i32 [[LOOP_DEC]], 0
; CHECK-MVEFP: br i1 [[CMP]], label %loop, label %exit
define void @test_sqrt(float* %a, float* %b) {
entry:
  br label %loop
loop:
  %count = phi i32 [ 0, %entry ], [ %count.next, %loop ]
  %addr.a = getelementptr float, float* %a, i32 %count
  %load.a = load float, float* %addr.a
  %sqrt = call float @llvm.sqrt.f32(float %load.a)
  %addr.b = getelementptr float, float* %b, i32 %count
  store float %sqrt, float* %addr.b
  %count.next = add nuw i32 %count, 1
  %cmp = icmp ne i32 %count.next, 100
  br i1 %cmp, label %loop, label %exit
exit:
  ret void
}

; 100-iteration loop applying @llvm.sqrt.f64 to each element of %a and
; storing the result to %b. Only the +fp-armv8,+fullfp16 configuration is
; expected to produce a hardware loop.
; CHECK-LABEL: test_sqrt_64
; CHECK-MAIN-NOT: call void @llvm.set.loop.iterations
; CHECK-MVE-NOT: call void @llvm.set.loop.iterations
; CHECK-FP-NOT: call void @llvm.set.loop.iterations.i32(i32 100)
; CHECK-MVEFP-NOT: call void @llvm.set.loop.iterations.i32(i32 100)
; CHECK-FP64: call void @llvm.set.loop.iterations.i32(i32 100)
define void @test_sqrt_64(double* %a, double* %b) {
entry:
  br label %loop
loop:
  %count = phi i32 [ 0, %entry ], [ %count.next, %loop ]
  %addr.a = getelementptr double, double* %a, i32 %count
  %load.a = load double, double* %addr.a
  %sqrt = call double @llvm.sqrt.f64(double %load.a)
  %addr.b = getelementptr double, double* %b, i32 %count
  store double %sqrt, double* %addr.b
  %count.next = add nuw i32 %count, 1
  %cmp = icmp ne i32 %count.next, 100
  br i1 %cmp, label %loop, label %exit
exit:
  ret void
}

; 100-iteration loop applying @llvm.sqrt.v4f32 to each vector in %a and
; storing the result to %b. The directives expect no hardware loop for the
; base and +mve configurations, and one for +fullfp16 and +mve.fp.
; CHECK-LABEL: test_sqrt_vec
; CHECK-MAIN-NOT: call void @llvm.set.loop.iterations
; CHECK-MVE-NOT: call void @llvm.set.loop.iterations
; CHECK-FP: call void @llvm.set.loop.iterations.i32(i32 100)
; CHECK-MVEFP: call void @llvm.set.loop.iterations.i32(i32 100)
define void @test_sqrt_vec(<4 x float>* %a, <4 x float>* %b) {
entry:
  br label %loop
loop:
  %count = phi i32 [ 0, %entry ], [ %count.next, %loop ]
  %addr.a = getelementptr <4 x float>, <4 x float>* %a, i32 %count
  %load.a = load <4 x float>, <4 x float>* %addr.a
  %sqrt = call <4 x float> @llvm.sqrt.v4f32(<4 x float> %load.a)
  %addr.b = getelementptr <4 x float>, <4 x float>* %b, i32 %count
  store <4 x float> %sqrt, <4 x float>* %addr.b
  %count.next = add nuw i32 %count, 1
  %cmp = icmp ne i32 %count.next, 100
  br i1 %cmp, label %loop, label %exit
exit:
  ret void
}

; 100-iteration loop calling @llvm.sadd.with.overflow.i32 on elements of %a
; and %b; only the sum field of the result is used. A hardware loop is
; expected in every configuration sharing the common prefix.
; Note that %acc is carried around the loop but not read by any instruction.
; CHECK-LABEL: test_overflow
; CHECK: call void @llvm.set.loop.iterations
define i32 @test_overflow(i32* %a, i32* %b) {
entry:
  br label %loop
loop:
  %acc = phi i32 [ 0, %entry ], [ %res, %loop ]
  %count = phi i32 [ 0, %entry ], [ %count.next, %loop ]
  %addr.a = getelementptr i32, i32* %a, i32 %count
  %addr.b = getelementptr i32, i32* %b, i32 %count
  %load.a = load i32, i32* %addr.a
  %load.b = load i32, i32* %addr.b
  %sadd = call {i32, i1} @llvm.sadd.with.overflow.i32(i32 %load.a, i32 %load.b)
  %res = extractvalue {i32, i1} %sadd, 0
  %count.next = add nuw i32 %count, 1
  %cmp = icmp ne i32 %count.next, 100
  br i1 %cmp, label %loop, label %exit
exit:
  ret i32 %res
}

; 100-iteration loop calling @llvm.sadd.sat.i32 on elements of %a and %b.
; A hardware loop with an iteration count of 100 is expected in every
; configuration sharing the common prefix.
; Note that %acc is carried around the loop but not read by any instruction.
; TODO: We should be able to generate a qadd/sub
; CHECK-LABEL: test_sat
; CHECK: call void @llvm.set.loop.iterations.i32(i32 100)
define i32 @test_sat(i32* %a, i32* %b) {
entry:
  br label %loop
loop:
  %acc = phi i32 [ 0, %entry ], [ %res, %loop ]
  %count = phi i32 [ 0, %entry ], [ %count.next, %loop ]
  %addr.a = getelementptr i32, i32* %a, i32 %count
  %addr.b = getelementptr i32, i32* %b, i32 %count
  %load.a = load i32, i32* %addr.a
  %load.b = load i32, i32* %addr.b
  %res = call i32 @llvm.sadd.sat.i32(i32 %load.a, i32 %load.b)
  %count.next = add nuw i32 %count, 1
  %cmp = icmp ne i32 %count.next, 100
  br i1 %cmp, label %loop, label %exit
exit:
  ret i32 %res
}

; 100-iteration loop that adds two <4 x i32> vectors read with masked loads
; and writes the sum with a masked store. The directives expect a hardware
; loop for the +mve and +mve.fp configurations, with the full
; decrement/compare/branch sequence pinned for +mve; the unprefixed negative
; directive covers the remaining configurations.
; CHECK-LABEL: test_masked_i32
; CHECK-NOT: call void @llvm.set.loop.iterations
; CHECK-MVEFP: call void @llvm.set.loop.iterations
; CHECK-MVE: call void @llvm.set.loop.iterations.i32(i32 100)
; CHECK-MVE: [[COUNT:%[^ ]+]] = phi i32 [ 100, %entry ], [ [[LOOP_DEC:%[^ ]+]], %loop ]
; CHECK-MVE: [[LOOP_DEC]] = call i32 @llvm.loop.decrement.reg.i32.i32.i32(i32 [[COUNT]], i32 1)
; CHECK-MVE: [[CMP:%[^ ]+]] = icmp ne i32 [[LOOP_DEC]], 0
; CHECK-MVE: br i1 [[CMP]], label %loop, label %exit
define arm_aapcs_vfpcc void @test_masked_i32(<4 x i1> %mask, <4 x i32>* %a, <4 x i32>* %b, <4 x i32>* %c, <4 x i32> %passthru) {
entry:
  br label %loop
loop:
  %count = phi i32 [ 0, %entry ], [ %count.next, %loop ]
  %addr.a = getelementptr <4 x i32>, <4 x i32>* %a, i32 %count
  %addr.b = getelementptr <4 x i32>, <4 x i32>* %b, i32 %count
  %addr.c = getelementptr <4 x i32>, <4 x i32>* %c, i32 %count
  %load.a = call <4 x i32> @llvm.masked.load.v4i32.p0v4i32(<4 x i32>* %addr.a, i32 4, <4 x i1> %mask, <4 x i32> %passthru)
  %load.b = call <4 x i32> @llvm.masked.load.v4i32.p0v4i32(<4 x i32>* %addr.b, i32 4, <4 x i1> %mask, <4 x i32> %passthru)
  %res = add <4 x i32> %load.a, %load.b
  call void @llvm.masked.store.v4i32.p0v4i32(<4 x i32> %res, <4 x i32>* %addr.c, i32 4, <4 x i1> %mask)
  %count.next = add nuw i32 %count, 1
  %cmp = icmp ne i32 %count.next, 100
  br i1 %cmp, label %loop, label %exit
exit:
  ret void
}

; 100-iteration loop that adds two <4 x float> vectors read with masked
; loads and writes the sum with a masked store. The directives expect a
; hardware loop for the +mve and +mve.fp configurations, with the full
; decrement/compare/branch sequence pinned for +mve; the unprefixed negative
; directive covers the remaining configurations.
; CHECK-LABEL: test_masked_f32
; CHECK-NOT: call void @llvm.set.loop.iterations
; CHECK-MVEFP: call void @llvm.set.loop.iterations
; CHECK-MVE: call void @llvm.set.loop.iterations.i32(i32 100)
; CHECK-MVE: [[COUNT:%[^ ]+]] = phi i32 [ 100, %entry ], [ [[LOOP_DEC:%[^ ]+]], %loop ]
; CHECK-MVE: [[LOOP_DEC]] = call i32 @llvm.loop.decrement.reg.i32.i32.i32(i32 [[COUNT]], i32 1)
; CHECK-MVE: [[CMP:%[^ ]+]] = icmp ne i32 [[LOOP_DEC]], 0
; CHECK-MVE: br i1 [[CMP]], label %loop, label %exit
define arm_aapcs_vfpcc void @test_masked_f32(<4 x i1> %mask, <4 x float>* %a, <4 x float>* %b, <4 x float>* %c, <4 x float> %passthru) {
entry:
  br label %loop
loop:
  %count = phi i32 [ 0, %entry ], [ %count.next, %loop ]
  %addr.a = getelementptr <4 x float>, <4 x float>* %a, i32 %count
  %addr.b = getelementptr <4 x float>, <4 x float>* %b, i32 %count
  %addr.c = getelementptr <4 x float>, <4 x float>* %c, i32 %count
  %load.a = call <4 x float> @llvm.masked.load.v4f32.p0v4f32(<4 x float>* %addr.a, i32 4, <4 x i1> %mask, <4 x float> %passthru)
  %load.b = call <4 x float> @llvm.masked.load.v4f32.p0v4f32(<4 x float>* %addr.b, i32 4, <4 x i1> %mask, <4 x float> %passthru)
  %res = fadd <4 x float> %load.a, %load.b
  call void @llvm.masked.store.v4f32.p0v4f32(<4 x float> %res, <4 x float>* %addr.c, i32 4, <4 x i1> %mask)
  %count.next = add nuw i32 %count, 1
  %cmp = icmp ne i32 %count.next, 100
  br i1 %cmp, label %loop, label %exit
exit:
  ret void
}

; 100-iteration loop that adds two <4 x float> vectors read with masked
; gathers from the pointer vectors %a and %b and writes the sum with a masked
; scatter through %c. The directives expect a hardware loop for the +mve and
; +mve.fp configurations, with the full decrement/compare/branch sequence
; pinned for +mve; the unprefixed negative directive covers the remaining
; configurations.
; CHECK-LABEL: test_gather_scatter
; CHECK-NOT: call void @llvm.set.loop.iterations
; CHECK-MVEFP: call void @llvm.set.loop.iterations
; CHECK-MVE: call void @llvm.set.loop.iterations.i32(i32 100)
; CHECK-MVE: [[COUNT:%[^ ]+]] = phi i32 [ 100, %entry ], [ [[LOOP_DEC:%[^ ]+]], %loop ]
; CHECK-MVE: [[LOOP_DEC]] = call i32 @llvm.loop.decrement.reg.i32.i32.i32(i32 [[COUNT]], i32 1)
; CHECK-MVE: [[CMP:%[^ ]+]] = icmp ne i32 [[LOOP_DEC]], 0
; CHECK-MVE: br i1 [[CMP]], label %loop, label %exit
define arm_aapcs_vfpcc void @test_gather_scatter(<4 x i1> %mask, <4 x float*> %a, <4 x float*> %b, <4 x float*> %c, <4 x float> %passthru) {
entry:
  br label %loop
loop:
  %count = phi i32 [ 0, %entry ], [ %count.next, %loop ]
  %load.a = call <4 x float> @llvm.masked.gather.v4f32.p0v4f32(<4 x float*> %a, i32 4, <4 x i1> %mask, <4 x float> %passthru)
  %load.b = call <4 x float> @llvm.masked.gather.v4f32.p0v4f32(<4 x float*> %b, i32 4, <4 x i1> %mask, <4 x float> %passthru)
  %res = fadd <4 x float> %load.a, %load.b
  call void @llvm.masked.scatter.v4f32.p0v4f32(<4 x float> %res, <4 x float*> %c, i32 4, <4 x i1> %mask)
  %count.next = add nuw i32 %count, 1
  %cmp = icmp ne i32 %count.next, 100
  br i1 %cmp, label %loop, label %exit
exit:
  ret void
}

; External function and intrinsic declarations used by the loops above.
declare i32 @bar(...) local_unnamed_addr #1
declare i32 @llvm.arm.smlad(i32, i32, i32)
declare half @llvm.fabs.f16(half)
declare float @llvm.fabs.f32(float)
declare double @llvm.fabs.f64(double)
declare float @llvm.log.f32(float)
declare <4 x float> @llvm.fabs.v4f32(<4 x float>)
declare half @llvm.sqrt.f16(half)
declare float @llvm.sqrt.f32(float)
declare double @llvm.sqrt.f64(double)
declare <4 x float> @llvm.sqrt.v4f32(<4 x float>)
declare i32 @llvm.sadd.sat.i32(i32, i32)
declare {i32, i1} @llvm.sadd.with.overflow.i32(i32, i32)
declare <4 x i32> @llvm.masked.load.v4i32.p0v4i32(<4 x i32>*, i32, <4 x i1>, <4 x i32>)
declare void @llvm.masked.store.v4i32.p0v4i32(<4 x i32>, <4 x i32>*, i32, <4 x i1>)
declare <4 x float> @llvm.masked.load.v4f32.p0v4f32(<4 x float>*, i32, <4 x i1>, <4 x float>)
declare void @llvm.masked.store.v4f32.p0v4f32(<4 x float>, <4 x float>*, i32, <4 x i1>)
declare <4 x float> @llvm.masked.gather.v4f32.p0v4f32(<4 x float*>, i32, <4 x i1>, <4 x float>)
declare void @llvm.masked.scatter.v4f32.p0v4f32(<4 x float>, <4 x float*>, i32, <4 x i1>)