Evan Cheng | c4b527a | 2012-01-13 01:37:24 +0000 | [diff] [blame] | 1 | ; RUN: llc < %s -mtriple=armv7-apple-ios -mcpu=cortex-a9 -stress-ivchain | FileCheck %s |
NAKAMURA Takumi | db080e8 | 2012-01-13 07:03:55 +0000 | [diff] [blame^] | 2 | ; REQUIRES: asserts |
Evan Cheng | c4b527a | 2012-01-13 01:37:24 +0000 | [diff] [blame] | 3 | |
| 4 | ; @sharedidx is an unrolled variant of this loop: |
| 5 | ; for (unsigned long i = 0; i < len; i += s) { |
| 6 | ; c[i] = a[i] + b[i]; |
| 7 | ; } |
| 8 | ; where 's' cannot be folded into the addressing mode. |
| 9 | ; |
| 10 | ; This is not quite profitable to chain. But with -stress-ivchain, we |
| 11 | ; can form three address chains in place of the shared induction |
| 12 | ; variable. |
| 13 | |
| 14 | ; rdar://10674430 |
| 15 | define void @sharedidx(i8* nocapture %a, i8* nocapture %b, i8* nocapture %c, i32 %s, i32 %len) nounwind ssp { |
| 16 | entry: |
| 17 | ; CHECK: sharedidx: |
| 18 | %cmp8 = icmp eq i32 %len, 0 ; zero-length input skips the loop entirely |
| 19 | br i1 %cmp8, label %for.end, label %for.body |
| 20 | ; Unrolled copy 0 (index %i.09): stores the truncated byte sum a[i] + b[i] into c[i]. The patterns in each copy expect both byte loads to be emitted as register-offset ldrb with base writeback (the trailing '!'). |
| 21 | for.body: ; preds = %entry, %for.body.3 |
| 22 | ; CHECK: %for.body |
| 23 | ; CHECK: ldrb {{r[0-9]|lr}}, [{{r[0-9]|lr}}, {{r[0-9]|lr}}]! |
| 24 | ; CHECK: ldrb {{r[0-9]|lr}}, [{{r[0-9]|lr}}, {{r[0-9]|lr}}]! |
| 25 | %i.09 = phi i32 [ %add5.3, %for.body.3 ], [ 0, %entry ] ; shared index: 0 on entry, otherwise the value carried from for.body.3 |
| 26 | %arrayidx = getelementptr inbounds i8* %a, i32 %i.09 |
| 27 | %0 = load i8* %arrayidx, align 1 |
| 28 | %conv6 = zext i8 %0 to i32 |
| 29 | %arrayidx1 = getelementptr inbounds i8* %b, i32 %i.09 |
| 30 | %1 = load i8* %arrayidx1, align 1 |
| 31 | %conv27 = zext i8 %1 to i32 |
| 32 | %add = add nsw i32 %conv27, %conv6 |
| 33 | %conv3 = trunc i32 %add to i8 |
| 34 | %arrayidx4 = getelementptr inbounds i8* %c, i32 %i.09 |
| 35 | store i8 %conv3, i8* %arrayidx4, align 1 |
| 36 | %add5 = add i32 %i.09, %s ; advance the index by the runtime stride %s |
| 37 | %cmp = icmp ult i32 %add5, %len ; unsigned bound test against %len |
| 38 | br i1 %cmp, label %for.body.1, label %for.end |
| 39 | ; Common exit: reached from the zero-length guard in entry and from the bound test at the end of each unrolled copy. |
| 40 | for.end: ; preds = %for.body, %for.body.1, %for.body.2, %for.body.3, %entry |
| 41 | ret void |
| 42 | ; Unrolled copy 1: same load/add/store body as for.body, indexed by %add5 (i + s). |
| 43 | for.body.1: ; preds = %for.body |
| 44 | ; CHECK: %for.body.1 |
| 45 | ; CHECK: ldrb {{r[0-9]|lr}}, [{{r[0-9]|lr}}, {{r[0-9]|lr}}]! |
| 46 | ; CHECK: ldrb {{r[0-9]|lr}}, [{{r[0-9]|lr}}, {{r[0-9]|lr}}]! |
| 47 | %arrayidx.1 = getelementptr inbounds i8* %a, i32 %add5 |
| 48 | %2 = load i8* %arrayidx.1, align 1 |
| 49 | %conv6.1 = zext i8 %2 to i32 |
| 50 | %arrayidx1.1 = getelementptr inbounds i8* %b, i32 %add5 |
| 51 | %3 = load i8* %arrayidx1.1, align 1 |
| 52 | %conv27.1 = zext i8 %3 to i32 |
| 53 | %add.1 = add nsw i32 %conv27.1, %conv6.1 |
| 54 | %conv3.1 = trunc i32 %add.1 to i8 |
| 55 | %arrayidx4.1 = getelementptr inbounds i8* %c, i32 %add5 |
| 56 | store i8 %conv3.1, i8* %arrayidx4.1, align 1 |
| 57 | %add5.1 = add i32 %add5, %s |
| 58 | %cmp.1 = icmp ult i32 %add5.1, %len |
| 59 | br i1 %cmp.1, label %for.body.2, label %for.end |
| 60 | ; Unrolled copy 2: same body, indexed by %add5.1 (i + 2*s). |
| 61 | for.body.2: ; preds = %for.body.1 |
| 62 | ; CHECK: %for.body.2 |
| 63 | ; CHECK: ldrb {{r[0-9]|lr}}, [{{r[0-9]|lr}}, {{r[0-9]|lr}}]! |
| 64 | ; CHECK: ldrb {{r[0-9]|lr}}, [{{r[0-9]|lr}}, {{r[0-9]|lr}}]! |
| 65 | %arrayidx.2 = getelementptr inbounds i8* %a, i32 %add5.1 |
| 66 | %4 = load i8* %arrayidx.2, align 1 |
| 67 | %conv6.2 = zext i8 %4 to i32 |
| 68 | %arrayidx1.2 = getelementptr inbounds i8* %b, i32 %add5.1 |
| 69 | %5 = load i8* %arrayidx1.2, align 1 |
| 70 | %conv27.2 = zext i8 %5 to i32 |
| 71 | %add.2 = add nsw i32 %conv27.2, %conv6.2 |
| 72 | %conv3.2 = trunc i32 %add.2 to i8 |
| 73 | %arrayidx4.2 = getelementptr inbounds i8* %c, i32 %add5.1 |
| 74 | store i8 %conv3.2, i8* %arrayidx4.2, align 1 |
| 75 | %add5.2 = add i32 %add5.1, %s |
| 76 | %cmp.2 = icmp ult i32 %add5.2, %len |
| 77 | br i1 %cmp.2, label %for.body.3, label %for.end |
| 78 | ; Unrolled copy 3: same body, indexed by %add5.2 (i + 3*s); its successor index %add5.3 feeds the phi in for.body. |
| 79 | for.body.3: ; preds = %for.body.2 |
| 80 | ; CHECK: %for.body.3 |
| 81 | ; CHECK: ldrb {{r[0-9]|lr}}, [{{r[0-9]|lr}}, {{r[0-9]|lr}}]! |
| 82 | ; CHECK: ldrb {{r[0-9]|lr}}, [{{r[0-9]|lr}}, {{r[0-9]|lr}}]! |
| 83 | %arrayidx.3 = getelementptr inbounds i8* %a, i32 %add5.2 |
| 84 | %6 = load i8* %arrayidx.3, align 1 |
| 85 | %conv6.3 = zext i8 %6 to i32 |
| 86 | %arrayidx1.3 = getelementptr inbounds i8* %b, i32 %add5.2 |
| 87 | %7 = load i8* %arrayidx1.3, align 1 |
| 88 | %conv27.3 = zext i8 %7 to i32 |
| 89 | %add.3 = add nsw i32 %conv27.3, %conv6.3 |
| 90 | %conv3.3 = trunc i32 %add.3 to i8 |
| 91 | %arrayidx4.3 = getelementptr inbounds i8* %c, i32 %add5.2 |
| 92 | store i8 %conv3.3, i8* %arrayidx4.3, align 1 |
| 93 | %add5.3 = add i32 %add5.2, %s |
| 94 | %cmp.3 = icmp ult i32 %add5.3, %len |
| 95 | br i1 %cmp.3, label %for.body, label %for.end ; back edge to unrolled copy 0 |
| 96 | } |