Whitney Tsang | dd3b649 | 2019-08-09 13:56:29 +0000 | [diff] [blame] | 1 | ; RUN: opt < %s -passes='print<loop-cache-cost>' -disable-output 2>&1 | FileCheck %s |
| 2 | |
; Data layout string: little-endian (e), ELF-style name mangling (m:e),
; i64 aligned to 64 bits (i64:64), native integer widths of 32 and 64 bits (n32:64).
| 3 | target datalayout = "e-m:e-i64:64-n32:64" |
; NOTE(review): the target is pinned explicitly; presumably the expected
; loop-cache-cost values in the CHECK lines depend on this target's cache
; parameters -- confirm before changing the triple.
| 4 | target triple = "powerpc64le-unknown-linux-gnu" |
| 5 | |
| 6 | ; void foo(long n, long m, long o, int A[n][m][o], int B[n][m][o], int C[n][m][o]) { |
| 7 | ; for (long i = 0; i < n; i++) |
| 8 | ; for (long j = 0; j < m; j++) |
| 9 | ; for (long k = 0; k < o; k++) |
| 10 | ; A[i][k][j] += B[i][k][j] + C[i][j][k]; |
| 11 | ; } |
| 12 | |
Whitney Tsang | 3caf9af | 2019-08-09 16:18:22 +0000 | [diff] [blame] | 13 | ; CHECK-DAG: Loop 'for.i' has cost = 3000000 |
| 14 | ; CHECK-DAG: Loop 'for.k' has cost = 2030000 |
| 15 | ; CHECK-DAG: Loop 'for.j' has cost = 1060000 |
Whitney Tsang | dd3b649 | 2019-08-09 13:56:29 +0000 | [diff] [blame] | 16 | |
; @foo is the IR form of the triple loop nest shown in the C comment at the
; top of this file. A, B and C are passed as flat i32* pointers; every
; [n][m][o] subscript is linearized by hand below as ((x * m) + y) * o + z.
; Loop nesting order is for.i (outermost) -> for.j -> for.k (innermost).
| 17 | define void @foo(i64 %n, i64 %m, i64 %o, i32* %A, i32* %B, i32* %C) { |
| 18 | entry: |
; Guard chain: all three bounds are tested up front (signed > 0) and the
; following three blocks branch straight to for.end if any bound fails, so
; the loop nest is only entered when n, m and o are all positive.
| 19 | %cmp32 = icmp sgt i64 %n, 0 |
| 20 | %cmp230 = icmp sgt i64 %m, 0 |
| 21 | %cmp528 = icmp sgt i64 %o, 0 |
| 22 | br i1 %cmp32, label %for.cond1.preheader.lr.ph, label %for.end |
| 23 | |
| 24 | for.cond1.preheader.lr.ph: ; preds = %entry |
| 25 | br i1 %cmp230, label %for.i.preheader, label %for.end |
| 26 | |
| 27 | for.i.preheader: ; preds = %for.cond1.preheader.lr.ph |
| 28 | br i1 %cmp528, label %for.i.preheader.split, label %for.end |
| 29 | |
| 30 | for.i.preheader.split: ; preds = %for.i.preheader |
| 31 | br label %for.i |
| 32 | |
; Outermost loop header: i runs from 0 up to n (exit test is in for.inci).
| 33 | for.i: ; preds = %for.inci, %for.i.preheader.split |
| 34 | %i = phi i64 [ %inci, %for.inci ], [ 0, %for.i.preheader.split ] |
; %muli = i * m, the part of every linearized index that depends only on i.
| 35 | %muli = mul i64 %i, %m |
| 36 | br label %for.j |
| 37 | |
; Middle loop header: j runs from 0 up to m (exit test is in for.incj).
| 38 | for.j: ; preds = %for.incj, %for.i |
| 39 | %j = phi i64 [ %incj, %for.incj ], [ 0, %for.i ] |
; %mulj = (i * m + j) * o, the base offset of row C[i][j][*]; it does not
; depend on k, so it is computed once per j iteration.
| 40 | %addj = add i64 %muli, %j |
| 41 | %mulj = mul i64 %addj, %o |
| 42 | br label %for.k |
| 43 | |
; Innermost loop: a single block that is both header and latch; k runs from
; 0 up to o.
| 44 | for.k: ; preds = %for.k, %for.j |
| 45 | %k = phi i64 [ 0, %for.j ], [ %inck, %for.k ] |
| 46 | |
| 47 | ; B[i][k][j] |
; %arrayidx1 = (i * m + k) * o + j. Here k is the middle subscript, so
; consecutive k iterations are o elements apart, while consecutive j
; iterations touch adjacent elements.
| 48 | %addk = add i64 %muli, %k |
| 49 | %mulk = mul i64 %addk, %o |
| 50 | %arrayidx1 = add i64 %j, %mulk |
| 51 | %arrayidx2 = getelementptr inbounds i32, i32* %B, i64 %arrayidx1 |
| 52 | %elem_B = load i32, i32* %arrayidx2, align 4 |
| 53 | |
| 54 | ; C[i][j][k] |
; %arrayidx3 = (i * m + j) * o + k, so this access is contiguous in k.
| 55 | %arrayidx3 = add i64 %k, %mulj |
| 56 | %arrayidx4 = getelementptr inbounds i32, i32* %C, i64 %arrayidx3 |
| 57 | %elem_C = load i32, i32* %arrayidx4, align 4 |
| 58 | |
| 59 | ; A[i][k][j] |
; Reuses %arrayidx1, i.e. the same linear offset as the B access above.
| 60 | %arrayidx5 = getelementptr inbounds i32, i32* %A, i64 %arrayidx1 |
| 61 | %elem_A = load i32, i32* %arrayidx5, align 4 |
| 62 | |
| 63 | ; A[i][k][j] += B[i][k][j] + C[i][j][k] |
| 64 | %add1 = add i32 %elem_B, %elem_C |
| 65 | %add2 = add i32 %add1, %elem_A |
; %arrayidx6 is the same address as %arrayidx5, recomputed for the store.
| 66 | %arrayidx6 = getelementptr inbounds i32, i32* %A, i64 %arrayidx1 |
| 67 | store i32 %add2, i32* %arrayidx6, align 4 |
| 68 | |
; Latch of the k loop: increment and leave once k + 1 == o.
| 69 | %inck = add nsw i64 %k, 1 |
| 70 | %exitcond.us = icmp eq i64 %inck, %o |
| 71 | br i1 %exitcond.us, label %for.incj, label %for.k |
| 72 | |
; Latch of the j loop: increment and leave once j + 1 == m.
| 73 | for.incj: ; preds = %for.k |
| 74 | %incj = add nsw i64 %j, 1 |
| 75 | %exitcond54.us = icmp eq i64 %incj, %m |
| 76 | br i1 %exitcond54.us, label %for.inci, label %for.j |
| 77 | |
; Latch of the i loop: increment and leave once i + 1 == n.
| 78 | for.inci: ; preds = %for.incj |
| 79 | %inci = add nsw i64 %i, 1 |
| 80 | %exitcond55.us = icmp eq i64 %inci, %n |
| 81 | br i1 %exitcond55.us, label %for.end.loopexit, label %for.i |
| 82 | |
| 83 | for.end.loopexit: ; preds = %for.inci |
| 84 | br label %for.end |
| 85 | |
; Common exit: reached from the loop exit and from each of the three guards.
| 86 | for.end: ; preds = %for.end.loopexit, %for.i.preheader, %for.cond1.preheader.lr.ph, %entry |
| 87 | ret void |
| 88 | } |