Chris Lattner | 8dca876 | 2009-11-27 06:33:09 +0000 | [diff] [blame] | 1 | ; RUN: opt < %s -gvn -enable-load-pre -S | FileCheck %s |
Chris Lattner | 9a5c22c | 2009-11-27 18:08:30 +0000 | [diff] [blame] | 2 | target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64" |
Chris Lattner | c89c6a9 | 2008-12-02 08:16:11 +0000 | [diff] [blame] | 3 | |
; test1: block4 reloads %p after a diamond. The block3 arm stores 0 to %p,
; while the block2 arm does not touch it. The directives below expect a load
; of %p to show up in block2, and block4 to consist of an i32 phi immediately
; followed by the return (i.e. no load left in block4).
Chris Lattner | 8dca876 | 2009-11-27 06:33:09 +0000 | [diff] [blame] | 4 | define i32 @test1(i32* %p, i1 %C) { |
| 5 | ; CHECK: @test1 |
Chris Lattner | c89c6a9 | 2008-12-02 08:16:11 +0000 | [diff] [blame] | 6 | block1: |
| 7 | br i1 %C, label %block2, label %block3 |
| 8 | |
| 9 | block2: |
| 10 | br label %block4 |
Chris Lattner | 8dca876 | 2009-11-27 06:33:09 +0000 | [diff] [blame] | 11 | ; CHECK: block2: |
| 12 | ; CHECK-NEXT: load i32* %p |
Chris Lattner | c89c6a9 | 2008-12-02 08:16:11 +0000 | [diff] [blame] | 13 | |
| 14 | block3: |
Chris Lattner | d280d85 | 2009-11-27 06:42:42 +0000 | [diff] [blame] | 15 | store i32 0, i32* %p |
Chris Lattner | c89c6a9 | 2008-12-02 08:16:11 +0000 | [diff] [blame] | 16 | br label %block4 |
| 17 | |
| 18 | block4: |
| 19 | %PRE = load i32* %p |
| 20 | ret i32 %PRE |
Chris Lattner | 8dca876 | 2009-11-27 06:33:09 +0000 | [diff] [blame] | 21 | ; CHECK: block4: |
| 22 | ; CHECK-NEXT: phi i32 |
| 23 | ; CHECK-NEXT: ret i32 |
Chris Lattner | c89c6a9 | 2008-12-02 08:16:11 +0000 | [diff] [blame] | 24 | } |
Chris Lattner | d280d85 | 2009-11-27 06:42:42 +0000 | [diff] [blame] | 25 | |
Chris Lattner | 616613d | 2009-11-27 08:25:10 +0000 | [diff] [blame] | 26 | ; This is a simple phi translation case. |
; test2: block4 loads through %P2, a phi of pointers (%p when arriving from
; block3, %q when arriving from block2). block3 stores 0 to %p. The directives
; expect a load of %q in block2, and block4 to start with an i32 phi with no
; load remaining before the return.
Chris Lattner | d280d85 | 2009-11-27 06:42:42 +0000 | [diff] [blame] | 27 | define i32 @test2(i32* %p, i32* %q, i1 %C) { |
| 28 | ; CHECK: @test2 |
| 29 | block1: |
| 30 | br i1 %C, label %block2, label %block3 |
| 31 | |
| 32 | block2: |
| 33 | br label %block4 |
| 34 | ; CHECK: block2: |
| 35 | ; CHECK-NEXT: load i32* %q |
| 36 | |
| 37 | block3: |
| 38 | store i32 0, i32* %p |
| 39 | br label %block4 |
| 40 | |
| 41 | block4: |
| 42 | %P2 = phi i32* [%p, %block3], [%q, %block2] |
| 43 | %PRE = load i32* %P2 |
| 44 | ret i32 %PRE |
| 45 | ; CHECK: block4: |
| 46 | ; CHECK-NEXT: phi i32 [ |
| 47 | ; CHECK-NOT: load |
| 48 | ; CHECK: ret i32 |
| 49 | } |
| 50 | |
Chris Lattner | 616613d | 2009-11-27 08:25:10 +0000 | [diff] [blame] | 51 | ; This is a PRE case that requires phi translation through a GEP. |
; test3: the address loaded in block4 is %P3, a GEP (index 1) of the pointer
; phi %P2. block1 computes %B = GEP %q, 1 and stores that pointer to %Hack;
; block3 stores 0 through %A = GEP %p, 1. The directives expect a load of %B
; in block2, and block4 to start with an i32 phi with no load before the
; return.
Chris Lattner | d280d85 | 2009-11-27 06:42:42 +0000 | [diff] [blame] | 52 | define i32 @test3(i32* %p, i32* %q, i32** %Hack, i1 %C) { |
| 53 | ; CHECK: @test3 |
| 54 | block1: |
| 55 | %B = getelementptr i32* %q, i32 1 |
| 56 | store i32* %B, i32** %Hack |
| 57 | br i1 %C, label %block2, label %block3 |
| 58 | |
| 59 | block2: |
| 60 | br label %block4 |
| 61 | ; CHECK: block2: |
| 62 | ; CHECK-NEXT: load i32* %B |
| 63 | |
| 64 | block3: |
| 65 | %A = getelementptr i32* %p, i32 1 |
| 66 | store i32 0, i32* %A |
| 67 | br label %block4 |
| 68 | |
| 69 | block4: |
| 70 | %P2 = phi i32* [%p, %block3], [%q, %block2] |
| 71 | %P3 = getelementptr i32* %P2, i32 1 |
| 72 | %PRE = load i32* %P3 |
| 73 | ret i32 %PRE |
| 74 | ; CHECK: block4: |
| 75 | ; CHECK-NEXT: phi i32 [ |
| 76 | ; CHECK-NOT: load |
| 77 | ; CHECK: ret i32 |
| 78 | } |
Chris Lattner | 616613d | 2009-11-27 08:25:10 +0000 | [diff] [blame] | 79 | |
| 80 | ;; Here the loaded address is available, but the computation is in 'block3' |
| 81 | ;; which does not dominate 'block2'. |
; test4: same shape as test3, except that %B = GEP %q, 1 is computed (and
; stored to %Hack) in block3 instead of block1, so block2 has no existing
; value for the translated address. The directives only require that some
; i32 load appears in block2 before its branch to block4, and that block4
; starts with an i32 phi with no load before the return.
| 82 | define i32 @test4(i32* %p, i32* %q, i32** %Hack, i1 %C) { |
| 83 | ; CHECK: @test4 |
| 84 | block1: |
| 85 | br i1 %C, label %block2, label %block3 |
| 86 | |
| 87 | block2: |
| 88 | br label %block4 |
Chris Lattner | 0c264b1 | 2009-11-28 16:08:18 +0000 | [diff] [blame] | 89 | ; CHECK: block2: |
| 90 | ; CHECK: load i32* |
| 91 | ; CHECK: br label %block4 |
Chris Lattner | 616613d | 2009-11-27 08:25:10 +0000 | [diff] [blame] | 92 | |
| 93 | block3: |
| 94 | %B = getelementptr i32* %q, i32 1 |
| 95 | store i32* %B, i32** %Hack |
| 96 | |
| 97 | %A = getelementptr i32* %p, i32 1 |
| 98 | store i32 0, i32* %A |
| 99 | br label %block4 |
| 100 | |
| 101 | block4: |
| 102 | %P2 = phi i32* [%p, %block3], [%q, %block2] |
| 103 | %P3 = getelementptr i32* %P2, i32 1 |
| 104 | %PRE = load i32* %P3 |
| 105 | ret i32 %PRE |
Chris Lattner | 0c264b1 | 2009-11-28 16:08:18 +0000 | [diff] [blame] | 106 | ; CHECK: block4: |
| 107 | ; CHECK-NEXT: phi i32 [ |
| 108 | ; CHECK-NOT: load |
| 109 | ; CHECK: ret i32 |
Chris Lattner | 616613d | 2009-11-27 08:25:10 +0000 | [diff] [blame] | 110 | } |
Chris Lattner | 9a5c22c | 2009-11-27 18:08:30 +0000 | [diff] [blame] | 111 | |
| 112 | ;void test5(int N, double *G) { |
| 113 | ; int j; |
| 114 | ; for (j = 0; j < N - 1; j++) |
| 115 | ; G[j] = G[j] + G[j+1]; |
| 116 | ;} |
| 117 | |
; test5: IR form of the C loop shown above. Each iteration of bb loads
; G[indvar] (%scevgep7) and G[indvar+1] (%scevgep), adds them, and stores the
; sum back to G[indvar]. The directives expect a double load in the preheader
; bb.nph and exactly one double load left inside bb before the exit branch.
| 118 | define void @test5(i32 %N, double* nocapture %G) nounwind ssp { |
| 119 | ; CHECK: @test5 |
| 120 | entry: |
| 121 | %0 = add i32 %N, -1 |
| 122 | %1 = icmp sgt i32 %0, 0 |
| 123 | br i1 %1, label %bb.nph, label %return |
| 124 | |
| 125 | bb.nph: |
| 126 | %tmp = zext i32 %0 to i64 |
| 127 | br label %bb |
| 128 | |
| 129 | ; CHECK: bb.nph: |
| 130 | ; CHECK: load double* |
| 131 | ; CHECK: br label %bb |
| 132 | |
| 133 | bb: |
| 134 | %indvar = phi i64 [ 0, %bb.nph ], [ %tmp6, %bb ] |
| 135 | %tmp6 = add i64 %indvar, 1 |
| 136 | %scevgep = getelementptr double* %G, i64 %tmp6 |
| 137 | %scevgep7 = getelementptr double* %G, i64 %indvar |
| 138 | %2 = load double* %scevgep7, align 8 |
| 139 | %3 = load double* %scevgep, align 8 |
| 140 | %4 = fadd double %2, %3 |
| 141 | store double %4, double* %scevgep7, align 8 |
| 142 | %exitcond = icmp eq i64 %tmp6, %tmp |
| 143 | br i1 %exitcond, label %return, label %bb |
| 144 | |
| 145 | ; Should only be one load in the loop. |
| 146 | ; CHECK: bb: |
| 147 | ; CHECK: load double* |
| 148 | ; CHECK-NOT: load double* |
| 149 | ; CHECK: br i1 %exitcond |
| 150 | |
| 151 | return: |
| 152 | ret void |
| 153 | } |
| 154 | |
| 155 | ;void test6(int N, double *G) { |
| 156 | ; int j; |
| 157 | ; for (j = 0; j < N - 1; j++) |
| 158 | ; G[j+1] = G[j] + G[j+1]; |
| 159 | ;} |
| 160 | |
; test6: IR form of the C loop shown above. Identical to test5 except that
; the sum of G[indvar] (%scevgep7) and G[indvar+1] (%scevgep) is stored to
; G[indvar+1], so the value stored in one iteration is the G[indvar] operand
; of the next. The directives expect a double load in the preheader bb.nph
; and exactly one double load left inside bb before the exit branch.
| 161 | define void @test6(i32 %N, double* nocapture %G) nounwind ssp { |
| 162 | ; CHECK: @test6 |
| 163 | entry: |
| 164 | %0 = add i32 %N, -1 |
| 165 | %1 = icmp sgt i32 %0, 0 |
| 166 | br i1 %1, label %bb.nph, label %return |
| 167 | |
| 168 | bb.nph: |
| 169 | %tmp = zext i32 %0 to i64 |
| 170 | br label %bb |
| 171 | |
| 172 | ; CHECK: bb.nph: |
| 173 | ; CHECK: load double* |
| 174 | ; CHECK: br label %bb |
| 175 | |
| 176 | bb: |
| 177 | %indvar = phi i64 [ 0, %bb.nph ], [ %tmp6, %bb ] |
| 178 | %tmp6 = add i64 %indvar, 1 |
| 179 | %scevgep = getelementptr double* %G, i64 %tmp6 |
| 180 | %scevgep7 = getelementptr double* %G, i64 %indvar |
| 181 | %2 = load double* %scevgep7, align 8 |
| 182 | %3 = load double* %scevgep, align 8 |
| 183 | %4 = fadd double %2, %3 |
| 184 | store double %4, double* %scevgep, align 8 |
| 185 | %exitcond = icmp eq i64 %tmp6, %tmp |
| 186 | br i1 %exitcond, label %return, label %bb |
| 187 | |
| 188 | ; Should only be one load in the loop. |
| 189 | ; CHECK: bb: |
| 190 | ; CHECK: load double* |
| 191 | ; CHECK-NOT: load double* |
| 192 | ; CHECK: br i1 %exitcond |
| 193 | |
| 194 | return: |
| 195 | ret void |
| 196 | } |
| 197 | |
Chris Lattner | 11c6bab | 2009-11-27 19:11:31 +0000 | [diff] [blame] | 198 | ;void test7(int N, double* G) { |
| 199 | ; long j; |
| 200 | ; G[1] = 1; |
| 201 | ; for (j = 1; j < N - 1; j++) |
| 202 | ; G[j+1] = G[j] + G[j+1]; |
| 203 | ;} |
| 204 | |
| 205 | ; This requires phi translation of the adds. |
; test7: IR form of the C loop shown above. entry stores 1.0 to G[1]; each
; iteration of bb loads G[indvar+1] (%scevgep10) and G[indvar+2] (%scevgep),
; adds them, and stores the sum to G[indvar+2]. Both addresses are built from
; adds of the induction variable. The directives expect exactly one double
; load left inside bb before the exit branch.
;
; A function-name anchor is included right after the define line, matching
; the other tests in this file, so the bb checks are tied to this function
; rather than to whatever follows the previous match.
| 206 | define void @test7(i32 %N, double* nocapture %G) nounwind ssp { |
; CHECK: @test7
| 207 | entry: |
| 208 | %0 = getelementptr inbounds double* %G, i64 1 |
| 209 | store double 1.000000e+00, double* %0, align 8 |
| 210 | %1 = add i32 %N, -1 |
| 211 | %2 = icmp sgt i32 %1, 1 |
| 212 | br i1 %2, label %bb.nph, label %return |
| 213 | |
| 214 | bb.nph: |
| 215 | %tmp = sext i32 %1 to i64 |
| 216 | %tmp7 = add i64 %tmp, -1 |
| 217 | br label %bb |
| 218 | |
| 219 | bb: |
| 220 | %indvar = phi i64 [ 0, %bb.nph ], [ %tmp9, %bb ] |
| 221 | %tmp8 = add i64 %indvar, 2 |
| 222 | %scevgep = getelementptr double* %G, i64 %tmp8 |
| 223 | %tmp9 = add i64 %indvar, 1 |
| 224 | %scevgep10 = getelementptr double* %G, i64 %tmp9 |
| 225 | %3 = load double* %scevgep10, align 8 |
| 226 | %4 = load double* %scevgep, align 8 |
| 227 | %5 = fadd double %3, %4 |
| 228 | store double %5, double* %scevgep, align 8 |
| 229 | %exitcond = icmp eq i64 %tmp9, %tmp7 |
| 230 | br i1 %exitcond, label %return, label %bb |
| 231 | |
| 232 | ; Should only be one load in the loop. |
| 233 | ; CHECK: bb: |
| 234 | ; CHECK: load double* |
| 235 | ; CHECK-NOT: load double* |
| 236 | ; CHECK: br i1 %exitcond |
| 237 | |
| 238 | return: |
| 239 | ret void |
| 240 | } |
Chris Lattner | 9a5c22c | 2009-11-27 18:08:30 +0000 | [diff] [blame] | 241 | |
Chris Lattner | 971fd57 | 2009-11-27 22:50:07 +0000 | [diff] [blame] | 242 | ;; Here the loaded address isn't available in 'block2' at all, requiring a new |
| 243 | ;; GEP to be inserted into it. |
; test8: like test4, but no GEP of %q exists anywhere in the function; only
; block3 computes %A = GEP %p, 1 and stores 0 through it. The directives
; require that some i32 load appears in block2 before its branch to block4,
; and that block4 starts with an i32 phi with no load before the return.
| 244 | define i32 @test8(i32* %p, i32* %q, i32** %Hack, i1 %C) { |
| 245 | ; CHECK: @test8 |
Chris Lattner | 9a5c22c | 2009-11-27 18:08:30 +0000 | [diff] [blame] | 246 | block1: |
| 247 | br i1 %C, label %block2, label %block3 |
| 248 | |
| 249 | block2: |
| 250 | br label %block4 |
Chris Lattner | 0c264b1 | 2009-11-28 16:08:18 +0000 | [diff] [blame] | 251 | ; CHECK: block2: |
| 252 | ; CHECK: load i32* |
| 253 | ; CHECK: br label %block4 |
Chris Lattner | 9a5c22c | 2009-11-27 18:08:30 +0000 | [diff] [blame] | 254 | |
| 255 | block3: |
| 256 | %A = getelementptr i32* %p, i32 1 |
| 257 | store i32 0, i32* %A |
| 258 | br label %block4 |
| 259 | |
| 260 | block4: |
| 261 | %P2 = phi i32* [%p, %block3], [%q, %block2] |
| 262 | %P3 = getelementptr i32* %P2, i32 1 |
| 263 | %PRE = load i32* %P3 |
| 264 | ret i32 %PRE |
Chris Lattner | 0c264b1 | 2009-11-28 16:08:18 +0000 | [diff] [blame] | 265 | ; CHECK: block4: |
| 266 | ; CHECK-NEXT: phi i32 [ |
| 267 | ; CHECK-NOT: load |
| 268 | ; CHECK: ret i32 |
Chris Lattner | 9a5c22c | 2009-11-27 18:08:30 +0000 | [diff] [blame] | 269 | } |
| 270 | |
Chris Lattner | 9fed3c2 | 2009-11-29 01:04:40 +0000 | [diff] [blame^] | 271 | ;void test9(int N, double* G) { |
| 272 | ; long j; |
| 273 | ; for (j = 1; j < N - 1; j++) |
| 274 | ; G[j+1] = G[j] + G[j+1]; |
| 275 | ;} |
| 276 | |
| 277 | ; This requires phi translation of the adds. |
; test9: IR form of the C loop shown above; the same loop body as test7 but
; without the initial store to G[1] in entry. Each iteration of bb loads
; G[indvar+1] (%scevgep10) and G[indvar+2] (%scevgep), adds them, and stores
; the sum to G[indvar+2]. The directives expect a double load in the
; preheader bb.nph and exactly one double load left inside bb before the
; exit branch.
;
; A function-name anchor is included right after the define line, matching
; the other tests in this file, so the bb.nph/bb checks are tied to this
; function rather than to whatever follows the previous match.
| 278 | define void @test9(i32 %N, double* nocapture %G) nounwind ssp { |
; CHECK: @test9
| 279 | entry: |
; NOTE(review): the unnamed add below presumably only occupies value number
; %0 so that the explicitly numbered %1/%2 that follow stay sequential.
| 280 | add i32 0, 0 |
| 281 | %1 = add i32 %N, -1 |
| 282 | %2 = icmp sgt i32 %1, 1 |
| 283 | br i1 %2, label %bb.nph, label %return |
| 284 | |
| 285 | bb.nph: |
| 286 | %tmp = sext i32 %1 to i64 |
| 287 | %tmp7 = add i64 %tmp, -1 |
| 288 | br label %bb |
| 289 | |
| 290 | ; CHECK: bb.nph: |
| 291 | ; CHECK: load double* |
| 292 | ; CHECK: br label %bb |
| 293 | |
| 294 | bb: |
| 295 | %indvar = phi i64 [ 0, %bb.nph ], [ %tmp9, %bb ] |
| 296 | %tmp8 = add i64 %indvar, 2 |
| 297 | %scevgep = getelementptr double* %G, i64 %tmp8 |
| 298 | %tmp9 = add i64 %indvar, 1 |
| 299 | %scevgep10 = getelementptr double* %G, i64 %tmp9 |
| 300 | %3 = load double* %scevgep10, align 8 |
| 301 | %4 = load double* %scevgep, align 8 |
| 302 | %5 = fadd double %3, %4 |
| 303 | store double %5, double* %scevgep, align 8 |
| 304 | %exitcond = icmp eq i64 %tmp9, %tmp7 |
| 305 | br i1 %exitcond, label %return, label %bb |
| 306 | |
| 307 | ; Should only be one load in the loop. |
| 308 | ; CHECK: bb: |
| 309 | ; CHECK: load double* |
| 310 | ; CHECK-NOT: load double* |
| 311 | ; CHECK: br i1 %exitcond |
| 312 | |
| 313 | return: |
| 314 | ret void |
| 315 | } |
Chris Lattner | 9a5c22c | 2009-11-27 18:08:30 +0000 | [diff] [blame] | 316 | |