; RUN: opt < %s -S -early-cse | FileCheck %s
; RUN: opt < %s -S -passes=early-cse | FileCheck %s

declare {}* @llvm.invariant.start.p0i8(i64, i8* nocapture) nounwind readonly
declare void @llvm.invariant.end.p0i8({}*, i64, i8* nocapture) nounwind

; Check that we do load-load forwarding over invariant.start, since it does not
; clobber memory.
define i8 @test_bypass1(i8* %P) {
; CHECK-LABEL: @test_bypass1(
; CHECK-NEXT: %V1 = load i8, i8* %P
; CHECK-NEXT: %i = call {}* @llvm.invariant.start.p0i8(i64 1, i8* %P)
; CHECK-NEXT: ret i8 0

  %V1 = load i8, i8* %P
  %i = call {}* @llvm.invariant.start.p0i8(i64 1, i8* %P)
  %V2 = load i8, i8* %P
  %Diff = sub i8 %V1, %V2
  ret i8 %Diff
}

; Trivial store->load forwarding over invariant.start.
define i8 @test_bypass2(i8* %P) {
; CHECK-LABEL: @test_bypass2(
; CHECK-NEXT: store i8 42, i8* %P
; CHECK-NEXT: %i = call {}* @llvm.invariant.start.p0i8(i64 1, i8* %P)
; CHECK-NEXT: ret i8 42

  store i8 42, i8* %P
  %i = call {}* @llvm.invariant.start.p0i8(i64 1, i8* %P)
  %V1 = load i8, i8* %P
  ret i8 %V1
}

; We can DSE over invariant.start calls, since the first store to
; %P is valid, and the second store is actually unreachable based on
; the semantics of invariant.start.
define void @test_bypass3(i8* %P) {
; CHECK-LABEL: @test_bypass3(
; CHECK-NEXT: %i = call {}* @llvm.invariant.start.p0i8(i64 1, i8* %P)
; CHECK-NEXT: store i8 60, i8* %P

  store i8 50, i8* %P
  %i = call {}* @llvm.invariant.start.p0i8(i64 1, i8* %P)
  store i8 60, i8* %P
  ret void
}

; FIXME: The first store can now also be eliminated, since there is no read
; within the invariant region between start and end.
define void @test_bypass4(i8* %P) {
; CHECK-LABEL: @test_bypass4(
; CHECK-NEXT: store i8 50, i8* %P
; CHECK-NEXT: %i = call {}* @llvm.invariant.start.p0i8(i64 1, i8* %P)
; CHECK-NEXT: call void @llvm.invariant.end.p0i8({}* %i, i64 1, i8* %P)
; CHECK-NEXT: store i8 60, i8* %P

  store i8 50, i8* %P
  %i = call {}* @llvm.invariant.start.p0i8(i64 1, i8* %P)
  call void @llvm.invariant.end.p0i8({}* %i, i64 1, i8* %P)
  store i8 60, i8* %P
  ret void
}

declare void @clobber()
declare {}* @llvm.invariant.start.p0i32(i64 %size, i32* nocapture %ptr)
declare void @llvm.invariant.end.p0i32({}*, i64, i32* nocapture) nounwind

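; The invariant scope opened before the first load spans the call to
; @clobber, so the second load can be forwarded from the first.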
define i32 @test_before_load(i32* %p) {
; CHECK-LABEL: @test_before_load
; CHECK: ret i32 0
  call {}* @llvm.invariant.start.p0i32(i64 4, i32* %p)
  %v1 = load i32, i32* %p
  call void @clobber()
  %v2 = load i32, i32* %p
  %sub = sub i32 %v1, %v2
  ret i32 %sub
}

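; Same, with the scope opened between the first load and the clobber: %p
; cannot change between %v1 and %v2, so forwarding is still legal.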
define i32 @test_before_clobber(i32* %p) {
; CHECK-LABEL: @test_before_clobber
; CHECK: ret i32 0
  %v1 = load i32, i32* %p
  call {}* @llvm.invariant.start.p0i32(i64 4, i32* %p)
  call void @clobber()
  %v2 = load i32, i32* %p
  %sub = sub i32 %v1, %v2
  ret i32 %sub
}

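; A second invariant.start over the same location does not end the scope
; opened by the first, so the loads still combine.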
define i32 @test_duplicate_scope(i32* %p) {
; CHECK-LABEL: @test_duplicate_scope
; CHECK: ret i32 0
  %v1 = load i32, i32* %p
  call {}* @llvm.invariant.start.p0i32(i64 4, i32* %p)
  call void @clobber()
  call {}* @llvm.invariant.start.p0i32(i64 4, i32* %p)
  %v2 = load i32, i32* %p
  %sub = sub i32 %v1, %v2
  ret i32 %sub
}

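; Even though the value of the first load is unknown (it follows a clobber),
; both loads sit inside the invariant scope and must be equal.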
define i32 @test_unanalyzable_load(i32* %p) {
; CHECK-LABEL: @test_unanalyzable_load
; CHECK: ret i32 0
  call {}* @llvm.invariant.start.p0i32(i64 4, i32* %p)
  call void @clobber()
  %v1 = load i32, i32* %p
  call void @clobber()
  %v2 = load i32, i32* %p
  %sub = sub i32 %v1, %v2
  ret i32 %sub
}

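; Negative test: the clobber executes before the invariant scope begins, so
; %p may differ between the two loads.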
define i32 @test_negative_after_clobber(i32* %p) {
; CHECK-LABEL: @test_negative_after_clobber
; CHECK: ret i32 %sub
  %v1 = load i32, i32* %p
  call void @clobber()
  call {}* @llvm.invariant.start.p0i32(i64 4, i32* %p)
  %v2 = load i32, i32* %p
  %sub = sub i32 %v1, %v2
  ret i32 %sub
}

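; The scope opened before the branch covers the clobber on the taken path,
; so the loads still combine at the merge point.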
define i32 @test_merge(i32* %p, i1 %cnd) {
; CHECK-LABEL: @test_merge
; CHECK: ret i32 0
  %v1 = load i32, i32* %p
  call {}* @llvm.invariant.start.p0i32(i64 4, i32* %p)
  br i1 %cnd, label %merge, label %taken

taken:
  call void @clobber()
  br label %merge

merge:
  %v2 = load i32, i32* %p
  %sub = sub i32 %v1, %v2
  ret i32 %sub
}

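; Negative test: the scope only begins at the merge point, after the
; potential clobber on the taken path.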
define i32 @test_negative_after_mergeclobber(i32* %p, i1 %cnd) {
; CHECK-LABEL: @test_negative_after_mergeclobber
; CHECK: ret i32 %sub
  %v1 = load i32, i32* %p
  br i1 %cnd, label %merge, label %taken

taken:
  call void @clobber()
  br label %merge

merge:
  call {}* @llvm.invariant.start.p0i32(i64 4, i32* %p)
  %v2 = load i32, i32* %p
  %sub = sub i32 %v1, %v2
  ret i32 %sub
}

; In theory, this version could work, but earlycse is incapable of
; merging facts along distinct paths.
define i32 @test_false_negative_merge(i32* %p, i1 %cnd) {
; CHECK-LABEL: @test_false_negative_merge
; CHECK: ret i32 %sub
  %v1 = load i32, i32* %p
  br i1 %cnd, label %merge, label %taken

taken:
  call {}* @llvm.invariant.start.p0i32(i64 4, i32* %p)
  call void @clobber()
  br label %merge

merge:
  %v2 = load i32, i32* %p
  %sub = sub i32 %v1, %v2
  ret i32 %sub
}

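; Both loads are inside the invariant scope, so %v2 can reuse %v1 across the
; clobber on the taken path.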
define i32 @test_merge_unanalyzable_load(i32* %p, i1 %cnd) {
; CHECK-LABEL: @test_merge_unanalyzable_load
; CHECK: ret i32 0
  call {}* @llvm.invariant.start.p0i32(i64 4, i32* %p)
  call void @clobber()
  %v1 = load i32, i32* %p
  br i1 %cnd, label %merge, label %taken

taken:
  call void @clobber()
  br label %merge

merge:
  %v2 = load i32, i32* %p
  %sub = sub i32 %v1, %v2
  ret i32 %sub
}

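; The store writes back the value the invariant location must still hold, so
; it is dead.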
define void @test_dse_before_load(i32* %p, i1 %cnd) {
; CHECK-LABEL: @test_dse_before_load
; CHECK-NOT: store
  call {}* @llvm.invariant.start.p0i32(i64 4, i32* %p)
  %v1 = load i32, i32* %p
  call void @clobber()
  store i32 %v1, i32* %p
  ret void
}

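; Same, with the scope opened after the load: nothing can modify %p between
; the load and the store, so the store is still dead.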
define void @test_dse_after_load(i32* %p, i1 %cnd) {
; CHECK-LABEL: @test_dse_after_load
; CHECK-NOT: store
  %v1 = load i32, i32* %p
  call {}* @llvm.invariant.start.p0i32(i64 4, i32* %p)
  call void @clobber()
  store i32 %v1, i32* %p
  ret void
}

; In this case, we have a false negative since MemoryLocation is implicitly
; typed due to the use of a Value to represent the address. Note that other
; passes will canonicalize away the bitcasts in this example.
define i32 @test_false_negative_types(i32* %p) {
; CHECK-LABEL: @test_false_negative_types
; CHECK: ret i32 %sub
  call {}* @llvm.invariant.start.p0i32(i64 4, i32* %p)
  %v1 = load i32, i32* %p
  call void @clobber()
  %pf = bitcast i32* %p to float*
  %v2f = load float, float* %pf
  %v2 = bitcast float %v2f to i32
  %sub = sub i32 %v1, %v2
  ret i32 %sub
}

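; Negative test: the 3-byte invariant region does not cover the full 4-byte
; i32 load.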
define i32 @test_negative_size1(i32* %p) {
; CHECK-LABEL: @test_negative_size1
; CHECK: ret i32 %sub
  call {}* @llvm.invariant.start.p0i32(i64 3, i32* %p)
  %v1 = load i32, i32* %p
  call void @clobber()
  %v2 = load i32, i32* %p
  %sub = sub i32 %v1, %v2
  ret i32 %sub
}

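; Negative test: a zero-sized invariant region covers no bytes at all.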
define i32 @test_negative_size2(i32* %p) {
; CHECK-LABEL: @test_negative_size2
; CHECK: ret i32 %sub
  call {}* @llvm.invariant.start.p0i32(i64 0, i32* %p)
  %v1 = load i32, i32* %p
  call void @clobber()
  %v2 = load i32, i32* %p
  %sub = sub i32 %v1, %v2
  ret i32 %sub
}

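; Negative test: the scope is already closed when the loads execute, so
; @clobber may legally modify %p.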
define i32 @test_negative_scope(i32* %p) {
; CHECK-LABEL: @test_negative_scope
; CHECK: ret i32 %sub
  %scope = call {}* @llvm.invariant.start.p0i32(i64 4, i32* %p)
  call void @llvm.invariant.end.p0i32({}* %scope, i64 4, i32* %p)
  %v1 = load i32, i32* %p
  call void @clobber()
  %v2 = load i32, i32* %p
  %sub = sub i32 %v1, %v2
  ret i32 %sub
}

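; False negative: both loads precede the invariant.end, so forwarding would
; be legal; earlycse appears to give up on any scope that is eventually ended.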
define i32 @test_false_negative_scope(i32* %p) {
; CHECK-LABEL: @test_false_negative_scope
; CHECK: ret i32 %sub
  %scope = call {}* @llvm.invariant.start.p0i32(i64 4, i32* %p)
  %v1 = load i32, i32* %p
  call void @clobber()
  %v2 = load i32, i32* %p
  call void @llvm.invariant.end.p0i32({}* %scope, i64 4, i32* %p)
  %sub = sub i32 %v1, %v2
  ret i32 %sub
}

; An invariant load de facto starts an invariant.start scope of the
; appropriate size.
define i32 @test_invariant_load_scope(i32* %p) {
; CHECK-LABEL: @test_invariant_load_scope
; CHECK: ret i32 0
  %v1 = load i32, i32* %p, !invariant.load !{}
  call void @clobber()
  %v2 = load i32, i32* %p
  %sub = sub i32 %v1, %v2
  ret i32 %sub
}