[CodeGenPrepare] limit overflow intrinsic matching to a single basic block (2nd try)
This is a subset of the original commit from rL359879
which was reverted because it could crash when using the 'RemovedInstructions'
structure that enables delayed deletion of dead instructions. The motivating
compile-time win does not require that change though. We should get most of
that win from this change alone.
Using/updating a dominator tree to match math overflow patterns may be very
expensive in compile-time (because of the way CGP uses a DT), so just handle
the single-block case.
See post-commit thread for rL354298 for more details:
http://lists.llvm.org/pipermail/llvm-commits/Week-of-Mon-20190422/646276.html
Differential Revision: https://reviews.llvm.org/D61075
llvm-svn: 359969
diff --git a/llvm/test/Transforms/CodeGenPrepare/X86/overflow-intrinsics.ll b/llvm/test/Transforms/CodeGenPrepare/X86/overflow-intrinsics.ll
index ab636c3..fc6d66a 100644
--- a/llvm/test/Transforms/CodeGenPrepare/X86/overflow-intrinsics.ll
+++ b/llvm/test/Transforms/CodeGenPrepare/X86/overflow-intrinsics.ll
@@ -47,15 +47,16 @@
ret i64 %Q
}
+; TODO? CGP sinks the compare before we have a chance to form the overflow intrinsic.
+
define i64 @uaddo4(i64 %a, i64 %b, i1 %c) nounwind ssp {
; CHECK-LABEL: @uaddo4(
; CHECK-NEXT: entry:
+; CHECK-NEXT: [[ADD:%.*]] = add i64 [[B:%.*]], [[A:%.*]]
; CHECK-NEXT: br i1 [[C:%.*]], label [[NEXT:%.*]], label [[EXIT:%.*]]
; CHECK: next:
-; CHECK-NEXT: [[TMP0:%.*]] = call { i64, i1 } @llvm.uadd.with.overflow.i64(i64 [[B:%.*]], i64 [[A:%.*]])
-; CHECK-NEXT: [[MATH:%.*]] = extractvalue { i64, i1 } [[TMP0]], 0
-; CHECK-NEXT: [[OV:%.*]] = extractvalue { i64, i1 } [[TMP0]], 1
-; CHECK-NEXT: [[Q:%.*]] = select i1 [[OV]], i64 [[B]], i64 42
+; CHECK-NEXT: [[TMP0:%.*]] = icmp ugt i64 [[B]], [[ADD]]
+; CHECK-NEXT: [[Q:%.*]] = select i1 [[TMP0]], i64 [[B]], i64 42
; CHECK-NEXT: ret i64 [[Q]]
; CHECK: exit:
; CHECK-NEXT: ret i64 0
@@ -362,7 +363,7 @@
ret i1 %ov
}
-; Verify insertion point for multi-BB.
+; This used to verify insertion point for multi-BB, but now we just bail out.
declare void @call(i1)
@@ -371,15 +372,14 @@
; CHECK-NEXT: entry:
; CHECK-NEXT: br i1 [[COND:%.*]], label [[T:%.*]], label [[F:%.*]]
; CHECK: t:
-; CHECK-NEXT: [[TMP0:%.*]] = call { i64, i1 } @llvm.usub.with.overflow.i64(i64 [[X:%.*]], i64 [[Y:%.*]])
-; CHECK-NEXT: [[MATH:%.*]] = extractvalue { i64, i1 } [[TMP0]], 0
-; CHECK-NEXT: [[OV1:%.*]] = extractvalue { i64, i1 } [[TMP0]], 1
-; CHECK-NEXT: store i64 [[MATH]], i64* [[P:%.*]]
+; CHECK-NEXT: [[S:%.*]] = sub i64 [[X:%.*]], [[Y:%.*]]
+; CHECK-NEXT: store i64 [[S]], i64* [[P:%.*]]
; CHECK-NEXT: br i1 [[COND]], label [[END:%.*]], label [[F]]
; CHECK: f:
; CHECK-NEXT: ret i1 [[COND]]
; CHECK: end:
-; CHECK-NEXT: ret i1 [[OV1]]
+; CHECK-NEXT: [[OV:%.*]] = icmp ult i64 [[X]], [[Y]]
+; CHECK-NEXT: ret i1 [[OV]]
;
entry:
br i1 %cond, label %t, label %f
@@ -514,6 +514,26 @@
ret void
}
+; This was crashing when trying to delay instruction removal/deletion.
+
+declare i64 @llvm.objectsize.i64.p0i8(i8*, i1 immarg, i1 immarg, i1 immarg) #0
+
+define hidden fastcc void @crash() {
+; CHECK-LABEL: @crash(
+; CHECK-NEXT: [[TMP1:%.*]] = call { i64, i1 } @llvm.uadd.with.overflow.i64(i64 undef, i64 undef)
+; CHECK-NEXT: [[MATH:%.*]] = extractvalue { i64, i1 } [[TMP1]], 0
+; CHECK-NEXT: [[OV:%.*]] = extractvalue { i64, i1 } [[TMP1]], 1
+; CHECK-NEXT: [[T2:%.*]] = select i1 undef, i1 undef, i1 [[OV]]
+; CHECK-NEXT: unreachable
+;
+ %t0 = add i64 undef, undef
+ %t1 = icmp ult i64 %t0, undef
+ %t2 = select i1 undef, i1 undef, i1 %t1
+ %t3 = call i64 @llvm.objectsize.i64.p0i8(i8* nonnull undef, i1 false, i1 false, i1 false)
+ %t4 = icmp ugt i64 %t3, 7
+ unreachable
+}
+
; Check that every instruction inserted by -codegenprepare has a debug location.
; DEBUG: CheckModuleDebugify: PASS