[AArch64][GlobalISel] Inline tiny memcpy et al at -O0.
FastISel already does this since the initial arm64 port was upstreamed, so
it seems there are no issues with doing this at -O0 for very small memcpys.
Gives a 0.2% geomean code size improvement on CTMark.
Differential Revision: https://reviews.llvm.org/D65758
llvm-svn: 367919
diff --git a/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp b/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp
index 373d52f..9149813 100644
--- a/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp
+++ b/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp
@@ -861,7 +861,7 @@
return true;
}
-bool CombinerHelper::tryCombineMemCpyFamily(MachineInstr &MI) {
+bool CombinerHelper::tryCombineMemCpyFamily(MachineInstr &MI, unsigned MaxLen) {
// This combine is fairly complex so it's not written with a separate
// matcher function.
assert(MI.getOpcode() == TargetOpcode::G_INTRINSIC_W_SIDE_EFFECTS);
@@ -900,6 +900,9 @@
return true;
}
+ if (MaxLen && KnownLen > MaxLen)
+ return false;
+
if (ID == Intrinsic::memcpy)
return optimizeMemcpy(MI, Dst, Src, KnownLen, DstAlign, SrcAlign, IsVolatile);
if (ID == Intrinsic::memmove)