[AArch64][GlobalISel] Inline tiny memcpy et al at -O0.

FastISel has done this ever since the initial arm64 port was upstreamed, so
there appear to be no issues with doing this at -O0 for very small memcpys.

Gives a 0.2% geomean code size improvement on CTMark.

Differential Revision: https://reviews.llvm.org/D65758

llvm-svn: 367919
diff --git a/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp b/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp
index 373d52f..9149813 100644
--- a/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp
+++ b/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp
@@ -861,7 +861,7 @@
   return true;
 }
 
-bool CombinerHelper::tryCombineMemCpyFamily(MachineInstr &MI) {
+bool CombinerHelper::tryCombineMemCpyFamily(MachineInstr &MI, unsigned MaxLen) {
   // This combine is fairly complex so it's not written with a separate
   // matcher function.
   assert(MI.getOpcode() == TargetOpcode::G_INTRINSIC_W_SIDE_EFFECTS);
@@ -900,6 +900,9 @@
     return true;
   }
 
+  if (MaxLen && KnownLen > MaxLen)
+    return false;
+
   if (ID == Intrinsic::memcpy)
     return optimizeMemcpy(MI, Dst, Src, KnownLen, DstAlign, SrcAlign, IsVolatile);
   if (ID == Intrinsic::memmove)
diff --git a/llvm/lib/Target/AArch64/AArch64PreLegalizerCombiner.cpp b/llvm/lib/Target/AArch64/AArch64PreLegalizerCombiner.cpp
index 5ec209a..835fcf0 100644
--- a/llvm/lib/Target/AArch64/AArch64PreLegalizerCombiner.cpp
+++ b/llvm/lib/Target/AArch64/AArch64PreLegalizerCombiner.cpp
@@ -56,9 +56,12 @@
     case Intrinsic::memcpy:
     case Intrinsic::memmove:
     case Intrinsic::memset: {
+      // At -O0, only inline calls whose known length is at most 32; otherwise
+      // pass MaxLen == 0 (no limit) and let the other heuristics decide.
+      unsigned MaxLen = EnableOpt ? 0 : 32;
-      // Try to inline memcpy type calls if optimizations are enabled.
+      // Try to inline memcpy type calls unless EnableOptSize is set.
-      return (EnableOpt && !EnableOptSize) ? Helper.tryCombineMemCpyFamily(MI)
-                                           : false;
+      return (!EnableOptSize) ? Helper.tryCombineMemCpyFamily(MI, MaxLen)
+                              : false;
     }
     default:
       break;