[AArch64][GlobalISel] Inline tiny memcpy et al at -O0.
FastISel has done this since the initial arm64 port was upstreamed, so
there seem to be no issues with inlining very small memcpys at -O0.
Gives a 0.2% geomean code size improvement on CTMark.
Differential Revision: https://reviews.llvm.org/D65758
llvm-svn: 367919
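
For context, "inlining" here means the combiner expands a fixed-length
memcpy/memmove/memset call into plain loads and stores rather than emitting a
libcall. A rough C++ analogy of what a 16-byte copy turns into (an
illustrative sketch only; copy16 is not part of the patch):

    #include <cstdint>
    #include <cstring>

    // A fixed 16-byte copy expanded into two register-width load/store
    // pairs, roughly what the combiner emits as G_LOAD/G_STORE on AArch64
    // instead of a bl to memcpy.
    void copy16(void *Dst, const void *Src) {
      std::uint64_t Lo, Hi;
      std::memcpy(&Lo, Src, 8);                                // low 8-byte load
      std::memcpy(&Hi, static_cast<const char *>(Src) + 8, 8); // high 8-byte load
      std::memcpy(Dst, &Lo, 8);                                // low 8-byte store
      std::memcpy(static_cast<char *>(Dst) + 8, &Hi, 8);       // high 8-byte store
    }

The 8-byte std::memcpy calls are just the portable way to spell single
loads and stores; compilers lower each one to a single instruction.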
diff --git a/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp b/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp
index 373d52f..9149813 100644
--- a/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp
+++ b/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp
@@ -861,7 +861,7 @@
return true;
}
-bool CombinerHelper::tryCombineMemCpyFamily(MachineInstr &MI) {
+bool CombinerHelper::tryCombineMemCpyFamily(MachineInstr &MI, unsigned MaxLen) {
// This combine is fairly complex, so it's not written with a separate
// matcher function.
assert(MI.getOpcode() == TargetOpcode::G_INTRINSIC_W_SIDE_EFFECTS);
@@ -900,6 +900,9 @@
return true;
}
+ if (MaxLen && KnownLen > MaxLen)
+ return false;
+
if (ID == Intrinsic::memcpy)
return optimizeMemcpy(MI, Dst, Src, KnownLen, DstAlign, SrcAlign, IsVolatile);
if (ID == Intrinsic::memmove)
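
The contract of the new MaxLen parameter, restated as a standalone predicate
(withinInlineCap is a hypothetical helper for illustration; the real check is
the two lines added above):

    #include <cstdint>

    // MaxLen == 0 means "no cap"; any nonzero value rejects copies whose
    // statically known length exceeds it, before the per-intrinsic
    // expansion (optimizeMemcpy et al.) runs.
    bool withinInlineCap(std::uint64_t KnownLen, unsigned MaxLen) {
      return MaxLen == 0 || KnownLen <= MaxLen;
    }

Encoding "no cap" as 0 keeps the uncapped behaviour available to callers that
want it, as the AArch64 change below does when optimizations are enabled.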
diff --git a/llvm/lib/Target/AArch64/AArch64PreLegalizerCombiner.cpp b/llvm/lib/Target/AArch64/AArch64PreLegalizerCombiner.cpp
index 5ec209a..835fcf0 100644
--- a/llvm/lib/Target/AArch64/AArch64PreLegalizerCombiner.cpp
+++ b/llvm/lib/Target/AArch64/AArch64PreLegalizerCombiner.cpp
@@ -56,9 +56,12 @@
case Intrinsic::memcpy:
case Intrinsic::memmove:
case Intrinsic::memset: {
+ // If we're at -O0, cap inlining at 32 bytes; otherwise let the other
+ // heuristics decide.
+ unsigned MaxLen = EnableOpt ? 0 : 32;
- // Try to inline memcpy type calls if optimizations are enabled.
+ // Try to inline memcpy-type calls unless we're optimizing for size.
- return (EnableOpt && !EnableOptSize) ? Helper.tryCombineMemCpyFamily(MI)
- : false;
+ return (!EnableOptSize) ? Helper.tryCombineMemCpyFamily(MI, MaxLen)
+ : false;
}
default:
break;
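
Putting the two flags together, the policy this hunk implements can be
restated as follows (memcpyInlineCap is an invented helper for illustration;
the in-tree code computes this inline):

    #include <optional>

    // nullopt: don't attempt the combine at all (-Os/-Oz keeps the libcall).
    // 0:       combine with no length cap (optimizing for speed).
    // 32:      at -O0, inline only copies of at most 32 bytes.
    std::optional<unsigned> memcpyInlineCap(bool EnableOpt, bool EnableOptSize) {
      if (EnableOptSize)
        return std::nullopt;  // optimizing for size: leave the call alone
      return EnableOpt ? 0u : 32u;
    }

Note the EnableOpt guard on the combine itself is gone: the combine now fires
at -O0 too, just with the 32-byte cap in force.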