[XRay][compiler-rt] xray::Array Freelist and Iterator Updates

Summary:
We found a bug while working on a benchmark for the profiling mode which
manifests as a segmentation fault in the profiling handler's
implementation. This change adds unit tests which replicate the
issues in isolation.

We've tracked this down as a bug in the implementation of the Freelist
in the `xray::Array` type. This happens when we trim the array by a
number of elements, where we've been incorrectly assigning pointers for
the links in the freelist of chunk nodes. We've taken the chance to add
more debug-only assertions to the code path and allow us to verify these
assumptions in debug builds.

In the process, we also took the opportunity to use iterators to
implement both `front()` and `back()` which exposes a bug in the
iterator decrement operation.  In particular, when we decrement past a
chunk size boundary, we end up moving too far back and reaching the
`SentinelChunk` prematurely.

This change unblocks us to allow for contributing the non-crashing
version of the benchmarks in the test-suite as well.

Reviewers: kpw

Subscribers: mgorny, llvm-commits

Differential Revision: https://reviews.llvm.org/D48653

llvm-svn: 336644
diff --git a/compiler-rt/lib/xray/xray_function_call_trie.h b/compiler-rt/lib/xray/xray_function_call_trie.h
index 4bff5da..5ad8227 100644
--- a/compiler-rt/lib/xray/xray_function_call_trie.h
+++ b/compiler-rt/lib/xray/xray_function_call_trie.h
@@ -17,8 +17,8 @@
 
 #include "xray_profiling_flags.h"
 #include "xray_segmented_array.h"
+#include <memory> // For placement new.
 #include <utility>
-#include <memory>  // For placement new.
 
 namespace __xray {
 
@@ -101,6 +101,8 @@
     NodeIdPair(Node *N, int32_t F) : NodePtr(N), FId(F) {}
   };
 
+  static_assert(sizeof(NodeIdPair) == 16, "Wrong size for NodeIDPair.");
+
   using NodeIdPairArray = Array<NodeIdPair>;
   using NodeIdPairAllocatorType = NodeIdPairArray::AllocatorType;
 
@@ -128,15 +130,12 @@
 
 private:
   struct ShadowStackEntry {
-    int32_t FId; // We're copying the function ID into the stack to avoid having
-                 // to reach into the node just to get the function ID.
     uint64_t EntryTSC;
     Node *NodePtr;
 
     // We add a constructor here to allow us to inplace-construct through
     // Array<...>'s AppendEmplace.
-    ShadowStackEntry(int32_t F, uint64_t T, Node *N)
-        : FId(F), EntryTSC(T), NodePtr(N) {}
+    ShadowStackEntry(uint64_t T, Node *N) : EntryTSC{T}, NodePtr{N} {}
   };
 
   using NodeArray = Array<Node>;
@@ -219,30 +218,30 @@
 
   // TODO: Support configuration of options through the arguments.
   static Allocators InitAllocators() {
+    return InitAllocatorsCustom(profilingFlags()->per_thread_allocator_max);
+  }
+
+  static Allocators InitAllocatorsCustom(uptr Max) {
     Allocators A;
     auto NodeAllocator = reinterpret_cast<Allocators::NodeAllocatorType *>(
         InternalAlloc(sizeof(Allocators::NodeAllocatorType)));
-    new (NodeAllocator) Allocators::NodeAllocatorType(
-        profilingFlags()->per_thread_allocator_max, 0);
+    new (NodeAllocator) Allocators::NodeAllocatorType(Max, 0);
     A.NodeAllocator = NodeAllocator;
 
     auto RootAllocator = reinterpret_cast<Allocators::RootAllocatorType *>(
         InternalAlloc(sizeof(Allocators::RootAllocatorType)));
-    new (RootAllocator) Allocators::RootAllocatorType(
-        profilingFlags()->per_thread_allocator_max, 0);
+    new (RootAllocator) Allocators::RootAllocatorType(Max, 0);
     A.RootAllocator = RootAllocator;
 
     auto ShadowStackAllocator =
         reinterpret_cast<Allocators::ShadowStackAllocatorType *>(
             InternalAlloc(sizeof(Allocators::ShadowStackAllocatorType)));
-    new (ShadowStackAllocator) Allocators::ShadowStackAllocatorType(
-        profilingFlags()->per_thread_allocator_max, 0);
+    new (ShadowStackAllocator) Allocators::ShadowStackAllocatorType(Max, 0);
     A.ShadowStackAllocator = ShadowStackAllocator;
 
     auto NodeIdPairAllocator = reinterpret_cast<NodeIdPairAllocatorType *>(
         InternalAlloc(sizeof(NodeIdPairAllocatorType)));
-    new (NodeIdPairAllocator)
-        NodeIdPairAllocatorType(profilingFlags()->per_thread_allocator_max, 0);
+    new (NodeIdPairAllocator) NodeIdPairAllocatorType(Max, 0);
     A.NodeIdPairAllocator = NodeIdPairAllocator;
     return A;
   }
@@ -253,20 +252,14 @@
   ShadowStackArray ShadowStack;
   NodeIdPairAllocatorType *NodeIdPairAllocator = nullptr;
 
-  const Allocators &GetGlobalAllocators() {
-    static const Allocators A = [] { return InitAllocators(); }();
-    return A;
-  }
-
 public:
   explicit FunctionCallTrie(const Allocators &A)
       : Nodes(*A.NodeAllocator), Roots(*A.RootAllocator),
         ShadowStack(*A.ShadowStackAllocator),
         NodeIdPairAllocator(A.NodeIdPairAllocator) {}
 
-  FunctionCallTrie() : FunctionCallTrie(GetGlobalAllocators()) {}
-
-  void enterFunction(int32_t FId, uint64_t TSC) {
+  void enterFunction(const int32_t FId, uint64_t TSC) {
+    DCHECK_NE(FId, 0);
     // This function primarily deals with ensuring that the ShadowStack is
     // consistent and ready for when an exit event is encountered.
     if (UNLIKELY(ShadowStack.empty())) {
@@ -275,12 +268,13 @@
       if (UNLIKELY(NewRoot == nullptr))
         return;
       Roots.Append(NewRoot);
-      ShadowStack.AppendEmplace(FId, TSC, NewRoot);
+      ShadowStack.AppendEmplace(TSC, NewRoot);
       return;
     }
 
     auto &Top = ShadowStack.back();
     auto TopNode = Top.NodePtr;
+    DCHECK_NE(TopNode, nullptr);
 
     // If we've seen this callee before, then we just access that node and place
     // that on the top of the stack.
@@ -288,7 +282,7 @@
         [FId](const NodeIdPair &NR) { return NR.FId == FId; });
     if (Callee != nullptr) {
       CHECK_NE(Callee->NodePtr, nullptr);
-      ShadowStack.AppendEmplace(FId, TSC, Callee->NodePtr);
+      ShadowStack.AppendEmplace(TSC, Callee->NodePtr);
       return;
     }
 
@@ -297,8 +291,10 @@
         Nodes.AppendEmplace(TopNode, *NodeIdPairAllocator, 0, 0, FId);
     if (UNLIKELY(NewNode == nullptr))
       return;
+    DCHECK_NE(NewNode, nullptr);
     TopNode->Callees.AppendEmplace(NewNode, FId);
-    ShadowStack.AppendEmplace(FId, TSC, NewNode);
+    ShadowStack.AppendEmplace(TSC, NewNode);
+    DCHECK_NE(ShadowStack.back().NodePtr, nullptr);
     return;
   }
 
@@ -307,14 +303,11 @@
     // entered this function before. We do as little processing here as we can,
     // since most of the hard work would have already been done at function
     // entry.
-    if (UNLIKELY(ShadowStack.empty()))
-      return;
-
     uint64_t CumulativeTreeTime = 0;
     while (!ShadowStack.empty()) {
-      auto &Top = ShadowStack.back();
+      const auto &Top = ShadowStack.back();
       auto TopNode = Top.NodePtr;
-      auto TopFId = TopNode->FId;
+      DCHECK_NE(TopNode, nullptr);
       auto LocalTime = TSC - Top.EntryTSC;
       TopNode->CallCount++;
       TopNode->CumulativeLocalTime += LocalTime - CumulativeTreeTime;
@@ -322,7 +315,7 @@
       ShadowStack.trim(1);
 
       // TODO: Update the histogram for the node.
-      if (TopFId == FId)
+      if (TopNode->FId == FId)
         break;
     }
   }
@@ -344,28 +337,34 @@
   // This function must *not* be called with a non-empty FunctionCallTrie |O|.
   void deepCopyInto(FunctionCallTrie &O) const {
     DCHECK(O.getRoots().empty());
+
+    // We then push the root into a stack, to use as the parent marker for new
+    // nodes we push in as we're traversing depth-first down the call tree.
+    struct NodeAndParent {
+      FunctionCallTrie::Node *Node;
+      FunctionCallTrie::Node *NewNode;
+    };
+    using Stack = Array<NodeAndParent>;
+
+    typename Stack::AllocatorType StackAllocator(
+        profilingFlags()->stack_allocator_max, 0);
+    Stack DFSStack(StackAllocator);
+
     for (const auto Root : getRoots()) {
       // Add a node in O for this root.
       auto NewRoot = O.Nodes.AppendEmplace(
           nullptr, *O.NodeIdPairAllocator, Root->CallCount,
           Root->CumulativeLocalTime, Root->FId);
+
+      // Because we cannot allocate more memory we should bail out right away.
+      if (UNLIKELY(NewRoot == nullptr))
+        return;
+
       O.Roots.Append(NewRoot);
 
-      // We then push the root into a stack, to use as the parent marker for new
-      // nodes we push in as we're traversing depth-first down the call tree.
-      struct NodeAndParent {
-        FunctionCallTrie::Node *Node;
-        FunctionCallTrie::Node *NewNode;
-      };
-      using Stack = Array<NodeAndParent>;
-
-      typename Stack::AllocatorType StackAllocator(
-          profilingFlags()->stack_allocator_max, 0);
-      Stack DFSStack(StackAllocator);
-
       // TODO: Figure out what to do if we fail to allocate any more stack
       // space. Maybe warn or report once?
-      DFSStack.Append(NodeAndParent{Root, NewRoot});
+      DFSStack.AppendEmplace(Root, NewRoot);
       while (!DFSStack.empty()) {
         NodeAndParent NP = DFSStack.back();
         DCHECK_NE(NP.Node, nullptr);
@@ -375,9 +374,10 @@
           auto NewNode = O.Nodes.AppendEmplace(
               NP.NewNode, *O.NodeIdPairAllocator, Callee.NodePtr->CallCount,
               Callee.NodePtr->CumulativeLocalTime, Callee.FId);
-          DCHECK_NE(NewNode, nullptr);
+          if (UNLIKELY(NewNode == nullptr))
+            return;
           NP.NewNode->Callees.AppendEmplace(NewNode, Callee.FId);
-          DFSStack.Append(NodeAndParent{Callee.NodePtr, NewNode});
+          DFSStack.AppendEmplace(Callee.NodePtr, NewNode);
         }
       }
     }
@@ -408,6 +408,9 @@
       if (R == nullptr) {
         TargetRoot = O.Nodes.AppendEmplace(nullptr, *O.NodeIdPairAllocator, 0,
                                            0, Root->FId);
+        if (UNLIKELY(TargetRoot == nullptr))
+          return;
+
         O.Roots.Append(TargetRoot);
       } else {
         TargetRoot = *R;
@@ -430,10 +433,14 @@
           if (TargetCallee == nullptr) {
             auto NewTargetNode = O.Nodes.AppendEmplace(
                 NT.TargetNode, *O.NodeIdPairAllocator, 0, 0, Callee.FId);
+
+            if (UNLIKELY(NewTargetNode == nullptr))
+              return;
+
             TargetCallee =
                 NT.TargetNode->Callees.AppendEmplace(NewTargetNode, Callee.FId);
           }
-          DFSStack.Append(NodeAndTarget{Callee.NodePtr, TargetCallee->NodePtr});
+          DFSStack.AppendEmplace(Callee.NodePtr, TargetCallee->NodePtr);
         }
       }
     }