[XRay] ARM 32-bit no-Thumb support in LLVM

This is a port of XRay to ARM 32-bit, without Thumb support yet. The XRay instrumentation support is moving up to AsmPrinter.
This is one of 3 commits to different repositories of XRay ARM port. The other 2 are:

https://reviews.llvm.org/D23932 (Clang test)
https://reviews.llvm.org/D23933 (compiler-rt)

Differential Revision: https://reviews.llvm.org/D23931

llvm-svn: 281878
diff --git a/llvm/lib/Target/ARM/ARMAsmPrinter.cpp b/llvm/lib/Target/ARM/ARMAsmPrinter.cpp
index 0ffa5b6..80e2f0a 100644
--- a/llvm/lib/Target/ARM/ARMAsmPrinter.cpp
+++ b/llvm/lib/Target/ARM/ARMAsmPrinter.cpp
@@ -164,6 +164,9 @@
   // Emit the rest of the function body.
   EmitFunctionBody();
 
+  // Emit the XRay table for this function.
+  EmitXRayTable();
+
   // If we need V4T thumb mode Register Indirect Jump pads, emit them.
   // These are created per function, rather than per TU, since it's
   // relatively easy to exceed the thumb branch range within a TU.
@@ -2019,6 +2022,12 @@
                                      .addReg(0));
     return;
   }
+  case ARM::PATCHABLE_FUNCTION_ENTER:
+    LowerPATCHABLE_FUNCTION_ENTER(*MI);
+    return;
+  case ARM::PATCHABLE_FUNCTION_EXIT:
+    LowerPATCHABLE_FUNCTION_EXIT(*MI);
+    return;
   }
 
   MCInst TmpInst;
diff --git a/llvm/lib/Target/ARM/ARMAsmPrinter.h b/llvm/lib/Target/ARM/ARMAsmPrinter.h
index 30abdbc..0db0a0f 100644
--- a/llvm/lib/Target/ARM/ARMAsmPrinter.h
+++ b/llvm/lib/Target/ARM/ARMAsmPrinter.h
@@ -101,7 +101,19 @@
   // lowerOperand - Convert a MachineOperand into the equivalent MCOperand.
   bool lowerOperand(const MachineOperand &MO, MCOperand &MCOp);
 
+  //===------------------------------------------------------------------===//
+  // XRay implementation
+  //===------------------------------------------------------------------===//
+public:
+  // XRay-specific lowering for ARM.
+  void LowerPATCHABLE_FUNCTION_ENTER(const MachineInstr &MI);
+  void LowerPATCHABLE_FUNCTION_EXIT(const MachineInstr &MI);
+  // Helper function that emits the XRay sleds we've collected for a particular
+  // function.
+  void EmitXRayTable();
+
 private:
+  void EmitSled(const MachineInstr &MI, SledKind Kind);
 
   // Helpers for EmitStartOfAsmFile() and EmitEndOfAsmFile()
   void emitAttributes();
diff --git a/llvm/lib/Target/ARM/ARMBaseInstrInfo.h b/llvm/lib/Target/ARM/ARMBaseInstrInfo.h
index a42ed0c..b01d5c8 100644
--- a/llvm/lib/Target/ARM/ARMBaseInstrInfo.h
+++ b/llvm/lib/Target/ARM/ARMBaseInstrInfo.h
@@ -100,6 +100,10 @@
   // Return whether the target has an explicit NOP encoding.
   bool hasNOP() const;
 
+  virtual void getNoopForElfTarget(MCInst &NopInst) const {
+    getNoopForMachoTarget(NopInst);
+  }
+
   // Return the non-pre/post incrementing version of 'Opc'. Return 0
   // if there is not such an opcode.
   virtual unsigned getUnindexedOpcode(unsigned Opc) const =0;
diff --git a/llvm/lib/Target/ARM/ARMMCInstLower.cpp b/llvm/lib/Target/ARM/ARMMCInstLower.cpp
index 7429acd..19c9151 100644
--- a/llvm/lib/Target/ARM/ARMMCInstLower.cpp
+++ b/llvm/lib/Target/ARM/ARMMCInstLower.cpp
@@ -21,6 +21,11 @@
 #include "llvm/IR/Mangler.h"
 #include "llvm/MC/MCExpr.h"
 #include "llvm/MC/MCInst.h"
+#include "llvm/MC/MCContext.h"
+#include "llvm/MC/MCSymbolELF.h"
+#include "llvm/MC/MCSectionELF.h"
+#include "llvm/MC/MCInstBuilder.h"
+#include "llvm/MC/MCStreamer.h"
 using namespace llvm;
 
 
@@ -150,3 +155,92 @@
     }
   }
 }
+
+void ARMAsmPrinter::EmitSled(const MachineInstr &MI, SledKind Kind)
+{
+  if (MI.getParent()->getParent()->getInfo<ARMFunctionInfo>()
+    ->isThumbFunction())
+  {
+    MI.emitError("An attempt to perform XRay instrumentation for a"
+      " Thumb function (not supported). Detected when emitting a sled.");
+    return;
+  }
+  static const int8_t NoopsInSledCount = 6;
+  // We want to emit the following pattern:
+  //
+  // .Lxray_sled_N:
+  //   ALIGN
+  //   B #20
+  //   ; 6 NOP instructions (24 bytes)
+  // .tmpN
+  //
+  // We need the 24 bytes (6 instructions) because at runtime, we'd be patching
+  // over the full 28 bytes (7 instructions) with the following pattern:
+  //
+  //   PUSH{ r0, lr }
+  //   MOVW r0, #<lower 16 bits of function ID>
+  //   MOVT r0, #<higher 16 bits of function ID>
+  //   MOVW ip, #<lower 16 bits of address of __xray_FunctionEntry/Exit>
+  //   MOVT ip, #<higher 16 bits of address of __xray_FunctionEntry/Exit>
+  //   BLX ip
+  //   POP{ r0, lr }
+  //
+  OutStreamer->EmitCodeAlignment(4);
+  auto CurSled = OutContext.createTempSymbol("xray_sled_", true);
+  OutStreamer->EmitLabel(CurSled);
+  auto Target = OutContext.createTempSymbol();
+
+  // Emit "B #20" instruction, which jumps over the next 24 bytes (because
+  // register pc is 8 bytes ahead of the jump instruction by the moment CPU
+  // is executing it).
+  // By analogy to ARMAsmPrinter::emitPseudoExpansionLowering() |case ARM::B|.
+  // It is not clear why |addReg(0)| is needed (the last operand).
+  EmitToStreamer(*OutStreamer, MCInstBuilder(ARM::Bcc).addImm(20)
+    .addImm(ARMCC::AL).addReg(0));
+
+  MCInst Noop;
+  Subtarget->getInstrInfo()->getNoopForElfTarget(Noop);
+  for (int8_t I = 0; I < NoopsInSledCount; I++)
+  {
+    OutStreamer->EmitInstruction(Noop, getSubtargetInfo());
+  }
+
+  OutStreamer->EmitLabel(Target);
+  recordSled(CurSled, MI, Kind);
+}
+
+void ARMAsmPrinter::LowerPATCHABLE_FUNCTION_ENTER(const MachineInstr &MI)
+{
+  EmitSled(MI, SledKind::FUNCTION_ENTER);
+}
+
+void ARMAsmPrinter::LowerPATCHABLE_FUNCTION_EXIT(const MachineInstr &MI)
+{
+  EmitSled(MI, SledKind::FUNCTION_EXIT);
+}
+
+void ARMAsmPrinter::EmitXRayTable()
+{
+  if (Sleds.empty())
+    return;
+  if (Subtarget->isTargetELF()) {
+    auto *Section = OutContext.getELFSection(
+      "xray_instr_map", ELF::SHT_PROGBITS,
+      ELF::SHF_ALLOC | ELF::SHF_GROUP | ELF::SHF_MERGE, 0,
+      CurrentFnSym->getName());
+    auto PrevSection = OutStreamer->getCurrentSectionOnly();
+    OutStreamer->SwitchSection(Section);
+    for (const auto &Sled : Sleds) {
+      OutStreamer->EmitSymbolValue(Sled.Sled, 4);
+      OutStreamer->EmitSymbolValue(CurrentFnSym, 4);
+      auto Kind = static_cast<uint8_t>(Sled.Kind);
+      OutStreamer->EmitBytes(
+        StringRef(reinterpret_cast<const char *>(&Kind), 1));
+      OutStreamer->EmitBytes(
+        StringRef(reinterpret_cast<const char *>(&Sled.AlwaysInstrument), 1));
+      OutStreamer->EmitZeros(6);
+    }
+    OutStreamer->SwitchSection(PrevSection);
+  }
+  Sleds.clear();
+}
diff --git a/llvm/lib/Target/ARM/ARMSubtarget.cpp b/llvm/lib/Target/ARM/ARMSubtarget.cpp
index 469d340..a90b495 100644
--- a/llvm/lib/Target/ARM/ARMSubtarget.cpp
+++ b/llvm/lib/Target/ARM/ARMSubtarget.cpp
@@ -101,6 +101,11 @@
                           : (ARMBaseInstrInfo *)new Thumb2InstrInfo(*this)),
       TLInfo(TM, *this) {}
 
+bool ARMSubtarget::isXRaySupported() const {
+  // We don't currently suppport Thumb, but Windows requires Thumb.
+  return hasV6Ops() && hasARMOps() && !isTargetWindows();
+}
+
 void ARMSubtarget::initializeEnvironment() {
   // MCAsmInfo isn't always present (e.g. in opt) so we can't initialize this
   // directly from it, but we can try to make sure they're consistent when both
diff --git a/llvm/lib/Target/ARM/ARMSubtarget.h b/llvm/lib/Target/ARM/ARMSubtarget.h
index 2c14eb0..6d528b8 100644
--- a/llvm/lib/Target/ARM/ARMSubtarget.h
+++ b/llvm/lib/Target/ARM/ARMSubtarget.h
@@ -540,6 +540,8 @@
   }
   bool isTargetAndroid() const { return TargetTriple.isAndroid(); }
 
+  virtual bool isXRaySupported() const override;
+
   bool isAPCS_ABI() const;
   bool isAAPCS_ABI() const;
   bool isAAPCS16_ABI() const;
diff --git a/llvm/lib/Target/X86/X86AsmPrinter.h b/llvm/lib/Target/X86/X86AsmPrinter.h
index dcb7b5a..50acb39 100644
--- a/llvm/lib/Target/X86/X86AsmPrinter.h
+++ b/llvm/lib/Target/X86/X86AsmPrinter.h
@@ -71,27 +71,6 @@
 
   StackMapShadowTracker SMShadowTracker;
 
-  // This describes the kind of sled we're storing in the XRay table.
-  enum class SledKind : uint8_t {
-    FUNCTION_ENTER = 0,
-    FUNCTION_EXIT = 1,
-    TAIL_CALL = 2,
-  };
-
-  // The table will contain these structs that point to the sled, the function
-  // containing the sled, and what kind of sled (and whether they should always
-  // be instrumented).
-  struct XRayFunctionEntry {
-    const MCSymbol *Sled;
-    const MCSymbol *Function;
-    SledKind Kind;
-    bool AlwaysInstrument;
-    const class Function *Fn;
-  };
-
-  // All the sleds to be emitted.
-  std::vector<XRayFunctionEntry> Sleds;
-
   // All instructions emitted by the X86AsmPrinter should use this helper
   // method.
   //
@@ -117,8 +96,6 @@
   // function.
   void EmitXRayTable();
 
-  // Helper function to record a given XRay sled.
-  void recordSled(MCSymbol *Sled, const MachineInstr &MI, SledKind Kind);
 public:
   explicit X86AsmPrinter(TargetMachine &TM,
                          std::unique_ptr<MCStreamer> Streamer)
diff --git a/llvm/lib/Target/X86/X86MCInstLower.cpp b/llvm/lib/Target/X86/X86MCInstLower.cpp
index 52c0551..7dfe9f2 100644
--- a/llvm/lib/Target/X86/X86MCInstLower.cpp
+++ b/llvm/lib/Target/X86/X86MCInstLower.cpp
@@ -1020,16 +1020,6 @@
            getSubtargetInfo());
 }
 
-void X86AsmPrinter::recordSled(MCSymbol *Sled, const MachineInstr &MI,
-                               SledKind Kind) {
-  auto Fn = MI.getParent()->getParent()->getFunction();
-  auto Attr = Fn->getFnAttribute("function-instrument");
-  bool AlwaysInstrument =
-      Attr.isStringAttribute() && Attr.getValueAsString() == "xray-always";
-  Sleds.emplace_back(
-      XRayFunctionEntry{Sled, CurrentFnSym, Kind, AlwaysInstrument, Fn});
-}
-
 void X86AsmPrinter::LowerPATCHABLE_FUNCTION_ENTER(const MachineInstr &MI,
                                                   X86MCInstLower &MCIL) {
   // We want to emit the following pattern:
diff --git a/llvm/lib/Target/X86/X86Subtarget.h b/llvm/lib/Target/X86/X86Subtarget.h
index c1f862d..a5cd83d 100644
--- a/llvm/lib/Target/X86/X86Subtarget.h
+++ b/llvm/lib/Target/X86/X86Subtarget.h
@@ -460,6 +460,8 @@
   bool hasPKU() const { return HasPKU; }
   bool hasMPX() const { return HasMPX; }
 
+  virtual bool isXRaySupported() const override { return is64Bit(); }
+
   bool isAtom() const { return X86ProcFamily == IntelAtom; }
   bool isSLM() const { return X86ProcFamily == IntelSLM; }
   bool useSoftFloat() const { return UseSoftFloat; }