Hardware-assisted AddressSanitizer (llvm part).

Summary:
This is LLVM instrumentation for the new HWASan tool. It is basically
a stripped down copy of ASan at this point, w/o stack or global
support. Instrumenation adds a global constructor + runtime callbacks
for every load and store.

HWASan comes with its own IR attribute.

A brief design document can be found in
clang/docs/HardwareAssistedAddressSanitizerDesign.rst (submitted earlier).

Reviewers: kcc, pcc, alekseyshl

Subscribers: srhines, mehdi_amini, mgorny, javed.absar, eraman, llvm-commits, hiraditya

Differential Revision: https://reviews.llvm.org/D40932

llvm-svn: 320217
diff --git a/llvm/lib/Transforms/IPO/ForceFunctionAttrs.cpp b/llvm/lib/Transforms/IPO/ForceFunctionAttrs.cpp
index e48c3d7..325a5d7 100644
--- a/llvm/lib/Transforms/IPO/ForceFunctionAttrs.cpp
+++ b/llvm/lib/Transforms/IPO/ForceFunctionAttrs.cpp
@@ -52,6 +52,7 @@
       .Case("returns_twice", Attribute::ReturnsTwice)
       .Case("safestack", Attribute::SafeStack)
       .Case("sanitize_address", Attribute::SanitizeAddress)
+      .Case("sanitize_hwaddress", Attribute::SanitizeHWAddress)
       .Case("sanitize_memory", Attribute::SanitizeMemory)
       .Case("sanitize_thread", Attribute::SanitizeThread)
       .Case("ssp", Attribute::StackProtect)
diff --git a/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp b/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp
index a00e6f7..aa05512 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp
@@ -3607,7 +3607,8 @@
   case Intrinsic::lifetime_start:
     // Asan needs to poison memory to detect invalid access which is possible
     // even for empty lifetime range.
-    if (II->getFunction()->hasFnAttribute(Attribute::SanitizeAddress))
+    if (II->getFunction()->hasFnAttribute(Attribute::SanitizeAddress) ||
+        II->getFunction()->hasFnAttribute(Attribute::SanitizeHWAddress))
       break;
 
     if (removeTriviallyEmptyRange(*II, Intrinsic::lifetime_start,
diff --git a/llvm/lib/Transforms/Instrumentation/CMakeLists.txt b/llvm/lib/Transforms/Instrumentation/CMakeLists.txt
index f2806e2..66fdcb3 100644
--- a/llvm/lib/Transforms/Instrumentation/CMakeLists.txt
+++ b/llvm/lib/Transforms/Instrumentation/CMakeLists.txt
@@ -12,6 +12,7 @@
   SanitizerCoverage.cpp
   ThreadSanitizer.cpp
   EfficiencySanitizer.cpp
+  HWAddressSanitizer.cpp
 
   ADDITIONAL_HEADER_DIRS
   ${LLVM_MAIN_INCLUDE_DIR}/llvm/Transforms
diff --git a/llvm/lib/Transforms/Instrumentation/HWAddressSanitizer.cpp b/llvm/lib/Transforms/Instrumentation/HWAddressSanitizer.cpp
new file mode 100644
index 0000000..7a1852c
--- /dev/null
+++ b/llvm/lib/Transforms/Instrumentation/HWAddressSanitizer.cpp
@@ -0,0 +1,282 @@
+//===- HWAddressSanitizer.cpp - detector of uninitialized reads -------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+/// \file
+/// This file is a part of HWAddressSanitizer, an address sanity checker
+/// based on tagged addressing.
+//===----------------------------------------------------------------------===//
+
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/StringExtras.h"
+#include "llvm/ADT/StringRef.h"
+#include "llvm/ADT/Triple.h"
+#include "llvm/IR/Attributes.h"
+#include "llvm/IR/BasicBlock.h"
+#include "llvm/IR/Constant.h"
+#include "llvm/IR/Constants.h"
+#include "llvm/IR/DataLayout.h"
+#include "llvm/IR/DerivedTypes.h"
+#include "llvm/IR/Function.h"
+#include "llvm/IR/IRBuilder.h"
+#include "llvm/IR/InlineAsm.h"
+#include "llvm/IR/InstVisitor.h"
+#include "llvm/IR/Instruction.h"
+#include "llvm/IR/Instructions.h"
+#include "llvm/IR/IntrinsicInst.h"
+#include "llvm/IR/Intrinsics.h"
+#include "llvm/IR/LLVMContext.h"
+#include "llvm/IR/Module.h"
+#include "llvm/IR/Type.h"
+#include "llvm/IR/Value.h"
+#include "llvm/Pass.h"
+#include "llvm/Support/Casting.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Transforms/Instrumentation.h"
+#include "llvm/Transforms/Utils/ModuleUtils.h"
+
+using namespace llvm;
+
+#define DEBUG_TYPE "hwasan"
+
+static const char *const kHwasanModuleCtorName = "hwasan.module_ctor";
+static const char *const kHwasanInitName = "__hwasan_init";
+
+// Accesses sizes are powers of two: 1, 2, 4, 8, 16.
+static const size_t kNumberOfAccessSizes = 5;
+
+static cl::opt<std::string> ClMemoryAccessCallbackPrefix(
+    "hwasan-memory-access-callback-prefix",
+    cl::desc("Prefix for memory access callbacks"), cl::Hidden,
+    cl::init("__hwasan_"));
+
+static cl::opt<bool> ClInstrumentReads("hwasan-instrument-reads",
+                                       cl::desc("instrument read instructions"),
+                                       cl::Hidden, cl::init(true));
+
+static cl::opt<bool> ClInstrumentWrites(
+    "hwasan-instrument-writes", cl::desc("instrument write instructions"),
+    cl::Hidden, cl::init(true));
+
+static cl::opt<bool> ClInstrumentAtomics(
+    "hwasan-instrument-atomics",
+    cl::desc("instrument atomic instructions (rmw, cmpxchg)"), cl::Hidden,
+    cl::init(true));
+
+namespace {
+
+/// \brief An instrumentation pass implementing detection of addressability bugs
+/// using tagged pointers.
+class HWAddressSanitizer : public FunctionPass {
+public:
+  // Pass identification, replacement for typeid.
+  static char ID;
+
+  HWAddressSanitizer() : FunctionPass(ID) {}
+
+  StringRef getPassName() const override { return "HWAddressSanitizer"; }
+
+  bool runOnFunction(Function &F) override;
+  bool doInitialization(Module &M) override;
+
+  void initializeCallbacks(Module &M);
+  bool instrumentMemAccess(Instruction *I);
+  Value *isInterestingMemoryAccess(Instruction *I, bool *IsWrite,
+                                   uint64_t *TypeSize, unsigned *Alignment,
+                                   Value **MaybeMask);
+
+private:
+  LLVMContext *C;
+  Type *IntptrTy;
+
+  Function *HwasanCtorFunction;
+
+  Function *HwasanMemoryAccessCallback[2][kNumberOfAccessSizes];
+  Function *HwasanMemoryAccessCallbackSized[2];
+};
+
+} // end anonymous namespace
+
+char HWAddressSanitizer::ID = 0;
+
+INITIALIZE_PASS_BEGIN(
+    HWAddressSanitizer, "hwasan",
+    "HWAddressSanitizer: detect memory bugs using tagged addressing.", false, false)
+INITIALIZE_PASS_END(
+    HWAddressSanitizer, "hwasan",
+    "HWAddressSanitizer: detect memory bugs using tagged addressing.", false, false)
+
+FunctionPass *llvm::createHWAddressSanitizerPass() {
+  return new HWAddressSanitizer();
+}
+
+/// \brief Module-level initialization.
+///
+/// inserts a call to __hwasan_init to the module's constructor list.
+bool HWAddressSanitizer::doInitialization(Module &M) {
+  DEBUG(dbgs() << "Init " << M.getName() << "\n");
+  auto &DL = M.getDataLayout();
+
+  Triple TargetTriple(M.getTargetTriple());
+
+  C = &(M.getContext());
+  IRBuilder<> IRB(*C);
+  IntptrTy = IRB.getIntPtrTy(DL);
+
+  std::tie(HwasanCtorFunction, std::ignore) =
+      createSanitizerCtorAndInitFunctions(M, kHwasanModuleCtorName,
+                                          kHwasanInitName,
+                                          /*InitArgTypes=*/{},
+                                          /*InitArgs=*/{});
+  appendToGlobalCtors(M, HwasanCtorFunction, 0);
+  return true;
+}
+
+void HWAddressSanitizer::initializeCallbacks(Module &M) {
+  IRBuilder<> IRB(*C);
+  for (size_t AccessIsWrite = 0; AccessIsWrite <= 1; AccessIsWrite++) {
+    const std::string TypeStr = AccessIsWrite ? "store" : "load";
+
+    HwasanMemoryAccessCallbackSized[AccessIsWrite] =
+        checkSanitizerInterfaceFunction(M.getOrInsertFunction(
+            ClMemoryAccessCallbackPrefix + TypeStr,
+            FunctionType::get(IRB.getVoidTy(), {IntptrTy, IntptrTy}, false)));
+
+    for (size_t AccessSizeIndex = 0; AccessSizeIndex < kNumberOfAccessSizes;
+         AccessSizeIndex++) {
+      HwasanMemoryAccessCallback[AccessIsWrite][AccessSizeIndex] =
+          checkSanitizerInterfaceFunction(M.getOrInsertFunction(
+              ClMemoryAccessCallbackPrefix + TypeStr +
+                  itostr(1ULL << AccessSizeIndex),
+              FunctionType::get(IRB.getVoidTy(), {IntptrTy}, false)));
+    }
+  }
+}
+
+Value *HWAddressSanitizer::isInterestingMemoryAccess(Instruction *I,
+                                                   bool *IsWrite,
+                                                   uint64_t *TypeSize,
+                                                   unsigned *Alignment,
+                                                   Value **MaybeMask) {
+  // Skip memory accesses inserted by another instrumentation.
+  if (I->getMetadata("nosanitize")) return nullptr;
+
+  Value *PtrOperand = nullptr;
+  const DataLayout &DL = I->getModule()->getDataLayout();
+  if (LoadInst *LI = dyn_cast<LoadInst>(I)) {
+    if (!ClInstrumentReads) return nullptr;
+    *IsWrite = false;
+    *TypeSize = DL.getTypeStoreSizeInBits(LI->getType());
+    *Alignment = LI->getAlignment();
+    PtrOperand = LI->getPointerOperand();
+  } else if (StoreInst *SI = dyn_cast<StoreInst>(I)) {
+    if (!ClInstrumentWrites) return nullptr;
+    *IsWrite = true;
+    *TypeSize = DL.getTypeStoreSizeInBits(SI->getValueOperand()->getType());
+    *Alignment = SI->getAlignment();
+    PtrOperand = SI->getPointerOperand();
+  } else if (AtomicRMWInst *RMW = dyn_cast<AtomicRMWInst>(I)) {
+    if (!ClInstrumentAtomics) return nullptr;
+    *IsWrite = true;
+    *TypeSize = DL.getTypeStoreSizeInBits(RMW->getValOperand()->getType());
+    *Alignment = 0;
+    PtrOperand = RMW->getPointerOperand();
+  } else if (AtomicCmpXchgInst *XCHG = dyn_cast<AtomicCmpXchgInst>(I)) {
+    if (!ClInstrumentAtomics) return nullptr;
+    *IsWrite = true;
+    *TypeSize = DL.getTypeStoreSizeInBits(XCHG->getCompareOperand()->getType());
+    *Alignment = 0;
+    PtrOperand = XCHG->getPointerOperand();
+  }
+
+  if (PtrOperand) {
+    // Do not instrument acesses from different address spaces; we cannot deal
+    // with them.
+    Type *PtrTy = cast<PointerType>(PtrOperand->getType()->getScalarType());
+    if (PtrTy->getPointerAddressSpace() != 0)
+      return nullptr;
+
+    // Ignore swifterror addresses.
+    // swifterror memory addresses are mem2reg promoted by instruction
+    // selection. As such they cannot have regular uses like an instrumentation
+    // function and it makes no sense to track them as memory.
+    if (PtrOperand->isSwiftError())
+      return nullptr;
+  }
+
+  return PtrOperand;
+}
+
+static size_t TypeSizeToSizeIndex(uint32_t TypeSize) {
+  size_t Res = countTrailingZeros(TypeSize / 8);
+  assert(Res < kNumberOfAccessSizes);
+  return Res;
+}
+
+bool HWAddressSanitizer::instrumentMemAccess(Instruction *I) {
+  DEBUG(dbgs() << "Instrumenting: " << *I << "\n");
+  bool IsWrite = false;
+  unsigned Alignment = 0;
+  uint64_t TypeSize = 0;
+  Value *MaybeMask = nullptr;
+  Value *Addr =
+      isInterestingMemoryAccess(I, &IsWrite, &TypeSize, &Alignment, &MaybeMask);
+
+  if (!Addr)
+    return false;
+
+  if (MaybeMask)
+    return false; //FIXME
+
+  IRBuilder<> IRB(I);
+  Value *AddrLong = IRB.CreatePointerCast(Addr, IntptrTy);
+  if (isPowerOf2_64(TypeSize) &&
+      (TypeSize / 8 <= (1UL << (kNumberOfAccessSizes - 1)))) {
+    size_t AccessSizeIndex = TypeSizeToSizeIndex(TypeSize);
+    IRB.CreateCall(HwasanMemoryAccessCallback[IsWrite][AccessSizeIndex],
+                   AddrLong);
+  } else {
+    IRB.CreateCall(HwasanMemoryAccessCallbackSized[IsWrite],
+                   {AddrLong, ConstantInt::get(IntptrTy, TypeSize / 8)});
+  }
+
+  return true;
+}
+
+bool HWAddressSanitizer::runOnFunction(Function &F) {
+  if (&F == HwasanCtorFunction)
+    return false;
+
+  if (!F.hasFnAttribute(Attribute::SanitizeHWAddress))
+    return false;
+
+  DEBUG(dbgs() << "Function: " << F.getName() << "\n");
+
+  initializeCallbacks(*F.getParent());
+
+  bool Changed = false;
+  SmallVector<Instruction*, 16> ToInstrument;
+  for (auto &BB : F) {
+    for (auto &Inst : BB) {
+      Value *MaybeMask = nullptr;
+      bool IsWrite;
+      unsigned Alignment;
+      uint64_t TypeSize;
+      Value *Addr = isInterestingMemoryAccess(&Inst, &IsWrite, &TypeSize,
+                                              &Alignment, &MaybeMask);
+      if (Addr || isa<MemIntrinsic>(Inst))
+        ToInstrument.push_back(&Inst);
+    }
+  }
+
+  for (auto Inst : ToInstrument)
+    Changed |= instrumentMemAccess(Inst);
+
+  return Changed;
+}
diff --git a/llvm/lib/Transforms/Instrumentation/Instrumentation.cpp b/llvm/lib/Transforms/Instrumentation/Instrumentation.cpp
index ed5e9db..8e9eea9 100644
--- a/llvm/lib/Transforms/Instrumentation/Instrumentation.cpp
+++ b/llvm/lib/Transforms/Instrumentation/Instrumentation.cpp
@@ -66,6 +66,7 @@
   initializePGOMemOPSizeOptLegacyPassPass(Registry);
   initializeInstrProfilingLegacyPassPass(Registry);
   initializeMemorySanitizerPass(Registry);
+  initializeHWAddressSanitizerPass(Registry);
   initializeThreadSanitizerPass(Registry);
   initializeSanitizerCoverageModulePass(Registry);
   initializeDataFlowSanitizerPass(Registry);
diff --git a/llvm/lib/Transforms/Scalar/GVN.cpp b/llvm/lib/Transforms/Scalar/GVN.cpp
index 76e295c..e2c1eaf 100644
--- a/llvm/lib/Transforms/Scalar/GVN.cpp
+++ b/llvm/lib/Transforms/Scalar/GVN.cpp
@@ -1299,7 +1299,10 @@
 /// non-local by performing PHI construction.
 bool GVN::processNonLocalLoad(LoadInst *LI) {
   // non-local speculations are not allowed under asan.
-  if (LI->getParent()->getParent()->hasFnAttribute(Attribute::SanitizeAddress))
+  if (LI->getParent()->getParent()->hasFnAttribute(
+          Attribute::SanitizeAddress) ||
+      LI->getParent()->getParent()->hasFnAttribute(
+          Attribute::SanitizeHWAddress))
     return false;
 
   // Step 1: Find the non-local dependencies of the load.