|  | //===-- EfficiencySanitizer.cpp - performance tuner -----------------------===// | 
|  | // | 
|  | //                     The LLVM Compiler Infrastructure | 
|  | // | 
|  | // This file is distributed under the University of Illinois Open Source | 
|  | // License. See LICENSE.TXT for details. | 
|  | // | 
|  | //===----------------------------------------------------------------------===// | 
|  | // | 
|  | // This file is a part of EfficiencySanitizer, a family of performance tuners | 
|  | // that detects multiple performance issues via separate sub-tools. | 
|  | // | 
|  | // The instrumentation phase is straightforward: | 
|  | //   - Take action on every memory access: either inlined instrumentation, | 
|  | //     or Inserted calls to our run-time library. | 
|  | //   - Optimizations may apply to avoid instrumenting some of the accesses. | 
|  | //   - Turn mem{set,cpy,move} instrinsics into library calls. | 
|  | // The rest is handled by the run-time library. | 
|  | //===----------------------------------------------------------------------===// | 
|  |  | 
|  | #include "llvm/Transforms/Instrumentation.h" | 
|  | #include "llvm/ADT/SmallString.h" | 
|  | #include "llvm/ADT/SmallVector.h" | 
|  | #include "llvm/ADT/Statistic.h" | 
|  | #include "llvm/ADT/StringExtras.h" | 
|  | #include "llvm/Analysis/TargetLibraryInfo.h" | 
|  | #include "llvm/IR/Function.h" | 
|  | #include "llvm/IR/IRBuilder.h" | 
|  | #include "llvm/IR/IntrinsicInst.h" | 
|  | #include "llvm/IR/Module.h" | 
|  | #include "llvm/IR/Type.h" | 
|  | #include "llvm/Support/CommandLine.h" | 
|  | #include "llvm/Support/Debug.h" | 
|  | #include "llvm/Support/raw_ostream.h" | 
|  | #include "llvm/Transforms/Utils/BasicBlockUtils.h" | 
|  | #include "llvm/Transforms/Utils/Local.h" | 
|  | #include "llvm/Transforms/Utils/ModuleUtils.h" | 
|  |  | 
|  | using namespace llvm; | 
|  |  | 
|  | #define DEBUG_TYPE "esan" | 
|  |  | 
|  | // The tool type must be just one of these ClTool* options, as the tools | 
|  | // cannot be combined due to shadow memory constraints. | 
|  | static cl::opt<bool> | 
|  | ClToolCacheFrag("esan-cache-frag", cl::init(false), | 
|  | cl::desc("Detect data cache fragmentation"), cl::Hidden); | 
|  | static cl::opt<bool> | 
|  | ClToolWorkingSet("esan-working-set", cl::init(false), | 
|  | cl::desc("Measure the working set size"), cl::Hidden); | 
|  | // Each new tool will get its own opt flag here. | 
|  | // These are converted to EfficiencySanitizerOptions for use | 
|  | // in the code. | 
|  |  | 
|  | static cl::opt<bool> ClInstrumentLoadsAndStores( | 
|  | "esan-instrument-loads-and-stores", cl::init(true), | 
|  | cl::desc("Instrument loads and stores"), cl::Hidden); | 
|  | static cl::opt<bool> ClInstrumentMemIntrinsics( | 
|  | "esan-instrument-memintrinsics", cl::init(true), | 
|  | cl::desc("Instrument memintrinsics (memset/memcpy/memmove)"), cl::Hidden); | 
|  | static cl::opt<bool> ClInstrumentFastpath( | 
|  | "esan-instrument-fastpath", cl::init(true), | 
|  | cl::desc("Instrument fastpath"), cl::Hidden); | 
|  | static cl::opt<bool> ClAuxFieldInfo( | 
|  | "esan-aux-field-info", cl::init(true), | 
|  | cl::desc("Generate binary with auxiliary struct field information"), | 
|  | cl::Hidden); | 
|  |  | 
|  | // Experiments show that the performance difference can be 2x or more, | 
|  | // and accuracy loss is typically negligible, so we turn this on by default. | 
|  | static cl::opt<bool> ClAssumeIntraCacheLine( | 
|  | "esan-assume-intra-cache-line", cl::init(true), | 
|  | cl::desc("Assume each memory access touches just one cache line, for " | 
|  | "better performance but with a potential loss of accuracy."), | 
|  | cl::Hidden); | 
|  |  | 
|  | STATISTIC(NumInstrumentedLoads, "Number of instrumented loads"); | 
|  | STATISTIC(NumInstrumentedStores, "Number of instrumented stores"); | 
|  | STATISTIC(NumFastpaths, "Number of instrumented fastpaths"); | 
|  | STATISTIC(NumAccessesWithIrregularSize, | 
|  | "Number of accesses with a size outside our targeted callout sizes"); | 
|  | STATISTIC(NumIgnoredStructs, "Number of ignored structs"); | 
|  | STATISTIC(NumIgnoredGEPs, "Number of ignored GEP instructions"); | 
|  | STATISTIC(NumInstrumentedGEPs, "Number of instrumented GEP instructions"); | 
|  | STATISTIC(NumAssumedIntraCacheLine, | 
|  | "Number of accesses assumed to be intra-cache-line"); | 
|  |  | 
|  | static const uint64_t EsanCtorAndDtorPriority = 0; | 
|  | static const char *const EsanModuleCtorName = "esan.module_ctor"; | 
|  | static const char *const EsanModuleDtorName = "esan.module_dtor"; | 
|  | static const char *const EsanInitName = "__esan_init"; | 
|  | static const char *const EsanExitName = "__esan_exit"; | 
|  |  | 
|  | // We need to specify the tool to the runtime earlier than | 
|  | // the ctor is called in some cases, so we set a global variable. | 
|  | static const char *const EsanWhichToolName = "__esan_which_tool"; | 
|  |  | 
|  | // We must keep these Shadow* constants consistent with the esan runtime. | 
|  | // FIXME: Try to place these shadow constants, the names of the __esan_* | 
|  | // interface functions, and the ToolType enum into a header shared between | 
|  | // llvm and compiler-rt. | 
|  | static const uint64_t ShadowMask = 0x00000fffffffffffull; | 
|  | static const uint64_t ShadowOffs[3] = { // Indexed by scale | 
|  | 0x0000130000000000ull, | 
|  | 0x0000220000000000ull, | 
|  | 0x0000440000000000ull, | 
|  | }; | 
|  | // This array is indexed by the ToolType enum. | 
|  | static const int ShadowScale[] = { | 
|  | 0, // ESAN_None. | 
|  | 2, // ESAN_CacheFrag: 4B:1B, so 4 to 1 == >>2. | 
|  | 6, // ESAN_WorkingSet: 64B:1B, so 64 to 1 == >>6. | 
|  | }; | 
|  |  | 
|  | // MaxStructCounterNameSize is a soft size limit to avoid insanely long | 
|  | // names for those extremely large structs. | 
|  | static const unsigned MaxStructCounterNameSize = 512; | 
|  |  | 
|  | namespace { | 
|  |  | 
|  | static EfficiencySanitizerOptions | 
|  | OverrideOptionsFromCL(EfficiencySanitizerOptions Options) { | 
|  | if (ClToolCacheFrag) | 
|  | Options.ToolType = EfficiencySanitizerOptions::ESAN_CacheFrag; | 
|  | else if (ClToolWorkingSet) | 
|  | Options.ToolType = EfficiencySanitizerOptions::ESAN_WorkingSet; | 
|  |  | 
|  | // Direct opt invocation with no params will have the default ESAN_None. | 
|  | // We run the default tool in that case. | 
|  | if (Options.ToolType == EfficiencySanitizerOptions::ESAN_None) | 
|  | Options.ToolType = EfficiencySanitizerOptions::ESAN_CacheFrag; | 
|  |  | 
|  | return Options; | 
|  | } | 
|  |  | 
|  | // Create a constant for Str so that we can pass it to the run-time lib. | 
|  | static GlobalVariable *createPrivateGlobalForString(Module &M, StringRef Str, | 
|  | bool AllowMerging) { | 
|  | Constant *StrConst = ConstantDataArray::getString(M.getContext(), Str); | 
|  | // We use private linkage for module-local strings. If they can be merged | 
|  | // with another one, we set the unnamed_addr attribute. | 
|  | GlobalVariable *GV = | 
|  | new GlobalVariable(M, StrConst->getType(), true, | 
|  | GlobalValue::PrivateLinkage, StrConst, ""); | 
|  | if (AllowMerging) | 
|  | GV->setUnnamedAddr(GlobalValue::UnnamedAddr::Global); | 
|  | GV->setAlignment(1);  // Strings may not be merged w/o setting align 1. | 
|  | return GV; | 
|  | } | 
|  |  | 
|  | /// EfficiencySanitizer: instrument each module to find performance issues. | 
|  | class EfficiencySanitizer : public ModulePass { | 
|  | public: | 
|  | EfficiencySanitizer( | 
|  | const EfficiencySanitizerOptions &Opts = EfficiencySanitizerOptions()) | 
|  | : ModulePass(ID), Options(OverrideOptionsFromCL(Opts)) {} | 
|  | const char *getPassName() const override; | 
|  | void getAnalysisUsage(AnalysisUsage &AU) const override; | 
|  | bool runOnModule(Module &M) override; | 
|  | static char ID; | 
|  |  | 
|  | private: | 
|  | bool initOnModule(Module &M); | 
|  | void initializeCallbacks(Module &M); | 
|  | bool shouldIgnoreStructType(StructType *StructTy); | 
|  | void createStructCounterName( | 
|  | StructType *StructTy, SmallString<MaxStructCounterNameSize> &NameStr); | 
|  | void createCacheFragAuxGV( | 
|  | Module &M, const DataLayout &DL, StructType *StructTy, | 
|  | GlobalVariable *&TypeNames, GlobalVariable *&Offsets, GlobalVariable *&Size); | 
|  | GlobalVariable *createCacheFragInfoGV(Module &M, const DataLayout &DL, | 
|  | Constant *UnitName); | 
|  | Constant *createEsanInitToolInfoArg(Module &M, const DataLayout &DL); | 
|  | void createDestructor(Module &M, Constant *ToolInfoArg); | 
|  | bool runOnFunction(Function &F, Module &M); | 
|  | bool instrumentLoadOrStore(Instruction *I, const DataLayout &DL); | 
|  | bool instrumentMemIntrinsic(MemIntrinsic *MI); | 
|  | bool instrumentGetElementPtr(Instruction *I, Module &M); | 
|  | bool insertCounterUpdate(Instruction *I, StructType *StructTy, | 
|  | unsigned CounterIdx); | 
|  | unsigned getFieldCounterIdx(StructType *StructTy) { | 
|  | return 0; | 
|  | } | 
|  | unsigned getArrayCounterIdx(StructType *StructTy) { | 
|  | return StructTy->getNumElements(); | 
|  | } | 
|  | unsigned getStructCounterSize(StructType *StructTy) { | 
|  | // The struct counter array includes: | 
|  | // - one counter for each struct field, | 
|  | // - one counter for the struct access within an array. | 
|  | return (StructTy->getNumElements()/*field*/ + 1/*array*/); | 
|  | } | 
|  | bool shouldIgnoreMemoryAccess(Instruction *I); | 
|  | int getMemoryAccessFuncIndex(Value *Addr, const DataLayout &DL); | 
|  | Value *appToShadow(Value *Shadow, IRBuilder<> &IRB); | 
|  | bool instrumentFastpath(Instruction *I, const DataLayout &DL, bool IsStore, | 
|  | Value *Addr, unsigned Alignment); | 
|  | // Each tool has its own fastpath routine: | 
|  | bool instrumentFastpathCacheFrag(Instruction *I, const DataLayout &DL, | 
|  | Value *Addr, unsigned Alignment); | 
|  | bool instrumentFastpathWorkingSet(Instruction *I, const DataLayout &DL, | 
|  | Value *Addr, unsigned Alignment); | 
|  |  | 
|  | EfficiencySanitizerOptions Options; | 
|  | LLVMContext *Ctx; | 
|  | Type *IntptrTy; | 
|  | // Our slowpath involves callouts to the runtime library. | 
|  | // Access sizes are powers of two: 1, 2, 4, 8, 16. | 
|  | static const size_t NumberOfAccessSizes = 5; | 
|  | Function *EsanAlignedLoad[NumberOfAccessSizes]; | 
|  | Function *EsanAlignedStore[NumberOfAccessSizes]; | 
|  | Function *EsanUnalignedLoad[NumberOfAccessSizes]; | 
|  | Function *EsanUnalignedStore[NumberOfAccessSizes]; | 
|  | // For irregular sizes of any alignment: | 
|  | Function *EsanUnalignedLoadN, *EsanUnalignedStoreN; | 
|  | Function *MemmoveFn, *MemcpyFn, *MemsetFn; | 
|  | Function *EsanCtorFunction; | 
|  | Function *EsanDtorFunction; | 
|  | // Remember the counter variable for each struct type to avoid | 
|  | // recomputing the variable name later during instrumentation. | 
|  | std::map<Type *, GlobalVariable *> StructTyMap; | 
|  | }; | 
|  | } // namespace | 
|  |  | 
|  | char EfficiencySanitizer::ID = 0; | 
|  | INITIALIZE_PASS_BEGIN( | 
|  | EfficiencySanitizer, "esan", | 
|  | "EfficiencySanitizer: finds performance issues.", false, false) | 
|  | INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass) | 
|  | INITIALIZE_PASS_END( | 
|  | EfficiencySanitizer, "esan", | 
|  | "EfficiencySanitizer: finds performance issues.", false, false) | 
|  |  | 
|  | const char *EfficiencySanitizer::getPassName() const { | 
|  | return "EfficiencySanitizer"; | 
|  | } | 
|  |  | 
|  | void EfficiencySanitizer::getAnalysisUsage(AnalysisUsage &AU) const { | 
|  | AU.addRequired<TargetLibraryInfoWrapperPass>(); | 
|  | } | 
|  |  | 
|  | ModulePass * | 
|  | llvm::createEfficiencySanitizerPass(const EfficiencySanitizerOptions &Options) { | 
|  | return new EfficiencySanitizer(Options); | 
|  | } | 
|  |  | 
|  | void EfficiencySanitizer::initializeCallbacks(Module &M) { | 
|  | IRBuilder<> IRB(M.getContext()); | 
|  | // Initialize the callbacks. | 
|  | for (size_t Idx = 0; Idx < NumberOfAccessSizes; ++Idx) { | 
|  | const unsigned ByteSize = 1U << Idx; | 
|  | std::string ByteSizeStr = utostr(ByteSize); | 
|  | // We'll inline the most common (i.e., aligned and frequent sizes) | 
|  | // load + store instrumentation: these callouts are for the slowpath. | 
|  | SmallString<32> AlignedLoadName("__esan_aligned_load" + ByteSizeStr); | 
|  | EsanAlignedLoad[Idx] = | 
|  | checkSanitizerInterfaceFunction(M.getOrInsertFunction( | 
|  | AlignedLoadName, IRB.getVoidTy(), IRB.getInt8PtrTy(), nullptr)); | 
|  | SmallString<32> AlignedStoreName("__esan_aligned_store" + ByteSizeStr); | 
|  | EsanAlignedStore[Idx] = | 
|  | checkSanitizerInterfaceFunction(M.getOrInsertFunction( | 
|  | AlignedStoreName, IRB.getVoidTy(), IRB.getInt8PtrTy(), nullptr)); | 
|  | SmallString<32> UnalignedLoadName("__esan_unaligned_load" + ByteSizeStr); | 
|  | EsanUnalignedLoad[Idx] = | 
|  | checkSanitizerInterfaceFunction(M.getOrInsertFunction( | 
|  | UnalignedLoadName, IRB.getVoidTy(), IRB.getInt8PtrTy(), nullptr)); | 
|  | SmallString<32> UnalignedStoreName("__esan_unaligned_store" + ByteSizeStr); | 
|  | EsanUnalignedStore[Idx] = | 
|  | checkSanitizerInterfaceFunction(M.getOrInsertFunction( | 
|  | UnalignedStoreName, IRB.getVoidTy(), IRB.getInt8PtrTy(), nullptr)); | 
|  | } | 
|  | EsanUnalignedLoadN = checkSanitizerInterfaceFunction( | 
|  | M.getOrInsertFunction("__esan_unaligned_loadN", IRB.getVoidTy(), | 
|  | IRB.getInt8PtrTy(), IntptrTy, nullptr)); | 
|  | EsanUnalignedStoreN = checkSanitizerInterfaceFunction( | 
|  | M.getOrInsertFunction("__esan_unaligned_storeN", IRB.getVoidTy(), | 
|  | IRB.getInt8PtrTy(), IntptrTy, nullptr)); | 
|  | MemmoveFn = checkSanitizerInterfaceFunction( | 
|  | M.getOrInsertFunction("memmove", IRB.getInt8PtrTy(), IRB.getInt8PtrTy(), | 
|  | IRB.getInt8PtrTy(), IntptrTy, nullptr)); | 
|  | MemcpyFn = checkSanitizerInterfaceFunction( | 
|  | M.getOrInsertFunction("memcpy", IRB.getInt8PtrTy(), IRB.getInt8PtrTy(), | 
|  | IRB.getInt8PtrTy(), IntptrTy, nullptr)); | 
|  | MemsetFn = checkSanitizerInterfaceFunction( | 
|  | M.getOrInsertFunction("memset", IRB.getInt8PtrTy(), IRB.getInt8PtrTy(), | 
|  | IRB.getInt32Ty(), IntptrTy, nullptr)); | 
|  | } | 
|  |  | 
|  | bool EfficiencySanitizer::shouldIgnoreStructType(StructType *StructTy) { | 
|  | if (StructTy == nullptr || StructTy->isOpaque() /* no struct body */) | 
|  | return true; | 
|  | return false; | 
|  | } | 
|  |  | 
|  | void EfficiencySanitizer::createStructCounterName( | 
|  | StructType *StructTy, SmallString<MaxStructCounterNameSize> &NameStr) { | 
|  | // Append NumFields and field type ids to avoid struct conflicts | 
|  | // with the same name but different fields. | 
|  | if (StructTy->hasName()) | 
|  | NameStr += StructTy->getName(); | 
|  | else | 
|  | NameStr += "struct.anon"; | 
|  | // We allow the actual size of the StructCounterName to be larger than | 
|  | // MaxStructCounterNameSize and append #NumFields and at least one | 
|  | // field type id. | 
|  | // Append #NumFields. | 
|  | NameStr += "#"; | 
|  | Twine(StructTy->getNumElements()).toVector(NameStr); | 
|  | // Append struct field type ids in the reverse order. | 
|  | for (int i = StructTy->getNumElements() - 1; i >= 0; --i) { | 
|  | NameStr += "#"; | 
|  | Twine(StructTy->getElementType(i)->getTypeID()).toVector(NameStr); | 
|  | if (NameStr.size() >= MaxStructCounterNameSize) | 
|  | break; | 
|  | } | 
|  | if (StructTy->isLiteral()) { | 
|  | // End with # for literal struct. | 
|  | NameStr += "#"; | 
|  | } | 
|  | } | 
|  |  | 
|  | // Create global variables with auxiliary information (e.g., struct field size, | 
|  | // offset, and type name) for better user report. | 
|  | void EfficiencySanitizer::createCacheFragAuxGV( | 
|  | Module &M, const DataLayout &DL, StructType *StructTy, | 
|  | GlobalVariable *&TypeName, GlobalVariable *&Offset, | 
|  | GlobalVariable *&Size) { | 
|  | auto *Int8PtrTy = Type::getInt8PtrTy(*Ctx); | 
|  | auto *Int32Ty = Type::getInt32Ty(*Ctx); | 
|  | // FieldTypeName. | 
|  | auto *TypeNameArrayTy = ArrayType::get(Int8PtrTy, StructTy->getNumElements()); | 
|  | TypeName = new GlobalVariable(M, TypeNameArrayTy, true, | 
|  | GlobalVariable::InternalLinkage, nullptr); | 
|  | SmallVector<Constant *, 16> TypeNameVec; | 
|  | // FieldOffset. | 
|  | auto *OffsetArrayTy = ArrayType::get(Int32Ty, StructTy->getNumElements()); | 
|  | Offset = new GlobalVariable(M, OffsetArrayTy, true, | 
|  | GlobalVariable::InternalLinkage, nullptr); | 
|  | SmallVector<Constant *, 16> OffsetVec; | 
|  | // FieldSize | 
|  | auto *SizeArrayTy = ArrayType::get(Int32Ty, StructTy->getNumElements()); | 
|  | Size = new GlobalVariable(M, SizeArrayTy, true, | 
|  | GlobalVariable::InternalLinkage, nullptr); | 
|  | SmallVector<Constant *, 16> SizeVec; | 
|  | for (unsigned i = 0; i < StructTy->getNumElements(); ++i) { | 
|  | Type *Ty = StructTy->getElementType(i); | 
|  | std::string Str; | 
|  | raw_string_ostream StrOS(Str); | 
|  | Ty->print(StrOS); | 
|  | TypeNameVec.push_back( | 
|  | ConstantExpr::getPointerCast( | 
|  | createPrivateGlobalForString(M, StrOS.str(), true), | 
|  | Int8PtrTy)); | 
|  | OffsetVec.push_back( | 
|  | ConstantInt::get(Int32Ty, | 
|  | DL.getStructLayout(StructTy)->getElementOffset(i))); | 
|  | SizeVec.push_back(ConstantInt::get(Int32Ty, | 
|  | DL.getTypeAllocSize(Ty))); | 
|  | } | 
|  | TypeName->setInitializer(ConstantArray::get(TypeNameArrayTy, TypeNameVec)); | 
|  | Offset->setInitializer(ConstantArray::get(OffsetArrayTy, OffsetVec)); | 
|  | Size->setInitializer(ConstantArray::get(SizeArrayTy, SizeVec)); | 
|  | } | 
|  |  | 
|  | // Create the global variable for the cache-fragmentation tool. | 
|  | GlobalVariable *EfficiencySanitizer::createCacheFragInfoGV( | 
|  | Module &M, const DataLayout &DL, Constant *UnitName) { | 
|  | assert(Options.ToolType == EfficiencySanitizerOptions::ESAN_CacheFrag); | 
|  |  | 
|  | auto *Int8PtrTy = Type::getInt8PtrTy(*Ctx); | 
|  | auto *Int8PtrPtrTy = Int8PtrTy->getPointerTo(); | 
|  | auto *Int32Ty = Type::getInt32Ty(*Ctx); | 
|  | auto *Int32PtrTy = Type::getInt32PtrTy(*Ctx); | 
|  | auto *Int64Ty = Type::getInt64Ty(*Ctx); | 
|  | auto *Int64PtrTy = Type::getInt64PtrTy(*Ctx); | 
|  | // This structure should be kept consistent with the StructInfo struct | 
|  | // in the runtime library. | 
|  | // struct StructInfo { | 
|  | //   const char *StructName; | 
|  | //   u32 Size; | 
|  | //   u32 NumFields; | 
|  | //   u32 *FieldOffset;           // auxiliary struct field info. | 
|  | //   u32 *FieldSize;             // auxiliary struct field info. | 
|  | //   const char **FieldTypeName; // auxiliary struct field info. | 
|  | //   u64 *FieldCounters; | 
|  | //   u64 *ArrayCounter; | 
|  | // }; | 
|  | auto *StructInfoTy = | 
|  | StructType::get(Int8PtrTy, Int32Ty, Int32Ty, Int32PtrTy, Int32PtrTy, | 
|  | Int8PtrPtrTy, Int64PtrTy, Int64PtrTy, nullptr); | 
|  | auto *StructInfoPtrTy = StructInfoTy->getPointerTo(); | 
|  | // This structure should be kept consistent with the CacheFragInfo struct | 
|  | // in the runtime library. | 
|  | // struct CacheFragInfo { | 
|  | //   const char *UnitName; | 
|  | //   u32 NumStructs; | 
|  | //   StructInfo *Structs; | 
|  | // }; | 
|  | auto *CacheFragInfoTy = | 
|  | StructType::get(Int8PtrTy, Int32Ty, StructInfoPtrTy, nullptr); | 
|  |  | 
|  | std::vector<StructType *> Vec = M.getIdentifiedStructTypes(); | 
|  | unsigned NumStructs = 0; | 
|  | SmallVector<Constant *, 16> Initializers; | 
|  |  | 
|  | for (auto &StructTy : Vec) { | 
|  | if (shouldIgnoreStructType(StructTy)) { | 
|  | ++NumIgnoredStructs; | 
|  | continue; | 
|  | } | 
|  | ++NumStructs; | 
|  |  | 
|  | // StructName. | 
|  | SmallString<MaxStructCounterNameSize> CounterNameStr; | 
|  | createStructCounterName(StructTy, CounterNameStr); | 
|  | GlobalVariable *StructCounterName = createPrivateGlobalForString( | 
|  | M, CounterNameStr, /*AllowMerging*/true); | 
|  |  | 
|  | // Counters. | 
|  | // We create the counter array with StructCounterName and weak linkage | 
|  | // so that the structs with the same name and layout from different | 
|  | // compilation units will be merged into one. | 
|  | auto *CounterArrayTy = ArrayType::get(Int64Ty, | 
|  | getStructCounterSize(StructTy)); | 
|  | GlobalVariable *Counters = | 
|  | new GlobalVariable(M, CounterArrayTy, false, | 
|  | GlobalVariable::WeakAnyLinkage, | 
|  | ConstantAggregateZero::get(CounterArrayTy), | 
|  | CounterNameStr); | 
|  |  | 
|  | // Remember the counter variable for each struct type. | 
|  | StructTyMap.insert(std::pair<Type *, GlobalVariable *>(StructTy, Counters)); | 
|  |  | 
|  | // We pass the field type name array, offset array, and size array to | 
|  | // the runtime for better reporting. | 
|  | GlobalVariable *TypeName = nullptr, *Offset = nullptr, *Size = nullptr; | 
|  | if (ClAuxFieldInfo) | 
|  | createCacheFragAuxGV(M, DL, StructTy, TypeName, Offset, Size); | 
|  |  | 
|  | Constant *FieldCounterIdx[2]; | 
|  | FieldCounterIdx[0] = ConstantInt::get(Int32Ty, 0); | 
|  | FieldCounterIdx[1] = ConstantInt::get(Int32Ty, | 
|  | getFieldCounterIdx(StructTy)); | 
|  | Constant *ArrayCounterIdx[2]; | 
|  | ArrayCounterIdx[0] = ConstantInt::get(Int32Ty, 0); | 
|  | ArrayCounterIdx[1] = ConstantInt::get(Int32Ty, | 
|  | getArrayCounterIdx(StructTy)); | 
|  | Initializers.push_back( | 
|  | ConstantStruct::get( | 
|  | StructInfoTy, | 
|  | ConstantExpr::getPointerCast(StructCounterName, Int8PtrTy), | 
|  | ConstantInt::get(Int32Ty, | 
|  | DL.getStructLayout(StructTy)->getSizeInBytes()), | 
|  | ConstantInt::get(Int32Ty, StructTy->getNumElements()), | 
|  | Offset == nullptr ? ConstantPointerNull::get(Int32PtrTy) : | 
|  | ConstantExpr::getPointerCast(Offset, Int32PtrTy), | 
|  | Size == nullptr ? ConstantPointerNull::get(Int32PtrTy) : | 
|  | ConstantExpr::getPointerCast(Size, Int32PtrTy), | 
|  | TypeName == nullptr ? ConstantPointerNull::get(Int8PtrPtrTy) : | 
|  | ConstantExpr::getPointerCast(TypeName, Int8PtrPtrTy), | 
|  | ConstantExpr::getGetElementPtr(CounterArrayTy, Counters, | 
|  | FieldCounterIdx), | 
|  | ConstantExpr::getGetElementPtr(CounterArrayTy, Counters, | 
|  | ArrayCounterIdx), | 
|  | nullptr)); | 
|  | } | 
|  | // Structs. | 
|  | Constant *StructInfo; | 
|  | if (NumStructs == 0) { | 
|  | StructInfo = ConstantPointerNull::get(StructInfoPtrTy); | 
|  | } else { | 
|  | auto *StructInfoArrayTy = ArrayType::get(StructInfoTy, NumStructs); | 
|  | StructInfo = ConstantExpr::getPointerCast( | 
|  | new GlobalVariable(M, StructInfoArrayTy, false, | 
|  | GlobalVariable::InternalLinkage, | 
|  | ConstantArray::get(StructInfoArrayTy, Initializers)), | 
|  | StructInfoPtrTy); | 
|  | } | 
|  |  | 
|  | auto *CacheFragInfoGV = new GlobalVariable( | 
|  | M, CacheFragInfoTy, true, GlobalVariable::InternalLinkage, | 
|  | ConstantStruct::get(CacheFragInfoTy, | 
|  | UnitName, | 
|  | ConstantInt::get(Int32Ty, NumStructs), | 
|  | StructInfo, | 
|  | nullptr)); | 
|  | return CacheFragInfoGV; | 
|  | } | 
|  |  | 
|  | // Create the tool-specific argument passed to EsanInit and EsanExit. | 
|  | Constant *EfficiencySanitizer::createEsanInitToolInfoArg(Module &M, | 
|  | const DataLayout &DL) { | 
|  | // This structure contains tool-specific information about each compilation | 
|  | // unit (module) and is passed to the runtime library. | 
|  | GlobalVariable *ToolInfoGV = nullptr; | 
|  |  | 
|  | auto *Int8PtrTy = Type::getInt8PtrTy(*Ctx); | 
|  | // Compilation unit name. | 
|  | auto *UnitName = ConstantExpr::getPointerCast( | 
|  | createPrivateGlobalForString(M, M.getModuleIdentifier(), true), | 
|  | Int8PtrTy); | 
|  |  | 
|  | // Create the tool-specific variable. | 
|  | if (Options.ToolType == EfficiencySanitizerOptions::ESAN_CacheFrag) | 
|  | ToolInfoGV = createCacheFragInfoGV(M, DL, UnitName); | 
|  |  | 
|  | if (ToolInfoGV != nullptr) | 
|  | return ConstantExpr::getPointerCast(ToolInfoGV, Int8PtrTy); | 
|  |  | 
|  | // Create the null pointer if no tool-specific variable created. | 
|  | return ConstantPointerNull::get(Int8PtrTy); | 
|  | } | 
|  |  | 
|  | void EfficiencySanitizer::createDestructor(Module &M, Constant *ToolInfoArg) { | 
|  | PointerType *Int8PtrTy = Type::getInt8PtrTy(*Ctx); | 
|  | EsanDtorFunction = Function::Create(FunctionType::get(Type::getVoidTy(*Ctx), | 
|  | false), | 
|  | GlobalValue::InternalLinkage, | 
|  | EsanModuleDtorName, &M); | 
|  | ReturnInst::Create(*Ctx, BasicBlock::Create(*Ctx, "", EsanDtorFunction)); | 
|  | IRBuilder<> IRB_Dtor(EsanDtorFunction->getEntryBlock().getTerminator()); | 
|  | Function *EsanExit = checkSanitizerInterfaceFunction( | 
|  | M.getOrInsertFunction(EsanExitName, IRB_Dtor.getVoidTy(), | 
|  | Int8PtrTy, nullptr)); | 
|  | EsanExit->setLinkage(Function::ExternalLinkage); | 
|  | IRB_Dtor.CreateCall(EsanExit, {ToolInfoArg}); | 
|  | appendToGlobalDtors(M, EsanDtorFunction, EsanCtorAndDtorPriority); | 
|  | } | 
|  |  | 
|  | bool EfficiencySanitizer::initOnModule(Module &M) { | 
|  | Ctx = &M.getContext(); | 
|  | const DataLayout &DL = M.getDataLayout(); | 
|  | IRBuilder<> IRB(M.getContext()); | 
|  | IntegerType *OrdTy = IRB.getInt32Ty(); | 
|  | PointerType *Int8PtrTy = Type::getInt8PtrTy(*Ctx); | 
|  | IntptrTy = DL.getIntPtrType(M.getContext()); | 
|  | // Create the variable passed to EsanInit and EsanExit. | 
|  | Constant *ToolInfoArg = createEsanInitToolInfoArg(M, DL); | 
|  | // Constructor | 
|  | // We specify the tool type both in the EsanWhichToolName global | 
|  | // and as an arg to the init routine as a sanity check. | 
|  | std::tie(EsanCtorFunction, std::ignore) = createSanitizerCtorAndInitFunctions( | 
|  | M, EsanModuleCtorName, EsanInitName, /*InitArgTypes=*/{OrdTy, Int8PtrTy}, | 
|  | /*InitArgs=*/{ | 
|  | ConstantInt::get(OrdTy, static_cast<int>(Options.ToolType)), | 
|  | ToolInfoArg}); | 
|  | appendToGlobalCtors(M, EsanCtorFunction, EsanCtorAndDtorPriority); | 
|  |  | 
|  | createDestructor(M, ToolInfoArg); | 
|  |  | 
|  | new GlobalVariable(M, OrdTy, true, | 
|  | GlobalValue::WeakAnyLinkage, | 
|  | ConstantInt::get(OrdTy, | 
|  | static_cast<int>(Options.ToolType)), | 
|  | EsanWhichToolName); | 
|  |  | 
|  | return true; | 
|  | } | 
|  |  | 
|  | Value *EfficiencySanitizer::appToShadow(Value *Shadow, IRBuilder<> &IRB) { | 
|  | // Shadow = ((App & Mask) + Offs) >> Scale | 
|  | Shadow = IRB.CreateAnd(Shadow, ConstantInt::get(IntptrTy, ShadowMask)); | 
|  | uint64_t Offs; | 
|  | int Scale = ShadowScale[Options.ToolType]; | 
|  | if (Scale <= 2) | 
|  | Offs = ShadowOffs[Scale]; | 
|  | else | 
|  | Offs = ShadowOffs[0] << Scale; | 
|  | Shadow = IRB.CreateAdd(Shadow, ConstantInt::get(IntptrTy, Offs)); | 
|  | if (Scale > 0) | 
|  | Shadow = IRB.CreateLShr(Shadow, Scale); | 
|  | return Shadow; | 
|  | } | 
|  |  | 
|  | bool EfficiencySanitizer::shouldIgnoreMemoryAccess(Instruction *I) { | 
|  | if (Options.ToolType == EfficiencySanitizerOptions::ESAN_CacheFrag) { | 
|  | // We'd like to know about cache fragmentation in vtable accesses and | 
|  | // constant data references, so we do not currently ignore anything. | 
|  | return false; | 
|  | } else if (Options.ToolType == EfficiencySanitizerOptions::ESAN_WorkingSet) { | 
|  | // TODO: the instrumentation disturbs the data layout on the stack, so we | 
|  | // may want to add an option to ignore stack references (if we can | 
|  | // distinguish them) to reduce overhead. | 
|  | } | 
|  | // TODO(bruening): future tools will be returning true for some cases. | 
|  | return false; | 
|  | } | 
|  |  | 
|  | bool EfficiencySanitizer::runOnModule(Module &M) { | 
|  | bool Res = initOnModule(M); | 
|  | initializeCallbacks(M); | 
|  | for (auto &F : M) { | 
|  | Res |= runOnFunction(F, M); | 
|  | } | 
|  | return Res; | 
|  | } | 
|  |  | 
|  | bool EfficiencySanitizer::runOnFunction(Function &F, Module &M) { | 
|  | // This is required to prevent instrumenting the call to __esan_init from | 
|  | // within the module constructor. | 
|  | if (&F == EsanCtorFunction) | 
|  | return false; | 
|  | SmallVector<Instruction *, 8> LoadsAndStores; | 
|  | SmallVector<Instruction *, 8> MemIntrinCalls; | 
|  | SmallVector<Instruction *, 8> GetElementPtrs; | 
|  | bool Res = false; | 
|  | const DataLayout &DL = M.getDataLayout(); | 
|  | const TargetLibraryInfo *TLI = | 
|  | &getAnalysis<TargetLibraryInfoWrapperPass>().getTLI(); | 
|  |  | 
|  | for (auto &BB : F) { | 
|  | for (auto &Inst : BB) { | 
|  | if ((isa<LoadInst>(Inst) || isa<StoreInst>(Inst) || | 
|  | isa<AtomicRMWInst>(Inst) || isa<AtomicCmpXchgInst>(Inst)) && | 
|  | !shouldIgnoreMemoryAccess(&Inst)) | 
|  | LoadsAndStores.push_back(&Inst); | 
|  | else if (isa<MemIntrinsic>(Inst)) | 
|  | MemIntrinCalls.push_back(&Inst); | 
|  | else if (isa<GetElementPtrInst>(Inst)) | 
|  | GetElementPtrs.push_back(&Inst); | 
|  | else if (CallInst *CI = dyn_cast<CallInst>(&Inst)) | 
|  | maybeMarkSanitizerLibraryCallNoBuiltin(CI, TLI); | 
|  | } | 
|  | } | 
|  |  | 
|  | if (ClInstrumentLoadsAndStores) { | 
|  | for (auto Inst : LoadsAndStores) { | 
|  | Res |= instrumentLoadOrStore(Inst, DL); | 
|  | } | 
|  | } | 
|  |  | 
|  | if (ClInstrumentMemIntrinsics) { | 
|  | for (auto Inst : MemIntrinCalls) { | 
|  | Res |= instrumentMemIntrinsic(cast<MemIntrinsic>(Inst)); | 
|  | } | 
|  | } | 
|  |  | 
|  | if (Options.ToolType == EfficiencySanitizerOptions::ESAN_CacheFrag) { | 
|  | for (auto Inst : GetElementPtrs) { | 
|  | Res |= instrumentGetElementPtr(Inst, M); | 
|  | } | 
|  | } | 
|  |  | 
|  | return Res; | 
|  | } | 
|  |  | 
|  | bool EfficiencySanitizer::instrumentLoadOrStore(Instruction *I, | 
|  | const DataLayout &DL) { | 
|  | IRBuilder<> IRB(I); | 
|  | bool IsStore; | 
|  | Value *Addr; | 
|  | unsigned Alignment; | 
|  | if (LoadInst *Load = dyn_cast<LoadInst>(I)) { | 
|  | IsStore = false; | 
|  | Alignment = Load->getAlignment(); | 
|  | Addr = Load->getPointerOperand(); | 
|  | } else if (StoreInst *Store = dyn_cast<StoreInst>(I)) { | 
|  | IsStore = true; | 
|  | Alignment = Store->getAlignment(); | 
|  | Addr = Store->getPointerOperand(); | 
|  | } else if (AtomicRMWInst *RMW = dyn_cast<AtomicRMWInst>(I)) { | 
|  | IsStore = true; | 
|  | Alignment = 0; | 
|  | Addr = RMW->getPointerOperand(); | 
|  | } else if (AtomicCmpXchgInst *Xchg = dyn_cast<AtomicCmpXchgInst>(I)) { | 
|  | IsStore = true; | 
|  | Alignment = 0; | 
|  | Addr = Xchg->getPointerOperand(); | 
|  | } else | 
|  | llvm_unreachable("Unsupported mem access type"); | 
|  |  | 
|  | Type *OrigTy = cast<PointerType>(Addr->getType())->getElementType(); | 
|  | const uint32_t TypeSizeBytes = DL.getTypeStoreSizeInBits(OrigTy) / 8; | 
|  | Value *OnAccessFunc = nullptr; | 
|  |  | 
|  | // Convert 0 to the default alignment. | 
|  | if (Alignment == 0) | 
|  | Alignment = DL.getPrefTypeAlignment(OrigTy); | 
|  |  | 
|  | if (IsStore) | 
|  | NumInstrumentedStores++; | 
|  | else | 
|  | NumInstrumentedLoads++; | 
|  | int Idx = getMemoryAccessFuncIndex(Addr, DL); | 
|  | if (Idx < 0) { | 
|  | OnAccessFunc = IsStore ? EsanUnalignedStoreN : EsanUnalignedLoadN; | 
|  | IRB.CreateCall(OnAccessFunc, | 
|  | {IRB.CreatePointerCast(Addr, IRB.getInt8PtrTy()), | 
|  | ConstantInt::get(IntptrTy, TypeSizeBytes)}); | 
|  | } else { | 
|  | if (ClInstrumentFastpath && | 
|  | instrumentFastpath(I, DL, IsStore, Addr, Alignment)) { | 
|  | NumFastpaths++; | 
|  | return true; | 
|  | } | 
|  | if (Alignment == 0 || (Alignment % TypeSizeBytes) == 0) | 
|  | OnAccessFunc = IsStore ? EsanAlignedStore[Idx] : EsanAlignedLoad[Idx]; | 
|  | else | 
|  | OnAccessFunc = IsStore ? EsanUnalignedStore[Idx] : EsanUnalignedLoad[Idx]; | 
|  | IRB.CreateCall(OnAccessFunc, | 
|  | IRB.CreatePointerCast(Addr, IRB.getInt8PtrTy())); | 
|  | } | 
|  | return true; | 
|  | } | 
|  |  | 
|  | // It's simplest to replace the memset/memmove/memcpy intrinsics with | 
|  | // calls that the runtime library intercepts. | 
|  | // Our pass is late enough that calls should not turn back into intrinsics. | 
|  | bool EfficiencySanitizer::instrumentMemIntrinsic(MemIntrinsic *MI) { | 
|  | IRBuilder<> IRB(MI); | 
|  | bool Res = false; | 
|  | if (isa<MemSetInst>(MI)) { | 
|  | IRB.CreateCall( | 
|  | MemsetFn, | 
|  | {IRB.CreatePointerCast(MI->getArgOperand(0), IRB.getInt8PtrTy()), | 
|  | IRB.CreateIntCast(MI->getArgOperand(1), IRB.getInt32Ty(), false), | 
|  | IRB.CreateIntCast(MI->getArgOperand(2), IntptrTy, false)}); | 
|  | MI->eraseFromParent(); | 
|  | Res = true; | 
|  | } else if (isa<MemTransferInst>(MI)) { | 
|  | IRB.CreateCall( | 
|  | isa<MemCpyInst>(MI) ? MemcpyFn : MemmoveFn, | 
|  | {IRB.CreatePointerCast(MI->getArgOperand(0), IRB.getInt8PtrTy()), | 
|  | IRB.CreatePointerCast(MI->getArgOperand(1), IRB.getInt8PtrTy()), | 
|  | IRB.CreateIntCast(MI->getArgOperand(2), IntptrTy, false)}); | 
|  | MI->eraseFromParent(); | 
|  | Res = true; | 
|  | } else | 
|  | llvm_unreachable("Unsupported mem intrinsic type"); | 
|  | return Res; | 
|  | } | 
|  |  | 
|  | bool EfficiencySanitizer::instrumentGetElementPtr(Instruction *I, Module &M) { | 
|  | GetElementPtrInst *GepInst = dyn_cast<GetElementPtrInst>(I); | 
|  | bool Res = false; | 
|  | if (GepInst == nullptr || GepInst->getNumIndices() == 1) { | 
|  | ++NumIgnoredGEPs; | 
|  | return false; | 
|  | } | 
|  | Type *SourceTy = GepInst->getSourceElementType(); | 
|  | StructType *StructTy; | 
|  | ConstantInt *Idx; | 
|  | // Check if GEP calculates address from a struct array. | 
|  | if (isa<StructType>(SourceTy)) { | 
|  | StructTy = cast<StructType>(SourceTy); | 
|  | Idx = dyn_cast<ConstantInt>(GepInst->getOperand(1)); | 
|  | if ((Idx == nullptr || Idx->getSExtValue() != 0) && | 
|  | !shouldIgnoreStructType(StructTy) && StructTyMap.count(StructTy) != 0) | 
|  | Res |= insertCounterUpdate(I, StructTy, getArrayCounterIdx(StructTy)); | 
|  | } | 
|  | // Iterate all (except the first and the last) idx within each GEP instruction | 
|  | // for possible nested struct field address calculation. | 
|  | for (unsigned i = 1; i < GepInst->getNumIndices(); ++i) { | 
|  | SmallVector<Value *, 8> IdxVec(GepInst->idx_begin(), | 
|  | GepInst->idx_begin() + i); | 
|  | Type *Ty = GetElementPtrInst::getIndexedType(SourceTy, IdxVec); | 
|  | unsigned CounterIdx = 0; | 
|  | if (isa<ArrayType>(Ty)) { | 
|  | ArrayType *ArrayTy = cast<ArrayType>(Ty); | 
|  | StructTy = dyn_cast<StructType>(ArrayTy->getElementType()); | 
|  | if (shouldIgnoreStructType(StructTy) || StructTyMap.count(StructTy) == 0) | 
|  | continue; | 
|  | // The last counter for struct array access. | 
|  | CounterIdx = getArrayCounterIdx(StructTy); | 
|  | } else if (isa<StructType>(Ty)) { | 
|  | StructTy = cast<StructType>(Ty); | 
|  | if (shouldIgnoreStructType(StructTy) || StructTyMap.count(StructTy) == 0) | 
|  | continue; | 
|  | // Get the StructTy's subfield index. | 
|  | Idx = cast<ConstantInt>(GepInst->getOperand(i+1)); | 
|  | assert(Idx->getSExtValue() >= 0 && | 
|  | Idx->getSExtValue() < StructTy->getNumElements()); | 
|  | CounterIdx = getFieldCounterIdx(StructTy) + Idx->getSExtValue(); | 
|  | } | 
|  | Res |= insertCounterUpdate(I, StructTy, CounterIdx); | 
|  | } | 
|  | if (Res) | 
|  | ++NumInstrumentedGEPs; | 
|  | else | 
|  | ++NumIgnoredGEPs; | 
|  | return Res; | 
|  | } | 
|  |  | 
|  | bool EfficiencySanitizer::insertCounterUpdate(Instruction *I, | 
|  | StructType *StructTy, | 
|  | unsigned CounterIdx) { | 
|  | GlobalVariable *CounterArray = StructTyMap[StructTy]; | 
|  | if (CounterArray == nullptr) | 
|  | return false; | 
|  | IRBuilder<> IRB(I); | 
|  | Constant *Indices[2]; | 
|  | // Xref http://llvm.org/docs/LangRef.html#i-getelementptr and | 
|  | // http://llvm.org/docs/GetElementPtr.html. | 
|  | // The first index of the GEP instruction steps through the first operand, | 
|  | // i.e., the array itself. | 
|  | Indices[0] = ConstantInt::get(IRB.getInt32Ty(), 0); | 
|  | // The second index is the index within the array. | 
|  | Indices[1] = ConstantInt::get(IRB.getInt32Ty(), CounterIdx); | 
|  | Constant *Counter = | 
|  | ConstantExpr::getGetElementPtr( | 
|  | ArrayType::get(IRB.getInt64Ty(), getStructCounterSize(StructTy)), | 
|  | CounterArray, Indices); | 
|  | Value *Load = IRB.CreateLoad(Counter); | 
|  | IRB.CreateStore(IRB.CreateAdd(Load, ConstantInt::get(IRB.getInt64Ty(), 1)), | 
|  | Counter); | 
|  | return true; | 
|  | } | 
|  |  | 
|  | int EfficiencySanitizer::getMemoryAccessFuncIndex(Value *Addr, | 
|  | const DataLayout &DL) { | 
|  | Type *OrigPtrTy = Addr->getType(); | 
|  | Type *OrigTy = cast<PointerType>(OrigPtrTy)->getElementType(); | 
|  | assert(OrigTy->isSized()); | 
|  | // The size is always a multiple of 8. | 
|  | uint32_t TypeSizeBytes = DL.getTypeStoreSizeInBits(OrigTy) / 8; | 
|  | if (TypeSizeBytes != 1 && TypeSizeBytes != 2 && TypeSizeBytes != 4 && | 
|  | TypeSizeBytes != 8 && TypeSizeBytes != 16) { | 
|  | // Irregular sizes do not have per-size call targets. | 
|  | NumAccessesWithIrregularSize++; | 
|  | return -1; | 
|  | } | 
|  | size_t Idx = countTrailingZeros(TypeSizeBytes); | 
|  | assert(Idx < NumberOfAccessSizes); | 
|  | return Idx; | 
|  | } | 
|  |  | 
|  | bool EfficiencySanitizer::instrumentFastpath(Instruction *I, | 
|  | const DataLayout &DL, bool IsStore, | 
|  | Value *Addr, unsigned Alignment) { | 
|  | if (Options.ToolType == EfficiencySanitizerOptions::ESAN_CacheFrag) { | 
|  | return instrumentFastpathCacheFrag(I, DL, Addr, Alignment); | 
|  | } else if (Options.ToolType == EfficiencySanitizerOptions::ESAN_WorkingSet) { | 
|  | return instrumentFastpathWorkingSet(I, DL, Addr, Alignment); | 
|  | } | 
|  | return false; | 
|  | } | 
|  |  | 
|  | bool EfficiencySanitizer::instrumentFastpathCacheFrag(Instruction *I, | 
|  | const DataLayout &DL, | 
|  | Value *Addr, | 
|  | unsigned Alignment) { | 
|  | // Do nothing. | 
|  | return true; // Return true to avoid slowpath instrumentation. | 
|  | } | 
|  |  | 
|  | bool EfficiencySanitizer::instrumentFastpathWorkingSet( | 
|  | Instruction *I, const DataLayout &DL, Value *Addr, unsigned Alignment) { | 
|  | assert(ShadowScale[Options.ToolType] == 6); // The code below assumes this | 
|  | IRBuilder<> IRB(I); | 
|  | Type *OrigTy = cast<PointerType>(Addr->getType())->getElementType(); | 
|  | const uint32_t TypeSize = DL.getTypeStoreSizeInBits(OrigTy); | 
|  | // Bail to the slowpath if the access might touch multiple cache lines. | 
|  | // An access aligned to its size is guaranteed to be intra-cache-line. | 
|  | // getMemoryAccessFuncIndex has already ruled out a size larger than 16 | 
|  | // and thus larger than a cache line for platforms this tool targets | 
|  | // (and our shadow memory setup assumes 64-byte cache lines). | 
|  | assert(TypeSize <= 128); | 
|  | if (!(TypeSize == 8 || | 
|  | (Alignment % (TypeSize / 8)) == 0)) { | 
|  | if (ClAssumeIntraCacheLine) | 
|  | ++NumAssumedIntraCacheLine; | 
|  | else | 
|  | return false; | 
|  | } | 
|  |  | 
|  | // We inline instrumentation to set the corresponding shadow bits for | 
|  | // each cache line touched by the application.  Here we handle a single | 
|  | // load or store where we've already ruled out the possibility that it | 
|  | // might touch more than one cache line and thus we simply update the | 
|  | // shadow memory for a single cache line. | 
|  | // Our shadow memory model is fine with races when manipulating shadow values. | 
|  | // We generate the following code: | 
|  | // | 
|  | //   const char BitMask = 0x81; | 
|  | //   char *ShadowAddr = appToShadow(AppAddr); | 
|  | //   if ((*ShadowAddr & BitMask) != BitMask) | 
|  | //     *ShadowAddr |= Bitmask; | 
|  | // | 
|  | Value *AddrPtr = IRB.CreatePointerCast(Addr, IntptrTy); | 
|  | Value *ShadowPtr = appToShadow(AddrPtr, IRB); | 
|  | Type *ShadowTy = IntegerType::get(*Ctx, 8U); | 
|  | Type *ShadowPtrTy = PointerType::get(ShadowTy, 0); | 
|  | // The bottom bit is used for the current sampling period's working set. | 
|  | // The top bit is used for the total working set.  We set both on each | 
|  | // memory access, if they are not already set. | 
|  | Value *ValueMask = ConstantInt::get(ShadowTy, 0x81); // 10000001B | 
|  |  | 
|  | Value *OldValue = IRB.CreateLoad(IRB.CreateIntToPtr(ShadowPtr, ShadowPtrTy)); | 
|  | // The AND and CMP will be turned into a TEST instruction by the compiler. | 
|  | Value *Cmp = IRB.CreateICmpNE(IRB.CreateAnd(OldValue, ValueMask), ValueMask); | 
|  | TerminatorInst *CmpTerm = SplitBlockAndInsertIfThen(Cmp, I, false); | 
|  | // FIXME: do I need to call SetCurrentDebugLocation? | 
|  | IRB.SetInsertPoint(CmpTerm); | 
|  | // We use OR to set the shadow bits to avoid corrupting the middle 6 bits, | 
|  | // which are used by the runtime library. | 
|  | Value *NewVal = IRB.CreateOr(OldValue, ValueMask); | 
|  | IRB.CreateStore(NewVal, IRB.CreateIntToPtr(ShadowPtr, ShadowPtrTy)); | 
|  | IRB.SetInsertPoint(I); | 
|  |  | 
|  | return true; | 
|  | } |