Decouple llvm::SpecialCaseList text representation and its LLVM IR semantics.
Turn llvm::SpecialCaseList into a simple class that parses text files in
a specified format and knows nothing about LLVM IR. Move this class into
LLVMSupport library. Implement two users of this class:
* DFSanABIList in DFSan instrumentation pass.
* SanitizerBlacklist in Clang CodeGen library.
The latter will be modified to use actual source-level information from frontend
(source file names) instead of unstable LLVM IR things (LLVM Module identifier).
Remove dependency edge from ClangCodeGen/ClangDriver to LLVMTransformUtils.
No functionality change.
llvm-svn: 212643
diff --git a/llvm/lib/Transforms/Instrumentation/DataFlowSanitizer.cpp b/llvm/lib/Transforms/Instrumentation/DataFlowSanitizer.cpp
index 7f468f7..3b9212a 100644
--- a/llvm/lib/Transforms/Instrumentation/DataFlowSanitizer.cpp
+++ b/llvm/lib/Transforms/Instrumentation/DataFlowSanitizer.cpp
@@ -59,9 +59,9 @@
#include "llvm/IR/Value.h"
#include "llvm/Pass.h"
#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/SpecialCaseList.h"
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
#include "llvm/Transforms/Utils/Local.h"
-#include "llvm/Transforms/Utils/SpecialCaseList.h"
#include <iterator>
using namespace llvm;
@@ -120,6 +120,51 @@
namespace {
+StringRef GetGlobalTypeString(const GlobalValue &G) {
+ // Types of GlobalVariables are always pointer types.
+ Type *GType = G.getType()->getElementType();
+ // For now we support blacklisting struct types only.
+ if (StructType *SGType = dyn_cast<StructType>(GType)) {
+ if (!SGType->isLiteral())
+ return SGType->getName();
+ }
+ return "<unknown type>";
+}
+
+class DFSanABIList {
+ std::unique_ptr<SpecialCaseList> SCL;
+
+ public:
+ DFSanABIList(SpecialCaseList *SCL) : SCL(SCL) {}
+
+ /// Returns whether either this function or its source file are listed in the
+ /// given category.
+ bool isIn(const Function &F, const StringRef Category) const {
+ return isIn(*F.getParent(), Category) ||
+ SCL->inSection("fun", F.getName(), Category);
+ }
+
+ /// Returns whether this global alias is listed in the given category.
+ ///
+ /// If GA aliases a function, the alias's name is matched as a function name
+ /// would be. Similarly, aliases of globals are matched like globals.
+ bool isIn(const GlobalAlias &GA, const StringRef Category) const {
+ if (isIn(*GA.getParent(), Category))
+ return true;
+
+ if (isa<FunctionType>(GA.getType()->getElementType()))
+ return SCL->inSection("fun", GA.getName(), Category);
+
+ return SCL->inSection("global", GA.getName(), Category) ||
+ SCL->inSection("type", GetGlobalTypeString(GA), Category);
+ }
+
+ /// Returns whether this module is listed in the given category.
+ bool isIn(const Module &M, const StringRef Category) const {
+ return SCL->inSection("src", M.getModuleIdentifier(), Category);
+ }
+};
+
class DataFlowSanitizer : public ModulePass {
friend struct DFSanFunction;
friend class DFSanVisitor;
@@ -190,7 +235,7 @@
Constant *DFSanSetLabelFn;
Constant *DFSanNonzeroLabelFn;
MDNode *ColdCallWeights;
- std::unique_ptr<SpecialCaseList> ABIList;
+ DFSanABIList ABIList;
DenseMap<Value *, Function *> UnwrappedFnMap;
AttributeSet ReadOnlyNoneAttrs;
@@ -395,11 +440,11 @@
}
bool DataFlowSanitizer::isInstrumented(const Function *F) {
- return !ABIList->isIn(*F, "uninstrumented");
+ return !ABIList.isIn(*F, "uninstrumented");
}
bool DataFlowSanitizer::isInstrumented(const GlobalAlias *GA) {
- return !ABIList->isIn(*GA, "uninstrumented");
+ return !ABIList.isIn(*GA, "uninstrumented");
}
DataFlowSanitizer::InstrumentedABI DataFlowSanitizer::getInstrumentedABI() {
@@ -407,11 +452,11 @@
}
DataFlowSanitizer::WrapperKind DataFlowSanitizer::getWrapperKind(Function *F) {
- if (ABIList->isIn(*F, "functional"))
+ if (ABIList.isIn(*F, "functional"))
return WK_Functional;
- if (ABIList->isIn(*F, "discard"))
+ if (ABIList.isIn(*F, "discard"))
return WK_Discard;
- if (ABIList->isIn(*F, "custom"))
+ if (ABIList.isIn(*F, "custom"))
return WK_Custom;
return WK_Warning;
@@ -500,7 +545,7 @@
if (!DL)
return false;
- if (ABIList->isIn(M, "skip"))
+ if (ABIList.isIn(M, "skip"))
return false;
if (!GetArgTLSPtr) {
diff --git a/llvm/lib/Transforms/Utils/CMakeLists.txt b/llvm/lib/Transforms/Utils/CMakeLists.txt
index e10ca90..fcf548f 100644
--- a/llvm/lib/Transforms/Utils/CMakeLists.txt
+++ b/llvm/lib/Transforms/Utils/CMakeLists.txt
@@ -33,7 +33,6 @@
SimplifyIndVar.cpp
SimplifyInstructions.cpp
SimplifyLibCalls.cpp
- SpecialCaseList.cpp
UnifyFunctionExitNodes.cpp
Utils.cpp
ValueMapper.cpp
diff --git a/llvm/lib/Transforms/Utils/SpecialCaseList.cpp b/llvm/lib/Transforms/Utils/SpecialCaseList.cpp
deleted file mode 100644
index 3d76a17..0000000
--- a/llvm/lib/Transforms/Utils/SpecialCaseList.cpp
+++ /dev/null
@@ -1,222 +0,0 @@
-//===-- SpecialCaseList.cpp - special case list for sanitizers ------------===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This is a utility class for instrumentation passes (like AddressSanitizer
-// or ThreadSanitizer) to avoid instrumenting some functions or global
-// variables, or to instrument some functions or global variables in a specific
-// way, based on a user-supplied list.
-//
-//===----------------------------------------------------------------------===//
-
-#include "llvm/Transforms/Utils/SpecialCaseList.h"
-#include "llvm/ADT/STLExtras.h"
-#include "llvm/ADT/SmallVector.h"
-#include "llvm/ADT/StringExtras.h"
-#include "llvm/ADT/StringSet.h"
-#include "llvm/IR/DerivedTypes.h"
-#include "llvm/IR/Function.h"
-#include "llvm/IR/GlobalVariable.h"
-#include "llvm/IR/Module.h"
-#include "llvm/Support/MemoryBuffer.h"
-#include "llvm/Support/Regex.h"
-#include "llvm/Support/raw_ostream.h"
-#include <string>
-#include <system_error>
-#include <utility>
-
-namespace llvm {
-
-/// Represents a set of regular expressions. Regular expressions which are
-/// "literal" (i.e. no regex metacharacters) are stored in Strings, while all
-/// others are represented as a single pipe-separated regex in RegEx. The
-/// reason for doing so is efficiency; StringSet is much faster at matching
-/// literal strings than Regex.
-struct SpecialCaseList::Entry {
- StringSet<> Strings;
- Regex *RegEx;
-
- Entry() : RegEx(nullptr) {}
-
- bool match(StringRef Query) const {
- return Strings.count(Query) || (RegEx && RegEx->match(Query));
- }
-};
-
-SpecialCaseList::SpecialCaseList() : Entries() {}
-
-SpecialCaseList *SpecialCaseList::create(
- const StringRef Path, std::string &Error) {
- if (Path.empty())
- return new SpecialCaseList();
- ErrorOr<std::unique_ptr<MemoryBuffer>> FileOrErr =
- MemoryBuffer::getFile(Path);
- if (std::error_code EC = FileOrErr.getError()) {
- Error = (Twine("Can't open file '") + Path + "': " + EC.message()).str();
- return nullptr;
- }
- return create(FileOrErr.get().get(), Error);
-}
-
-SpecialCaseList *SpecialCaseList::create(
- const MemoryBuffer *MB, std::string &Error) {
- std::unique_ptr<SpecialCaseList> SCL(new SpecialCaseList());
- if (!SCL->parse(MB, Error))
- return nullptr;
- return SCL.release();
-}
-
-SpecialCaseList *SpecialCaseList::createOrDie(const StringRef Path) {
- std::string Error;
- if (SpecialCaseList *SCL = create(Path, Error))
- return SCL;
- report_fatal_error(Error);
-}
-
-bool SpecialCaseList::parse(const MemoryBuffer *MB, std::string &Error) {
- // Iterate through each line in the blacklist file.
- SmallVector<StringRef, 16> Lines;
- SplitString(MB->getBuffer(), Lines, "\n\r");
- StringMap<StringMap<std::string> > Regexps;
- assert(Entries.empty() &&
- "parse() should be called on an empty SpecialCaseList");
- int LineNo = 1;
- for (SmallVectorImpl<StringRef>::iterator I = Lines.begin(), E = Lines.end();
- I != E; ++I, ++LineNo) {
- // Ignore empty lines and lines starting with "#"
- if (I->empty() || I->startswith("#"))
- continue;
- // Get our prefix and unparsed regexp.
- std::pair<StringRef, StringRef> SplitLine = I->split(":");
- StringRef Prefix = SplitLine.first;
- if (SplitLine.second.empty()) {
- // Missing ':' in the line.
- Error = (Twine("Malformed line ") + Twine(LineNo) + ": '" +
- SplitLine.first + "'").str();
- return false;
- }
-
- std::pair<StringRef, StringRef> SplitRegexp = SplitLine.second.split("=");
- std::string Regexp = SplitRegexp.first;
- StringRef Category = SplitRegexp.second;
-
- // Backwards compatibility.
- if (Prefix == "global-init") {
- Prefix = "global";
- Category = "init";
- } else if (Prefix == "global-init-type") {
- Prefix = "type";
- Category = "init";
- } else if (Prefix == "global-init-src") {
- Prefix = "src";
- Category = "init";
- }
-
- // See if we can store Regexp in Strings.
- if (Regex::isLiteralERE(Regexp)) {
- Entries[Prefix][Category].Strings.insert(Regexp);
- continue;
- }
-
- // Replace * with .*
- for (size_t pos = 0; (pos = Regexp.find("*", pos)) != std::string::npos;
- pos += strlen(".*")) {
- Regexp.replace(pos, strlen("*"), ".*");
- }
-
- // Check that the regexp is valid.
- Regex CheckRE(Regexp);
- std::string REError;
- if (!CheckRE.isValid(REError)) {
- Error = (Twine("Malformed regex in line ") + Twine(LineNo) + ": '" +
- SplitLine.second + "': " + REError).str();
- return false;
- }
-
- // Add this regexp into the proper group by its prefix.
- if (!Regexps[Prefix][Category].empty())
- Regexps[Prefix][Category] += "|";
- Regexps[Prefix][Category] += "^" + Regexp + "$";
- }
-
- // Iterate through each of the prefixes, and create Regexs for them.
- for (StringMap<StringMap<std::string> >::const_iterator I = Regexps.begin(),
- E = Regexps.end();
- I != E; ++I) {
- for (StringMap<std::string>::const_iterator II = I->second.begin(),
- IE = I->second.end();
- II != IE; ++II) {
- Entries[I->getKey()][II->getKey()].RegEx = new Regex(II->getValue());
- }
- }
- return true;
-}
-
-SpecialCaseList::~SpecialCaseList() {
- for (StringMap<StringMap<Entry> >::iterator I = Entries.begin(),
- E = Entries.end();
- I != E; ++I) {
- for (StringMap<Entry>::const_iterator II = I->second.begin(),
- IE = I->second.end();
- II != IE; ++II) {
- delete II->second.RegEx;
- }
- }
-}
-
-bool SpecialCaseList::isIn(const Function& F, const StringRef Category) const {
- return isIn(*F.getParent(), Category) ||
- inSectionCategory("fun", F.getName(), Category);
-}
-
-static StringRef GetGlobalTypeString(const GlobalValue &G) {
- // Types of GlobalVariables are always pointer types.
- Type *GType = G.getType()->getElementType();
- // For now we support blacklisting struct types only.
- if (StructType *SGType = dyn_cast<StructType>(GType)) {
- if (!SGType->isLiteral())
- return SGType->getName();
- }
- return "<unknown type>";
-}
-
-bool SpecialCaseList::isIn(const GlobalVariable &G,
- const StringRef Category) const {
- return isIn(*G.getParent(), Category) ||
- inSectionCategory("global", G.getName(), Category) ||
- inSectionCategory("type", GetGlobalTypeString(G), Category);
-}
-
-bool SpecialCaseList::isIn(const GlobalAlias &GA,
- const StringRef Category) const {
- if (isIn(*GA.getParent(), Category))
- return true;
-
- if (isa<FunctionType>(GA.getType()->getElementType()))
- return inSectionCategory("fun", GA.getName(), Category);
-
- return inSectionCategory("global", GA.getName(), Category) ||
- inSectionCategory("type", GetGlobalTypeString(GA), Category);
-}
-
-bool SpecialCaseList::isIn(const Module &M, const StringRef Category) const {
- return inSectionCategory("src", M.getModuleIdentifier(), Category);
-}
-
-bool SpecialCaseList::inSectionCategory(const StringRef Section,
- const StringRef Query,
- const StringRef Category) const {
- StringMap<StringMap<Entry> >::const_iterator I = Entries.find(Section);
- if (I == Entries.end()) return false;
- StringMap<Entry>::const_iterator II = I->second.find(Category);
- if (II == I->second.end()) return false;
-
- return II->getValue().match(Query);
-}
-
-} // namespace llvm