Completely revamp node binding for AST matchers.
This is in preparation for the backwards references to bound
nodes, which will expose a lot more about how matches occur. Main
changes:
- instead of building the tree of bound nodes, we build a "set" of bound
nodes and explode all possible match combinations while running
through the matchers; this will allow us to also implement matchers
that filter down the current set of matches, like "equalsBoundNode"
- take the set of bound nodes at the start of the match into
consideration when doing memoization; as part of that, reevaluated
that memoization gives us benefits that are large enough (it still
does - the effect on common match patterns is up to an order of
magnitude)
- reset the bound nodes when a node does not match, thus never leaking
information from partial sub-matcher matches for failing matchers
Effects:
- we can now correctly "explode" combinatorial matches, for example:
allOf(forEachDescendant(...bind("a")),
forEachDescendant(...bind("b"))) will now trigger matches for all
combinations of matching "a" and "b"s.
- we now never expose bound nodes from partial matches in matchers that
did not match in the end - this fixes a long-standing issue
FIXMEs:
- rename BoundNodesTreeBuilder to BoundNodesBuilder or
BoundNodesSetBuilder, as we don't build a tree any more; this is out
of scope for this change, though
- we're seeing some performance regressions (around 10%), but I expect
some performance tuning will get that back, and it's easily worth
the increase in expressiveness for now
git-svn-id: https://llvm.org/svn/llvm-project/cfe/trunk@184313 91177308-0d34-0410-b5e6-96231b3b80d8
diff --git a/lib/ASTMatchers/ASTMatchFinder.cpp b/lib/ASTMatchers/ASTMatchFinder.cpp
index 378453d..d8f0588 100644
--- a/lib/ASTMatchers/ASTMatchFinder.cpp
+++ b/lib/ASTMatchers/ASTMatchFinder.cpp
@@ -30,10 +30,21 @@
typedef MatchFinder::MatchCallback MatchCallback;
+// The maximum number of memoization entries to store.
+// 10k has been experimentally found to give a good trade-off
+// of performance vs. memory consumption by running matcher
+// that match on every statement over a very large codebase.
+//
+// FIXME: Do some performance optimization in general and
+// revisit this number; also, put up micro-benchmarks that we can
+// optimize this on.
+static const unsigned MaxMemoizationEntries = 10000;
+
// We use memoization to avoid running the same matcher on the same
-// AST node twice. This pair is the key for looking up match
+// AST node twice. This struct is the key for looking up match
// result. It consists of an ID of the MatcherInterface (for
-// identifying the matcher) and a pointer to the AST node.
+// identifying the matcher), a pointer to the AST node and the
+// bound nodes before the matcher was executed.
//
// We currently only memoize on nodes whose pointers identify the
// nodes (\c Stmt and \c Decl, but not \c QualType or \c TypeLoc).
@@ -41,12 +52,24 @@
// generation of keys for each type.
// FIXME: Benchmark whether memoization of non-pointer typed nodes
// provides enough benefit for the additional amount of code.
-typedef std::pair<uint64_t, const void*> UntypedMatchInput;
+struct MatchKey {
+ uint64_t MatcherID;
+ ast_type_traits::DynTypedNode Node;
+ BoundNodesTreeBuilder BoundNodes;
+
+ bool operator<(const MatchKey &Other) const {
+ if (MatcherID != Other.MatcherID)
+ return MatcherID < Other.MatcherID;
+ if (Node != Other.Node)
+ return Node < Other.Node;
+ return BoundNodes < Other.BoundNodes;
+ }
+};
// Used to store the result of a match and possibly bound nodes.
struct MemoizedMatchResult {
bool ResultOfMatch;
- BoundNodesTree Nodes;
+ BoundNodesTreeBuilder Nodes;
};
// A RecursiveASTVisitor that traverses all children or all descendants of
@@ -103,6 +126,12 @@
else if (const TypeLoc *T = DynNode.get<TypeLoc>())
traverse(*T);
// FIXME: Add other base types after adding tests.
+
+ // It's OK to always overwrite the bound nodes, as if there was
+ // no match in this recursive branch, the result set is empty
+ // anyway.
+ *Builder = ResultBindings;
+
return Matches;
}
@@ -220,18 +249,20 @@
return true;
}
if (Bind != ASTMatchFinder::BK_All) {
- if (Matcher->matches(ast_type_traits::DynTypedNode::create(Node),
- Finder, Builder)) {
+ BoundNodesTreeBuilder RecursiveBuilder(*Builder);
+ if (Matcher->matches(ast_type_traits::DynTypedNode::create(Node), Finder,
+ &RecursiveBuilder)) {
Matches = true;
- return false; // Abort as soon as a match is found.
+ ResultBindings.addMatch(RecursiveBuilder);
+ return false; // Abort as soon as a match is found.
}
} else {
- BoundNodesTreeBuilder RecursiveBuilder;
- if (Matcher->matches(ast_type_traits::DynTypedNode::create(Node),
- Finder, &RecursiveBuilder)) {
+ BoundNodesTreeBuilder RecursiveBuilder(*Builder);
+ if (Matcher->matches(ast_type_traits::DynTypedNode::create(Node), Finder,
+ &RecursiveBuilder)) {
// After the first match the matcher succeeds.
Matches = true;
- Builder->addMatch(RecursiveBuilder.build());
+ ResultBindings.addMatch(RecursiveBuilder);
}
}
return true;
@@ -251,6 +282,7 @@
const DynTypedMatcher *const Matcher;
ASTMatchFinder *const Finder;
BoundNodesTreeBuilder *const Builder;
+ BoundNodesTreeBuilder ResultBindings;
int CurrentDepth;
const int MaxDepth;
const ASTMatchFinder::TraversalKind Traversal;
@@ -341,24 +373,28 @@
const DynTypedMatcher &Matcher,
BoundNodesTreeBuilder *Builder, int MaxDepth,
TraversalKind Traversal, BindKind Bind) {
- const UntypedMatchInput input(Matcher.getID(), Node.getMemoizationData());
+ MatchKey Key;
+ Key.MatcherID = Matcher.getID();
+ Key.Node = Node;
+ // Note that we key on the bindings *before* the match.
+ Key.BoundNodes = *Builder;
// For AST-nodes that don't have an identity, we can't memoize.
- if (!input.second)
+ if (!Node.getMemoizationData())
return matchesRecursively(Node, Matcher, Builder, MaxDepth, Traversal,
Bind);
- std::pair<MemoizationMap::iterator, bool> InsertResult
- = ResultCache.insert(std::make_pair(input, MemoizedMatchResult()));
+ if (ResultCache.size() > MaxMemoizationEntries)
+ ResultCache.clear();
+ std::pair<MemoizationMap::iterator, bool> InsertResult =
+ ResultCache.insert(std::make_pair(Key, MemoizedMatchResult()));
if (InsertResult.second) {
- BoundNodesTreeBuilder DescendantBoundNodesBuilder;
+ InsertResult.first->second.Nodes = *Builder;
InsertResult.first->second.ResultOfMatch =
- matchesRecursively(Node, Matcher, &DescendantBoundNodesBuilder,
- MaxDepth, Traversal, Bind);
- InsertResult.first->second.Nodes =
- DescendantBoundNodesBuilder.build();
+ matchesRecursively(Node, Matcher, &InsertResult.first->second.Nodes,
+ MaxDepth, Traversal, Bind);
}
- InsertResult.first->second.Nodes.copyTo(Builder);
+ *Builder = InsertResult.first->second.Nodes;
return InsertResult.first->second.ResultOfMatch;
}
@@ -411,9 +447,8 @@
I != E; ++I) {
BoundNodesTreeBuilder Builder;
if (I->first->matches(Node, this, &Builder)) {
- BoundNodesTree BoundNodes = Builder.build();
MatchVisitor Visitor(ActiveASTContext, I->second);
- BoundNodes.visitMatches(&Visitor);
+ Builder.visitMatches(&Visitor);
}
}
}
@@ -459,19 +494,29 @@
assert(false && "Found node that is not in the parent map.");
return false;
}
- const UntypedMatchInput input(Matcher.getID(), Node.getMemoizationData());
- MemoizationMap::iterator I = ResultCache.find(input);
- if (I == ResultCache.end()) {
- BoundNodesTreeBuilder AncestorBoundNodesBuilder;
+ MatchKey Key;
+ Key.MatcherID = Matcher.getID();
+ Key.Node = Node;
+ Key.BoundNodes = *Builder;
+ if (ResultCache.size() > MaxMemoizationEntries)
+ ResultCache.clear();
+ std::pair<MemoizationMap::iterator, bool> InsertResult =
+ ResultCache.insert(std::make_pair(Key, MemoizedMatchResult()));
+ if (InsertResult.second) {
bool Matches = false;
if (Parents.size() == 1) {
// Only one parent - do recursive memoization.
const ast_type_traits::DynTypedNode Parent = Parents[0];
- if (Matcher.matches(Parent, this, &AncestorBoundNodesBuilder)) {
+ BoundNodesTreeBuilder Result(*Builder);
+ if (Matcher.matches(Parent, this, &Result)) {
+ InsertResult.first->second.Nodes = Result;
Matches = true;
} else if (MatchMode != ASTMatchFinder::AMM_ParentOnly) {
- Matches = memoizedMatchesAncestorOfRecursively(
- Parent, Matcher, &AncestorBoundNodesBuilder, MatchMode);
+ Matches = memoizedMatchesAncestorOfRecursively(Parent, Matcher,
+ Builder, MatchMode);
+ // Once we get back from the recursive call, the result will be the
+ // same as the parent's result.
+ InsertResult.first->second.Nodes = *Builder;
}
} else {
// Multiple parents - BFS over the rest of the nodes.
@@ -479,8 +524,9 @@
std::deque<ast_type_traits::DynTypedNode> Queue(Parents.begin(),
Parents.end());
while (!Queue.empty()) {
- if (Matcher.matches(Queue.front(), this,
- &AncestorBoundNodesBuilder)) {
+ BoundNodesTreeBuilder Result(*Builder);
+ if (Matcher.matches(Queue.front(), this, &Result)) {
+ InsertResult.first->second.Nodes = Result;
Matches = true;
break;
}
@@ -501,18 +547,15 @@
}
}
- I = ResultCache.insert(std::make_pair(input, MemoizedMatchResult()))
- .first;
- I->second.Nodes = AncestorBoundNodesBuilder.build();
- I->second.ResultOfMatch = Matches;
+ InsertResult.first->second.ResultOfMatch = Matches;
}
- I->second.Nodes.copyTo(Builder);
- return I->second.ResultOfMatch;
+ *Builder = InsertResult.first->second.Nodes;
+ return InsertResult.first->second.ResultOfMatch;
}
// Implements a BoundNodesTree::Visitor that calls a MatchCallback with
// the aggregated bound nodes for each match.
- class MatchVisitor : public BoundNodesTree::Visitor {
+ class MatchVisitor : public BoundNodesTreeBuilder::Visitor {
public:
MatchVisitor(ASTContext* Context,
MatchFinder::MatchCallback* Callback)
@@ -538,8 +581,11 @@
for (std::set<const TypedefDecl*>::const_iterator
It = Aliases.begin(), End = Aliases.end();
It != End; ++It) {
- if (Matcher.matches(**It, this, Builder))
+ BoundNodesTreeBuilder Result(*Builder);
+ if (Matcher.matches(**It, this, &Result)) {
+ *Builder = Result;
return true;
+ }
}
return false;
}
@@ -552,7 +598,7 @@
llvm::DenseMap<const Type*, std::set<const TypedefDecl*> > TypeAliases;
// Maps (matcher, node) -> the match result for memoization.
- typedef llvm::DenseMap<UntypedMatchInput, MemoizedMatchResult> MemoizationMap;
+ typedef std::map<MatchKey, MemoizedMatchResult> MemoizationMap;
MemoizationMap ResultCache;
};
@@ -616,8 +662,11 @@
assert(TemplateType);
return false;
}
- if (Base.matches(*ClassDecl, this, Builder))
+ BoundNodesTreeBuilder Result(*Builder);
+ if (Base.matches(*ClassDecl, this, &Result)) {
+ *Builder = Result;
return true;
+ }
if (classIsDerivedFrom(ClassDecl, Base, Builder))
return true;
}