blob: c20c00741700efa3d415908c1c1a41b08fc1b382 [file] [log] [blame]
Alex Lorenza75b2ca2017-07-21 12:49:28 +00001//===- ASTDiff.cpp - AST differencing implementation-----------*- C++ -*- -===//
2//
3// The LLVM Compiler Infrastructure
4//
5// This file is distributed under the University of Illinois Open Source
6// License. See LICENSE.TXT for details.
7//
8//===----------------------------------------------------------------------===//
9//
10// This file contains definitons for the AST differencing interface.
11//
12//===----------------------------------------------------------------------===//
13
14#include "clang/Tooling/ASTDiff/ASTDiff.h"
15
16#include "clang/AST/RecursiveASTVisitor.h"
17#include "clang/Lex/Lexer.h"
18#include "llvm/ADT/PriorityQueue.h"
19
20#include <limits>
21#include <memory>
22#include <unordered_set>
23
24using namespace llvm;
25using namespace clang;
26
27namespace clang {
28namespace diff {
29
30class ASTDiff::Impl {
31public:
32 SyntaxTreeImpl &T1, &T2;
33 bool IsMappingDone = false;
34 Mapping TheMapping;
35
36 Impl(SyntaxTreeImpl &T1, SyntaxTreeImpl &T2, const ComparisonOptions &Options)
37 : T1(T1), T2(T2), Options(Options) {}
38
39 /// Matches nodes one-by-one based on their similarity.
40 void computeMapping();
41
42 std::vector<Match> getMatches(Mapping &M);
43
44 /// Finds an edit script that converts T1 to T2.
45 std::vector<Change> computeChanges(Mapping &M);
46
47 void printChangeImpl(raw_ostream &OS, const Change &Chg) const;
48 void printMatchImpl(raw_ostream &OS, const Match &M) const;
49
50 // Returns a mapping of isomorphic subtrees.
51 Mapping matchTopDown() const;
52
53private:
54 // Returns true if the two subtrees are identical.
55 bool isomorphic(NodeId Id1, NodeId Id2) const;
56
57 bool canBeAddedToMapping(const Mapping &M, NodeId Id1, NodeId Id2) const;
58
59 // Returns false if the nodes must not be mached.
60 bool isMatchingPossible(NodeId Id1, NodeId Id2) const;
61
62 // Adds all corresponding subtrees of the two nodes to the mapping.
63 // The two nodes must be isomorphic.
64 void addIsomorphicSubTrees(Mapping &M, NodeId Id1, NodeId Id2) const;
65
66 // Uses an optimal albeit slow algorithm to compute a mapping between two
67 // subtrees, but only if both have fewer nodes than MaxSize.
68 void addOptimalMapping(Mapping &M, NodeId Id1, NodeId Id2) const;
69
70 // Computes the ratio of common descendants between the two nodes.
71 // Descendants are only considered to be equal when they are mapped in M.
72 double getSimilarity(const Mapping &M, NodeId Id1, NodeId Id2) const;
73
74 // Returns the node that has the highest degree of similarity.
75 NodeId findCandidate(const Mapping &M, NodeId Id1) const;
76
77 // Tries to match any yet unmapped nodes, in a bottom-up fashion.
78 void matchBottomUp(Mapping &M) const;
79
80 const ComparisonOptions &Options;
81
82 friend class ZhangShashaMatcher;
83};
84
85template <class T>
86static bool isNodeExcluded(const SourceManager &SrcMgr, T *N) {
87 if (!N)
88 return true;
89 SourceLocation SLoc = N->getLocStart();
90 return SLoc.isValid() && SrcMgr.isInSystemHeader(SLoc);
91}
92
93namespace {
94/// Counts the number of nodes that will be compared.
95struct NodeCountVisitor : public RecursiveASTVisitor<NodeCountVisitor> {
96 int Count = 0;
97 const SyntaxTreeImpl &Root;
98 NodeCountVisitor(const SyntaxTreeImpl &Root) : Root(Root) {}
99 bool TraverseDecl(Decl *D) {
100 if (isNodeExcluded(Root.AST.getSourceManager(), D))
101 return true;
102 ++Count;
103 RecursiveASTVisitor<NodeCountVisitor>::TraverseDecl(D);
104 return true;
105 }
106 bool TraverseStmt(Stmt *S) {
107 if (isNodeExcluded(Root.AST.getSourceManager(), S))
108 return true;
109 ++Count;
110 RecursiveASTVisitor<NodeCountVisitor>::TraverseStmt(S);
111 return true;
112 }
113 bool TraverseType(QualType T) { return true; }
114};
115} // end anonymous namespace
116
117namespace {
118// Sets Height, Parent and Children for each node.
119struct PreorderVisitor : public RecursiveASTVisitor<PreorderVisitor> {
120 int Id = 0, Depth = 0;
121 NodeId Parent;
122 SyntaxTreeImpl &Root;
123
124 PreorderVisitor(SyntaxTreeImpl &Root) : Root(Root) {}
125
126 template <class T> std::tuple<NodeId, NodeId> PreTraverse(T *ASTNode) {
127 NodeId MyId = Id;
128 Node &N = Root.getMutableNode(MyId);
129 N.Parent = Parent;
130 N.Depth = Depth;
131 N.ASTNode = DynTypedNode::create(*ASTNode);
132 assert(!N.ASTNode.getNodeKind().isNone() &&
133 "Expected nodes to have a valid kind.");
134 if (Parent.isValid()) {
135 Node &P = Root.getMutableNode(Parent);
136 P.Children.push_back(MyId);
137 }
138 Parent = MyId;
139 ++Id;
140 ++Depth;
Alex Lorenz158063e2017-07-21 12:57:40 +0000141 return std::make_tuple(MyId, Root.getNode(MyId).Parent);
Alex Lorenza75b2ca2017-07-21 12:49:28 +0000142 }
143 void PostTraverse(std::tuple<NodeId, NodeId> State) {
144 NodeId MyId, PreviousParent;
145 std::tie(MyId, PreviousParent) = State;
146 assert(MyId.isValid() && "Expecting to only traverse valid nodes.");
147 Parent = PreviousParent;
148 --Depth;
149 Node &N = Root.getMutableNode(MyId);
150 N.RightMostDescendant = Id;
151 if (N.isLeaf())
152 Root.Leaves.push_back(MyId);
153 N.Height = 1;
154 for (NodeId Child : N.Children)
155 N.Height = std::max(N.Height, 1 + Root.getNode(Child).Height);
156 }
157 bool TraverseDecl(Decl *D) {
158 if (isNodeExcluded(Root.AST.getSourceManager(), D))
159 return true;
160 auto SavedState = PreTraverse(D);
161 RecursiveASTVisitor<PreorderVisitor>::TraverseDecl(D);
162 PostTraverse(SavedState);
163 return true;
164 }
165 bool TraverseStmt(Stmt *S) {
166 if (isNodeExcluded(Root.AST.getSourceManager(), S))
167 return true;
168 auto SavedState = PreTraverse(S);
169 RecursiveASTVisitor<PreorderVisitor>::TraverseStmt(S);
170 PostTraverse(SavedState);
171 return true;
172 }
173 bool TraverseType(QualType T) { return true; }
174};
175} // end anonymous namespace
176
177SyntaxTreeImpl::SyntaxTreeImpl(SyntaxTree *Parent, const ASTContext &AST)
178 : SyntaxTreeImpl(Parent, AST.getTranslationUnitDecl(), AST) {}
179
180SyntaxTreeImpl::SyntaxTreeImpl(SyntaxTree *Parent, Decl *N,
181 const ASTContext &AST)
182 : Parent(Parent), AST(AST) {
183 NodeCountVisitor NodeCounter(*this);
184 NodeCounter.TraverseDecl(N);
185 Nodes.resize(NodeCounter.Count);
186 PreorderVisitor PreorderWalker(*this);
187 PreorderWalker.TraverseDecl(N);
188 initTree();
189}
190
191SyntaxTreeImpl::SyntaxTreeImpl(SyntaxTree *Parent, Stmt *N,
192 const ASTContext &AST)
193 : Parent(Parent), AST(AST) {
194 NodeCountVisitor NodeCounter(*this);
195 NodeCounter.TraverseStmt(N);
196 Nodes.resize(NodeCounter.Count);
197 PreorderVisitor PreorderWalker(*this);
198 PreorderWalker.TraverseStmt(N);
199 initTree();
200}
201
202void SyntaxTreeImpl::initTree() {
203 setLeftMostDescendants();
204 int PostorderId = 0;
205 PostorderIds.resize(getSize());
206 std::function<void(NodeId)> PostorderTraverse = [&](NodeId Id) {
207 for (NodeId Child : getNode(Id).Children)
208 PostorderTraverse(Child);
209 PostorderIds[Id] = PostorderId;
210 ++PostorderId;
211 };
212 PostorderTraverse(root());
213}
214
215void SyntaxTreeImpl::setLeftMostDescendants() {
216 for (NodeId Leaf : Leaves) {
217 getMutableNode(Leaf).LeftMostDescendant = Leaf;
218 NodeId Parent, Cur = Leaf;
219 while ((Parent = getNode(Cur).Parent).isValid() &&
220 getNode(Parent).Children[0] == Cur) {
221 Cur = Parent;
222 getMutableNode(Cur).LeftMostDescendant = Leaf;
223 }
224 }
225}
226
227static std::vector<NodeId> getSubtreePostorder(const SyntaxTreeImpl &Tree,
228 NodeId Root) {
229 std::vector<NodeId> Postorder;
230 std::function<void(NodeId)> Traverse = [&](NodeId Id) {
231 const Node &N = Tree.getNode(Id);
232 for (NodeId Child : N.Children)
233 Traverse(Child);
234 Postorder.push_back(Id);
235 };
236 Traverse(Root);
237 return Postorder;
238}
239
240static std::vector<NodeId> getSubtreeBfs(const SyntaxTreeImpl &Tree,
241 NodeId Root) {
242 std::vector<NodeId> Ids;
243 size_t Expanded = 0;
244 Ids.push_back(Root);
245 while (Expanded < Ids.size())
246 for (NodeId Child : Tree.getNode(Ids[Expanded++]).Children)
247 Ids.push_back(Child);
248 return Ids;
249}
250
251int SyntaxTreeImpl::getNumberOfDescendants(NodeId Id) const {
252 return getNode(Id).RightMostDescendant - Id + 1;
253}
254
255bool SyntaxTreeImpl::isInSubtree(NodeId Id, NodeId SubtreeRoot) const {
256 NodeId Lower = SubtreeRoot;
257 NodeId Upper = getNode(SubtreeRoot).RightMostDescendant;
258 return Id >= Lower && Id <= Upper;
259}
260
261std::string SyntaxTreeImpl::getNodeValueImpl(NodeId Id) const {
262 return getNodeValueImpl(getNode(Id).ASTNode);
263}
264
265std::string SyntaxTreeImpl::getNodeValueImpl(const DynTypedNode &DTN) const {
266 if (auto *X = DTN.get<BinaryOperator>())
267 return X->getOpcodeStr();
268 if (auto *X = DTN.get<AccessSpecDecl>()) {
269 CharSourceRange Range(X->getSourceRange(), false);
270 return Lexer::getSourceText(Range, AST.getSourceManager(),
271 AST.getLangOpts());
272 }
273 if (auto *X = DTN.get<IntegerLiteral>()) {
274 SmallString<256> Str;
275 X->getValue().toString(Str, /*Radix=*/10, /*Signed=*/false);
276 return Str.str();
277 }
278 if (auto *X = DTN.get<StringLiteral>())
279 return X->getString();
280 if (auto *X = DTN.get<ValueDecl>())
281 return X->getNameAsString() + "(" + X->getType().getAsString() + ")";
282 if (auto *X = DTN.get<DeclStmt>())
283 return "";
284 if (auto *X = DTN.get<TranslationUnitDecl>())
285 return "";
286 std::string Value;
287 if (auto *X = DTN.get<DeclRefExpr>()) {
288 if (X->hasQualifier()) {
289 llvm::raw_string_ostream OS(Value);
290 PrintingPolicy PP(AST.getLangOpts());
291 X->getQualifier()->print(OS, PP);
292 }
293 Value += X->getDecl()->getNameAsString();
294 return Value;
295 }
296 if (auto *X = DTN.get<NamedDecl>())
297 Value += X->getNameAsString() + ";";
298 if (auto *X = DTN.get<TypedefNameDecl>())
299 return Value + X->getUnderlyingType().getAsString() + ";";
300 if (auto *X = DTN.get<NamespaceDecl>())
301 return Value;
302 if (auto *X = DTN.get<TypeDecl>())
303 if (X->getTypeForDecl())
304 Value +=
305 X->getTypeForDecl()->getCanonicalTypeInternal().getAsString() + ";";
306 if (auto *X = DTN.get<Decl>())
307 return Value;
308 if (auto *X = DTN.get<Stmt>())
309 return "";
310 llvm_unreachable("Fatal: unhandled AST node.\n");
311}
312
313void SyntaxTreeImpl::printTree() const { printTree(root()); }
314void SyntaxTreeImpl::printTree(NodeId Root) const {
315 printTree(llvm::outs(), Root);
316}
317
318void SyntaxTreeImpl::printTree(raw_ostream &OS, NodeId Root) const {
319 const Node &N = getNode(Root);
320 for (int I = 0; I < N.Depth; ++I)
321 OS << " ";
322 printNode(OS, Root);
323 OS << "\n";
324 for (NodeId Child : N.Children)
325 printTree(OS, Child);
326}
327
328void SyntaxTreeImpl::printNode(raw_ostream &OS, NodeId Id) const {
329 if (Id.isInvalid()) {
330 OS << "None";
331 return;
332 }
333 OS << getNode(Id).getTypeLabel();
334 if (getNodeValueImpl(Id) != "")
335 OS << ": " << getNodeValueImpl(Id);
336 OS << "(" << PostorderIds[Id] << ")";
337}
338
339void SyntaxTreeImpl::printNodeAsJson(raw_ostream &OS, NodeId Id) const {
340 auto N = getNode(Id);
341 OS << R"({"type":")" << N.getTypeLabel() << R"(")";
342 if (getNodeValueImpl(Id) != "")
343 OS << R"(,"value":")" << getNodeValueImpl(Id) << R"(")";
344 OS << R"(,"children":[)";
345 if (N.Children.size() > 0) {
346 printNodeAsJson(OS, N.Children[0]);
347 for (size_t I = 1, E = N.Children.size(); I < E; ++I) {
348 OS << ",";
349 printNodeAsJson(OS, N.Children[I]);
350 }
351 }
352 OS << "]}";
353}
354
355void SyntaxTreeImpl::printAsJsonImpl(raw_ostream &OS) const {
356 OS << R"({"root":)";
357 printNodeAsJson(OS, root());
358 OS << "}\n";
359}
360
361/// Identifies a node in a subtree by its postorder offset, starting at 1.
362struct SNodeId {
363 int Id = 0;
364
365 explicit SNodeId(int Id) : Id(Id) {}
366 explicit SNodeId() = default;
367
368 operator int() const { return Id; }
369 SNodeId &operator++() { return ++Id, *this; }
370 SNodeId &operator--() { return --Id, *this; }
371 SNodeId operator+(int Other) const { return SNodeId(Id + Other); }
372};
373
374class Subtree {
375private:
376 /// The parent tree.
377 const SyntaxTreeImpl &Tree;
378 /// Maps SNodeIds to original ids.
379 std::vector<NodeId> RootIds;
380 /// Maps subtree nodes to their leftmost descendants wtihin the subtree.
381 std::vector<SNodeId> LeftMostDescendants;
382
383public:
384 std::vector<SNodeId> KeyRoots;
385
386 Subtree(const SyntaxTreeImpl &Tree, NodeId SubtreeRoot) : Tree(Tree) {
387 RootIds = getSubtreePostorder(Tree, SubtreeRoot);
388 int NumLeaves = setLeftMostDescendants();
389 computeKeyRoots(NumLeaves);
390 }
391 int getSize() const { return RootIds.size(); }
392 NodeId getIdInRoot(SNodeId Id) const {
393 assert(Id > 0 && Id <= getSize() && "Invalid subtree node index.");
394 return RootIds[Id - 1];
395 }
396 const Node &getNode(SNodeId Id) const {
397 return Tree.getNode(getIdInRoot(Id));
398 }
399 SNodeId getLeftMostDescendant(SNodeId Id) const {
400 assert(Id > 0 && Id <= getSize() && "Invalid subtree node index.");
401 return LeftMostDescendants[Id - 1];
402 }
403 /// Returns the postorder index of the leftmost descendant in the subtree.
404 NodeId getPostorderOffset() const {
405 return Tree.PostorderIds[getIdInRoot(SNodeId(1))];
406 }
407
408private:
409 /// Returns the number of leafs in the subtree.
410 int setLeftMostDescendants() {
411 int NumLeaves = 0;
412 LeftMostDescendants.resize(getSize());
413 for (int I = 0; I < getSize(); ++I) {
414 SNodeId SI(I + 1);
415 const Node &N = getNode(SI);
416 NumLeaves += N.isLeaf();
417 assert(I == Tree.PostorderIds[getIdInRoot(SI)] - getPostorderOffset() &&
418 "Postorder traversal in subtree should correspond to traversal in "
419 "the root tree by a constant offset.");
420 LeftMostDescendants[I] = SNodeId(Tree.PostorderIds[N.LeftMostDescendant] -
421 getPostorderOffset());
422 }
423 return NumLeaves;
424 }
425 void computeKeyRoots(int Leaves) {
426 KeyRoots.resize(Leaves);
427 std::unordered_set<int> Visited;
428 int K = Leaves - 1;
429 for (SNodeId I(getSize()); I > 0; --I) {
430 SNodeId LeftDesc = getLeftMostDescendant(I);
431 if (Visited.count(LeftDesc))
432 continue;
433 assert(K >= 0 && "K should be non-negative");
434 KeyRoots[K] = I;
435 Visited.insert(LeftDesc);
436 --K;
437 }
438 }
439};
440
441/// Implementation of Zhang and Shasha's Algorithm for tree edit distance.
442/// Computes an optimal mapping between two trees using only insertion,
443/// deletion and update as edit actions (similar to the Levenshtein distance).
444class ZhangShashaMatcher {
445 const ASTDiff::Impl &DiffImpl;
446 Subtree S1;
447 Subtree S2;
448 std::unique_ptr<std::unique_ptr<double[]>[]> TreeDist, ForestDist;
449
450public:
451 ZhangShashaMatcher(const ASTDiff::Impl &DiffImpl, const SyntaxTreeImpl &T1,
452 const SyntaxTreeImpl &T2, NodeId Id1, NodeId Id2)
453 : DiffImpl(DiffImpl), S1(T1, Id1), S2(T2, Id2) {
454 TreeDist = llvm::make_unique<std::unique_ptr<double[]>[]>(
455 size_t(S1.getSize()) + 1);
456 ForestDist = llvm::make_unique<std::unique_ptr<double[]>[]>(
457 size_t(S1.getSize()) + 1);
458 for (int I = 0, E = S1.getSize() + 1; I < E; ++I) {
459 TreeDist[I] = llvm::make_unique<double[]>(size_t(S2.getSize()) + 1);
460 ForestDist[I] = llvm::make_unique<double[]>(size_t(S2.getSize()) + 1);
461 }
462 }
463
464 std::vector<std::pair<NodeId, NodeId>> getMatchingNodes() {
465 std::vector<std::pair<NodeId, NodeId>> Matches;
466 std::vector<std::pair<SNodeId, SNodeId>> TreePairs;
467
468 computeTreeDist();
469
470 bool RootNodePair = true;
471
Alex Lorenz4c0a8662017-07-21 13:04:57 +0000472 TreePairs.emplace_back(SNodeId(S1.getSize()), SNodeId(S2.getSize()));
Alex Lorenza75b2ca2017-07-21 12:49:28 +0000473
474 while (!TreePairs.empty()) {
475 SNodeId LastRow, LastCol, FirstRow, FirstCol, Row, Col;
476 std::tie(LastRow, LastCol) = TreePairs.back();
477 TreePairs.pop_back();
478
479 if (!RootNodePair) {
480 computeForestDist(LastRow, LastCol);
481 }
482
483 RootNodePair = false;
484
485 FirstRow = S1.getLeftMostDescendant(LastRow);
486 FirstCol = S2.getLeftMostDescendant(LastCol);
487
488 Row = LastRow;
489 Col = LastCol;
490
491 while (Row > FirstRow || Col > FirstCol) {
492 if (Row > FirstRow &&
493 ForestDist[Row - 1][Col] + 1 == ForestDist[Row][Col]) {
494 --Row;
495 } else if (Col > FirstCol &&
496 ForestDist[Row][Col - 1] + 1 == ForestDist[Row][Col]) {
497 --Col;
498 } else {
499 SNodeId LMD1 = S1.getLeftMostDescendant(Row);
500 SNodeId LMD2 = S2.getLeftMostDescendant(Col);
501 if (LMD1 == S1.getLeftMostDescendant(LastRow) &&
502 LMD2 == S2.getLeftMostDescendant(LastCol)) {
503 NodeId Id1 = S1.getIdInRoot(Row);
504 NodeId Id2 = S2.getIdInRoot(Col);
505 assert(DiffImpl.isMatchingPossible(Id1, Id2) &&
506 "These nodes must not be matched.");
507 Matches.emplace_back(Id1, Id2);
508 --Row;
509 --Col;
510 } else {
511 TreePairs.emplace_back(Row, Col);
512 Row = LMD1;
513 Col = LMD2;
514 }
515 }
516 }
517 }
518 return Matches;
519 }
520
521private:
522 /// Simple cost model for edit actions.
523 /// The values range between 0 and 1, or infinity if this edit action should
524 /// always be avoided.
525
526 /// These costs could be modified to better model the estimated cost of /
527 /// inserting / deleting the current node.
528 static constexpr double DeletionCost = 1;
529 static constexpr double InsertionCost = 1;
530
531 double getUpdateCost(SNodeId Id1, SNodeId Id2) {
532 const DynTypedNode &DTN1 = S1.getNode(Id1).ASTNode,
533 &DTN2 = S2.getNode(Id2).ASTNode;
534 if (!DiffImpl.Options.isMatchingAllowed(DTN1, DTN2))
535 return std::numeric_limits<double>::max();
536 return DiffImpl.Options.getNodeDistance(*DiffImpl.T1.Parent, DTN1,
537 *DiffImpl.T2.Parent, DTN2);
538 }
539
540 void computeTreeDist() {
541 for (SNodeId Id1 : S1.KeyRoots)
542 for (SNodeId Id2 : S2.KeyRoots)
543 computeForestDist(Id1, Id2);
544 }
545
546 void computeForestDist(SNodeId Id1, SNodeId Id2) {
547 assert(Id1 > 0 && Id2 > 0 && "Expecting offsets greater than 0.");
548 SNodeId LMD1 = S1.getLeftMostDescendant(Id1);
549 SNodeId LMD2 = S2.getLeftMostDescendant(Id2);
550
551 ForestDist[LMD1][LMD2] = 0;
552 for (SNodeId D1 = LMD1 + 1; D1 <= Id1; ++D1) {
553 ForestDist[D1][LMD2] = ForestDist[D1 - 1][LMD2] + DeletionCost;
554 for (SNodeId D2 = LMD2 + 1; D2 <= Id2; ++D2) {
555 ForestDist[LMD1][D2] = ForestDist[LMD1][D2 - 1] + InsertionCost;
556 SNodeId DLMD1 = S1.getLeftMostDescendant(D1);
557 SNodeId DLMD2 = S2.getLeftMostDescendant(D2);
558 if (DLMD1 == LMD1 && DLMD2 == LMD2) {
559 double UpdateCost = getUpdateCost(D1, D2);
560 ForestDist[D1][D2] =
561 std::min({ForestDist[D1 - 1][D2] + DeletionCost,
562 ForestDist[D1][D2 - 1] + InsertionCost,
563 ForestDist[D1 - 1][D2 - 1] + UpdateCost});
564 TreeDist[D1][D2] = ForestDist[D1][D2];
565 } else {
566 ForestDist[D1][D2] =
567 std::min({ForestDist[D1 - 1][D2] + DeletionCost,
568 ForestDist[D1][D2 - 1] + InsertionCost,
569 ForestDist[DLMD1][DLMD2] + TreeDist[D1][D2]});
570 }
571 }
572 }
573 }
574};
575
576namespace {
577// Compares nodes by their depth.
578struct HeightLess {
579 const SyntaxTreeImpl &Tree;
580 HeightLess(const SyntaxTreeImpl &Tree) : Tree(Tree) {}
581 bool operator()(NodeId Id1, NodeId Id2) const {
582 return Tree.getNode(Id1).Height < Tree.getNode(Id2).Height;
583 }
584};
585} // end anonymous namespace
586
587// Priority queue for nodes, sorted descendingly by their height.
588class PriorityList {
589 const SyntaxTreeImpl &Tree;
590 HeightLess Cmp;
591 std::vector<NodeId> Container;
592 PriorityQueue<NodeId, std::vector<NodeId>, HeightLess> List;
593
594public:
595 PriorityList(const SyntaxTreeImpl &Tree)
596 : Tree(Tree), Cmp(Tree), List(Cmp, Container) {}
597
598 void push(NodeId id) { List.push(id); }
599
600 std::vector<NodeId> pop() {
601 int Max = peekMax();
602 std::vector<NodeId> Result;
603 if (Max == 0)
604 return Result;
605 while (peekMax() == Max) {
606 Result.push_back(List.top());
607 List.pop();
608 }
609 // TODO this is here to get a stable output, not a good heuristic
610 std::sort(Result.begin(), Result.end());
611 return Result;
612 }
613 int peekMax() const {
614 if (List.empty())
615 return 0;
616 return Tree.getNode(List.top()).Height;
617 }
618 void open(NodeId Id) {
619 for (NodeId Child : Tree.getNode(Id).Children)
620 push(Child);
621 }
622};
623
624bool ASTDiff::Impl::isomorphic(NodeId Id1, NodeId Id2) const {
625 const Node &N1 = T1.getNode(Id1);
626 const Node &N2 = T2.getNode(Id2);
627 if (N1.Children.size() != N2.Children.size() ||
628 !isMatchingPossible(Id1, Id2) ||
629 Options.getNodeDistance(*T1.Parent, N1.ASTNode, *T2.Parent, N2.ASTNode) !=
630 0)
631 return false;
632 for (size_t Id = 0, E = N1.Children.size(); Id < E; ++Id)
633 if (!isomorphic(N1.Children[Id], N2.Children[Id]))
634 return false;
635 return true;
636}
637
638bool ASTDiff::Impl::canBeAddedToMapping(const Mapping &M, NodeId Id1,
639 NodeId Id2) const {
640 assert(isMatchingPossible(Id1, Id2) &&
641 "Matching must be possible in the first place.");
642 if (M.hasSrcDst(Id1, Id2))
643 return false;
644 if (Options.EnableMatchingWithUnmatchableParents)
645 return true;
646 const Node &N1 = T1.getNode(Id1);
647 const Node &N2 = T2.getNode(Id2);
648 NodeId P1 = N1.Parent;
649 NodeId P2 = N2.Parent;
650 // Only allow matching if parents can be matched.
651 return (P1.isInvalid() && P2.isInvalid()) ||
652 (P1.isValid() && P2.isValid() && isMatchingPossible(P1, P2));
653}
654
655bool ASTDiff::Impl::isMatchingPossible(NodeId Id1, NodeId Id2) const {
656 return Options.isMatchingAllowed(T1.getNode(Id1).ASTNode,
657 T2.getNode(Id2).ASTNode);
658}
659
660void ASTDiff::Impl::addIsomorphicSubTrees(Mapping &M, NodeId Id1,
661 NodeId Id2) const {
662 assert(isomorphic(Id1, Id2) && "Can only be called on isomorphic subtrees.");
663 M.link(Id1, Id2);
664 const Node &N1 = T1.getNode(Id1);
665 const Node &N2 = T2.getNode(Id2);
666 for (size_t Id = 0, E = N1.Children.size(); Id < E; ++Id)
667 addIsomorphicSubTrees(M, N1.Children[Id], N2.Children[Id]);
668}
669
670void ASTDiff::Impl::addOptimalMapping(Mapping &M, NodeId Id1,
671 NodeId Id2) const {
672 if (std::max(T1.getNumberOfDescendants(Id1),
673 T2.getNumberOfDescendants(Id2)) >= Options.MaxSize)
674 return;
675 ZhangShashaMatcher Matcher(*this, T1, T2, Id1, Id2);
676 std::vector<std::pair<NodeId, NodeId>> R = Matcher.getMatchingNodes();
677 for (const auto Tuple : R) {
678 NodeId Src = Tuple.first;
679 NodeId Dst = Tuple.second;
680 if (canBeAddedToMapping(M, Src, Dst))
681 M.link(Src, Dst);
682 }
683}
684
685double ASTDiff::Impl::getSimilarity(const Mapping &M, NodeId Id1,
686 NodeId Id2) const {
687 if (Id1.isInvalid() || Id2.isInvalid())
688 return 0.0;
689 int CommonDescendants = 0;
690 const Node &N1 = T1.getNode(Id1);
691 for (NodeId Id = Id1 + 1; Id <= N1.RightMostDescendant; ++Id)
692 CommonDescendants += int(T2.isInSubtree(M.getDst(Id), Id2));
693 return 2.0 * CommonDescendants /
694 (T1.getNumberOfDescendants(Id1) + T2.getNumberOfDescendants(Id2));
695}
696
697NodeId ASTDiff::Impl::findCandidate(const Mapping &M, NodeId Id1) const {
698 NodeId Candidate;
699 double MaxSimilarity = 0.0;
700 for (NodeId Id2 = 0, E = T2.getSize(); Id2 < E; ++Id2) {
701 if (!isMatchingPossible(Id1, Id2))
702 continue;
703 if (M.hasDst(Id2))
704 continue;
705 double Similarity = getSimilarity(M, Id1, Id2);
706 if (Similarity > MaxSimilarity) {
707 MaxSimilarity = Similarity;
708 Candidate = Id2;
709 }
710 }
711 return Candidate;
712}
713
714void ASTDiff::Impl::matchBottomUp(Mapping &M) const {
715 std::vector<NodeId> Postorder = getSubtreePostorder(T1, T1.root());
716 for (NodeId Id1 : Postorder) {
717 if (Id1 == T1.root()) {
718 if (isMatchingPossible(T1.root(), T2.root())) {
719 M.link(T1.root(), T2.root());
720 addOptimalMapping(M, T1.root(), T2.root());
721 }
722 break;
723 }
724 const Node &N1 = T1.getNode(Id1);
725 bool Matched = M.hasSrc(Id1);
726 bool MatchedChildren =
727 std::any_of(N1.Children.begin(), N1.Children.end(),
728 [&](NodeId Child) { return M.hasSrc(Child); });
729 if (Matched || !MatchedChildren)
730 continue;
731 NodeId Id2 = findCandidate(M, Id1);
732 if (Id2.isInvalid() || !canBeAddedToMapping(M, Id1, Id2) ||
733 getSimilarity(M, Id1, Id2) < Options.MinSimilarity)
734 continue;
735 M.link(Id1, Id2);
736 addOptimalMapping(M, Id1, Id2);
737 }
738}
739
740Mapping ASTDiff::Impl::matchTopDown() const {
741 PriorityList L1(T1);
742 PriorityList L2(T2);
743
744 Mapping M(T1.getSize(), T2.getSize());
745
746 L1.push(T1.root());
747 L2.push(T2.root());
748
749 int Max1, Max2;
750 while (std::min(Max1 = L1.peekMax(), Max2 = L2.peekMax()) >
751 Options.MinHeight) {
752 if (Max1 > Max2) {
753 for (NodeId Id : L1.pop())
754 L1.open(Id);
755 continue;
756 }
757 if (Max2 > Max1) {
758 for (NodeId Id : L2.pop())
759 L2.open(Id);
760 continue;
761 }
762 std::vector<NodeId> H1, H2;
763 H1 = L1.pop();
764 H2 = L2.pop();
765 for (NodeId Id1 : H1) {
766 for (NodeId Id2 : H2)
767 if (isomorphic(Id1, Id2) && canBeAddedToMapping(M, Id1, Id2))
768 addIsomorphicSubTrees(M, Id1, Id2);
769 }
770 for (NodeId Id1 : H1) {
771 if (!M.hasSrc(Id1))
772 L1.open(Id1);
773 }
774 for (NodeId Id2 : H2) {
775 if (!M.hasDst(Id2))
776 L2.open(Id2);
777 }
778 }
779 return M;
780}
781
782void ASTDiff::Impl::computeMapping() {
783 if (IsMappingDone)
784 return;
785 TheMapping = matchTopDown();
786 matchBottomUp(TheMapping);
787 IsMappingDone = true;
788}
789
790std::vector<Match> ASTDiff::Impl::getMatches(Mapping &M) {
791 std::vector<Match> Matches;
792 for (NodeId Id1 = 0, Id2, E = T1.getSize(); Id1 < E; ++Id1)
793 if ((Id2 = M.getDst(Id1)).isValid())
794 Matches.push_back({Id1, Id2});
795 return Matches;
796}
797
798std::vector<Change> ASTDiff::Impl::computeChanges(Mapping &M) {
799 std::vector<Change> Changes;
800 for (NodeId Id2 : getSubtreeBfs(T2, T2.root())) {
801 const Node &N2 = T2.getNode(Id2);
802 NodeId Id1 = M.getSrc(Id2);
803 if (Id1.isValid()) {
804 assert(isMatchingPossible(Id1, Id2) && "Invalid matching.");
805 if (T1.getNodeValueImpl(Id1) != T2.getNodeValueImpl(Id2)) {
806 Changes.emplace_back(Update, Id1, Id2);
807 }
808 continue;
809 }
810 NodeId P2 = N2.Parent;
811 NodeId P1 = M.getSrc(P2);
812 assert(P1.isValid() &&
813 "Parents must be matched for determining the change type.");
814 Node &Parent1 = T1.getMutableNode(P1);
815 const Node &Parent2 = T2.getNode(P2);
816 auto &Siblings1 = Parent1.Children;
817 const auto &Siblings2 = Parent2.Children;
818 size_t Position;
819 for (Position = 0; Position < Siblings2.size(); ++Position)
820 if (Siblings2[Position] == Id2 || Position >= Siblings1.size())
821 break;
822 Changes.emplace_back(Insert, Id2, P2, Position);
823 Node PatchNode;
824 PatchNode.Parent = P1;
825 PatchNode.LeftMostDescendant = N2.LeftMostDescendant;
826 PatchNode.RightMostDescendant = N2.RightMostDescendant;
827 PatchNode.Depth = N2.Depth;
828 PatchNode.ASTNode = N2.ASTNode;
829 // TODO update Depth if needed
830 NodeId PatchNodeId = T1.getSize();
831 // TODO maybe choose a different data structure for Children.
832 Siblings1.insert(Siblings1.begin() + Position, PatchNodeId);
833 T1.addNode(PatchNode);
834 M.link(PatchNodeId, Id2);
835 }
836 for (NodeId Id1 = 0; Id1 < T1.getSize(); ++Id1) {
837 NodeId Id2 = M.getDst(Id1);
838 if (Id2.isInvalid())
839 Changes.emplace_back(Delete, Id1, Id2);
840 }
841 return Changes;
842}
843
844void ASTDiff::Impl::printChangeImpl(raw_ostream &OS, const Change &Chg) const {
845 switch (Chg.Kind) {
846 case Delete:
847 OS << "Delete ";
848 T1.printNode(OS, Chg.Src);
849 OS << "\n";
850 break;
851 case Update:
852 OS << "Update ";
853 T1.printNode(OS, Chg.Src);
854 OS << " to " << T2.getNodeValueImpl(Chg.Dst) << "\n";
855 break;
856 case Insert:
857 OS << "Insert ";
858 T2.printNode(OS, Chg.Src);
859 OS << " into ";
860 T2.printNode(OS, Chg.Dst);
861 OS << " at " << Chg.Position << "\n";
862 break;
863 case Move:
864 llvm_unreachable("TODO");
865 break;
866 };
867}
868
869void ASTDiff::Impl::printMatchImpl(raw_ostream &OS, const Match &M) const {
870 OS << "Match ";
871 T1.printNode(OS, M.Src);
872 OS << " to ";
873 T2.printNode(OS, M.Dst);
874 OS << "\n";
875}
876
877ASTDiff::ASTDiff(SyntaxTree &T1, SyntaxTree &T2,
878 const ComparisonOptions &Options)
879 : DiffImpl(llvm::make_unique<Impl>(*T1.TreeImpl, *T2.TreeImpl, Options)) {}
880
881ASTDiff::~ASTDiff() {}
882
883SyntaxTree::SyntaxTree(const ASTContext &AST)
884 : TreeImpl(llvm::make_unique<SyntaxTreeImpl>(
885 this, AST.getTranslationUnitDecl(), AST)) {}
886
887std::vector<Match> ASTDiff::getMatches() {
888 DiffImpl->computeMapping();
889 return DiffImpl->getMatches(DiffImpl->TheMapping);
890}
891
892std::vector<Change> ASTDiff::getChanges() {
893 DiffImpl->computeMapping();
894 return DiffImpl->computeChanges(DiffImpl->TheMapping);
895}
896
897void ASTDiff::printChange(raw_ostream &OS, const Change &Chg) const {
898 DiffImpl->printChangeImpl(OS, Chg);
899}
900
901void ASTDiff::printMatch(raw_ostream &OS, const Match &M) const {
902 DiffImpl->printMatchImpl(OS, M);
903}
904
905void SyntaxTree::printAsJson(raw_ostream &OS) { TreeImpl->printAsJsonImpl(OS); }
906
907std::string SyntaxTree::getNodeValue(const DynTypedNode &DTN) const {
908 return TreeImpl->getNodeValueImpl(DTN);
909}
910
911} // end namespace diff
912} // end namespace clang