johannes updated this revision to Diff 121647.
johannes added a comment.
use raw source code of owned tokens instead
https://reviews.llvm.org/D37001
Files:
include/clang/Tooling/ASTDiff/ASTDiff.h
lib/Tooling/ASTDiff/ASTDiff.cpp
test/Tooling/clang-diff-basic.cpp
test/Tooling/clang-diff-html.test
test/Tooling/clang-diff-topdown.cpp
Index: test/Tooling/clang-diff-topdown.cpp
===================================================================
--- test/Tooling/clang-diff-topdown.cpp
+++ test/Tooling/clang-diff-topdown.cpp
@@ -35,8 +35,6 @@
int x2 = ::x + 1;
}
-class A { int x = 1 + 1; void f() { int x1 = x; } };
-
#else
@@ -66,18 +64,4 @@
int x2 = ::x + 1;
}
-class B {
- // Only the class name changed; it is not included in the field value,
- // therefore there is no update.
- // CHECK: Match FieldDecl: :x(int)(24) to FieldDecl: :x(int)(29)
- // CHECK-NOT: Update FieldDecl: :x(int)(24)
- int x = 1+1;
- void f() {
- // CHECK: Match MemberExpr: :x(32) to MemberExpr: :x(37)
- // CHECK-NOT: Update MemberExpr: :x(32)
- int x1 = B::x;
- }
-
-};
-
#endif
Index: test/Tooling/clang-diff-html.test
===================================================================
--- test/Tooling/clang-diff-html.test
+++ test/Tooling/clang-diff-html.test
@@ -1,7 +1,7 @@
RUN: clang-diff -html %S/Inputs/clang-diff-basic-src.cpp %S/clang-diff-basic.cpp -- | FileCheck %s
CHECK: <pre><div id='L' class='code'><span id='L0' tid='R0' title='TranslationUnitDecl
-CHECK-NEXT: 0 -> 0'>
+CHECK-NEXT: 0 -> 0' class='u'>
match, update
CHECK: <span id='L[[L:[0-9]+]]' tid='R[[R:[0-9]+]]' title='NamespaceDecl
@@ -32,5 +32,4 @@
CHECK-NEXT: Bar' class='i'>"Bar"</span>
comments
-CHECK: // CHECK: Insert IfStmt{{.*}} into IfStmt
CHECK: // CHECK: Delete AccessSpecDecl: public
Index: test/Tooling/clang-diff-basic.cpp
===================================================================
--- test/Tooling/clang-diff-basic.cpp
+++ test/Tooling/clang-diff-basic.cpp
@@ -30,10 +30,10 @@
class X {
const char *foo(int i) {
+ // CHECK: Insert IfStmt(29) into CompoundStmt(28)
if (i == 0)
return "Bar";
- // CHECK: Insert IfStmt{{.*}} into IfStmt
- // CHECK: Insert BinaryOperator: =={{.*}} into IfStmt
+ // CHECK: Move IfStmt(35) into IfStmt
else if (i == -1)
return "foo";
return 0;
@@ -64,7 +64,7 @@
M1;
// CHECK: Update Macro: M1{{.*}} to M2
M2;
- // CHECK: Update Macro: F(1, 1){{.*}} to F(1, /*b=*/1)
+ // CHECK: Match Macro: F(1, 1)(74)
F(1, /*b=*/1);
}
Index: lib/Tooling/ASTDiff/ASTDiff.cpp
===================================================================
--- lib/Tooling/ASTDiff/ASTDiff.cpp
+++ lib/Tooling/ASTDiff/ASTDiff.cpp
@@ -16,6 +16,7 @@
#include "clang/AST/RecursiveASTVisitor.h"
#include "clang/Lex/Lexer.h"
#include "llvm/ADT/PriorityQueue.h"
+#include "llvm/Support/MD5.h"
#include <limits>
#include <memory>
@@ -111,6 +112,8 @@
};
} // end anonymous namespace
+using HashType = std::array<uint8_t, 16>;
+
/// Represents the AST of a TranslationUnit.
class SyntaxTree::Impl {
public:
@@ -463,6 +466,30 @@
return "";
}
+static HashType hashNode(NodeRef N) {
+ llvm::MD5 Hash;
+ SourceManager &SM = N.getTree().getSourceManager();
+ const LangOptions &LangOpts = N.getTree().getLangOpts();
+ Token Tok;
+ for (auto TokenLocation : N.getOwnedTokens()) {
+ bool Failure = Lexer::getRawToken(TokenLocation, Tok, SM, LangOpts,
+ /*IgnoreWhiteSpace=*/true);
+ assert(!Failure);
+ auto Range = CharSourceRange::getCharRange(TokenLocation, Tok.getEndLoc());
+ // This is here to make CompoundStmt nodes compare equal, to make the tests
+ // pass. It should be changed to include changes to comments.
+ if (!Tok.isOneOf(tok::comment, tok::semi))
+ Hash.update(Lexer::getSourceText(Range, SM, LangOpts));
+ }
+ llvm::MD5::MD5Result HashResult;
+ Hash.final(HashResult);
+ return HashResult;
+}
+
+static bool areNodesDifferent(NodeRef N1, NodeRef N2) {
+ return hashNode(N1) != hashNode(N2);
+}
+
/// Identifies a node in a subtree by its postorder offset, starting at 1.
struct SNodeId {
int Id = 0;
@@ -637,7 +664,7 @@
NodeRef N1 = S1.getNode(Id1), N2 = S2.getNode(Id2);
if (!DiffImpl.isMatchingPossible(N1, N2))
return std::numeric_limits<double>::max();
- return S1.getNodeValue(Id1) != S2.getNodeValue(Id2);
+ return areNodesDifferent(S1.getNode(Id1), S2.getNode(Id2));
}
void computeTreeDist() {
@@ -795,6 +822,91 @@
return {Begin, End};
}
+static bool onlyWhitespace(StringRef Str) {
+ return std::all_of(Str.begin(), Str.end(),
+ [](char C) { return std::isspace(C); });
+}
+
+SmallVector<CharSourceRange, 4> Node::getOwnedSourceRanges() const {
+ SmallVector<CharSourceRange, 4> SourceRanges;
+ SourceManager &SM = getTree().getSourceManager();
+ const LangOptions &LangOpts = getTree().getLangOpts();
+ CharSourceRange Range = getSourceRange();
+ SourceLocation Offset = Range.getBegin();
+ BeforeThanCompare<SourceLocation> Less(SM);
+ auto AddSegment = [&](SourceLocation Until) {
+ if (Offset.isValid() && Until.isValid() && Less(Offset, Until)) {
+ CharSourceRange R = CharSourceRange::getCharRange({Offset, Until});
+ StringRef Text = Lexer::getSourceText(R, SM, LangOpts);
+ if (onlyWhitespace(Text))
+ return;
+ SourceRanges.emplace_back(CharSourceRange::getCharRange({Offset, Until}));
+ }
+ };
+ int ChildIndex = 0;
+ for (NodeRef Descendant : *this) {
+ CharSourceRange DescendantRange = Descendant.getSourceRange();
+ CharSourceRange LMDRange =
+ getTree().getNode(Descendant.LeftMostDescendant).getSourceRange();
+ CharSourceRange RMDRange =
+ getTree().getNode(Descendant.RightMostDescendant).getSourceRange();
+ auto MinValidBegin = [&Less](CharSourceRange &Range1,
+ CharSourceRange &Range2) {
+ SourceLocation Begin1 = Range1.getBegin(), Begin2 = Range2.getBegin();
+ if (Begin1.isInvalid())
+ return Begin2;
+ if (Begin2.isInvalid())
+ return Begin1;
+ return std::min(Begin1, Begin2, Less);
+ };
+ auto MaxValidEnd = [&Less](CharSourceRange &Range1,
+ CharSourceRange &Range2) {
+ SourceLocation End1 = Range1.getEnd(), End2 = Range2.getEnd();
+ if (End1.isInvalid())
+ return End2;
+ if (End2.isInvalid())
+ return End1;
+ return std::max(End1, End2, Less);
+ };
+ auto Min = MinValidBegin(DescendantRange, LMDRange);
+ auto Max = MaxValidEnd(DescendantRange, RMDRange);
+ AddSegment(Min);
+ if (Max.isValid())
+ Offset = Max;
+ ++ChildIndex;
+ }
+ AddSegment(Range.getEnd());
+ return SourceRanges;
+}
+
+void forEachTokenInRange(CharSourceRange Range, SyntaxTree &Tree,
+ std::function<void(Token &)> Body) {
+ SourceLocation Begin = Range.getBegin(), End = Range.getEnd();
+ SourceManager &SM = Tree.getSourceManager();
+ BeforeThanCompare<SourceLocation> Less(SM);
+ Token Tok;
+ while (Begin.isValid() && Less(Begin, End) &&
+ !Lexer::getRawToken(Begin, Tok, SM, Tree.getLangOpts(),
+ /*IgnoreWhiteSpace=*/true) &&
+ Less(Tok.getLocation(), End)) {
+ Body(Tok);
+ Begin = Tok.getEndLoc();
+ }
+}
+
+SmallVector<SourceLocation, 4> Node::getOwnedTokens() const {
+ SmallVector<SourceLocation, 4> TokenLocations;
+ BeforeThanCompare<SourceLocation> Less(getTree().getSourceManager());
+ const auto &SourceRanges = getOwnedSourceRanges();
+ for (auto &Range : SourceRanges) {
+ forEachTokenInRange(Range, getTree(), [&TokenLocations](Token &Tok) {
+ if (!isListSeparator(Tok))
+ TokenLocations.push_back(Tok.getLocation());
+ });
+ }
+ return TokenLocations;
+}
+
namespace {
// Compares nodes by their depth.
struct HeightLess {
@@ -847,7 +959,7 @@
bool ASTDiff::Impl::identical(NodeRef N1, NodeRef N2) const {
if (N1.getNumChildren() != N2.getNumChildren() ||
- !isMatchingPossible(N1, N2) || T1.getNodeValue(N1) != T2.getNodeValue(N2))
+ !isMatchingPossible(N1, N2) || areNodesDifferent(N1, N2))
return false;
for (size_t Id = 0, E = N1.getNumChildren(); Id < E; ++Id)
if (!identical(N1.getChild(Id), N2.getChild(Id)))
@@ -901,7 +1013,7 @@
double ASTDiff::Impl::getNodeSimilarity(NodeRef N1, NodeRef N2) const {
auto Ident1 = N1.getIdentifier(), Ident2 = N2.getIdentifier();
- bool SameValue = T1.getNodeValue(N1) == T2.getNodeValue(N2);
+ bool SameValue = !areNodesDifferent(N1, N2);
auto SameIdent = Ident1 && Ident2 && *Ident1 == *Ident2;
double NodeSimilarity = 0;
@@ -1091,7 +1203,7 @@
findNewPosition(N1) != findNewPosition(N2)) {
MutableN1.Change = MutableN2.Change = Move;
}
- if (T1.getNodeValue(N1) != T2.getNodeValue(N2)) {
+ if (areNodesDifferent(N1, N2)) {
MutableN1.Change = MutableN2.Change =
(N1.Change == Move ? UpdateMove : Update);
}
Index: include/clang/Tooling/ASTDiff/ASTDiff.h
===================================================================
--- include/clang/Tooling/ASTDiff/ASTDiff.h
+++ include/clang/Tooling/ASTDiff/ASTDiff.h
@@ -153,10 +153,22 @@
/// to the parent.
CharSourceRange getSourceRange() const;
+ /// Returns the sub-ranges of getSourceRange() that are exclusively owned by
+ /// this node, that is, none of its descendants includes them.
+ SmallVector<CharSourceRange, 4> getOwnedSourceRanges() const;
+
/// Returns the offsets for the range returned by getSourceRange().
std::pair<unsigned, unsigned> getSourceRangeOffsets() const;
+
+ /// Returns the starting locations of all tokens in getOwnedSourceRanges().
+ SmallVector<SourceLocation, 4> getOwnedTokens() const;
};
+void forEachTokenInRange(CharSourceRange Range, SyntaxTree &Tree,
+ std::function<void(Token &)> Body);
+
+inline bool isListSeparator(Token &Tok) { return Tok.is(tok::comma); }
+
struct NodeRefIterator {
SyntaxTree::Impl *Tree;
const NodeId *IdPointer;
_______________________________________________
cfe-commits mailing list
[email protected]
http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits