teemperor updated this revision to Diff 65086.
teemperor added a comment.

- StmtDataCollector is now using ConstStmtVisitor
- Added tests for StmtDataCollector


https://reviews.llvm.org/D22514

Files:
  lib/Analysis/CloneDetection.cpp
  test/Analysis/copypaste/test-asm.cpp
  test/Analysis/copypaste/test-attributes.cpp
  test/Analysis/copypaste/test-call.cpp
  test/Analysis/copypaste/test-catch.cpp
  test/Analysis/copypaste/test-delete.cpp
  test/Analysis/copypaste/test-dependent-exist.cpp
  test/Analysis/copypaste/test-expr-types.cpp
  test/Analysis/copypaste/test-fold.cpp
  test/Analysis/copypaste/test-generic.c
  test/Analysis/copypaste/test-labels.cpp
  test/Analysis/copypaste/test-lambda.cpp
  test/Analysis/copypaste/test-min-max.cpp
  test/Analysis/copypaste/test-operators.cpp
  test/Analysis/copypaste/test-predefined.cpp
  test/Analysis/copypaste/test-trait.cpp

Index: test/Analysis/copypaste/test-trait.cpp
===================================================================
--- /dev/null
+++ test/Analysis/copypaste/test-trait.cpp
@@ -0,0 +1,63 @@
+// RUN: %clang_cc1 -analyze -std=c++1z -analyzer-checker=alpha.clone.CloneChecker -verify %s
+
+// expected-no-diagnostics
+
+bool foo1(int x) {
+  if (x > 0)
+    return false;
+  else if (x < 0)
+    return __is_trivially_constructible(long, int*, int*);
+  return true;
+}
+
+// Identical to foo1 except that long was replaced by int in the TypeTraitExpr.
+bool foo2(int x) {
+  if (x > 0)
+    return false;
+  else if (x < 0)
+    return __is_trivially_constructible(int, int*, int*);
+  return true;
+}
+
+// Identical to foo1 except the different number of types in the TypeTraitExpr.
+bool foo3(int x) {
+  if (x > 0)
+    return false;
+  else if (x < 0)
+    return __is_trivially_constructible(long, int*);
+  return true;
+}
+
+bool foo4(int x) {
+  if (x > 0)
+    return false;
+  else if (x < 0)
+    return __is_enum(long);
+  return true;
+}
+
+// Identical to foo4 except the different TypeTraitExpr kind.
+bool foo5(int x) {
+  if (x > 0)
+    return false;
+  else if (x < 0)
+    return __is_pod(long);
+  return true;
+}
+
+bool foo6(int x) {
+  if (x > 0)
+    return false;
+  else if (x < 0)
+    return __is_lvalue_expr(x);
+  return true;
+}
+
+// Identical to foo6 except the different ExpressionTraitExpr kind.
+bool foo7(int x) {
+  if (x > 0)
+    return false;
+  else if (x < 0)
+    return __is_rvalue_expr(x);
+  return true;
+}
Index: test/Analysis/copypaste/test-predefined.cpp
===================================================================
--- /dev/null
+++ test/Analysis/copypaste/test-predefined.cpp
@@ -0,0 +1,20 @@
+// RUN: %clang_cc1 -analyze -std=c++1z -analyzer-checker=alpha.clone.CloneChecker -verify %s
+
+// expected-no-diagnostics
+
+bool foo1(int x) {
+  if (x > 0)
+    return false;
+  else if (x < 0)
+    return sizeof(__PRETTY_FUNCTION__);
+  return true;
+}
+
+// Identical to foo1 except the different predefined __func__ instead of __PRETTY_FUNCTION__
+bool foo2(int x) {
+  if (x > 0)
+    return false;
+  else if (x < 0)
+    return sizeof(__func__);
+  return true;
+}
Index: test/Analysis/copypaste/test-operators.cpp
===================================================================
--- /dev/null
+++ test/Analysis/copypaste/test-operators.cpp
@@ -0,0 +1,37 @@
+// RUN: %clang_cc1 -analyze -std=c++1z -analyzer-checker=alpha.clone.CloneChecker -verify %s
+
+// expected-no-diagnostics
+
+bool foo1(int x) {
+  if (x > 0)
+    return false;
+  else if (x < 0)
+    return +x;
+  return true;
+}
+
+// Identical to foo1 except the different unary operator kind.
+bool foo2(int x) {
+  if (x > 0)
+    return false;
+  else if (x < 0)
+    return -x;
+  return true;
+}
+
+bool foo3(int x) {
+  if (x > 0)
+    return false;
+  else if (x < 0)
+    return x - x;
+  return true;
+}
+
+// Identical to foo3 except the different binary operator kind.
+bool foo4(int x) {
+  if (x > 0)
+    return false;
+  else if (x < 0)
+    return x + x;
+  return true;
+}
Index: test/Analysis/copypaste/test-min-max.cpp
===================================================================
--- test/Analysis/copypaste/test-min-max.cpp
+++ test/Analysis/copypaste/test-min-max.cpp
@@ -18,14 +18,6 @@
 
 // False positives below. These clones should not be reported.
 
-// FIXME: Detect different binary operator kinds.
-int min1(int a, int b) { // expected-note{{Related code clone is here.}}
-  log();
-  if (a < b)
-    return a;
-  return b;
-}
-
 // FIXME: Detect different variable patterns.
 int min2(int a, int b) { // expected-note{{Related code clone is here.}}
   log();
@@ -36,6 +28,13 @@
 
 // Functions below are not clones and should not be reported.
 
+int min1(int a, int b) { // no-warning
+  log();
+  if (a < b)
+    return a;
+  return b;
+}
+
 int foo(int a, int b) { // no-warning
   return a + b;
 }
Index: test/Analysis/copypaste/test-lambda.cpp
===================================================================
--- /dev/null
+++ test/Analysis/copypaste/test-lambda.cpp
@@ -0,0 +1,24 @@
+// RUN: %clang_cc1 -analyze -std=c++11 -analyzer-checker=alpha.clone.CloneChecker -verify %s
+
+// expected-no-diagnostics
+
+void foo1(int a, long b) {
+  auto l = [a, b](){};
+}
+
+void foo2(int a, long b) {
+  auto l = [&a, b](){};
+}
+
+void foo3(int a, long b) {
+  auto l = [a](){};
+}
+
+void foo4(int a, long b) {
+  auto l = [=](){};
+}
+
+void foo5(int a, long b) {
+  auto l = [&](){};
+}
+
Index: test/Analysis/copypaste/test-labels.cpp
===================================================================
--- /dev/null
+++ test/Analysis/copypaste/test-labels.cpp
@@ -0,0 +1,51 @@
+// RUN: %clang_cc1 -analyze -std=gnu++11 -analyzer-checker=alpha.clone.CloneChecker -verify %s
+
+// expected-no-diagnostics
+
+
+bool foo1(int x) {
+  start:
+  if (x != 3) {
+    ++x;
+    void *ptr = &&start;
+    goto start;
+  }
+  end:
+  return false;
+}
+
+// Targeting a different label with the address-of-label operator.
+bool foo2(int x) {
+  start:
+  if (x != 3) {
+    ++x;
+    void *ptr = &&end;
+    goto start;
+  }
+  end:
+  return false;
+}
+
+// Different target label in goto
+bool foo3(int x) {
+  start:
+  if (x != 3) {
+    ++x;
+    void *ptr = &&start;
+    goto end;
+  }
+  end:
+  return false;
+}
+
+// FIXME: Can't detect same algorithm as in foo1 but with different label names.
+bool foo4(int x) {
+  foo:
+  if (x != 3) {
+    ++x;
+    void *ptr = &&foo;
+    goto foo;
+  }
+  end:
+  return false;
+}
Index: test/Analysis/copypaste/test-generic.c
===================================================================
--- /dev/null
+++ test/Analysis/copypaste/test-generic.c
@@ -0,0 +1,31 @@
+// RUN: %clang_cc1 -analyze -std=c11 -analyzer-checker=alpha.clone.CloneChecker -verify %s
+
+// expected-no-diagnostics
+
+int global;
+
+int foo1() {
+  if (global > 0)
+    return 0;
+  else if (global < 0)
+    return _Generic(global, double: 1, float: 2, default: 3);
+  return 1;
+}
+
+// Different associated type (int instead of float)
+int foo2() {
+  if (global > 0)
+    return 0;
+  else if (global < 0)
+    return _Generic(global, double: 1, int: 2, default: 4);
+  return 1;
+}
+
+// Different number of associated types.
+int foo3() {
+  if (global > 0)
+    return 0;
+  else if (global < 0)
+    return _Generic(global, double: 1, default: 4);
+  return 1;
+}
Index: test/Analysis/copypaste/test-fold.cpp
===================================================================
--- /dev/null
+++ test/Analysis/copypaste/test-fold.cpp
@@ -0,0 +1,35 @@
+// RUN: %clang_cc1 -analyze -std=c++1z -analyzer-checker=alpha.clone.CloneChecker -verify %s
+
+// expected-no-diagnostics
+
+int global = 0;
+
+template<typename ...Args>
+int foo1(Args&&... args) {
+  if (global > 0)
+    return 0;
+  else if (global < 0)
+    return (args + ...);
+  return 1;
+}
+
+// Different opeator in fold expression.
+template<typename ...Args>
+int foo2(Args&&... args) {
+  if (global > 0)
+    return 0;
+  else if (global < 0)
+    return (args - ...);
+  return 1;
+}
+
+// Parameter pack on a different side
+template<typename ...Args>
+int foo3(Args&&... args) {
+  if (global > 0)
+    return 0;
+  else if (global < 0)
+    return -1;
+  return (... + args);
+return 1;
+}
Index: test/Analysis/copypaste/test-expr-types.cpp
===================================================================
--- /dev/null
+++ test/Analysis/copypaste/test-expr-types.cpp
@@ -0,0 +1,17 @@
+// RUN: %clang_cc1 -analyze -std=c++11 -analyzer-checker=alpha.clone.CloneChecker -verify %s
+
+// expected-no-diagnostics
+
+
+int foo1(int a, int b) {
+  if (a > b)
+    return a;
+  return b;
+}
+
+// Different types, so not a clone
+int foo2(long a, long b) {
+  if (a > b)
+    return a;
+  return b;
+}
Index: test/Analysis/copypaste/test-dependent-exist.cpp
===================================================================
--- /dev/null
+++ test/Analysis/copypaste/test-dependent-exist.cpp
@@ -0,0 +1,23 @@
+// RUN: %clang_cc1 -analyze -fms-extensions -std=c++1z -analyzer-checker=alpha.clone.CloneChecker -verify %s
+
+// expected-no-diagnostics
+
+
+bool foo1(int x) {
+  if (x > 0)
+    return false;
+  else if (x < 0) {
+    __if_exists(x) { return false; }
+  }
+  return true;
+}
+
+// Same as above, but __if_not_exists
+bool foo2(int x) {
+  if (x > 0)
+    return false;
+  else if (x < 0) {
+    __if_not_exists(x) { return false; }
+  }
+  return true;
+}
Index: test/Analysis/copypaste/test-delete.cpp
===================================================================
--- /dev/null
+++ test/Analysis/copypaste/test-delete.cpp
@@ -0,0 +1,29 @@
+// RUN: %clang_cc1 -analyze -std=c++1z -analyzer-checker=alpha.clone.CloneChecker -verify %s
+
+// expected-no-diagnostics
+
+bool foo1(int x, int* a) {
+  if (x > 0)
+    return false;
+  else if (x < 0)
+    delete a;
+  return true;
+}
+
+// Explicit global delete
+bool foo2(int x, int* a) {
+  if (x > 0)
+    return false;
+  else if (x < 0)
+    ::delete a;
+  return true;
+}
+
+// Array delete
+bool foo3(int x, int* a) {
+  if (x > 0)
+    return false;
+  else if (x < 0)
+    delete[] a;
+  return true;
+}
Index: test/Analysis/copypaste/test-catch.cpp
===================================================================
--- /dev/null
+++ test/Analysis/copypaste/test-catch.cpp
@@ -0,0 +1,29 @@
+// RUN: %clang_cc1 -analyze -fcxx-exceptions -std=c++1z -analyzer-checker=alpha.clone.CloneChecker -verify %s
+
+// expected-no-diagnostics
+
+bool foo1(int x) {
+  if (x > 0)
+    return false;
+  else if (x < 0)
+    try { x--; } catch (int i) {}
+  return true;
+}
+
+// Uses parenthesis instead of type
+bool foo2(int x) {
+  if (x > 0)
+    return false;
+  else if (x < 0)
+    try { x--; } catch (...) {}
+  return true;
+}
+
+// Catches a different type (long instead of int)
+bool foo3(int x) {
+  if (x > 0)
+    return false;
+  else if (x < 0)
+    try { x--; } catch (long i) {}
+  return true;
+}
Index: test/Analysis/copypaste/test-call.cpp
===================================================================
--- /dev/null
+++ test/Analysis/copypaste/test-call.cpp
@@ -0,0 +1,24 @@
+// RUN: %clang_cc1 -analyze -std=c++1z -analyzer-checker=alpha.clone.CloneChecker -verify %s
+
+// expected-no-diagnostics
+
+bool a();
+bool b();
+
+// Calls method a with some extra code to pass the minimum complexity
+bool foo1(int x) {
+  if (x > 0)
+    return false;
+  else if (x < 0)
+    return a();
+  return true;
+}
+
+// Calls method b with some extra code to pass the minimum complexity
+bool foo2(int x) {
+  if (x > 0)
+    return false;
+  else if (x < 0)
+    return b();
+  return true;
+}
Index: test/Analysis/copypaste/test-attributes.cpp
===================================================================
--- /dev/null
+++ test/Analysis/copypaste/test-attributes.cpp
@@ -0,0 +1,28 @@
+// RUN: %clang_cc1 -analyze -std=c++1z -analyzer-checker=alpha.clone.CloneChecker -verify %s
+
+// expected-no-diagnostics
+
+int foo1(int n) {
+  int result = 0;
+  switch (n) {
+  case 33:
+    result += 33;
+    [[clang::fallthrough]];
+  case 44:
+    result += 44;
+  }
+  return result;
+}
+
+// Identical to foo1 except the missing attribute.
+int foo2(int n) {
+  int result = 0;
+  switch (n) {
+  case 33:
+    result += 33;
+    ;
+  case 44:
+    result += 44;
+  }
+  return result;
+}
Index: test/Analysis/copypaste/test-asm.cpp
===================================================================
--- /dev/null
+++ test/Analysis/copypaste/test-asm.cpp
@@ -0,0 +1,44 @@
+// RUN: %clang_cc1 -analyze -std=c++11 -analyzer-checker=alpha.clone.CloneChecker -verify %s
+
+// expected-no-diagnostics
+
+int foo1(int src) {
+  int dst = src;
+  if (src < 100 && src > 0) {
+
+    asm ("mov %1, %0\n\t"
+         "add $1, %0"
+         : "=r" (dst)
+         : "r" (src));
+
+  }
+  return dst;
+}
+
+// Identical to foo1 except that it adds two instead of one, so it's no clone.
+int foo2(int src) {
+  int dst = src;
+  if (src < 100 && src > 0) {
+
+    asm ("mov %1, %0\n\t"
+         "add $2, %0"
+         : "=r" (dst)
+         : "r" (src));
+
+  }
+  return dst;
+}
+
+// Identical to foo1 except that its a volatile asm statement, so it's no clone.
+int foo3(int src) {
+  int dst = src;
+  if (src < 100 && src > 0) {
+
+    asm volatile ("mov %1, %0\n\t"
+         "add $1, %0"
+         : "=r" (dst)
+         : "r" (src));
+
+  }
+  return dst;
+}
Index: lib/Analysis/CloneDetection.cpp
===================================================================
--- lib/Analysis/CloneDetection.cpp
+++ lib/Analysis/CloneDetection.cpp
@@ -16,6 +16,7 @@
 #include "clang/AST/ASTContext.h"
 #include "clang/AST/RecursiveASTVisitor.h"
 #include "clang/AST/Stmt.h"
+#include "clang/AST/StmtVisitor.h"
 
 using namespace clang;
 
@@ -78,6 +79,157 @@
 SourceLocation StmtSequence::getEndLoc() const { return back()->getLocEnd(); }
 
 namespace {
+/// \brief Collects the data of a single Stmt.
+///
+/// The data of a statement that isn't collected by this class is considered to
+/// be unimportant when comparing two statements. This means that this class
+/// defines what a 'similar' clone is. For example, this class doesn't collect
+/// names of variables, which makes statements that only differ in the names of
+/// the referenced variables clones of each other.
+class StmtDataCollector : public ConstStmtVisitor<StmtDataCollector> {
+
+  ASTContext &Context;
+  llvm::FoldingSetNodeID &D;
+
+public:
+
+  /// \brief Collects data from the given Stmt and saves it into the given
+  ///        FoldingSetNodeID.
+  StmtDataCollector(Stmt const *S, ASTContext &Context,
+                    llvm::FoldingSetNodeID &D) : Context(Context), D(D) {
+    Visit(S);
+  }
+
+  // Below are utility methods for adding generic data to the FoldingSetNodeID.
+
+  void addData(unsigned Data) {
+    D.AddInteger(Data);
+  }
+
+  void addData(llvm::StringRef const &String) {
+    D.AddString(String);
+  }
+
+  void addData(std::string const &String) {
+    D.AddString(String);
+  }
+
+  void addData(QualType QT) {
+    addData(QT.getAsString());
+  }
+
+  // The functions below collect the class specific data of each Stmt subclass.
+
+  // Utility macro for collecting statement specific data.
+  #define DEF_ADD_DATA(CLASS, CODE)                                            \
+  void Visit##CLASS(CLASS const *S) {                                          \
+    CODE;                                                                      \
+  }
+
+  DEF_ADD_DATA(Stmt, { addData(S->getStmtClass()); })
+  DEF_ADD_DATA(Expr, { addData(S->getType()); })
+
+  //--- Builtin functionality ----------------------------------------------//
+  DEF_ADD_DATA(ArrayTypeTraitExpr, { addData(S->getTrait()); })
+  DEF_ADD_DATA(ExpressionTraitExpr, { addData(S->getTrait()); })
+  DEF_ADD_DATA(PredefinedExpr, { addData(S->getIdentType()); })
+  DEF_ADD_DATA(TypeTraitExpr, {
+    addData(S->getTrait());
+    for (unsigned i = 0; i < S->getNumArgs(); ++i)
+      addData(S->getArg(i)->getType());
+  })
+
+  //--- Calls --------------------------------------------------------------//
+  DEF_ADD_DATA(CallExpr, {
+    addData(S->getDirectCallee()->getQualifiedNameAsString());
+  })
+
+  //--- Exceptions ---------------------------------------------------------//
+  DEF_ADD_DATA(CXXCatchStmt, { addData(S->getCaughtType()); })
+
+  //--- C++ OOP Stmts ------------------------------------------------------//
+  DEF_ADD_DATA(CXXDeleteExpr, {
+    addData(S->isArrayFormAsWritten());
+    addData(S->isGlobalDelete());
+  })
+
+  //--- Casts --------------------------------------------------------------//
+  DEF_ADD_DATA(ObjCBridgedCastExpr, { addData(S->getBridgeKind()); })
+
+  //--- Miscellaneous Exprs ------------------------------------------------//
+  DEF_ADD_DATA(BinaryOperator, { addData(S->getOpcode()); })
+  DEF_ADD_DATA(UnaryOperator, { addData(S->getOpcode()); })
+
+  //--- Control flow -------------------------------------------------------//
+  DEF_ADD_DATA(GotoStmt, { addData(S->getLabel()->getName()); })
+  DEF_ADD_DATA(IndirectGotoStmt, {
+    if (S->getConstantTarget())
+      addData(S->getConstantTarget()->getName());
+  })
+  DEF_ADD_DATA(LabelStmt, { addData(S->getDecl()->getName()); })
+  DEF_ADD_DATA(MSDependentExistsStmt, { addData(S->isIfExists()); })
+  DEF_ADD_DATA(AddrLabelExpr, { addData(S->getLabel()->getName()); })
+
+  //--- Objective-C --------------------------------------------------------//
+  DEF_ADD_DATA(ObjCIndirectCopyRestoreExpr, { addData(S->shouldCopy()); })
+  DEF_ADD_DATA(ObjCPropertyRefExpr, {
+    addData(S->isSuperReceiver());
+    addData(S->isImplicitProperty());
+  })
+  DEF_ADD_DATA(ObjCAtCatchStmt, { addData(S->hasEllipsis()); })
+
+  //--- Miscellaneous Stmts ------------------------------------------------//
+  DEF_ADD_DATA(CXXFoldExpr, {
+    addData(S->isRightFold());
+    addData(S->getOperator());
+  })
+  DEF_ADD_DATA(GenericSelectionExpr, {
+    for (unsigned i = 0; i < S->getNumAssocs(); ++i) {
+      addData(S->getAssocType(i));
+    }
+  })
+  DEF_ADD_DATA(LambdaExpr, {
+    for (const LambdaCapture &C : S->captures()) {
+      addData(C.isPackExpansion());
+      addData(C.getCaptureKind());
+      if (C.capturesVariable())
+        addData(C.getCapturedVar()->getType());
+    }
+    addData(S->isGenericLambda());
+    addData(S->isMutable());
+  })
+  DEF_ADD_DATA(DeclStmt, {
+    auto numDecls = std::distance(S->decl_begin(), S->decl_end());
+    addData(static_cast<unsigned>(numDecls));
+    for (Decl *D : S->decls()) {
+      if (VarDecl* VD = dyn_cast<VarDecl>(D)) {
+        addData(VD->getType());
+      }
+    }
+  })
+  DEF_ADD_DATA(AsmStmt, {
+    addData(S->isSimple());
+    addData(S->isVolatile());
+    addData(S->generateAsmString(Context));
+    for (unsigned i = 0; i < S->getNumInputs(); ++i) {
+      addData(S->getInputConstraint(i));
+    }
+    for (unsigned i = 0; i < S->getNumOutputs(); ++i) {
+      addData(S->getOutputConstraint(i));
+    }
+    for (unsigned i = 0; i < S->getNumClobbers(); ++i) {
+      addData(S->getClobber(i));
+    }
+  })
+  DEF_ADD_DATA(AttributedStmt, {
+    for (Attr const * A : S->getAttrs()) {
+      addData(std::string(A->getSpelling()));
+    }
+  })
+};
+} // end anonymous namespace
+
+namespace {
 /// \brief A cache for temporarily storing a small amount of CloneSignatures.
 ///
 /// This cache should be manually cleaned from CloneSignatures that are
@@ -110,6 +262,8 @@
   /// \param Context The ASTContext that contains Parent.
   void removeChildren(Stmt const *Parent, ASTContext &Context) {
     for (Stmt const *Child : Parent->children()) {
+      if (!Child)
+        continue;
       StmtSequence ChildSequence(Child, Context);
       remove(ChildSequence);
     }
@@ -126,7 +280,13 @@
         return *I;
       }
     }
-    llvm_unreachable("Couldn't find CloneSignature for StmtSequence");
+    // FIXME: We return an empty signature on a cache miss. This isn't a perfect
+    // solution, but it's better than crashing when RecursiveASTVisitor is
+    // skipping some generated statements when generating signatures (which
+    // leads to this situation where signatures are missing in the cache).
+    // Unless RecursiveASTVisitor starts skipping important nodes, this won't
+    // influence the false-positive rate.
+    return CloneDetector::CloneSignature();
   }
 };
 } // end anonymous namespace
@@ -249,10 +409,8 @@
   llvm::FoldingSetNodeID Hash;
   unsigned Complexity = 1;
 
-  // The only relevant data for now is the class of the statement, so we
-  // calculate the hash class into the current hash value.
-  // TODO: Handle statement class specific data.
-  Hash.AddInteger(S->getStmtClass());
+  // Collect all relevant data from the current statement.
+  StmtDataCollector(S, Context, Hash);
 
   // Incorporate the hash values of all child Stmts into the current hash value.
   HandleChildHashes(Hash, Complexity, S->children());
_______________________________________________
cfe-commits mailing list
cfe-commits@lists.llvm.org
http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

Reply via email to