etienneb created this revision.
etienneb added a reviewer: alexfh.
etienneb added a subscriber: cfe-commits.

This checker is able to detect missing comma in 
an array of string literals.

```
  const char* A[] = {
    "abc",
    "def"   // missing comma (no compiler warnings)
    "ghi",
  };
```

The ratio of false-positive is reduced by restricting the
size of the array considered and the ratio of missing
comma.

To validate the quantity of false positive, the checker
was tried over LLVM and chromium code and detected these
cases:

[[ http://reviews.llvm.org/D18454 | http://reviews.llvm.org/D18454 ]]
[[https://codereview.chromium.org/1807753002/ | 
https://codereview.chromium.org/1807753002/]]
[[https://codereview.chromium.org/1826193002/ | 
https://codereview.chromium.org/1826193002/]]
[[https://codereview.chromium.org/1805713002/ | 
https://codereview.chromium.org/1805713002/]]

http://reviews.llvm.org/D18457

Files:
  clang-tidy/misc/CMakeLists.txt
  clang-tidy/misc/MiscTidyModule.cpp
  clang-tidy/misc/SuspiciousMissingCommaCheck.cpp
  clang-tidy/misc/SuspiciousMissingCommaCheck.h
  docs/clang-tidy/checks/list.rst
  docs/clang-tidy/checks/misc-suspicious-missing-comma.rst
  test/clang-tidy/misc-suspicious-missing-comma.cpp

Index: test/clang-tidy/misc-suspicious-missing-comma.cpp
===================================================================
--- /dev/null
+++ test/clang-tidy/misc-suspicious-missing-comma.cpp
@@ -0,0 +1,36 @@
+// RUN: %check_clang_tidy %s misc-suspicious-missing-comma %t
+
+const char* Cartoons[] = {
+  "Bugs Bunny",
+  "Homer Simpson",
+  "Mickey Mouse",
+  "Bart Simpson",
+  "Charlie Brown"  // There is a missing comma here.
+  "Fred Flintstone",
+  "Popeye",
+};
+// CHECK-MESSAGES: :[[@LINE-4]]:3: warning: Suspicious string literal. Probably missing a comma. [misc-suspicious-missing-comma]
+
+const wchar_t* Colors[] = {
+  L"Red", L"Yellow", L"Blue", L"Green", L"Purple", L"Rose", L"White", L"Black"
+};
+
+// The following array should not trigger any warnings.
+const char* HttpCommands[] = {
+  "GET / HTTP/1.0\r\n"
+  "\r\n",
+
+  "GET /index.html HTTP/1.0\r\n"
+  "\r\n",
+
+  "GET /favicon.ico HTTP/1.0\r\n"
+  "header: dummy"
+  "\r\n",
+};
+
+
+// This array is too small to trigger a warning.
+const char* SmallArray[] = {
+  "a" "b", "c"
+};
+
Index: docs/clang-tidy/checks/misc-suspicious-missing-comma.rst
===================================================================
--- /dev/null
+++ docs/clang-tidy/checks/misc-suspicious-missing-comma.rst
@@ -0,0 +1,35 @@
+.. title:: clang-tidy - misc-suspicious-missing-comma
+
+misc-suspicious-missing-comma
+=============================
+
+String literals placed side-by-side are concatenated at translation phase 6
+(after the preprocessor). This feature is used to represent long string
+literal on multiple lines.
+
+For instance, these declarations are equivalent:
+  const char* A[] = "This is a test";
+  const char* B[] = "This" " is a "
+                    "test";
+
+A common mistake done by programmers is to forget a comma between two string
+literals in an array initializer list.
+
+  const char* Test[] = {
+    "line 1",
+    "line 2"     // Missing comma!
+    "line 3",
+    "line 4",
+    "line 5"
+  };
+
+The array contains the string "line 2line3" at offset 1 (i.e. Test[1]). Clang
+won't generate warnings at compile time.
+
+This checker may warn incorrectly on cases like:
+
+  const char* SupportedFormat[] = {
+    "Error %s",
+    "Code " PRIu64,   // May warn here.
+    "Warning %s",
+  };
Index: docs/clang-tidy/checks/list.rst
===================================================================
--- docs/clang-tidy/checks/list.rst
+++ docs/clang-tidy/checks/list.rst
@@ -65,6 +65,7 @@
    misc-sizeof-container
    misc-static-assert
    misc-string-integer-assignment
+   misc-suspicious-missing-comma
    misc-suspicious-semicolon
    misc-swapped-arguments
    misc-throw-by-value-catch-by-reference
Index: clang-tidy/misc/SuspiciousMissingCommaCheck.h
===================================================================
--- /dev/null
+++ clang-tidy/misc/SuspiciousMissingCommaCheck.h
@@ -0,0 +1,36 @@
+//===--- SuspiciousMissingCommaCheck.h - clang-tidy--------------*- C++ -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_MISC_SUSPICIOUS_MISSING_COMMA_H
+#define LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_MISC_SUSPICIOUS_MISSING_COMMA_H
+
+#include "../ClangTidy.h"
+
+namespace clang {
+namespace tidy {
+namespace misc {
+
+/// This check finds string literals which are concatenated probably
+/// unintendedly.
+///
+/// For the user-facing documentation see:
+/// http://clang.llvm.org/extra/clang-tidy/checks/misc-suspicious-missing-comma.html
+class SuspiciousMissingCommaCheck : public ClangTidyCheck {
+public:
+  SuspiciousMissingCommaCheck(StringRef Name, ClangTidyContext *Context)
+      : ClangTidyCheck(Name, Context) {}
+  void registerMatchers(ast_matchers::MatchFinder *Finder) override;
+  void check(const ast_matchers::MatchFinder::MatchResult &Result) override;
+};
+
+} // namespace misc
+} // namespace tidy
+} // namespace clang
+
+#endif // LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_MISC_SUSPICIOUS_MISSING_COMMA_H
Index: clang-tidy/misc/SuspiciousMissingCommaCheck.cpp
===================================================================
--- /dev/null
+++ clang-tidy/misc/SuspiciousMissingCommaCheck.cpp
@@ -0,0 +1,69 @@
+//===--- SuspiciousMissingCommaCheck.cpp - clang-tidy----------------------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "SuspiciousMissingCommaCheck.h"
+#include "clang/AST/ASTContext.h"
+#include "clang/ASTMatchers/ASTMatchFinder.h"
+
+using namespace clang::ast_matchers;
+
+namespace clang {
+namespace tidy {
+namespace misc {
+
+namespace {
+
+AST_MATCHER(StringLiteral, isConcatenatedLiteral) {
+  return Node.getNumConcatenated() > 1; 
+}
+
+} // namespace
+
+void SuspiciousMissingCommaCheck::registerMatchers(MatchFinder *Finder) {
+  const auto ConcatenatedStringLiteral =
+      stringLiteral(isConcatenatedLiteral()).bind("str");
+
+  const auto StringsInitializerList =
+      initListExpr(hasType(constantArrayType()),
+                   has(expr(ignoringImpCasts(ConcatenatedStringLiteral))));
+
+  Finder->addMatcher(StringsInitializerList.bind("list"), this);
+}
+
+void SuspiciousMissingCommaCheck::check(const MatchFinder::MatchResult &Result) {
+  const auto *InitializerList = Result.Nodes.getNodeAs<InitListExpr>("list");
+  const auto *ConcatenatedLiteral = Result.Nodes.getNodeAs<Expr>("str");
+  if (InitializerList && ConcatenatedLiteral) {
+    // Skip small arrays as they often generate false-positive.
+    unsigned int Size = InitializerList->getNumInits();
+    if (Size < 5)
+      return;
+
+    // Count the number of occurence of concatenated string literal.
+    unsigned int Count = 0;
+    for (unsigned int i = 0; i < Size; ++i) {
+      const Expr* Child = InitializerList->getInit(i)->IgnoreImpCasts();
+      if (const StringLiteral* Literal = dyn_cast<StringLiteral>(Child)) {
+        if (Literal->getNumConcatenated() > 1)
+          ++Count;
+      }
+    }
+
+    // Warn only when concatenation is not common in this initializer list.
+    // The current threshold is set to less than 1/5 of the string literals.
+    if ((5 * Count)/Size > 1)
+      return;
+
+    diag(ConcatenatedLiteral->getLocStart(), "Suspicious string literal. Probably missing a comma.");
+  }
+}
+
+} // namespace misc
+} // namespace tidy
+} // namespace clang
Index: clang-tidy/misc/MiscTidyModule.cpp
===================================================================
--- clang-tidy/misc/MiscTidyModule.cpp
+++ clang-tidy/misc/MiscTidyModule.cpp
@@ -30,6 +30,7 @@
 #include "SizeofContainerCheck.h"
 #include "StaticAssertCheck.h"
 #include "StringIntegerAssignmentCheck.h"
+#include "SuspiciousMissingCommaCheck.h"
 #include "SuspiciousSemicolonCheck.h"
 #include "SwappedArgumentsCheck.h"
 #include "ThrowByValueCatchByReferenceCheck.h"
@@ -85,6 +86,8 @@
         "misc-static-assert");
     CheckFactories.registerCheck<StringIntegerAssignmentCheck>(
         "misc-string-integer-assignment");
+    CheckFactories.registerCheck<SuspiciousMissingCommaCheck>(
+        "misc-suspicious-missing-comma");
     CheckFactories.registerCheck<SuspiciousSemicolonCheck>(
         "misc-suspicious-semicolon");
     CheckFactories.registerCheck<SwappedArgumentsCheck>(
Index: clang-tidy/misc/CMakeLists.txt
===================================================================
--- clang-tidy/misc/CMakeLists.txt
+++ clang-tidy/misc/CMakeLists.txt
@@ -22,6 +22,7 @@
   SizeofContainerCheck.cpp
   StaticAssertCheck.cpp
   StringIntegerAssignmentCheck.cpp
+  SuspiciousMissingCommaCheck.cpp
   SuspiciousSemicolonCheck.cpp
   SwappedArgumentsCheck.cpp
   ThrowByValueCatchByReferenceCheck.cpp
_______________________________________________
cfe-commits mailing list
cfe-commits@lists.llvm.org
http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

Reply via email to