LegalizeAdulthood updated this revision to Diff 47928.
LegalizeAdulthood added a comment.

Add FIXME for non-ASCII string literals.
Allow delimiter stem to be configured.
Avoid some string concatenation.


http://reviews.llvm.org/D16529

Files:
  clang-tidy/modernize/CMakeLists.txt
  clang-tidy/modernize/ModernizeTidyModule.cpp
  clang-tidy/modernize/RawStringLiteralCheck.cpp
  clang-tidy/modernize/RawStringLiteralCheck.h
  docs/clang-tidy/checks/list.rst
  docs/clang-tidy/checks/modernize-raw-string-literal.rst
  test/clang-tidy/modernize-raw-string-literal-delimiter.cpp
  test/clang-tidy/modernize-raw-string-literal.cpp

Index: test/clang-tidy/modernize-raw-string-literal.cpp
===================================================================
--- /dev/null
+++ test/clang-tidy/modernize-raw-string-literal.cpp
@@ -0,0 +1,122 @@
+// RUN: %check_clang_tidy %s modernize-raw-string-literal %t
+
+char const *const BackSlash{"goink\\frob"};
+// CHECK-MESSAGES: :[[@LINE-1]]:29: warning: escaped string literal can be written as a raw string literal [modernize-raw-string-literal]
+// CHECK-FIXES: {{^}}char const *const BackSlash{R"(goink\frob)"};{{$}}
+
+char const *const PlainLiteral("plain literal");
+
+// Non-printable ASCII characters.
+char const *const Nul{"goink\\\000"};
+char const *const Soh{"goink\\\001"};
+char const *const Stx{"goink\\\002"};
+char const *const Etx{"goink\\\003"};
+char const *const Enq{"goink\\\004"};
+char const *const Ack{"goink\\\005"};
+char const *const Bell{"goink\\\afrob"};
+char const *const BackSpace{"goink\\\bfrob"};
+char const *const HorizontalTab{"goink\\\tfrob"};
+char const *const NewLine{"goink\nfrob"};
+char const *const VerticalTab{"goink\\\vfrob"};
+char const *const FormFeed{"goink\\\ffrob"};
+char const *const CarraigeReturn{"goink\\\rfrob"};
+char const *const So{"goink\\\016"};
+char const *const Si{"goink\\\017"};
+char const *const Dle{"goink\\\020"};
+char const *const Dc1{"goink\\\021"};
+char const *const Dc2{"goink\\\022"};
+char const *const Dc3{"goink\\\023"};
+char const *const Dc4{"goink\\\024"};
+char const *const Nak{"goink\\\025"};
+char const *const Syn{"goink\\\026"};
+char const *const Etb{"goink\\\027"};
+char const *const Can{"goink\\\030"};
+char const *const Em{"goink\\\031"};
+char const *const Sub{"goink\\\032"};
+char const *const Esc{"goink\\\033"};
+char const *const Fs{"goink\\\034"};
+char const *const Gs{"goink\\\035"};
+char const *const Rs{"goink\\\036"};
+char const *const Us{"goink\\\037"};
+char const *const HexNonPrintable{"\\\x03"};
+char const *const Delete{"\\\177"};
+
+char const *const TrailingSpace{"A line \\with space. \n"};
+char const *const TrailingNewLine{"A single \\line.\n"};
+char const *const AlreadyRaw{R"(foobie\\bletch)"};
+char const *const UTF8Literal{u8"foobie\\bletch"};
+char const *const UTF8RawLiteral{u8R"(foobie\\bletch)"};
+char16_t const *const UTF16Literal{u"foobie\\bletch"};
+char16_t const *const UTF16RawLiteral{uR"(foobie\\bletch)"};
+char32_t const *const UTF32Literal{U"foobie\\bletch"};
+char32_t const *const UTF32RawLiteral{UR"(foobie\\bletch)"};
+wchar_t const *const WideLiteral{L"foobie\\bletch"};
+wchar_t const *const WideRawLiteral{LR"(foobie\\bletch)"};
+
+char const *const SingleQuote{"goink\'frob"};
+// CHECK-MESSAGES: :[[@LINE-1]]:31: warning: {{.*}} can be written as a raw string literal
+// CHECK-XFIXES: {{^}}char const *const SingleQuote{R"(goink'frob)"};{{$}}
+
+char const *const DoubleQuote{"goink\"frob"};
+// CHECK-MESSAGES: :[[@LINE-1]]:31: warning: {{.*}} can be written as a raw string literal
+// CHECK-FIXES: {{^}}char const *const DoubleQuote{R"(goink"frob)"};{{$}}
+
+char const *const QuestionMark{"goink\?frob"};
+// CHECK-MESSAGES: :[[@LINE-1]]:32: warning: {{.*}} can be written as a raw string literal
+// CHECK-FIXES: {{^}}char const *const QuestionMark{R"(goink?frob)"};{{$}}
+
+char const *const RegEx{"goink\\(one|two\\)\\\\\\?.*\\nfrob"};
+// CHECK-MESSAGES: :[[@LINE-1]]:25: warning: {{.*}} can be written as a raw string literal
+// CHECK-FIXES: {{^}}char const *const RegEx{R"(goink\(one|two\)\\\?.*\nfrob)"};{{$}}
+
+char const *const Path{"C:\\Program Files\\Vendor\\Application\\Application.exe"};
+// CHECK-MESSAGES: :[[@LINE-1]]:24: warning: {{.*}} can be written as a raw string literal
+// CHECK-FIXES: {{^}}char const *const Path{R"(C:\Program Files\Vendor\Application\Application.exe)"};{{$}}
+
+char const *const ContainsSentinel{"who\\ops)\""};
+// CHECK-MESSAGES: :[[@LINE-1]]:36: warning: {{.*}} can be written as a raw string literal
+// CHECK-FIXES: {{^}}char const *const ContainsSentinel{R"lit(who\ops)")lit"};{{$}}
+
+char const *const ContainsDelim{"whoops)\")lit\""};
+// CHECK-MESSAGES: :[[@LINE-1]]:33: warning: {{.*}} can be written as a raw string literal
+// CHECK-FIXES: {{^}}char const *const ContainsDelim{R"lit1(whoops)")lit")lit1"};{{$}}
+
+char const *const OctalPrintable{"\100\\"};
+// CHECK-MESSAGES: :[[@LINE-1]]:34: warning: {{.*}} can be written as a raw string literal
+// CHECK-FIXES: {{^}}char const *const OctalPrintable{R"(@\)"};{{$}}
+
+char const *const HexPrintable{"\x40\\"};
+// CHECK-MESSAGES: :[[@LINE-1]]:32: warning: {{.*}} can be written as a raw string literal
+// CHECK-FIXES: {{^}}char const *const HexPrintable{R"(@\)"};{{$}}
+
+#define TRICK(arg_) #arg_
+char const *const MacroBody = TRICK(foo\\bar);
+
+#define HAT(rabbit_) #rabbit_ "foo\\bar"
+char const *const StringizedMacroArgument = HAT(foo\\bar);
+
+#define SUBST(lit_) lit_
+char const *const MacroArgument = SUBST("foo\\bar");
+
+template <typename T>
+void fn(char const *const Arg) {
+  char const *const Str{"foo\\bar"};
+  // CHECK-MESSAGES: :[[@LINE-1]]:25: warning: {{.*}} can be written as a raw string literal
+  // CHECK-FIXES: {{^}}  char const *const Str{R"(foo\bar)"};{{$}}
+}
+
+template <>
+void fn<int>(char const *const Arg) {
+  char const *const Str{"foo\\bar"};
+  // CHECK-MESSAGES: :[[@LINE-1]]:25: warning: {{.*}} can be written as a raw string literal
+  // CHECK-FIXES: {{^}}  char const *const Str{R"(foo\bar)"};{{$}}
+}
+
+void callFn() {
+  fn<int>("foo\\bar");
+  // CHECK-MESSAGES: :[[@LINE-1]]:11: warning: {{.*}} can be written as a raw string literal
+  // CHECK-FIXES: {{^}}  fn<int>(R"(foo\bar)");{{$}}
+  fn<double>("foo\\bar");
+  // CHECK-MESSAGES: :[[@LINE-1]]:14: warning: {{.*}} can be written as a raw string literal
+  // CHECK-FIXES: {{^}}  fn<double>(R"(foo\bar)");{{$}}
+}
Index: test/clang-tidy/modernize-raw-string-literal-delimiter.cpp
===================================================================
--- /dev/null
+++ test/clang-tidy/modernize-raw-string-literal-delimiter.cpp
@@ -0,0 +1,9 @@
+// RUN: %check_clang_tidy %s modernize-raw-string-literal %t -- -config='{CheckOptions: [{key: "modernize-raw-string-literal.DelimiterStem", value: "str"}]}' -- -std=c++11
+
+char const *const ContainsSentinel{"who\\ops)\""};
+// CHECK-MESSAGES: :[[@LINE-1]]:36: warning: {{.*}} can be written as a raw string literal
+// CHECK-FIXES: {{^}}char const *const ContainsSentinel{R"str(who\ops)")str"};{{$}}
+
+//char const *const ContainsDelim{"whoops)\")lit\""};
+// CHECK-XMESSAGES: :[[@LINE-1]]:33: warning: {{.*}} can be written as a raw string literal
+// CHECK-XFIXES: {{^}}char const *const ContainsDelim{R"lit1(whoops)")lit")lit1"};{{$}}
Index: docs/clang-tidy/checks/modernize-raw-string-literal.rst
===================================================================
--- /dev/null
+++ docs/clang-tidy/checks/modernize-raw-string-literal.rst
@@ -0,0 +1,47 @@
+.. title:: clang-tidy - modernize-raw-string-literal
+
+modernize-raw-string-literal
+============================
+
+This check selectively replaces string literals containing escaped characters
+with raw string literals.
+
+Example:
+
+.. code-block:: c++
+
+  const char *const Quotes{"embedded \"quotes\""};
+  const char *const Paragraph{"Line one.\nLine two.\nLine three.\n"};
+  const char *const SingleLine{"Single line.\n"};
+  const char *const TrailingSpace{"Look here -> \n"};
+  const char *const Tab{"One\tTwo\n"};
+  const char *const Bell{"Hello!\a  And welcome!"};
+  const char *const Path{"C:\\Program Files\\Vendor\\Application.exe"};
+  const char *const RegEx{"\\w\\([a-z]\\)"};
+
+becomes
+
+.. code-block:: c++
+
+  const char *const Quotes{R"(embedded "quotes")"};
+  const char *const Paragraph{"Line one.\nLine two.\nLine three.\n"};
+  const char *const SingleLine{"Single line.\n"};
+  const char *const TrailingSpace{"Look here -> \n"};
+  const char *const Tab{"One\tTwo\n"};
+  const char *const Bell{"Hello!\a  And welcome!"};
+  const char *const Path{R"(C:\Program Files\Vendor\Application.exe)"};
+  const char *const RegEx{R"(\w\([a-z]\))"};
+
+The presence of any of the following escapes can cause the string to be
+converted to a raw string literal: ``\\``, ``\'``, ``\"``, ``\?``,
+and octal or hexadecimal escapes for printable ASCII characters.
+
+A string literal containing only escaped newlines is a common way of
+writing lines of text output.  Introducing physical newlines with raw
+string literals in this case is likely to impede readability.  These
+string literals are left unchanged.
+
+An escaped horizontal tab, form feed, or vertical tab prevents the string
+literal from being converted.  Unlike a physical newline, the presence of a
+horizontal tab, form feed or vertical tab in source code is not visually
+obvious.
Index: docs/clang-tidy/checks/list.rst
===================================================================
--- docs/clang-tidy/checks/list.rst
+++ docs/clang-tidy/checks/list.rst
@@ -73,6 +73,7 @@
    modernize-loop-convert
    modernize-make-unique
    modernize-pass-by-value
+   modernize-raw-string-literal
    modernize-redundant-void-arg
    modernize-replace-auto-ptr
    modernize-shrink-to-fit
Index: clang-tidy/modernize/RawStringLiteralCheck.h
===================================================================
--- /dev/null
+++ clang-tidy/modernize/RawStringLiteralCheck.h
@@ -0,0 +1,45 @@
+//===--- RawStringLiteralCheck.h - clang-tidy--------------------*- C++ -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_MODERNIZE_RAW_STRING_LITERAL_H
+#define LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_MODERNIZE_RAW_STRING_LITERAL_H
+
+#include "../ClangTidy.h"
+#include <string>
+
+namespace clang {
+namespace tidy {
+namespace modernize {
+
+/// This check replaces string literals containing escaped characters with
+/// raw string literals.
+///
+/// For the user-facing documentation see:
+/// http://clang.llvm.org/extra/clang-tidy/checks/modernize-raw-string-literal.html
+class RawStringLiteralCheck : public ClangTidyCheck {
+public:
+  RawStringLiteralCheck(StringRef Name, ClangTidyContext *Context);
+
+  void storeOptions(ClangTidyOptions::OptionMap &Options) override;
+  void registerMatchers(ast_matchers::MatchFinder *Finder) override;
+  void check(const ast_matchers::MatchFinder::MatchResult &Result) override;
+
+private:
+  void replaceWithRawStringLiteral(
+      const ast_matchers::MatchFinder::MatchResult &Result,
+      const StringLiteral *Literal);
+
+  std::string DelimiterStem; // Stem for generated raw string delimiters.
+};
+
+} // namespace modernize
+} // namespace tidy
+} // namespace clang
+
+#endif // LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_MODERNIZE_RAW_STRING_LITERAL_H
Index: clang-tidy/modernize/RawStringLiteralCheck.cpp
===================================================================
--- /dev/null
+++ clang-tidy/modernize/RawStringLiteralCheck.cpp
@@ -0,0 +1,140 @@
+//===--- RawStringLiteralCheck.cpp - clang-tidy----------------------------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "RawStringLiteralCheck.h"
+#include "clang/AST/ASTContext.h"
+#include "clang/ASTMatchers/ASTMatchFinder.h"
+#include "clang/Lex/Lexer.h"
+
+using namespace clang::ast_matchers;
+
+namespace clang {
+namespace tidy {
+namespace modernize {
+
+namespace {
+
+/// Checks the escape sequences that appear in the spelling of a string literal.
+///
+/// \param HayStack Source text to scan for backslashes.
+/// \param Escapes  Characters that may directly follow a backslash.
+/// \returns true only if \p HayStack contains at least one backslash and every
+/// backslash visited by the scan is followed by a character from \p Escapes.
+/// A backslash that is the last character of \p HayStack yields false.
+bool containsEscapes(StringRef HayStack, StringRef Escapes) {
+  size_t BackSlash = HayStack.find('\\');
+  if (BackSlash == StringRef::npos)
+    return false;
+
+  while (BackSlash != StringRef::npos) {
+    // A backslash in the last position has no following character; reject it
+    // instead of indexing one past the end of HayStack.
+    if (BackSlash + 1 >= HayStack.size() ||
+        Escapes.find(HayStack[BackSlash + 1]) == StringRef::npos)
+      return false;
+    // Resume after the escaped character so that the second backslash of an
+    // escaped backslash is not treated as the start of another escape.
+    BackSlash = HayStack.find('\\', BackSlash + 2);
+  }
+
+  return true;
+}
+
+/// Determines whether the literal spelled by \p Text is already a raw string
+/// literal, i.e. whether an 'R' immediately precedes its first double quote.
+/// Because only the character before the quote is inspected, prefixed
+/// spellings such as u8R"..." and LR"..." are recognized as well.
+///
+/// \returns false when \p Text contains no double quote at all.
+bool isRawStringLiteral(StringRef Text) {
+  const size_t QuotePos = Text.find('"');
+  assert(QuotePos != StringRef::npos);
+  // assert() compiles away under NDEBUG; without this guard a missing quote
+  // would index Text at npos - 1.
+  if (QuotePos == StringRef::npos)
+    return false;
+  return (QuotePos > 0) && (Text[QuotePos - 1] == 'R');
+}
+
+/// Decides whether \p Literal is a candidate for conversion to a raw string
+/// literal.
+///
+/// Returns false when Literal->isAscii() is false, when the literal's bytes
+/// include a non-printing character, or when its source spelling is already
+/// raw. Otherwise the result is that of containsEscapes() applied to the
+/// source spelling.
+bool containsEscapedCharacters(const MatchFinder::MatchResult &Result,
+                               const StringLiteral *Literal) {
+  // FIXME: Handle L"", u8"", u"" and U"" literals.
+  if (!Literal->isAscii())
+    return false;
+
+  // Bytes that disqualify a literal: the control characters \000-\037
+  // (which include \a \b \t \n \v \f \r) and \177 (delete). The explicit
+  // length keeps the leading NUL as a member of the set.
+  const StringRef NonPrinting("\000\001\002\003\004\005\006\a"
+                              "\b\t\n\v\f\r\016\017"
+                              "\020\021\022\023\024\025\026\027"
+                              "\030\031\032\033\034\035\036\037"
+                              "\177",
+                              33);
+  if (Literal->getBytes().find_first_of(NonPrinting) != StringRef::npos)
+    return false;
+
+  const auto &Sources = *Result.SourceManager;
+  const auto &LangOpts = Result.Context->getLangOpts();
+  const CharSourceRange CharRange = Lexer::makeFileCharRange(
+      CharSourceRange::getTokenRange(Literal->getSourceRange()), Sources,
+      LangOpts);
+  const StringRef Text = Lexer::getSourceText(CharRange, Sources, LangOpts);
+
+  // Accept only spellings that are not raw yet and whose backslashes are each
+  // followed by one of: ' \ " ? x 0 1.
+  return !isRawStringLiteral(Text) && containsEscapes(Text, R"('\"?x01)");
+}
+
+/// Reports whether \p Bytes contains the sequence that would close a raw
+/// string literal written with \p Delimiter: a ')' followed by the delimiter
+/// and a double quote. With an empty delimiter that sequence is just the
+/// two characters ')' and '"'.
+bool containsDelimiter(StringRef Bytes, const std::string &Delimiter) {
+  std::string Terminator(")");
+  Terminator += Delimiter;
+  Terminator += '"';
+  return Bytes.find(Terminator) != StringRef::npos;
+}
+
+/// Produces the raw string literal spelling for the contents of \p Literal.
+///
+/// The delimiter-less form is tried first. While the contents contain the
+/// closing sequence of the current candidate, the next delimiter is tried:
+/// \p DelimiterStem itself, then the stem with 1, 2, ... appended.
+std::string asRawStringLiteral(const StringLiteral *Literal,
+                               const std::string &DelimiterStem) {
+  const StringRef Bytes = Literal->getBytes();
+
+  std::string Delimiter;
+  int Attempt = 0;
+  while (containsDelimiter(Bytes, Delimiter)) {
+    Delimiter = DelimiterStem;
+    if (Attempt != 0)
+      Delimiter += std::to_string(Attempt);
+    ++Attempt;
+  }
+
+  // An empty delimiter simply contributes nothing between the quote and the
+  // parenthesis, so one construction covers both forms.
+  std::string Raw("R\"");
+  Raw += Delimiter;
+  Raw += '(';
+  Raw += Bytes.str();
+  Raw += ')';
+  Raw += Delimiter;
+  Raw += '"';
+  return Raw;
+}
+
+} // namespace
+
+/// Forwards \p Name and \p Context to ClangTidyCheck and initializes the
+/// delimiter stem from the "DelimiterStem" option, passing "lit" as the
+/// fallback value.
+RawStringLiteralCheck::RawStringLiteralCheck(StringRef Name,
+                                             ClangTidyContext *Context)
+    : ClangTidyCheck(Name, Context),
+      DelimiterStem(Options.get("DelimiterStem", "lit")) {}
+
+/// Writes this check's configuration into \p Options so that the effective
+/// value of the "DelimiterStem" option read by the constructor is preserved
+/// when the options are stored.
+void RawStringLiteralCheck::storeOptions(ClangTidyOptions::OptionMap &Options) {
+  ClangTidyCheck::storeOptions(Options);
+  // The parameter shadows the inherited Options member, hence this->.
+  this->Options.store(Options, "DelimiterStem", DelimiterStem);
+}
+
+/// Registers a matcher for every string literal; the node is bound as "lit",
+/// the name check() uses to retrieve it.
+void RawStringLiteralCheck::registerMatchers(MatchFinder *Finder) {
+  const auto AnyStringLiteral = stringLiteral().bind("lit");
+  Finder->addMatcher(AnyStringLiteral, this);
+}
+
+/// Handles one matched string literal. Nothing is done unless the language
+/// options enable C++11; literals whose start location is a macro location
+/// are skipped; the remaining literals get a fix-it when
+/// containsEscapedCharacters() accepts them.
+void RawStringLiteralCheck::check(const MatchFinder::MatchResult &Result) {
+  // Raw string literals require C++11 or later.
+  const bool HasRawStrings = Result.Context->getLangOpts().CPlusPlus11;
+  if (!HasRawStrings)
+    return;
+
+  const auto *Literal = Result.Nodes.getNodeAs<StringLiteral>("lit");
+  const bool IsCandidate = !Literal->getLocStart().isMacroID() &&
+                           containsEscapedCharacters(Result, Literal);
+  if (IsCandidate)
+    replaceWithRawStringLiteral(Result, Literal);
+}
+
+/// Emits the warning for \p Literal together with a fix-it that replaces the
+/// literal's file character range with its raw string literal spelling.
+void RawStringLiteralCheck::replaceWithRawStringLiteral(
+    const MatchFinder::MatchResult &Result, const StringLiteral *Literal) {
+  const auto TokenRange =
+      CharSourceRange::getTokenRange(Literal->getSourceRange());
+  const CharSourceRange ReplacedRange = Lexer::makeFileCharRange(
+      TokenRange, *Result.SourceManager, Result.Context->getLangOpts());
+  const std::string Replacement = asRawStringLiteral(Literal, DelimiterStem);
+
+  diag(Literal->getLocStart(),
+       "escaped string literal can be written as a raw string literal")
+      << FixItHint::CreateReplacement(ReplacedRange, Replacement);
+}
+
+} // namespace modernize
+} // namespace tidy
+} // namespace clang
Index: clang-tidy/modernize/ModernizeTidyModule.cpp
===================================================================
--- clang-tidy/modernize/ModernizeTidyModule.cpp
+++ clang-tidy/modernize/ModernizeTidyModule.cpp
@@ -13,6 +13,7 @@
 #include "LoopConvertCheck.h"
 #include "MakeUniqueCheck.h"
 #include "PassByValueCheck.h"
+#include "RawStringLiteralCheck.h"
 #include "RedundantVoidArgCheck.h"
 #include "ReplaceAutoPtrCheck.h"
 #include "ShrinkToFitCheck.h"
@@ -33,6 +34,8 @@
     CheckFactories.registerCheck<LoopConvertCheck>("modernize-loop-convert");
     CheckFactories.registerCheck<MakeUniqueCheck>("modernize-make-unique");
     CheckFactories.registerCheck<PassByValueCheck>("modernize-pass-by-value");
+    CheckFactories.registerCheck<RawStringLiteralCheck>(
+        "modernize-raw-string-literal");
     CheckFactories.registerCheck<RedundantVoidArgCheck>(
         "modernize-redundant-void-arg");
     CheckFactories.registerCheck<ReplaceAutoPtrCheck>(
Index: clang-tidy/modernize/CMakeLists.txt
===================================================================
--- clang-tidy/modernize/CMakeLists.txt
+++ clang-tidy/modernize/CMakeLists.txt
@@ -6,6 +6,7 @@
   MakeUniqueCheck.cpp
   ModernizeTidyModule.cpp
   PassByValueCheck.cpp
+  RawStringLiteralCheck.cpp
   RedundantVoidArgCheck.cpp
   ReplaceAutoPtrCheck.cpp
   ShrinkToFitCheck.cpp
_______________________________________________
cfe-commits mailing list
cfe-commits@lists.llvm.org
http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

Reply via email to