Author: zinovy.nis Date: Tue May 1 11:46:32 2018 New Revision: 331297 URL: http://llvm.org/viewvc/llvm-project?rev=331297&view=rev Log: [clang-tidy][modernize-raw-string-literal] Don't replace upper ASCII with raw literals
It's useless and unsafe to replace UTF-8 text encoded as escaped ASCII with raw UTF-8 characters: "\xE2\x98\x83" ---> <snowman>. So don't do it. Modified: clang-tools-extra/trunk/clang-tidy/modernize/RawStringLiteralCheck.cpp clang-tools-extra/trunk/clang-tidy/modernize/RawStringLiteralCheck.h clang-tools-extra/trunk/test/clang-tidy/modernize-raw-string-literal.cpp Modified: clang-tools-extra/trunk/clang-tidy/modernize/RawStringLiteralCheck.cpp URL: http://llvm.org/viewvc/llvm-project/clang-tools-extra/trunk/clang-tidy/modernize/RawStringLiteralCheck.cpp?rev=331297&r1=331296&r2=331297&view=diff ============================================================================== --- clang-tools-extra/trunk/clang-tidy/modernize/RawStringLiteralCheck.cpp (original) +++ clang-tools-extra/trunk/clang-tidy/modernize/RawStringLiteralCheck.cpp Tue May 1 11:46:32 2018 @@ -42,28 +42,15 @@ bool isRawStringLiteral(StringRef Text) } bool containsEscapedCharacters(const MatchFinder::MatchResult &Result, - const StringLiteral *Literal) { + const StringLiteral *Literal, + const CharsBitSet &DisallowedChars) { // FIXME: Handle L"", u8"", u"" and U"" literals. 
if (!Literal->isAscii()) return false; - StringRef Bytes = Literal->getBytes(); - // Non-printing characters disqualify this literal: - // \007 = \a bell - // \010 = \b backspace - // \011 = \t horizontal tab - // \012 = \n new line - // \013 = \v vertical tab - // \014 = \f form feed - // \015 = \r carriage return - // \177 = delete - if (Bytes.find_first_of(StringRef("\000\001\002\003\004\005\006\a" - "\b\t\n\v\f\r\016\017" - "\020\021\022\023\024\025\026\027" - "\030\031\032\033\034\035\036\037" - "\177", - 33)) != StringRef::npos) - return false; + for (const unsigned char C : Literal->getBytes()) + if (DisallowedChars.test(C)) + return false; CharSourceRange CharRange = Lexer::makeFileCharRange( CharSourceRange::getTokenRange(Literal->getSourceRange()), @@ -102,7 +89,28 @@ RawStringLiteralCheck::RawStringLiteralC ClangTidyContext *Context) : ClangTidyCheck(Name, Context), DelimiterStem(Options.get("DelimiterStem", "lit")), - ReplaceShorterLiterals(Options.get("ReplaceShorterLiterals", false)) {} + ReplaceShorterLiterals(Options.get("ReplaceShorterLiterals", false)) { + // Non-printing characters are disallowed: + // \007 = \a bell + // \010 = \b backspace + // \011 = \t horizontal tab + // \012 = \n new line + // \013 = \v vertical tab + // \014 = \f form feed + // \015 = \r carriage return + // \177 = delete + for (const unsigned char C : StringRef("\000\001\002\003\004\005\006\a" + "\b\t\n\v\f\r\016\017" + "\020\021\022\023\024\025\026\027" + "\030\031\032\033\034\035\036\037" + "\177", + 33)) + DisallowedChars.set(C); + + // Non-ASCII are disallowed too. 
+ for (unsigned int C = 0x80u; C <= 0xFFu; ++C) + DisallowedChars.set(static_cast<unsigned char>(C)); +} void RawStringLiteralCheck::storeOptions(ClangTidyOptions::OptionMap &Options) { ClangTidyCheck::storeOptions(Options); @@ -124,7 +132,7 @@ void RawStringLiteralCheck::check(const if (Literal->getLocStart().isMacroID()) return; - if (containsEscapedCharacters(Result, Literal)) { + if (containsEscapedCharacters(Result, Literal, DisallowedChars)) { std::string Replacement = asRawStringLiteral(Literal, DelimiterStem); if (ReplaceShorterLiterals || Replacement.length() <= Modified: clang-tools-extra/trunk/clang-tidy/modernize/RawStringLiteralCheck.h URL: http://llvm.org/viewvc/llvm-project/clang-tools-extra/trunk/clang-tidy/modernize/RawStringLiteralCheck.h?rev=331297&r1=331296&r2=331297&view=diff ============================================================================== --- clang-tools-extra/trunk/clang-tidy/modernize/RawStringLiteralCheck.h (original) +++ clang-tools-extra/trunk/clang-tidy/modernize/RawStringLiteralCheck.h Tue May 1 11:46:32 2018 @@ -11,11 +11,14 @@ #define LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_MODERNIZE_RAW_STRING_LITERAL_H #include "../ClangTidy.h" +#include <bitset> namespace clang { namespace tidy { namespace modernize { +using CharsBitSet = std::bitset<1 << CHAR_BIT>; + /// This check replaces string literals with escaped characters to /// raw string literals. 
/// @@ -35,6 +38,7 @@ private: const StringLiteral *Literal, StringRef Replacement); std::string DelimiterStem; + CharsBitSet DisallowedChars; const bool ReplaceShorterLiterals; }; Modified: clang-tools-extra/trunk/test/clang-tidy/modernize-raw-string-literal.cpp URL: http://llvm.org/viewvc/llvm-project/clang-tools-extra/trunk/test/clang-tidy/modernize-raw-string-literal.cpp?rev=331297&r1=331296&r2=331297&view=diff ============================================================================== --- clang-tools-extra/trunk/test/clang-tidy/modernize-raw-string-literal.cpp (original) +++ clang-tools-extra/trunk/test/clang-tidy/modernize-raw-string-literal.cpp Tue May 1 11:46:32 2018 @@ -40,6 +40,8 @@ char const *const Rs("goink\\\036"); char const *const Us("goink\\\037"); char const *const HexNonPrintable("\\\x03"); char const *const Delete("\\\177"); +char const *const MultibyteSnowman("\xE2\x98\x83"); +// CHECK-FIXES: {{^}}char const *const MultibyteSnowman("\xE2\x98\x83");{{$}} char const *const TrailingSpace("A line \\with space. \n"); char const *const TrailingNewLine("A single \\line.\n"); _______________________________________________ cfe-commits mailing list cfe-commits@lists.llvm.org http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits