Author: etienneb Date: Thu Apr 7 11:16:36 2016 New Revision: 265691 URL: http://llvm.org/viewvc/llvm-project?rev=265691&view=rev Log: [clang-tidy] add new checker for string literal with NUL character.
Summary: This patch adds the support for detecting suspicious string literals and their //incorrect// usage. The following example shows an incorrect character escaping leading to an embedded NUL character. ``` std::string str = "\0x42"; // Should be "\x42". ``` The patch also adds detection of truncated literals when a literal is passed to a string constructor. Reviewers: hokein, alexfh Subscribers: LegalizeAdulthood, bcraig, Eugene.Zelenko, bkramer, cfe-commits Differential Revision: http://reviews.llvm.org/D18783 Added: clang-tools-extra/trunk/clang-tidy/misc/StringLiteralWithEmbeddedNulCheck.cpp clang-tools-extra/trunk/clang-tidy/misc/StringLiteralWithEmbeddedNulCheck.h clang-tools-extra/trunk/docs/clang-tidy/checks/misc-string-literal-with-embedded-nul.rst clang-tools-extra/trunk/test/clang-tidy/misc-string-literal-with-embedded-nul.cpp Modified: clang-tools-extra/trunk/clang-tidy/misc/CMakeLists.txt clang-tools-extra/trunk/clang-tidy/misc/MiscTidyModule.cpp clang-tools-extra/trunk/docs/ReleaseNotes.rst clang-tools-extra/trunk/docs/clang-tidy/checks/list.rst Modified: clang-tools-extra/trunk/clang-tidy/misc/CMakeLists.txt URL: http://llvm.org/viewvc/llvm-project/clang-tools-extra/trunk/clang-tidy/misc/CMakeLists.txt?rev=265691&r1=265690&r2=265691&view=diff ============================================================================== --- clang-tools-extra/trunk/clang-tidy/misc/CMakeLists.txt (original) +++ clang-tools-extra/trunk/clang-tidy/misc/CMakeLists.txt Thu Apr 7 11:16:36 2016 @@ -23,6 +23,7 @@ add_clang_library(clangTidyMiscModule SizeofContainerCheck.cpp StaticAssertCheck.cpp StringIntegerAssignmentCheck.cpp + StringLiteralWithEmbeddedNulCheck.cpp SuspiciousMissingCommaCheck.cpp SuspiciousSemicolonCheck.cpp SwappedArgumentsCheck.cpp Modified: clang-tools-extra/trunk/clang-tidy/misc/MiscTidyModule.cpp URL: http://llvm.org/viewvc/llvm-project/clang-tools-extra/trunk/clang-tidy/misc/MiscTidyModule.cpp?rev=265691&r1=265690&r2=265691&view=diff 
============================================================================== --- clang-tools-extra/trunk/clang-tidy/misc/MiscTidyModule.cpp (original) +++ clang-tools-extra/trunk/clang-tidy/misc/MiscTidyModule.cpp Thu Apr 7 11:16:36 2016 @@ -31,6 +31,7 @@ #include "SizeofContainerCheck.h" #include "StaticAssertCheck.h" #include "StringIntegerAssignmentCheck.h" +#include "StringLiteralWithEmbeddedNulCheck.h" #include "SuspiciousMissingCommaCheck.h" #include "SuspiciousSemicolonCheck.h" #include "SwappedArgumentsCheck.h" @@ -89,6 +90,8 @@ public: "misc-static-assert"); CheckFactories.registerCheck<StringIntegerAssignmentCheck>( "misc-string-integer-assignment"); + CheckFactories.registerCheck<StringLiteralWithEmbeddedNulCheck>( + "misc-string-literal-with-embedded-nul"); CheckFactories.registerCheck<SuspiciousMissingCommaCheck>( "misc-suspicious-missing-comma"); CheckFactories.registerCheck<SuspiciousSemicolonCheck>( Added: clang-tools-extra/trunk/clang-tidy/misc/StringLiteralWithEmbeddedNulCheck.cpp URL: http://llvm.org/viewvc/llvm-project/clang-tools-extra/trunk/clang-tidy/misc/StringLiteralWithEmbeddedNulCheck.cpp?rev=265691&view=auto ============================================================================== --- clang-tools-extra/trunk/clang-tidy/misc/StringLiteralWithEmbeddedNulCheck.cpp (added) +++ clang-tools-extra/trunk/clang-tidy/misc/StringLiteralWithEmbeddedNulCheck.cpp Thu Apr 7 11:16:36 2016 @@ -0,0 +1,83 @@ +//===--- StringLiteralWithEmbeddedNulCheck.cpp - clang-tidy----------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. 
+// +//===----------------------------------------------------------------------===// + +#include "StringLiteralWithEmbeddedNulCheck.h" +#include "clang/AST/ASTContext.h" +#include "clang/ASTMatchers/ASTMatchFinder.h" + +using namespace clang::ast_matchers; + +namespace clang { +namespace tidy { +namespace misc { + +AST_MATCHER(StringLiteral, containsNul) { + for (size_t i = 0; i < Node.getLength(); ++i) + if (Node.getCodeUnit(i) == '\0') + return true; + return false; +} + +void StringLiteralWithEmbeddedNulCheck::registerMatchers(MatchFinder *Finder) { + // Match a string that contains embedded NUL character. Extra-checks are + // applied in |check| to find incorectly escaped characters. + Finder->addMatcher(stringLiteral(containsNul()).bind("strlit"), this); + + // The remaining checks only apply to C++. + if (!getLangOpts().CPlusPlus) + return; + + const auto StrLitWithNul = + ignoringParenImpCasts(stringLiteral(containsNul()).bind("truncated")); + + // Match string constructor. + const auto StringConstructorExpr = expr(anyOf( + cxxConstructExpr(argumentCountIs(1), + hasDeclaration(cxxMethodDecl(hasName("basic_string")))), + // If present, the second argument is the alloc object which must not + // be present explicitly. + cxxConstructExpr(argumentCountIs(2), + hasDeclaration(cxxMethodDecl(hasName("basic_string"))), + hasArgument(1, cxxDefaultArgExpr())))); + + // Detect passing a suspicious string literal to a string constructor. + // example: std::string str = "abc\0def"; + Finder->addMatcher( + cxxConstructExpr(StringConstructorExpr, hasArgument(0, StrLitWithNul)), + this); + + // Detect passing a suspicious string literal through an overloaded operator. 
+ Finder->addMatcher(cxxOperatorCallExpr(hasAnyArgument(StrLitWithNul)), this); +} + +void StringLiteralWithEmbeddedNulCheck::check( + const MatchFinder::MatchResult &Result) { + if (const auto *SL = Result.Nodes.getNodeAs<StringLiteral>("strlit")) { + for (size_t Offset = 0, Length = SL->getLength(); Offset < Length; + ++Offset) { + // Find a sequence of character like "\0x12". + if (Offset + 3 < Length && SL->getCodeUnit(Offset) == '\0' && + SL->getCodeUnit(Offset + 1) == 'x' && + isDigit(SL->getCodeUnit(Offset + 2)) && + isDigit(SL->getCodeUnit(Offset + 3))) { + diag(SL->getLocStart(), "suspicious embedded NUL character"); + return; + } + } + } + + if (const auto *SL = Result.Nodes.getNodeAs<StringLiteral>("truncated")) { + diag(SL->getLocStart(), + "truncated string literal with embedded NUL character"); + } +} + +} // namespace misc +} // namespace tidy +} // namespace clang Added: clang-tools-extra/trunk/clang-tidy/misc/StringLiteralWithEmbeddedNulCheck.h URL: http://llvm.org/viewvc/llvm-project/clang-tools-extra/trunk/clang-tidy/misc/StringLiteralWithEmbeddedNulCheck.h?rev=265691&view=auto ============================================================================== --- clang-tools-extra/trunk/clang-tidy/misc/StringLiteralWithEmbeddedNulCheck.h (added) +++ clang-tools-extra/trunk/clang-tidy/misc/StringLiteralWithEmbeddedNulCheck.h Thu Apr 7 11:16:36 2016 @@ -0,0 +1,35 @@ +//===--- StringLiteralWithEmbeddedNulCheck.h - clang-tidy--------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. 
+// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_MISC_STRING_LITERAL_WITH_EMBEDDED_NUL_H +#define LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_MISC_STRING_LITERAL_WITH_EMBEDDED_NUL_H + +#include "../ClangTidy.h" + +namespace clang { +namespace tidy { +namespace misc { + +/// Find suspicious string literals with embedded NUL characters. +/// +/// For the user-facing documentation see: +/// http://clang.llvm.org/extra/clang-tidy/checks/misc-string-literal-with-embedded-nul.html +class StringLiteralWithEmbeddedNulCheck : public ClangTidyCheck { +public: + StringLiteralWithEmbeddedNulCheck(StringRef Name, ClangTidyContext *Context) + : ClangTidyCheck(Name, Context) {} + void registerMatchers(ast_matchers::MatchFinder *Finder) override; + void check(const ast_matchers::MatchFinder::MatchResult &Result) override; +}; + +} // namespace misc +} // namespace tidy +} // namespace clang + +#endif // LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_MISC_STRING_LITERAL_WITH_EMBEDDED_NUL_H Modified: clang-tools-extra/trunk/docs/ReleaseNotes.rst URL: http://llvm.org/viewvc/llvm-project/clang-tools-extra/trunk/docs/ReleaseNotes.rst?rev=265691&r1=265690&r2=265691&view=diff ============================================================================== --- clang-tools-extra/trunk/docs/ReleaseNotes.rst (original) +++ clang-tools-extra/trunk/docs/ReleaseNotes.rst Thu Apr 7 11:16:36 2016 @@ -97,6 +97,12 @@ identified. The improvements since the Warns when there is a explicit redundant cast of a calculation result to a bigger type. +- New `misc-string-literal-with-embedded-nul + <http://clang.llvm.org/extra/clang-tidy/checks/misc-string-literal-with-embedded-nul.html>`_ check + + Warns about suspicious NUL character in string literals which may lead to + truncation or invalid character escaping. 
+ - New `misc-suspicious-missing-comma <http://clang.llvm.org/extra/clang-tidy/checks/misc-suspicious-missing-comma.html>`_ check Modified: clang-tools-extra/trunk/docs/clang-tidy/checks/list.rst URL: http://llvm.org/viewvc/llvm-project/clang-tools-extra/trunk/docs/clang-tidy/checks/list.rst?rev=265691&r1=265690&r2=265691&view=diff ============================================================================== --- clang-tools-extra/trunk/docs/clang-tidy/checks/list.rst (original) +++ clang-tools-extra/trunk/docs/clang-tidy/checks/list.rst Thu Apr 7 11:16:36 2016 @@ -66,6 +66,7 @@ Clang-Tidy Checks misc-sizeof-container misc-static-assert misc-string-integer-assignment + misc-string-literal-with-embedded-nul misc-suspicious-missing-comma misc-suspicious-semicolon misc-swapped-arguments Added: clang-tools-extra/trunk/docs/clang-tidy/checks/misc-string-literal-with-embedded-nul.rst URL: http://llvm.org/viewvc/llvm-project/clang-tools-extra/trunk/docs/clang-tidy/checks/misc-string-literal-with-embedded-nul.rst?rev=265691&view=auto ============================================================================== --- clang-tools-extra/trunk/docs/clang-tidy/checks/misc-string-literal-with-embedded-nul.rst (added) +++ clang-tools-extra/trunk/docs/clang-tidy/checks/misc-string-literal-with-embedded-nul.rst Thu Apr 7 11:16:36 2016 @@ -0,0 +1,38 @@ +.. title:: clang-tidy - misc-string-literal-with-embedded-nul + +misc-string-literal-with-embedded-nul +===================================== + +Finds occurrences of string literals with embedded NUL characters and validates +their usage. + + +Invalid escaping +^^^^^^^^^^^^^^^^ + +Special characters can be escaped within a string literal by using their +hexadecimal encoding like ``\x42``. A common mistake is to escape them +like this ``\0x42`` where the ``\0`` stands for the NUL character. + +.. 
code:: c++ + + const char* Example[] = "Invalid character: \0x12 should be \x12"; + const char* Bytes[] = "\x03\0x02\0x01\0x00\0xFF\0xFF\0xFF"; + + +Truncated literal +^^^^^^^^^^^^^^^^^ + +String-like classes can manipulate strings with embedded NUL as they are +keeping track of the bytes and the length. This is not the case for a +``char*`` (NUL-terminated) string. + +A common mistake is to pass a string-literal with embedded NUL to a string +constructor expecting a NUL-terminated string. The bytes after the first NUL +character are truncated. + +.. code:: c++ + + std::string str("abc\0def"); // "def" is truncated + str += "\0"; // This statement is doing nothing + if (str == "\0abc") return; // This expression is always true Added: clang-tools-extra/trunk/test/clang-tidy/misc-string-literal-with-embedded-nul.cpp URL: http://llvm.org/viewvc/llvm-project/clang-tools-extra/trunk/test/clang-tidy/misc-string-literal-with-embedded-nul.cpp?rev=265691&view=auto ============================================================================== --- clang-tools-extra/trunk/test/clang-tidy/misc-string-literal-with-embedded-nul.cpp (added) +++ clang-tools-extra/trunk/test/clang-tidy/misc-string-literal-with-embedded-nul.cpp Thu Apr 7 11:16:36 2016 @@ -0,0 +1,85 @@ +// RUN: %check_clang_tidy %s misc-string-literal-with-embedded-nul %t + +namespace std { +template <typename T> +class allocator {}; +template <typename T> +class char_traits {}; +template <typename C, typename T, typename A> +struct basic_string { + typedef basic_string<C, T, A> _Type; + basic_string(); + basic_string(const C *p, const A &a = A()); + + _Type& operator+=(const C* s); + _Type& operator=(const C* s); +}; + +typedef basic_string<char, std::char_traits<char>, std::allocator<char>> string; +typedef basic_string<wchar_t, std::char_traits<wchar_t>, std::allocator<wchar_t>> wstring; +} + +bool operator==(const std::string&, const char*); +bool operator==(const char*, const std::string&); + + +const char 
Valid[] = "This is valid \x12."; +const char Strange[] = "This is strange \0x12 and must be fixed"; +// CHECK-MESSAGES: :[[@LINE-1]]:24: warning: suspicious embedded NUL character [misc-string-literal-with-embedded-nul] + +const char textA[] = "\0x01\0x02\0x03\0x04"; +// CHECK-MESSAGES: :[[@LINE-1]]:22: warning: suspicious embedded NUL character +const wchar_t textW[] = L"\0x01\0x02\0x03\0x04"; +// CHECK-MESSAGES: :[[@LINE-1]]:25: warning: suspicious embedded NUL character + +const char A[] = "\0"; +const char B[] = "\0x"; +const char C[] = "\0x1"; +const char D[] = "\0x11"; +// CHECK-MESSAGES: :[[@LINE-1]]:18: warning: suspicious embedded NUL character + +const wchar_t E[] = L"\0"; +const wchar_t F[] = L"\0x"; +const wchar_t G[] = L"\0x1"; +const wchar_t H[] = L"\0x11"; +// CHECK-MESSAGES: :[[@LINE-1]]:21: warning: suspicious embedded NUL character + +const char I[] = "\000\000\000\000"; +const char J[] = "\0\0\0\0\0\0"; +const char K[] = ""; + +const char L[] = "\0x12" "\0x12" "\0x12" "\0x12"; +// CHECK-MESSAGES: :[[@LINE-1]]:18: warning: suspicious embedded NUL character + +void TestA() { + std::string str1 = "abc\0def"; + // CHECK-MESSAGES: :[[@LINE-1]]:22: warning: truncated string literal + std::string str2 = "\0"; + // CHECK-MESSAGES: :[[@LINE-1]]:22: warning: truncated string literal + std::string str3("\0"); + // CHECK-MESSAGES: :[[@LINE-1]]:20: warning: truncated string literal + std::string str4{"\x00\x01\x02\x03"}; + // CHECK-MESSAGES: :[[@LINE-1]]:20: warning: truncated string literal + + std::string str; + str += "abc\0def"; + // CHECK-MESSAGES: :[[@LINE-1]]:10: warning: truncated string literal + str = "abc\0def"; + // CHECK-MESSAGES: :[[@LINE-1]]:9: warning: truncated string literal + + if (str == "abc\0def") return; + // CHECK-MESSAGES: :[[@LINE-1]]:14: warning: truncated string literal + if ("abc\0def" == str) return; + // CHECK-MESSAGES: :[[@LINE-1]]:7: warning: truncated string literal +} + +void TestW() { + std::wstring str1 = L"abc\0def"; + // 
CHECK-MESSAGES: :[[@LINE-1]]:23: warning: truncated string literal + std::wstring str2 = L"\0"; + // CHECK-MESSAGES: :[[@LINE-1]]:23: warning: truncated string literal + std::wstring str3(L"\0"); + // CHECK-MESSAGES: :[[@LINE-1]]:21: warning: truncated string literal + std::wstring str4{L"\x00\x01\x02\x03"}; + // CHECK-MESSAGES: :[[@LINE-1]]:21: warning: truncated string literal +} _______________________________________________ cfe-commits mailing list cfe-commits@lists.llvm.org http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits