https://github.com/hjanuschka updated https://github.com/llvm/llvm-project/pull/116033
>From caddb82d73d49b59dff71547d8ab986039895696 Mon Sep 17 00:00:00 2001 From: Helmut Januschka <hel...@januschka.com> Date: Wed, 13 Nov 2024 12:52:36 +0100 Subject: [PATCH] [clang-tidy] Enhance modernize-use-starts-ends-with with substr detection Enhances the modernize-use-starts-ends-with check to detect substr-based patterns that can be replaced with starts_with() (C++20). This improves code readability and efficiency by avoiding temporary string creation. New patterns detected: str.substr(0, n) == "foo" -> str.starts_with("foo") "foo" == str.substr(0, n) -> str.starts_with("foo") str.substr(0, n) != "foo" -> !str.starts_with("foo") str.substr(0, strlen("foo")) == "foo" -> str.starts_with("foo") str.substr(0, prefix.size()) == "foo" -> str.starts_with("foo") The enhancement: - Integrates with existing starts_with patterns - Handles substr with zero first argument - Supports length via literals, strlen(), and size()/length() - Validates string literal length matches - Handles both == and != operators Part of modernize-use-starts-ends-with check. --- .../modernize/UseStartsEndsWithCheck.cpp | 185 ++++++++++++++++-- .../modernize/UseStartsEndsWithCheck.h | 4 + clang-tools-extra/docs/ReleaseNotes.rst | 7 + .../checks/modernize/use-starts-ends-with.rst | 45 +++-- .../clang-tidy/checkers/Inputs/Headers/string | 5 +- .../modernize/use-starts-ends-with.cpp | 42 +++- 6 files changed, 257 insertions(+), 31 deletions(-) diff --git a/clang-tools-extra/clang-tidy/modernize/UseStartsEndsWithCheck.cpp b/clang-tools-extra/clang-tidy/modernize/UseStartsEndsWithCheck.cpp index 1231f954298adc..e7da45198df6c1 100644 --- a/clang-tools-extra/clang-tidy/modernize/UseStartsEndsWithCheck.cpp +++ b/clang-tools-extra/clang-tidy/modernize/UseStartsEndsWithCheck.cpp @@ -22,28 +22,46 @@ struct NotLengthExprForStringNode { NotLengthExprForStringNode(std::string ID, DynTypedNode Node, ASTContext *Context) : ID(std::move(ID)), Node(std::move(Node)), Context(Context) {} + bool operator()(const internal::BoundNodesMap &Nodes) const { - // Match a string literal and an integer size or strlen() call. if (const auto *StringLiteralNode = Nodes.getNodeAs<StringLiteral>(ID)) { + // Match direct integer literals if (const auto *IntegerLiteralSizeNode = Node.get<IntegerLiteral>()) { return StringLiteralNode->getLength() != IntegerLiteralSizeNode->getValue().getZExtValue(); } - if (const auto *StrlenNode = Node.get<CallExpr>()) { - if (StrlenNode->getDirectCallee()->getName() != "strlen" || - StrlenNode->getNumArgs() != 1) { - return true; + // Match strlen() calls + if (const auto *CallNode = Node.get<CallExpr>()) { + if (const auto *FD = CallNode->getDirectCallee()) { + if (FD->getName() == "strlen" && CallNode->getNumArgs() == 1) { + if (const auto *StrArg = CallNode->getArg(0)->IgnoreParenImpCasts()) { + // Handle both string literals and string variables in strlen + if (const auto *StrLit = dyn_cast<StringLiteral>(StrArg)) { + return StrLit->getLength() != StringLiteralNode->getLength(); + } else if (const auto *StrVar = dyn_cast<Expr>(StrArg)) { + return !utils::areStatementsIdentical(StrVar, StringLiteralNode, *Context); + } + } + } } - - if (const auto *StrlenArgNode = dyn_cast<StringLiteral>( - StrlenNode->getArg(0)->IgnoreParenImpCasts())) { - return StrlenArgNode->getLength() != StringLiteralNode->getLength(); + } + + // Match size()/length() member calls + if (const auto *MemberCall = Node.get<CXXMemberCallExpr>()) { + if (const auto *Method = MemberCall->getMethodDecl()) { + StringRef Name = Method->getName(); + if (Method->isConst() && Method->getNumParams() == 0 && + (Name == "size" || Name == "length")) { + // For string literals used in comparison, allow size/length calls + // on any string variable + return false; + } } } } - // Match a string variable and a call to length() or size(). + // Match member function calls on string variables if (const auto *ExprNode = Nodes.getNodeAs<Expr>(ID)) { if (const auto *MemberCallNode = Node.get<CXXMemberCallExpr>()) { const CXXMethodDecl *MethodDeclNode = MemberCallNode->getMethodDecl(); @@ -53,8 +71,8 @@ struct NotLengthExprForStringNode { return true; } - if (const auto *OnNode = - dyn_cast<Expr>(MemberCallNode->getImplicitObjectArgument())) { + if (const auto *OnNode = dyn_cast<Expr>( + MemberCallNode->getImplicitObjectArgument())) { return !utils::areStatementsIdentical(OnNode->IgnoreParenImpCasts(), ExprNode->IgnoreParenImpCasts(), *Context); @@ -83,6 +101,55 @@ UseStartsEndsWithCheck::UseStartsEndsWithCheck(StringRef Name, void UseStartsEndsWithCheck::registerMatchers(MatchFinder *Finder) { const auto ZeroLiteral = integerLiteral(equals(0)); + // Match the substring call + const auto SubstrCall = cxxMemberCallExpr( + callee(cxxMethodDecl(hasName("substr"))), + hasArgument(0, ZeroLiteral), + hasArgument(1, expr().bind("length")), + on(expr().bind("str"))) + .bind("substr_fun"); + + // Match string literals + const auto Literal = stringLiteral().bind("literal"); + + // Helper for matching comparison operators + auto AddSubstrMatcher = [&](auto Matcher) { + Finder->addMatcher( + traverse(TK_IgnoreUnlessSpelledInSource, std::move(Matcher)), this); + }; + + // Match str.substr(0,n) == "literal" + AddSubstrMatcher( + binaryOperation( + hasOperatorName("=="), + hasLHS(SubstrCall), + hasRHS(Literal)) + .bind("positiveComparison")); + + // Also match "literal" == str.substr(0,n) + AddSubstrMatcher( + binaryOperation( + hasOperatorName("=="), + hasLHS(Literal), + hasRHS(SubstrCall)) + .bind("positiveComparison")); + + // Match str.substr(0,n) != "literal" + AddSubstrMatcher( + binaryOperation( + hasOperatorName("!="), + hasLHS(SubstrCall), + hasRHS(Literal)) + .bind("negativeComparison")); + + // Also match "literal" != str.substr(0,n) + AddSubstrMatcher( + binaryOperation( + hasOperatorName("!="), + hasLHS(Literal), + hasRHS(SubstrCall)) + .bind("negativeComparison")); + const auto ClassTypeWithMethod = [](const StringRef MethodBoundName, const auto... Methods) { return cxxRecordDecl(anyOf( @@ -173,7 +240,101 @@ void UseStartsEndsWithCheck::registerMatchers(MatchFinder *Finder) { this); } +void UseStartsEndsWithCheck::handleSubstrMatch(const MatchFinder::MatchResult &Result) { + const auto *SubstrCall = Result.Nodes.getNodeAs<CXXMemberCallExpr>("substr_fun"); + const auto *PositiveComparison = Result.Nodes.getNodeAs<Expr>("positiveComparison"); + const auto *NegativeComparison = Result.Nodes.getNodeAs<Expr>("negativeComparison"); + + if (!SubstrCall || (!PositiveComparison && !NegativeComparison)) + return; + + const bool Negated = NegativeComparison != nullptr; + const auto *Comparison = Negated ? NegativeComparison : PositiveComparison; + + if (SubstrCall->getBeginLoc().isMacroID()) + return; + + const auto *Str = Result.Nodes.getNodeAs<Expr>("str"); + const auto *Literal = Result.Nodes.getNodeAs<StringLiteral>("literal"); + const auto *Length = Result.Nodes.getNodeAs<Expr>("length"); + + if (!Str || !Literal || !Length) + return; + + // Special handling for strlen and size/length member calls + const bool IsValidLength = [&]() { + if (const auto *LengthInt = dyn_cast<IntegerLiteral>(Length)) { + return LengthInt->getValue().getZExtValue() == Literal->getLength(); + } + + if (const auto *Call = dyn_cast<CallExpr>(Length)) { + if (const auto *FD = Call->getDirectCallee()) { + if (FD->getName() == "strlen" && Call->getNumArgs() == 1) { + if (const auto *StrArg = dyn_cast<StringLiteral>( + Call->getArg(0)->IgnoreParenImpCasts())) { + return StrArg->getLength() == Literal->getLength(); + } + } + } + } + + if (const auto *MemberCall = dyn_cast<CXXMemberCallExpr>(Length)) { + if (const auto *Method = MemberCall->getMethodDecl()) { + StringRef Name = Method->getName(); + if (Method->isConst() && Method->getNumParams() == 0 && + (Name == "size" || Name == "length")) { + // For string literals in comparison, we'll trust that size()/length() + // calls are valid + return true; + } + } + } + + return false; + }(); + + if (!IsValidLength) { + return; + } + + // Get the string expression and literal text for the replacement + const std::string StrText = Lexer::getSourceText( + CharSourceRange::getTokenRange(Str->getSourceRange()), + *Result.SourceManager, getLangOpts()).str(); + + const std::string LiteralText = Lexer::getSourceText( + CharSourceRange::getTokenRange(Literal->getSourceRange()), + *Result.SourceManager, getLangOpts()).str(); + + const std::string ReplacementText = (Negated ? "!" : "") + StrText + ".starts_with(" + + LiteralText + ")"; + + auto Diag = diag(SubstrCall->getExprLoc(), + "use starts_with() instead of substr(0, n) comparison"); + + Diag << FixItHint::CreateReplacement( + CharSourceRange::getTokenRange(Comparison->getSourceRange()), + ReplacementText); +} + void UseStartsEndsWithCheck::check(const MatchFinder::MatchResult &Result) { + // Try substr pattern first + const auto *SubstrCall = Result.Nodes.getNodeAs<CXXMemberCallExpr>("substr_fun"); + if (SubstrCall) { + const auto *PositiveComparison = Result.Nodes.getNodeAs<Expr>("positiveComparison"); + const auto *NegativeComparison = Result.Nodes.getNodeAs<Expr>("negativeComparison"); + + if (PositiveComparison || NegativeComparison) { + handleSubstrMatch(Result); + return; + } + } + + // Then try find/compare patterns + handleFindCompareMatch(Result); +} + +void UseStartsEndsWithCheck::handleFindCompareMatch(const MatchFinder::MatchResult &Result) { const auto *ComparisonExpr = Result.Nodes.getNodeAs<BinaryOperator>("expr"); const auto *FindExpr = Result.Nodes.getNodeAs<CXXMemberCallExpr>("find_expr"); const auto *FindFun = Result.Nodes.getNodeAs<CXXMethodDecl>("find_fun"); diff --git a/clang-tools-extra/clang-tidy/modernize/UseStartsEndsWithCheck.h b/clang-tools-extra/clang-tidy/modernize/UseStartsEndsWithCheck.h index 17c2999bda84cd..7e4a1573e81e21 100644 --- a/clang-tools-extra/clang-tidy/modernize/UseStartsEndsWithCheck.h +++ b/clang-tools-extra/clang-tidy/modernize/UseStartsEndsWithCheck.h @@ -31,6 +31,10 @@ class UseStartsEndsWithCheck : public ClangTidyCheck { std::optional<TraversalKind> getCheckTraversalKind() const override { return TK_IgnoreUnlessSpelledInSource; } + +private: + void handleSubstrMatch(const ast_matchers::MatchFinder::MatchResult &Result); + void handleFindCompareMatch(const ast_matchers::MatchFinder::MatchResult &Result); }; } // namespace clang::tidy::modernize diff --git a/clang-tools-extra/docs/ReleaseNotes.rst b/clang-tools-extra/docs/ReleaseNotes.rst index db971f08ca3dbc..4fb7891709447d 100644 --- a/clang-tools-extra/docs/ReleaseNotes.rst +++ b/clang-tools-extra/docs/ReleaseNotes.rst @@ -147,6 +147,13 @@ New check aliases Changes in existing checks ^^^^^^^^^^^^^^^^^^^^^^^^^^ +- Improved :doc:`modernize-use-starts-ends-with + <clang-tidy/checks/modernize/use-starts-ends-with>` check to detect and transform + additional patterns using ``substr`` that can be replaced with ``starts_with``. + Now handles cases like ``str.substr(0, n) == "literal"``, with support for length + determination through integer literals, ``strlen()``, and ``size()``/``length()`` + member functions. + - Improved :doc:`altera-id-dependent-backward-branch <clang-tidy/checks/altera/id-dependent-backward-branch>` check by fixing crashes from invalid code. diff --git a/clang-tools-extra/docs/clang-tidy/checks/modernize/use-starts-ends-with.rst b/clang-tools-extra/docs/clang-tidy/checks/modernize/use-starts-ends-with.rst index 721e927e29849f..752f2cdacd065a 100644 --- a/clang-tools-extra/docs/clang-tidy/checks/modernize/use-starts-ends-with.rst +++ b/clang-tools-extra/docs/clang-tidy/checks/modernize/use-starts-ends-with.rst @@ -7,26 +7,39 @@ Checks for common roundabout ways to express ``starts_with`` and ``ends_with`` and suggests replacing with the simpler method when it is available. Notably, this will work with ``std::string`` and ``std::string_view``. -.. code-block:: c++ +The check handles the following expressions: - std::string s = "..."; - if (s.find("prefix") == 0) { /* do something */ } - if (s.rfind("prefix", 0) == 0) { /* do something */ } - if (s.compare(0, strlen("prefix"), "prefix") == 0) { /* do something */ } - if (s.compare(s.size() - strlen("suffix"), strlen("suffix"), "suffix") == 0) { - /* do something */ - } - if (s.rfind("suffix") == (s.length() - 6)) { - /* do something */ - } - -becomes +==================================================== =========================== +Expression Result +---------------------------------------------------- --------------------------- +``u.find(v) == 0`` ``u.starts_with(v)`` +``u.rfind(v, 0) != 0`` ``!u.starts_with(v)`` +``u.compare(0, v.size(), v) == 0`` ``u.starts_with(v)`` +``u.substr(0, v.size()) == v`` ``u.starts_with(v)`` +``v == u.substr(0, v.size())`` ``u.starts_with(v)`` +``u.substr(0, v.size()) != v`` ``!u.starts_with(v)`` +``u.compare(u.size() - v.size(), v.size(), v) == 0`` ``u.ends_with(v)`` +``u.rfind(v) == u.size() - v.size()`` ``u.ends_with(v)`` +==================================================== =========================== + +For example: .. code-block:: c++ std::string s = "..."; if (s.starts_with("prefix")) { /* do something */ } - if (s.starts_with("prefix")) { /* do something */ } - if (s.starts_with("prefix")) { /* do something */ } - if (s.ends_with("suffix")) { /* do something */ } if (s.ends_with("suffix")) { /* do something */ } + +Notes: + +* For the ``substr`` pattern, the check ensures that: + + * The substring starts at position 0 + * The length matches exactly the compared string's length + * The length is a constant value + +* Non-matching cases (will not be transformed): + + * ``s.substr(1, 5) == "hello"`` // Non-zero start position + * ``s.substr(0, 4) == "hello"`` // Length mismatch + * ``s.substr(0, len) == "hello"`` // Non-constant length \ No newline at end of file diff --git a/clang-tools-extra/test/clang-tidy/checkers/Inputs/Headers/string b/clang-tools-extra/test/clang-tidy/checkers/Inputs/Headers/string index a0e8ebbb267cd0..856dfce815f03a 100644 --- a/clang-tools-extra/test/clang-tidy/checkers/Inputs/Headers/string +++ b/clang-tools-extra/test/clang-tidy/checkers/Inputs/Headers/string @@ -59,8 +59,9 @@ struct basic_string { _Type& insert(size_type pos, const _Type& str); _Type& insert(size_type pos, const C* s); _Type& insert(size_type pos, const C* s, size_type n); + _Type substr(size_type pos = 0, size_type count = npos) const; - constexpr bool starts_with(std::basic_string_view<C, T> sv) const noexcept; + constexpr bool starts_with(std::basic_string_view<C, T> sv) const noexcept; constexpr bool starts_with(C ch) const noexcept; constexpr bool starts_with(const C* s) const; @@ -117,7 +118,7 @@ struct basic_string_view { constexpr bool ends_with(const C* s) const; constexpr int compare(basic_string_view sv) const noexcept; - + static constexpr size_t npos = -1; }; diff --git a/clang-tools-extra/test/clang-tidy/checkers/modernize/use-starts-ends-with.cpp b/clang-tools-extra/test/clang-tidy/checkers/modernize/use-starts-ends-with.cpp index 91477241e82e54..eac9e8714b97bd 100644 --- a/clang-tools-extra/test/clang-tidy/checkers/modernize/use-starts-ends-with.cpp +++ b/clang-tools-extra/test/clang-tidy/checkers/modernize/use-starts-ends-with.cpp @@ -1,5 +1,5 @@ // RUN: %check_clang_tidy -std=c++20 %s modernize-use-starts-ends-with %t -- \ -// RUN: -- -isystem %clang_tidy_headers +// RUN: -- -isystem %clang_tidy_headers #include <string.h> #include <string> @@ -266,3 +266,43 @@ void test(std::string s, std::string_view sv, sub_string ss, sub_sub_string sss, s.compare(0, 1, "ab") == 0; s.rfind(suffix, 1) == s.size() - suffix.size(); } + +void test_substr() { + std::string str("hello world"); + std::string prefix = "hello"; + std::string_view suffix = "world"; + + // Basic pattern + str.substr(0, 5) == "hello"; + // CHECK-MESSAGES: :[[@LINE-1]]:{{[0-9]+}}: warning: use starts_with() instead of substr(0, n) comparison [modernize-use-starts-ends-with] + // CHECK-FIXES: str.starts_with("hello"); + + // With string literal on left side + "hello" == str.substr(0, 5); + // CHECK-MESSAGES: :[[@LINE-1]]:{{[0-9]+}}: warning: use starts_with() instead of substr(0, n) comparison [modernize-use-starts-ends-with] + // CHECK-FIXES: str.starts_with("hello"); + + + // Inequality comparison + str.substr(0, 5) != "world"; + // CHECK-MESSAGES: :[[@LINE-1]]:{{[0-9]+}}: warning: use starts_with() instead of substr(0, n) comparison [modernize-use-starts-ends-with] + // CHECK-FIXES: !str.starts_with("world"); + + + // Ensure non-zero start position is not transformed + str.substr(1, 5) == "hello"; + str.substr(0, 4) == "hello"; // Length mismatch + + size_t len = 5; + str.substr(0, len) == "hello"; // Non-constant length + + // String literal with size calculation + str.substr(0, prefix.size()) == "hello"; + // CHECK-MESSAGES: :[[@LINE-1]]:{{[0-9]+}}: warning: use starts_with() instead of substr(0, n) comparison [modernize-use-starts-ends-with] + // CHECK-FIXES: str.starts_with("hello"); + + // String literal with size calculation + str.substr(0, strlen("hello")) == "hello"; + // CHECK-MESSAGES: :[[@LINE-1]]:{{[0-9]+}}: warning: use starts_with() instead of substr(0, n) comparison [modernize-use-starts-ends-with] + // CHECK-FIXES: str.starts_with("hello"); +} \ No newline at end of file _______________________________________________ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits