https://github.com/hjanuschka updated https://github.com/llvm/llvm-project/pull/116033
>From 8a48d6be0b0cf28ab8e98b3a5e7f45628ceadb52 Mon Sep 17 00:00:00 2001 From: Helmut Januschka <hel...@januschka.com> Date: Wed, 13 Nov 2024 12:52:36 +0100 Subject: [PATCH 1/5] [clang-tidy] Enhance modernize-use-starts-ends-with with substr detection Enhances the modernize-use-starts-ends-with check to detect substr-based patterns that can be replaced with starts_with() (C++20). This improves code readability and efficiency by avoiding temporary string creation. New patterns detected: str.substr(0, n) == "foo" -> str.starts_with("foo") "foo" == str.substr(0, n) -> str.starts_with("foo") str.substr(0, n) != "foo" -> !str.starts_with("foo") str.substr(0, strlen("foo")) == "foo" -> str.starts_with("foo") str.substr(0, prefix.size()) == "foo" -> str.starts_with("foo") The enhancement: - Integrates with existing starts_with patterns - Handles substr with zero first argument - Supports length via literals, strlen(), and size()/length() - Validates string literal length matches - Handles both == and != operators Part of modernize-use-starts-ends-with check. --- .../modernize/UseStartsEndsWithCheck.cpp | 65 ++++++++++++++++++- clang-tools-extra/docs/ReleaseNotes.rst | 5 +- .../checks/modernize/use-starts-ends-with.rst | 45 ++++++++----- .../clang-tidy/checkers/Inputs/Headers/string | 1 + .../modernize/use-starts-ends-with.cpp | 34 ++++++++++ 5 files changed, 131 insertions(+), 19 deletions(-) diff --git a/clang-tools-extra/clang-tidy/modernize/UseStartsEndsWithCheck.cpp b/clang-tools-extra/clang-tidy/modernize/UseStartsEndsWithCheck.cpp index 1231f954298adc..bcfb7227be722b 100644 --- a/clang-tools-extra/clang-tidy/modernize/UseStartsEndsWithCheck.cpp +++ b/clang-tools-extra/clang-tidy/modernize/UseStartsEndsWithCheck.cpp @@ -171,10 +171,24 @@ void UseStartsEndsWithCheck::registerMatchers(MatchFinder *Finder) { hasRHS(lengthExprForStringNode("needle"))))) .bind("expr"), this); + Finder->addMatcher( + cxxOperatorCallExpr( + hasAnyOperatorName("==", "!="), + hasOperands( + expr().bind("needle"), + cxxMemberCallExpr( + argumentCountIs(2), hasArgument(0, ZeroLiteral), + hasArgument(1, lengthExprForStringNode("needle")), + callee(cxxMethodDecl(hasName("substr"), + ofClass(OnClassWithStartsWithFunction)) + .bind("find_fun"))) + .bind("find_expr"))) + .bind("expr"), + this); } void UseStartsEndsWithCheck::check(const MatchFinder::MatchResult &Result) { - const auto *ComparisonExpr = Result.Nodes.getNodeAs<BinaryOperator>("expr"); + const auto *ComparisonExpr = Result.Nodes.getNodeAs<Expr>("expr"); const auto *FindExpr = Result.Nodes.getNodeAs<CXXMemberCallExpr>("find_expr"); const auto *FindFun = Result.Nodes.getNodeAs<CXXMethodDecl>("find_fun"); const auto *SearchExpr = Result.Nodes.getNodeAs<Expr>("needle"); @@ -189,7 +203,54 @@ void UseStartsEndsWithCheck::check(const MatchFinder::MatchResult &Result) { if (ComparisonExpr->getBeginLoc().isMacroID()) return; - const bool Neg = ComparisonExpr->getOpcode() == BO_NE; + bool Neg; + if (const auto *BO = llvm::dyn_cast<BinaryOperator>(ComparisonExpr)) { + Neg = BO->getOpcode() == BO_NE; + } else { + assert(llvm::isa<CXXOperatorCallExpr>(ComparisonExpr)); + Neg = llvm::cast<CXXOperatorCallExpr>(ComparisonExpr)->getOperator() == + OO_ExclaimEqual; + } + + // Check if this is a substr case + bool IsSubstr = FindFun->getName() == "substr"; + + if (IsSubstr) { + const auto *SubstrCall = cast<CXXMemberCallExpr>(FindExpr); + const Expr *Object = SubstrCall->getImplicitObjectArgument(); + + std::string ObjectStr; + std::string SearchStr; + bool Invalid = false; + + auto &SM = *Result.SourceManager; + + CharSourceRange ObjectRange = CharSourceRange::getTokenRange( + Object->getBeginLoc(), Object->getEndLoc()); + ObjectStr = Lexer::getSourceText(ObjectRange, SM, getLangOpts(), &Invalid).str(); + if (Invalid) + return; + + CharSourceRange SearchRange = CharSourceRange::getTokenRange( + SearchExpr->getBeginLoc(), SearchExpr->getEndLoc()); + SearchStr = Lexer::getSourceText(SearchRange, SM, getLangOpts(), &Invalid).str(); + if (Invalid) + return; + + // Build the new expression: [!]Object.starts_with(SearchExpr) + std::string NewExpr = + (llvm::Twine(Neg ? "!" : "") + ObjectStr + "." + + ReplacementFunction->getName() + "(" + SearchStr + ")").str(); + // Replace the entire comparison expression + auto Diag = diag(ComparisonExpr->getBeginLoc(), + "use %0 instead of substr() %select{==|!=}1") + << ReplacementFunction->getName() << Neg; + Diag << FixItHint::CreateReplacement( + CharSourceRange::getTokenRange(ComparisonExpr->getSourceRange()), + NewExpr); + return; + } + auto Diagnostic = diag(FindExpr->getExprLoc(), "use %0 instead of %1() %select{==|!=}2 0") diff --git a/clang-tools-extra/docs/ReleaseNotes.rst b/clang-tools-extra/docs/ReleaseNotes.rst index db971f08ca3dbc..62dbc7646740bc 100644 --- a/clang-tools-extra/docs/ReleaseNotes.rst +++ b/clang-tools-extra/docs/ReleaseNotes.rst @@ -244,7 +244,10 @@ Changes in existing checks - Improved :doc:`modernize-use-starts-ends-with <clang-tidy/checks/modernize/use-starts-ends-with>` check to handle two cases - that can be replaced with ``ends_with`` + that can be replaced with ``ends_with`` and detect patterns using ``substr`` + that can be replaced with ``starts_with``. Now handles cases like + ``str.substr(0, n) == "literal"``, with support for length determination through + integer literals, ``strlen()``, and ``size()``/``length()`` member functions. - Improved :doc:`modernize-use-std-format <clang-tidy/checks/modernize/use-std-format>` check to support replacing diff --git a/clang-tools-extra/docs/clang-tidy/checks/modernize/use-starts-ends-with.rst b/clang-tools-extra/docs/clang-tidy/checks/modernize/use-starts-ends-with.rst index 721e927e29849f..41e49e19fa03fd 100644 --- a/clang-tools-extra/docs/clang-tidy/checks/modernize/use-starts-ends-with.rst +++ b/clang-tools-extra/docs/clang-tidy/checks/modernize/use-starts-ends-with.rst @@ -7,26 +7,39 @@ Checks for common roundabout ways to express ``starts_with`` and ``ends_with`` and suggests replacing with the simpler method when it is available. Notably, this will work with ``std::string`` and ``std::string_view``. -.. code-block:: c++ +The check handles the follow ing expressions: - std::string s = "..."; - if (s.find("prefix") == 0) { /* do something */ } - if (s.rfind("prefix", 0) == 0) { /* do something */ } - if (s.compare(0, strlen("prefix"), "prefix") == 0) { /* do something */ } - if (s.compare(s.size() - strlen("suffix"), strlen("suffix"), "suffix") == 0) { - /* do something */ - } - if (s.rfind("suffix") == (s.length() - 6)) { - /* do something */ - } - -becomes +====================================================== ===================== +Expression Replacement +------------------------------------------------------ --------------------- +``u.find(v) == 0`` ``u.starts_with(v)`` +``u.rfind(v, 0) != 0`` ``!u.starts_with(v)`` +``u.compare(0, v.size(), v) == 0`` ``u.starts_with(v)`` +``u.substr(0, v.size()) == v`` ``u.starts_with(v)`` +``v == u.substr(0, v.size())`` ``u.starts_with(v)`` +``u.substr(0, v.size()) != v`` ``!u.starts_with(v)`` +``u.compare(u.size() - v.size(), v.size(), v) == 0`` ``u.ends_with(v)`` +``u.rfind(v) == u.size() - v.size()`` ``u.ends_with(v)`` +====================================================== ===================== + +For example: .. code-block:: c++ std::string s = "..."; if (s.starts_with("prefix")) { /* do something */ } - if (s.starts_with("prefix")) { /* do something */ } - if (s.starts_with("prefix")) { /* do something */ } - if (s.ends_with("suffix")) { /* do something */ } if (s.ends_with("suffix")) { /* do something */ } + +Notes: + +* For the ``substr`` pattern, the check ensures that: + + * The substring starts at position 0 + * The length matches exactly the compared string's length + * The length is a constant value + +* Non-matching cases (will not be transformed): + + * ``s.substr(1, 5) == "hello"`` // Non-zero start position + * ``s.substr(0, 4) == "hello"`` // Length mismatch + * ``s.substr(0, len) == "hello"`` // Non-constant length \ No newline at end of file diff --git a/clang-tools-extra/test/clang-tidy/checkers/Inputs/Headers/string b/clang-tools-extra/test/clang-tidy/checkers/Inputs/Headers/string index a0e8ebbb267cd0..bd797cac0dbe0d 100644 --- a/clang-tools-extra/test/clang-tidy/checkers/Inputs/Headers/string +++ b/clang-tools-extra/test/clang-tidy/checkers/Inputs/Headers/string @@ -59,6 +59,7 @@ struct basic_string { _Type& insert(size_type pos, const _Type& str); _Type& insert(size_type pos, const C* s); _Type& insert(size_type pos, const C* s, size_type n); + _Type substr(size_type pos = 0, size_type count = npos) const; constexpr bool starts_with(std::basic_string_view<C, T> sv) const noexcept; constexpr bool starts_with(C ch) const noexcept; diff --git a/clang-tools-extra/test/clang-tidy/checkers/modernize/use-starts-ends-with.cpp b/clang-tools-extra/test/clang-tidy/checkers/modernize/use-starts-ends-with.cpp index 91477241e82e54..cfc773b769d2bb 100644 --- a/clang-tools-extra/test/clang-tidy/checkers/modernize/use-starts-ends-with.cpp +++ b/clang-tools-extra/test/clang-tidy/checkers/modernize/use-starts-ends-with.cpp @@ -266,3 +266,37 @@ void test(std::string s, std::string_view sv, sub_string ss, sub_sub_string sss, s.compare(0, 1, "ab") == 0; s.rfind(suffix, 1) == s.size() - suffix.size(); } + +void test_substr() { + std::string str("hello world"); + std::string prefix = "hello"; + std::string_view suffix = "world"; + + // Basic pattern + str.substr(0, 5) == "hello"; + // CHECK-MESSAGES: :[[@LINE-1]]:{{[0-9]+}}: warning: use starts_with instead of substr() == [modernize-use-starts-ends-with] + // CHECK-FIXES: str.starts_with("hello"); + + // With string literal on left side + "hello" == str.substr(0, 5); + // CHECK-MESSAGES: :[[@LINE-1]]:{{[0-9]+}}: warning: use starts_with instead of substr() == [modernize-use-starts-ends-with] + // CHECK-FIXES: str.starts_with("hello"); + + // Inequality comparison + str.substr(0, 5) != "world"; + // CHECK-MESSAGES: :[[@LINE-1]]:{{[0-9]+}}: warning: use starts_with instead of substr() != [modernize-use-starts-ends-with] + // CHECK-FIXES: !str.starts_with("world"); + + // Ensure non-zero start position is not transformed + str.substr(1, 5) == "hello"; + str.substr(0, 4) == "hello"; // Length mismatch + + size_t len = 5; + str.substr(0, len) == "hello"; // Non-constant length + + // String literal with size calculation + str.substr(0, strlen("hello")) == "hello"; + // CHECK-MESSAGES: :[[@LINE-1]]:{{[0-9]+}}: warning: use starts_with instead of substr() == [modernize-use-starts-ends-with] + // CHECK-FIXES: str.starts_with("hello"); + +} >From 87444a7331f48f9620f7e2c5bdc851e9f5085e3d Mon Sep 17 00:00:00 2001 From: Helmut Januschka <hel...@januschka.com> Date: Fri, 15 Nov 2024 23:05:24 +0100 Subject: [PATCH 2/5] Update clang-tools-extra/docs/clang-tidy/checks/modernize/use-starts-ends-with.rst Co-authored-by: Nicolas van Kempen <nvank...@gmail.com> --- .../docs/clang-tidy/checks/modernize/use-starts-ends-with.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/clang-tools-extra/docs/clang-tidy/checks/modernize/use-starts-ends-with.rst b/clang-tools-extra/docs/clang-tidy/checks/modernize/use-starts-ends-with.rst index 41e49e19fa03fd..e1dd8c47f01d57 100644 --- a/clang-tools-extra/docs/clang-tidy/checks/modernize/use-starts-ends-with.rst +++ b/clang-tools-extra/docs/clang-tidy/checks/modernize/use-starts-ends-with.rst @@ -7,7 +7,7 @@ Checks for common roundabout ways to express ``starts_with`` and ``ends_with`` and suggests replacing with the simpler method when it is available. Notably, this will work with ``std::string`` and ``std::string_view``. -The check handles the follow ing expressions: +The check handles the following expressions: ====================================================== ===================== Expression Replacement >From b90d6d98605c44f34fa156082034ef26ba450a2b Mon Sep 17 00:00:00 2001 From: Helmut Januschka <hel...@januschka.com> Date: Fri, 15 Nov 2024 23:05:35 +0100 Subject: [PATCH 3/5] Update clang-tools-extra/docs/clang-tidy/checks/modernize/use-starts-ends-with.rst Co-authored-by: Nicolas van Kempen <nvank...@gmail.com> --- .../checks/modernize/use-starts-ends-with.rst | 23 +++++++++---------- 1 file changed, 11 insertions(+), 12 deletions(-) diff --git a/clang-tools-extra/docs/clang-tidy/checks/modernize/use-starts-ends-with.rst b/clang-tools-extra/docs/clang-tidy/checks/modernize/use-starts-ends-with.rst index e1dd8c47f01d57..2fdd118beefaa2 100644 --- a/clang-tools-extra/docs/clang-tidy/checks/modernize/use-starts-ends-with.rst +++ b/clang-tools-extra/docs/clang-tidy/checks/modernize/use-starts-ends-with.rst @@ -9,18 +9,17 @@ this will work with ``std::string`` and ``std::string_view``. The check handles the following expressions: -====================================================== ===================== -Expression Replacement ------------------------------------------------------- --------------------- -``u.find(v) == 0`` ``u.starts_with(v)`` -``u.rfind(v, 0) != 0`` ``!u.starts_with(v)`` -``u.compare(0, v.size(), v) == 0`` ``u.starts_with(v)`` -``u.substr(0, v.size()) == v`` ``u.starts_with(v)`` -``v == u.substr(0, v.size())`` ``u.starts_with(v)`` -``u.substr(0, v.size()) != v`` ``!u.starts_with(v)`` -``u.compare(u.size() - v.size(), v.size(), v) == 0`` ``u.ends_with(v)`` -``u.rfind(v) == u.size() - v.size()`` ``u.ends_with(v)`` -====================================================== ===================== +==================================================== ===================== +Expression Replacement +---------------------------------------------------- --------------------- +``u.find(v) == 0`` ``u.starts_with(v)`` +``u.rfind(v, 0) != 0`` ``!u.starts_with(v)`` +``u.compare(0, v.size(), v) == 0`` ``u.starts_with(v)`` +``u.substr(0, v.size()) == v`` ``u.starts_with(v)`` +``v != u.substr(0, v.size())`` ``!u.starts_with(v)`` +``u.compare(u.size() - v.size(), v.size(), v) == 0`` ``u.ends_with(v)`` +``u.rfind(v) == u.size() - v.size()`` ``u.ends_with(v)`` +==================================================== ===================== For example: >From 005d0dd81a673f7dd30e79a8d825d176fb15f4a9 Mon Sep 17 00:00:00 2001 From: Helmut Januschka <hel...@januschka.com> Date: Fri, 15 Nov 2024 23:10:07 +0100 Subject: [PATCH 4/5] Update clang-tools-extra/test/clang-tidy/checkers/Inputs/Headers/string Co-authored-by: Nicolas van Kempen <nvank...@gmail.com> --- clang-tools-extra/test/clang-tidy/checkers/Inputs/Headers/string | 1 + 1 file changed, 1 insertion(+) diff --git a/clang-tools-extra/test/clang-tidy/checkers/Inputs/Headers/string b/clang-tools-extra/test/clang-tidy/checkers/Inputs/Headers/string index bd797cac0dbe0d..51f68174169067 100644 --- a/clang-tools-extra/test/clang-tidy/checkers/Inputs/Headers/string +++ b/clang-tools-extra/test/clang-tidy/checkers/Inputs/Headers/string @@ -59,6 +59,7 @@ struct basic_string { _Type& insert(size_type pos, const _Type& str); _Type& insert(size_type pos, const C* s); _Type& insert(size_type pos, const C* s, size_type n); + _Type substr(size_type pos = 0, size_type count = npos) const; constexpr bool starts_with(std::basic_string_view<C, T> sv) const noexcept; >From 8b4b0a9dcf13d06537fae28130c71dca8d13f0e0 Mon Sep 17 00:00:00 2001 From: Helmut Januschka <hel...@januschka.com> Date: Fri, 15 Nov 2024 23:12:05 +0100 Subject: [PATCH 5/5] Update clang-tools-extra/test/clang-tidy/checkers/modernize/use-starts-ends-with.cpp Co-authored-by: Nicolas van Kempen <nvank...@gmail.com> --- .../test/clang-tidy/checkers/modernize/use-starts-ends-with.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/clang-tools-extra/test/clang-tidy/checkers/modernize/use-starts-ends-with.cpp b/clang-tools-extra/test/clang-tidy/checkers/modernize/use-starts-ends-with.cpp index cfc773b769d2bb..906cd9f4c3c55f 100644 --- a/clang-tools-extra/test/clang-tidy/checkers/modernize/use-starts-ends-with.cpp +++ b/clang-tools-extra/test/clang-tidy/checkers/modernize/use-starts-ends-with.cpp @@ -274,7 +274,7 @@ void test_substr() { // Basic pattern str.substr(0, 5) == "hello"; - // CHECK-MESSAGES: :[[@LINE-1]]:{{[0-9]+}}: warning: use starts_with instead of substr() == [modernize-use-starts-ends-with] + // CHECK-MESSAGES: :[[@LINE-1]]:{{[0-9]+}}: warning: use starts_with instead of substr // CHECK-FIXES: str.starts_with("hello"); // With string literal on left side _______________________________________________ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits