llvmbot wrote:
<!--LLVM PR SUMMARY COMMENT--> @llvm/pr-subscribers-clang-tidy Author: Helmut Januschka (hjanuschka) <details> <summary>Changes</summary> Adds a new check that finds calls to substr when its first argument is a zero-equivalent expression and can be replaced with starts_with() (introduced in C++20). This modernization improves code readability by making the intent clearer and can be more efficient as it avoids creating temporary strings. Converts patterns like: str.substr(0, 3) == "foo" -> str.starts_with("foo") str.substr(x-x, 3) == "foo" -> str.starts_with("foo") str.substr(zero, n) == prefix -> str.starts_with(prefix) "bar" == str.substr(i-i, 3) -> str.starts_with("bar") str.substr(0, n) != prefix -> !str.starts_with(prefix) The check: - Detects zero-equivalent expressions: * Direct zero literals (0) * Variables initialized to zero * Self-canceling expressions (x-x, i-i) - Only converts when length matches exactly for string literals - Warns about standalone substr calls that might be candidates - Supports both string literals and string variables - Handles both == and != operators --- Full diff: https://github.com/llvm/llvm-project/pull/116033.diff 6 Files Affected: - (modified) clang-tools-extra/clang-tidy/modernize/CMakeLists.txt (+1) - (modified) clang-tools-extra/clang-tidy/modernize/ModernizeTidyModule.cpp (+3) - (added) clang-tools-extra/clang-tidy/modernize/SubstrToStartsWithCheck.cpp (+121) - (added) clang-tools-extra/clang-tidy/modernize/SubstrToStartsWithCheck.h (+30) - (added) clang-tools-extra/docs/clang-tidy/checks/modernize/substr-to-starts-with.rst (+38) - (added) clang-tools-extra/test/clang-tidy/checkers/modernize/substr-to-starts-with.cpp (+41) ``````````diff diff --git a/clang-tools-extra/clang-tidy/modernize/CMakeLists.txt b/clang-tools-extra/clang-tidy/modernize/CMakeLists.txt index c919d49b42873a..ed1ba2ab62a90f 100644 --- a/clang-tools-extra/clang-tidy/modernize/CMakeLists.txt +++ b/clang-tools-extra/clang-tidy/modernize/CMakeLists.txt @@ -49,6 +49,7 @@ add_clang_library(clangTidyModernizeModule STATIC UseTransparentFunctorsCheck.cpp UseUncaughtExceptionsCheck.cpp UseUsingCheck.cpp + SubstrToStartsWithCheck.cpp LINK_LIBS clangTidy diff --git a/clang-tools-extra/clang-tidy/modernize/ModernizeTidyModule.cpp b/clang-tools-extra/clang-tidy/modernize/ModernizeTidyModule.cpp index 18607593320635..7569211d2552ea 100644 --- a/clang-tools-extra/clang-tidy/modernize/ModernizeTidyModule.cpp +++ b/clang-tools-extra/clang-tidy/modernize/ModernizeTidyModule.cpp @@ -50,6 +50,7 @@ #include "UseTransparentFunctorsCheck.h" #include "UseUncaughtExceptionsCheck.h" #include "UseUsingCheck.h" +#include "SubstrToStartsWithCheck.h" using namespace clang::ast_matchers; @@ -122,6 +123,8 @@ class ModernizeModule : public ClangTidyModule { CheckFactories.registerCheck<UseUncaughtExceptionsCheck>( "modernize-use-uncaught-exceptions"); CheckFactories.registerCheck<UseUsingCheck>("modernize-use-using"); + CheckFactories.registerCheck<SubstrToStartsWithCheck>( + "modernize-substr-to-starts-with"); } }; diff --git a/clang-tools-extra/clang-tidy/modernize/SubstrToStartsWithCheck.cpp b/clang-tools-extra/clang-tidy/modernize/SubstrToStartsWithCheck.cpp new file mode 100644 index 00000000000000..b185a2a3b0afc2 --- /dev/null +++ b/clang-tools-extra/clang-tidy/modernize/SubstrToStartsWithCheck.cpp @@ -0,0 +1,121 @@ +//===--- SubstrToStartsWithCheck.cpp - clang-tidy ------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "SubstrToStartsWithCheck.h" +#include "clang/AST/ASTContext.h" +#include "clang/ASTMatchers/ASTMatchFinder.h" +#include "clang/Basic/SourceManager.h" +#include "clang/Lex/Lexer.h" + +using namespace clang::ast_matchers; + +namespace clang::tidy::modernize { + +void SubstrToStartsWithCheck::registerMatchers(MatchFinder *Finder) { + // Helper matcher to identify expressions that evaluate to 0 + auto isZeroExpr = expr(anyOf( + integerLiteral(equals(0)), + ignoringParenImpCasts(declRefExpr( + to(varDecl(hasInitializer(integerLiteral(equals(0))))))), + binaryOperator(hasOperatorName("-"), hasLHS(expr()), hasRHS(expr())))); + + // Match string literals or string variables + auto isStringLike = expr(anyOf( + stringLiteral().bind("literal"), + implicitCastExpr(hasSourceExpression(stringLiteral().bind("literal"))), + declRefExpr(to(varDecl(hasType(qualType(hasDeclaration( + namedDecl(hasAnyName("::std::string", "::std::basic_string")))))))).bind("strvar"))); + + // Match substr with zero first argument + auto isSubstrCall = + cxxMemberCallExpr( + callee(memberExpr(hasDeclaration(cxxMethodDecl( + hasName("substr"), + ofClass(hasAnyName("basic_string", "string", "u16string")))))), + hasArgument(0, isZeroExpr), + hasArgument(1, expr().bind("length"))) + .bind("substr"); + + // Match comparison operations + Finder->addMatcher( + binaryOperator( + anyOf(hasOperatorName("=="), hasOperatorName("!=")), + hasEitherOperand(isSubstrCall), + hasEitherOperand(isStringLike), + unless(hasType(isAnyCharacter()))) + .bind("comparison"), + this); + + // Also match direct substr calls that could be starts_with + Finder->addMatcher( + cxxMemberCallExpr( + callee(memberExpr(hasDeclaration(cxxMethodDecl( + hasName("substr"), + ofClass(hasAnyName("basic_string", "string", "u16string")))))), + hasArgument(0, isZeroExpr), + hasArgument(1, expr().bind("direct_length"))) + .bind("direct_substr"), + this); +} + +void SubstrToStartsWithCheck::check(const MatchFinder::MatchResult &Result) { + const auto *Comparison = Result.Nodes.getNodeAs<BinaryOperator>("comparison"); + const auto *DirectSubstr = + Result.Nodes.getNodeAs<CXXMemberCallExpr>("direct_substr"); + + // Handle comparison cases + if (Comparison) { + const auto *SubstrCall = Result.Nodes.getNodeAs<CXXMemberCallExpr>("substr"); + const auto *LengthArg = Result.Nodes.getNodeAs<Expr>("length"); + const auto *Literal = Result.Nodes.getNodeAs<StringLiteral>("literal"); + const auto *StrVar = Result.Nodes.getNodeAs<DeclRefExpr>("strvar"); + + if (!SubstrCall || !LengthArg || (!Literal && !StrVar)) + return; + + std::string CompareStr; + if (Literal) { + CompareStr = Literal->getString().str(); + } else if (StrVar) { + CompareStr = Lexer::getSourceText( + CharSourceRange::getTokenRange(StrVar->getSourceRange()), + *Result.SourceManager, Result.Context->getLangOpts()) + .str(); + } + + // Check if the length matches the string literal length + if (Literal) { + if (const auto *LengthLiteral = dyn_cast<IntegerLiteral>(LengthArg)) { + if (LengthLiteral->getValue() != Literal->getLength()) + return; + } + } + + // Build replacement + std::string Replacement; + if (Comparison->getOpcode() == BO_EQ) { + Replacement = "starts_with(" + CompareStr + ")"; + } else { // BO_NE + Replacement = "!starts_with(" + CompareStr + ")"; + } + + diag(Comparison->getBeginLoc(), + "use starts_with() instead of substring comparison") + << FixItHint::CreateReplacement(Comparison->getSourceRange(), + Replacement); + } + + // Handle direct substr calls + if (DirectSubstr) { + diag(DirectSubstr->getBeginLoc(), + "consider using starts_with() if this substr is used for prefix " + "checking"); + } +} + +} // namespace clang::tidy::modernize \ No newline at end of file diff --git a/clang-tools-extra/clang-tidy/modernize/SubstrToStartsWithCheck.h b/clang-tools-extra/clang-tidy/modernize/SubstrToStartsWithCheck.h new file mode 100644 index 00000000000000..fdb157ca0a24fb --- /dev/null +++ b/clang-tools-extra/clang-tidy/modernize/SubstrToStartsWithCheck.h @@ -0,0 +1,30 @@ +//===--- SubstrToStartsWithCheck.h - clang-tidy ------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_MODERNIZE_SUBSTRTOSTARTSWITHCHECK_H +#define LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_MODERNIZE_SUBSTRTOSTARTSWITHCHECK_H + +#include "../ClangTidyCheck.h" + +namespace clang::tidy::modernize { + +/// Finds calls to substr(0, n) that can be replaced with starts_with(). +/// +/// For the user-facing documentation see: +/// http://clang.llvm.org/extra/clang-tidy/checks/modernize/substr-to-starts-with.html +class SubstrToStartsWithCheck : public ClangTidyCheck { +public: + SubstrToStartsWithCheck(StringRef Name, ClangTidyContext *Context) + : ClangTidyCheck(Name, Context) {} + void registerMatchers(ast_matchers::MatchFinder *Finder) override; + void check(const ast_matchers::MatchFinder::MatchResult &Result) override; +}; + +} // namespace clang::tidy::modernize + +#endif // LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_MODERNIZE_SUBSTRTOSTARTSWITHCHECK_H diff --git a/clang-tools-extra/docs/clang-tidy/checks/modernize/substr-to-starts-with.rst b/clang-tools-extra/docs/clang-tidy/checks/modernize/substr-to-starts-with.rst new file mode 100644 index 00000000000000..88a9c311eb6737 --- /dev/null +++ b/clang-tools-extra/docs/clang-tidy/checks/modernize/substr-to-starts-with.rst @@ -0,0 +1,38 @@ +modernize-substr-to-starts-with +============================== + +Finds calls to ``substr(0, n)`` that can be replaced with ``starts_with()`` (introduced in C++20). +This makes the code's intent clearer and can be more efficient as it avoids creating temporary strings. + +For example: + +.. code-block:: c++ + + str.substr(0, 3) == "foo" // before + str.starts_with("foo") // after + + "bar" == str.substr(0, 3) // before + str.starts_with("bar") // after + + str.substr(0, n) == prefix // before + str.starts_with(prefix) // after + +The check handles various ways of expressing zero as the start index: + +.. code-block:: c++ + + const int zero = 0; + str.substr(zero, n) == prefix // converted + str.substr(x - x, n) == prefix // converted + +The check will only convert cases where: +* The substr call starts at index 0 (or equivalent) +* When comparing with string literals, the length matches exactly +* The comparison is with == or != + +The check will also warn about standalone substr calls that might be candidates +for conversion to starts_with: + +.. code-block:: c++ + + auto prefix = str.substr(0, n); // warns about possible use of starts_with diff --git a/clang-tools-extra/test/clang-tidy/checkers/modernize/substr-to-starts-with.cpp b/clang-tools-extra/test/clang-tidy/checkers/modernize/substr-to-starts-with.cpp new file mode 100644 index 00000000000000..41b59903e605d7 --- /dev/null +++ b/clang-tools-extra/test/clang-tidy/checkers/modernize/substr-to-starts-with.cpp @@ -0,0 +1,41 @@ +// RUN: %check_clang_tidy %s modernize-substr-to-starts-with %t + +#include <string> + +void test() { + std::string str = "hello world"; + if (str.substr(0, 5) == "hello") {} + // CHECK-MESSAGES: :[[@LINE-1]]:7: warning: use starts_with() instead of substring comparison + // CHECK-FIXES: if (str.starts_with("hello")) {} + + if ("hello" == str.substr(0, 5)) {} + // CHECK-MESSAGES: :[[@LINE-1]]:19: warning: use starts_with() instead of substring comparison + // CHECK-FIXES: if (str.starts_with("hello")) {} + + bool b = str.substr(0, 5) != "hello"; + // CHECK-MESSAGES: :[[@LINE-1]]:11: warning: use starts_with() instead of substring comparison + // CHECK-FIXES: bool b = !str.starts_with("hello"); + + // Variable length and string refs + std::string prefix = "hello"; + size_t len = 5; + if (str.substr(0, len) == prefix) {} + // CHECK-MESSAGES: :[[@LINE-1]]:7: warning: use starts_with() instead of substring comparison + // CHECK-FIXES: if (str.starts_with(prefix)) {} + + // Various zero expressions + const int zero = 0; + int i = 0; + if (str.substr(zero, 5) == "hello") {} + // CHECK-MESSAGES: :[[@LINE-1]]:7: warning: use starts_with() instead of substring comparison + // CHECK-FIXES: if (str.starts_with("hello")) {} + + if (str.substr(i-i, 5) == "hello") {} + // CHECK-MESSAGES: :[[@LINE-1]]:7: warning: use starts_with() instead of substring comparison + // CHECK-FIXES: if (str.starts_with("hello")) {} + + // Should not convert these + if (str.substr(1, 5) == "hello") {} // Non-zero start + if (str.substr(0, 4) == "hello") {} // Length mismatch + if (str.substr(0, 6) == "hello") {} // Length mismatch +} `````````` </details> https://github.com/llvm/llvm-project/pull/116033 _______________________________________________ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits