https://github.com/hjanuschka updated https://github.com/llvm/llvm-project/pull/120055
>From f6c5027fc3b822c66c02b73d2a41de8c73b1b6d5 Mon Sep 17 00:00:00 2001 From: Helmut Januschka <[email protected]> Date: Wed, 18 Feb 2026 10:04:16 +0100 Subject: [PATCH] [clang-tidy] Add readability-stringview-substr check Add a new check that suggests using string_view::remove_prefix() and remove_suffix() instead of substr() when the intent is to remove characters from either end of a string_view. Transformations: sv = sv.substr(n) -> sv.remove_prefix(n) sv = sv.substr(0, sv.length() - n) -> sv.remove_suffix(n) sv = sv.substr(0, sv.length()) -> redundant self-copy (removed) sv1 = sv.substr(0, sv.length()) -> sv1 = sv sv1 = sv2.substr(0, sv2.length()-n) -> sv1 = sv2; sv1.remove_suffix(n) These replacements make the intent clearer and are more efficient as they modify the string_view in place rather than creating a new one. --- .../clang-tidy/readability/CMakeLists.txt | 1 + .../readability/ReadabilityTidyModule.cpp | 3 + .../readability/StringviewSubstrCheck.cpp | 192 ++++++++++++++++++ .../readability/StringviewSubstrCheck.h | 44 ++++ clang-tools-extra/docs/ReleaseNotes.rst | 6 + .../docs/clang-tidy/checks/list.rst | 1 + .../checks/readability/stringview-substr.rst | 20 ++ .../readability/stringview_substr.cpp | 155 ++++++++++++++ 8 files changed, 422 insertions(+) create mode 100644 clang-tools-extra/clang-tidy/readability/StringviewSubstrCheck.cpp create mode 100644 clang-tools-extra/clang-tidy/readability/StringviewSubstrCheck.h create mode 100644 clang-tools-extra/docs/clang-tidy/checks/readability/stringview-substr.rst create mode 100644 clang-tools-extra/test/clang-tidy/checkers/readability/stringview_substr.cpp diff --git a/clang-tools-extra/clang-tidy/readability/CMakeLists.txt b/clang-tools-extra/clang-tidy/readability/CMakeLists.txt index f1f3cde32feff..def80a47f4cff 100644 --- a/clang-tools-extra/clang-tidy/readability/CMakeLists.txt +++ b/clang-tools-extra/clang-tidy/readability/CMakeLists.txt @@ -57,6 +57,7 @@ add_clang_library(clangTidyReadabilityModule STATIC StaticAccessedThroughInstanceCheck.cpp StaticDefinitionInAnonymousNamespaceCheck.cpp StringCompareCheck.cpp + StringviewSubstrCheck.cpp SuspiciousCallArgumentCheck.cpp TrailingCommaCheck.cpp UniqueptrDeleteReleaseCheck.cpp diff --git a/clang-tools-extra/clang-tidy/readability/ReadabilityTidyModule.cpp b/clang-tools-extra/clang-tidy/readability/ReadabilityTidyModule.cpp index c582dc98eac6b..08f17db156b3f 100644 --- a/clang-tools-extra/clang-tidy/readability/ReadabilityTidyModule.cpp +++ b/clang-tools-extra/clang-tidy/readability/ReadabilityTidyModule.cpp @@ -59,6 +59,7 @@ #include "StaticAccessedThroughInstanceCheck.h" #include "StaticDefinitionInAnonymousNamespaceCheck.h" #include "StringCompareCheck.h" +#include "StringviewSubstrCheck.h" #include "SuspiciousCallArgumentCheck.h" #include "TrailingCommaCheck.h" #include "UniqueptrDeleteReleaseCheck.h" @@ -160,6 +161,8 @@ class ReadabilityModule : public ClangTidyModule { "readability-static-definition-in-anonymous-namespace"); CheckFactories.registerCheck<StringCompareCheck>( "readability-string-compare"); + CheckFactories.registerCheck<StringViewSubstrCheck>( + "readability-stringview-substr"); CheckFactories.registerCheck<readability::NamedParameterCheck>( "readability-named-parameter"); CheckFactories.registerCheck<NonConstParameterCheck>( diff --git a/clang-tools-extra/clang-tidy/readability/StringviewSubstrCheck.cpp b/clang-tools-extra/clang-tidy/readability/StringviewSubstrCheck.cpp new file mode 100644 index 0000000000000..0aa2669b86388 --- /dev/null +++ b/clang-tools-extra/clang-tidy/readability/StringviewSubstrCheck.cpp @@ -0,0 +1,192 @@ +//===----------------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "StringviewSubstrCheck.h" +#include "clang/AST/ASTContext.h" +#include "clang/ASTMatchers/ASTMatchFinder.h" +#include "clang/Lex/Lexer.h" + +using namespace clang::ast_matchers; + +namespace clang::tidy::readability { + +void StringViewSubstrCheck::registerMatchers(MatchFinder *Finder) { + // Match string_view type + const auto StringViewDecl = recordDecl(hasName("::std::basic_string_view")); + const auto IsStringView = qualType( + hasUnqualifiedDesugaredType(recordType(hasDeclaration(StringViewDecl)))); + + // Length/size matcher reused in multiple places + const auto LengthMatcher = cxxMemberCallExpr(callee(memberExpr(hasDeclaration( + cxxMethodDecl(anyOf(hasName("length"), hasName("size"))))))); + + // Match various forms of zero + const auto IsZero = + expr(anyOf(ignoringParenImpCasts(integerLiteral(equals(0))), + ignoringParenImpCasts(declRefExpr( + to(varDecl(hasInitializer(integerLiteral(equals(0))))))))); + + // Match substr() call patterns + const auto SubstrCall = + cxxMemberCallExpr( + callee(memberExpr(hasDeclaration(cxxMethodDecl(hasName("substr"))))), + on(expr(hasType(IsStringView)).bind("source")), + // substr always has 2 args (second one may be defaulted) + argumentCountIs(2), + anyOf( + // Case 1: sv.substr(n, npos) -> remove_prefix + allOf(hasArgument(0, expr().bind("prefix_n")), + hasArgument(1, cxxDefaultArgExpr())), + + // Case 2: sv.substr(0, sv.length()) or sv.substr(0, sv.length() - + // 0) -> redundant self-copy + allOf(hasArgument(0, IsZero.bind("zero")), + hasArgument( + 1, anyOf(LengthMatcher.bind("full_length"), + binaryOperator( + hasOperatorName("-"), + hasLHS(LengthMatcher.bind("full_length")), + hasRHS(IsZero))))), + + // Case 3: sv.substr(0, sv.length() - n) -> remove_suffix + allOf(hasArgument(0, IsZero), + hasArgument( + 1, binaryOperator( + hasOperatorName("-"), + hasLHS(LengthMatcher.bind("length_call")), + hasRHS(expr(unless(IsZero)).bind("suffix_n"))) + .bind("length_minus_n"))))) + .bind("substr_call"); + + // Only match assignments not part of larger expressions + Finder->addMatcher( + stmt(cxxOperatorCallExpr( + unless(isInTemplateInstantiation()), + hasOverloadedOperatorName("="), + hasArgument(0, expr(hasType(IsStringView)).bind("target")), + hasArgument(1, SubstrCall)) + .bind("assignment"), + unless(anyOf(hasAncestor(varDecl()), hasAncestor(callExpr())))), + this); +} + +void StringViewSubstrCheck::check(const MatchFinder::MatchResult &Result) { + const auto *Assignment = + Result.Nodes.getNodeAs<CXXOperatorCallExpr>("assignment"); + const auto *Target = Result.Nodes.getNodeAs<Expr>("target"); + const auto *Source = Result.Nodes.getNodeAs<Expr>("source"); + const auto *SubstrCall = + Result.Nodes.getNodeAs<CXXMemberCallExpr>("substr_call"); + const auto *PrefixN = Result.Nodes.getNodeAs<Expr>("prefix_n"); + const auto *FullLength = Result.Nodes.getNodeAs<Expr>("full_length"); + const auto *LengthCall = Result.Nodes.getNodeAs<Expr>("length_call"); + const auto *SuffixN = Result.Nodes.getNodeAs<Expr>("suffix_n"); + + if (!Assignment || !Target || !Source || !SubstrCall) + return; + + const auto *TargetDRE = dyn_cast<DeclRefExpr>(Target->IgnoreParenImpCasts()); + const auto *SourceDRE = dyn_cast<DeclRefExpr>(Source->IgnoreParenImpCasts()); + + if (!TargetDRE || !SourceDRE) + return; + + const bool IsSameVar = (TargetDRE->getDecl() == SourceDRE->getDecl()); + const std::string TargetName = TargetDRE->getNameInfo().getAsString(); + const std::string SourceName = SourceDRE->getNameInfo().getAsString(); + + // Case 1: remove_prefix + if (PrefixN) { + if (!IsSameVar) + return; + + const std::string PrefixText = + Lexer::getSourceText( + CharSourceRange::getTokenRange(PrefixN->getSourceRange()), + *Result.SourceManager, Result.Context->getLangOpts()) + .str(); + + const std::string Replacement = + TargetName + ".remove_prefix(" + PrefixText + ")"; + diag(Assignment->getBeginLoc(), "prefer 'remove_prefix' over 'substr' for " + "removing characters from the start") + << FixItHint::CreateReplacement(Assignment->getSourceRange(), + Replacement); + return; + } + + // Case 2: redundant full copy + if (FullLength) { + const auto *LengthObject = + cast<CXXMemberCallExpr>(FullLength)->getImplicitObjectArgument(); + const auto *LengthDRE = + dyn_cast<DeclRefExpr>(LengthObject->IgnoreParenImpCasts()); + + if (!LengthDRE || LengthDRE->getDecl() != SourceDRE->getDecl()) + return; + + if (IsSameVar) { + // Remove redundant self copy including trailing semicolon + const SourceLocation EndLoc = Lexer::findLocationAfterToken( + Assignment->getEndLoc(), tok::semi, *Result.SourceManager, + Result.Context->getLangOpts(), false); + + if (EndLoc.isValid()) { + diag(Assignment->getBeginLoc(), "redundant self-copy") + << FixItHint::CreateRemoval(CharSourceRange::getCharRange( + Assignment->getBeginLoc(), EndLoc)); + } + } else { + // Direct copy between different variables + const std::string Replacement = TargetName + " = " + SourceName; + diag(Assignment->getBeginLoc(), "prefer direct copy over substr") + << FixItHint::CreateReplacement(Assignment->getSourceRange(), + Replacement); + } + return; + } + + // Case 3: remove_suffix + if (LengthCall && SuffixN) { + const auto *LengthObject = + cast<CXXMemberCallExpr>(LengthCall)->getImplicitObjectArgument(); + const auto *LengthDRE = + dyn_cast<DeclRefExpr>(LengthObject->IgnoreParenImpCasts()); + + if (!LengthDRE || LengthDRE->getDecl() != SourceDRE->getDecl()) + return; + + const std::string SuffixText = + Lexer::getSourceText( + CharSourceRange::getTokenRange(SuffixN->getSourceRange()), + *Result.SourceManager, Result.Context->getLangOpts()) + .str(); + + if (IsSameVar) { + const std::string Replacement = + TargetName + ".remove_suffix(" + SuffixText + ")"; + diag(Assignment->getBeginLoc(), "prefer 'remove_suffix' over 'substr' " + "for removing characters from the end") + << FixItHint::CreateReplacement(Assignment->getSourceRange(), + Replacement); + } else { + const std::string Replacement = TargetName + " = " + SourceName + ";\n" + + " " + TargetName + ".remove_suffix(" + + SuffixText + ")"; + + diag(Assignment->getBeginLoc(), + "prefer assignment and remove_suffix over substr") + << FixItHint::CreateReplacement(Assignment->getSourceRange(), + Replacement); + } + + return; + } +} + +} // namespace clang::tidy::readability diff --git a/clang-tools-extra/clang-tidy/readability/StringviewSubstrCheck.h b/clang-tools-extra/clang-tidy/readability/StringviewSubstrCheck.h new file mode 100644 index 0000000000000..398a44a80632d --- /dev/null +++ b/clang-tools-extra/clang-tidy/readability/StringviewSubstrCheck.h @@ -0,0 +1,44 @@ +//===----------------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_READABILITY_STRINGVIEWSUBSTRCHECK_H +#define LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_READABILITY_STRINGVIEWSUBSTRCHECK_H + +#include "../ClangTidyCheck.h" + +namespace clang::tidy::readability { + +/// Finds string_view substr() calls that can be replaced with remove_prefix() +/// or remove_suffix(). +/// +/// For the user-facing documentation see: +/// https://clang.llvm.org/extra/clang-tidy/checks/readability/string-view-substr.html +/// +/// The check matches two patterns: +/// sv = sv.substr(N) -> sv.remove_prefix(N) +/// sv = sv.substr(0, sv.length() - N) -> sv.remove_suffix(N) +/// sv = sv.substr(0, sv.length()) -> // Remove redundant self-copy +/// sv1 = sv2.substr(0, sv2.length()) -> sv1 = sv2 +/// sv1 = sv2.substr(0, sv2.length() - N) -> sv1 = sv2; sv1.remove_suffix(N) +/// +/// These replacements make the intent clearer and are more efficient as they +/// modify the string_view in place rather than creating a new one. +class StringViewSubstrCheck : public ClangTidyCheck { +public: + StringViewSubstrCheck(StringRef Name, ClangTidyContext *Context) + : ClangTidyCheck(Name, Context) {} + void registerMatchers(ast_matchers::MatchFinder *Finder) override; + void check(const ast_matchers::MatchFinder::MatchResult &Result) override; + bool isLanguageVersionSupported(const LangOptions &LangOpts) const override { + return LangOpts.CPlusPlus17; + } +}; + +} // namespace clang::tidy::readability + +#endif // LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_READABILITY_STRINGVIEWSUBSTRCHECK_H diff --git a/clang-tools-extra/docs/ReleaseNotes.rst b/clang-tools-extra/docs/ReleaseNotes.rst index 2bb4800df47c9..617ae6689c8ee 100644 --- a/clang-tools-extra/docs/ReleaseNotes.rst +++ b/clang-tools-extra/docs/ReleaseNotes.rst @@ -133,6 +133,12 @@ New checks Finds and removes redundant conversions from ``std::[w|u8|u16|u32]string_view`` to ``std::[...]string`` in call expressions expecting ``std::[...]string_view``. +- New :doc:`readability-stringview-substr + <clang-tidy/checks/readability/stringview-substr>` check. + + Finds ``string_view`` ``substr()`` calls that can be replaced with + ``remove_prefix()`` or ``remove_suffix()``. + - New :doc:`readability-trailing-comma <clang-tidy/checks/readability/trailing-comma>` check. diff --git a/clang-tools-extra/docs/clang-tidy/checks/list.rst b/clang-tools-extra/docs/clang-tidy/checks/list.rst index 0eabd9929dc39..804a8bd63721d 100644 --- a/clang-tools-extra/docs/clang-tidy/checks/list.rst +++ b/clang-tools-extra/docs/clang-tidy/checks/list.rst @@ -424,6 +424,7 @@ Clang-Tidy Checks :doc:`readability-static-accessed-through-instance <readability/static-accessed-through-instance>`, "Yes" :doc:`readability-static-definition-in-anonymous-namespace <readability/static-definition-in-anonymous-namespace>`, "Yes" :doc:`readability-string-compare <readability/string-compare>`, "Yes" + :doc:`readability-stringview-substr <readability/stringview-substr>`, "Yes" :doc:`readability-suspicious-call-argument <readability/suspicious-call-argument>`, :doc:`readability-trailing-comma <readability/trailing-comma>`, "Yes" :doc:`readability-uniqueptr-delete-release <readability/uniqueptr-delete-release>`, "Yes" diff --git a/clang-tools-extra/docs/clang-tidy/checks/readability/stringview-substr.rst b/clang-tools-extra/docs/clang-tidy/checks/readability/stringview-substr.rst new file mode 100644 index 0000000000000..3df84d8fbecdf --- /dev/null +++ b/clang-tools-extra/docs/clang-tidy/checks/readability/stringview-substr.rst @@ -0,0 +1,20 @@ +.. title:: clang-tidy - readability-stringview-substr + +readability-stringview-substr +============================== + +Finds ``string_view substr()`` calls that can be replaced with +clearer alternatives using ``remove_prefix()`` or +``remove_suffix()``. + +The check suggests the following transformations: + +- ``sv = sv.substr(n)`` -> ``sv.remove_prefix(n)`` +- ``sv = sv.substr(0, sv.size()-n)`` -> + ``sv.remove_suffix(n)`` +- ``sv = sv.substr(0, sv.size())`` -> + Redundant self-copy (removed) +- ``sv1 = sv.substr(0, sv.size())`` -> + ``sv1 = sv`` +- ``sv1 = sv2.substr(0, sv2.size()-n)`` -> + ``sv1 = sv2; sv1.remove_suffix(n)`` diff --git a/clang-tools-extra/test/clang-tidy/checkers/readability/stringview_substr.cpp b/clang-tools-extra/test/clang-tidy/checkers/readability/stringview_substr.cpp new file mode 100644 index 0000000000000..f2dae554b689a --- /dev/null +++ b/clang-tools-extra/test/clang-tidy/checkers/readability/stringview_substr.cpp @@ -0,0 +1,155 @@ +// RUN: %check_clang_tidy %s readability-stringview-substr -std=c++17 %t + +namespace std { +template <typename T> +class basic_string_view { +public: + using size_type = unsigned long; + static constexpr size_type npos = -1; + + basic_string_view(const char*) {} + basic_string_view substr(size_type pos, size_type count = npos) const { return *this; } + void remove_prefix(size_type n) {} + void remove_suffix(size_type n) {} + size_type length() const { return 0; } + size_type size() const { return 0; } + basic_string_view& operator=(const basic_string_view&) { return *this; } +}; + +using string_view = basic_string_view<char>; +} // namespace std + +void test_basic() { + std::string_view sv("test"); + std::string_view sv2("test"); + + // Case 1: remove_prefix + sv = sv.substr(5); + // CHECK-MESSAGES: :[[@LINE-1]]:3: warning: prefer 'remove_prefix' over 'substr' for removing characters from the start [readability-stringview-substr] + // CHECK-FIXES: sv.remove_prefix(5); + + // Case 3: remove_suffix with length() + sv = sv.substr(0, sv.length() - 3); + // CHECK-MESSAGES: :[[@LINE-1]]:3: warning: prefer 'remove_suffix' over 'substr' for removing characters from the end [readability-stringview-substr] + // CHECK-FIXES: sv.remove_suffix(3); + + // Case 3: remove_suffix with complex expression + sv = sv.substr(0, sv.length() - (3 + 2)); + // CHECK-MESSAGES: :[[@LINE-1]]:3: warning: prefer 'remove_suffix' over 'substr' for removing characters from the end [readability-stringview-substr] + // CHECK-FIXES: sv.remove_suffix((3 + 2)); +} + +void test_size_method() { + std::string_view sv("test"); + std::string_view sv2("test"); + + // Case 3: remove_suffix with size() + sv = sv.substr(0, sv.size() - 3); + // CHECK-MESSAGES: :[[@LINE-1]]:3: warning: prefer 'remove_suffix' over 'substr' for removing characters from the end [readability-stringview-substr] + // CHECK-FIXES: sv.remove_suffix(3); + + // Case 2: redundant self-copy with size() + sv = sv.substr(0, sv.size()); + // CHECK-MESSAGES: :[[@LINE-1]]:3: warning: redundant self-copy [readability-stringview-substr] + + // Case 2: redundant self-copy with size() - 0 + sv = sv.substr(0, sv.size() - 0); + // CHECK-MESSAGES: :[[@LINE-1]]:3: warning: redundant self-copy [readability-stringview-substr] + + // Case 2: different vars, direct copy + sv2 = sv.substr(0, sv.size()); + // CHECK-MESSAGES: :[[@LINE-1]]:3: warning: prefer direct copy over substr [readability-stringview-substr] + // CHECK-FIXES: sv2 = sv; +} + +template <typename T> +void test_template_instantiation() { + std::basic_string_view<T> sv("test"); + std::basic_string_view<T> sv2("test"); + + // Should not match: inside a template instantiation + sv = sv.substr(0, sv.size() - 3); // No warning + sv = sv.substr(0, sv.length()); // No warning + sv2 = sv.substr(0, sv.size()); // No warning +} + +// No matches when instantiated +void test_template_no_matches() { + test_template_instantiation<char>(); // No warnings + test_template_instantiation<wchar_t>(); // No warnings +} + +void test_copies() { + std::string_view sv("test"); + std::string_view sv1("test"); + std::string_view sv2("test"); + + // Redundant self-copies + sv = sv.substr(0, sv.length()); + // CHECK-MESSAGES: :[[@LINE-1]]:3: warning: redundant self-copy [readability-stringview-substr] + + sv = sv.substr(0, sv.length() - 0); + // CHECK-MESSAGES: :[[@LINE-1]]:3: warning: redundant self-copy [readability-stringview-substr] + + // Direct copies between different variables + sv1 = sv.substr(0, sv.length()); + // CHECK-MESSAGES: :[[@LINE-1]]:3: warning: prefer direct copy over substr [readability-stringview-substr] + // CHECK-FIXES: sv1 = sv; + + sv2 = sv.substr(0, sv.length() - 0); + // CHECK-MESSAGES: :[[@LINE-1]]:3: warning: prefer direct copy over substr [readability-stringview-substr] + // CHECK-FIXES: sv2 = sv; +} + +void test_zero_forms() { + std::string_view sv("test"); + const int kZero = 0; + #define START_POS 0 + + // Various forms of zero in first argument + sv = sv.substr(kZero, sv.length() - 3); + // CHECK-MESSAGES: :[[@LINE-1]]:3: warning: prefer 'remove_suffix' over 'substr' for removing characters from the end [readability-stringview-substr] + // CHECK-FIXES: sv.remove_suffix(3); + + sv = sv.substr(START_POS, sv.length() - 3); + // CHECK-MESSAGES: :[[@LINE-1]]:3: warning: prefer 'remove_suffix' over 'substr' for removing characters from the end [readability-stringview-substr] + // CHECK-FIXES: sv.remove_suffix(3); + + sv = sv.substr((0), sv.length() - 3); + // CHECK-MESSAGES: :[[@LINE-1]]:3: warning: prefer 'remove_suffix' over 'substr' for removing characters from the end [readability-stringview-substr] + // CHECK-FIXES: sv.remove_suffix(3); + + sv = sv.substr(0u, sv.length() - 3); + // CHECK-MESSAGES: :[[@LINE-1]]:3: warning: prefer 'remove_suffix' over 'substr' for removing characters from the end [readability-stringview-substr] + // CHECK-FIXES: sv.remove_suffix(3); + + sv = sv.substr(0UL, sv.length() - 3); + // CHECK-MESSAGES: :[[@LINE-1]]:3: warning: prefer 'remove_suffix' over 'substr' for removing characters from the end [readability-stringview-substr] + // CHECK-FIXES: sv.remove_suffix(3); +} + +void test_should_not_match() { + std::string_view sv("test"); + std::string_view sv2("test"); + + // No match: substr used for prefix or mid-view + sv = sv.substr(1, sv.length() - 3); // No warning +} + +void test_different_vars_remove_suffix() { + std::string_view sv("test"); + std::string_view sv2("test"); + + // Different string_views with remove_suffix pattern + sv = sv2.substr(0, sv2.length() - 3); + // CHECK-MESSAGES: :[[@LINE-1]]:3: warning: prefer assignment and remove_suffix over substr [readability-stringview-substr] + // CHECK-FIXES: sv = sv2; + // CHECK-FIXES: sv.remove_suffix(3); +} + +void f(std::string_view) {} +void test_expr_with_cleanups() { + std::string_view sv("test"); + const auto copy = sv = sv.substr(0, sv.length() - 3); // No warning + f(sv = sv.substr(0, sv.length() - 3)); // No warning +} _______________________________________________ cfe-commits mailing list [email protected] https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
