llvmbot wrote:
<!--LLVM PR SUMMARY COMMENT--> @llvm/pr-subscribers-clang-analysis @llvm/pr-subscribers-clang Author: Ziqing Luo (ziqingluo-90) <details> <summary>Changes</summary> Depending on https://github.com/llvm/llvm-project/pull/101583 (rdar://117182250) --- Patch is 41.29 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/105383.diff 9 Files Affected: - (modified) clang/include/clang/Analysis/Analyses/UnsafeBufferUsage.h (+16) - (modified) clang/include/clang/Analysis/Analyses/UnsafeBufferUsageGadgets.def (+7-6) - (modified) clang/include/clang/Basic/DiagnosticGroups.td (+2-1) - (modified) clang/include/clang/Basic/DiagnosticSemaKinds.td (+7) - (modified) clang/lib/Analysis/UnsafeBufferUsage.cpp (+521-5) - (modified) clang/lib/Sema/AnalysisBasedWarnings.cpp (+18-1) - (added) clang/test/SemaCXX/warn-unsafe-buffer-usage-libc-functions-inline-namespace.cpp (+60) - (added) clang/test/SemaCXX/warn-unsafe-buffer-usage-libc-functions.cpp (+132) - (modified) clang/test/SemaCXX/warn-unsafe-buffer-usage-test-unreachable.cpp (+1-3) ``````````diff diff --git a/clang/include/clang/Analysis/Analyses/UnsafeBufferUsage.h b/clang/include/clang/Analysis/Analyses/UnsafeBufferUsage.h index 228b4ae1e3e115..43fca9b4637363 100644 --- a/clang/include/clang/Analysis/Analyses/UnsafeBufferUsage.h +++ b/clang/include/clang/Analysis/Analyses/UnsafeBufferUsage.h @@ -15,6 +15,7 @@ #define LLVM_CLANG_ANALYSIS_ANALYSES_UNSAFEBUFFERUSAGE_H #include "clang/AST/Decl.h" +#include "clang/AST/Expr.h" #include "clang/AST/Stmt.h" #include "clang/Basic/SourceLocation.h" #include "llvm/Support/Debug.h" @@ -106,6 +107,17 @@ class UnsafeBufferUsageHandler { virtual void handleUnsafeOperation(const Stmt *Operation, bool IsRelatedToDecl, ASTContext &Ctx) = 0; + /// Invoked when a call to an unsafe libc function is found. + /// \param PrintfInfo + /// is 0 if the callee function is not a member of the printf family; + /// is 1 if the callee is `sprintf`; + /// is 2 if arguments of the call have `__size_by` relation but are not in a + /// safe pattern; + /// is 3 if string arguments do not guarantee null-termination + /// is 4 if the callee takes va_list + virtual void handleUnsafeLibcCall(const CallExpr *Call, unsigned PrintfInfo, + ASTContext &Ctx) = 0; + /// Invoked when an unsafe operation with a std container is found. virtual void handleUnsafeOperationInContainer(const Stmt *Operation, bool IsRelatedToDecl, @@ -151,6 +163,10 @@ class UnsafeBufferUsageHandler { virtual bool ignoreUnsafeBufferInContainer(const SourceLocation &Loc) const = 0; + /// \return true iff unsafe libc call should NOT be reported at `Loc` + virtual bool + ignoreUnsafeBufferInLibcCall(const SourceLocation &Loc) const = 0; + virtual std::string getUnsafeBufferUsageAttributeTextAt(SourceLocation Loc, StringRef WSSuffix = "") const = 0; diff --git a/clang/include/clang/Analysis/Analyses/UnsafeBufferUsageGadgets.def b/clang/include/clang/Analysis/Analyses/UnsafeBufferUsageGadgets.def index 242ad763ba62b9..09fa510bc0472e 100644 --- a/clang/include/clang/Analysis/Analyses/UnsafeBufferUsageGadgets.def +++ b/clang/include/clang/Analysis/Analyses/UnsafeBufferUsageGadgets.def @@ -18,10 +18,10 @@ #define WARNING_GADGET(name) GADGET(name) #endif -/// A `WARNING_GADGET` subset, where the code pattern of each gadget -/// corresponds uses of a (possibly hardened) contatiner (e.g., `std::span`). -#ifndef WARNING_CONTAINER_GADGET -#define WARNING_CONTAINER_GADGET(name) WARNING_GADGET(name) +/// A `WARNING_GADGET` subset, each of which may be enable/disable separately +/// with different flags +#ifndef WARNING_OPTIONAL_GADGET +#define WARNING_OPTIONAL_GADGET(name) WARNING_GADGET(name) #endif /// Safe gadgets correspond to code patterns that aren't unsafe but need to be @@ -38,7 +38,8 @@ WARNING_GADGET(PointerArithmetic) WARNING_GADGET(UnsafeBufferUsageAttr) WARNING_GADGET(UnsafeBufferUsageCtorAttr) WARNING_GADGET(DataInvocation) -WARNING_CONTAINER_GADGET(SpanTwoParamConstructor) // Uses of `std::span(arg0, arg1)` +WARNING_OPTIONAL_GADGET(UnsafeLibcFunctionCall) +WARNING_OPTIONAL_GADGET(SpanTwoParamConstructor) // Uses of `std::span(arg0, arg1)` FIXABLE_GADGET(ULCArraySubscript) // `DRE[any]` in an Unspecified Lvalue Context FIXABLE_GADGET(DerefSimplePtrArithFixable) FIXABLE_GADGET(PointerDereference) @@ -52,5 +53,5 @@ FIXABLE_GADGET(PointerInit) #undef FIXABLE_GADGET #undef WARNING_GADGET -#undef WARNING_CONTAINER_GADGET +#undef WARNING_OPTIONAL_GADGET #undef GADGET diff --git a/clang/include/clang/Basic/DiagnosticGroups.td b/clang/include/clang/Basic/DiagnosticGroups.td index 19c3f1e0433496..3e9d5230e9ed10 100644 --- a/clang/include/clang/Basic/DiagnosticGroups.td +++ b/clang/include/clang/Basic/DiagnosticGroups.td @@ -1552,7 +1552,8 @@ def ReadOnlyPlacementChecks : DiagGroup<"read-only-types">; // Warnings and fixes to support the "safe buffers" programming model. def UnsafeBufferUsageInContainer : DiagGroup<"unsafe-buffer-usage-in-container">; -def UnsafeBufferUsage : DiagGroup<"unsafe-buffer-usage", [UnsafeBufferUsageInContainer]>; +def UnsafeBufferUsageInLibcCall : DiagGroup<"unsafe-buffer-usage-in-libc-call">; +def UnsafeBufferUsage : DiagGroup<"unsafe-buffer-usage", [UnsafeBufferUsageInContainer, UnsafeBufferUsageInLibcCall]>; // Warnings and notes related to the function effects system underlying // the nonblocking and nonallocating attributes. diff --git a/clang/include/clang/Basic/DiagnosticSemaKinds.td b/clang/include/clang/Basic/DiagnosticSemaKinds.td index 581434d33c5c9a..f689c9e633ebb3 100644 --- a/clang/include/clang/Basic/DiagnosticSemaKinds.td +++ b/clang/include/clang/Basic/DiagnosticSemaKinds.td @@ -12376,6 +12376,13 @@ def warn_unsafe_buffer_operation : Warning< "%select{unsafe pointer operation|unsafe pointer arithmetic|" "unsafe buffer access|function introduces unsafe buffer manipulation|unsafe invocation of span::data}0">, InGroup<UnsafeBufferUsage>, DefaultIgnore; +def warn_unsafe_buffer_libc_call : Warning< + "function %0 introduces unsafe buffer access">, + InGroup<UnsafeBufferUsageInLibcCall>, DefaultIgnore; +def note_unsafe_buffer_printf_call : Note< + "%select{| change to 'snprintf' for explicit bounds checking | buffer pointer and size may not match" + "| use 'std::string::c_str' or string literal as string pointer to guarantee null-termination" + "| do not use va_list that cannot be checked at compile-time for bounds safety}0">; def note_unsafe_buffer_operation : Note< "used%select{| in pointer arithmetic| in buffer access}0 here">; def note_unsafe_buffer_variable_fixit_group : Note< diff --git a/clang/lib/Analysis/UnsafeBufferUsage.cpp b/clang/lib/Analysis/UnsafeBufferUsage.cpp index 866222380974b6..02e39161e2e8f3 100644 --- a/clang/lib/Analysis/UnsafeBufferUsage.cpp +++ b/clang/lib/Analysis/UnsafeBufferUsage.cpp @@ -9,19 +9,25 @@ #include "clang/Analysis/Analyses/UnsafeBufferUsage.h" #include "clang/AST/ASTContext.h" #include "clang/AST/Decl.h" +#include "clang/AST/DeclCXX.h" #include "clang/AST/Expr.h" +#include "clang/AST/ExprCXX.h" +#include "clang/AST/FormatString.h" #include "clang/AST/RecursiveASTVisitor.h" #include "clang/AST/Stmt.h" #include "clang/AST/StmtVisitor.h" +#include "clang/AST/Type.h" #include "clang/ASTMatchers/ASTMatchFinder.h" #include "clang/ASTMatchers/ASTMatchers.h" -#include "clang/Basic/CharInfo.h" #include "clang/Basic/SourceLocation.h" #include "clang/Lex/Lexer.h" #include "clang/Lex/Preprocessor.h" #include "llvm/ADT/APSInt.h" +#include "llvm/ADT/DenseMap.h" +#include "llvm/ADT/STLExtras.h" #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/StringRef.h" +#include "llvm/ADT/iterator_range.h" #include "llvm/Support/Casting.h" #include <memory> #include <optional> @@ -247,6 +253,11 @@ AST_MATCHER_P(Stmt, ignoreUnsafeBufferInContainer, return Handler->ignoreUnsafeBufferInContainer(Node.getBeginLoc()); } +AST_MATCHER_P(Stmt, ignoreUnsafeLibcCall, const UnsafeBufferUsageHandler *, + Handler) { + return Handler->ignoreUnsafeBufferInLibcCall(Node.getBeginLoc()); +} + AST_MATCHER_P(CastExpr, castSubExpr, internal::Matcher<Expr>, innerMatcher) { return innerMatcher.matches(*Node.getSubExpr(), Finder, Builder); } @@ -443,6 +454,421 @@ AST_MATCHER(ArraySubscriptExpr, isSafeArraySubscript) { return false; } +AST_MATCHER_P(CallExpr, hasNumArgs, unsigned, Num) { + return Node.getNumArgs() == Num; +} + +namespace libc_func_matchers { +// Under `libc_func_matchers`, define a set of matchers that match unsafe +// functions in libc and unsafe calls to them. + +// A tiny parser to strip off common prefix and suffix of libc function names +// in real code. +// +// Given a function name, `matchName` returns `CoreName` according to the +// following grammar: +// +// LibcName := CoreName | CoreName + "_s" +// MatchingName := "__builtin_" + LibcName | +// "__builtin___" + LibcName + "_chk" | +// "__asan_" + LibcName +// +struct LibcFunNamePrefixSuffixParser { + StringRef matchName(StringRef FunName, bool isBuiltin) { + // Try to match __builtin_: + if (isBuiltin && FunName.starts_with("__builtin_")) + // Then either it is __builtin_LibcName or __builtin___LibcName_chk or + // no match: + return matchLibcNameOrBuiltinChk( + FunName.drop_front(10 /* truncate "__builtin_" */)); + // Try to match __asan_: + if (FunName.starts_with("__asan_")) + return matchLibcName(FunName.drop_front(7 /* truncate of "__asan_" */)); + return matchLibcName(FunName); + } + + // Parameter `Name` is the substring after stripping off the prefix + // "__builtin_". + StringRef matchLibcNameOrBuiltinChk(StringRef Name) { + if (Name.starts_with("__") && Name.ends_with("_chk")) + return matchLibcName( + Name.drop_front(2).drop_back(4) /* truncate "__" and "_chk" */); + return matchLibcName(Name); + } + + StringRef matchLibcName(StringRef Name) { + if (Name.ends_with("_s")) + return Name.drop_back(2 /* truncate "_s" */); + return Name; + } +}; + +// A pointer type expression is known to be null-terminated, if it has the +// form: E.c_str(), for any expression E of `std::string` type. +static bool isNullTermPointer(const Expr *Ptr) { + if (isa<StringLiteral>(Ptr->IgnoreParenImpCasts())) + return true; + if (isa<PredefinedExpr>(Ptr->IgnoreParenImpCasts())) + return true; + if (auto *MCE = dyn_cast<CXXMemberCallExpr>(Ptr->IgnoreParenImpCasts())) { + const CXXMethodDecl *MD = MCE->getMethodDecl(); + const CXXRecordDecl *RD = MCE->getRecordDecl()->getCanonicalDecl(); + + if (MD && RD && RD->isInStdNamespace()) + if (MD->getName() == "c_str" && RD->getName() == "basic_string") + return true; + } + return false; +} + +// Return true iff at least one of following cases holds: +// 1. Format string is a literal and there is an unsafe pointer argument +// corresponding to an `s` specifier; +// 2. Format string is not a literal and there is least an unsafe pointer +// argument (including the formatter argument). +static bool hasUnsafeFormatOrSArg(const CallExpr *Call, unsigned FmtArgIdx, + ASTContext &Ctx, bool isKprintf = false) { + class StringFormatStringHandler + : public analyze_format_string::FormatStringHandler { + const CallExpr *Call; + unsigned FmtArgIdx; + + public: + StringFormatStringHandler(const CallExpr *Call, unsigned FmtArgIdx) + : Call(Call), FmtArgIdx(FmtArgIdx) {} + + bool HandlePrintfSpecifier(const analyze_printf::PrintfSpecifier &FS, + const char *startSpecifier, + unsigned specifierLen, + const TargetInfo &Target) override { + if (FS.getConversionSpecifier().getKind() == + analyze_printf::PrintfConversionSpecifier::sArg) { + unsigned ArgIdx = FS.getPositionalArgIndex() + FmtArgIdx; + + if (0 < ArgIdx && ArgIdx < Call->getNumArgs()) + if (!isNullTermPointer(Call->getArg(ArgIdx))) + return false; // stop parsing + } + return true; // continue parsing + } + }; + + const Expr *Fmt = Call->getArg(FmtArgIdx); + + if (auto *SL = dyn_cast<StringLiteral>(Fmt->IgnoreParenImpCasts())) { + StringRef FmtStr = SL->getString(); + StringFormatStringHandler Handler(Call, FmtArgIdx); + + return analyze_format_string::ParsePrintfString( + Handler, FmtStr.begin(), FmtStr.end(), Ctx.getLangOpts(), + Ctx.getTargetInfo(), isKprintf); + } + // If format is not a string literal, we cannot analyze the format string. + // In this case, this call is considered unsafe if at least one argument + // (including the format argument) is unsafe pointer. + return llvm::any_of( + llvm::make_range(Call->arg_begin() + FmtArgIdx, Call->arg_end()), + [](const Expr *Arg) { + return Arg->getType()->isPointerType() && !isNullTermPointer(Arg); + }); +} + +// Matches a FunctionDecl node such that +// 1. It's name, after stripping off predefined prefix and suffix, is +// `CoreName`; and +// 2. `CoreName` or `CoreName[str/wcs]` is one of the `PredefinedNames`, which +// is a set of libc function names. +// +// Note: For predefined prefix and suffix, see `LibcFunNamePrefixSuffixParser`. +// The notation `CoreName[str/wcs]` means a new name obtained from replace +// string "wcs" with "str" in `CoreName`. +AST_MATCHER(FunctionDecl, isPredefinedUnsafeLibcFunc) { + static std::unique_ptr<std::set<StringRef>> PredefinedNames = nullptr; + if (!PredefinedNames) + PredefinedNames = + std::make_unique<std::set<StringRef>, std::set<StringRef>>({ + // numeric conversion: + "atof", + "atoi", + "atol", + "atoll", + "strtol", + "strtoll", + "strtoul", + "strtoull", + "strtof", + "strtod", + "strtold", + "strtoimax", + "strtoumax", + // "strfromf", "strfromd", "strfroml", // C23? + // string manipulation: + "strcpy", + "strncpy", + "strlcpy", + "strcat", + "strncat", + "strlcat", + "strxfrm", + "strdup", + "strndup", + // string examination: + "strlen", + "strnlen", + "strcmp", + "strncmp", + "stricmp", + "strcasecmp", + "strcoll", + "strchr", + "strrchr", + "strspn", + "strcspn", + "strpbrk", + "strstr", + "strtok", + // "mem-" functions + "memchr", + "wmemchr", + "memcmp", + "wmemcmp", + "memcpy", + "memccpy", + "mempcpy", + "wmemcpy", + "memmove", + "wmemmove", + "memset", + "wmemset", + // IO: + "fread", + "fwrite", + "fgets", + "fgetws", + "gets", + "fputs", + "fputws", + "puts", + // others + "strerror_s", + "strerror_r", + "bcopy", + "bzero", + "bsearch", + "qsort", + }); + + auto *II = Node.getIdentifier(); + + if (!II) + return false; + + StringRef Name = LibcFunNamePrefixSuffixParser().matchName( + II->getName(), Node.getBuiltinID()); + + // Match predefined names: + if (PredefinedNames->find(Name) != PredefinedNames->end()) + return true; + + std::string NameWCS = Name.str(); + size_t WcsPos = NameWCS.find("wcs"); + + while (WcsPos != std::string::npos) { + NameWCS[WcsPos++] = 's'; + NameWCS[WcsPos++] = 't'; + NameWCS[WcsPos++] = 'r'; + WcsPos = NameWCS.find("wcs", WcsPos); + } + if (PredefinedNames->find(NameWCS) != PredefinedNames->end()) + return true; + // All `scanf` functions are unsafe (including `sscanf`, `vsscanf`, etc.. They + // all should end with "scanf"): + return Name.ends_with("scanf"); +} + +// Match a call to one of the `v*printf` functions taking `va_list`. We cannot +// check safety for these functions so they should be changed to their +// non-va_list versions. +AST_MATCHER(FunctionDecl, isUnsafeVaListPrintfFunc) { + auto *II = Node.getIdentifier(); + + if (!II) + return false; + + StringRef Name = LibcFunNamePrefixSuffixParser().matchName( + II->getName(), Node.getBuiltinID()); + + if (!Name.ends_with("printf")) + return false; // neither printf nor scanf + return Name.starts_with("v"); +} + +// Matches a call to one of the `sprintf` functions as they are always unsafe +// and should be changed to `snprintf`. +AST_MATCHER(FunctionDecl, isUnsafeSprintfFunc) { + auto *II = Node.getIdentifier(); + + if (!II) + return false; + + StringRef Name = LibcFunNamePrefixSuffixParser().matchName( + II->getName(), Node.getBuiltinID()); + + if (!Name.ends_with("printf") || + // Let `isUnsafeVaListPrintfFunc` check for cases with va-list: + Name.starts_with("v")) + return false; + + StringRef Prefix = Name.drop_back(6); + + if (Prefix.ends_with("w")) + Prefix = Prefix.drop_back(1); + return Prefix == "s"; +} + +// Match function declarations of `printf`, `fprintf`, `snprintf` and their wide +// character versions. Calls to these functions can be safe if their arguments +// are carefully made safe. +AST_MATCHER(FunctionDecl, isNormalPrintfFunc) { + auto *II = Node.getIdentifier(); + + if (!II) + return false; + + StringRef Name = LibcFunNamePrefixSuffixParser().matchName( + II->getName(), Node.getBuiltinID()); + + if (!Name.ends_with("printf") || Name.starts_with("v")) + return false; + + StringRef Prefix = Name.drop_back(6); + + if (Prefix.ends_with("w")) + Prefix = Prefix.drop_back(1); + + return Prefix.empty() || Prefix == "k" || Prefix == "f" || Prefix == "sn"; +} + +// This matcher requires that it is known that the callee `isNormalPrintf`. +// Then if the format string is a string literal, this matcher matches when at +// least one string argument is unsafe. If the format is not a string literal, +// this matcher matches when at least one pointer type argument is unsafe. +AST_MATCHER(CallExpr, hasUnsafePrintfStringArg) { + // Determine what printf it is: + const Expr *FirstArg = Node.getArg(0); + ASTContext &Ctx = Finder->getASTContext(); + + if (isa<StringLiteral>(FirstArg->IgnoreParenImpCasts())) { + // It is a printf/kprintf. And, the format is a string literal: + bool isKprintf = false; + if (auto *Callee = Node.getDirectCallee()) + if (auto *II = Node.getDirectCallee()->getIdentifier()) + isKprintf = II->getName() == "kprintf"; + return hasUnsafeFormatOrSArg(&Node, 0, Ctx, isKprintf); + } + + QualType PtrTy = FirstArg->getType(); + + assert(PtrTy->isPointerType()); + + QualType PteTy = (cast<PointerType>(PtrTy))->getPointeeType(); + + if (!Ctx.getFILEType().isNull() /* If `FILE *` is not ever in the ASTContext, + there can't be any file pointer then */ + && PteTy.getCanonicalType() == Ctx.getFILEType().getCanonicalType()) { + // It is a fprintf: + return hasUnsafeFormatOrSArg(&Node, 1, Ctx, false); + } + + const Expr *SecondArg = Node.getArg(1); + + if (SecondArg->getType()->isIntegerType()) { + // It is a snprintf: + return hasUnsafeFormatOrSArg(&Node, 2, Ctx, false); + } + // It is printf but the format string is passed by pointer. The only thing we + // can do is to require all pointers to be null-terminated: + return llvm::any_of(Node.arguments(), [](const Expr *Arg) -> bool { + return Arg->getType()->isPointerType() && !isNullTermPointer(Arg); + }); +} + +// This matcher requires that it is known that the callee `isNormalPrintf`. +// Then it matches if the first two arguments of the call is a pointer and an +// integer and they are not in a safe pattern. +// +// For the first two arguments: `ptr` and `size`, they are safe if in the +// following patterns: +// +// Pattern 1: +// ptr := DRE.data(); +// size:= DRE.size()/DRE.size_bytes() +// And DRE is a hardened container or view. +// +// Pattern 2: +// ptr := Constant-Array-DRE; +// size:= any expression that has compile-time constant value equivalent to +// sizeof (Constant-Array-DRE) +AST_MATCHER(CallExpr, hasUnsafeSnprintfBuffer) { + if (Node.getNumArgs() < 3) + return false; // not an snprintf call + + const Expr *Buf = Node.getArg(0), *Size = Node.getArg(1); + + if (!Buf->getType()->isPointerType() || !Size->getType()->isIntegerType()) + return false; // not an snprintf call + + // Pattern 1: + static StringRef SizedObjs[] = {"span", "array", "vector", + "basic_string_view", "basic_string"}; + Buf = Buf->IgnoreParenImpCasts(); + Size = Size->IgnoreParenImpCasts(); + if (auto *MCEPtr = dyn_cast<CXXMemberCallExpr>(Buf)) + if (auto *MCESize = dyn_cast<CXXMemberCallExpr>(Size)) { + auto *DREOfPtr = dyn_cast<DeclRefExpr>( + MCEPtr->getIm... [truncated] `````````` </details> https://github.com/llvm/llvm-project/pull/105383 _______________________________________________ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits