================ @@ -443,6 +448,368 @@ AST_MATCHER(ArraySubscriptExpr, isSafeArraySubscript) { return false; } +namespace libc_fun_disjoint_inner_matchers { +// `libc_fun_disjoint_inner_matchers` covers a set of matchers that match +// disjoint node sets. They all take a `CoreName`, which is the substring of a +// function name after `ignoreLibcPrefixAndSuffix`. They are suppose to be used +// as an inner matcher of `ignoreLibcPrefixAndSuffix` to deal with different +// libc function calls. + +// Matches a function call node such that +// 1. It's name, after stripping off predefined prefix and suffix, is +// `CoreName`; and +// 2. `CoreName` or `CoreName[str/wcs]` is one of the `PredefinedNames`, which +// is a set of libc function names. +// +// Note: For predefined prefix and suffix, see `ignoreLibcPrefixAndSuffix`. +// And, the notation `CoreName[str/wcs]` means a new name obtained from replace +// string "wcs" with "str" in `CoreName`. +// +// Also note, the set of predefined function names does not include `printf` +// functions, they are checked exclusively with other matchers below. +// Maintaining the invariant that all matchers under +// `libc_fun_disjoint_inner_matchers` are disjoint. +AST_MATCHER_P(CallExpr, predefinedUnsafeLibcFunCall, StringRef, CoreName) { + static const std::set<StringRef> PredefinedNames{ + // numeric conversion: + "atof", + "atoi", + "atol", + "atoll", + "strtol", + "strtoll", + "strtoul", + "strtoull", + "strtof", + "strtod", + "strtold", + "strtoimax", + "strtoumax", + // "strfromf", "strfromd", "strfroml", // C23? + // string manipulation: + "strcpy", + "strncpy", + "strlcpy", + "strcat", + "strncat", + "strlcat", + "strxfrm", + "strdup", + "strndup", + // string examination: + "strlen", + "strnlen", + "strcmp", + "strncmp", + "stricmp", + "strcasecmp", + "strcoll", + "strchr", + "strrchr", + "strspn", + "strcspn", + "strpbrk", + "strstr", + "strtok", + // "mem-" functions + "memchr", + "wmemchr", + "memcmp", + "wmemcmp", + "memcpy", + "memccpy", + "mempcpy", + "wmemcpy", + "memmove", + "wmemmove", + "memset", + "wmemset", + // IO: + "fread", + "fwrite", + "fgets", + "fgetws", + "gets", + "fputs", + "fputws", + "puts", + // others + "strerror_s", + "strerror_r", + "bcopy", + "bzero", + "bsearch", + "qsort", + }; + // This is safe: strlen("hello"). We don't want to be noisy on this case. + auto isSafeStrlen = [&Node](StringRef Name) -> bool { + return Name == "strlen" && Node.getNumArgs() == 1 && + isa<StringLiteral>(Node.getArg(0)->IgnoreParenImpCasts()); + }; + + // Match predefined names: + if (PredefinedNames.find(CoreName.str()) != PredefinedNames.end()) + return !isSafeStrlen(CoreName); + + std::string NameWCS = CoreName.str(); + size_t WcsPos = NameWCS.find("wcs"); + + while (WcsPos != std::string::npos) { + NameWCS[WcsPos++] = 's'; + NameWCS[WcsPos++] = 't'; + NameWCS[WcsPos++] = 'r'; + WcsPos = NameWCS.find("wcs", WcsPos); + } + if (PredefinedNames.find(NameWCS) != PredefinedNames.end()) + return !isSafeStrlen(NameWCS); + // All `scanf` functions are unsafe (including `sscanf`, `vsscanf`, etc.. They + // all should end with "scanf"): + return CoreName.ends_with("scanf"); +} + +// Match a call to one of the `-printf` functions taking `va_list`. We cannot +// check safety for these functions so they should be changed to their +// non-va_list versions. +AST_MATCHER_P(CallExpr, unsafeVaListPrintfs, StringRef, CoreName) { + StringRef Name = CoreName; + + if (!Name.ends_with("printf")) + return false; // neither printf nor scanf + return Name.starts_with("v"); +} + +// Matches a call to one of the `-sprintf` functions (excluding the ones with +// va_list) as they are always unsafe and should be changed to corresponding +// `-snprintf`s. +AST_MATCHER_P(CallExpr, unsafeSprintfs, StringRef, CoreName) { + StringRef Name = CoreName; + + if (!Name.ends_with("printf") || Name.starts_with("v")) + return false; + + StringRef Prefix = Name.drop_back(6); + + if (Prefix.ends_with("w")) + Prefix = Prefix.drop_back(1); + return Prefix == "s"; +} + +// A pointer type expression is known to be null-terminated, if it has the +// form: E.c_str(), for any expression E of `std::string` type. +static bool isNullTermPointer(const Expr *Ptr) { + if (isa<StringLiteral>(Ptr->IgnoreParenImpCasts())) + return true; + if (isa<PredefinedExpr>(Ptr->IgnoreParenImpCasts())) + return true; + if (auto *MCE = dyn_cast<CXXMemberCallExpr>(Ptr->IgnoreParenImpCasts())) { + const CXXMethodDecl *MD = MCE->getMethodDecl(); + const CXXRecordDecl *RD = MCE->getRecordDecl()->getCanonicalDecl(); + + if (MD && RD && RD->isInStdNamespace()) + if (MD->getName() == "c_str" && RD->getName() == "basic_string") + return true; + } + return false; +} + +// Return true iff at least one of following cases holds: +// 1. Format string is a literal and there is an unsafe pointer argument +// corresponding to an `s` specifier; +// 2. Format string is not a literal and there is least an unsafe pointer +// argument (including the formatter argument). +static bool hasUnsafeFormatOrSArg(const Expr *Fmt, unsigned FmtArgIdx, + const CallExpr *Call, ASTContext &Ctx) { + if (auto *SL = dyn_cast<StringLiteral>(Fmt->IgnoreParenImpCasts())) { + StringRef FmtStr = SL->getString(); + auto I = FmtStr.begin(); + auto E = FmtStr.end(); + unsigned ArgIdx = FmtArgIdx; + + do { + ArgIdx = analyze_format_string::ParseFormatStringFirstSArgIndex( + I, E, ArgIdx, Ctx.getLangOpts(), Ctx.getTargetInfo()); + if (ArgIdx && Call->getNumArgs() > ArgIdx && + !isNullTermPointer(Call->getArg(ArgIdx))) + return true; + } while (ArgIdx); + return false; + } + // If format is not a string literal, we cannot analyze the format string. + // In this case, this call is considered unsafe if at least one argument + // (including the format argument) is unsafe pointer. + return llvm::any_of( + llvm::make_range(Call->arg_begin() + FmtArgIdx, Call->arg_end()), + [](const Expr *Arg) { + return Arg->getType()->isPointerType() && !isNullTermPointer(Arg); + }); +} + +// Matches a call to one of the `-printf" functions (excluding the ones with +// va_list, or `-sprintf`s) that taking pointer-to-char-as-string arguments but +// fail to guarantee their null-termination. In other words, these calls are +// safe if they use null-termination guaranteed string pointers. +AST_MATCHER_P(CallExpr, unsafeStringInPrintfs, StringRef, CoreName) { + StringRef Name = CoreName; + + if (!Name.ends_with("printf") || Name.starts_with("v")) + return false; + + StringRef Prefix = Name.drop_back(6); + + if (Prefix.ends_with("w")) + Prefix = Prefix.drop_back(1); + + if (Prefix.empty() || + Prefix == "k") // printf: all pointer args should be null-terminated + return hasUnsafeFormatOrSArg(Node.getArg(0), 0, &Node, + Finder->getASTContext()); + if (Prefix == "f") + return hasUnsafeFormatOrSArg(Node.getArg(1), 1, &Node, + Finder->getASTContext()); + if (Prefix == "sn") { + // The first two arguments need to be in safe patterns, which is checked + // by `isSafeSizedby`: + return hasUnsafeFormatOrSArg(Node.getArg(2), 2, &Node, + Finder->getASTContext()); + } + return false; // A call to a "-printf" falls into another category. +} + +// Matches a call to one of the `snprintf` functions (excluding the ones with +// va_list) such that the first two arguments fail to conform to safe patterns. +// +// For the first two arguments: `ptr` and `size`, they are safe if in the +// following patterns: +// ptr := DRE.data(); +// size:= DRE.size()/DRE.size_bytes() +// And DRE is a hardened container or view. +AST_MATCHER_P(CallExpr, unsafeSizedByInSnprintfs, StringRef, CoreName) { + StringRef Name = CoreName; + + if (!Name.ends_with("printf") || Name.starts_with("v")) + return false; // not snprint or use va_list + + StringRef Prefix = Name.drop_back(6); + + if (Prefix.ends_with("w")) + Prefix = Prefix.drop_back(1); + + if (Prefix != "sn") + return false; // not snprint + + static StringRef SizedObjs[] = {"span", "array", "vector", + "basic_string_view", "basic_string"}; + const Expr *CharPtr = (*Node.arg_begin())->IgnoreParenImpCasts(); + const Expr *Size = (*(Node.arg_begin() + 1))->IgnoreParenImpCasts(); + + if (auto *MCEPtr = dyn_cast<CXXMemberCallExpr>(CharPtr)) + if (auto *MCESize = dyn_cast<CXXMemberCallExpr>(Size)) { + auto *DREOfPtr = dyn_cast<DeclRefExpr>( + MCEPtr->getImplicitObjectArgument()->IgnoreParenImpCasts()); + auto *DREOfSize = dyn_cast<DeclRefExpr>( + MCESize->getImplicitObjectArgument()->IgnoreParenImpCasts()); + + if (!DREOfPtr || !DREOfSize) + return true; // not in safe pattern + if (DREOfPtr->getDecl() != DREOfSize->getDecl()) + return true; // not in safe pattern + if (MCEPtr->getMethodDecl()->getName() != "data") + return true; // not in safe pattern + + if (MCESize->getMethodDecl()->getName() == "size_bytes" || + // Note here the pointer must be a pointer-to-char type unless there + // is explicit casting. If there is explicit casting, this branch + // is unreachable. Thus, at this branch "size" and "size_bytes" are + // equivalent as the pointer is a char pointer: + MCESize->getMethodDecl()->getName() == "size") + for (StringRef SizedObj : SizedObjs) + if (MCEPtr->getRecordDecl()->isInStdNamespace() && + MCEPtr->getRecordDecl()->getCanonicalDecl()->getName() == + SizedObj) + return false; // It is in fact safe + } + return true; // ptr and size are not in safe pattern +} +} // namespace libc_fun_disjoint_inner_matchers + +// Match call to a function whose name may have prefixes like "__builtin_" or +// "__asan_" and suffixes like "_s" or "_chk". This matcher takes an argument, +// which should be applied to the core name---the subtring after stripping off +// prefix and suffix of the function name. +// The application results in an inner matcher that matches the call node with +// respect to the core name of the callee. +AST_MATCHER_P(CallExpr, ignoreLibcPrefixAndSuffix, + std::function<internal::Matcher<CallExpr>(StringRef)>, ---------------- ziqingluo-90 wrote:
Refactored this part. https://github.com/llvm/llvm-project/pull/101583 _______________________________________________ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits