================ @@ -443,6 +448,260 @@ AST_MATCHER(ArraySubscriptExpr, isSafeArraySubscript) { return false; } +AST_MATCHER(CallExpr, isUnsafeLibcFunctionCall) { + static const std::set<StringRef> PredefinedNames{ + // numeric conversion: + "atof", + "atoi", + "atol", + "atoll", + "strtol", + "strtoll", + "strtoul", + "strtoull", + "strtof", + "strtod", + "strtold", + "strtoimax", + "strtoumax", + // "strfromf", "strfromd", "strfroml", // C23? + // string manipulation: + "strcpy", + "strncpy", + "strlcpy", + "strcat", + "strncat", + "strlcat", + "strxfrm", + "strdup", + "strndup", + // string examination: + "strlen", + "strnlen", + "strcmp", + "strncmp", + "stricmp", + "strcasecmp", + "strcoll", + "strchr", + "strrchr", + "strspn", + "strcspn", + "strpbrk", + "strstr", + "strtok", + // "mem-" functions + "memchr", + "wmemchr", + "memcmp", + "wmemcmp", + "memcpy", + "memccpy", + "mempcpy", + "wmemcpy", + "memmove", + "wmemmove", + "memset", + "wmemset", + // IO: + "fread", + "fwrite", + "fgets", + "fgetws", + "gets", + "fputs", + "fputws", + "puts", + // others + "strerror_s", + "strerror_r", + "bcopy", + "bzero", + "bsearch", + "qsort", + }; + + // A tiny name parser for unsafe libc function names with additional + // checks for `printf`s: + struct FuncNameMatch { + const CallExpr *const Call; + FuncNameMatch(const CallExpr *Call) : Call(Call) {} + + // For a name `S` in `PredefinedNames` or a member of the printf/scanf + // family, define matching function names with `S` by the grammar below: + // + // CoreName := S | S["wcs"/"str"] + // LibcName := CoreName | CoreName + "_s" + // MatchingName := "__builtin_" + LibcName | + // "__builtin___" + LibcName + "_chk" | + // "__asan_" + LibcName + // + // (Note S["wcs"/"str"] means substitute "str" with "wcs" in S.) + bool matchName(StringRef FunName) { + // Try to match __builtin_: + if (FunName.starts_with("__builtin_")) + // Then either it is __builtin_LibcName or __builtin___LibcName_chk or + // no match: + return matchLibcNameOrBuiltinChk( + FunName.drop_front(10 /* truncate "__builtin_" */)); + // Try to match __asan_: + if (FunName.starts_with("__asan_")) + return matchLibcName(FunName.drop_front(7 /* truncate of "__asan_" */)); + return matchLibcName(FunName); + } + + // Parameter `Name` is the substring after stripping off the prefix + // "__builtin_". + bool matchLibcNameOrBuiltinChk(StringRef Name) { + if (Name.starts_with("__") && Name.ends_with("_chk")) + return matchLibcName( + Name.drop_front(2).drop_back(4) /* truncate "__" and "_chk" */); + return matchLibcName(Name); + } + + bool matchLibcName(StringRef Name) { + if (Name.ends_with("_s")) + return matchCoreName(Name.drop_back(2 /* truncate "_s" */)); + return matchCoreName(Name); + } + + bool matchCoreName(StringRef Name) { + if (PredefinedNames.find(Name.str()) != PredefinedNames.end()) + return !isSafeStrlen(Name); // match unless it's a safe strlen call + + std::string NameWCS = Name.str(); + size_t WcsPos = NameWCS.find("wcs"); + + while (WcsPos != std::string::npos) { + NameWCS[WcsPos++] = 's'; + NameWCS[WcsPos++] = 't'; + NameWCS[WcsPos++] = 'r'; + WcsPos = NameWCS.find("wcs", WcsPos); + } + if (PredefinedNames.find(NameWCS) != PredefinedNames.end()) + return !isSafeStrlen(NameWCS); + return matchPrintfOrScanfFamily(Name); + } + + bool matchPrintfOrScanfFamily(StringRef Name) { + if (Name.ends_with("scanf")) + return true; // simply warn about scanf functions + if (!Name.ends_with("printf")) + return false; // neither printf nor scanf + if (Name.starts_with("v")) + // cannot check args for va_list, so `vprintf`s are treated as regular + // unsafe libc calls: + return true; + + // Truncate "printf", focus on prefixes. There are different possible + // name prefixes: "k", "f", "s", "sn", "fw", ..., "snw". We strip off the + // 'w' and handle printfs differently by "k", "f", "s", "sn" or no prefix: + StringRef Prefix = Name.drop_back(6); + + if (Prefix.ends_with("w")) + Prefix = Prefix.drop_back(1); + return isUnsafePrintf(Prefix); + } + + // A pointer type expression is known to be null-terminated, if it has the + // form: E.c_str(), for any expression E of `std::string` type. + static bool isNullTermPointer(const Expr *Ptr) { + if (isa<StringLiteral>(Ptr->IgnoreParenImpCasts())) + return true; + if (auto *MCE = dyn_cast<CXXMemberCallExpr>(Ptr->IgnoreParenImpCasts())) { + const CXXMethodDecl *MD = MCE->getMethodDecl(); + const CXXRecordDecl *RD = MCE->getRecordDecl()->getCanonicalDecl(); + + if (MD && RD && RD->isInStdNamespace()) + if (MD->getName() == "c_str" && RD->getName() == "basic_string") + return true; + } + return false; + } + + // Check safe patterns for printfs w.r.t their prefixes: + bool isUnsafePrintf(StringRef Prefix /* empty, 'k', 'f', 's', or 'sn' */) { + auto AnyUnsafeStrPtr = [](const Expr *Arg) -> bool { + return Arg->getType()->isPointerType() && !isNullTermPointer(Arg); + }; + + if (Prefix.empty() || Prefix == "k") // printf: all pointer args should be null-terminated + return llvm::any_of(Call->arguments(), AnyUnsafeStrPtr); + + if (Prefix == "f" && Call->getNumArgs() > 1) { + // fprintf, same as printf but skip the first arg + auto Range = llvm::make_range(Call->arg_begin() + 1, Call->arg_end()); + return llvm::any_of(Range, AnyUnsafeStrPtr); + } + if (Prefix == "sn" && Call->getNumArgs() > 2) { + // The first two arguments need to be in safe patterns, which is checked + // by `isSafeSizedby`: + auto Range = llvm::make_range(Call->arg_begin() + 2, Call->arg_end()); + return !isSafeSizedby(*Call->arg_begin(), *(Call->arg_begin() + 1)) || + llvm::any_of(Range, AnyUnsafeStrPtr); + } + // Note `sprintf`s should be changed to `snprintf`s, so they are treated + // as regular unsafe libc calls: + return true; + } + + // Checks if the two Exprs `SizedByPtr` and `Size` are in the pattern: + // SizedByPtr := DRE.data() + // Size := DRE.size_bytes(), for a same DRE of sized-container/view + // type. + bool isSafeSizedby(const Expr *SizedByPtr, const Expr *Size) { + static StringRef SizedObjs[] = {"span", "array", "vector", + "basic_string_view", "basic_string"}; + if (auto *MCEPtr = dyn_cast<CXXMemberCallExpr>(SizedByPtr)) + if (auto *MCESize = dyn_cast<CXXMemberCallExpr>(Size)) { + if (auto *DREPtr = + dyn_cast<DeclRefExpr>(MCEPtr->getImplicitObjectArgument())) + if (auto *DRESize = + dyn_cast<DeclRefExpr>(MCESize->getImplicitObjectArgument())) + if (DREPtr->getDecl() == + DRESize->getDecl()) // coming out of the same variable + if (MCEPtr->getMethodDecl()->getName() == "data") + if (MCESize->getMethodDecl()->getName() == "size_bytes") + for (StringRef SizedObj : SizedObjs) + if (MCEPtr->getRecordDecl()->isInStdNamespace() && + MCEPtr->getRecordDecl() + ->getCanonicalDecl() + ->getName() == SizedObj) + return true; + } + return false; + } + + // This is safe: strlen("hello"). We don't want to be noisy on this case. + bool isSafeStrlen(StringRef Name) { + return Name == "strlen" && Call->getNumArgs() == 1 && + isa<StringLiteral>(Call->getArg(0)->IgnoreParenImpCasts()); + } + } FuncNameMatch{&Node}; + + const FunctionDecl *FD = Node.getDirectCallee(); + const IdentifierInfo *II; + + if (!FD) + return false; + II = FD->getIdentifier(); + // If this is a special C++ name without IdentifierInfo, it can't be a + // C library function. + if (!II) + return false; + + // Look through 'extern "C"' and anything similar invented in the future. + // If this function is not in TU directly, it is not a C library function. + if (!FD->getDeclContext()->getRedeclContext()->isTranslationUnit()) + return false; + + // If this function is not externally visible, it is not a C library function. + // Note that we make an exception for inline functions, which may be + // declared in header files without external linkage. + if (!FD->isInlined() && !FD->isExternallyVisible()) ---------------- ziqingluo-90 wrote:
I just blindly bought what the comment says. Let me know if you think this check is actually problematic 🤔. https://github.com/llvm/llvm-project/pull/101583 _______________________________________________ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits