donat.nagy created this revision. donat.nagy added reviewers: dkrupp, gamesh411, steakhal. Herald added subscribers: manas, ASDenysPetrov, martong, Szelethus, mikhail.ramalho, a.sidorin, rnkovacs, szepet, baloghadamsoftware, xazax.hun. Herald added a reviewer: NoQ. Herald added a project: All. donat.nagy requested review of this revision. Herald added a project: clang. Herald added a subscriber: cfe-commits.
The checker alpha.security.ArrayBoundV2 was implemented with a `check::Location` callback, so it triggered on memory read or write operations (and checked access to ElementRegions). The generality of this solution was elegant, but I'm replacing it with an implementation based on `check::PreStmt<ArraySubscriptExpr>`, because that's closer to the intuition of the users. (I'm planning to improve the error messages and it'd be good to connect each bug report with a concrete operator that triggers it.) Note that the logic for simplifying expressions is unchanged and this commit doesn't modify the behavior of the checker under common circumstances (e.g. all unit tests pass without changes). Repository: rG LLVM Github Monorepo https://reviews.llvm.org/D150446 Files: clang/lib/StaticAnalyzer/Checkers/ArrayBoundCheckerV2.cpp
Index: clang/lib/StaticAnalyzer/Checkers/ArrayBoundCheckerV2.cpp =================================================================== --- clang/lib/StaticAnalyzer/Checkers/ArrayBoundCheckerV2.cpp +++ clang/lib/StaticAnalyzer/Checkers/ArrayBoundCheckerV2.cpp @@ -31,7 +31,7 @@ namespace { class ArrayBoundCheckerV2 : - public Checker<check::Location> { + public Checker<check::PreStmt<ArraySubscriptExpr>> { mutable std::unique_ptr<BuiltinBug> BT; mutable std::unique_ptr<BugType> TaintBT; @@ -45,8 +45,9 @@ static bool isFromCtypeMacro(const Stmt *S, ASTContext &AC); public: - void checkLocation(SVal l, bool isLoad, const Stmt *S, - CheckerContext &C) const; + void checkPreStmt(const ArraySubscriptExpr *ASE, CheckerContext &C) const; + // void checkLocation(SVal l, bool isLoad, const Stmt *S, + // CheckerContext &C) const; }; // FIXME: Eventually replace RegionRawOffset with this class. @@ -63,7 +64,7 @@ const SubRegion *getRegion() const { return baseRegion; } static std::optional<RegionRawOffsetV2> - computeOffset(ProgramStateRef State, SValBuilder &SVB, SVal Location); + computeOffset(ProgramStateRef State, SValBuilder &SVB, const LocationContext *LCtx, const ArraySubscriptExpr *ASE); void dump() const; void dumpToStream(raw_ostream &os) const; @@ -86,8 +87,8 @@ APSIntType(extent.getValue()).convert(SIE->getRHS()); switch (SIE->getOpcode()) { case BO_Mul: - // The constant should never be 0 here, since it the result of scaling - // based on the size of a type which is never 0. + // In a SymIntExpr multiplication the constant cannot be 0, because + // then the enginge would've replaced it with a constant 0 value. if ((extent.getValue() % constant) != 0) return std::pair<NonLoc, nonloc::ConcreteInt>(offset, extent); else @@ -140,44 +141,41 @@ return {nullptr, nullptr}; } -void ArrayBoundCheckerV2::checkLocation(SVal location, bool isLoad, - const Stmt* LoadS, - CheckerContext &checkerContext) const { - +void ArrayBoundCheckerV2::checkPreStmt(const ArraySubscriptExpr *ASE, + CheckerContext &C) const { // NOTE: Instead of using ProgramState::assumeInBound(), we are prototyping // some new logic here that reasons directly about memory region extents. // Once that logic is more mature, we can bring it back to assumeInBound() // for all clients to use. - // - // The algorithm we are using here for bounds checking is to see if the - // memory access is within the extent of the base region. Since we - // have some flexibility in defining the base region, we can achieve - // various levels of conservatism in our buffer overflow checking. // The header ctype.h (from e.g. glibc) implements the isXXXXX() macros as // #define isXXXXX(arg) (LOOKUP_TABLE[arg] & BITMASK_FOR_XXXXX) // and incomplete analysis of these leads to false positives. As even // accurate reports would be confusing for the users, just disable reports // from these macros: - if (isFromCtypeMacro(LoadS, checkerContext.getASTContext())) + if (isFromCtypeMacro(ASE, C.getASTContext())) return; - ProgramStateRef state = checkerContext.getState(); + ProgramStateRef state = C.getState(); + + SValBuilder &svalBuilder = C.getSValBuilder(); + + const LocationContext *LCtx = C.getLocationContext(); - SValBuilder &svalBuilder = checkerContext.getSValBuilder(); const std::optional<RegionRawOffsetV2> &RawOffset = - RegionRawOffsetV2::computeOffset(state, svalBuilder, location); + RegionRawOffsetV2::computeOffset(state, svalBuilder, LCtx, ASE); if (!RawOffset) return; NonLoc ByteOffset = RawOffset->getByteOffset(); + const SubRegion *Reg = RawOffset->getRegion(); // CHECK LOWER BOUND - const MemSpaceRegion *SR = RawOffset->getRegion()->getMemorySpace(); - if (!llvm::isa<UnknownSpaceRegion>(SR)) { - // A pointer to UnknownSpaceRegion may point to the middle of - // an allocated region. + const MemSpaceRegion *Space = Reg->getMemorySpace(); + if (!(isa<SymbolicRegion>(Reg) && isa<UnknownSpaceRegion>(Space))) { + // Symbolic regions in unknown spaces are used for modelling unknown + // pointers, so they may point to the middle of an array. auto [state_precedesLowerBound, state_withinLowerBound] = compareValueToThreshold(state, ByteOffset, @@ -185,7 +183,7 @@ if (state_precedesLowerBound && !state_withinLowerBound) { // We know that the index definitely precedes the lower bound. - reportOOB(checkerContext, state_precedesLowerBound, OOB_Precedes); + reportOOB(C, state_precedesLowerBound, OOB_Precedes); return; } @@ -194,8 +192,7 @@ } // CHECK UPPER BOUND - DefinedOrUnknownSVal Size = - getDynamicExtent(state, RawOffset->getRegion(), svalBuilder); + DefinedOrUnknownSVal Size = getDynamicExtent(state, Reg, svalBuilder); if (auto KnownSize = Size.getAs<NonLoc>()) { auto [state_withinUpperBound, state_exceedsUpperBound] = compareValueToThreshold(state, ByteOffset, *KnownSize, svalBuilder); @@ -203,12 +200,12 @@ if (state_exceedsUpperBound) { if (!state_withinUpperBound) { // We know that the index definitely exceeds the upper bound. - reportOOB(checkerContext, state_exceedsUpperBound, OOB_Excedes); + reportOOB(C, state_exceedsUpperBound, OOB_Excedes); return; } if (isTainted(state, ByteOffset)) { // Both cases are possible, but the index is tainted, so report. - reportTaintOOB(checkerContext, state_exceedsUpperBound, ByteOffset); + reportTaintOOB(C, state_exceedsUpperBound, ByteOffset); return; } } @@ -217,7 +214,7 @@ state = state_withinUpperBound; } - checkerContext.addTransition(state); + C.addTransition(state); } void ArrayBoundCheckerV2::reportTaintOOB(CheckerContext &checkerContext, @@ -303,53 +300,48 @@ } #endif -/// For a given Location that can be represented as a symbolic expression -/// Arr[Idx] (or perhaps Arr[Idx1][Idx2] etc.), return the parent memory block -/// Arr and the distance of Location from the beginning of Arr (expressed in a -/// NonLoc that specifies the number of CharUnits). Returns nullopt when these -/// cannot be determined. +/// For a given ArraySubscriptExpr 'Ptr[Idx]', calculate a (Region, Offset) +/// pair where Region is the memory area that may be legitimately accessed via +/// Ptr and Offset is the distance of Ptr[Idx] from the beginning of Region +/// (expressed as a NonLoc that specifies the number of CharUnits). Returns +/// nullopt when these cannot be determined. std::optional<RegionRawOffsetV2> RegionRawOffsetV2::computeOffset(ProgramStateRef State, SValBuilder &SVB, - SVal Location) { - QualType T = SVB.getArrayIndexType(); - auto Calc = [&SVB, State, T](BinaryOperatorKind Op, NonLoc LHS, NonLoc RHS) { - // We will use this utility to add and multiply values. - return SVB.evalBinOpNN(State, Op, LHS, RHS, T).getAs<NonLoc>(); - }; - - const MemRegion *Region = Location.getAsRegion(); - NonLoc Offset = SVB.makeZeroArrayIndex(); - - while (Region) { - if (const auto *ERegion = dyn_cast<ElementRegion>(Region)) { - if (const auto Index = ERegion->getIndex().getAs<NonLoc>()) { - QualType ElemType = ERegion->getElementType(); - // If the element is an incomplete type, go no further. - if (ElemType->isIncompleteType()) - return std::nullopt; - - // Perform Offset += Index * sizeof(ElemType); then continue the offset - // calculations with SuperRegion: - NonLoc Size = SVB.makeArrayIndex( - SVB.getContext().getTypeSizeInChars(ElemType).getQuantity()); - if (auto Delta = Calc(BO_Mul, *Index, Size)) { - if (auto NewOffset = Calc(BO_Add, Offset, *Delta)) { - Offset = *NewOffset; - Region = ERegion->getSuperRegion(); - continue; - } - } + const LocationContext *LCtx, + const ArraySubscriptExpr *ASE) { + QualType ElemType = ASE->getType(); + assert(!ElemType->isIncompleteType()); + + QualType IdxType = SVB.getArrayIndexType(); + + if (auto Idx = State->getSVal(ASE->getIdx(), LCtx).getAs<NonLoc>()) { + const MemRegion *R = State->getSVal(ASE->getBase(), LCtx).getAsRegion(); + const ElementRegion *ER; + while ((ER = dyn_cast_or_null<ElementRegion>(R)) && ER->getElementType() == ElemType) { + // Special case to strip ElementRegion layers that represent pointer + // arithmetics. For example in code like + // char Text[] = "Hello world!", *Word = Text+6; + // the area *Word is represented as Element{<Text>, 6 S64B, char} which + // is nominally a single byte area, but logically we want to handle + // Word[X] equivalently to Text[X + 6]. On the other hand if we have + // int Matrix[10][10]; + // we want to warn when we see Matrix[0][20] because this is logically + // invalid and technically invokes undefined behavior when it takes the + // 20th element of the int[10] value Matrix[0]. + R = ER->getSuperRegion(); + Idx = SVB.evalBinOpNN(State, BO_Add, *Idx, ER->getIndex(), IdxType).getAs<NonLoc>(); + if (!Idx) + return std::nullopt; + } + + if (const auto *SRegion = dyn_cast_or_null<SubRegion>(R)) { + NonLoc ESize = SVB.makeArrayIndex( + SVB.getContext().getTypeSizeInChars(ElemType).getQuantity()); + SVal Offset = SVB.evalBinOpNN(State, BO_Mul, ESize, *Idx, IdxType); + if (const auto OffsetNL = Offset.getAs<NonLoc>()) { + return RegionRawOffsetV2(SRegion, *OffsetNL); } - } else if (const auto *SRegion = dyn_cast<SubRegion>(Region)) { - // NOTE: The dyn_cast<>() is expected to succeed, it'd be very surprising - // to see a MemSpaceRegion at this point. - // FIXME: We may return with {<Region>, 0} even if we didn't handle any - // ElementRegion layers. I think that this behavior was introduced - // accidentally by 8a4c760c204546aba566e302f299f7ed2e00e287 in 2011, so - // it may be useful to review it in the future. - return RegionRawOffsetV2(SRegion, Offset); } - return std::nullopt; } return std::nullopt; }
_______________________________________________ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits