=?utf-8?q?Donát?= Nagy <donat.n...@ericsson.com>, =?utf-8?q?Donát?= Nagy <donat.n...@ericsson.com> Message-ID: In-Reply-To: <llvm.org/llvm/llvm-project/pull/78...@github.com>
https://github.com/NagyDonat updated https://github.com/llvm/llvm-project/pull/78315 >From c75c05c6e894a46797913c5bdccb240cbcc01ae9 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Don=C3=A1t=20Nagy?= <donat.n...@ericsson.com> Date: Tue, 12 Dec 2023 13:07:54 +0100 Subject: [PATCH 1/4] [analyzer] Support interestingness in ArrayBoundV2 This commit improves alpha.security.ArrayBoundV2 in two connected areas: (1) It calls `markInteresting()` on the symbolic values that are responsible for the out of bounds access. (2) Its index-is-in-bounds assumptions are reported in note tags if they provide information about the value of an interesting symbol. This commit is limited to "display" changes: it introduces new diagnostic pieces (potentially to bugs found by other checkers), but the ArrayBoundV2 will make the same assumptions and detect the same bugs before and after this change. As a minor unrelated change, this commit also updates/removes some very old comments which became obsolate due to my previous changes. --- .../Checkers/ArrayBoundCheckerV2.cpp | 345 ++++++++++++++---- .../test/Analysis/out-of-bounds-diagnostics.c | 10 + clang/test/Analysis/out-of-bounds-notes.c | 128 +++++++ 3 files changed, 413 insertions(+), 70 deletions(-) create mode 100644 clang/test/Analysis/out-of-bounds-notes.c diff --git a/clang/lib/StaticAnalyzer/Checkers/ArrayBoundCheckerV2.cpp b/clang/lib/StaticAnalyzer/Checkers/ArrayBoundCheckerV2.cpp index 6c7a1601402efa..4241fa3a2ce8af 100644 --- a/clang/lib/StaticAnalyzer/Checkers/ArrayBoundCheckerV2.cpp +++ b/clang/lib/StaticAnalyzer/Checkers/ArrayBoundCheckerV2.cpp @@ -33,7 +33,66 @@ using namespace taint; using llvm::formatv; namespace { -enum OOB_Kind { OOB_Precedes, OOB_Exceeds, OOB_Taint }; +class StateUpdateReporter { + const SubRegion *Reg; + NonLoc ByteOffsetVal; + std::optional<QualType> ElementType = std::nullopt; + std::optional<int64_t> ElementSize = std::nullopt; + bool AssumedNonNegative = false; + std::optional<NonLoc> AssumedUpperBound = std::nullopt; + +public: + StateUpdateReporter(const SubRegion *R, NonLoc ByteOffsVal, const Expr *E, + CheckerContext &C) + : Reg(R), ByteOffsetVal(ByteOffsVal) { + initializeElementInfo(E, C); + } + + void initializeElementInfo(const Expr *E, CheckerContext &C) { + if (const auto *ASE = dyn_cast<ArraySubscriptExpr>(E)) { + const MemRegion *SubscriptBaseReg = + C.getSVal(ASE->getBase()).getAsRegion(); + if (!SubscriptBaseReg) + return; + SubscriptBaseReg = SubscriptBaseReg->StripCasts(); + if (!isa_and_nonnull<ElementRegion>(SubscriptBaseReg)) { + ElementType = ASE->getType(); + ElementSize = + C.getASTContext().getTypeSizeInChars(*ElementType).getQuantity(); + } + } + } + void recordNonNegativeAssumption() { AssumedNonNegative = true; } + void recordUpperBoundAssumption(NonLoc UpperBoundVal) { + AssumedUpperBound = UpperBoundVal; + } + + const NoteTag *createNoteTag(CheckerContext &C) const; + +private: + std::string getMessage(PathSensitiveBugReport &BR) const; + + /// Return true if information about the value of `Sym` can put constraints + /// on some symbol which is interesting within the bug report `BR`. + /// In particular, this returns true when `Sym` is interesting within `BR`; + /// but it also returns true if `Sym` is an expression that contains integer + /// constants and a single symbolic operand which is interesting (in `BR`). + /// We need to use this instead of plain `BR.isInteresting()` because if we + /// are analyzing code like + /// int array[10]; + /// int f(int arg) { + /// return array[arg] && array[arg + 10]; + /// } + /// then the byte offsets are `arg * 4` and `(arg + 10) * 4`, which are not + /// sub-expressions of each other (but `getSimplifiedOffsets` is smart enough + /// to detect this out of bounds access). + static bool providesInformationAboutInteresting(SymbolRef Sym, + PathSensitiveBugReport &BR); + static bool providesInformationAboutInteresting(SVal SV, + PathSensitiveBugReport &BR) { + return providesInformationAboutInteresting(SV.getAsSymbol(), BR); + } +}; struct Messages { std::string Short, Full; @@ -54,11 +113,14 @@ class ArrayBoundCheckerV2 : public Checker<check::PostStmt<ArraySubscriptExpr>, void performCheck(const Expr *E, CheckerContext &C) const; - void reportOOB(CheckerContext &C, ProgramStateRef ErrorState, OOB_Kind Kind, - NonLoc Offset, Messages Msgs) const; + void reportOOB(CheckerContext &C, ProgramStateRef ErrorState, Messages Msgs, + NonLoc Offset, bool IsTaintBug = false) const; static bool isFromCtypeMacro(const Stmt *S, ASTContext &AC); + static bool isIdiomaticPastTheEndPtr(const Expr *E, ProgramStateRef State, + NonLoc Offset, NonLoc Limit, + CheckerContext &C); static bool isInAddressOf(const Stmt *S, ASTContext &AC); public: @@ -133,12 +195,19 @@ computeOffset(ProgramStateRef State, SValBuilder &SVB, SVal Location) { return std::nullopt; } -// TODO: once the constraint manager is smart enough to handle non simplified -// symbolic expressions remove this function. Note that this can not be used in -// the constraint manager as is, since this does not handle overflows. It is -// safe to assume, however, that memory offsets will not overflow. -// NOTE: callers of this function need to be aware of the effects of overflows -// and signed<->unsigned conversions! +// NOTE: This function is the "heart" of this checker. It simplifies +// inequalities with transformations that are valid (and very elementary) in +// pure mathematics, but become invalid if we use them in C++ number model +// where the calculations may overflow. +// Due to the overflow issues I think it's impossible (or at least not +// practical) to integrate this kind of simplification into the resolution of +// arbitrary inequalities (i.e. the code of `evalBinOp`); but this function +// produces valid results if the arguments are memory offsets which are known +// to be << SIZE_MAX. +// TODO: This algorithm should be moved to a central location where it's +// available for other checkers that need to compare memory offsets. +// NOTE: When using the results of this function, don't forget that `evalBinOp` +// uses the evaluation rules of C++, so e.g. `(size_t)123 < -1`! static std::pair<NonLoc, nonloc::ConcreteInt> getSimplifiedOffsets(NonLoc offset, nonloc::ConcreteInt extent, SValBuilder &svalBuilder) { @@ -239,13 +308,8 @@ static std::optional<int64_t> getConcreteValue(NonLoc SV) { return std::nullopt; } -static std::string getShortMsg(OOB_Kind Kind, std::string RegName) { - static const char *ShortMsgTemplates[] = { - "Out of bound access to memory preceding {0}", - "Out of bound access to memory after the end of {0}", - "Potential out of bound access to {0} with tainted offset"}; - - return formatv(ShortMsgTemplates[Kind], RegName); +static std::optional<int64_t> getConcreteValue(std::optional<NonLoc> SV) { + return SV ? getConcreteValue(*SV) : std::nullopt; } static Messages getPrecedesMsgs(const SubRegion *Region, NonLoc Offset) { @@ -255,7 +319,28 @@ static Messages getPrecedesMsgs(const SubRegion *Region, NonLoc Offset) { Out << "Access of " << RegName << " at negative byte offset"; if (auto ConcreteIdx = Offset.getAs<nonloc::ConcreteInt>()) Out << ' ' << ConcreteIdx->getValue(); - return {getShortMsg(OOB_Precedes, RegName), std::string(Buf)}; + return {formatv("Out of bound access to memory preceding {0}", RegName), + std::string(Buf)}; +} + +/// Try to divide `Val1` and `Val2` (in place) by `Divisor` and return true if +/// it can be performed (`Divisor` is nonzero and there is no remainder). The +/// values `Val1` and `Val2` may be nullopt and in that case the corresponding +/// division is considered to be successful. +bool tryDividePair(std::optional<int64_t> &Val1, std::optional<int64_t> &Val2, + int64_t Divisor) { + if (!Divisor) + return false; + const bool Val1HasRemainder = Val1 && *Val1 % Divisor; + const bool Val2HasRemainder = Val2 && *Val2 % Divisor; + if (!Val1HasRemainder && !Val2HasRemainder) { + if (Val1) + *Val1 /= Divisor; + if (Val2) + *Val2 /= Divisor; + return true; + } + return false; } static Messages getExceedsMsgs(ASTContext &ACtx, const SubRegion *Region, @@ -268,18 +353,9 @@ static Messages getExceedsMsgs(ASTContext &ACtx, const SubRegion *Region, std::optional<int64_t> OffsetN = getConcreteValue(Offset); std::optional<int64_t> ExtentN = getConcreteValue(Extent); - bool UseByteOffsets = true; - if (int64_t ElemSize = ACtx.getTypeSizeInChars(ElemType).getQuantity()) { - const bool OffsetHasRemainder = OffsetN && *OffsetN % ElemSize; - const bool ExtentHasRemainder = ExtentN && *ExtentN % ElemSize; - if (!OffsetHasRemainder && !ExtentHasRemainder) { - UseByteOffsets = false; - if (OffsetN) - *OffsetN /= ElemSize; - if (ExtentN) - *ExtentN /= ElemSize; - } - } + int64_t ElemSize = ACtx.getTypeSizeInChars(ElemType).getQuantity(); + + bool UseByteOffsets = !tryDividePair(OffsetN, ExtentN, ElemSize); SmallString<256> Buf; llvm::raw_svector_ostream Out(Buf); @@ -307,7 +383,9 @@ static Messages getExceedsMsgs(ASTContext &ACtx, const SubRegion *Region, Out << "s"; } - return {getShortMsg(OOB_Exceeds, RegName), std::string(Buf)}; + return { + formatv("Out of bound access to memory after the end of {0}", RegName), + std::string(Buf)}; } static Messages getTaintMsgs(const SubRegion *Region, const char *OffsetName) { @@ -318,17 +396,87 @@ static Messages getTaintMsgs(const SubRegion *Region, const char *OffsetName) { RegName, OffsetName)}; } -void ArrayBoundCheckerV2::performCheck(const Expr *E, CheckerContext &C) const { - // NOTE: Instead of using ProgramState::assumeInBound(), we are prototyping - // some new logic here that reasons directly about memory region extents. - // Once that logic is more mature, we can bring it back to assumeInBound() - // for all clients to use. - // - // The algorithm we are using here for bounds checking is to see if the - // memory access is within the extent of the base region. Since we - // have some flexibility in defining the base region, we can achieve - // various levels of conservatism in our buffer overflow checking. +const NoteTag *StateUpdateReporter::createNoteTag(CheckerContext &C) const { + // Don't create a note tag if we didn't assume anything: + if (!AssumedNonNegative && !AssumedUpperBound) + return nullptr; + + return C.getNoteTag( + [*this](PathSensitiveBugReport &BR) -> std::string { + return getMessage(BR); + }, + /*isPrunable=*/false); +} + +std::string StateUpdateReporter::getMessage(PathSensitiveBugReport &BR) const { + bool ShouldReportNonNegative = AssumedNonNegative; + if (!providesInformationAboutInteresting(ByteOffsetVal, BR)) { + if (AssumedUpperBound && + providesInformationAboutInteresting(*AssumedUpperBound, BR)) + ShouldReportNonNegative = false; + else + return ""; + } + + std::optional<int64_t> OffsetN = getConcreteValue(ByteOffsetVal); + std::optional<int64_t> ExtentN = getConcreteValue(AssumedUpperBound); + const bool UseIndex = + ElementSize && tryDividePair(OffsetN, ExtentN, *ElementSize); + + SmallString<256> Buf; + llvm::raw_svector_ostream Out(Buf); + Out << "Assuming "; + if (UseIndex) { + Out << "index "; + if (OffsetN) + Out << "'" << OffsetN << "' "; + } else if (AssumedUpperBound) { + Out << "byte offset "; + if (OffsetN) + Out << "'" << OffsetN << "' "; + } else { + Out << "offset "; + } + + Out << "is"; + if (ShouldReportNonNegative) { + Out << " non-negative"; + } + if (AssumedUpperBound) { + if (ShouldReportNonNegative) + Out << " and"; + Out << " less than "; + if (ExtentN) + Out << *ExtentN << ", "; + if (UseIndex && ElementType) + Out << "the number of '" << ElementType->getAsString() + << "' elements in "; + else + Out << "the extent of "; + Out << getRegionName(Reg); + } + return std::string(Out.str()); +} + +bool StateUpdateReporter::providesInformationAboutInteresting( + SymbolRef Sym, PathSensitiveBugReport &BR) { + if (!Sym) + return false; + for (SymbolRef PartSym : Sym->symbols()) { + // The interestingess mark may appear on any layer as we're stripping off + // the SymIntExpr, UnarySymExpr etc. layers... + if (BR.isInteresting(PartSym)) + return true; + // ...but if both sides of the expression are symbolic (i.e. unknown), then + // the analyzer can't use the combined result to constrain the operands. + if (isa<SymSymExpr>(PartSym)) + return false; + } + return false; +} + +void ArrayBoundCheckerV2::performCheck(const Expr *E, CheckerContext &C) const { const SVal Location = C.getSVal(E); // The header ctype.h (from e.g. glibc) implements the isXXXXX() macros as @@ -350,6 +498,10 @@ void ArrayBoundCheckerV2::performCheck(const Expr *E, CheckerContext &C) const { auto [Reg, ByteOffset] = *RawOffset; + // The state updates will be reported as a single note tag, which will be + // composed by this helper class. + StateUpdateReporter SUR(Reg, ByteOffset, E, C); + // CHECK LOWER BOUND const MemSpaceRegion *Space = Reg->getMemorySpace(); if (!(isa<SymbolicRegion>(Reg) && isa<UnknownSpaceRegion>(Space))) { @@ -363,13 +515,22 @@ void ArrayBoundCheckerV2::performCheck(const Expr *E, CheckerContext &C) const { auto [PrecedesLowerBound, WithinLowerBound] = compareValueToThreshold( State, ByteOffset, SVB.makeZeroArrayIndex(), SVB); - if (PrecedesLowerBound && !WithinLowerBound) { - // We know that the index definitely precedes the lower bound. - Messages Msgs = getPrecedesMsgs(Reg, ByteOffset); - reportOOB(C, PrecedesLowerBound, OOB_Precedes, ByteOffset, Msgs); - return; + if (PrecedesLowerBound) { + // The offset may be invalid (negative)... + if (!WithinLowerBound) { + // ...and it cannot be valid (>= 0), so report an error. + Messages Msgs = getPrecedesMsgs(Reg, ByteOffset); + reportOOB(C, PrecedesLowerBound, Msgs, ByteOffset); + return; + } + // ...but it can be valid as well, so the checker will (optimistically) + // assume that it's valid and mention this in the note tag. + SUR.recordNonNegativeAssumption(); } + // Actually update the state. The "if" only fails in the extremely unlikely + // case when compareValueToThreshold returns {nullptr, nullptr} becasue + // evalBinOpNN fails to evaluate the less-than operator. if (WithinLowerBound) State = WithinLowerBound; } @@ -381,32 +542,30 @@ void ArrayBoundCheckerV2::performCheck(const Expr *E, CheckerContext &C) const { compareValueToThreshold(State, ByteOffset, *KnownSize, SVB); if (ExceedsUpperBound) { + // The offset may be invalid (>= Size)... if (!WithinUpperBound) { - // We know that the index definitely exceeds the upper bound. - if (isa<ArraySubscriptExpr>(E) && isInAddressOf(E, C.getASTContext())) { - // ...but this is within an addressof expression, so we need to check - // for the exceptional case that `&array[size]` is valid. - auto [EqualsToThreshold, NotEqualToThreshold] = - compareValueToThreshold(ExceedsUpperBound, ByteOffset, *KnownSize, - SVB, /*CheckEquality=*/true); - if (EqualsToThreshold && !NotEqualToThreshold) { - // We are definitely in the exceptional case, so return early - // instead of reporting a bug. - C.addTransition(EqualsToThreshold); - return; - } + // ...and it cannot be within bounds, so report an error, unless we can + // definitely determine that this is an idiomatic `&array[size]` + // expression that calculates the past-the-end pointer. + if (isIdiomaticPastTheEndPtr(E, ExceedsUpperBound, ByteOffset, + *KnownSize, C)) { + // FIXME: this duplicates the `addTransition` at the end of the + // function, but `goto` is taboo nowdays. + C.addTransition(ExceedsUpperBound, SUR.createNoteTag(C)); + return; } + Messages Msgs = getExceedsMsgs(C.getASTContext(), Reg, ByteOffset, *KnownSize, Location); - reportOOB(C, ExceedsUpperBound, OOB_Exceeds, ByteOffset, Msgs); + reportOOB(C, ExceedsUpperBound, Msgs, ByteOffset); return; } + // ...and it can be valid as well... if (isTainted(State, ByteOffset)) { - // Both cases are possible, but the offset is tainted, so report. - std::string RegName = getRegionName(Reg); + // ...but it's tainted, so report an error. - // Diagnostic detail: "tainted offset" is always correct, but the - // common case is that 'idx' is tainted in 'arr[idx]' and then it's + // Diagnostic detail: saying "tainted offset" is always correct, but + // the common case is that 'idx' is tainted in 'arr[idx]' and then it's // nicer to say "tainted index". const char *OffsetName = "offset"; if (const auto *ASE = dyn_cast<ArraySubscriptExpr>(E)) @@ -414,33 +573,67 @@ void ArrayBoundCheckerV2::performCheck(const Expr *E, CheckerContext &C) const { OffsetName = "index"; Messages Msgs = getTaintMsgs(Reg, OffsetName); - reportOOB(C, ExceedsUpperBound, OOB_Taint, ByteOffset, Msgs); + reportOOB(C, ExceedsUpperBound, Msgs, ByteOffset, /*IsTaintBug=*/true); return; } + // ...and it isn't tainted, so the checker will (optimistically) assume + // that the offset is in bounds and mention this in the note tag. + SUR.recordUpperBoundAssumption(*KnownSize); } + // Actually update the state. The "if" only fails in the extremely unlikely + // case when compareValueToThreshold returns {nullptr, nullptr} becasue + // evalBinOpNN fails to evaluate the less-than operator. if (WithinUpperBound) State = WithinUpperBound; } - C.addTransition(State); + // Add a transition, reporting the state updates that we accumulated. + C.addTransition(State, SUR.createNoteTag(C)); } void ArrayBoundCheckerV2::reportOOB(CheckerContext &C, - ProgramStateRef ErrorState, OOB_Kind Kind, - NonLoc Offset, Messages Msgs) const { + ProgramStateRef ErrorState, Messages Msgs, + NonLoc Offset, bool IsTaintBug) const { ExplodedNode *ErrorNode = C.generateErrorNode(ErrorState); if (!ErrorNode) return; auto BR = std::make_unique<PathSensitiveBugReport>( - Kind == OOB_Taint ? TaintBT : BT, Msgs.Short, Msgs.Full, ErrorNode); + IsTaintBug ? TaintBT : BT, Msgs.Short, Msgs.Full, ErrorNode); + + // FIXME: ideally we would just call trackExpressionValue() and that would + // "do the right thing": mark the relevant symbols as interesting, track the + // control dependencies and statements storing the relevant values and add + // helpful diagnostic pieces. However, right now trackExpressionValue() is + // a heap of unreliable heuristics, so it would cause several issues: + // - Interestingness is not applied consistently, e.g. if `array[x+10]` + // causes an overflow, then `x` is not marked as interesting. + // - We get irrelevant diagnostic pieces, e.g. in the code + // `int *p = (int*)malloc(2*sizeof(int)); p[3] = 0;` + // it places a "Storing uninitialized value" note on the `malloc` call + // (which is technically true, but irrelevant). + // If trackExpressionValue() becomes reliable, it should be applied instead + // of the manual markInteresting() calls. + + if (SymbolRef OffsetSym = Offset.getAsSymbol()) { + // If the offset is a symbolic value, iterate over its "parts" with + // `SymExpr::symbols()` and mark each of them as interesting. + // For example, if the offset is `x*4 + y` then we put interestingness onto + // the SymSymExpr `x*4 + y`, the SymIntExpr `x*4` and the two data symbols + // `x` and `y`. + for (SymbolRef PartSym : OffsetSym->symbols()) + BR->markInteresting(PartSym); + } - // Track back the propagation of taintedness. - if (Kind == OOB_Taint) + if (IsTaintBug) { + // If the issue that we're reporting depends on the taintedness of the + // offset, then put interestingness onto symbols that could be the origin + // of the taint. for (SymbolRef Sym : getTaintedSymbols(ErrorState, Offset)) BR->markInteresting(Sym); + } C.emitReport(std::move(BR)); } @@ -476,6 +669,18 @@ bool ArrayBoundCheckerV2::isInAddressOf(const Stmt *S, ASTContext &ACtx) { return UnaryOp && UnaryOp->getOpcode() == UO_AddrOf; } +bool ArrayBoundCheckerV2::isIdiomaticPastTheEndPtr(const Expr *E, + ProgramStateRef State, + NonLoc Offset, NonLoc Limit, + CheckerContext &C) { + if (isa<ArraySubscriptExpr>(E) && isInAddressOf(E, C.getASTContext())) { + auto [EqualsToThreshold, NotEqualToThreshold] = compareValueToThreshold( + State, Offset, Limit, C.getSValBuilder(), /*CheckEquality=*/true); + return EqualsToThreshold && !NotEqualToThreshold; + } + return false; +} + void ento::registerArrayBoundCheckerV2(CheckerManager &mgr) { mgr.registerChecker<ArrayBoundCheckerV2>(); } diff --git a/clang/test/Analysis/out-of-bounds-diagnostics.c b/clang/test/Analysis/out-of-bounds-diagnostics.c index 769a8954f79669..7a8f59650fef16 100644 --- a/clang/test/Analysis/out-of-bounds-diagnostics.c +++ b/clang/test/Analysis/out-of-bounds-diagnostics.c @@ -195,12 +195,22 @@ void *malloc(size_t size); int *mallocRegion(void) { int *mem = (int*)malloc(2*sizeof(int)); + mem[3] = -2; // expected-warning@-1 {{Out of bound access to memory after the end of the heap area}} // expected-note@-2 {{Access of the heap area at index 3, while it holds only 2 'int' elements}} return mem; } +int *mallocRegionDeref(void) { + int *mem = (int*)malloc(2*sizeof(int)); + + *(mem + 3) = -2; + // expected-warning@-1 {{Out of bound access to memory after the end of the heap area}} + // expected-note@-2 {{Access of the heap area at index 3, while it holds only 2 'int' elements}} + return mem; +} + void *alloca(size_t size); int allocaRegion(void) { diff --git a/clang/test/Analysis/out-of-bounds-notes.c b/clang/test/Analysis/out-of-bounds-notes.c new file mode 100644 index 00000000000000..290df79f3e18a4 --- /dev/null +++ b/clang/test/Analysis/out-of-bounds-notes.c @@ -0,0 +1,128 @@ +// RUN: %clang_analyze_cc1 -Wno-array-bounds -analyzer-output=text \ +// RUN: -analyzer-checker=core,alpha.security.ArrayBoundV2,unix.Malloc,alpha.security.taint,debug.ExprInspection -verify %s + +int array[10]; + +void clang_analyzer_value(unsigned arg); + +int irrelevantAssumptions(int arg) { + int a = array[arg]; + // Here the analyzer assumes that `arg` is in bounds, but doesn't report this + // because `arg` is not interesting for the bug. + int b = array[13]; + // expected-warning@-1 {{Out of bound access to memory after the end of 'array'}} + // expected-note@-2 {{Access of 'array' at index 13, while it holds only 10 'int' elements}} + return a + b; +} + + +int assumingBoth(int arg) { + int a = array[arg]; + // expected-note@-1 {{Assuming index is non-negative and less than 10, the number of 'int' elements in 'array'}} + int b = array[arg]; // no additional note, we already assumed that 'arg' is in bounds + int c = array[arg + 10]; + // expected-warning@-1 {{Out of bound access to memory after the end of 'array'}} + // expected-note@-2 {{Access of 'array' at an overflowing index, while it holds only 10 'int' elements}} + return a + b + c; +} + +int assumingLower(int arg) { + // expected-note@+2 {{Assuming 'arg' is < 10}} + // expected-note@+1 {{Taking false branch}} + if (arg >= 10) + return 0; + int a = array[arg]; + // expected-note@-1 {{Assuming index is non-negative}} + int b = array[arg + 10]; + // expected-warning@-1 {{Out of bound access to memory after the end of 'array'}} + // expected-note@-2 {{Access of 'array' at an overflowing index, while it holds only 10 'int' elements}} + return a + b; +} + +int assumingUpper(int arg) { + // expected-note@+2 {{Assuming 'arg' is >= 0}} + // expected-note@+1 {{Taking false branch}} + if (arg < 0) + return 0; + int a = array[arg]; + // expected-note@-1 {{Assuming index is less than 10, the number of 'int' elements in 'array'}} + int b = array[arg - 10]; + // expected-warning@-1 {{Out of bound access to memory preceding 'array'}} + // expected-note@-2 {{Access of 'array' at negative byte offset}} + return a + b; +} + +int assumingUpperIrrelevant(int arg) { + // FIXME: The assumption "assuming index is less than 10" is printed because + // it's assuming something about the interesting variable `arg`; however, + // it's irrelevant because in this testcase the out of bound access is + // deduced from the _lower_ bound on `arg`. Currently the analyzer cannot + // filter out assumptions that are logically irrelevant but "touch" + // interesting symbols; eventually it would be good to add support for this. + + // expected-note@+2 {{Assuming 'arg' is >= 0}} + // expected-note@+1 {{Taking false branch}} + if (arg < 0) + return 0; + int a = array[arg]; + // expected-note@-1 {{Assuming index is less than 10, the number of 'int' elements in 'array'}} + int b = array[arg + 10]; + // expected-warning@-1 {{Out of bound access to memory after the end of 'array'}} + // expected-note@-2 {{Access of 'array' at an overflowing index, while it holds only 10 'int' elements}} + return a + b; +} + +int assumingUpperUnsigned(unsigned arg) { + int a = array[arg]; + // expected-note@-1 {{Assuming index is less than 10, the number of 'int' elements in 'array'}} + int b = array[(int)arg - 10]; + // expected-warning@-1 {{Out of bound access to memory preceding 'array'}} + // expected-note@-2 {{Access of 'array' at negative byte offset}} + return a + b; +} + +int assumingNothing(unsigned arg) { + // expected-note@+2 {{Assuming 'arg' is < 10}} + // expected-note@+1 {{Taking false branch}} + if (arg >= 10) + return 0; + int a = array[arg]; // no note here, we already know that 'arg' is in bounds + int b = array[(int)arg - 10]; + // expected-warning@-1 {{Out of bound access to memory preceding 'array'}} + // expected-note@-2 {{Access of 'array' at negative byte offset}} + return a + b; +} + +short assumingConverted(int arg) { + // When indices are reported, the note will use the element type that's the + // result type of the subscript operator. + char *cp = (char*)array; + char a = cp[arg]; + // expected-note@-1 {{Assuming index is non-negative and less than 40, the number of 'char' elements in 'array'}} + char b = cp[arg]; // no additional note, we already assumed that 'arg' is in bounds + char c = cp[arg + 40]; + // expected-warning@-1 {{Out of bound access to memory after the end of 'array'}} + // expected-note@-2 {{Access of 'array' at an overflowing index, while it holds only 40 'char' elements}} + return a + b + c; +} + +struct foo { + int num; + char a[8]; + char b[5]; +}; + +int assumingConverted2(struct foo f, int arg) { + // When indices are reported, the note will use the element type that's the + // result type of the subscript operator. + int a = ((int*)(f.a))[arg]; + // expected-note@-1 {{Assuming index is non-negative and less than 2, the number of 'int' elements in 'f.a'}} + // However, if the extent of the memory region is not divisible by the + // element size, the checker measures the offset and extent in bytes. + int b = ((int*)(f.b))[arg]; + // expected-note@-1 {{Assuming byte offset is less than 5, the extent of 'f.b'}} + int c = array[arg-2]; + // expected-warning@-1 {{Out of bound access to memory preceding 'array'}} + // expected-note@-2 {{Access of 'array' at negative byte offset}} + return a + b + c; +} >From c0a09db0fbb48626bbb069f85a587d2c94a8998a Mon Sep 17 00:00:00 2001 From: NagyDonat <donat.n...@ericsson.com> Date: Fri, 19 Jan 2024 19:10:54 +0100 Subject: [PATCH 2/4] Remove unused reference to debug.ExprInspection --- clang/test/Analysis/out-of-bounds-notes.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/clang/test/Analysis/out-of-bounds-notes.c b/clang/test/Analysis/out-of-bounds-notes.c index 290df79f3e18a4..3070ab643d832e 100644 --- a/clang/test/Analysis/out-of-bounds-notes.c +++ b/clang/test/Analysis/out-of-bounds-notes.c @@ -1,10 +1,8 @@ // RUN: %clang_analyze_cc1 -Wno-array-bounds -analyzer-output=text \ -// RUN: -analyzer-checker=core,alpha.security.ArrayBoundV2,unix.Malloc,alpha.security.taint,debug.ExprInspection -verify %s +// RUN: -analyzer-checker=core,alpha.security.ArrayBoundV2,unix.Malloc,alpha.security.taint -verify %s int array[10]; -void clang_analyzer_value(unsigned arg); - int irrelevantAssumptions(int arg) { int a = array[arg]; // Here the analyzer assumes that `arg` is in bounds, but doesn't report this >From 18c00f79123ed8b22feefdb3b4bcc2f08b59c79b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Don=C3=A1t=20Nagy?= <donat.n...@ericsson.com> Date: Mon, 22 Jan 2024 15:26:57 +0100 Subject: [PATCH 3/4] Improve comments (as suggested on code review) --- .../Checkers/ArrayBoundCheckerV2.cpp | 30 +++++++++++-------- 1 file changed, 17 insertions(+), 13 deletions(-) diff --git a/clang/lib/StaticAnalyzer/Checkers/ArrayBoundCheckerV2.cpp b/clang/lib/StaticAnalyzer/Checkers/ArrayBoundCheckerV2.cpp index 4241fa3a2ce8af..593095a2bebafe 100644 --- a/clang/lib/StaticAnalyzer/Checkers/ArrayBoundCheckerV2.cpp +++ b/clang/lib/StaticAnalyzer/Checkers/ArrayBoundCheckerV2.cpp @@ -202,12 +202,19 @@ computeOffset(ProgramStateRef State, SValBuilder &SVB, SVal Location) { // Due to the overflow issues I think it's impossible (or at least not // practical) to integrate this kind of simplification into the resolution of // arbitrary inequalities (i.e. the code of `evalBinOp`); but this function -// produces valid results if the arguments are memory offsets which are known -// to be << SIZE_MAX. +// produces valid results when the calculations are handling memory offsets +// and every value is well below SIZE_MAX. // TODO: This algorithm should be moved to a central location where it's // available for other checkers that need to compare memory offsets. -// NOTE: When using the results of this function, don't forget that `evalBinOp` -// uses the evaluation rules of C++, so e.g. `(size_t)123 < -1`! +// NOTE: the simplification preserves the order of the two operands in a +// mathematical sense, but it may change the result produced by a C++ +// comparison operator (and the automatic type conversions). +// For example, consider a comparison "X+1 < 0", where the LHS is stored as a +// size_t and the RHS is stored in an int. (As size_t is unsigned, this +// comparison is false for all values of "X".) However, the simplification may +// turn it into "X < -1", which is still always false in a mathematical sense, +// but can produce a true result when evaluated by `evalBinOp` (which follows +// the rules of C++ and casts -1 to SIZE_MAX). static std::pair<NonLoc, nonloc::ConcreteInt> getSimplifiedOffsets(NonLoc offset, nonloc::ConcreteInt extent, SValBuilder &svalBuilder) { @@ -401,11 +408,9 @@ const NoteTag *StateUpdateReporter::createNoteTag(CheckerContext &C) const { if (!AssumedNonNegative && !AssumedUpperBound) return nullptr; - return C.getNoteTag( - [*this](PathSensitiveBugReport &BR) -> std::string { - return getMessage(BR); - }, - /*isPrunable=*/false); + return C.getNoteTag([*this](PathSensitiveBugReport &BR) -> std::string { + return getMessage(BR); + }); } std::string StateUpdateReporter::getMessage(PathSensitiveBugReport &BR) const { @@ -468,8 +473,9 @@ bool StateUpdateReporter::providesInformationAboutInteresting( // the SymIntExpr, UnarySymExpr etc. layers... if (BR.isInteresting(PartSym)) return true; - // ...but if both sides of the expression are symbolic (i.e. unknown), then - // the analyzer can't use the combined result to constrain the operands. + // ...but if both sides of the expression are symbolic, then there is no + // practical algorithm to produce separate constraints for the two + // operands (from the single combined result). if (isa<SymSymExpr>(PartSym)) return false; } @@ -549,8 +555,6 @@ void ArrayBoundCheckerV2::performCheck(const Expr *E, CheckerContext &C) const { // expression that calculates the past-the-end pointer. if (isIdiomaticPastTheEndPtr(E, ExceedsUpperBound, ByteOffset, *KnownSize, C)) { - // FIXME: this duplicates the `addTransition` at the end of the - // function, but `goto` is taboo nowdays. C.addTransition(ExceedsUpperBound, SUR.createNoteTag(C)); return; } >From 6ed41d51078796983983b777af6067056de78299 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Don=C3=A1t=20Nagy?= <donat.n...@ericsson.com> Date: Mon, 22 Jan 2024 15:56:30 +0100 Subject: [PATCH 4/4] Make a function 'static' --- clang/lib/StaticAnalyzer/Checkers/ArrayBoundCheckerV2.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/clang/lib/StaticAnalyzer/Checkers/ArrayBoundCheckerV2.cpp b/clang/lib/StaticAnalyzer/Checkers/ArrayBoundCheckerV2.cpp index 593095a2bebafe..e6f0cc0cbf0dd3 100644 --- a/clang/lib/StaticAnalyzer/Checkers/ArrayBoundCheckerV2.cpp +++ b/clang/lib/StaticAnalyzer/Checkers/ArrayBoundCheckerV2.cpp @@ -334,8 +334,8 @@ static Messages getPrecedesMsgs(const SubRegion *Region, NonLoc Offset) { /// it can be performed (`Divisor` is nonzero and there is no remainder). The /// values `Val1` and `Val2` may be nullopt and in that case the corresponding /// division is considered to be successful. -bool tryDividePair(std::optional<int64_t> &Val1, std::optional<int64_t> &Val2, - int64_t Divisor) { +static bool tryDividePair(std::optional<int64_t> &Val1, + std::optional<int64_t> &Val2, int64_t Divisor) { if (!Divisor) return false; const bool Val1HasRemainder = Val1 && *Val1 % Divisor; _______________________________________________ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits