Author: Donát Nagy Date: 2023-04-26T15:02:23+02:00 New Revision: de2547329b41ad6ea4ea876d12731bde5a6b64c5
URL: https://github.com/llvm/llvm-project/commit/de2547329b41ad6ea4ea876d12731bde5a6b64c5 DIFF: https://github.com/llvm/llvm-project/commit/de2547329b41ad6ea4ea876d12731bde5a6b64c5.diff LOG: [analyzer] Fix comparison logic in ArrayBoundCheckerV2 The prototype checker alpha.security.ArrayBoundV2 performs two comparisons to check that in an expression like Array[Index] 0 <= Index < length(Array) holds. These comparisons are handled by almost identical logic: the inequality is first rearranged by getSimplifiedOffsets(), then evaluated with evalBinOpNN(). However the simplification used "naive" elementary mathematical schematics, but evalBinOpNN() performed the signed -> unsigned conversions described in the C/C++ standards, and this confusion led to wildly inaccurate results: false positives from the lower bound check and false negatives from the upper bound check. This commit eliminates the code duplication by moving the comparison logic into a separate function, then adds an explicit check to this unified code path, which handles the problematic case separately. In addition to this, the commit also cleans up a testcase that was demonstrating the presence of this problem. Note that while that testcase was failing with an overflow error, its actual problem was in the underflow handler logic: (0) The testcase introduces a five-element array "char a[5]" and an unknown argument "size_t len"; then evaluates "a[len+1]". (1) The underflow check tries to determine whether "len+1 < 0" holds. (2) This inequality is rearranged to "len < -1". (3) evalBinOpNN() evaluates this with the schematics of C/C++ and converts -1 to the size_t value SIZE_MAX. (4) The engine concludes that len == SIZE_MAX, because otherwise we'd have an underflow here. (5) The overflow check tries to determine whether "len+1 >= 5". (6) This inequality is rearranged to "len >= 4". (7) The engine substitutes len == SIZE_MAX and reports that we have an overflow. Differential Revision: https://reviews.llvm.org/D135375 Added: clang/test/Analysis/array-bound-v2-constraint-check.c Modified: clang/lib/StaticAnalyzer/Checkers/ArrayBoundCheckerV2.cpp Removed: clang/test/Analysis/out-of-bounds-false-positive.c ################################################################################ diff --git a/clang/lib/StaticAnalyzer/Checkers/ArrayBoundCheckerV2.cpp b/clang/lib/StaticAnalyzer/Checkers/ArrayBoundCheckerV2.cpp index 142577174ead8..a476613b6dcc7 100644 --- a/clang/lib/StaticAnalyzer/Checkers/ArrayBoundCheckerV2.cpp +++ b/clang/lib/StaticAnalyzer/Checkers/ArrayBoundCheckerV2.cpp @@ -57,8 +57,8 @@ class RegionRawOffsetV2 { : baseRegion(nullptr), byteOffset(UnknownVal()) {} public: - RegionRawOffsetV2(const SubRegion* base, SVal offset) - : baseRegion(base), byteOffset(offset) {} + RegionRawOffsetV2(const SubRegion *base, NonLoc offset) + : baseRegion(base), byteOffset(offset) { assert(base); } NonLoc getByteOffset() const { return byteOffset.castAs<NonLoc>(); } const SubRegion *getRegion() const { return baseRegion; } @@ -72,19 +72,12 @@ class RegionRawOffsetV2 { }; } -static SVal computeExtentBegin(SValBuilder &svalBuilder, - const MemRegion *region) { - const MemSpaceRegion *SR = region->getMemorySpace(); - if (SR->getKind() == MemRegion::UnknownSpaceRegionKind) - return UnknownVal(); - else - return svalBuilder.makeZeroArrayIndex(); -} - // TODO: once the constraint manager is smart enough to handle non simplified // symbolic expressions remove this function. Note that this can not be used in // the constraint manager as is, since this does not handle overflows. It is // safe to assume, however, that memory offsets will not overflow. +// NOTE: callers of this function need to be aware of the effects of overflows +// and signed<->unsigned conversions! static std::pair<NonLoc, nonloc::ConcreteInt> getSimplifiedOffsets(NonLoc offset, nonloc::ConcreteInt extent, SValBuilder &svalBuilder) { @@ -117,6 +110,38 @@ getSimplifiedOffsets(NonLoc offset, nonloc::ConcreteInt extent, return std::pair<NonLoc, nonloc::ConcreteInt>(offset, extent); } +// Evaluate the comparison Value < Threshold with the help of the custom +// simplification algorithm defined for this checker. Return a pair of states, +// where the first one corresponds to "value below threshold" and the second +// corresponds to "value at or above threshold". Returns {nullptr, nullptr} in +// the case when the evaluation fails. +static std::pair<ProgramStateRef, ProgramStateRef> +compareValueToThreshold(ProgramStateRef State, NonLoc Value, NonLoc Threshold, + SValBuilder &SVB) { + if (auto ConcreteThreshold = Threshold.getAs<nonloc::ConcreteInt>()) { + std::tie(Value, Threshold) = getSimplifiedOffsets(Value, *ConcreteThreshold, SVB); + } + if (auto ConcreteThreshold = Threshold.getAs<nonloc::ConcreteInt>()) { + QualType T = Value.getType(SVB.getContext()); + if (T->isUnsignedIntegerType() && ConcreteThreshold->getValue().isNegative()) { + // In this case we reduced the bound check to a comparison of the form + // (symbol or value with unsigned type) < (negative number) + // which is always false. We are handling these cases separately because + // evalBinOpNN can perform a signed->unsigned conversion that turns the + // negative number into a huge positive value and leads to wildly + // inaccurate conclusions. + return {nullptr, State}; + } + } + auto BelowThreshold = + SVB.evalBinOpNN(State, BO_LT, Value, Threshold, SVB.getConditionType()).getAs<NonLoc>(); + + if (BelowThreshold) + return State->assume(*BelowThreshold); + + return {nullptr, nullptr}; +} + void ArrayBoundCheckerV2::checkLocation(SVal location, bool isLoad, const Stmt* LoadS, CheckerContext &checkerContext) const { @@ -139,95 +164,55 @@ void ArrayBoundCheckerV2::checkLocation(SVal location, bool isLoad, if (!rawOffset.getRegion()) return; - NonLoc rawOffsetVal = rawOffset.getByteOffset(); + NonLoc ByteOffset = rawOffset.getByteOffset(); - // CHECK LOWER BOUND: Is byteOffset < extent begin? - // If so, we are doing a load/store - // before the first valid offset in the memory region. - - SVal extentBegin = computeExtentBegin(svalBuilder, rawOffset.getRegion()); - - if (std::optional<NonLoc> NV = extentBegin.getAs<NonLoc>()) { - if (auto ConcreteNV = NV->getAs<nonloc::ConcreteInt>()) { - std::pair<NonLoc, nonloc::ConcreteInt> simplifiedOffsets = - getSimplifiedOffsets(rawOffset.getByteOffset(), *ConcreteNV, - svalBuilder); - rawOffsetVal = simplifiedOffsets.first; - *NV = simplifiedOffsets.second; - } + // CHECK LOWER BOUND + const MemSpaceRegion *SR = rawOffset.getRegion()->getMemorySpace(); + if (!llvm::isa<UnknownSpaceRegion>(SR)) { + // A pointer to UnknownSpaceRegion may point to the middle of + // an allocated region. - SVal lowerBound = svalBuilder.evalBinOpNN(state, BO_LT, rawOffsetVal, *NV, - svalBuilder.getConditionType()); + auto [state_precedesLowerBound, state_withinLowerBound] = + compareValueToThreshold(state, ByteOffset, + svalBuilder.makeZeroArrayIndex(), svalBuilder); - std::optional<NonLoc> lowerBoundToCheck = lowerBound.getAs<NonLoc>(); - if (!lowerBoundToCheck) - return; - - ProgramStateRef state_precedesLowerBound, state_withinLowerBound; - std::tie(state_precedesLowerBound, state_withinLowerBound) = - state->assume(*lowerBoundToCheck); - - // Are we constrained enough to definitely precede the lower bound? if (state_precedesLowerBound && !state_withinLowerBound) { + // We know that the index definitely precedes the lower bound. reportOOB(checkerContext, state_precedesLowerBound, OOB_Precedes); return; } - // Otherwise, assume the constraint of the lower bound. - assert(state_withinLowerBound); - state = state_withinLowerBound; + if (state_withinLowerBound) + state = state_withinLowerBound; } - do { - // CHECK UPPER BOUND: Is byteOffset >= size(baseRegion)? If so, - // we are doing a load/store after the last valid offset. - const MemRegion *MR = rawOffset.getRegion(); - DefinedOrUnknownSVal Size = getDynamicExtent(state, MR, svalBuilder); - if (!isa<NonLoc>(Size)) - break; - - if (auto ConcreteSize = Size.getAs<nonloc::ConcreteInt>()) { - std::pair<NonLoc, nonloc::ConcreteInt> simplifiedOffsets = - getSimplifiedOffsets(rawOffset.getByteOffset(), *ConcreteSize, - svalBuilder); - rawOffsetVal = simplifiedOffsets.first; - Size = simplifiedOffsets.second; - } - - SVal upperbound = svalBuilder.evalBinOpNN(state, BO_GE, rawOffsetVal, - Size.castAs<NonLoc>(), - svalBuilder.getConditionType()); - - std::optional<NonLoc> upperboundToCheck = upperbound.getAs<NonLoc>(); - if (!upperboundToCheck) - break; - - ProgramStateRef state_exceedsUpperBound, state_withinUpperBound; - std::tie(state_exceedsUpperBound, state_withinUpperBound) = - state->assume(*upperboundToCheck); - - // If we are under constrained and the index variables are tainted, report. - if (state_exceedsUpperBound && state_withinUpperBound) { - SVal ByteOffset = rawOffset.getByteOffset(); + // CHECK UPPER BOUND + DefinedOrUnknownSVal Size = + getDynamicExtent(state, rawOffset.getRegion(), svalBuilder); + if (auto KnownSize = Size.getAs<NonLoc>()) { + auto [state_withinUpperBound, state_exceedsUpperBound] = + compareValueToThreshold(state, ByteOffset, *KnownSize, svalBuilder); + + if (state_exceedsUpperBound) { + if (!state_withinUpperBound) { + // We know that the index definitely exceeds the upper bound. + reportOOB(checkerContext, state_exceedsUpperBound, OOB_Excedes); + return; + } if (isTainted(state, ByteOffset)) { + // Both cases are possible, but the index is tainted, so report. reportTaintOOB(checkerContext, state_exceedsUpperBound, ByteOffset); return; } - } else if (state_exceedsUpperBound) { - // If we are constrained enough to definitely exceed the upper bound, - // report. - assert(!state_withinUpperBound); - reportOOB(checkerContext, state_exceedsUpperBound, OOB_Excedes); - return; } - assert(state_withinUpperBound); - state = state_withinUpperBound; + if (state_withinUpperBound) + state = state_withinUpperBound; } - while (false); checkerContext.addTransition(state); } + void ArrayBoundCheckerV2::reportTaintOOB(CheckerContext &checkerContext, ProgramStateRef errorState, SVal TaintedSVal) const { @@ -336,9 +321,8 @@ RegionRawOffsetV2 RegionRawOffsetV2::computeOffset(ProgramStateRef state, switch (region->getKind()) { default: { if (const SubRegion *subReg = dyn_cast<SubRegion>(region)) { - offset = getValue(offset, svalBuilder); - if (!offset.isUnknownOrUndef()) - return RegionRawOffsetV2(subReg, offset); + if (auto Offset = getValue(offset, svalBuilder).getAs<NonLoc>()) + return RegionRawOffsetV2(subReg, *Offset); } return RegionRawOffsetV2(); } diff --git a/clang/test/Analysis/out-of-bounds-false-positive.c b/clang/test/Analysis/array-bound-v2-constraint-check.c similarity index 94% rename from clang/test/Analysis/out-of-bounds-false-positive.c rename to clang/test/Analysis/array-bound-v2-constraint-check.c index 2c63f0791ecb2..dc138196b56a9 100644 --- a/clang/test/Analysis/out-of-bounds-false-positive.c +++ b/clang/test/Analysis/array-bound-v2-constraint-check.c @@ -8,12 +8,11 @@ typedef unsigned long long size_t; const char a[] = "abcd"; // extent: 5 bytes void symbolic_size_t_and_int0(size_t len) { - // FIXME: Should not warn for this. - (void)a[len + 1]; // expected-warning {{Out of bound memory access}} + (void)a[len + 1]; // no-warning // We infered that the 'len' must be in a specific range to make the previous indexing valid. // len: [0,3] - clang_analyzer_eval(len <= 3); // expected - warning {{TRUE}} - clang_analyzer_eval(len <= 2); // expected - warning {{UNKNOWN}} + clang_analyzer_eval(len <= 3); // expected-warning {{TRUE}} + clang_analyzer_eval(len <= 2); // expected-warning {{UNKNOWN}} } void symbolic_size_t_and_int1(size_t len) { _______________________________________________ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits