donat.nagy updated this revision to Diff 522544.
donat.nagy edited the summary of this revision.
donat.nagy added a comment.
I managed to debug the issue that plagued my commit (see `test_multidim_zero()`
for details, I think I'll fix it in a followup change); and I have uploaded a
new version of this patch with testcases that highlight the improvements
introduced by it.
@steakhal If you need any additional clarification, feel free to ask me.
Repository:
rG LLVM Github Monorepo
CHANGES SINCE LAST ACTION
https://reviews.llvm.org/D150446/new/
https://reviews.llvm.org/D150446
Files:
clang/lib/StaticAnalyzer/Checkers/ArrayBoundCheckerV2.cpp
clang/test/Analysis/out-of-bounds.c
Index: clang/test/Analysis/out-of-bounds.c
===================================================================
--- clang/test/Analysis/out-of-bounds.c
+++ clang/test/Analysis/out-of-bounds.c
@@ -174,3 +174,49 @@
clang_analyzer_eval(x <= 99); // expected-warning{{TRUE}}
}
+void use_ptr(int *arg) {
+ *arg = 42; //no-warning
+}
+
+void test_indirect_use(void) {
+ int arr[10];
+ use_ptr(&arr[100]); // expected-warning{{Out of bound memory access}}
+}
+
+extern int matrix[5][5];
+
+int test_multidim_zero(void) {
+ // TODO: Currently this does not warn because MemRegion::stripCasts() uses an
+ // overzealuos heuristic and thinks that matrix[0] just represents a cast
+ // expression and not a real indexing operation.
+ return matrix[0][10]; // expected - warning{{Out of bound memory access}}
+}
+
+int test_multidim_generic(void) {
+ // This is the generic case that does not trigger the stripCasts() issue:
+ return matrix[1][10]; // expected-warning{{Out of bound memory access}}
+}
+
+struct s {
+ int field;
+} arr[10] = {0};
+
+int test_field(void) {
+ return arr[20].field; // expected-warning{{Out of bound memory access}}
+}
+
+typedef struct {
+ int id;
+ char name[256];
+} user_t;
+
+char test_in_unknown_space(user_t *userptr) {
+ return userptr->name[-1]; // expected-warning{{Out of bound memory access}}
+}
+
+void test_undersized_region(void) {
+ // TODO: Currently the checker does not look for this kind of error.
+ char arr[2];
+ long *p = (long *)arr;
+ p[0] = 3; // expected - warning{{Out of bound memory access}}
+}
Index: clang/lib/StaticAnalyzer/Checkers/ArrayBoundCheckerV2.cpp
===================================================================
--- clang/lib/StaticAnalyzer/Checkers/ArrayBoundCheckerV2.cpp
+++ clang/lib/StaticAnalyzer/Checkers/ArrayBoundCheckerV2.cpp
@@ -30,8 +30,7 @@
using namespace taint;
namespace {
-class ArrayBoundCheckerV2 :
- public Checker<check::Location> {
+class ArrayBoundCheckerV2 : public Checker<check::PreStmt<ArraySubscriptExpr>> {
mutable std::unique_ptr<BuiltinBug> BT;
mutable std::unique_ptr<BugType> TaintBT;
@@ -45,8 +44,7 @@
static bool isFromCtypeMacro(const Stmt *S, ASTContext &AC);
public:
- void checkLocation(SVal l, bool isLoad, const Stmt *S,
- CheckerContext &C) const;
+ void checkPreStmt(const ArraySubscriptExpr *ASE, CheckerContext &C) const;
};
// FIXME: Eventually replace RegionRawOffset with this class.
@@ -63,7 +61,8 @@
const SubRegion *getRegion() const { return baseRegion; }
static std::optional<RegionRawOffsetV2>
- computeOffset(ProgramStateRef State, SValBuilder &SVB, SVal Location);
+ computeOffset(ProgramStateRef State, SValBuilder &SVB,
+ const LocationContext *LCtx, const ArraySubscriptExpr *ASE);
void dump() const;
void dumpToStream(raw_ostream &os) const;
@@ -86,8 +85,8 @@
APSIntType(extent.getValue()).convert(SIE->getRHS());
switch (SIE->getOpcode()) {
case BO_Mul:
- // The constant should never be 0 here, since it the result of scaling
- // based on the size of a type which is never 0.
+ // In a SymIntExpr multiplication the constant cannot be 0, because
+ // then the enginge would've replaced it with a constant 0 value.
if ((extent.getValue() % constant) != 0)
return std::pair<NonLoc, nonloc::ConcreteInt>(offset, extent);
else
@@ -140,44 +139,41 @@
return {nullptr, nullptr};
}
-void ArrayBoundCheckerV2::checkLocation(SVal location, bool isLoad,
- const Stmt* LoadS,
- CheckerContext &checkerContext) const {
-
+void ArrayBoundCheckerV2::checkPreStmt(const ArraySubscriptExpr *ASE,
+ CheckerContext &C) const {
// NOTE: Instead of using ProgramState::assumeInBound(), we are prototyping
// some new logic here that reasons directly about memory region extents.
// Once that logic is more mature, we can bring it back to assumeInBound()
// for all clients to use.
- //
- // The algorithm we are using here for bounds checking is to see if the
- // memory access is within the extent of the base region. Since we
- // have some flexibility in defining the base region, we can achieve
- // various levels of conservatism in our buffer overflow checking.
// The header ctype.h (from e.g. glibc) implements the isXXXXX() macros as
// #define isXXXXX(arg) (LOOKUP_TABLE[arg] & BITMASK_FOR_XXXXX)
// and incomplete analysis of these leads to false positives. As even
// accurate reports would be confusing for the users, just disable reports
// from these macros:
- if (isFromCtypeMacro(LoadS, checkerContext.getASTContext()))
+ if (isFromCtypeMacro(ASE, C.getASTContext()))
return;
- ProgramStateRef state = checkerContext.getState();
+ ProgramStateRef state = C.getState();
+
+ SValBuilder &svalBuilder = C.getSValBuilder();
+
+ const LocationContext *LCtx = C.getLocationContext();
- SValBuilder &svalBuilder = checkerContext.getSValBuilder();
const std::optional<RegionRawOffsetV2> &RawOffset =
- RegionRawOffsetV2::computeOffset(state, svalBuilder, location);
+ RegionRawOffsetV2::computeOffset(state, svalBuilder, LCtx, ASE);
if (!RawOffset)
return;
NonLoc ByteOffset = RawOffset->getByteOffset();
+ const SubRegion *Reg = RawOffset->getRegion();
// CHECK LOWER BOUND
- const MemSpaceRegion *SR = RawOffset->getRegion()->getMemorySpace();
- if (!llvm::isa<UnknownSpaceRegion>(SR)) {
- // A pointer to UnknownSpaceRegion may point to the middle of
- // an allocated region.
+ const MemSpaceRegion *Space = Reg->getMemorySpace();
+ if (!(isa<SymbolicRegion>(Reg) && isa<UnknownSpaceRegion>(Space))) {
+ // Symbolic regions in unknown spaces are used for modelling unknown
+ // pointers, so they may point to the middle of an array.
auto [state_precedesLowerBound, state_withinLowerBound] =
compareValueToThreshold(state, ByteOffset,
@@ -185,7 +181,7 @@
if (state_precedesLowerBound && !state_withinLowerBound) {
// We know that the index definitely precedes the lower bound.
- reportOOB(checkerContext, state_precedesLowerBound, OOB_Precedes);
+ reportOOB(C, state_precedesLowerBound, OOB_Precedes);
return;
}
@@ -194,8 +190,7 @@
}
// CHECK UPPER BOUND
- DefinedOrUnknownSVal Size =
- getDynamicExtent(state, RawOffset->getRegion(), svalBuilder);
+ DefinedOrUnknownSVal Size = getDynamicExtent(state, Reg, svalBuilder);
if (auto KnownSize = Size.getAs<NonLoc>()) {
auto [state_withinUpperBound, state_exceedsUpperBound] =
compareValueToThreshold(state, ByteOffset, *KnownSize, svalBuilder);
@@ -203,12 +198,12 @@
if (state_exceedsUpperBound) {
if (!state_withinUpperBound) {
// We know that the index definitely exceeds the upper bound.
- reportOOB(checkerContext, state_exceedsUpperBound, OOB_Excedes);
+ reportOOB(C, state_exceedsUpperBound, OOB_Excedes);
return;
}
if (isTainted(state, ByteOffset)) {
// Both cases are possible, but the index is tainted, so report.
- reportTaintOOB(checkerContext, state_exceedsUpperBound, ByteOffset);
+ reportTaintOOB(C, state_exceedsUpperBound, ByteOffset);
return;
}
}
@@ -217,7 +212,7 @@
state = state_withinUpperBound;
}
- checkerContext.addTransition(state);
+ C.addTransition(state);
}
void ArrayBoundCheckerV2::reportTaintOOB(CheckerContext &checkerContext,
@@ -303,53 +298,50 @@
}
#endif
-/// For a given Location that can be represented as a symbolic expression
-/// Arr[Idx] (or perhaps Arr[Idx1][Idx2] etc.), return the parent memory block
-/// Arr and the distance of Location from the beginning of Arr (expressed in a
-/// NonLoc that specifies the number of CharUnits). Returns nullopt when these
-/// cannot be determined.
+/// For a given ArraySubscriptExpr 'Ptr[Idx]', calculate a (Region, Offset)
+/// pair where Region is the memory area that may be legitimately accessed via
+/// Ptr and Offset is the distance of Ptr[Idx] from the beginning of Region
+/// (expressed as a NonLoc that specifies the number of CharUnits). Returns
+/// nullopt when these cannot be determined.
std::optional<RegionRawOffsetV2>
RegionRawOffsetV2::computeOffset(ProgramStateRef State, SValBuilder &SVB,
- SVal Location) {
- QualType T = SVB.getArrayIndexType();
- auto Calc = [&SVB, State, T](BinaryOperatorKind Op, NonLoc LHS, NonLoc RHS) {
- // We will use this utility to add and multiply values.
- return SVB.evalBinOpNN(State, Op, LHS, RHS, T).getAs<NonLoc>();
- };
-
- const MemRegion *Region = Location.getAsRegion();
- NonLoc Offset = SVB.makeZeroArrayIndex();
-
- while (Region) {
- if (const auto *ERegion = dyn_cast<ElementRegion>(Region)) {
- if (const auto Index = ERegion->getIndex().getAs<NonLoc>()) {
- QualType ElemType = ERegion->getElementType();
- // If the element is an incomplete type, go no further.
- if (ElemType->isIncompleteType())
- return std::nullopt;
-
- // Perform Offset += Index * sizeof(ElemType); then continue the offset
- // calculations with SuperRegion:
- NonLoc Size = SVB.makeArrayIndex(
- SVB.getContext().getTypeSizeInChars(ElemType).getQuantity());
- if (auto Delta = Calc(BO_Mul, *Index, Size)) {
- if (auto NewOffset = Calc(BO_Add, Offset, *Delta)) {
- Offset = *NewOffset;
- Region = ERegion->getSuperRegion();
- continue;
- }
- }
+ const LocationContext *LCtx,
+ const ArraySubscriptExpr *ASE) {
+ QualType ElemType = ASE->getType();
+ assert(!ElemType->isIncompleteType());
+
+ QualType IdxType = SVB.getArrayIndexType();
+
+ if (auto Idx = State->getSVal(ASE->getIdx(), LCtx).getAs<NonLoc>()) {
+ const MemRegion *R = State->getSVal(ASE->getBase(), LCtx).getAsRegion();
+ const ElementRegion *ER;
+ while ((ER = dyn_cast_or_null<ElementRegion>(R)) &&
+ ER->getElementType() == ElemType) {
+ // Special case to strip ElementRegion layers that represent pointer
+ // arithmetics. For example in code like
+ // char Text[] = "Hello world!", *Word = Text+6;
+ // the area *Word is represented as Element{<Text>, 6 S64B, char} which
+ // is nominally a single byte area, but logically we want to handle
+ // Word[X] equivalently to Text[X + 6]. On the other hand if we have
+ // int Matrix[10][10];
+ // we want to warn when we see Matrix[0][20] because this is logically
+ // invalid and technically invokes undefined behavior when it takes the
+ // 20th element of the int[10] value Matrix[0].
+ R = ER->getSuperRegion();
+ Idx = SVB.evalBinOpNN(State, BO_Add, *Idx, ER->getIndex(), IdxType)
+ .getAs<NonLoc>();
+ if (!Idx)
+ return std::nullopt;
+ }
+
+ if (const auto *SRegion = dyn_cast_or_null<SubRegion>(R)) {
+ NonLoc ESize = SVB.makeArrayIndex(
+ SVB.getContext().getTypeSizeInChars(ElemType).getQuantity());
+ SVal Offset = SVB.evalBinOpNN(State, BO_Mul, ESize, *Idx, IdxType);
+ if (const auto OffsetNL = Offset.getAs<NonLoc>()) {
+ return RegionRawOffsetV2(SRegion, *OffsetNL);
}
- } else if (const auto *SRegion = dyn_cast<SubRegion>(Region)) {
- // NOTE: The dyn_cast<>() is expected to succeed, it'd be very surprising
- // to see a MemSpaceRegion at this point.
- // FIXME: We may return with {<Region>, 0} even if we didn't handle any
- // ElementRegion layers. I think that this behavior was introduced
- // accidentally by 8a4c760c204546aba566e302f299f7ed2e00e287 in 2011, so
- // it may be useful to review it in the future.
- return RegionRawOffsetV2(SRegion, Offset);
}
- return std::nullopt;
}
return std::nullopt;
}
_______________________________________________
cfe-commits mailing list
[email protected]
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits