vlad.tsyrklevich updated this revision to Diff 84517. vlad.tsyrklevich added a comment.
Artem, thank you for the very detailed reply! Between this, and hitting the right search terms to find your clang analyzer guide my understanding of the symbol abstractions the analyzer exposes has improved significantly. You state that it's not easy to derive the conjured symbols from the Store; however, it didn't seem to be too difficult to do by recursively finding the bindings for the constituent FieldRegions (if the LCV is backing a struct/union) or the first ElementRegion (if the LCV is backing an array) until you reach an element/field initialized with the conjured symbol. Does the new patch look correct to you? Your comment about the difficulty has me unsure whether I've fully grasped the scope of the problem. One wrinkle you'll notice is in the patch to `taint-tester.c`, one FIXME for missing taint has been fixed, but now one variable that should not be tainted is! This seems to be because of overeager invalidation, not strictly the fault of the patch but it is exposed by it. https://reviews.llvm.org/D28445 Files: lib/StaticAnalyzer/Checkers/GenericTaintChecker.cpp lib/StaticAnalyzer/Core/ProgramState.cpp test/Analysis/taint-generic.c test/Analysis/taint-tester.c
Index: test/Analysis/taint-tester.c =================================================================== --- test/Analysis/taint-tester.c +++ test/Analysis/taint-tester.c @@ -51,8 +51,9 @@ scanf("%d", &xy.y); scanf("%d", &xy.x); int tx = xy.x; // expected-warning + {{tainted}} - int ty = xy.y; // FIXME: This should be tainted as well. - char ntz = xy.z;// no warning + int ty = xy.y; // expected-warning + {{tainted}} + // FIXME: xy.z should not be tainted + char ntz = xy.z; // expected-warning + {{tainted}} // Now, scanf scans both. scanf("%d %d", &xy.y, &xy.x); int ttx = xy.x; // expected-warning + {{tainted}} Index: test/Analysis/taint-generic.c =================================================================== --- test/Analysis/taint-generic.c +++ test/Analysis/taint-generic.c @@ -169,6 +169,26 @@ sock = socket(AF_LOCAL, SOCK_STREAM, 0); read(sock, buffer, 100); execl(buffer, "filename", 0); // no-warning + + sock = socket(AF_INET, SOCK_STREAM, 0); + // References to both buffer and &buffer as an argument should taint the argument + read(sock, &buffer, 100); + execl(buffer, "filename", 0); // expected-warning {{Untrusted data is passed to a system call}} +} + +void testStruct() { + struct { + struct {} st; + char buf[16]; + int length; + } tainted; + + char buffer[16]; + int sock; + + sock = socket(AF_INET, SOCK_STREAM, 0); + read(sock, &tainted, sizeof(tainted)); + __builtin_memcpy(buffer, tainted.buf, tainted.length); // expected-warning {{Untrusted data is used to specify the buffer size}} } int testDivByZero() { Index: lib/StaticAnalyzer/Core/ProgramState.cpp =================================================================== --- lib/StaticAnalyzer/Core/ProgramState.cpp +++ lib/StaticAnalyzer/Core/ProgramState.cpp @@ -664,6 +664,10 @@ while (const SymbolCast *SC = dyn_cast<SymbolCast>(Sym)) Sym = SC->getOperand(); + // If this a derived symbol, taint the parent symbol. + if (const SymbolDerived *SD = dyn_cast<SymbolDerived>(Sym)) + Sym = SD->getParentSymbol(); + ProgramStateRef NewState = set<TaintMap>(Sym, Kind); assert(NewState); return NewState; Index: lib/StaticAnalyzer/Checkers/GenericTaintChecker.cpp =================================================================== --- lib/StaticAnalyzer/Checkers/GenericTaintChecker.cpp +++ lib/StaticAnalyzer/Checkers/GenericTaintChecker.cpp @@ -65,6 +65,11 @@ /// and thus, is tainted. static bool isStdin(const Expr *E, CheckerContext &C); + /// \brief Given a LazyCompoundValue, get the symbol of the first FieldRegion/ + /// ElementRegion we can find a binding for. + static SymbolRef getLCVSymbol(CheckerContext &C, + nonloc::LazyCompoundVal &LCV); + /// \brief Given a pointer argument, get the symbol of the value it contains /// (points to). static SymbolRef getPointedToSymbol(CheckerContext &C, const Expr *Arg); @@ -423,6 +428,43 @@ return false; } +SymbolRef GenericTaintChecker::getLCVSymbol(CheckerContext &C, + nonloc::LazyCompoundVal &LCV) { + StoreManager &StoreMgr = C.getStoreManager(); + MemRegionManager &MrMgr = C.getSValBuilder().getRegionManager(); + + QualType T = LCV.getRegion()->getValueType(); + if (T->isStructureType() || T->isUnionType()) { + const RecordType *RT = T->getAsStructureType(); + if (!RT) + RT = T->getAsUnionType(); + + const RecordDecl *RD = RT->getDecl()->getDefinition(); + for (const auto *I : RD->fields()) { + const FieldRegion *FR = MrMgr.getFieldRegion(I, LCV.getRegion()); + const SVal &V = + StoreMgr.getBinding(LCV.getStore(), loc::MemRegionVal(FR)); + if (auto Sym = V.getAsSymbol()) + return Sym; + else if (auto _LCV = V.getAs<nonloc::LazyCompoundVal>()) { + if (auto Sym = getLCVSymbol(C, *_LCV)) + return Sym; + } + } + } else if (T->isArrayType()) { + const ArrayType *AT = T->getAsArrayTypeUnsafe(); + const ElementRegion *ER = + StoreMgr.GetElementZeroRegion(LCV.getRegion(), AT->getElementType()); + const SVal &V = StoreMgr.getBinding(LCV.getStore(), loc::MemRegionVal(ER)); + if (auto Sym = V.getAsSymbol()) + return Sym; + else if (auto _LCV = V.getAs<nonloc::LazyCompoundVal>()) + return getLCVSymbol(C, *_LCV); + } + + return nullptr; +} + SymbolRef GenericTaintChecker::getPointedToSymbol(CheckerContext &C, const Expr* Arg) { ProgramStateRef State = C.getState(); @@ -438,6 +480,10 @@ dyn_cast<PointerType>(Arg->getType().getCanonicalType().getTypePtr()); SVal Val = State->getSVal(*AddrLoc, ArgTy ? ArgTy->getPointeeType(): QualType()); + + if (auto LCV = Val.getAs<nonloc::LazyCompoundVal>()) + return getLCVSymbol(C, *LCV); + return Val.getAsSymbol(); }
_______________________________________________ cfe-commits mailing list cfe-commits@lists.llvm.org http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits