NoQ created this revision. NoQ added reviewers: dcoughlin, xazax.hun, a_sidorin, rnkovacs, mikhail.ramalho, Szelethus, baloghadamsoftware, Charusso, alexfh. Herald added subscribers: cfe-commits, jdoerfert, dkrupp, donat.nagy, a.sidorin, szepet, mgorny. Herald added a project: clang.
Given how much effort @boga95 is spending on the taint checker, i decided to undig my effort to remove the whole taint API business from the `ProgramState` class and instead implement it as our usual, modern inter-checker communication API. Modern as in "putting all such stuff into the `ProgramState` class clearly doesn't scale, but we still didn't come up with anything better, so let's just put it in the header and see if a better design suddenly emerges in the future after we do it a few hundred times". In other words, this was the plan that is part of the even-more-long-term plan of completely deprecating the `void*`-based Generic Data Map in favor of something that the analyzer core could introspect and help checkers keep track of, which would reduce boilerplate and make developing checkers easier. The patch is rebased on top of the current master, but most likely conflicts with @boga95's latest patches. I'm sorry i didn't do this quickly enough, but better late than never, i suppose. I do not insist on this design, but i believe it looks much cleaner. Repository: rC Clang https://reviews.llvm.org/D59861 Files: clang/include/clang/StaticAnalyzer/Core/BugReporter/BugReporterVisitors.h clang/include/clang/StaticAnalyzer/Core/PathSensitive/ProgramState.h clang/include/clang/StaticAnalyzer/Core/PathSensitive/TaintManager.h clang/include/clang/StaticAnalyzer/Core/PathSensitive/TaintTag.h clang/lib/StaticAnalyzer/Checkers/ArrayBoundCheckerV2.cpp clang/lib/StaticAnalyzer/Checkers/CMakeLists.txt clang/lib/StaticAnalyzer/Checkers/DivZeroChecker.cpp clang/lib/StaticAnalyzer/Checkers/GenericTaintChecker.cpp clang/lib/StaticAnalyzer/Checkers/Taint.cpp clang/lib/StaticAnalyzer/Checkers/Taint.h clang/lib/StaticAnalyzer/Checkers/TaintTesterChecker.cpp clang/lib/StaticAnalyzer/Checkers/VLASizeChecker.cpp clang/lib/StaticAnalyzer/Core/BugReporterVisitors.cpp clang/lib/StaticAnalyzer/Core/CMakeLists.txt clang/lib/StaticAnalyzer/Core/ProgramState.cpp clang/lib/StaticAnalyzer/Core/TaintManager.cpp
Index: clang/lib/StaticAnalyzer/Core/TaintManager.cpp =================================================================== --- clang/lib/StaticAnalyzer/Core/TaintManager.cpp +++ /dev/null @@ -1,22 +0,0 @@ -//== TaintManager.cpp ------------------------------------------ -*- C++ -*--=// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// - -#include "clang/StaticAnalyzer/Core/PathSensitive/TaintManager.h" - -using namespace clang; -using namespace ento; - -void *ProgramStateTrait<TaintMap>::GDMIndex() { - static int index = 0; - return &index; -} - -void *ProgramStateTrait<DerivedSymTaint>::GDMIndex() { - static int index; - return &index; -} Index: clang/lib/StaticAnalyzer/Core/ProgramState.cpp =================================================================== --- clang/lib/StaticAnalyzer/Core/ProgramState.cpp +++ clang/lib/StaticAnalyzer/Core/ProgramState.cpp @@ -16,7 +16,6 @@ #include "clang/StaticAnalyzer/Core/PathSensitive/CallEvent.h" #include "clang/StaticAnalyzer/Core/PathSensitive/ProgramStateTrait.h" #include "clang/StaticAnalyzer/Core/PathSensitive/SubEngine.h" -#include "clang/StaticAnalyzer/Core/PathSensitive/TaintManager.h" #include "clang/StaticAnalyzer/Core/PathSensitive/DynamicTypeMap.h" #include "llvm/Support/raw_ostream.h" @@ -458,9 +457,6 @@ // Print out the tracked dynamic types. printDynamicTypeInfo(this, Out, NL, Sep); - // Print out tainted symbols. - printTaint(Out, NL); - // Print checker-specific data. Mgr.getOwningEngine().printState(Out, this, NL, Sep, LC); } @@ -474,22 +470,6 @@ print(llvm::errs()); } -void ProgramState::printTaint(raw_ostream &Out, - const char *NL) const { - TaintMapImpl TM = get<TaintMap>(); - - if (!TM.isEmpty()) - Out <<"Tainted symbols:" << NL; - - for (TaintMapImpl::iterator I = TM.begin(), E = TM.end(); I != E; ++I) { - Out << I->first << " : " << I->second << NL; - } -} - -void ProgramState::dumpTaint() const { - printTaint(llvm::errs()); -} - AnalysisManager& ProgramState::getAnalysisManager() const { return stateMgr->getOwningEngine().getAnalysisManager(); } @@ -657,166 +637,3 @@ } return true; } - -ProgramStateRef ProgramState::addTaint(const Stmt *S, - const LocationContext *LCtx, - TaintTagType Kind) const { - if (const Expr *E = dyn_cast_or_null<Expr>(S)) - S = E->IgnoreParens(); - - return addTaint(getSVal(S, LCtx), Kind); -} - -ProgramStateRef ProgramState::addTaint(SVal V, - TaintTagType Kind) const { - SymbolRef Sym = V.getAsSymbol(); - if (Sym) - return addTaint(Sym, Kind); - - // If the SVal represents a structure, try to mass-taint all values within the - // structure. For now it only works efficiently on lazy compound values that - // were conjured during a conservative evaluation of a function - either as - // return values of functions that return structures or arrays by value, or as - // values of structures or arrays passed into the function by reference, - // directly or through pointer aliasing. Such lazy compound values are - // characterized by having exactly one binding in their captured store within - // their parent region, which is a conjured symbol default-bound to the base - // region of the parent region. - if (auto LCV = V.getAs<nonloc::LazyCompoundVal>()) { - if (Optional<SVal> binding = getStateManager().StoreMgr->getDefaultBinding(*LCV)) { - if (SymbolRef Sym = binding->getAsSymbol()) - return addPartialTaint(Sym, LCV->getRegion(), Kind); - } - } - - const MemRegion *R = V.getAsRegion(); - return addTaint(R, Kind); -} - -ProgramStateRef ProgramState::addTaint(const MemRegion *R, - TaintTagType Kind) const { - if (const SymbolicRegion *SR = dyn_cast_or_null<SymbolicRegion>(R)) - return addTaint(SR->getSymbol(), Kind); - return this; -} - -ProgramStateRef ProgramState::addTaint(SymbolRef Sym, - TaintTagType Kind) const { - // If this is a symbol cast, remove the cast before adding the taint. Taint - // is cast agnostic. - while (const SymbolCast *SC = dyn_cast<SymbolCast>(Sym)) - Sym = SC->getOperand(); - - ProgramStateRef NewState = set<TaintMap>(Sym, Kind); - assert(NewState); - return NewState; -} - -ProgramStateRef ProgramState::addPartialTaint(SymbolRef ParentSym, - const SubRegion *SubRegion, - TaintTagType Kind) const { - // Ignore partial taint if the entire parent symbol is already tainted. - if (contains<TaintMap>(ParentSym) && *get<TaintMap>(ParentSym) == Kind) - return this; - - // Partial taint applies if only a portion of the symbol is tainted. - if (SubRegion == SubRegion->getBaseRegion()) - return addTaint(ParentSym, Kind); - - const TaintedSubRegions *SavedRegs = get<DerivedSymTaint>(ParentSym); - TaintedSubRegions Regs = - SavedRegs ? *SavedRegs : stateMgr->TSRFactory.getEmptyMap(); - - Regs = stateMgr->TSRFactory.add(Regs, SubRegion, Kind); - ProgramStateRef NewState = set<DerivedSymTaint>(ParentSym, Regs); - assert(NewState); - return NewState; -} - -bool ProgramState::isTainted(const Stmt *S, const LocationContext *LCtx, - TaintTagType Kind) const { - if (const Expr *E = dyn_cast_or_null<Expr>(S)) - S = E->IgnoreParens(); - - SVal val = getSVal(S, LCtx); - return isTainted(val, Kind); -} - -bool ProgramState::isTainted(SVal V, TaintTagType Kind) const { - if (const SymExpr *Sym = V.getAsSymExpr()) - return isTainted(Sym, Kind); - if (const MemRegion *Reg = V.getAsRegion()) - return isTainted(Reg, Kind); - return false; -} - -bool ProgramState::isTainted(const MemRegion *Reg, TaintTagType K) const { - if (!Reg) - return false; - - // Element region (array element) is tainted if either the base or the offset - // are tainted. - if (const ElementRegion *ER = dyn_cast<ElementRegion>(Reg)) - return isTainted(ER->getSuperRegion(), K) || isTainted(ER->getIndex(), K); - - if (const SymbolicRegion *SR = dyn_cast<SymbolicRegion>(Reg)) - return isTainted(SR->getSymbol(), K); - - if (const SubRegion *ER = dyn_cast<SubRegion>(Reg)) - return isTainted(ER->getSuperRegion(), K); - - return false; -} - -bool ProgramState::isTainted(SymbolRef Sym, TaintTagType Kind) const { - if (!Sym) - return false; - - // Traverse all the symbols this symbol depends on to see if any are tainted. - for (SymExpr::symbol_iterator SI = Sym->symbol_begin(), SE =Sym->symbol_end(); - SI != SE; ++SI) { - if (!isa<SymbolData>(*SI)) - continue; - - if (const TaintTagType *Tag = get<TaintMap>(*SI)) { - if (*Tag == Kind) - return true; - } - - if (const SymbolDerived *SD = dyn_cast<SymbolDerived>(*SI)) { - // If this is a SymbolDerived with a tainted parent, it's also tainted. - if (isTainted(SD->getParentSymbol(), Kind)) - return true; - - // If this is a SymbolDerived with the same parent symbol as another - // tainted SymbolDerived and a region that's a sub-region of that tainted - // symbol, it's also tainted. - if (const TaintedSubRegions *Regs = - get<DerivedSymTaint>(SD->getParentSymbol())) { - const TypedValueRegion *R = SD->getRegion(); - for (auto I : *Regs) { - // FIXME: The logic to identify tainted regions could be more - // complete. For example, this would not currently identify - // overlapping fields in a union as tainted. To identify this we can - // check for overlapping/nested byte offsets. - if (Kind == I.second && R->isSubRegionOf(I.first)) - return true; - } - } - } - - // If memory region is tainted, data is also tainted. - if (const SymbolRegionValue *SRV = dyn_cast<SymbolRegionValue>(*SI)) { - if (isTainted(SRV->getRegion(), Kind)) - return true; - } - - // If this is a SymbolCast from a tainted value, it's also tainted. - if (const SymbolCast *SC = dyn_cast<SymbolCast>(*SI)) { - if (isTainted(SC->getOperand(), Kind)) - return true; - } - } - - return false; -} Index: clang/lib/StaticAnalyzer/Core/CMakeLists.txt =================================================================== --- clang/lib/StaticAnalyzer/Core/CMakeLists.txt +++ clang/lib/StaticAnalyzer/Core/CMakeLists.txt @@ -45,7 +45,6 @@ SValBuilder.cpp SVals.cpp SymbolManager.cpp - TaintManager.cpp WorkList.cpp LINK_LIBS Index: clang/lib/StaticAnalyzer/Core/BugReporterVisitors.cpp =================================================================== --- clang/lib/StaticAnalyzer/Core/BugReporterVisitors.cpp +++ clang/lib/StaticAnalyzer/Core/BugReporterVisitors.cpp @@ -2420,26 +2420,6 @@ return std::move(Piece); } -std::shared_ptr<PathDiagnosticPiece> -TaintBugVisitor::VisitNode(const ExplodedNode *N, - BugReporterContext &BRC, BugReport &) { - - // Find the ExplodedNode where the taint was first introduced - if (!N->getState()->isTainted(V) || N->getFirstPred()->getState()->isTainted(V)) - return nullptr; - - const Stmt *S = PathDiagnosticLocation::getStmt(N); - if (!S) - return nullptr; - - const LocationContext *NCtx = N->getLocationContext(); - PathDiagnosticLocation L = - PathDiagnosticLocation::createBegin(S, BRC.getSourceManager(), NCtx); - if (!L.isValid() || !L.asLocation().isValid()) - return nullptr; - - return std::make_shared<PathDiagnosticEventPiece>(L, "Taint originated here"); -} FalsePositiveRefutationBRVisitor::FalsePositiveRefutationBRVisitor() : Constraints(ConstraintRangeTy::Factory().getEmptyMap()) {} Index: clang/lib/StaticAnalyzer/Checkers/VLASizeChecker.cpp =================================================================== --- clang/lib/StaticAnalyzer/Checkers/VLASizeChecker.cpp +++ clang/lib/StaticAnalyzer/Checkers/VLASizeChecker.cpp @@ -13,6 +13,7 @@ // //===----------------------------------------------------------------------===// +#include "Taint.h" #include "clang/StaticAnalyzer/Checkers/BuiltinCheckerRegistration.h" #include "clang/AST/CharUnits.h" #include "clang/StaticAnalyzer/Core/BugReporter/BugType.h" @@ -25,6 +26,7 @@ using namespace clang; using namespace ento; +using namespace taint; namespace { class VLASizeChecker : public Checker< check::PreStmt<DeclStmt> > { @@ -106,7 +108,7 @@ return; // Check if the size is tainted. - if (state->isTainted(sizeV)) { + if (isTainted(state, sizeV)) { reportBug(VLA_Tainted, SE, nullptr, C, llvm::make_unique<TaintBugVisitor>(sizeV)); return; Index: clang/lib/StaticAnalyzer/Checkers/TaintTesterChecker.cpp =================================================================== --- clang/lib/StaticAnalyzer/Checkers/TaintTesterChecker.cpp +++ clang/lib/StaticAnalyzer/Checkers/TaintTesterChecker.cpp @@ -9,6 +9,8 @@ // This checker can be used for testing how taint data is propagated. // //===----------------------------------------------------------------------===// + +#include "Taint.h" #include "clang/StaticAnalyzer/Checkers/BuiltinCheckerRegistration.h" #include "clang/StaticAnalyzer/Core/BugReporter/BugType.h" #include "clang/StaticAnalyzer/Core/Checker.h" @@ -17,6 +19,7 @@ using namespace clang; using namespace ento; +using namespace taint; namespace { class TaintTesterChecker : public Checker< check::PostStmt<Expr> > { @@ -46,7 +49,7 @@ if (!State) return; - if (State->isTainted(E, C.getLocationContext())) { + if (isTainted(State, E, C.getLocationContext())) { if (ExplodedNode *N = C.generateNonFatalErrorNode()) { initBugType(); auto report = llvm::make_unique<BugReport>(*BT, "tainted",N); Index: clang/lib/StaticAnalyzer/Checkers/Taint.h =================================================================== --- /dev/null +++ clang/lib/StaticAnalyzer/Checkers/Taint.h @@ -0,0 +1,103 @@ +//=== Taint.h - Taint tracking and basic propagation rules. --------*- C++ -*-// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// Defines basic, non-domain-specific mechanisms for tracking tainted values. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_CLANG_LIB_STATICANALYZER_CHECKERS_TAINT_H +#define LLVM_CLANG_LIB_STATICANALYZER_CHECKERS_TAINT_H + +#include "clang/StaticAnalyzer/Core/PathSensitive/ProgramState.h" +#include "clang/StaticAnalyzer/Core/BugReporter/BugReporterVisitors.h" + +namespace clang { +namespace ento { +namespace taint { + +/// The type of taint, which helps to differentiate between different types of +/// taint. +using TaintTagType = unsigned; + +static constexpr TaintTagType TaintTagGeneric = 0; + +/// Create a new state in which the value of the statement is marked as tainted. +LLVM_NODISCARD ProgramStateRef +addTaint(ProgramStateRef State, const Stmt *S, const LocationContext *LCtx, + TaintTagType Kind = TaintTagGeneric); + +/// Create a new state in which the value is marked as tainted. +LLVM_NODISCARD ProgramStateRef +addTaint(ProgramStateRef State, SVal V, + TaintTagType Kind = TaintTagGeneric); + +/// Create a new state in which the symbol is marked as tainted. +LLVM_NODISCARD ProgramStateRef +addTaint(ProgramStateRef State, SymbolRef Sym, + TaintTagType Kind = TaintTagGeneric); + +/// Create a new state in which the pointer represented by the region +/// is marked as tainted. +LLVM_NODISCARD ProgramStateRef +addTaint(ProgramStateRef State, const MemRegion *R, + TaintTagType Kind = TaintTagGeneric); + +/// Create a new state in a which a sub-region of a given symbol is tainted. +/// This might be necessary when referring to regions that can not have an +/// individual symbol, e.g. if they are represented by the default binding of +/// a LazyCompoundVal. +LLVM_NODISCARD ProgramStateRef +addPartialTaint(ProgramStateRef State, + SymbolRef ParentSym, const SubRegion *SubRegion, + TaintTagType Kind = TaintTagGeneric); + +/// Check if the statement has a tainted value in the given state. +bool isTainted(ProgramStateRef State, const Stmt *S, + const LocationContext *LCtx, + TaintTagType Kind = TaintTagGeneric); + +/// Check if the value is tainted in the given state. +bool isTainted(ProgramStateRef State, SVal V, + TaintTagType Kind = TaintTagGeneric); + +/// Check if the symbol is tainted in the given state. +bool isTainted(ProgramStateRef State, SymbolRef Sym, + TaintTagType Kind = TaintTagGeneric); + +/// Check if the pointer represented by the region is tainted in the given +/// state. +bool isTainted(ProgramStateRef State, const MemRegion *Reg, + TaintTagType Kind = TaintTagGeneric); + +void printTaint(ProgramStateRef State, raw_ostream &Out, const char *nl = "\n", + const char *sep = ""); + +void dumpTaint(ProgramStateRef State); + +/// The bug visitor prints a diagnostic message at the location where a given +/// variable was tainted. +class TaintBugVisitor final : public BugReporterVisitor { +private: + const SVal V; + +public: + TaintBugVisitor(const SVal V) : V(V) {} + void Profile(llvm::FoldingSetNodeID &ID) const override { ID.Add(V); } + + std::shared_ptr<PathDiagnosticPiece> VisitNode(const ExplodedNode *N, + BugReporterContext &BRC, + BugReport &BR) override; +}; + +} // namespace taint +} // namespace ento +} // namespace clang + +#endif + Index: clang/lib/StaticAnalyzer/Checkers/Taint.cpp =================================================================== --- /dev/null +++ clang/lib/StaticAnalyzer/Checkers/Taint.cpp @@ -0,0 +1,228 @@ +//=== Taint.cpp - Taint tracking and basic propagation rules. ------*- C++ -*-// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// Defines basic, non-domain-specific mechanisms for tracking tainted values. +// +//===----------------------------------------------------------------------===// + +#include "clang/StaticAnalyzer/Core/BugReporter/BugReporter.h" +#include "clang/StaticAnalyzer/Core/PathSensitive/ProgramStateTrait.h" +#include "Taint.h" + +using namespace clang; +using namespace ento; +using namespace taint; + +// Fully tainted symbols. +REGISTER_MAP_WITH_PROGRAMSTATE(TaintMap, SymbolRef, TaintTagType) + +// Partially tainted symbols. +REGISTER_MAP_FACTORY_WITH_PROGRAMSTATE(TaintedSubRegions, + const SubRegion *, TaintTagType); +REGISTER_MAP_WITH_PROGRAMSTATE(DerivedSymTaint, SymbolRef, TaintedSubRegions) + +void taint::printTaint(ProgramStateRef State, raw_ostream &Out, const char *NL, + const char *Sep) { + TaintMapTy TM = State->get<TaintMap>(); + + if (!TM.isEmpty()) + Out <<"Tainted symbols:" << NL; + + for (const auto &I : TM) + Out << I.first << " : " << I.second << NL; +} + +void dumpTaint(ProgramStateRef State) { + printTaint(State, llvm::errs()); +} + +ProgramStateRef taint::addTaint(ProgramStateRef State, const Stmt *S, + const LocationContext *LCtx, + TaintTagType Kind) { + return addTaint(State, State->getSVal(S, LCtx), Kind); +} + +ProgramStateRef taint::addTaint(ProgramStateRef State, SVal V, + TaintTagType Kind) { + SymbolRef Sym = V.getAsSymbol(); + if (Sym) + return addTaint(State, Sym, Kind); + + // If the SVal represents a structure, try to mass-taint all values within the + // structure. For now it only works efficiently on lazy compound values that + // were conjured during a conservative evaluation of a function - either as + // return values of functions that return structures or arrays by value, or as + // values of structures or arrays passed into the function by reference, + // directly or through pointer aliasing. Such lazy compound values are + // characterized by having exactly one binding in their captured store within + // their parent region, which is a conjured symbol default-bound to the base + // region of the parent region. + if (auto LCV = V.getAs<nonloc::LazyCompoundVal>()) { + if (Optional<SVal> binding = + State->getStateManager().getStoreManager() + .getDefaultBinding(*LCV)) { + if (SymbolRef Sym = binding->getAsSymbol()) + return addPartialTaint(State, Sym, LCV->getRegion(), Kind); + } + } + + const MemRegion *R = V.getAsRegion(); + return addTaint(State, R, Kind); +} + +ProgramStateRef taint::addTaint(ProgramStateRef State, const MemRegion *R, + TaintTagType Kind) { + if (const SymbolicRegion *SR = dyn_cast_or_null<SymbolicRegion>(R)) + return addTaint(State, SR->getSymbol(), Kind); + return State; +} + +ProgramStateRef taint::addTaint(ProgramStateRef State, SymbolRef Sym, + TaintTagType Kind) { + // If this is a symbol cast, remove the cast before adding the taint. Taint + // is cast agnostic. + while (const SymbolCast *SC = dyn_cast<SymbolCast>(Sym)) + Sym = SC->getOperand(); + + ProgramStateRef NewState = State->set<TaintMap>(Sym, Kind); + assert(NewState); + return NewState; +} + +ProgramStateRef taint::addPartialTaint(ProgramStateRef State, + SymbolRef ParentSym, + const SubRegion *SubRegion, + TaintTagType Kind) { + // Ignore partial taint if the entire parent symbol is already tainted. + if (const TaintTagType *T = State->get<TaintMap>(ParentSym)) + if (*T == Kind) + return State; + + // Partial taint applies if only a portion of the symbol is tainted. + if (SubRegion == SubRegion->getBaseRegion()) + return addTaint(State, ParentSym, Kind); + + const TaintedSubRegions *SavedRegs = State->get<DerivedSymTaint>(ParentSym); + TaintedSubRegions::Factory &F = State->get_context<TaintedSubRegions>(); + TaintedSubRegions Regs = SavedRegs ? *SavedRegs : F.getEmptyMap(); + + Regs = F.add(Regs, SubRegion, Kind); + ProgramStateRef NewState = State->set<DerivedSymTaint>(ParentSym, Regs); + assert(NewState); + return NewState; +} + +bool taint::isTainted(ProgramStateRef State, const Stmt *S, + const LocationContext *LCtx, TaintTagType Kind) { + SVal val = State->getSVal(S, LCtx); + return isTainted(State, val, Kind); +} + +bool taint::isTainted(ProgramStateRef State, SVal V, TaintTagType Kind) { + if (const SymExpr *Sym = V.getAsSymExpr()) + return isTainted(State, Sym, Kind); + if (const MemRegion *Reg = V.getAsRegion()) + return isTainted(State, Reg, Kind); + return false; +} + +bool taint::isTainted(ProgramStateRef State, const MemRegion *Reg, + TaintTagType K) { + if (!Reg) + return false; + + // Element region (array element) is tainted if either the base or the offset + // are tainted. + if (const ElementRegion *ER = dyn_cast<ElementRegion>(Reg)) + return isTainted(State, ER->getSuperRegion(), K) || + isTainted(State, ER->getIndex(), K); + + if (const SymbolicRegion *SR = dyn_cast<SymbolicRegion>(Reg)) + return isTainted(State, SR->getSymbol(), K); + + if (const SubRegion *ER = dyn_cast<SubRegion>(Reg)) + return isTainted(State, ER->getSuperRegion(), K); + + return false; +} + +bool taint::isTainted(ProgramStateRef State, SymbolRef Sym, TaintTagType Kind) { + if (!Sym) + return false; + + // Traverse all the symbols this symbol depends on to see if any are tainted. + for (SymExpr::symbol_iterator SI = Sym->symbol_begin(), SE =Sym->symbol_end(); + SI != SE; ++SI) { + if (!isa<SymbolData>(*SI)) + continue; + + if (const TaintTagType *Tag = State->get<TaintMap>(*SI)) { + if (*Tag == Kind) + return true; + } + + if (const auto *SD = dyn_cast<SymbolDerived>(*SI)) { + // If this is a SymbolDerived with a tainted parent, it's also tainted. + if (isTainted(State, SD->getParentSymbol(), Kind)) + return true; + + // If this is a SymbolDerived with the same parent symbol as another + // tainted SymbolDerived and a region that's a sub-region of that tainted + // symbol, it's also tainted. + if (const TaintedSubRegions *Regs = + State->get<DerivedSymTaint>(SD->getParentSymbol())) { + const TypedValueRegion *R = SD->getRegion(); + for (auto I : *Regs) { + // FIXME: The logic to identify tainted regions could be more + // complete. For example, this would not currently identify + // overlapping fields in a union as tainted. To identify this we can + // check for overlapping/nested byte offsets. + if (Kind == I.second && R->isSubRegionOf(I.first)) + return true; + } + } + } + + // If memory region is tainted, data is also tainted. + if (const auto *SRV = dyn_cast<SymbolRegionValue>(*SI)) { + if (isTainted(State, SRV->getRegion(), Kind)) + return true; + } + + // If this is a SymbolCast from a tainted value, it's also tainted. + if (const auto *SC = dyn_cast<SymbolCast>(*SI)) { + if (isTainted(State, SC->getOperand(), Kind)) + return true; + } + } + + return false; +} + +std::shared_ptr<PathDiagnosticPiece> +TaintBugVisitor::VisitNode(const ExplodedNode *N, BugReporterContext &BRC, + BugReport &BR) { + + // Find the ExplodedNode where the taint was first introduced + if (!isTainted(N->getState(), V) || + isTainted(N->getFirstPred()->getState(), V)) + return nullptr; + + const Stmt *S = PathDiagnosticLocation::getStmt(N); + if (!S) + return nullptr; + + const LocationContext *NCtx = N->getLocationContext(); + PathDiagnosticLocation L = + PathDiagnosticLocation::createBegin(S, BRC.getSourceManager(), NCtx); + if (!L.isValid() || !L.asLocation().isValid()) + return nullptr; + + return std::make_shared<PathDiagnosticEventPiece>(L, "Taint originated here"); +} Index: clang/lib/StaticAnalyzer/Checkers/GenericTaintChecker.cpp =================================================================== --- clang/lib/StaticAnalyzer/Checkers/GenericTaintChecker.cpp +++ clang/lib/StaticAnalyzer/Checkers/GenericTaintChecker.cpp @@ -13,6 +13,8 @@ // aggressively, even if the involved symbols are under constrained. // //===----------------------------------------------------------------------===// + +#include "Taint.h" #include "clang/StaticAnalyzer/Checkers/BuiltinCheckerRegistration.h" #include "clang/AST/Attr.h" #include "clang/Basic/Builtins.h" @@ -27,6 +29,7 @@ using namespace clang; using namespace ento; +using namespace taint; namespace { class GenericTaintChecker @@ -152,14 +155,14 @@ static bool isTaintedOrPointsToTainted(const Expr *E, ProgramStateRef State, CheckerContext &C) { - if (State->isTainted(E, C.getLocationContext()) || isStdin(E, C)) + if (isTainted(State, E, C.getLocationContext()) || isStdin(E, C)) return true; if (!E->getType().getTypePtr()->isPointerType()) return false; Optional<SVal> V = getPointedToSVal(C, E); - return (V && State->isTainted(*V)); + return (V && isTainted(State, *V)); } /// Pre-process a function which propagates taint according to the @@ -354,7 +357,7 @@ for (unsigned ArgNum : TaintArgs) { // Special handling for the tainted return value. if (ArgNum == ReturnValueIndex) { - State = State->addTaint(CE, C.getLocationContext()); + State = addTaint(State, CE, C.getLocationContext()); continue; } @@ -365,7 +368,7 @@ const Expr *Arg = CE->getArg(ArgNum); Optional<SVal> V = getPointedToSVal(C, Arg); if (V) - State = State->addTaint(*V); + State = addTaint(State, *V); } // Clear up the taint info from the state. @@ -570,9 +573,9 @@ ProgramStateRef State = C.getState(); Optional<SVal> PointedToSVal = getPointedToSVal(C, E); SVal TaintedSVal; - if (PointedToSVal && State->isTainted(*PointedToSVal)) + if (PointedToSVal && isTainted(State, *PointedToSVal)) TaintedSVal = *PointedToSVal; - else if (State->isTainted(E, C.getLocationContext())) + else if (isTainted(State, E, C.getLocationContext())) TaintedSVal = C.getSVal(E); else return false; Index: clang/lib/StaticAnalyzer/Checkers/DivZeroChecker.cpp =================================================================== --- clang/lib/StaticAnalyzer/Checkers/DivZeroChecker.cpp +++ clang/lib/StaticAnalyzer/Checkers/DivZeroChecker.cpp @@ -11,6 +11,7 @@ // //===----------------------------------------------------------------------===// +#include "Taint.h" #include "clang/StaticAnalyzer/Checkers/BuiltinCheckerRegistration.h" #include "clang/StaticAnalyzer/Core/BugReporter/BugType.h" #include "clang/StaticAnalyzer/Core/Checker.h" @@ -19,6 +20,7 @@ using namespace clang; using namespace ento; +using namespace taint; namespace { class DivZeroChecker : public Checker< check::PreStmt<BinaryOperator> > { @@ -83,10 +85,10 @@ return; } - bool TaintedD = C.getState()->isTainted(*DV); + bool TaintedD = isTainted(C.getState(), *DV); if ((stateNotZero && stateZero && TaintedD)) { reportBug("Division by a tainted value, possibly zero", stateZero, C, - llvm::make_unique<TaintBugVisitor>(*DV)); + llvm::make_unique<taint::TaintBugVisitor>(*DV)); return; } Index: clang/lib/StaticAnalyzer/Checkers/CMakeLists.txt =================================================================== --- clang/lib/StaticAnalyzer/Checkers/CMakeLists.txt +++ clang/lib/StaticAnalyzer/Checkers/CMakeLists.txt @@ -87,6 +87,7 @@ StackAddrEscapeChecker.cpp StdLibraryFunctionsChecker.cpp StreamChecker.cpp + Taint.cpp TaintTesterChecker.cpp TestAfterDivZeroChecker.cpp TraversalChecker.cpp Index: clang/lib/StaticAnalyzer/Checkers/ArrayBoundCheckerV2.cpp =================================================================== --- clang/lib/StaticAnalyzer/Checkers/ArrayBoundCheckerV2.cpp +++ clang/lib/StaticAnalyzer/Checkers/ArrayBoundCheckerV2.cpp @@ -11,6 +11,7 @@ // //===----------------------------------------------------------------------===// +#include "Taint.h" #include "clang/StaticAnalyzer/Checkers/BuiltinCheckerRegistration.h" #include "clang/AST/CharUnits.h" #include "clang/StaticAnalyzer/Core/BugReporter/BugType.h" @@ -24,6 +25,7 @@ using namespace clang; using namespace ento; +using namespace taint; namespace { class ArrayBoundCheckerV2 : @@ -204,7 +206,7 @@ // If we are under constrained and the index variables are tainted, report. if (state_exceedsUpperBound && state_withinUpperBound) { SVal ByteOffset = rawOffset.getByteOffset(); - if (state->isTainted(ByteOffset)) { + if (isTainted(state, ByteOffset)) { reportOOB(checkerContext, state_exceedsUpperBound, OOB_Tainted, llvm::make_unique<TaintBugVisitor>(ByteOffset)); return; Index: clang/include/clang/StaticAnalyzer/Core/PathSensitive/TaintTag.h =================================================================== --- clang/include/clang/StaticAnalyzer/Core/PathSensitive/TaintTag.h +++ /dev/null @@ -1,29 +0,0 @@ -//===- TaintTag.h - Path-sensitive "State" for tracking values --*- C++ -*-===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// -// -// Defines a set of taint tags. Several tags are used to differentiate kinds -// of taint. -// -//===----------------------------------------------------------------------===// - -#ifndef LLVM_CLANG_STATICANALYZER_CORE_PATHSENSITIVE_TAINTTAG_H -#define LLVM_CLANG_STATICANALYZER_CORE_PATHSENSITIVE_TAINTTAG_H - -namespace clang { -namespace ento { - -/// The type of taint, which helps to differentiate between different types of -/// taint. -using TaintTagType = unsigned; - -static const TaintTagType TaintTagGeneric = 0; - -} // namespace ento -} // namespace clang - -#endif // LLVM_CLANG_STATICANALYZER_CORE_PATHSENSITIVE_TAINTTAG_H Index: clang/include/clang/StaticAnalyzer/Core/PathSensitive/TaintManager.h =================================================================== --- clang/include/clang/StaticAnalyzer/Core/PathSensitive/TaintManager.h +++ /dev/null @@ -1,58 +0,0 @@ -//===- TaintManager.h - Managing taint --------------------------*- C++ -*-===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// -// -// This file provides APIs for adding, removing, querying symbol taint. -// -//===----------------------------------------------------------------------===// - -#ifndef LLVM_CLANG_STATICANALYZER_CORE_PATHSENSITIVE_TAINTMANAGER_H -#define LLVM_CLANG_STATICANALYZER_CORE_PATHSENSITIVE_TAINTMANAGER_H - -#include "clang/StaticAnalyzer/Core/PathSensitive/ProgramState.h" -#include "clang/StaticAnalyzer/Core/PathSensitive/ProgramStateTrait.h" -#include "clang/StaticAnalyzer/Core/PathSensitive/SymExpr.h" -#include "clang/StaticAnalyzer/Core/PathSensitive/TaintTag.h" -#include "llvm/ADT/ImmutableMap.h" - -namespace clang { -namespace ento { - -/// The GDM component containing the tainted root symbols. We lazily infer the -/// taint of the dependent symbols. Currently, this is a map from a symbol to -/// tag kind. TODO: Should support multiple tag kinds. -// FIXME: This does not use the nice trait macros because it must be accessible -// from multiple translation units. -struct TaintMap {}; - -using TaintMapImpl = llvm::ImmutableMap<SymbolRef, TaintTagType>; - -template<> struct ProgramStateTrait<TaintMap> - : public ProgramStatePartialTrait<TaintMapImpl> { - static void *GDMIndex(); -}; - -/// The GDM component mapping derived symbols' parent symbols to their -/// underlying regions. This is used to efficiently check whether a symbol is -/// tainted when it represents a sub-region of a tainted symbol. -struct DerivedSymTaint {}; - -using DerivedSymTaintImpl = llvm::ImmutableMap<SymbolRef, TaintedSubRegions>; - -template<> struct ProgramStateTrait<DerivedSymTaint> - : public ProgramStatePartialTrait<DerivedSymTaintImpl> { - static void *GDMIndex(); -}; - -class TaintManager { - TaintManager() = default; -}; - -} // namespace ento -} // namespace clang - -#endif // LLVM_CLANG_STATICANALYZER_CORE_PATHSENSITIVE_TAINTMANAGER_H Index: clang/include/clang/StaticAnalyzer/Core/PathSensitive/ProgramState.h =================================================================== --- clang/include/clang/StaticAnalyzer/Core/PathSensitive/ProgramState.h +++ clang/include/clang/StaticAnalyzer/Core/PathSensitive/ProgramState.h @@ -20,7 +20,6 @@ #include "clang/StaticAnalyzer/Core/PathSensitive/ProgramState_Fwd.h" #include "clang/StaticAnalyzer/Core/PathSensitive/SValBuilder.h" #include "clang/StaticAnalyzer/Core/PathSensitive/Store.h" -#include "clang/StaticAnalyzer/Core/PathSensitive/TaintTag.h" #include "llvm/ADT/FoldingSet.h" #include "llvm/ADT/ImmutableMap.h" #include "llvm/Support/Allocator.h" @@ -43,7 +42,6 @@ ProgramStateManager &, SubEngine *); typedef std::unique_ptr<StoreManager>(*StoreManagerCreator)( ProgramStateManager &); -typedef llvm::ImmutableMap<const SubRegion*, TaintTagType> TaintedSubRegions; //===----------------------------------------------------------------------===// // ProgramStateTrait - Traits used by the Generic Data Map of a ProgramState. @@ -367,38 +365,6 @@ template <typename CB> CB scanReachableSymbols(llvm::iterator_range<region_iterator> Reachable) const; - /// Create a new state in which the statement is marked as tainted. - LLVM_NODISCARD ProgramStateRef - addTaint(const Stmt *S, const LocationContext *LCtx, - TaintTagType Kind = TaintTagGeneric) const; - - /// Create a new state in which the value is marked as tainted. - LLVM_NODISCARD ProgramStateRef - addTaint(SVal V, TaintTagType Kind = TaintTagGeneric) const; - - /// Create a new state in which the symbol is marked as tainted. - LLVM_NODISCARD ProgramStateRef addTaint(SymbolRef S, - TaintTagType Kind = TaintTagGeneric) const; - - /// Create a new state in which the region symbol is marked as tainted. - LLVM_NODISCARD ProgramStateRef - addTaint(const MemRegion *R, TaintTagType Kind = TaintTagGeneric) const; - - /// Create a new state in a which a sub-region of a given symbol is tainted. - /// This might be necessary when referring to regions that can not have an - /// individual symbol, e.g. if they are represented by the default binding of - /// a LazyCompoundVal. - LLVM_NODISCARD ProgramStateRef - addPartialTaint(SymbolRef ParentSym, const SubRegion *SubRegion, - TaintTagType Kind = TaintTagGeneric) const; - - /// Check if the statement is tainted in the current state. - bool isTainted(const Stmt *S, const LocationContext *LCtx, - TaintTagType Kind = TaintTagGeneric) const; - bool isTainted(SVal V, TaintTagType Kind = TaintTagGeneric) const; - bool isTainted(SymbolRef Sym, TaintTagType Kind = TaintTagGeneric) const; - bool isTainted(const MemRegion *Reg, TaintTagType Kind=TaintTagGeneric) const; - //==---------------------------------------------------------------------==// // Accessing the Generic Data Map (GDM). //==---------------------------------------------------------------------==// @@ -462,10 +428,8 @@ const LocationContext *CurrentLC = nullptr) const; void printDOT(raw_ostream &Out, const LocationContext *CurrentLC = nullptr) const; - void printTaint(raw_ostream &Out, const char *nl = "\n") const; void dump() const; - void dumpTaint() const; private: friend void ProgramStateRetain(const ProgramState *state); @@ -499,7 +463,6 @@ std::unique_ptr<ConstraintManager> ConstraintMgr; ProgramState::GenericDataMap::Factory GDMFactory; - TaintedSubRegions::Factory TSRFactory; typedef llvm::DenseMap<void*,std::pair<void*,void (*)(void*)> > GDMContextsTy; GDMContextsTy GDMContexts; Index: clang/include/clang/StaticAnalyzer/Core/BugReporter/BugReporterVisitors.h =================================================================== --- clang/include/clang/StaticAnalyzer/Core/BugReporter/BugReporterVisitors.h +++ clang/include/clang/StaticAnalyzer/Core/BugReporter/BugReporterVisitors.h @@ -307,20 +307,6 @@ BugReport &BR) override; }; -/// The bug visitor prints a diagnostic message at the location where a given -/// variable was tainted. -class TaintBugVisitor final : public BugReporterVisitor { -private: - const SVal V; - -public: - TaintBugVisitor(const SVal V) : V(V) {} - void Profile(llvm::FoldingSetNodeID &ID) const override { ID.Add(V); } - - std::shared_ptr<PathDiagnosticPiece> VisitNode(const ExplodedNode *N, - BugReporterContext &BRC, - BugReport &BR) override; -}; /// The bug visitor will walk all the nodes in a path and collect all the /// constraints. When it reaches the root node, will create a refutation
_______________________________________________ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits