vlad.tsyrklevich updated this revision to Diff 84517.
vlad.tsyrklevich added a comment.

Artem, thank you for the very detailed reply! Between this, and hitting the 
right search terms to find your clang analyzer guide my understanding of the 
symbol abstractions the analyzer exposes has improved significantly.

You state that it's not easy to derive the conjured symbols from the Store; 
however, it didn't seem to be too difficult to do by recursively finding the 
bindings for the constituent FieldRegions (if the LCV is backing a 
struct/union) or the first ElementRegion (if the LCV is backing an array) until 
you reach an element/field initialized with the conjured symbol. Does the new 
patch look correct to you? Your comment about the difficulty has me unsure 
whether I've fully grasped the scope of the problem.

One wrinkle you'll notice is in the patch to `taint-tester.c`, one FIXME for 
missing taint has been fixed, but now one variable that should not be tainted 
is! This seems to be because of overeager invalidation, not strictly the fault 
of the patch but it is exposed by it.


https://reviews.llvm.org/D28445

Files:
  lib/StaticAnalyzer/Checkers/GenericTaintChecker.cpp
  lib/StaticAnalyzer/Core/ProgramState.cpp
  test/Analysis/taint-generic.c
  test/Analysis/taint-tester.c

Index: test/Analysis/taint-tester.c
===================================================================
--- test/Analysis/taint-tester.c
+++ test/Analysis/taint-tester.c
@@ -51,8 +51,9 @@
   scanf("%d", &xy.y);
   scanf("%d", &xy.x);
   int tx = xy.x; // expected-warning + {{tainted}}
-  int ty = xy.y; // FIXME: This should be tainted as well.
-  char ntz = xy.z;// no warning
+  int ty = xy.y; // expected-warning + {{tainted}}
+  // FIXME: xy.z should not be tainted
+  char ntz = xy.z; // expected-warning + {{tainted}}
   // Now, scanf scans both.
   scanf("%d %d", &xy.y, &xy.x);
   int ttx = xy.x; // expected-warning + {{tainted}}
Index: test/Analysis/taint-generic.c
===================================================================
--- test/Analysis/taint-generic.c
+++ test/Analysis/taint-generic.c
@@ -169,6 +169,26 @@
   sock = socket(AF_LOCAL, SOCK_STREAM, 0);
   read(sock, buffer, 100);
   execl(buffer, "filename", 0); // no-warning
+
+  sock = socket(AF_INET, SOCK_STREAM, 0);
+  // References to both buffer and &buffer as an argument should taint the argument
+  read(sock, &buffer, 100);
+  execl(buffer, "filename", 0); // expected-warning {{Untrusted data is passed to a system call}}
+}
+
+void testStruct() {
+  struct {
+    struct {} st;
+    char buf[16];
+    int length;
+  } tainted;
+
+  char buffer[16];
+  int sock;
+
+  sock = socket(AF_INET, SOCK_STREAM, 0);
+  read(sock, &tainted, sizeof(tainted));
+  __builtin_memcpy(buffer, tainted.buf, tainted.length); // expected-warning {{Untrusted data is used to specify the buffer size}}
 }
 
 int testDivByZero() {
Index: lib/StaticAnalyzer/Core/ProgramState.cpp
===================================================================
--- lib/StaticAnalyzer/Core/ProgramState.cpp
+++ lib/StaticAnalyzer/Core/ProgramState.cpp
@@ -664,6 +664,10 @@
   while (const SymbolCast *SC = dyn_cast<SymbolCast>(Sym))
     Sym = SC->getOperand();
 
+  // If this a derived symbol, taint the parent symbol.
+  if (const SymbolDerived *SD = dyn_cast<SymbolDerived>(Sym))
+    Sym = SD->getParentSymbol();
+
   ProgramStateRef NewState = set<TaintMap>(Sym, Kind);
   assert(NewState);
   return NewState;
Index: lib/StaticAnalyzer/Checkers/GenericTaintChecker.cpp
===================================================================
--- lib/StaticAnalyzer/Checkers/GenericTaintChecker.cpp
+++ lib/StaticAnalyzer/Checkers/GenericTaintChecker.cpp
@@ -65,6 +65,11 @@
   /// and thus, is tainted.
   static bool isStdin(const Expr *E, CheckerContext &C);
 
+  /// \brief Given a LazyCompoundValue, get the symbol of the first FieldRegion/
+  /// ElementRegion we can find a binding for.
+  static SymbolRef getLCVSymbol(CheckerContext &C,
+                                nonloc::LazyCompoundVal &LCV);
+
   /// \brief Given a pointer argument, get the symbol of the value it contains
   /// (points to).
   static SymbolRef getPointedToSymbol(CheckerContext &C, const Expr *Arg);
@@ -423,6 +428,43 @@
   return false;
 }
 
+SymbolRef GenericTaintChecker::getLCVSymbol(CheckerContext &C,
+                                            nonloc::LazyCompoundVal &LCV) {
+  StoreManager &StoreMgr = C.getStoreManager();
+  MemRegionManager &MrMgr = C.getSValBuilder().getRegionManager();
+
+  QualType T = LCV.getRegion()->getValueType();
+  if (T->isStructureType() || T->isUnionType()) {
+    const RecordType *RT = T->getAsStructureType();
+    if (!RT)
+      RT = T->getAsUnionType();
+
+    const RecordDecl *RD = RT->getDecl()->getDefinition();
+    for (const auto *I : RD->fields()) {
+      const FieldRegion *FR = MrMgr.getFieldRegion(I, LCV.getRegion());
+      const SVal &V =
+          StoreMgr.getBinding(LCV.getStore(), loc::MemRegionVal(FR));
+      if (auto Sym = V.getAsSymbol())
+        return Sym;
+      else if (auto _LCV = V.getAs<nonloc::LazyCompoundVal>()) {
+        if (auto Sym = getLCVSymbol(C, *_LCV))
+          return Sym;
+      }
+    }
+  } else if (T->isArrayType()) {
+    const ArrayType *AT = T->getAsArrayTypeUnsafe();
+    const ElementRegion *ER =
+        StoreMgr.GetElementZeroRegion(LCV.getRegion(), AT->getElementType());
+    const SVal &V = StoreMgr.getBinding(LCV.getStore(), loc::MemRegionVal(ER));
+    if (auto Sym = V.getAsSymbol())
+      return Sym;
+    else if (auto _LCV = V.getAs<nonloc::LazyCompoundVal>())
+      return getLCVSymbol(C, *_LCV);
+  }
+
+  return nullptr;
+}
+
 SymbolRef GenericTaintChecker::getPointedToSymbol(CheckerContext &C,
                                                   const Expr* Arg) {
   ProgramStateRef State = C.getState();
@@ -438,6 +480,10 @@
     dyn_cast<PointerType>(Arg->getType().getCanonicalType().getTypePtr());
   SVal Val = State->getSVal(*AddrLoc,
                             ArgTy ? ArgTy->getPointeeType(): QualType());
+
+  if (auto LCV = Val.getAs<nonloc::LazyCompoundVal>())
+    return getLCVSymbol(C, *LCV);
+
   return Val.getAsSymbol();
 }
 
_______________________________________________
cfe-commits mailing list
cfe-commits@lists.llvm.org
http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

Reply via email to