boga95 updated this revision to Diff 193705.
boga95 added a comment.

Rebase after https://reviews.llvm.org/D59861.
Fix custom filter test case: functions without definition always remove 
taintedness.


CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D59516/new/

https://reviews.llvm.org/D59516

Files:
  lib/StaticAnalyzer/Checkers/GenericTaintChecker.cpp
  lib/StaticAnalyzer/Checkers/Taint.cpp
  lib/StaticAnalyzer/Checkers/Taint.h
  test/Analysis/Inputs/taint-generic-config.yaml
  test/Analysis/taint-generic.c

Index: test/Analysis/taint-generic.c
===================================================================
--- test/Analysis/taint-generic.c
+++ test/Analysis/taint-generic.c
@@ -296,13 +296,15 @@
     *(volatile int *) 0; // no-warning
 }
 
-
 // Test configuration
 int mySource1();
 void mySource2(int*);
 void myScanf(const char*, ...);
 int myPropagator(int, int*);
 int mySnprintf(char*, size_t, const char*, ...);
+void myFilter1(int* x) {}
+void myFilter2(const int*);
+void myFilter3(int);
 void mySink(int, int, int);
 
 void testConfigurationSources1() {
@@ -329,6 +331,24 @@
   Buffer[y] = 1; // expected-warning {{Out of bound memory access }}
 }
 
+void testConfigurationFilter1() {
+  int x = mySource1();
+  myFilter1(&x);
+  Buffer[x] = 1; // no-warning
+}
+
+void testConfigurationFilter2() {
+  int x = mySource1();
+  myFilter2(&x); // Pass by const int*
+  Buffer[x] = 1; // expected-warning {{Out of bound memory access }}
+}
+
+void testConfigurationFilter3() {
+  int x = mySource1();
+  myFilter3(x); // Pass by value
+  Buffer[x] = 1; // expected-warning {{Out of bound memory access }}
+}
+
 void testConfigurationSinks() {
   int x = mySource1();
   mySink(x, 1, 2); // expected-warning {{Untrusted data is passed to a user defined sink}}
Index: test/Analysis/Inputs/taint-generic-config.yaml
===================================================================
--- test/Analysis/Inputs/taint-generic-config.yaml
+++ test/Analysis/Inputs/taint-generic-config.yaml
@@ -36,8 +36,8 @@
 # A list of filter functions
 Filters:
   # int x; // x is tainted
-  # myFilter(&x); // x is not tainted anymore
-  - Name: myFilter
+  # myFilter1(&x); // x is not tainted anymore
+  - Name: myFilter1
     Args: [0]
 
 # A list of sink functions
Index: lib/StaticAnalyzer/Checkers/Taint.h
===================================================================
--- lib/StaticAnalyzer/Checkers/Taint.h
+++ lib/StaticAnalyzer/Checkers/Taint.h
@@ -24,37 +24,35 @@
 /// taint.
 using TaintTagType = unsigned;
 
-static constexpr TaintTagType TaintTagGeneric = 0;
+static constexpr TaintTagType TaintTagNotTainted = 0;
+static constexpr TaintTagType TaintTagGeneric = 1;
 
 /// Create a new state in which the value of the statement is marked as tainted.
-LLVM_NODISCARD ProgramStateRef
-addTaint(ProgramStateRef State, const Stmt *S, const LocationContext *LCtx,
-         TaintTagType Kind = TaintTagGeneric);
+LLVM_NODISCARD ProgramStateRef addTaint(ProgramStateRef State, const Stmt *S,
+                                        const LocationContext *LCtx,
+                                        TaintTagType Kind = TaintTagGeneric);
 
 /// Create a new state in which the value is marked as tainted.
-LLVM_NODISCARD ProgramStateRef
-addTaint(ProgramStateRef State, SVal V,
-         TaintTagType Kind = TaintTagGeneric);
+LLVM_NODISCARD ProgramStateRef addTaint(ProgramStateRef State, SVal V,
+                                        TaintTagType Kind = TaintTagGeneric);
 
 /// Create a new state in which the symbol is marked as tainted.
-LLVM_NODISCARD ProgramStateRef
-addTaint(ProgramStateRef State, SymbolRef Sym,
-         TaintTagType Kind = TaintTagGeneric);
+LLVM_NODISCARD ProgramStateRef addTaint(ProgramStateRef State, SymbolRef Sym,
+                                        TaintTagType Kind = TaintTagGeneric);
 
 /// Create a new state in which the pointer represented by the region
 /// is marked as tainted.
-LLVM_NODISCARD ProgramStateRef
-addTaint(ProgramStateRef State, const MemRegion *R,
-         TaintTagType Kind = TaintTagGeneric);
+LLVM_NODISCARD ProgramStateRef addTaint(ProgramStateRef State,
+                                        const MemRegion *R,
+                                        TaintTagType Kind = TaintTagGeneric);
 
 /// Create a new state in a which a sub-region of a given symbol is tainted.
 /// This might be necessary when referring to regions that can not have an
 /// individual symbol, e.g. if they are represented by the default binding of
 /// a LazyCompoundVal.
-LLVM_NODISCARD ProgramStateRef
-addPartialTaint(ProgramStateRef State,
-                SymbolRef ParentSym, const SubRegion *SubRegion,
-                TaintTagType Kind = TaintTagGeneric);
+LLVM_NODISCARD ProgramStateRef addPartialTaint(
+    ProgramStateRef State, SymbolRef ParentSym, const SubRegion *SubRegion,
+    TaintTagType Kind = TaintTagGeneric);
 
 /// Check if the statement has a tainted value in the given state.
 bool isTainted(ProgramStateRef State, const Stmt *S,
@@ -99,4 +97,3 @@
 } // namespace clang
 
 #endif
-
Index: lib/StaticAnalyzer/Checkers/Taint.cpp
===================================================================
--- lib/StaticAnalyzer/Checkers/Taint.cpp
+++ lib/StaticAnalyzer/Checkers/Taint.cpp
@@ -37,9 +37,7 @@
     Out << I.first << " : " << I.second << NL;
 }
 
-void dumpTaint(ProgramStateRef State) {
-  printTaint(State, llvm::errs());
-}
+void dumpTaint(ProgramStateRef State) { printTaint(State, llvm::errs()); }
 
 ProgramStateRef taint::addTaint(ProgramStateRef State, const Stmt *S,
                                 const LocationContext *LCtx,
@@ -64,8 +62,8 @@
   // region of the parent region.
   if (auto LCV = V.getAs<nonloc::LazyCompoundVal>()) {
     if (Optional<SVal> binding =
-            State->getStateManager().getStoreManager()
-                                    .getDefaultBinding(*LCV)) {
+            State->getStateManager().getStoreManager().getDefaultBinding(
+                *LCV)) {
       if (SymbolRef Sym = binding->getAsSymbol())
         return addPartialTaint(State, Sym, LCV->getRegion(), Kind);
     }
@@ -157,12 +155,13 @@
 
   // Traverse all the symbols this symbol depends on to see if any are tainted.
   for (SymExpr::symbol_iterator SI = Sym->symbol_begin(),
-                                SE = Sym->symbol_end(); SI != SE; ++SI) {
+                                SE = Sym->symbol_end();
+       SI != SE; ++SI) {
     if (!isa<SymbolData>(*SI))
       continue;
 
     if (const TaintTagType *Tag = State->get<TaintMap>(*SI)) {
-      if (*Tag == Kind)
+      if (*Tag != TaintTagNotTainted && *Tag == Kind)
         return true;
     }
 
Index: lib/StaticAnalyzer/Checkers/GenericTaintChecker.cpp
===================================================================
--- lib/StaticAnalyzer/Checkers/GenericTaintChecker.cpp
+++ lib/StaticAnalyzer/Checkers/GenericTaintChecker.cpp
@@ -34,6 +34,28 @@
 using namespace taint;
 
 namespace {
+/// A struct to store tainted argument and taint type as a pair in the program
+/// state.
+struct TaintArgTypePair {
+  unsigned Arg;
+  TaintTagType TagType;
+
+  bool operator==(const TaintArgTypePair &X) const {
+    return Arg == X.Arg && TagType == X.TagType;
+  }
+
+  bool operator<(const TaintArgTypePair &X) const {
+    if (Arg != X.Arg)
+      return Arg < X.Arg;
+    return TagType < X.TagType;
+  }
+
+  void Profile(llvm::FoldingSetNodeID &ID) const {
+    ID.AddInteger(Arg);
+    ID.AddInteger(TagType);
+  }
+};
+
 class GenericTaintChecker
     : public Checker<check::PostStmt<CallExpr>, check::PreStmt<CallExpr>> {
 public:
@@ -46,8 +68,8 @@
 
   void checkPreStmt(const CallExpr *CE, CheckerContext &C) const;
 
-  void printState(raw_ostream &Out, ProgramStateRef State,
-                  const char *NL, const char *Sep) const override;
+  void printState(raw_ostream &Out, ProgramStateRef State, const char *NL,
+                  const char *Sep) const override;
 
   using ArgVector = SmallVector<unsigned, 2>;
 
@@ -89,10 +111,16 @@
 
   /// Catch taint related bugs. Check if tainted data is passed to a
   /// system call etc.
-  bool checkPre(const CallExpr *CE, CheckerContext &C) const;
+  bool checkPre(const CallExpr *CE, const FunctionDecl *FDecl, StringRef Name,
+                CheckerContext &C) const;
 
   /// Add taint sources on a pre-visit.
-  void addSourcesPre(const CallExpr *CE, CheckerContext &C) const;
+  bool addSourcesPre(const CallExpr *CE, const FunctionDecl *FDecl,
+                     StringRef Name, CheckerContext &C) const;
+
+  /// Mark filter's arguments not tainted on a pre-visit.
+  bool addFiltersPre(const CallExpr *CE, StringRef Name,
+                     CheckerContext &C) const;
 
   /// Propagate taint generated at pre-visit.
   bool propagateFromPre(const CallExpr *CE, CheckerContext &C) const;
@@ -294,7 +322,7 @@
 /// to the call post-visit. The values are unsigned integers, which are either
 /// ReturnValueIndex, or indexes of the pointer/reference argument, which
 /// points to data, which should be tainted on return.
-REGISTER_SET_WITH_PROGRAMSTATE(TaintArgsOnPostVisit, unsigned)
+REGISTER_SET_WITH_PROGRAMSTATE(TaintArgsOnPostVisit, TaintArgTypePair)
 
 void GenericTaintChecker::getConfiguration(StringRef ConfigFile) {
   if (ConfigFile.trim().empty())
@@ -443,14 +471,25 @@
 
 void GenericTaintChecker::checkPreStmt(const CallExpr *CE,
                                        CheckerContext &C) const {
+  const FunctionDecl *FDecl = C.getCalleeDecl(CE);
+  if (!FDecl || FDecl->getKind() != Decl::Function)
+    return;
+
+  StringRef Name = C.getCalleeName(FDecl);
+  if (Name.empty())
+    return;
+
   // Check for taintedness related errors first: system call, uncontrolled
   // format string, tainted buffer size.
-  if (checkPre(CE, C))
+  if (checkPre(CE, FDecl, Name, C))
     return;
 
   // Marks the function's arguments and/or return value tainted if it present in
   // the list.
-  addSourcesPre(CE, C);
+  if (addSourcesPre(CE, FDecl, Name, C))
+    return;
+
+  addFiltersPre(CE, Name, C);
 }
 
 void GenericTaintChecker::checkPostStmt(const CallExpr *CE,
@@ -465,31 +504,46 @@
   printTaint(State, Out, NL, Sep);
 }
 
-void GenericTaintChecker::addSourcesPre(const CallExpr *CE,
+bool GenericTaintChecker::addSourcesPre(const CallExpr *CE,
+                                        const FunctionDecl *FDecl,
+                                        StringRef Name,
                                         CheckerContext &C) const {
-  ProgramStateRef State = nullptr;
-  const FunctionDecl *FDecl = C.getCalleeDecl(CE);
-  if (!FDecl || FDecl->getKind() != Decl::Function)
-    return;
-
-  StringRef Name = C.getCalleeName(FDecl);
-  if (Name.empty())
-    return;
-
   // First, try generating a propagation rule for this function.
   TaintPropagationRule Rule =
       TaintPropagationRule::getTaintPropagationRule(FDecl, Name, C);
   if (!Rule.isNull()) {
-    State = Rule.process(CE, C);
-    if (!State)
-      return;
-    C.addTransition(State);
-    return;
+    ProgramStateRef State = Rule.process(CE, C);
+    if (State) {
+      C.addTransition(State);
+      return true;
+    }
   }
+  return false;
+}
 
-  if (!State)
-    return;
-  C.addTransition(State);
+bool GenericTaintChecker::addFiltersPre(const CallExpr *CE, StringRef Name,
+                                        CheckerContext &C) const {
+  auto It = CustomFilters.find(Name);
+  if (It == CustomFilters.end())
+    return false;
+
+  ProgramStateRef State = C.getState();
+  const GenericTaintChecker::ArgVector &Args = It->getValue();
+  for (unsigned ArgNum : Args) {
+    if (ArgNum >= CE->getNumArgs()) {
+      llvm::errs() << "Skip out of bound filter Arg: " << Name << ":" << ArgNum
+                   << '\n';
+      continue;
+    }
+
+    State = State->add<TaintArgsOnPostVisit>({ArgNum, TaintTagNotTainted});
+  }
+
+  if (State != C.getState()) {
+    C.addTransition(State);
+    return true;
+  }
+  return false;
 }
 
 bool GenericTaintChecker::propagateFromPre(const CallExpr *CE,
@@ -503,10 +557,12 @@
   if (TaintArgs.isEmpty())
     return false;
 
-  for (unsigned ArgNum : TaintArgs) {
+  for (auto ArgTypePair : TaintArgs) {
+    unsigned ArgNum = ArgTypePair.Arg;
+    TaintTagType TagType = ArgTypePair.TagType;
     // Special handling for the tainted return value.
     if (ArgNum == ReturnValueIndex) {
-      State = addTaint(State, CE, C.getLocationContext());
+      State = addTaint(State, CE, C.getLocationContext(), TagType);
       continue;
     }
 
@@ -517,7 +573,7 @@
     const Expr *Arg = CE->getArg(ArgNum);
     Optional<SVal> V = getPointedToSVal(C, Arg);
     if (V)
-      State = addTaint(State, *V);
+      State = addTaint(State, *V, TagType);
   }
 
   // Clear up the taint info from the state.
@@ -531,19 +587,12 @@
 }
 
 bool GenericTaintChecker::checkPre(const CallExpr *CE,
+                                   const FunctionDecl *FDecl, StringRef Name,
                                    CheckerContext &C) const {
 
   if (checkUncontrolledFormatString(CE, C))
     return true;
 
-  const FunctionDecl *FDecl = C.getCalleeDecl(CE);
-  if (!FDecl || FDecl->getKind() != Decl::Function)
-    return false;
-
-  StringRef Name = C.getCalleeName(FDecl);
-  if (Name.empty())
-    return false;
-
   if (checkSystemCall(CE, Name, C))
     return true;
 
@@ -618,7 +667,8 @@
   for (unsigned ArgNum : DstArgs) {
     // Should mark the return value?
     if (ArgNum == ReturnValueIndex) {
-      State = State->add<TaintArgsOnPostVisit>(ReturnValueIndex);
+      State =
+          State->add<TaintArgsOnPostVisit>({ReturnValueIndex, TaintTagGeneric});
       continue;
     }
 
@@ -630,7 +680,7 @@
     }
 
     // Mark the given argument.
-    State = State->add<TaintArgsOnPostVisit>(ArgNum);
+    State = State->add<TaintArgsOnPostVisit>({ArgNum, TaintTagGeneric});
   }
 
   // Mark all variadic arguments tainted if present.
@@ -646,7 +696,7 @@
       QualType PType = ArgTy->getPointeeType();
       if ((!PType.isNull() && !PType.isConstQualified()) ||
           (ArgTy->isReferenceType() && !Arg->getType().isConstQualified()))
-        State = State->add<TaintArgsOnPostVisit>(i);
+        State = State->add<TaintArgsOnPostVisit>({i, TaintTagGeneric});
     }
   }
 
@@ -836,7 +886,7 @@
   if (It == CustomSinks.end())
     return false;
 
-  const GenericTaintChecker::ArgVector& Args = It->getValue();
+  const GenericTaintChecker::ArgVector &Args = It->getValue();
   for (unsigned ArgNum : Args) {
     if (ArgNum >= CE->getNumArgs()) {
       llvm::errs() << "Skip out of bound sink Arg: " << Name << ":" << ArgNum
_______________________________________________
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

Reply via email to