boga95 updated this revision to Diff 249019.
boga95 marked 2 inline comments as done.
boga95 added a comment.

Rebase to master.


CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D71524/new/

https://reviews.llvm.org/D71524

Files:
  clang/lib/StaticAnalyzer/Checkers/GenericTaintChecker.cpp
  clang/lib/StaticAnalyzer/Checkers/Taint.cpp
  clang/test/Analysis/taint-generic.cpp

Index: clang/test/Analysis/taint-generic.cpp
===================================================================
--- clang/test/Analysis/taint-generic.cpp
+++ clang/test/Analysis/taint-generic.cpp
@@ -124,3 +124,127 @@
   foo.myMemberScanf("%d", &x);
   Buffer[x] = 1; // expected-warning {{Out of bound memory access }}
 }
+
+// Test I/O
+namespace std {
+template <class CharT>
+class char_traits {};
+
+class ios_base {};
+
+template <class CharT, class Traits = char_traits<CharT>>
+class basic_ios : public ios_base {};
+
+template <class CharT, class Traits = char_traits<CharT>>
+class basic_istream : virtual public basic_ios<CharT, Traits> {};
+template <class CharT, class Traits = char_traits<CharT>>
+class basic_ostream : virtual public basic_ios<CharT, Traits> {};
+
+using istream = basic_istream<char>;
+using ostream = basic_ostream<char>;
+using wistream = basic_istream<wchar_t>;
+
+istream &operator>>(istream &is, int &val);
+wistream &operator>>(wistream &is, int &val);
+
+extern istream cin;
+extern istream wcin;
+
+template <class CharT, class Traits = char_traits<CharT>>
+class basic_fstream : public basic_istream<CharT, Traits>, public basic_ostream<CharT, Traits> {
+public:
+  basic_fstream(const char *) {}
+};
+template <class CharT, class Traits = char_traits<CharT>>
+class basic_ifstream : public basic_istream<CharT, Traits> {
+public:
+  basic_ifstream(const char *) {}
+};
+
+using ifstream = basic_ifstream<char>;
+using fstream = basic_ifstream<char>;
+
+template <class CharT>
+class allocator {};
+
+namespace __cxx11 {
+template <
+    class CharT,
+    class Traits = std::char_traits<CharT>,
+    class Allocator = std::allocator<CharT>>
+class basic_string {
+public:
+  const char *c_str();
+  basic_string operator=(const basic_string &);
+
+private:
+  unsigned size;
+  char *ptr;
+};
+} // namespace __cxx11
+
+using string = __cxx11::basic_string<char>;
+
+istream &operator>>(istream &is, string &val);
+istream &getline(istream &is, string &str);
+} // namespace std
+
+void testCin() {
+  int x, y;
+  std::cin >> x >> y;
+  Buffer[y] = 1; // expected-warning {{Out of bound memory access }}
+}
+
+void testWcin() {
+  int x, y;
+  std::wcin >> x >> y;
+  Buffer[y] = 1; // expected-warning {{Out of bound memory access }}
+}
+
+void mySink(const std::string &, int, const char *);
+
+void testFstream() {
+  std::string str;
+  std::ifstream file("example.txt");
+  file >> str;
+  mySink(str, 0, ""); // expected-warning {{Untrusted data is passed to a user-defined sink}}
+}
+
+void testIfstream() {
+  std::string str;
+  std::fstream file("example.txt");
+  file >> str;
+  mySink(str, 0, ""); // expected-warning {{Untrusted data is passed to a user-defined sink}}
+}
+
+void testStdstring() {
+  std::string str1;
+  std::cin >> str1;
+
+  std::string &str2 = str1;
+  mySink(str2, 0, ""); // expected-warning {{Untrusted data is passed to a user-defined sink}}
+
+  const std::string &str3 = str1;
+  mySink(str3, 0, ""); // expected-warning {{Untrusted data is passed to a user-defined sink}}
+}
+
+void testTaintedThis() {
+  std::string str;
+  mySink(std::string(), 0, str.c_str()); // no-warning
+
+  std::cin >> str;
+  mySink(std::string(), 0, str.c_str()); // expected-warning {{Untrusted data is passed to a user-defined sink}}
+}
+
+void testOverloadedAssignmentOp() {
+  std::string str1, str2;
+  std::cin >> str1;
+  str2 = str1;
+  mySink(str2, 0, ""); // expected-warning {{Untrusted data is passed to a user-defined sink}}
+}
+
+void testGetline() {
+  std::string str;
+  std::getline(std::cin, str);
+  mySink(str, 0, ""); // expected-warning {{Untrusted data is passed to a user-defined sink}}
+}
Index: clang/lib/StaticAnalyzer/Checkers/Taint.cpp
===================================================================
--- clang/lib/StaticAnalyzer/Checkers/Taint.cpp
+++ clang/lib/StaticAnalyzer/Checkers/Taint.cpp
@@ -152,6 +152,14 @@
     return isTainted(State, Sym, Kind);
   if (const MemRegion *Reg = V.getAsRegion())
     return isTainted(State, Reg, Kind);
+  if (auto LCV = V.getAs<nonloc::LazyCompoundVal>()) {
+    if (Optional<SVal> binding =
+            State->getStateManager().getStoreManager().getDefaultBinding(
+                *LCV)) {
+      if (SymbolRef Sym = binding->getAsSymbol())
+        return isTainted(State, Sym, Kind);
+    }
+  }
   return false;
 }
 
Index: clang/lib/StaticAnalyzer/Checkers/GenericTaintChecker.cpp
===================================================================
--- clang/lib/StaticAnalyzer/Checkers/GenericTaintChecker.cpp
+++ clang/lib/StaticAnalyzer/Checkers/GenericTaintChecker.cpp
@@ -138,6 +138,9 @@
   bool checkPre(const CallEvent &Call, const FunctionData &FData,
                 CheckerContext &C) const;
 
+  /// Add taint sources for operator>> on pre-visit.
+  static bool addOverloadedOpPre(const CallEvent &Call, CheckerContext &C);
+
   /// Add taint sources on a pre-visit. Returns true on matching.
   bool addSourcesPre(const CallEvent &Call, const FunctionData &FData,
                      CheckerContext &C) const;
@@ -154,6 +157,9 @@
   /// and thus, is tainted.
   static bool isStdin(const Expr *E, CheckerContext &C);
 
+  /// Check if the type of the variable is std::basic_istream
+  static bool isStdstream(const Expr *E, CheckerContext &C);
+
   /// Given a pointer argument, return the value it points to.
   static Optional<SVal> getPointeeOf(CheckerContext &C, const Expr *Arg);
 
@@ -258,15 +264,12 @@
       return (llvm::find(DstArgs, ArgNum) != DstArgs.end());
     }
 
-    static bool isTaintedOrPointsToTainted(const Expr *E,
-                                           const ProgramStateRef &State,
+    static bool isTaintedOrPointsToTainted(const Expr *E, ProgramStateRef State,
                                            CheckerContext &C) {
-      if (isTainted(State, E, C.getLocationContext()) || isStdin(E, C))
+      if (isTainted(State, E, C.getLocationContext()) || isStdin(E, C) ||
+          isStdstream(E, C))
         return true;
 
-      if (!E->getType().getTypePtr()->isPointerType())
-        return false;
-
       Optional<SVal> V = getPointeeOf(C, E);
       return (V && isTainted(State, *V));
     }
@@ -282,7 +285,12 @@
 
   /// Defines a map between the propagation function's name, scope
   /// and TaintPropagationRule.
-  NameRuleMap CustomPropagations;
+  NameRuleMap CustomPropagations{
+      {"c_str", {"std::__cxx11::basic_string", {{0}, {ReturnValueIndex}}}},
+      {"data", {"std::__cxx11::basic_string", {{0}, {ReturnValueIndex}}}},
+      {"size", {"std::__cxx11::basic_string", {{0}, {ReturnValueIndex}}}},
+      {"length", {"std::__cxx11::basic_string", {{0}, {ReturnValueIndex}}}},
+      {"getline", {"std::", {{0}, {1, ReturnValueIndex}}}}};
 
   /// Defines a map between the filter function's name, scope and filtering
   /// args.
@@ -354,6 +362,19 @@
 /// points to data, which should be tainted on return.
 REGISTER_SET_WITH_PROGRAMSTATE(TaintArgsOnPostVisit, unsigned)
 
+/// Treat implicit this parameter as the 0. argument.
+const Expr *getArgWithImplicitThis(const CallEvent &Call, unsigned ArgNum) {
+  if (const auto *InstanceCall = dyn_cast<CXXInstanceCall>(&Call))
+    return ArgNum == 0 ? InstanceCall->getCXXThisExpr()
+                       : InstanceCall->getArgExpr(ArgNum - 1);
+  return Call.getArgExpr(ArgNum);
+}
+
+/// Class member functions has an implicit this parameter.
+unsigned getNumArgsWithImplicitThis(const CallEvent &Call) {
+  return Call.getNumArgs() + static_cast<unsigned>(isa<CXXInstanceCall>(Call));
+}
+
 GenericTaintChecker::ArgVector
 GenericTaintChecker::convertToArgVector(CheckerManager &Mgr,
                                         const std::string &Option,
@@ -521,6 +542,9 @@
 
 void GenericTaintChecker::checkPreCall(const CallEvent &Call,
                                        CheckerContext &C) const {
+  if (addOverloadedOpPre(Call, C))
+    return;
+
   Optional<FunctionData> FData = FunctionData::create(Call, C);
   if (!FData)
     return;
@@ -550,6 +574,31 @@
   printTaint(State, Out, NL, Sep);
 }
 
+bool GenericTaintChecker::addOverloadedOpPre(const CallEvent &Call,
+                                             CheckerContext &C) {
+  const auto *OCE = dyn_cast<CXXOperatorCallExpr>(Call.getOriginExpr());
+  if (OCE) {
+    TaintPropagationRule Rule;
+    switch (OCE->getOperator()) {
+    case OO_GreaterGreater:
+      Rule = {{0}, {1, ReturnValueIndex}};
+      break;
+    case OO_Equal:
+      Rule = {{1}, {0}};
+      break;
+    default:
+      return false;
+    }
+
+    ProgramStateRef State = Rule.process(Call, C);
+    if (State) {
+      C.addTransition(State);
+      return true;
+    }
+  }
+  return false;
+}
+
 bool GenericTaintChecker::addSourcesPre(const CallEvent &Call,
                                         const FunctionData &FData,
                                         CheckerContext &C) const {
@@ -577,10 +626,10 @@
   const auto &Value = It->second;
   const ArgVector &Args = Value.second;
   for (unsigned ArgNum : Args) {
-    if (ArgNum >= Call.getNumArgs())
+    if (ArgNum >= getNumArgsWithImplicitThis(Call))
       continue;
 
-    const Expr *Arg = Call.getArgExpr(ArgNum);
+    const Expr *Arg = getArgWithImplicitThis(Call, ArgNum);
     Optional<SVal> V = getPointeeOf(C, Arg);
     if (V)
       State = removeTaint(State, *V);
@@ -613,9 +662,9 @@
 
     // The arguments are pointer arguments. The data they are pointing at is
     // tainted after the call.
-    if (Call.getNumArgs() < (ArgNum + 1))
+    if (getNumArgsWithImplicitThis(Call) < (ArgNum + 1))
       return false;
-    const Expr *Arg = Call.getArgExpr(ArgNum);
+    const Expr *Arg = getArgWithImplicitThis(Call, ArgNum);
     Optional<SVal> V = getPointeeOf(C, Arg);
     if (V)
       State = addTaint(State, *V);
@@ -679,20 +728,21 @@
   // Check for taint in arguments.
   bool IsTainted = true;
   for (unsigned ArgNum : SrcArgs) {
-    if (ArgNum >= Call.getNumArgs())
+    if (ArgNum >= getNumArgsWithImplicitThis(Call))
       continue;
 
-    if ((IsTainted =
-             isTaintedOrPointsToTainted(Call.getArgExpr(ArgNum), State, C)))
+    if ((IsTainted = isTaintedOrPointsToTainted(
+             getArgWithImplicitThis(Call, ArgNum), State, C)))
       break;
   }
 
   // Check for taint in variadic arguments.
   if (!IsTainted && VariadicType::Src == VarType) {
     // Check if any of the arguments is tainted
-    for (unsigned i = VariadicIndex; i < Call.getNumArgs(); ++i) {
-      if ((IsTainted =
-               isTaintedOrPointsToTainted(Call.getArgExpr(i), State, C)))
+    for (unsigned i = VariadicIndex; i < getNumArgsWithImplicitThis(Call);
+         ++i) {
+      if ((IsTainted = isTaintedOrPointsToTainted(
+               getArgWithImplicitThis(Call, i), State, C)))
         break;
     }
   }
@@ -711,7 +761,7 @@
       continue;
     }
 
-    if (ArgNum >= Call.getNumArgs())
+    if (ArgNum >= getNumArgsWithImplicitThis(Call))
       continue;
 
     // Mark the given argument.
@@ -724,8 +774,9 @@
     //   If they are not pointing to const data, mark data as tainted.
     //   TODO: So far we are just going one level down; ideally we'd need to
     //         recurse here.
-    for (unsigned i = VariadicIndex; i < Call.getNumArgs(); ++i) {
-      const Expr *Arg = Call.getArgExpr(i);
+    for (unsigned i = VariadicIndex; i < getNumArgsWithImplicitThis(Call);
+         ++i) {
+      const Expr *Arg = getArgWithImplicitThis(Call, i);
       // Process pointer argument.
       const Type *ArgTy = Arg->getType().getTypePtr();
       QualType PType = ArgTy->getPointeeType();
@@ -742,7 +793,7 @@
 // If argument 0(protocol domain) is network, the return value should get taint.
 bool GenericTaintChecker::TaintPropagationRule::postSocket(
     bool /*IsTainted*/, const CallEvent &Call, CheckerContext &C) {
-  SourceLocation DomLoc = Call.getArgExpr(0)->getExprLoc();
+  SourceLocation DomLoc = getArgWithImplicitThis(Call, 0)->getExprLoc();
   StringRef DomName = C.getMacroNameOrSpelling(DomLoc);
   // White list the internal communication protocols.
   if (DomName.equals("AF_SYSTEM") || DomName.equals("AF_LOCAL") ||
@@ -751,6 +802,11 @@
   return true;
 }
 
+bool GenericTaintChecker::isStdstream(const Expr *E, CheckerContext &C) {
+  const std::string CT = E->getType().getCanonicalType().getAsString();
+  return StringRef(CT).find("std::basic_istream") != StringRef::npos;
+}
+
 bool GenericTaintChecker::isStdin(const Expr *E, CheckerContext &C) {
   ProgramStateRef State = C.getState();
   SVal Val = C.getSVal(E);
@@ -797,7 +853,7 @@
   for (const auto *Format : FDecl->specific_attrs<FormatAttr>()) {
     ArgNum = Format->getFormatIdx() - 1;
     if ((Format->getType()->getName() == "printf") &&
-        Call.getNumArgs() > ArgNum)
+        getNumArgsWithImplicitThis(Call) > ArgNum)
       return true;
   }
 
@@ -846,7 +902,7 @@
 
   // If either the format string content or the pointer itself are tainted,
   // warn.
-  return generateReportIfTainted(Call.getArgExpr(ArgNum),
+  return generateReportIfTainted(getArgWithImplicitThis(Call, ArgNum),
                                  MsgUncontrolledFormatString, C);
 }
 
@@ -868,11 +924,12 @@
                         .Case("dlopen", 0)
                         .Default(InvalidArgIndex);
 
-  if (ArgNum == InvalidArgIndex || Call.getNumArgs() < (ArgNum + 1))
+  if (ArgNum == InvalidArgIndex ||
+      getNumArgsWithImplicitThis(Call) < (ArgNum + 1))
     return false;
 
-  return generateReportIfTainted(Call.getArgExpr(ArgNum), MsgSanitizeSystemArgs,
-                                 C);
+  return generateReportIfTainted(getArgWithImplicitThis(Call, ArgNum),
+                                 MsgSanitizeSystemArgs, C);
 }
 
 // TODO: Should this check be a part of the CString checker?
@@ -912,9 +969,10 @@
       ArgNum = 2;
   }
 
-  return ArgNum != InvalidArgIndex && Call.getNumArgs() > ArgNum &&
-         generateReportIfTainted(Call.getArgExpr(ArgNum), MsgTaintedBufferSize,
-                                 C);
+  return ArgNum != InvalidArgIndex &&
+         getNumArgsWithImplicitThis(Call) > ArgNum &&
+         generateReportIfTainted(getArgWithImplicitThis(Call, ArgNum),
+                                 MsgTaintedBufferSize, C);
 }
 
 bool GenericTaintChecker::checkCustomSinks(const CallEvent &Call,
@@ -927,10 +985,11 @@
   const auto &Value = It->second;
   const GenericTaintChecker::ArgVector &Args = Value.second;
   for (unsigned ArgNum : Args) {
-    if (ArgNum >= Call.getNumArgs())
+    if (ArgNum >= getNumArgsWithImplicitThis(Call))
       continue;
 
-    if (generateReportIfTainted(Call.getArgExpr(ArgNum), MsgCustomSink, C))
+    if (generateReportIfTainted(getArgWithImplicitThis(Call, ArgNum),
+                                MsgCustomSink, C))
       return true;
   }
 
_______________________________________________
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

Reply via email to