steakhal updated this revision to Diff 286212.
steakhal marked 2 inline comments as done.
steakhal edited the summary of this revision.
steakhal added a comment.
Herald added a subscriber: mgorny.

Fixed Artem's inline comments:

- `cstring::getCStringLength` now takes `StateRef` by value
- `cstring::dumpCStringLengths` now takes `StateRef` by non-const value


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D84979/new/

https://reviews.llvm.org/D84979

Files:
  clang/lib/StaticAnalyzer/Checkers/CMakeLists.txt
  clang/lib/StaticAnalyzer/Checkers/CStringChecker.cpp
  clang/lib/StaticAnalyzer/Checkers/CStringChecker/CStringChecker.cpp
  clang/lib/StaticAnalyzer/Checkers/CStringChecker/CStringChecker.h
  clang/lib/StaticAnalyzer/Checkers/CStringChecker/CStringLength.h
  clang/lib/StaticAnalyzer/Checkers/CStringChecker/CStringLengthModeling.cpp

Index: clang/lib/StaticAnalyzer/Checkers/CStringChecker/CStringLengthModeling.cpp
===================================================================
--- /dev/null
+++ clang/lib/StaticAnalyzer/Checkers/CStringChecker/CStringLengthModeling.cpp
@@ -0,0 +1,306 @@
+//=== CStringLengthModeling.cpp  Implementation of CStringLength API C++ -*--=//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// Implements the CStringLength API and the CStringChecker bookkeeping parts.
+// Updates the associated cstring lengths of memory regions:
+//  - Infers the cstring length of string literals.
+//  - Removes cstring length associations of dead symbols.
+//  - Handles region invalidation.
+//
+//===----------------------------------------------------------------------===//
+
+#include "CStringChecker.h"
+#include "CStringLength.h"
+
+#include "clang/StaticAnalyzer/Core/BugReporter/BugReporter.h"
+#include "clang/StaticAnalyzer/Core/CheckerManager.h"
+#include "clang/StaticAnalyzer/Core/PathSensitive/CheckerContext.h"
+#include "clang/StaticAnalyzer/Core/PathSensitive/ProgramStateTrait.h"
+#include "llvm/Support/raw_ostream.h"
+
+using namespace clang;
+using namespace ento;
+using namespace cstring;
+
+/// Associates an strlen to a memory region.
+REGISTER_MAP_WITH_PROGRAMSTATE(CStringLengthMap, const MemRegion *, SVal)
+
+//===----------------------------------------------------------------------===//
+// Implementation of the public CStringLength API.
+//===----------------------------------------------------------------------===//
+
+ProgramStateRef cstring::setCStringLength(ProgramStateRef State,
+                                          const MemRegion *MR, SVal StrLength) {
+  assert(!StrLength.isUndef() && "Attempt to set an undefined string length");
+
+  MR = MR->StripCasts();
+
+  switch (MR->getKind()) {
+  case MemRegion::StringRegionKind:
+    // FIXME: This can happen if we strcpy() into a string region. This is
+    // undefined [C99 6.4.5p6], but we should still warn about it.
+    return State;
+
+  case MemRegion::SymbolicRegionKind:
+  case MemRegion::AllocaRegionKind:
+  case MemRegion::NonParamVarRegionKind:
+  case MemRegion::ParamVarRegionKind:
+  case MemRegion::FieldRegionKind:
+  case MemRegion::ObjCIvarRegionKind:
+    // These are the types we can currently track string lengths for.
+    break;
+
+  case MemRegion::ElementRegionKind:
+    // FIXME: Handle element regions by upper-bounding the parent region's
+    // string length.
+    return State;
+
+  default:
+    // Other regions (mostly non-data) can't have a reliable C string length.
+    // For now, just ignore the change.
+    // FIXME: These are rare but not impossible. We should output some kind of
+    // warning for things like strcpy((char[]){'a', 0}, "b");
+    return State;
+  }
+
+  if (StrLength.isUnknown())
+    return removeCStringLength(State, MR);
+  return State->set<CStringLengthMap>(MR, StrLength);
+}
+
+ProgramStateRef cstring::removeCStringLength(ProgramStateRef State,
+                                             const MemRegion *MR) {
+  return State->remove<CStringLengthMap>(MR);
+}
+
+NonLoc cstring::createCStringLength(ProgramStateRef &State, CheckerContext &Ctx,
+                                    const Expr *Ex, const MemRegion *MR) {
+  assert(Ex);
+  assert(MR);
+
+  SValBuilder &SVB = Ctx.getSValBuilder();
+  QualType SizeTy = SVB.getContext().getSizeType();
+  NonLoc CStrLen =
+      SVB.getMetadataSymbolVal(CStringChecker::getTag(), MR, Ex, SizeTy,
+                               Ctx.getLocationContext(), Ctx.blockCount())
+          .castAs<NonLoc>();
+
+  // Implicitly constrain the range to SIZE_MAX/4
+  BasicValueFactory &BVF = SVB.getBasicValueFactory();
+  const llvm::APSInt &MaxValue = BVF.getMaxValue(SizeTy);
+  const llvm::APSInt Four = APSIntType(MaxValue).getValue(4);
+  const llvm::APSInt *MaxLength = BVF.evalAPSInt(BO_Div, MaxValue, Four);
+  const NonLoc MaxLengthSVal = SVB.makeIntVal(*MaxLength);
+  SVal Constrained =
+      SVB.evalBinOpNN(State, BO_LE, CStrLen, MaxLengthSVal, SizeTy);
+  State = State->assume(Constrained.castAs<DefinedOrUnknownSVal>(), true);
+  State = State->set<CStringLengthMap>(MR, CStrLen);
+  return CStrLen;
+}
+
+Optional<SVal> cstring::getCStringLength(CheckerContext &Ctx,
+                                         ProgramStateRef State, SVal Buf) {
+  if (Buf.isUnknownOrUndef())
+    return Buf;
+
+  if (Buf.getAs<loc::GotoLabel>())
+    return UndefinedVal();
+
+  // If it's not a region, give up.
+  const MemRegion *MR = Buf.getAsRegion();
+  if (!MR)
+    return UnknownVal();
+
+  // If we have a region, strip casts from it and see if we can figure out
+  // its length. For anything we can't figure out, just return UnknownVal.
+  MR = MR->StripCasts();
+
+  switch (MR->getKind()) {
+  case MemRegion::StringRegionKind: {
+    // Modifying the contents of string regions is undefined [C99 6.4.5p6],
+    // so we can assume that the byte length is the correct C string length.
+    SValBuilder &SVB = Ctx.getSValBuilder();
+    QualType SizeTy = SVB.getContext().getSizeType();
+    const StringLiteral *StrLiteral =
+        cast<StringRegion>(MR)->getStringLiteral();
+    return SVB.makeIntVal(StrLiteral->getByteLength(), SizeTy);
+  }
+  case MemRegion::SymbolicRegionKind:
+  case MemRegion::AllocaRegionKind:
+  case MemRegion::NonParamVarRegionKind:
+  case MemRegion::ParamVarRegionKind:
+  case MemRegion::FieldRegionKind:
+  case MemRegion::ObjCIvarRegionKind:
+    if (const SVal *RecordedLength = State->get<CStringLengthMap>(MR))
+      return *RecordedLength;
+    return llvm::None;
+  case MemRegion::CompoundLiteralRegionKind:
+    // FIXME: Can we track this? Is it necessary?
+    return UnknownVal();
+  case MemRegion::ElementRegionKind:
+    // FIXME: How can we handle this? It's not good enough to subtract the
+    // offset from the base string length; consider "123\x00567" and &a[5].
+    return UnknownVal();
+  default:
+    // Other regions (mostly non-data) can't have a reliable C string length.
+    return UndefinedVal();
+  }
+}
+
+void cstring::dumpCStringLengths(ProgramStateRef State, raw_ostream &Out,
+                                 const char *NL, const char *Sep) {
+  const CStringLengthMapTy Items = State->get<CStringLengthMap>();
+  if (!Items.isEmpty())
+    Out << "CString lengths:" << NL;
+  for (const auto &Item : Items) {
+    Item.first->dumpToStream(Out);
+    Out << Sep;
+    Item.second.dumpToStream(Out);
+    Out << NL;
+  }
+}
+
+//===----------------------------------------------------------------------===//
+// Implementation of the tracking and bookkeeping part of the CStringChecker.
+// Updates the CStringLengthMap.
+// - Infers the cstring length of string literals.
+// - Removes cstring length associations of dead symbols.
+// - Handles region invalidation.
+//===----------------------------------------------------------------------===//
+
+void *CStringChecker::getTag() {
+  static int Tag;
+  return &Tag;
+}
+
+void CStringChecker::checkPreStmt(const DeclStmt *DS, CheckerContext &C) const {
+  // Record string length for char a[] = "abc";
+  ProgramStateRef state = C.getState();
+
+  for (const auto *I : DS->decls()) {
+    const VarDecl *D = dyn_cast<VarDecl>(I);
+    if (!D)
+      continue;
+
+    // FIXME: Handle array fields of structs.
+    if (!D->getType()->isArrayType())
+      continue;
+
+    const Expr *Init = D->getInit();
+    if (!Init)
+      continue;
+    if (!isa<StringLiteral>(Init))
+      continue;
+
+    Loc VarLoc = state->getLValue(D, C.getLocationContext());
+    const MemRegion *MR = VarLoc.getAsRegion();
+    if (!MR)
+      continue;
+
+    SVal StrVal = C.getSVal(Init);
+    assert(StrVal.isValid() && "Initializer string is unknown or undefined");
+    DefinedOrUnknownSVal strLength =
+        getCStringLength(C, state, StrVal)->castAs<DefinedOrUnknownSVal>();
+
+    state = state->set<CStringLengthMap>(MR, strLength);
+  }
+
+  C.addTransition(state);
+}
+
+void CStringChecker::checkLiveSymbols(ProgramStateRef State,
+                                      SymbolReaper &SR) const {
+  // Mark all symbols in our string length map as valid.
+  for (const auto &Item : State->get<CStringLengthMap>()) {
+    SVal Len = Item.second;
+    const auto LenSymbolRange =
+        llvm::make_range(Len.symbol_begin(), Len.symbol_end());
+    for (SymbolRef Symbol : LenSymbolRange)
+      SR.markInUse(Symbol);
+  }
+}
+
+void CStringChecker::checkDeadSymbols(SymbolReaper &SR,
+                                      CheckerContext &C) const {
+  ProgramStateRef State = C.getState();
+  CStringLengthMapTy Entries = State->get<CStringLengthMap>();
+  if (Entries.isEmpty())
+    return;
+
+  CStringLengthMapTy::Factory &F = State->get_context<CStringLengthMap>();
+  for (CStringLengthMapTy::iterator I = Entries.begin(), E = Entries.end();
+       I != E; ++I) {
+    SVal Len = I.getData();
+    if (SymbolRef Sym = Len.getAsSymbol()) {
+      if (SR.isDead(Sym))
+        Entries = F.remove(Entries, I.getKey());
+    }
+  }
+
+  State = State->set<CStringLengthMap>(Entries);
+  C.addTransition(State);
+}
+
+ProgramStateRef CStringChecker::checkRegionChanges(
+    ProgramStateRef state, const InvalidatedSymbols *,
+    ArrayRef<const MemRegion *> ExplicitRegions,
+    ArrayRef<const MemRegion *> Regions, const LocationContext *,
+    const CallEvent *) const {
+  CStringLengthMapTy Entries = state->get<CStringLengthMap>();
+  if (Entries.isEmpty())
+    return state;
+
+  llvm::SmallPtrSet<const MemRegion *, 8> Invalidated;
+  llvm::SmallPtrSet<const MemRegion *, 32> SuperRegions;
+
+  // First build sets for the changed regions and their super-regions.
+  for (ArrayRef<const MemRegion *>::iterator I = Regions.begin(),
+                                             E = Regions.end();
+       I != E; ++I) {
+    const MemRegion *MR = *I;
+    Invalidated.insert(MR);
+
+    SuperRegions.insert(MR);
+    while (const SubRegion *SR = dyn_cast<SubRegion>(MR)) {
+      MR = SR->getSuperRegion();
+      SuperRegions.insert(MR);
+    }
+  }
+
+  CStringLengthMapTy::Factory &F = state->get_context<CStringLengthMap>();
+
+  // Then loop over the entries in the current state.
+  for (CStringLengthMapTy::iterator I = Entries.begin(), E = Entries.end();
+       I != E; ++I) {
+    const MemRegion *MR = I.getKey();
+
+    // Is this entry for a super-region of a changed region?
+    if (SuperRegions.count(MR)) {
+      Entries = F.remove(Entries, MR);
+      continue;
+    }
+
+    // Is this entry for a sub-region of a changed region?
+    const MemRegion *Super = MR;
+    while (const SubRegion *SR = dyn_cast<SubRegion>(Super)) {
+      Super = SR->getSuperRegion();
+      if (Invalidated.count(Super)) {
+        Entries = F.remove(Entries, MR);
+        break;
+      }
+    }
+  }
+
+  return state->set<CStringLengthMap>(Entries);
+}
+
+// TODO: Is it useful?
+void CStringChecker::printState(raw_ostream &Out, ProgramStateRef State,
+                                const char *NL, const char *Sep) const {
+  dumpCStringLengths(State, Out, NL, Sep);
+}
Index: clang/lib/StaticAnalyzer/Checkers/CStringChecker/CStringLength.h
===================================================================
--- /dev/null
+++ clang/lib/StaticAnalyzer/Checkers/CStringChecker/CStringLength.h
@@ -0,0 +1,58 @@
+//=== CStringLength.h Query and store the length of a cstring. ---*- C++ -*--=//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// Defines an interface for interacting with and manipulating the associated
+// cstring length of a given memory region.
+// You can assign a cstring length to any memory region.
+// The represented value is what strlen would return on the given memory region.
+// Eg: 3 for both "ABC" and "abc\00def".
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_CLANG_LIB_STATICANALYZER_CHECKERS_CSTRINGLENGTH_H
+#define LLVM_CLANG_LIB_STATICANALYZER_CHECKERS_CSTRINGLENGTH_H
+
+#include "clang/StaticAnalyzer/Core/PathSensitive/ProgramState.h"
+#include "clang/StaticAnalyzer/Core/PathSensitive/SVals.h"
+
+namespace clang {
+namespace ento {
+class CheckerContext;
+
+namespace cstring {
+
+/// Assigns a cstring length to a memory region.
+LLVM_NODISCARD ProgramStateRef setCStringLength(ProgramStateRef State,
+                                                const MemRegion *MR,
+                                                SVal StrLength);
+
+/// Removes the assigned cstring length from the memory region.
+/// It is useful for invalidation.
+LLVM_NODISCARD ProgramStateRef removeCStringLength(ProgramStateRef State,
+                                                   const MemRegion *MR);
+
+/// Gets the associated cstring length of a region.
+/// If no such length exists, None is returned.
+LLVM_NODISCARD Optional<SVal> getCStringLength(CheckerContext &Ctx,
+                                               ProgramStateRef State, SVal Buf);
+
+/// Creates a metadata symbol, tracking the cstring length of the given region.
+/// It implicitly applies certain constraints to the created value.
+LLVM_NODISCARD NonLoc createCStringLength(ProgramStateRef &State,
+                                          CheckerContext &Ctx, const Expr *Ex,
+                                          const MemRegion *MR);
+
+LLVM_DUMP_METHOD void dumpCStringLengths(ProgramStateRef State,
+                                         raw_ostream &Out = llvm::errs(),
+                                         const char *NL = "\n",
+                                         const char *Sep = ": ");
+} // namespace cstring
+} // namespace ento
+} // namespace clang
+
+#endif
Index: clang/lib/StaticAnalyzer/Checkers/CStringChecker/CStringChecker.h
===================================================================
--- /dev/null
+++ clang/lib/StaticAnalyzer/Checkers/CStringChecker/CStringChecker.h
@@ -0,0 +1,225 @@
+//= CStringChecker.h - Checks calls to C string functions ----------*- C++ -*-//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// Models C string related functions.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_CLANG_LIB_STATICANALYZER_CHECKERS_CSTRINGCHECKER_CSTRINGCHECKER_H
+#define LLVM_CLANG_LIB_STATICANALYZER_CHECKERS_CSTRINGCHECKER_CSTRINGCHECKER_H
+
+#include "clang/StaticAnalyzer/Core/PathSensitive/CallEvent.h"
+#include "clang/StaticAnalyzer/Core/PathSensitive/CheckerContext.h"
+
+namespace clang {
+namespace ento {
+namespace cstring {
+
+struct AnyArgExpr {
+  // FIXME: Remove constructor in C++17 to turn it into an aggregate.
+  AnyArgExpr(const Expr *Expression, unsigned ArgumentIndex)
+      : Expression{Expression}, ArgumentIndex{ArgumentIndex} {}
+  const Expr *Expression;
+  unsigned ArgumentIndex;
+};
+
+struct SourceArgExpr : AnyArgExpr {
+  using AnyArgExpr::AnyArgExpr; // FIXME: Remove using in C++17.
+};
+
+struct DestinationArgExpr : AnyArgExpr {
+  using AnyArgExpr::AnyArgExpr; // FIXME: Same.
+};
+
+struct SizeArgExpr : AnyArgExpr {
+  using AnyArgExpr::AnyArgExpr; // FIXME: Same.
+};
+
+class CStringChecker
+    : public Checker<eval::Call, check::PreStmt<DeclStmt>, check::LiveSymbols,
+                     check::DeadSymbols, check::RegionChanges> {
+  mutable std::unique_ptr<BugType> BT_Null, BT_Bounds, BT_Overlap,
+      BT_NotCString;
+
+  mutable const char *CurrentFunctionDescription;
+
+  using ErrorMessage = SmallString<128>;
+  enum class AccessKind { write, read };
+  enum class ConcatFnKind { none = 0, strcat = 1, strlcat = 2 };
+
+public:
+  /// Models and checks cstring related function pre and post-conditions.
+  bool evalCall(const CallEvent &Call, CheckerContext &C) const;
+
+  /// Tracks and maintains the associated cstring lengths of memory regions.
+  static void *getTag();
+  void checkPreStmt(const DeclStmt *, CheckerContext &) const;
+  void checkLiveSymbols(ProgramStateRef, SymbolReaper &) const;
+  void checkDeadSymbols(SymbolReaper &, CheckerContext &) const;
+  ProgramStateRef
+  checkRegionChanges(ProgramStateRef, const InvalidatedSymbols *,
+                     ArrayRef<const MemRegion *>, ArrayRef<const MemRegion *>,
+                     const LocationContext *, const CallEvent *) const;
+  // TODO: Is it useful?
+  void printState(raw_ostream &Out, ProgramStateRef State, const char *NL,
+                  const char *Sep) const;
+
+  /// The filter is used to filter out the diagnostics which are not enabled by
+  /// the user.
+  struct {
+    DefaultBool CheckCStringNullArg;
+    DefaultBool CheckCStringOutOfBounds;
+    DefaultBool CheckCStringBufferOverlap;
+    DefaultBool CheckCStringNotNullTerm;
+
+    CheckerNameRef CheckNameCStringNullArg;
+    CheckerNameRef CheckNameCStringOutOfBounds;
+    CheckerNameRef CheckNameCStringBufferOverlap;
+    CheckerNameRef CheckNameCStringNotNullTerm;
+  } Filter;
+
+private:
+  typedef void (CStringChecker::*FnCheck)(CheckerContext &,
+                                          const CallExpr *) const;
+  CallDescriptionMap<FnCheck> Callbacks = {
+      {{CDF_MaybeBuiltin, "memcpy", 3}, &CStringChecker::evalMemcpy},
+      {{CDF_MaybeBuiltin, "mempcpy", 3}, &CStringChecker::evalMempcpy},
+      {{CDF_MaybeBuiltin, "memcmp", 3}, &CStringChecker::evalMemcmp},
+      {{CDF_MaybeBuiltin, "memmove", 3}, &CStringChecker::evalMemmove},
+      {{CDF_MaybeBuiltin, "memset", 3}, &CStringChecker::evalMemset},
+      {{CDF_MaybeBuiltin, "explicit_memset", 3}, &CStringChecker::evalMemset},
+      {{CDF_MaybeBuiltin, "strcpy", 2}, &CStringChecker::evalStrcpy},
+      {{CDF_MaybeBuiltin, "strncpy", 3}, &CStringChecker::evalStrncpy},
+      {{CDF_MaybeBuiltin, "stpcpy", 2}, &CStringChecker::evalStpcpy},
+      {{CDF_MaybeBuiltin, "strlcpy", 3}, &CStringChecker::evalStrlcpy},
+      {{CDF_MaybeBuiltin, "strcat", 2}, &CStringChecker::evalStrcat},
+      {{CDF_MaybeBuiltin, "strncat", 3}, &CStringChecker::evalStrncat},
+      {{CDF_MaybeBuiltin, "strlcat", 3}, &CStringChecker::evalStrlcat},
+      {{CDF_MaybeBuiltin, "strlen", 1}, &CStringChecker::evalstrLength},
+      {{CDF_MaybeBuiltin, "strnlen", 2}, &CStringChecker::evalstrnLength},
+      {{CDF_MaybeBuiltin, "strcmp", 2}, &CStringChecker::evalStrcmp},
+      {{CDF_MaybeBuiltin, "strncmp", 3}, &CStringChecker::evalStrncmp},
+      {{CDF_MaybeBuiltin, "strcasecmp", 2}, &CStringChecker::evalStrcasecmp},
+      {{CDF_MaybeBuiltin, "strncasecmp", 3}, &CStringChecker::evalStrncasecmp},
+      {{CDF_MaybeBuiltin, "strsep", 2}, &CStringChecker::evalStrsep},
+      {{CDF_MaybeBuiltin, "bcopy", 3}, &CStringChecker::evalBcopy},
+      {{CDF_MaybeBuiltin, "bcmp", 3}, &CStringChecker::evalMemcmp},
+      {{CDF_MaybeBuiltin, "bzero", 2}, &CStringChecker::evalBzero},
+      {{CDF_MaybeBuiltin, "explicit_bzero", 2}, &CStringChecker::evalBzero},
+  };
+
+  // These require a bit of special handling.
+  CallDescription StdCopy{{"std", "copy"}, 3},
+      StdCopyBackward{{"std", "copy_backward"}, 3};
+
+  FnCheck identifyCall(const CallEvent &Call, CheckerContext &C) const;
+  void evalMemcpy(CheckerContext &C, const CallExpr *CE) const;
+  void evalMempcpy(CheckerContext &C, const CallExpr *CE) const;
+  void evalMemmove(CheckerContext &C, const CallExpr *CE) const;
+  void evalBcopy(CheckerContext &C, const CallExpr *CE) const;
+  void evalCopyCommon(CheckerContext &C, const CallExpr *CE,
+                      ProgramStateRef state, SizeArgExpr Size,
+                      DestinationArgExpr Dest, SourceArgExpr Source,
+                      bool Restricted, bool IsMempcpy) const;
+
+  void evalMemcmp(CheckerContext &C, const CallExpr *CE) const;
+
+  void evalstrLength(CheckerContext &C, const CallExpr *CE) const;
+  void evalstrnLength(CheckerContext &C, const CallExpr *CE) const;
+  void evalstrLengthCommon(CheckerContext &C, const CallExpr *CE,
+                           bool IsStrnlen = false) const;
+
+  void evalStrcpy(CheckerContext &C, const CallExpr *CE) const;
+  void evalStrncpy(CheckerContext &C, const CallExpr *CE) const;
+  void evalStpcpy(CheckerContext &C, const CallExpr *CE) const;
+  void evalStrlcpy(CheckerContext &C, const CallExpr *CE) const;
+  void evalStrcpyCommon(CheckerContext &C, const CallExpr *CE, bool ReturnEnd,
+                        bool IsBounded, ConcatFnKind appendK,
+                        bool returnPtr = true) const;
+
+  void evalStrcat(CheckerContext &C, const CallExpr *CE) const;
+  void evalStrncat(CheckerContext &C, const CallExpr *CE) const;
+  void evalStrlcat(CheckerContext &C, const CallExpr *CE) const;
+
+  void evalStrcmp(CheckerContext &C, const CallExpr *CE) const;
+  void evalStrncmp(CheckerContext &C, const CallExpr *CE) const;
+  void evalStrcasecmp(CheckerContext &C, const CallExpr *CE) const;
+  void evalStrncasecmp(CheckerContext &C, const CallExpr *CE) const;
+  void evalStrcmpCommon(CheckerContext &C, const CallExpr *CE,
+                        bool IsBounded = false, bool IgnoreCase = false) const;
+
+  void evalStrsep(CheckerContext &C, const CallExpr *CE) const;
+
+  void evalStdCopy(CheckerContext &C, const CallExpr *CE) const;
+  void evalStdCopyBackward(CheckerContext &C, const CallExpr *CE) const;
+  void evalStdCopyCommon(CheckerContext &C, const CallExpr *CE) const;
+  void evalMemset(CheckerContext &C, const CallExpr *CE) const;
+  void evalBzero(CheckerContext &C, const CallExpr *CE) const;
+
+  // Utility methods
+
+  static ErrorMessage createOutOfBoundErrorMsg(StringRef FunctionDescription,
+                                               AccessKind Access);
+
+  /// Simply wraps the cstring::getCStringLength function to emit warnings.
+  SVal getCStringLengthChecked(CheckerContext &Ctx, ProgramStateRef &State,
+                               const Expr *Ex, SVal Buf) const;
+
+  std::pair<ProgramStateRef, ProgramStateRef> static assumeZero(
+      CheckerContext &C, ProgramStateRef state, SVal V, QualType Ty);
+
+  static ProgramStateRef InvalidateBuffer(CheckerContext &C,
+                                          ProgramStateRef state, const Expr *Ex,
+                                          SVal V, bool IsSourceBuffer,
+                                          const Expr *Size);
+
+  static bool SummarizeRegion(raw_ostream &os, ASTContext &Ctx,
+                              const MemRegion *MR);
+
+  static bool memsetAux(const Expr *DstBuffer, SVal CharE, const Expr *Size,
+                        CheckerContext &C, ProgramStateRef &State);
+
+  // Re-usable checks
+  ProgramStateRef checkNonNull(CheckerContext &C, ProgramStateRef State,
+                               AnyArgExpr Arg, SVal l) const;
+  ProgramStateRef CheckLocation(CheckerContext &C, ProgramStateRef state,
+                                AnyArgExpr Buffer, SVal Element,
+                                AccessKind Access) const;
+  ProgramStateRef CheckBufferAccess(CheckerContext &C, ProgramStateRef State,
+                                    AnyArgExpr Buffer, SizeArgExpr Size,
+                                    AccessKind Access) const;
+  ProgramStateRef CheckOverlap(CheckerContext &C, ProgramStateRef state,
+                               SizeArgExpr Size, AnyArgExpr First,
+                               AnyArgExpr Second) const;
+  void emitOverlapBug(CheckerContext &C, ProgramStateRef state,
+                      const Stmt *First, const Stmt *Second) const;
+
+  void emitNullArgBug(CheckerContext &C, ProgramStateRef State, const Stmt *S,
+                      StringRef WarningMsg) const;
+  void emitOutOfBoundsBug(CheckerContext &C, ProgramStateRef State,
+                          const Stmt *S, StringRef WarningMsg) const;
+  void emitNotCStringBug(CheckerContext &C, ProgramStateRef State,
+                         const Stmt *S, StringRef WarningMsg) const;
+  void emitAdditionOverflowBug(CheckerContext &C, ProgramStateRef State) const;
+
+  ProgramStateRef checkAdditionOverflow(CheckerContext &C,
+                                        ProgramStateRef state, NonLoc left,
+                                        NonLoc right) const;
+
+  // Return true if the destination buffer of the copy function may be in bound.
+  // Expects SVal of Size to be positive and unsigned.
+  // Expects SVal of FirstBuf to be a FieldRegion.
+  static bool IsFirstBufInBound(CheckerContext &C, ProgramStateRef state,
+                                const Expr *FirstBuf, const Expr *Size);
+};
+
+} // namespace cstring
+} // namespace ento
+} // namespace clang
+
+#endif
\ No newline at end of file
Index: clang/lib/StaticAnalyzer/Checkers/CStringChecker/CStringChecker.cpp
===================================================================
--- clang/lib/StaticAnalyzer/Checkers/CStringChecker/CStringChecker.cpp
+++ clang/lib/StaticAnalyzer/Checkers/CStringChecker/CStringChecker.cpp
@@ -11,50 +11,27 @@
 //
 //===----------------------------------------------------------------------===//
 
-#include "InterCheckerAPI.h"
+#include "CStringChecker.h"
+#include "CStringLength.h"
 #include "clang/Basic/CharInfo.h"
 #include "clang/StaticAnalyzer/Checkers/BuiltinCheckerRegistration.h"
 #include "clang/StaticAnalyzer/Core/BugReporter/BugType.h"
-#include "clang/StaticAnalyzer/Core/Checker.h"
 #include "clang/StaticAnalyzer/Core/CheckerManager.h"
 #include "clang/StaticAnalyzer/Core/PathSensitive/CallEvent.h"
 #include "clang/StaticAnalyzer/Core/PathSensitive/CheckerContext.h"
 #include "clang/StaticAnalyzer/Core/PathSensitive/DynamicSize.h"
-#include "clang/StaticAnalyzer/Core/PathSensitive/ProgramStateTrait.h"
 #include "llvm/ADT/STLExtras.h"
 #include "llvm/ADT/SmallString.h"
 #include "llvm/ADT/StringExtras.h"
 #include "llvm/Support/raw_ostream.h"
 
-using namespace clang;
-using namespace ento;
+namespace clang {
+namespace ento {
+namespace cstring {
 
-namespace {
-struct AnyArgExpr {
-  // FIXME: Remove constructor in C++17 to turn it into an aggregate.
-  AnyArgExpr(const Expr *Expression, unsigned ArgumentIndex)
-      : Expression{Expression}, ArgumentIndex{ArgumentIndex} {}
-  const Expr *Expression;
-  unsigned ArgumentIndex;
-};
-
-struct SourceArgExpr : AnyArgExpr {
-  using AnyArgExpr::AnyArgExpr; // FIXME: Remove using in C++17.
-};
-
-struct DestinationArgExpr : AnyArgExpr {
-  using AnyArgExpr::AnyArgExpr; // FIXME: Same.
-};
-
-struct SizeArgExpr : AnyArgExpr {
-  using AnyArgExpr::AnyArgExpr; // FIXME: Same.
-};
-
-using ErrorMessage = SmallString<128>;
-enum class AccessKind { write, read };
-
-static ErrorMessage createOutOfBoundErrorMsg(StringRef FunctionDescription,
-                                             AccessKind Access) {
+auto CStringChecker::createOutOfBoundErrorMsg(StringRef FunctionDescription,
+                                              AccessKind Access)
+    -> ErrorMessage {
   ErrorMessage Message;
   llvm::raw_svector_ostream Os(Message);
 
@@ -71,214 +48,86 @@
   return Message;
 }
 
-enum class ConcatFnKind { none = 0, strcat = 1, strlcat = 2 };
-class CStringChecker : public Checker< eval::Call,
-                                         check::PreStmt<DeclStmt>,
-                                         check::LiveSymbols,
-                                         check::DeadSymbols,
-                                         check::RegionChanges
-                                         > {
-  mutable std::unique_ptr<BugType> BT_Null, BT_Bounds, BT_Overlap,
-      BT_NotCString, BT_AdditionOverflow;
-
-  mutable const char *CurrentFunctionDescription;
-
-public:
-  /// The filter is used to filter out the diagnostics which are not enabled by
-  /// the user.
-  struct CStringChecksFilter {
-    DefaultBool CheckCStringNullArg;
-    DefaultBool CheckCStringOutOfBounds;
-    DefaultBool CheckCStringBufferOverlap;
-    DefaultBool CheckCStringNotNullTerm;
-
-    CheckerNameRef CheckNameCStringNullArg;
-    CheckerNameRef CheckNameCStringOutOfBounds;
-    CheckerNameRef CheckNameCStringBufferOverlap;
-    CheckerNameRef CheckNameCStringNotNullTerm;
-  };
-
-  CStringChecksFilter Filter;
-
-  static void *getTag() { static int tag; return &tag; }
-
-  bool evalCall(const CallEvent &Call, CheckerContext &C) const;
-  void checkPreStmt(const DeclStmt *DS, CheckerContext &C) const;
-  void checkLiveSymbols(ProgramStateRef state, SymbolReaper &SR) const;
-  void checkDeadSymbols(SymbolReaper &SR, CheckerContext &C) const;
-
-  ProgramStateRef
-    checkRegionChanges(ProgramStateRef state,
-                       const InvalidatedSymbols *,
-                       ArrayRef<const MemRegion *> ExplicitRegions,
-                       ArrayRef<const MemRegion *> Regions,
-                       const LocationContext *LCtx,
-                       const CallEvent *Call) const;
-
-  typedef void (CStringChecker::*FnCheck)(CheckerContext &,
-                                          const CallExpr *) const;
-  CallDescriptionMap<FnCheck> Callbacks = {
-      {{CDF_MaybeBuiltin, "memcpy", 3}, &CStringChecker::evalMemcpy},
-      {{CDF_MaybeBuiltin, "mempcpy", 3}, &CStringChecker::evalMempcpy},
-      {{CDF_MaybeBuiltin, "memcmp", 3}, &CStringChecker::evalMemcmp},
-      {{CDF_MaybeBuiltin, "memmove", 3}, &CStringChecker::evalMemmove},
-      {{CDF_MaybeBuiltin, "memset", 3}, &CStringChecker::evalMemset},
-      {{CDF_MaybeBuiltin, "explicit_memset", 3}, &CStringChecker::evalMemset},
-      {{CDF_MaybeBuiltin, "strcpy", 2}, &CStringChecker::evalStrcpy},
-      {{CDF_MaybeBuiltin, "strncpy", 3}, &CStringChecker::evalStrncpy},
-      {{CDF_MaybeBuiltin, "stpcpy", 2}, &CStringChecker::evalStpcpy},
-      {{CDF_MaybeBuiltin, "strlcpy", 3}, &CStringChecker::evalStrlcpy},
-      {{CDF_MaybeBuiltin, "strcat", 2}, &CStringChecker::evalStrcat},
-      {{CDF_MaybeBuiltin, "strncat", 3}, &CStringChecker::evalStrncat},
-      {{CDF_MaybeBuiltin, "strlcat", 3}, &CStringChecker::evalStrlcat},
-      {{CDF_MaybeBuiltin, "strlen", 1}, &CStringChecker::evalstrLength},
-      {{CDF_MaybeBuiltin, "strnlen", 2}, &CStringChecker::evalstrnLength},
-      {{CDF_MaybeBuiltin, "strcmp", 2}, &CStringChecker::evalStrcmp},
-      {{CDF_MaybeBuiltin, "strncmp", 3}, &CStringChecker::evalStrncmp},
-      {{CDF_MaybeBuiltin, "strcasecmp", 2}, &CStringChecker::evalStrcasecmp},
-      {{CDF_MaybeBuiltin, "strncasecmp", 3}, &CStringChecker::evalStrncasecmp},
-      {{CDF_MaybeBuiltin, "strsep", 2}, &CStringChecker::evalStrsep},
-      {{CDF_MaybeBuiltin, "bcopy", 3}, &CStringChecker::evalBcopy},
-      {{CDF_MaybeBuiltin, "bcmp", 3}, &CStringChecker::evalMemcmp},
-      {{CDF_MaybeBuiltin, "bzero", 2}, &CStringChecker::evalBzero},
-      {{CDF_MaybeBuiltin, "explicit_bzero", 2}, &CStringChecker::evalBzero},
-  };
-
-  // These require a bit of special handling.
-  CallDescription StdCopy{{"std", "copy"}, 3},
-      StdCopyBackward{{"std", "copy_backward"}, 3};
-
-  FnCheck identifyCall(const CallEvent &Call, CheckerContext &C) const;
-  void evalMemcpy(CheckerContext &C, const CallExpr *CE) const;
-  void evalMempcpy(CheckerContext &C, const CallExpr *CE) const;
-  void evalMemmove(CheckerContext &C, const CallExpr *CE) const;
-  void evalBcopy(CheckerContext &C, const CallExpr *CE) const;
-  void evalCopyCommon(CheckerContext &C, const CallExpr *CE,
-                      ProgramStateRef state, SizeArgExpr Size,
-                      DestinationArgExpr Dest, SourceArgExpr Source,
-                      bool Restricted, bool IsMempcpy) const;
-
-  void evalMemcmp(CheckerContext &C, const CallExpr *CE) const;
-
-  void evalstrLength(CheckerContext &C, const CallExpr *CE) const;
-  void evalstrnLength(CheckerContext &C, const CallExpr *CE) const;
-  void evalstrLengthCommon(CheckerContext &C,
-                           const CallExpr *CE,
-                           bool IsStrnlen = false) const;
-
-  void evalStrcpy(CheckerContext &C, const CallExpr *CE) const;
-  void evalStrncpy(CheckerContext &C, const CallExpr *CE) const;
-  void evalStpcpy(CheckerContext &C, const CallExpr *CE) const;
-  void evalStrlcpy(CheckerContext &C, const CallExpr *CE) const;
-  void evalStrcpyCommon(CheckerContext &C, const CallExpr *CE, bool ReturnEnd,
-                        bool IsBounded, ConcatFnKind appendK,
-                        bool returnPtr = true) const;
-
-  void evalStrcat(CheckerContext &C, const CallExpr *CE) const;
-  void evalStrncat(CheckerContext &C, const CallExpr *CE) const;
-  void evalStrlcat(CheckerContext &C, const CallExpr *CE) const;
-
-  void evalStrcmp(CheckerContext &C, const CallExpr *CE) const;
-  void evalStrncmp(CheckerContext &C, const CallExpr *CE) const;
-  void evalStrcasecmp(CheckerContext &C, const CallExpr *CE) const;
-  void evalStrncasecmp(CheckerContext &C, const CallExpr *CE) const;
-  void evalStrcmpCommon(CheckerContext &C,
-                        const CallExpr *CE,
-                        bool IsBounded = false,
-                        bool IgnoreCase = false) const;
-
-  void evalStrsep(CheckerContext &C, const CallExpr *CE) const;
-
-  void evalStdCopy(CheckerContext &C, const CallExpr *CE) const;
-  void evalStdCopyBackward(CheckerContext &C, const CallExpr *CE) const;
-  void evalStdCopyCommon(CheckerContext &C, const CallExpr *CE) const;
-  void evalMemset(CheckerContext &C, const CallExpr *CE) const;
-  void evalBzero(CheckerContext &C, const CallExpr *CE) const;
-
-  // Utility methods
-  std::pair<ProgramStateRef , ProgramStateRef >
-  static assumeZero(CheckerContext &C,
-                    ProgramStateRef state, SVal V, QualType Ty);
-
-  static ProgramStateRef setCStringLength(ProgramStateRef state,
-                                              const MemRegion *MR,
-                                              SVal strLength);
-  static SVal getCStringLengthForRegion(CheckerContext &C,
-                                        ProgramStateRef &state,
-                                        const Expr *Ex,
-                                        const MemRegion *MR,
-                                        bool hypothetical);
-  SVal getCStringLength(CheckerContext &C,
-                        ProgramStateRef &state,
-                        const Expr *Ex,
-                        SVal Buf,
-                        bool hypothetical = false) const;
-
-  const StringLiteral *getCStringLiteral(CheckerContext &C,
-                                         ProgramStateRef &state,
-                                         const Expr *expr,
-                                         SVal val) const;
-
-  static ProgramStateRef InvalidateBuffer(CheckerContext &C,
-                                          ProgramStateRef state,
-                                          const Expr *Ex, SVal V,
-                                          bool IsSourceBuffer,
-                                          const Expr *Size);
-
-  static bool SummarizeRegion(raw_ostream &os, ASTContext &Ctx,
-                              const MemRegion *MR);
-
-  static bool memsetAux(const Expr *DstBuffer, SVal CharE,
-                        const Expr *Size, CheckerContext &C,
-                        ProgramStateRef &State);
-
-  // Re-usable checks
-  ProgramStateRef checkNonNull(CheckerContext &C, ProgramStateRef State,
-                               AnyArgExpr Arg, SVal l) const;
-  ProgramStateRef CheckLocation(CheckerContext &C, ProgramStateRef state,
-                                AnyArgExpr Buffer, SVal Element,
-                                AccessKind Access) const;
-  ProgramStateRef CheckBufferAccess(CheckerContext &C, ProgramStateRef State,
-                                    AnyArgExpr Buffer, SizeArgExpr Size,
-                                    AccessKind Access) const;
-  ProgramStateRef CheckOverlap(CheckerContext &C, ProgramStateRef state,
-                               SizeArgExpr Size, AnyArgExpr First,
-                               AnyArgExpr Second) const;
-  void emitOverlapBug(CheckerContext &C,
-                      ProgramStateRef state,
-                      const Stmt *First,
-                      const Stmt *Second) const;
-
-  void emitNullArgBug(CheckerContext &C, ProgramStateRef State, const Stmt *S,
-                      StringRef WarningMsg) const;
-  void emitOutOfBoundsBug(CheckerContext &C, ProgramStateRef State,
-                          const Stmt *S, StringRef WarningMsg) const;
-  void emitNotCStringBug(CheckerContext &C, ProgramStateRef State,
-                         const Stmt *S, StringRef WarningMsg) const;
-  void emitAdditionOverflowBug(CheckerContext &C, ProgramStateRef State) const;
-
-  ProgramStateRef checkAdditionOverflow(CheckerContext &C,
-                                            ProgramStateRef state,
-                                            NonLoc left,
-                                            NonLoc right) const;
-
-  // Return true if the destination buffer of the copy function may be in bound.
-  // Expects SVal of Size to be positive and unsigned.
-  // Expects SVal of FirstBuf to be a FieldRegion.
-  static bool IsFirstBufInBound(CheckerContext &C,
-                                ProgramStateRef state,
-                                const Expr *FirstBuf,
-                                const Expr *Size);
-};
-
-} //end anonymous namespace
-
-REGISTER_MAP_WITH_PROGRAMSTATE(CStringLength, const MemRegion *, SVal)
-
 //===----------------------------------------------------------------------===//
 // Individual checks and utility methods.
 //===----------------------------------------------------------------------===//
 
+static const StringLiteral *getCStringLiteral(SVal val) {
+  // Get the memory region pointed to by the val.
+  const MemRegion *bufRegion = val.getAsRegion();
+  if (!bufRegion)
+    return nullptr;
+
+  // Strip casts off the memory region.
+  bufRegion = bufRegion->StripCasts();
+
+  // Cast the memory region to a string region.
+  const StringRegion *strRegion = dyn_cast<StringRegion>(bufRegion);
+  if (!strRegion)
+    return nullptr;
+
+  // Return the actual string in the string region.
+  return strRegion->getStringLiteral();
+}
+
+SVal CStringChecker::getCStringLengthChecked(CheckerContext &Ctx,
+                                             ProgramStateRef &State,
+                                             const Expr *Ex, SVal Buf) const {
+  // Try to get the associated cstring length; if that fails, create a new one.
+  const SVal CStrLen = [&]() -> SVal {
+    Optional<SVal> Tmp = cstring::getCStringLength(Ctx, State, Buf);
+    if (Tmp.hasValue())
+      return Tmp.getValue();
+    return cstring::createCStringLength(State, Ctx, Ex,
+                                        Buf.getAsRegion()->StripCasts());
+  }();
+
+  // Simply return if everything goes well.
+  // Otherwise, investigate why it failed.
+  if (!CStrLen.isUndef())
+    return CStrLen;
+
+  // Handle the case where the buffer does not refer to a memory region.
+  const MemRegion *MR = Buf.getAsRegion();
+  if (!MR) {
+    // If we can't get a region, see if it's something we /know/ isn't a
+    // C string. In the context of locations, the only time we can issue such
+    // a warning is for labels.
+    if (Optional<loc::GotoLabel> Label = Buf.getAs<loc::GotoLabel>()) {
+      if (Filter.CheckCStringNotNullTerm) {
+        SmallString<120> buf;
+        llvm::raw_svector_ostream os(buf);
+        assert(CurrentFunctionDescription);
+        os << "Argument to " << CurrentFunctionDescription
+           << " is the address of the label '" << Label->getLabel()->getName()
+           << "', which is not a null-terminated string";
+
+        emitNotCStringBug(Ctx, State, Ex, os.str());
+      }
+      return UndefinedVal();
+    }
+  }
+
+  // Other regions (mostly non-data) can't have a reliable C string length.
+  // In this case, an error is emitted and UndefinedVal is returned.
+  // The caller should always be prepared to handle this case.
+  if (Filter.CheckCStringNotNullTerm) {
+    SmallString<120> buf;
+    llvm::raw_svector_ostream os(buf);
+
+    assert(CurrentFunctionDescription);
+    os << "Argument to " << CurrentFunctionDescription << " is ";
+
+    if (SummarizeRegion(os, Ctx.getASTContext(), MR))
+      os << ", which is not a null-terminated string";
+    else
+      os << "not a null-terminated string";
+
+    emitNotCStringBug(Ctx, State, Ex, os.str());
+  }
+  return UndefinedVal();
+}
+
 std::pair<ProgramStateRef , ProgramStateRef >
 CStringChecker::assumeZero(CheckerContext &C, ProgramStateRef state, SVal V,
                            QualType Ty) {
@@ -694,181 +543,6 @@
   return state;
 }
 
-ProgramStateRef CStringChecker::setCStringLength(ProgramStateRef state,
-                                                const MemRegion *MR,
-                                                SVal strLength) {
-  assert(!strLength.isUndef() && "Attempt to set an undefined string length");
-
-  MR = MR->StripCasts();
-
-  switch (MR->getKind()) {
-  case MemRegion::StringRegionKind:
-    // FIXME: This can happen if we strcpy() into a string region. This is
-    // undefined [C99 6.4.5p6], but we should still warn about it.
-    return state;
-
-  case MemRegion::SymbolicRegionKind:
-  case MemRegion::AllocaRegionKind:
-  case MemRegion::NonParamVarRegionKind:
-  case MemRegion::ParamVarRegionKind:
-  case MemRegion::FieldRegionKind:
-  case MemRegion::ObjCIvarRegionKind:
-    // These are the types we can currently track string lengths for.
-    break;
-
-  case MemRegion::ElementRegionKind:
-    // FIXME: Handle element regions by upper-bounding the parent region's
-    // string length.
-    return state;
-
-  default:
-    // Other regions (mostly non-data) can't have a reliable C string length.
-    // For now, just ignore the change.
-    // FIXME: These are rare but not impossible. We should output some kind of
-    // warning for things like strcpy((char[]){'a', 0}, "b");
-    return state;
-  }
-
-  if (strLength.isUnknown())
-    return state->remove<CStringLength>(MR);
-
-  return state->set<CStringLength>(MR, strLength);
-}
-
-SVal CStringChecker::getCStringLengthForRegion(CheckerContext &C,
-                                               ProgramStateRef &state,
-                                               const Expr *Ex,
-                                               const MemRegion *MR,
-                                               bool hypothetical) {
-  if (!hypothetical) {
-    // If there's a recorded length, go ahead and return it.
-    const SVal *Recorded = state->get<CStringLength>(MR);
-    if (Recorded)
-      return *Recorded;
-  }
-
-  // Otherwise, get a new symbol and update the state.
-  SValBuilder &svalBuilder = C.getSValBuilder();
-  QualType sizeTy = svalBuilder.getContext().getSizeType();
-  SVal strLength = svalBuilder.getMetadataSymbolVal(CStringChecker::getTag(),
-                                                    MR, Ex, sizeTy,
-                                                    C.getLocationContext(),
-                                                    C.blockCount());
-
-  if (!hypothetical) {
-    if (Optional<NonLoc> strLn = strLength.getAs<NonLoc>()) {
-      // In case of unbounded calls strlen etc bound the range to SIZE_MAX/4
-      BasicValueFactory &BVF = svalBuilder.getBasicValueFactory();
-      const llvm::APSInt &maxValInt = BVF.getMaxValue(sizeTy);
-      llvm::APSInt fourInt = APSIntType(maxValInt).getValue(4);
-      const llvm::APSInt *maxLengthInt = BVF.evalAPSInt(BO_Div, maxValInt,
-                                                        fourInt);
-      NonLoc maxLength = svalBuilder.makeIntVal(*maxLengthInt);
-      SVal evalLength = svalBuilder.evalBinOpNN(state, BO_LE, *strLn,
-                                                maxLength, sizeTy);
-      state = state->assume(evalLength.castAs<DefinedOrUnknownSVal>(), true);
-    }
-    state = state->set<CStringLength>(MR, strLength);
-  }
-
-  return strLength;
-}
-
-SVal CStringChecker::getCStringLength(CheckerContext &C, ProgramStateRef &state,
-                                      const Expr *Ex, SVal Buf,
-                                      bool hypothetical) const {
-  const MemRegion *MR = Buf.getAsRegion();
-  if (!MR) {
-    // If we can't get a region, see if it's something we /know/ isn't a
-    // C string. In the context of locations, the only time we can issue such
-    // a warning is for labels.
-    if (Optional<loc::GotoLabel> Label = Buf.getAs<loc::GotoLabel>()) {
-      if (Filter.CheckCStringNotNullTerm) {
-        SmallString<120> buf;
-        llvm::raw_svector_ostream os(buf);
-        assert(CurrentFunctionDescription);
-        os << "Argument to " << CurrentFunctionDescription
-           << " is the address of the label '" << Label->getLabel()->getName()
-           << "', which is not a null-terminated string";
-
-        emitNotCStringBug(C, state, Ex, os.str());
-      }
-      return UndefinedVal();
-    }
-
-    // If it's not a region and not a label, give up.
-    return UnknownVal();
-  }
-
-  // If we have a region, strip casts from it and see if we can figure out
-  // its length. For anything we can't figure out, just return UnknownVal.
-  MR = MR->StripCasts();
-
-  switch (MR->getKind()) {
-  case MemRegion::StringRegionKind: {
-    // Modifying the contents of string regions is undefined [C99 6.4.5p6],
-    // so we can assume that the byte length is the correct C string length.
-    SValBuilder &svalBuilder = C.getSValBuilder();
-    QualType sizeTy = svalBuilder.getContext().getSizeType();
-    const StringLiteral *strLit = cast<StringRegion>(MR)->getStringLiteral();
-    return svalBuilder.makeIntVal(strLit->getByteLength(), sizeTy);
-  }
-  case MemRegion::SymbolicRegionKind:
-  case MemRegion::AllocaRegionKind:
-  case MemRegion::NonParamVarRegionKind:
-  case MemRegion::ParamVarRegionKind:
-  case MemRegion::FieldRegionKind:
-  case MemRegion::ObjCIvarRegionKind:
-    return getCStringLengthForRegion(C, state, Ex, MR, hypothetical);
-  case MemRegion::CompoundLiteralRegionKind:
-    // FIXME: Can we track this? Is it necessary?
-    return UnknownVal();
-  case MemRegion::ElementRegionKind:
-    // FIXME: How can we handle this? It's not good enough to subtract the
-    // offset from the base string length; consider "123\x00567" and &a[5].
-    return UnknownVal();
-  default:
-    // Other regions (mostly non-data) can't have a reliable C string length.
-    // In this case, an error is emitted and UndefinedVal is returned.
-    // The caller should always be prepared to handle this case.
-    if (Filter.CheckCStringNotNullTerm) {
-      SmallString<120> buf;
-      llvm::raw_svector_ostream os(buf);
-
-      assert(CurrentFunctionDescription);
-      os << "Argument to " << CurrentFunctionDescription << " is ";
-
-      if (SummarizeRegion(os, C.getASTContext(), MR))
-        os << ", which is not a null-terminated string";
-      else
-        os << "not a null-terminated string";
-
-      emitNotCStringBug(C, state, Ex, os.str());
-    }
-    return UndefinedVal();
-  }
-}
-
-const StringLiteral *CStringChecker::getCStringLiteral(CheckerContext &C,
-  ProgramStateRef &state, const Expr *expr, SVal val) const {
-
-  // Get the memory region pointed to by the val.
-  const MemRegion *bufRegion = val.getAsRegion();
-  if (!bufRegion)
-    return nullptr;
-
-  // Strip casts off the memory region.
-  bufRegion = bufRegion->StripCasts();
-
-  // Cast the memory region to a string region.
-  const StringRegion *strRegion= dyn_cast<StringRegion>(bufRegion);
-  if (!strRegion)
-    return nullptr;
-
-  // Return the actual string in the string region.
-  return strRegion->getStringLiteral();
-}
-
 bool CStringChecker::IsFirstBufInBound(CheckerContext &C,
                                        ProgramStateRef state,
                                        const Expr *FirstBuf,
@@ -1094,8 +768,8 @@
     if (StateNullChar && !StateNonNullChar) {
       // If the value of the second argument of 'memset()' is zero, set the
       // string length of destination buffer to 0 directly.
-      State = setCStringLength(State, MR,
-                               svalBuilder.makeZeroVal(Ctx.getSizeType()));
+      State = cstring::setCStringLength(
+          State, MR, svalBuilder.makeZeroVal(Ctx.getSizeType()));
     } else if (!StateNullChar && StateNonNullChar) {
       SVal NewStrLen = svalBuilder.getMetadataSymbolVal(
           CStringChecker::getTag(), MR, DstBuffer, Ctx.getSizeType(),
@@ -1106,7 +780,7 @@
       SVal NewStrLenGESize = svalBuilder.evalBinOp(
           State, BO_GE, NewStrLen, SizeVal, svalBuilder.getConditionType());
 
-      State = setCStringLength(
+      State = cstring::setCStringLength(
           State->assume(NewStrLenGESize.castAs<DefinedOrUnknownSVal>(), true),
           MR, NewStrLen);
     }
@@ -1394,7 +1068,7 @@
   if (!state)
     return;
 
-  SVal strLength = getCStringLength(C, state, Arg.Expression, ArgVal);
+  SVal strLength = getCStringLengthChecked(C, state, Arg.Expression, ArgVal);
 
   // If the argument isn't a valid C string, there's no valid state to
   // transition to.
@@ -1561,11 +1235,12 @@
     return;
 
   // Get the string length of the source.
-  SVal strLength = getCStringLength(C, state, srcExpr.Expression, srcVal);
+  SVal strLength =
+      getCStringLengthChecked(C, state, srcExpr.Expression, srcVal);
   Optional<NonLoc> strLengthNL = strLength.getAs<NonLoc>();
 
   // Get the string length of the destination buffer.
-  SVal dstStrLength = getCStringLength(C, state, Dst.Expression, DstVal);
+  SVal dstStrLength = getCStringLengthChecked(C, state, Dst.Expression, DstVal);
   Optional<NonLoc> dstStrLengthNL = dstStrLength.getAs<NonLoc>();
 
   // If the source isn't a valid C string, give up.
@@ -1787,9 +1462,10 @@
     // If we couldn't get a single value for the final string length,
     // we can at least bound it by the individual lengths.
     if (finalStrLength.isUnknown()) {
-      // Try to get a "hypothetical" string length symbol, which we can later
+      // Get a "hypothetical" string length symbol, which we can later
       // set as a real value if that turns out to be the case.
-      finalStrLength = getCStringLength(C, state, CE, DstVal, true);
+      finalStrLength =
+          cstring::createCStringLength(state, C, CE, DstVal.getAsRegion());
       assert(!finalStrLength.isUndef());
 
       if (Optional<NonLoc> finalStrLengthNL = finalStrLength.getAs<NonLoc>()) {
@@ -1899,7 +1575,8 @@
       if (amountCopied != strLength)
         finalStrLength = UnknownVal();
     }
-    state = setCStringLength(state, dstRegVal->getRegion(), finalStrLength);
+    state = cstring::setCStringLength(state, dstRegVal->getRegion(),
+                                      finalStrLength);
   }
 
   assert(state);
@@ -1959,12 +1636,13 @@
     return;
 
   // Get the string length of the first string or give up.
-  SVal LeftLength = getCStringLength(C, state, Left.Expression, LeftVal);
+  SVal LeftLength = getCStringLengthChecked(C, state, Left.Expression, LeftVal);
   if (LeftLength.isUndef())
     return;
 
   // Get the string length of the second string or give up.
-  SVal RightLength = getCStringLength(C, state, Right.Expression, RightVal);
+  SVal RightLength =
+      getCStringLengthChecked(C, state, Right.Expression, RightVal);
   if (RightLength.isUndef())
     return;
 
@@ -1999,10 +1677,8 @@
   // For now, we only do this if they're both known string literals.
 
   // Attempt to extract string literals from both expressions.
-  const StringLiteral *LeftStrLiteral =
-      getCStringLiteral(C, state, Left.Expression, LeftVal);
-  const StringLiteral *RightStrLiteral =
-      getCStringLiteral(C, state, Right.Expression, RightVal);
+  const StringLiteral *LeftStrLiteral = getCStringLiteral(LeftVal);
+  const StringLiteral *RightStrLiteral = getCStringLiteral(RightVal);
   bool canComputeResult = false;
   SVal resultVal = svalBuilder.conjureSymbolVal(nullptr, CE, LCtx,
       C.blockCount());
@@ -2314,147 +1990,29 @@
   return C.isDifferent();
 }
 
-void CStringChecker::checkPreStmt(const DeclStmt *DS, CheckerContext &C) const {
-  // Record string length for char a[] = "abc";
-  ProgramStateRef state = C.getState();
-
-  for (const auto *I : DS->decls()) {
-    const VarDecl *D = dyn_cast<VarDecl>(I);
-    if (!D)
-      continue;
-
-    // FIXME: Handle array fields of structs.
-    if (!D->getType()->isArrayType())
-      continue;
-
-    const Expr *Init = D->getInit();
-    if (!Init)
-      continue;
-    if (!isa<StringLiteral>(Init))
-      continue;
-
-    Loc VarLoc = state->getLValue(D, C.getLocationContext());
-    const MemRegion *MR = VarLoc.getAsRegion();
-    if (!MR)
-      continue;
-
-    SVal StrVal = C.getSVal(Init);
-    assert(StrVal.isValid() && "Initializer string is unknown or undefined");
-    DefinedOrUnknownSVal strLength =
-      getCStringLength(C, state, Init, StrVal).castAs<DefinedOrUnknownSVal>();
-
-    state = state->set<CStringLength>(MR, strLength);
-  }
-
-  C.addTransition(state);
-}
-
-ProgramStateRef
-CStringChecker::checkRegionChanges(ProgramStateRef state,
-    const InvalidatedSymbols *,
-    ArrayRef<const MemRegion *> ExplicitRegions,
-    ArrayRef<const MemRegion *> Regions,
-    const LocationContext *LCtx,
-    const CallEvent *Call) const {
-  CStringLengthTy Entries = state->get<CStringLength>();
-  if (Entries.isEmpty())
-    return state;
-
-  llvm::SmallPtrSet<const MemRegion *, 8> Invalidated;
-  llvm::SmallPtrSet<const MemRegion *, 32> SuperRegions;
-
-  // First build sets for the changed regions and their super-regions.
-  for (ArrayRef<const MemRegion *>::iterator
-      I = Regions.begin(), E = Regions.end(); I != E; ++I) {
-    const MemRegion *MR = *I;
-    Invalidated.insert(MR);
-
-    SuperRegions.insert(MR);
-    while (const SubRegion *SR = dyn_cast<SubRegion>(MR)) {
-      MR = SR->getSuperRegion();
-      SuperRegions.insert(MR);
-    }
-  }
+} // namespace cstring
+} // namespace ento
+} // namespace clang
 
-  CStringLengthTy::Factory &F = state->get_context<CStringLength>();
-
-  // Then loop over the entries in the current state.
-  for (CStringLengthTy::iterator I = Entries.begin(),
-      E = Entries.end(); I != E; ++I) {
-    const MemRegion *MR = I.getKey();
-
-    // Is this entry for a super-region of a changed region?
-    if (SuperRegions.count(MR)) {
-      Entries = F.remove(Entries, MR);
-      continue;
-    }
-
-    // Is this entry for a sub-region of a changed region?
-    const MemRegion *Super = MR;
-    while (const SubRegion *SR = dyn_cast<SubRegion>(Super)) {
-      Super = SR->getSuperRegion();
-      if (Invalidated.count(Super)) {
-        Entries = F.remove(Entries, MR);
-        break;
-      }
-    }
-  }
-
-  return state->set<CStringLength>(Entries);
+void clang::ento::registerCStringModeling(CheckerManager &Mgr) {
+  Mgr.registerChecker<clang::ento::cstring::CStringChecker>();
 }
 
-void CStringChecker::checkLiveSymbols(ProgramStateRef state,
-    SymbolReaper &SR) const {
-  // Mark all symbols in our string length map as valid.
-  CStringLengthTy Entries = state->get<CStringLength>();
-
-  for (CStringLengthTy::iterator I = Entries.begin(), E = Entries.end();
-      I != E; ++I) {
-    SVal Len = I.getData();
-
-    for (SymExpr::symbol_iterator si = Len.symbol_begin(),
-        se = Len.symbol_end(); si != se; ++si)
-      SR.markInUse(*si);
-  }
-}
-
-void CStringChecker::checkDeadSymbols(SymbolReaper &SR,
-    CheckerContext &C) const {
-  ProgramStateRef state = C.getState();
-  CStringLengthTy Entries = state->get<CStringLength>();
-  if (Entries.isEmpty())
-    return;
-
-  CStringLengthTy::Factory &F = state->get_context<CStringLength>();
-  for (CStringLengthTy::iterator I = Entries.begin(), E = Entries.end();
-      I != E; ++I) {
-    SVal Len = I.getData();
-    if (SymbolRef Sym = Len.getAsSymbol()) {
-      if (SR.isDead(Sym))
-        Entries = F.remove(Entries, I.getKey());
-    }
-  }
-
-  state = state->set<CStringLength>(Entries);
-  C.addTransition(state);
-}
-
-void ento::registerCStringModeling(CheckerManager &Mgr) {
-  Mgr.registerChecker<CStringChecker>();
-}
-
-bool ento::shouldRegisterCStringModeling(const CheckerManager &mgr) {
+bool clang::ento::shouldRegisterCStringModeling(const CheckerManager &) {
   return true;
 }
 
 #define REGISTER_CHECKER(name)                                                 \
-  void ento::register##name(CheckerManager &mgr) {                             \
-    CStringChecker *checker = mgr.getChecker<CStringChecker>();                \
+  void clang::ento::register##name(clang::ento::CheckerManager &mgr) {         \
+    auto *checker = mgr.getChecker<clang::ento::cstring::CStringChecker>();    \
     checker->Filter.Check##name = true;                                        \
     checker->Filter.CheckName##name = mgr.getCurrentCheckerName();             \
   }                                                                            \
                                                                                \
-  bool ento::shouldRegister##name(const CheckerManager &mgr) { return true; }
+  bool clang::ento::shouldRegister##name(                                      \
+      const clang::ento::CheckerManager &mgr) {                                \
+    return true;                                                               \
+  }
 
 REGISTER_CHECKER(CStringNullArg)
 REGISTER_CHECKER(CStringOutOfBounds)
Index: clang/lib/StaticAnalyzer/Checkers/CMakeLists.txt
===================================================================
--- clang/lib/StaticAnalyzer/Checkers/CMakeLists.txt
+++ clang/lib/StaticAnalyzer/Checkers/CMakeLists.txt
@@ -12,7 +12,8 @@
   BlockInCriticalSectionChecker.cpp
   BoolAssignmentChecker.cpp
   BuiltinFunctionChecker.cpp
-  CStringChecker.cpp
+  CStringChecker/CStringChecker.cpp
+  CStringChecker/CStringLengthModeling.cpp
   CStringSyntaxChecker.cpp
   CallAndMessageChecker.cpp
   CastSizeChecker.cpp
@@ -140,3 +141,7 @@
   DEPENDS
   omp_gen
   )
+
+target_include_directories(clangStaticAnalyzerCheckers PRIVATE
+  CStringChecker
+  )
_______________________________________________
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
  • [PATCH] D84979: [analyzer][... Balázs Benics via Phabricator via cfe-commits

Reply via email to