tomasz-kaminski-sonarsource created this revision.
Herald added subscribers: steakhal, manas, ASDenysPetrov, martong, dkrupp, 
donat.nagy, Szelethus, mikhail.ramalho, a.sidorin, szepet, baloghadamsoftware, 
xazax.hun.
Herald added a reviewer: NoQ.
Herald added a project: All.
tomasz-kaminski-sonarsource requested review of this revision.
Herald added a project: clang.
Herald added a subscriber: cfe-commits.

To illustrate our current understanding, let's start with the following
program:
https://godbolt.org/z/33f6vheh1

  void clang_analyzer_printState();
  
  struct C {
     int x;
     int y;
     int more_padding;
  };
  
  struct D {
     C c;
     int z;
  };
  
  C foo(D d, int new_x, int new_y) {
     d.c.x = new_x;       // B1
     assert(d.c.x < 13);  // C1
  
     C c = d.c;           // L
  
     assert(d.c.y < 10);  // C2
     assert(d.z < 5);     // C3
  
     d.c.y = new_y;       // B2
  
     assert(d.c.y < 10);  // C4
  
     return c;  // R
  }

In the code, we create a few bindings to subregions of root region `d`
(`B1`, `B2`), a constrain on the values  (`C1`, `C2`, ….), and create a
`lazyCompoundVal` for the part of the region `d` at point `L`, which is
returned at point `R`.

Now, the question is which of these should remain live as long the
return value of the `foo` call is live. In perfect a word we should
preserve:

1. only the bindings of the subregions of `d.c`, which were created

before the copy at `L`. In our example, this includes `B1`, and not
`B2`. In other words, `new_x` should be live but `new_y` shouldn’t.

2. constraints on the values of `d.c`, that are reachable through `c`.

This can be created both before the point of making the copy (`L`) or
after. In our case, that would be `C1` and `C2`. But not `C3` (`d.z`
value is not reachable through `c`) and `C4` (the original value of
`d.c.y` was overridden at `B2` after creation of `c`).

The current code in the `RegionStore` covers the use case (1), by using
the `getInterestingValues()` to extract bindings to parts of the
referred region present in the store at the point of copy. This also
partially covers point (2), in case when constraints are applied to a
location that has binding at the point of the copy (in our case `d.c.x`
in `C1` that has value `new_x`), but it fails to preserve the
constraints that require creating a new symbol for location (`d.c.y` in
`C2`).

We introduce the concept of _lazily copied_ locations (regions) to the
`SymbolReaper`, i.e. for which a program can access the value stored at
that location, but not its address. These locations are constructed as a
set of regions referred to by `lazyCompoundVal`. A _readable_ location
(region) is a location that _live_ or _lazily copied_ . And symbols that
refer to values in regions are alive if the region is _readable_.

For simplicity, we follow the current approach to live regions and mark
the base region as _lazily copied_, and consider any subregions as
_readable_. This makes some symbols falsy live (`d.z` in our example)
and keeps the corresponding constraints alive.

The rename `Regions` to `LiveRegions` inside  `RegionStore` is NFC
change, that was done to make it clear, what is difference between
regions stored in this two sets.

Co-authored-by: Balazs Benics <balazs.ben...@sonarsource.com>


Repository:
  rG LLVM Github Monorepo

https://reviews.llvm.org/D134947

Files:
  clang/include/clang/StaticAnalyzer/Core/PathSensitive/SymbolManager.h
  clang/lib/StaticAnalyzer/Core/RegionStore.cpp
  clang/lib/StaticAnalyzer/Core/SymbolManager.cpp
  clang/test/Analysis/trivial-copy-struct.cpp

Index: clang/test/Analysis/trivial-copy-struct.cpp
===================================================================
--- clang/test/Analysis/trivial-copy-struct.cpp
+++ clang/test/Analysis/trivial-copy-struct.cpp
@@ -34,3 +34,28 @@
   (void)(n1->ptr);
   (void)(n2->ptr);
 }
+
+struct List {
+  List* next;
+};
+
+void ptr1(List* n) {
+  List* n2 = new List(*n); // cctor
+  if (!n->next) {
+    if (n2->next) {
+      clang_analyzer_warnIfReached(); // unreachable
+    }
+  }
+  delete n2;
+}
+
+void ptr2(List* n) {
+  List* n2 = new List(); // ctor
+  *n2 = *n; // assignment
+  if (!n->next) {
+    if (n2->next) {
+      clang_analyzer_warnIfReached(); // unreachable
+    }
+  }
+  delete n2;
+}
Index: clang/lib/StaticAnalyzer/Core/SymbolManager.cpp
===================================================================
--- clang/lib/StaticAnalyzer/Core/SymbolManager.cpp
+++ clang/lib/StaticAnalyzer/Core/SymbolManager.cpp
@@ -411,10 +411,14 @@
 }
 
 void SymbolReaper::markLive(const MemRegion *region) {
-  RegionRoots.insert(region->getBaseRegion());
+  LiveRegionRoots.insert(region->getBaseRegion());
   markElementIndicesLive(region);
 }
 
+void SymbolReaper::markLazilyCopied(const clang::ento::MemRegion *region) {
+  LazilyCopiedRegionRoots.insert(region->getBaseRegion());
+}
+
 void SymbolReaper::markElementIndicesLive(const MemRegion *region) {
   for (auto SR = dyn_cast<SubRegion>(region); SR;
        SR = dyn_cast<SubRegion>(SR->getSuperRegion())) {
@@ -437,8 +441,7 @@
   // is not used later in the path, we can diagnose a leak of a value within
   // that field earlier than, say, the variable that contains the field dies.
   MR = MR->getBaseRegion();
-
-  if (RegionRoots.count(MR))
+  if (LiveRegionRoots.count(MR))
     return true;
 
   if (const auto *SR = dyn_cast<SymbolicRegion>(MR))
@@ -454,6 +457,15 @@
   return isa<AllocaRegion, CXXThisRegion, MemSpaceRegion, CodeTextRegion>(MR);
 }
 
+bool SymbolReaper::isLazilyCopiedRegion(const MemRegion *MR) const {
+  // TODO: See comment in isLiveRegion.
+  return LazilyCopiedRegionRoots.count(MR->getBaseRegion());
+}
+
+bool SymbolReaper::isReadableRegion(const MemRegion *MR) {
+  return isLiveRegion(MR) || isLazilyCopiedRegion(MR);
+}
+
 bool SymbolReaper::isLive(SymbolRef sym) {
   if (TheLiving.count(sym)) {
     markDependentsLive(sym);
@@ -464,7 +476,7 @@
 
   switch (sym->getKind()) {
   case SymExpr::SymbolRegionValueKind:
-    KnownLive = isLiveRegion(cast<SymbolRegionValue>(sym)->getRegion());
+    KnownLive = isReadableRegion(cast<SymbolRegionValue>(sym)->getRegion());
     break;
   case SymExpr::SymbolConjuredKind:
     KnownLive = false;
Index: clang/lib/StaticAnalyzer/Core/RegionStore.cpp
===================================================================
--- clang/lib/StaticAnalyzer/Core/RegionStore.cpp
+++ clang/lib/StaticAnalyzer/Core/RegionStore.cpp
@@ -2838,6 +2838,10 @@
   // Is it a LazyCompoundVal?  All referenced regions are live as well.
   if (Optional<nonloc::LazyCompoundVal> LCS =
           V.getAs<nonloc::LazyCompoundVal>()) {
+    // TODO: Make regions referred to by `lazyCompoundVals` that are bound to
+    // subregions of the `LCS.getRegion()` also lazily copied.
+    if (const MemRegion *R = LCS->getRegion())
+      SymReaper.markLazilyCopied(R);
 
     const RegionStoreManager::SValListTy &Vals = RM.getInterestingValues(*LCS);
 
Index: clang/include/clang/StaticAnalyzer/Core/PathSensitive/SymbolManager.h
===================================================================
--- clang/include/clang/StaticAnalyzer/Core/PathSensitive/SymbolManager.h
+++ clang/include/clang/StaticAnalyzer/Core/PathSensitive/SymbolManager.h
@@ -582,7 +582,8 @@
   SymbolMapTy TheLiving;
   SymbolSetTy MetadataInUse;
 
-  RegionSetTy RegionRoots;
+  RegionSetTy LiveRegionRoots;
+  RegionSetTy LazilyCopiedRegionRoots;
 
   const StackFrameContext *LCtx;
   const Stmt *Loc;
@@ -628,8 +629,8 @@
 
   using region_iterator = RegionSetTy::const_iterator;
 
-  region_iterator region_begin() const { return RegionRoots.begin(); }
-  region_iterator region_end() const { return RegionRoots.end(); }
+  region_iterator region_begin() const { return LiveRegionRoots.begin(); }
+  region_iterator region_end() const { return LiveRegionRoots.end(); }
 
   /// Returns whether or not a symbol has been confirmed dead.
   ///
@@ -640,6 +641,7 @@
   }
 
   void markLive(const MemRegion *region);
+  void markLazilyCopied(const MemRegion *region);
   void markElementIndicesLive(const MemRegion *region);
 
   /// Set to the value of the symbolic store after
@@ -647,6 +649,9 @@
   void setReapedStore(StoreRef st) { reapedStore = st; }
 
 private:
+  bool isLazilyCopiedRegion(const MemRegion *region) const;
+  bool isReadableRegion(const MemRegion *region);
+
   /// Mark the symbols dependent on the input symbol as live.
   void markDependentsLive(SymbolRef sym);
 };
_______________________________________________
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

Reply via email to