Hi Dave,

Recently I've been working on symbolic value support for the reference
count checker. I've attached a patch for it below; let me know it looks
OK for trunk. Thanks!

Best,
Eric

---

This patch enhances the reference count checker in the CPython plugin by
adding support for symbolic values. Whereas previously we were only able
to check the reference count of PyObject* objects created in the scope
of the function; we are now able to emit diagnostics on reference count
mismatch of objects that were, for example, passed in as a function
parameter.

rc6.c:6:10: warning: expected ‘obj’ to have reference count: N + ‘1’ but 
ob_refcnt field is N + ‘2’
    6 |   return obj;
      |          ^~~
  ‘create_py_object2’: event 1
    |
    |    6 |   return obj;
    |      |          ^~~
    |      |          |
    |      |          (1) here
    |


gcc/testsuite/ChangeLog:
        PR analyzer/107646
        * gcc.dg/plugin/analyzer_cpython_plugin.c: Support reference count 
checking
        of symbolic values.
        * gcc.dg/plugin/cpython-plugin-test-PyList_Append.c: New test.
        * gcc.dg/plugin/plugin.exp: New test.
        * gcc.dg/plugin/cpython-plugin-test-refcnt.c: New test.

Signed-off-by: Eric Feng <ef2...@columbia.edu>

---
 .../gcc.dg/plugin/analyzer_cpython_plugin.c   | 133 +++++++++++-------
 .../cpython-plugin-test-PyList_Append.c       |  21 ++-
 .../plugin/cpython-plugin-test-refcnt.c       |  18 +++
 gcc/testsuite/gcc.dg/plugin/plugin.exp        |   1 +
 4 files changed, 118 insertions(+), 55 deletions(-)
 create mode 100644 gcc/testsuite/gcc.dg/plugin/cpython-plugin-test-refcnt.c

diff --git a/gcc/testsuite/gcc.dg/plugin/analyzer_cpython_plugin.c 
b/gcc/testsuite/gcc.dg/plugin/analyzer_cpython_plugin.c
index bf1982e79c3..d7ecd7fce09 100644
--- a/gcc/testsuite/gcc.dg/plugin/analyzer_cpython_plugin.c
+++ b/gcc/testsuite/gcc.dg/plugin/analyzer_cpython_plugin.c
@@ -314,17 +314,20 @@ public:
   {
     diagnostic_metadata m;
     bool warned;
-    // just assuming constants for now
-    auto actual_refcnt
-       = m_actual_refcnt->dyn_cast_constant_svalue ()->get_constant ();
-    auto ob_refcnt = m_ob_refcnt->dyn_cast_constant_svalue ()->get_constant ();
-    warned = warning_meta (rich_loc, m, get_controlling_option (),
-                          "expected %qE to have "
-                          "reference count: %qE but ob_refcnt field is: %qE",
-                          m_reg_tree, actual_refcnt, ob_refcnt);
-
-    // location_t loc = rich_loc->get_loc ();
-    // foo (loc);
+
+    const auto *actual_refcnt_constant
+       = m_actual_refcnt->dyn_cast_constant_svalue ();
+    const auto *ob_refcnt_constant = m_ob_refcnt->dyn_cast_constant_svalue ();
+    if (!actual_refcnt_constant || !ob_refcnt_constant)
+      return false;
+
+    auto actual_refcnt = actual_refcnt_constant->get_constant ();
+    auto ob_refcnt = ob_refcnt_constant->get_constant ();
+    warned = warning_meta (
+       rich_loc, m, get_controlling_option (),
+       "expected %qE to have "
+       "reference count: N + %qE but ob_refcnt field is N + %qE",
+       m_reg_tree, actual_refcnt, ob_refcnt);
     return warned;
   }
 
@@ -336,10 +339,6 @@ public:
 
 private:
 
-  void foo(location_t loc) const 
-  {
-    inform(loc, "something is up right here");
-  }
   const region *m_base_region;
   const svalue *m_ob_refcnt;
   const svalue *m_actual_refcnt;
@@ -369,6 +368,19 @@ increment_region_refcnt (hash_map<const region *, int> 
&map, const region *key)
   refcnt = existed ? refcnt + 1 : 1;
 }
 
+static const region *
+get_region_from_svalue (const svalue *sval, region_model_manager *mgr)
+{
+  const auto *region_sval = sval->dyn_cast_region_svalue ();
+  if (region_sval)
+    return region_sval->get_pointee ();
+
+  const auto *initial_sval = sval->dyn_cast_initial_svalue ();
+  if (initial_sval)
+    return mgr->get_symbolic_region (initial_sval);
+
+  return nullptr;
+}
 
 /* Recursively fills in region_to_refcnt with the references owned by
    pyobj_ptr_sval.  */
@@ -381,20 +393,9 @@ count_pyobj_references (const region_model *model,
   if (!pyobj_ptr_sval)
     return;
 
-  const auto *pyobj_region_sval = pyobj_ptr_sval->dyn_cast_region_svalue ();
-  const auto *pyobj_initial_sval = pyobj_ptr_sval->dyn_cast_initial_svalue ();
-  if (!pyobj_region_sval && !pyobj_initial_sval)
-    return;
-
-  // todo: support initial sval (e.g passed in as parameter)
-  if (pyobj_initial_sval)
-    {
-      //     increment_region_refcnt (region_to_refcnt,
-      //                      pyobj_initial_sval->get_region ());
-      return;
-    }
+  region_model_manager *mgr = model->get_manager ();
 
-  const region *pyobj_region = pyobj_region_sval->get_pointee ();
+  const region *pyobj_region = get_region_from_svalue (pyobj_ptr_sval, mgr);
   if (!pyobj_region || seen.contains (pyobj_region))
     return;
 
@@ -409,49 +410,75 @@ count_pyobj_references (const region_model *model,
     return;
 
   const auto &retval_binding_map = retval_cluster->get_map ();
-
   for (const auto &binding : retval_binding_map)
     {
-      const svalue *binding_sval = binding.second;
-      const svalue *unwrapped_sval = binding_sval->unwrap_any_unmergeable ();
-      const region *pointee = unwrapped_sval->maybe_get_region ();
-
-      if (pointee && pointee->get_kind () == RK_HEAP_ALLOCATED)
+      const svalue *binding_sval = binding.second->unwrap_any_unmergeable ();
+      if (get_region_from_svalue (binding_sval, mgr))
        count_pyobj_references (model, region_to_refcnt, binding_sval, seen);
     }
 }
 
+static void
+unwrap_any_ob_refcnt_sval (const svalue *&ob_refcnt_sval)
+{
+  if (ob_refcnt_sval->get_kind () != SK_CONSTANT)
+    {
+      auto unwrap_cast = ob_refcnt_sval->maybe_undo_cast ();
+      if (!unwrap_cast)
+       unwrap_cast = ob_refcnt_sval;
+
+      if (unwrap_cast->get_kind () == SK_BINOP)
+       ob_refcnt_sval = unwrap_cast->dyn_cast_binop_svalue ()->get_arg1 ();
+    }
+}
+
+static void
+handle_refcnt_mismatch (const region_model *old_model,
+                       const ana::region *curr_region,
+                       const svalue *ob_refcnt_sval,
+                       const svalue *actual_refcnt_sval,
+                       region_model_context *ctxt)
+{
+  region_model_manager *mgr = old_model->get_manager ();
+  const svalue *curr_reg_sval
+      = mgr->get_ptr_svalue (pyobj_ptr_tree, curr_region);
+  tree reg_tree = old_model->get_representative_tree (curr_reg_sval);
+
+  if (!reg_tree)
+    return;
+
+  const auto &eg = ctxt->get_eg ();
+  refcnt_stmt_finder finder (*eg, reg_tree);
+  auto pd = make_unique<refcnt_mismatch> (curr_region, ob_refcnt_sval,
+                                         actual_refcnt_sval, reg_tree);
+  if (pd && eg)
+    ctxt->warn (std::move (pd), &finder);
+}
+
 /* Compare ob_refcnt field vs the actual reference count of a region */
 static void
 check_refcnt (const region_model *model,
              const region_model *old_model,
              region_model_context *ctxt,
              const hash_map<const ana::region *,
-                            int>::iterator::reference_pair region_refcnt)
+                            int>::iterator::reference_pair &region_refcnt)
 {
   region_model_manager *mgr = model->get_manager ();
   const auto &curr_region = region_refcnt.first;
   const auto &actual_refcnt = region_refcnt.second;
+
   const svalue *ob_refcnt_sval
       = retrieve_ob_refcnt_sval (curr_region, model, ctxt);
+  if (!ob_refcnt_sval)
+    return;
+
+  unwrap_any_ob_refcnt_sval (ob_refcnt_sval);
+
   const svalue *actual_refcnt_sval = mgr->get_or_create_int_cst (
       ob_refcnt_sval->get_type (), actual_refcnt);
-
   if (ob_refcnt_sval != actual_refcnt_sval)
-    {
-      const svalue *curr_reg_sval
-         = mgr->get_ptr_svalue (pyobj_ptr_tree, curr_region);
-      tree reg_tree = old_model->get_representative_tree (curr_reg_sval);
-      if (!reg_tree)
-       return;
-
-      const auto &eg = ctxt->get_eg ();
-      refcnt_stmt_finder finder (*eg, reg_tree);
-      auto pd = make_unique<refcnt_mismatch> (curr_region, ob_refcnt_sval,
-                                             actual_refcnt_sval, reg_tree);
-      if (pd && eg)
-       ctxt->warn (std::move (pd), &finder);
-    }
+    handle_refcnt_mismatch (old_model, curr_region, ob_refcnt_sval,
+                           actual_refcnt_sval, ctxt);
 }
 
 static void
@@ -493,8 +520,6 @@ count_all_references (const region_model *model,
   for (const auto &cluster : *model->get_store ())
     {
       auto curr_region = cluster.first;
-      if (curr_region->get_kind () != RK_HEAP_ALLOCATED)
-       continue;
 
       increment_region_refcnt (region_to_refcnt, curr_region);
 
@@ -505,8 +530,8 @@ count_all_references (const region_model *model,
 
          const svalue *unwrapped_sval
              = binding_sval->unwrap_any_unmergeable ();
-         // if (unwrapped_sval->get_type () != pyobj_ptr_tree)
-         // continue;
+         if (unwrapped_sval->get_type () != pyobj_ptr_tree)
+         continue;
 
          const region *pointee = unwrapped_sval->maybe_get_region ();
          if (!pointee || pointee->get_kind () != RK_HEAP_ALLOCATED)
diff --git a/gcc/testsuite/gcc.dg/plugin/cpython-plugin-test-PyList_Append.c 
b/gcc/testsuite/gcc.dg/plugin/cpython-plugin-test-PyList_Append.c
index e1efd9efda5..46daf2f8975 100644
--- a/gcc/testsuite/gcc.dg/plugin/cpython-plugin-test-PyList_Append.c
+++ b/gcc/testsuite/gcc.dg/plugin/cpython-plugin-test-PyList_Append.c
@@ -75,4 +75,23 @@ test_PyListAppend_6 ()
   PyObject *list = NULL;
   PyList_Append(list, item);
   return list;
-}
\ No newline at end of file
+}
+
+PyObject *
+test_PyListAppend_7 (PyObject *item)
+{
+  PyObject *list = PyList_New (0);
+  Py_INCREF(item);
+  PyList_Append(list, item);
+  return list;
+  /* { dg-warning "expected 'item' to have reference count" "" { target *-*-* 
} .-1 } */
+}
+
+PyObject *
+test_PyListAppend_8 (PyObject *item, PyObject *list)
+{
+  Py_INCREF(item);
+  Py_INCREF(item);
+  PyList_Append(list, item);
+  return list;
+}
diff --git a/gcc/testsuite/gcc.dg/plugin/cpython-plugin-test-refcnt.c 
b/gcc/testsuite/gcc.dg/plugin/cpython-plugin-test-refcnt.c
new file mode 100644
index 00000000000..a7f39509d6d
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/plugin/cpython-plugin-test-refcnt.c
@@ -0,0 +1,18 @@
+/* { dg-do compile } */
+/* { dg-require-effective-target analyzer } */
+/* { dg-options "-fanalyzer" } */
+/* { dg-require-python-h "" } */
+
+
+#define PY_SSIZE_T_CLEAN
+#include <Python.h>
+#include "../analyzer/analyzer-decls.h"
+
+PyObject *
+test_refcnt_1 (PyObject *obj)
+{
+  Py_INCREF(obj);
+  Py_INCREF(obj);
+  return obj;
+  /* { dg-warning "expected 'obj' to have reference count" "" { target *-*-* } 
.-1 } */
+}
diff --git a/gcc/testsuite/gcc.dg/plugin/plugin.exp 
b/gcc/testsuite/gcc.dg/plugin/plugin.exp
index ed72912309c..87862b4ca00 100644
--- a/gcc/testsuite/gcc.dg/plugin/plugin.exp
+++ b/gcc/testsuite/gcc.dg/plugin/plugin.exp
@@ -162,6 +162,7 @@ set plugin_test_list [list \
          taint-antipatterns-1.c } \
     { analyzer_cpython_plugin.c \
          cpython-plugin-test-no-Python-h.c \
+         cpython-plugin-test-refcnt.c \
          cpython-plugin-test-PyList_Append.c \
          cpython-plugin-test-PyList_New.c \
          cpython-plugin-test-PyLong_FromLong.c } \
-- 
2.30.2

Reply via email to