libbacktrace patch committed

Ian Lance Taylor Fri, 18 Oct 2024 13:05:29 -0700

Because libbacktrace merges adjacent address ranges where possible,
and because the GNU linker can deduplicate functions leaving debuginfo
that refers to address ranges in other compilation units, it is
possible for libbacktrace to have overlapping address ranges, in
particular to overlap ranges with gaps between the overlapped ranges.
This can cause libbacktrace to fail to find the right, most specific,
address range for a given address.


This patch fixes the problem by noticing overlapping ranges, and
filling in the gaps with references to the overlapping range.  This
somewhat complicates the computation of address ranges, but that
happens only once.  It does not affect the relatively simple code that
looks up an address range.

There is more discussion of this at
https://github.com/ianlancetaylor/libbacktrace/issues/137.

I've tested this patch with the libbacktrace and Go testsuites on
x86_64-pc-linux-gnu.  I've verified that it fixes the test case in the
github issue.  I haven't tried to add a test case because it's very
fiddly, depending on the exact compiler and linker behavior.

Ian

* dwarf.c (resolve_unit_addrs_overlap_walk): New static function.
(resolve_unit_addrs_overlap): New static function.
(build_dwarf_data): Call resolve_unit_addrs_overlap.

bf4bedf3abb9a6dd84a8bf572ac9c2960b264f1b
diff --git a/libbacktrace/dwarf.c b/libbacktrace/dwarf.c
index 96ffc4cc481..cc5cad70333 100644
--- a/libbacktrace/dwarf.c
+++ b/libbacktrace/dwarf.c
@@ -1276,6 +1276,194 @@ unit_addrs_search (const void *vkey, const void *ventry)
     return 0;
 }
 
+/* Fill in overlapping ranges as needed.  This is a subroutine of
+   resolve_unit_addrs_overlap.  */
+
+static int
+resolve_unit_addrs_overlap_walk (struct backtrace_state *state,
+                                size_t *pfrom, size_t *pto,
+                                struct unit_addrs *enclosing,
+                                struct unit_addrs_vector *old_vec,
+                                backtrace_error_callback error_callback,
+                                void *data,
+                                struct unit_addrs_vector *new_vec)
+{
+  struct unit_addrs *old_addrs;
+  size_t old_count;
+  struct unit_addrs *new_addrs;
+  size_t from;
+  size_t to;
+
+  old_addrs = (struct unit_addrs *) old_vec->vec.base;
+  old_count = old_vec->count;
+  new_addrs = (struct unit_addrs *) new_vec->vec.base;
+
+  for (from = *pfrom, to = *pto; from < old_count; from++, to++)
+    {
+      /* If we are in the scope of a larger range that can no longer
+        cover any further ranges, return back to the caller.  */
+
+      if (enclosing != NULL
+         && enclosing->high <= old_addrs[from].low)
+       {
+         *pfrom = from;
+         *pto = to;
+         return 1;
+       }
+
+      new_addrs[to] = old_addrs[from];
+
+      /* If we are in scope of a larger range, fill in any gaps
+        between this entry and the next one.
+
+        There is an extra entry at the end of the vector, so it's
+        always OK to refer to from + 1.  */
+
+      if (enclosing != NULL
+         && enclosing->high > old_addrs[from].high
+         && old_addrs[from].high < old_addrs[from + 1].low)
+       {
+         void *grew;
+         size_t new_high;
+
+         grew = backtrace_vector_grow (state, sizeof (struct unit_addrs),
+                                       error_callback, data, &new_vec->vec);
+         if (grew == NULL)
+           return 0;
+         new_addrs = (struct unit_addrs *) new_vec->vec.base;
+         to++;
+         new_addrs[to].low = old_addrs[from].high;
+         new_high = old_addrs[from + 1].low;
+         if (enclosing->high < new_high)
+           new_high = enclosing->high;
+         new_addrs[to].high = new_high;
+         new_addrs[to].u = enclosing->u;
+       }
+
+      /* If this range has a larger scope than the next one, use it to
+        fill in any gaps.  */
+
+      if (old_addrs[from].high > old_addrs[from + 1].high)
+       {
+         *pfrom = from + 1;
+         *pto = to + 1;
+         if (!resolve_unit_addrs_overlap_walk (state, pfrom, pto,
+                                               &old_addrs[from], old_vec,
+                                               error_callback, data, new_vec))
+           return 0;
+         from = *pfrom;
+         to = *pto;
+
+         /* Undo the increment the loop is about to do.  */
+         from--;
+         to--;
+       }
+    }
+
+  if (enclosing == NULL)
+    {
+      struct unit_addrs *pa;
+
+      /* Add trailing entry.  */
+
+      pa = ((struct unit_addrs *)
+           backtrace_vector_grow (state, sizeof (struct unit_addrs),
+                                  error_callback, data, &new_vec->vec));
+      if (pa == NULL)
+       return 0;
+      pa->low = 0;
+      --pa->low;
+      pa->high = pa->low;
+      pa->u = NULL;
+
+      new_vec->count = to;
+    }
+
+  return 1;
+}
+
+/* It is possible for the unit_addrs list to contain overlaps, as in
+
+       10: low == 10, high == 20, unit 1
+       11: low == 12, high == 15, unit 2
+       12: low == 20, high == 30, unit 1
+
+   In such a case, for pc == 17, a search using units_addr_search will
+   return entry 11.  However, pc == 17 doesn't fit in that range.  We
+   actually want range 10.
+
+   It seems that in general we might have an arbitrary number of
+   ranges in between 10 and 12.
+
+   To handle this we look for cases where range R1 is followed by
+   range R2 such that R2 is a strict subset of R1.  In such cases we
+   insert a new range R3 following R2 that fills in the remainder of
+   the address space covered by R1.  That lets a relatively simple
+   search find the correct range.
+
+   These overlaps can occur because of the range merging we do in
+   add_unit_addr.  When the linker de-duplicates functions, it can
+   leave behind an address range that refers to the address range of
+   the retained duplicate.  If the retained duplicate address range is
+   merged with others, then after sorting we can see overlapping
+   address ranges.
+
+   See https://github.com/ianlancetaylor/libbacktrace/issues/137.  */
+
+static int
+resolve_unit_addrs_overlap (struct backtrace_state *state,
+                           backtrace_error_callback error_callback,
+                           void *data, struct unit_addrs_vector *addrs_vec)
+{
+  struct unit_addrs *addrs;
+  size_t count;
+  int found;
+  struct unit_addrs *entry;
+  size_t i;
+  struct unit_addrs_vector new_vec;
+  void *grew;
+  size_t from;
+  size_t to;
+
+  addrs = (struct unit_addrs *) addrs_vec->vec.base;
+  count = addrs_vec->count;
+
+  if (count == 0)
+    return 1;
+
+  /* Optimistically assume that overlaps are rare.  */
+  found = 0;
+  entry = addrs;
+  for (i = 0; i < count - 1; i++)
+    {
+      if (entry->low < (entry + 1)->low
+         && entry->high > (entry + 1)->high)
+       {
+         found = 1;
+         break;
+       }
+      entry++;
+    }
+  if (!found)
+    return 1;
+
+  memset (&new_vec, 0, sizeof new_vec);
+  grew = backtrace_vector_grow (state,
+                               count * sizeof (struct unit_addrs),
+                               error_callback, data, &new_vec.vec);
+  if (grew == NULL)
+    return 0;
+
+  from = 0;
+  to = 0;
+  resolve_unit_addrs_overlap_walk (state, &from, &to, NULL, addrs_vec,
+                                  error_callback, data, &new_vec);
+  backtrace_vector_free (state, &addrs_vec->vec, error_callback, data);
+  *addrs_vec = new_vec;
+
+  return 1;
+}
+
 /* Sort the line vector by PC.  We want a stable sort here to maintain
    the order of lines for the same PC values.  Since the sequence is
    being sorted in place, their addresses cannot be relied on to
@@ -2980,7 +3168,7 @@ read_line_info (struct backtrace_state *state, struct 
dwarf_data *ddata,
 
   if (vec.count == 0)
     {
-      /* This is not a failure in the sense of a generating an error,
+      /* This is not a failure in the sense of generating an error,
         but it is a failure in that sense that we have no useful
         information.  */
       goto fail;
@@ -3966,11 +4154,7 @@ build_dwarf_data (struct backtrace_state *state,
                  void *data)
 {
   struct unit_addrs_vector addrs_vec;
-  struct unit_addrs *addrs;
-  size_t addrs_count;
   struct unit_vector units_vec;
-  struct unit **units;
-  size_t units_count;
   struct dwarf_data *fdata;
 
   if (!build_address_map (state, base_address, dwarf_sections, is_bigendian,
@@ -3982,12 +4166,12 @@ build_dwarf_data (struct backtrace_state *state,
     return NULL;
   if (!backtrace_vector_release (state, &units_vec.vec, error_callback, data))
     return NULL;
-  addrs = (struct unit_addrs *) addrs_vec.vec.base;
-  units = (struct unit **) units_vec.vec.base;
-  addrs_count = addrs_vec.count;
-  units_count = units_vec.count;
-  backtrace_qsort (addrs, addrs_count, sizeof (struct unit_addrs),
-                  unit_addrs_compare);
+
+  backtrace_qsort ((struct unit_addrs *) addrs_vec.vec.base, addrs_vec.count,
+                  sizeof (struct unit_addrs), unit_addrs_compare);
+  if (!resolve_unit_addrs_overlap (state, error_callback, data, &addrs_vec))
+    return NULL;
+
   /* No qsort for units required, already sorted.  */
 
   fdata = ((struct dwarf_data *)
@@ -3999,10 +4183,10 @@ build_dwarf_data (struct backtrace_state *state,
   fdata->next = NULL;
   fdata->altlink = altlink;
   fdata->base_address = base_address;
-  fdata->addrs = addrs;
-  fdata->addrs_count = addrs_count;
-  fdata->units = units;
-  fdata->units_count = units_count;
+  fdata->addrs = (struct unit_addrs *) addrs_vec.vec.base;
+  fdata->addrs_count = addrs_vec.count;
+  fdata->units = (struct unit **) units_vec.vec.base;
+  fdata->units_count = units_vec.count;
   fdata->dwarf_sections = *dwarf_sections;
   fdata->is_bigendian = is_bigendian;
   memset (&fdata->fvec, 0, sizeof fdata->fvec);

libbacktrace patch committed

Reply via email to