On Thu, 10 Apr 2025, Jakub Jelinek wrote:

> Hi!
> 
> As noted by Richi on a large testcase, there are unnecessary paddings
> in some heavily used dwarf2out.{h,cc} structures on 64-bit hosts.
> 
> struct dw_val_node {
>         enum dw_val_class          val_class;            /*     0     4 */
> 
>         /* XXX 4 bytes hole, try to pack */
> 
>         struct addr_table_entry *  val_entry;            /*     8     8 */
>         union dw_val_struct_union  v;                    /*    16    16 */
> 
>         /* size: 32, cachelines: 1, members: 3 */
>         /* sum members: 28, holes: 1, sum holes: 4 */
>         /* last cacheline: 32 bytes */
> };
> struct dw_loc_descr_node {
>         dw_loc_descr_ref           dw_loc_next;          /*     0     8 */
>         enum dwarf_location_atom   dw_loc_opc:8;         /*     8: 0  4 */
>         unsigned int               dtprel:1;             /*     8: 8  4 */
>         unsigned int               frame_offset_rel:1;   /*     8: 9  4 */
> 
>         /* XXX 22 bits hole, try to pack */
> 
>         int                        dw_loc_addr;          /*    12     4 */
>         struct dw_val_node         dw_loc_oprnd1;        /*    16    32 */
>         struct dw_val_node         dw_loc_oprnd2;        /*    48    32 */
> 
>         /* size: 80, cachelines: 2, members: 7 */
>         /* sum members: 76 */
>         /* sum bitfield members: 10 bits, bit holes: 1, sum bit holes: 22 
> bits */
>         /* last cacheline: 16 bytes */
> };
> struct dw_attr_struct {
>         enum dwarf_attribute       dw_attr;              /*     0     4 */
> 
>         /* XXX 4 bytes hole, try to pack */
> 
>         struct dw_val_node         dw_attr_val;          /*     8    32 */
> 
>         /* size: 40, cachelines: 1, members: 2 */
>         /* sum members: 36, holes: 1, sum holes: 4 */
>         /* last cacheline: 40 bytes */
> };
> 
> The following patch is an (not very clean admittedly) attempt to decrease
> size of dw_loc_descr_node from 80 bytes to 72 and (more importantly)
> dw_attr_struct from 40 bytes to 32 by moving the dw_attr member from
> dw_attr_struct into dw_attr_val's padding and similarly move
> dw_loc_opc/dtprel/frame_offset_rel members into dw_loc_oprnd1 padding
> and dw_loc_addr into dw_loc_oprnd2 padding.
> All we need to ensure is that nothing tries to copy whole dw_val_node
> structs unless it is copied as part of whole dw_loc_descr_node or
> dw_attr_struct copy.
> 
> To verify that wasn't the case, I've temporarily added a deleted copy ctor
> to dw_val_node and then looked at all the errors/warnings caused by that,
> and those were just from memcpy/memmove or structure assignments of whole
> dw_loc_descr_node/dw_attr_struct.
> 
> Bootstrapped/regtested on x86_64-linux and i686-linux.
> Ok for stage1?

LGTM.

I agree it's not the prettiest thing, and possibly fragile
with respect to copying.  But I don't have a better solution
(for dw_attr I suggested packed/aligned on IRC, but that would not
work for the dw_loc_descr_node case).  So let's go with well-placed
comments indicating the situation.

I also noted that the struct addr_table_entry * GTY(()) val_entry; is
only used with -gsplit-dwarf and that we might want to find another
(temporary) home for it.

Thanks,
Richard.

> 2025-04-10  Jakub Jelinek  <ja...@redhat.com>
> 
>       PR debug/119711
>       * dwarf2out.h (struct dw_val_node): Add u member.
>       (struct dw_loc_descr_node): Remove dw_loc_opc, dtprel,
>       frame_offset_rel and dw_loc_addr members.
>       (dw_loc_opc, dw_loc_dtprel, dw_loc_frame_offset_rel, dw_loc_addr):
>       Define.
>       (struct dw_attr_struct): Remove dw_attr member.
>       (dw_attr): Define.
>       * dwarf2out.cc (loc_descr_equal_p_1): Use dw_loc_dtprel instead of
>       dtprel.
>       (output_loc_operands, new_addr_loc_descr, loc_checksum,
>       loc_checksum_ordered): Likewise.
>       (resolve_args_picking_1): Use dw_loc_frame_offset_rel instead of
>       frame_offset_rel.
>       (loc_list_from_tree_1): Likewise.
>       (resolve_addr_in_expr): Use dw_loc_dtprel instead of dtprel.
>       (copy_deref_exprloc): Copy val_class, val_entry and v members
>       instead of whole dw_loc_oprnd1 and dw_loc_oprnd2.
>       (optimize_string_length): Copy val_class, val_entry and v members
>       instead of whole dw_attr_val.
>       (hash_loc_operands): Use dw_loc_dtprel instead of dtprel.
>       (compare_loc_operands, compare_locs): Likewise.
> 
> --- gcc/dwarf2out.h.jj        2025-04-08 14:08:51.222282269 +0200
> +++ gcc/dwarf2out.h   2025-04-10 19:07:32.535018798 +0200
> @@ -276,6 +276,25 @@ typedef struct GTY(()) dw_loc_list_struc
>  
>  struct GTY(()) dw_val_node {
>    enum dw_val_class val_class;
> +  /* On 64-bit host, there are 4 bytes of padding between val_class
> +     and val_entry.  Reuse the padding for other content of
> +     dw_loc_descr_node and dw_attr_struct.  */
> +  union dw_val_node_parent
> +    {
> +      struct dw_val_loc_descr_node
> +     {
> +       ENUM_BITFIELD (dwarf_location_atom) dw_loc_opc_v : 8;
> +       /* Used to distinguish DW_OP_addr with a direct symbol relocation
> +          from DW_OP_addr with a dtp-relative symbol relocation.  */
> +       unsigned int dw_loc_dtprel_v : 1;
> +       /* For DW_OP_pick, DW_OP_dup and DW_OP_over operations: true iff.
> +          it targets a DWARF prodecure argument.  In this case, it needs to 
> be
> +          relocated according to the current frame offset.  */
> +       unsigned int dw_loc_frame_offset_rel_v : 1;
> +     } u1;
> +      int u2;
> +      enum dwarf_attribute u3;
> +    } GTY((skip)) u;
>    struct addr_table_entry * GTY(()) val_entry;
>    union dw_val_struct_union
>      {
> @@ -321,15 +340,15 @@ struct GTY(()) dw_val_node {
>  
>  struct GTY((chain_next ("%h.dw_loc_next"))) dw_loc_descr_node {
>    dw_loc_descr_ref dw_loc_next;
> -  ENUM_BITFIELD (dwarf_location_atom) dw_loc_opc : 8;
> +#define dw_loc_opc dw_loc_oprnd1.u.u1.dw_loc_opc_v
>    /* Used to distinguish DW_OP_addr with a direct symbol relocation
>       from DW_OP_addr with a dtp-relative symbol relocation.  */
> -  unsigned int dtprel : 1;
> +#define dw_loc_dtprel dw_loc_oprnd1.u.u1.dw_loc_dtprel_v
>    /* For DW_OP_pick, DW_OP_dup and DW_OP_over operations: true iff.
>       it targets a DWARF prodecure argument.  In this case, it needs to be
>       relocated according to the current frame offset.  */
> -  unsigned int frame_offset_rel : 1;
> -  int dw_loc_addr;
> +#define dw_loc_frame_offset_rel dw_loc_oprnd1.u.u1.dw_loc_frame_offset_rel_v
> +#define dw_loc_addr dw_loc_oprnd2.u.u2
>    dw_val_node dw_loc_oprnd1;
>    dw_val_node dw_loc_oprnd2;
>  };
> @@ -493,7 +512,7 @@ void dwarf2out_cc_finalize (void);
>     Attributes are typically linked below the DIE they modify.  */
>  
>  typedef struct GTY(()) dw_attr_struct {
> -  enum dwarf_attribute dw_attr;
> +#define dw_attr dw_attr_val.u.u3
>    dw_val_node dw_attr_val;
>  }
>  dw_attr_node;
> --- gcc/dwarf2out.cc.jj       2025-04-08 14:08:51.222282269 +0200
> +++ gcc/dwarf2out.cc  2025-04-10 19:07:51.301760436 +0200
> @@ -1536,7 +1536,7 @@ loc_descr_equal_p_1 (dw_loc_descr_ref a,
>    /* ??? This is only ever set for DW_OP_constNu, for N equal to the
>       address size, but since we always allocate cleared storage it
>       should be zero for other types of locations.  */
> -  if (a->dtprel != b->dtprel)
> +  if (a->dw_loc_dtprel != b->dw_loc_dtprel)
>      return false;
>  
>    return (dw_val_equal_p (&a->dw_loc_oprnd1, &b->dw_loc_oprnd1)
> @@ -2115,7 +2115,7 @@ output_loc_operands (dw_loc_descr_ref lo
>        dw2_asm_output_data (2, val1->v.val_int, NULL);
>        break;
>      case DW_OP_const4u:
> -      if (loc->dtprel)
> +      if (loc->dw_loc_dtprel)
>       {
>         gcc_assert (targetm.asm_out.output_dwarf_dtprel);
>         targetm.asm_out.output_dwarf_dtprel (asm_out_file, 4,
> @@ -2128,7 +2128,7 @@ output_loc_operands (dw_loc_descr_ref lo
>        dw2_asm_output_data (4, val1->v.val_int, NULL);
>        break;
>      case DW_OP_const8u:
> -      if (loc->dtprel)
> +      if (loc->dw_loc_dtprel)
>       {
>         gcc_assert (targetm.asm_out.output_dwarf_dtprel);
>         targetm.asm_out.output_dwarf_dtprel (asm_out_file, 8,
> @@ -2323,7 +2323,7 @@ output_loc_operands (dw_loc_descr_ref lo
>        break;
>  
>      case DW_OP_addr:
> -      if (loc->dtprel)
> +      if (loc->dw_loc_dtprel)
>       {
>         if (targetm.asm_out.output_dwarf_dtprel)
>           {
> @@ -4028,7 +4028,7 @@ new_addr_loc_descr (rtx addr, enum dtpre
>  
>    ref->dw_loc_oprnd1.val_class = dw_val_class_addr;
>    ref->dw_loc_oprnd1.v.val_addr = addr;
> -  ref->dtprel = dtprel;
> +  ref->dw_loc_dtprel = dtprel;
>    if (dwarf_split_debug_info)
>      ref->dw_loc_oprnd1.val_entry
>        = add_addr_table_entry (addr,
> @@ -7036,7 +7036,7 @@ loc_checksum (dw_loc_descr_ref loc, stru
>    inchash::hash hstate;
>    hashval_t hash;
>  
> -  tem = (loc->dtprel << 8) | ((unsigned int) loc->dw_loc_opc);
> +  tem = (loc->dw_loc_dtprel << 8) | ((unsigned int) loc->dw_loc_opc);
>    CHECKSUM (tem);
>    hash_loc_operands (loc, hstate);
>    hash = hstate.end();
> @@ -7259,7 +7259,7 @@ loc_checksum_ordered (dw_loc_descr_ref l
>        inchash::hash hstate;
>        hashval_t hash;
>  
> -      CHECKSUM_ULEB128 (loc->dtprel);
> +      CHECKSUM_ULEB128 (loc->dw_loc_dtprel);
>        CHECKSUM_ULEB128 (loc->dw_loc_opc);
>        hash_loc_operands (loc, hstate);
>        hash = hstate.end ();
> @@ -18310,7 +18310,7 @@ resolve_args_picking_1 (dw_loc_descr_ref
>  
>        /* If needed, relocate the picking offset with respect to the frame
>        offset. */
> -      if (l->frame_offset_rel)
> +      if (l->dw_loc_frame_offset_rel)
>       {
>         unsigned HOST_WIDE_INT off;
>         switch (l->dw_loc_opc)
> @@ -18826,7 +18826,7 @@ loc_list_from_tree_1 (tree loc, int want
>              && want_address == 0)
>       {
>         ret = new_loc_descr (DW_OP_pick, 0, 0);
> -       ret->frame_offset_rel = 1;
> +       ret->dw_loc_frame_offset_rel = 1;
>         context->placeholder_seen = true;
>         break;
>       }
> @@ -18993,7 +18993,7 @@ loc_list_from_tree_1 (tree loc, int want
>         gcc_assert (cursor != NULL_TREE);
>  
>         ret = new_loc_descr (DW_OP_pick, i, 0);
> -       ret->frame_offset_rel = 1;
> +       ret->dw_loc_frame_offset_rel = 1;
>         break;
>       }
>        /* FALLTHRU */
> @@ -31061,7 +31061,7 @@ resolve_addr_in_expr (dw_attr_node *a, d
>            || loc->dw_loc_opc == DW_OP_addrx)
>           || ((loc->dw_loc_opc == DW_OP_GNU_const_index
>                || loc->dw_loc_opc == DW_OP_constx)
> -             && loc->dtprel))
> +             && loc->dw_loc_dtprel))
>            {
>              rtx rtl = loc->dw_loc_oprnd1.val_entry->addr.rtl;
>              if (!resolve_one_addr (&rtl))
> @@ -31073,7 +31073,7 @@ resolve_addr_in_expr (dw_attr_node *a, d
>       break;
>        case DW_OP_const4u:
>        case DW_OP_const8u:
> -     if (loc->dtprel
> +     if (loc->dw_loc_dtprel
>           && !resolve_one_addr (&loc->dw_loc_oprnd1.v.val_addr))
>         return false;
>       break;
> @@ -31359,8 +31359,12 @@ copy_deref_exprloc (dw_loc_descr_ref exp
>    while (expr != l)
>      {
>        *p = new_loc_descr (expr->dw_loc_opc, 0, 0);
> -      (*p)->dw_loc_oprnd1 = expr->dw_loc_oprnd1;
> -      (*p)->dw_loc_oprnd2 = expr->dw_loc_oprnd2;
> +      (*p)->dw_loc_oprnd1.val_class = expr->dw_loc_oprnd1.val_class;
> +      (*p)->dw_loc_oprnd1.val_entry = expr->dw_loc_oprnd1.val_entry;
> +      (*p)->dw_loc_oprnd1.v = expr->dw_loc_oprnd1.v;
> +      (*p)->dw_loc_oprnd2.val_class = expr->dw_loc_oprnd2.val_class;
> +      (*p)->dw_loc_oprnd2.val_entry = expr->dw_loc_oprnd2.val_entry;
> +      (*p)->dw_loc_oprnd2.v = expr->dw_loc_oprnd2.v;
>        p = &(*p)->dw_loc_next;
>        expr = expr->dw_loc_next;
>      }
> @@ -31451,7 +31455,9 @@ optimize_string_length (dw_attr_node *a)
>       copy over the DW_AT_location attribute from die to a.  */
>    if (l->dw_loc_next != NULL)
>      {
> -      a->dw_attr_val = av->dw_attr_val;
> +      a->dw_attr_val.val_class = av->dw_attr_val.val_class;
> +      a->dw_attr_val.val_entry = av->dw_attr_val.val_entry;
> +      a->dw_attr_val.v = av->dw_attr_val.v;
>        return 1;
>      }
>  
> @@ -31737,7 +31743,7 @@ hash_loc_operands (dw_loc_descr_ref loc,
>      {
>      case DW_OP_const4u:
>      case DW_OP_const8u:
> -      if (loc->dtprel)
> +      if (loc->dw_loc_dtprel)
>       goto hash_addr;
>        /* FALLTHRU */
>      case DW_OP_const1u:
> @@ -31839,7 +31845,7 @@ hash_loc_operands (dw_loc_descr_ref loc,
>        break;
>      case DW_OP_addr:
>      hash_addr:
> -      if (loc->dtprel)
> +      if (loc->dw_loc_dtprel)
>       {
>         unsigned char dtprel = 0xd1;
>         hstate.add_object (dtprel);
> @@ -31851,7 +31857,7 @@ hash_loc_operands (dw_loc_descr_ref loc,
>      case DW_OP_GNU_const_index:
>      case DW_OP_constx:
>        {
> -        if (loc->dtprel)
> +     if (loc->dw_loc_dtprel)
>            {
>              unsigned char dtprel = 0xd1;
>           hstate.add_object (dtprel);
> @@ -31998,7 +32004,7 @@ compare_loc_operands (dw_loc_descr_ref x
>      {
>      case DW_OP_const4u:
>      case DW_OP_const8u:
> -      if (x->dtprel)
> +      if (x->dw_loc_dtprel)
>       goto hash_addr;
>        /* FALLTHRU */
>      case DW_OP_const1u:
> @@ -32162,7 +32168,7 @@ compare_locs (dw_loc_descr_ref x, dw_loc
>  {
>    for (; x != NULL && y != NULL; x = x->dw_loc_next, y = y->dw_loc_next)
>      if (x->dw_loc_opc != y->dw_loc_opc
> -     || x->dtprel != y->dtprel
> +     || x->dw_loc_dtprel != y->dw_loc_dtprel
>       || !compare_loc_operands (x, y))
>        break;
>    return x == NULL && y == NULL;
> 
>       Jakub
> 
> 

-- 
Richard Biener <rguent...@suse.de>
SUSE Software Solutions Germany GmbH,
Frankenstrasse 146, 90461 Nuernberg, Germany;
GF: Ivo Totev, Andrew McDonald, Werner Knoblich; (HRB 36809, AG Nuernberg)

Reply via email to