On Thu, 10 Apr 2025, Jakub Jelinek wrote: > Hi! > > As noted by Richi on a large testcase, there are unnecessary paddings > in some heavily used dwarf2out.{h,cc} structures on 64-bit hosts. > > struct dw_val_node { > enum dw_val_class val_class; /* 0 4 */ > > /* XXX 4 bytes hole, try to pack */ > > struct addr_table_entry * val_entry; /* 8 8 */ > union dw_val_struct_union v; /* 16 16 */ > > /* size: 32, cachelines: 1, members: 3 */ > /* sum members: 28, holes: 1, sum holes: 4 */ > /* last cacheline: 32 bytes */ > }; > struct dw_loc_descr_node { > dw_loc_descr_ref dw_loc_next; /* 0 8 */ > enum dwarf_location_atom dw_loc_opc:8; /* 8: 0 4 */ > unsigned int dtprel:1; /* 8: 8 4 */ > unsigned int frame_offset_rel:1; /* 8: 9 4 */ > > /* XXX 22 bits hole, try to pack */ > > int dw_loc_addr; /* 12 4 */ > struct dw_val_node dw_loc_oprnd1; /* 16 32 */ > struct dw_val_node dw_loc_oprnd2; /* 48 32 */ > > /* size: 80, cachelines: 2, members: 7 */ > /* sum members: 76 */ > /* sum bitfield members: 10 bits, bit holes: 1, sum bit holes: 22 > bits */ > /* last cacheline: 16 bytes */ > }; > struct dw_attr_struct { > enum dwarf_attribute dw_attr; /* 0 4 */ > > /* XXX 4 bytes hole, try to pack */ > > struct dw_val_node dw_attr_val; /* 8 32 */ > > /* size: 40, cachelines: 1, members: 2 */ > /* sum members: 36, holes: 1, sum holes: 4 */ > /* last cacheline: 40 bytes */ > }; > > The following patch is an (not very clean admittedly) attempt to decrease > size of dw_loc_descr_node from 80 bytes to 72 and (more importantly) > dw_attr_struct from 40 bytes to 32 by moving the dw_attr member from > dw_attr_struct into dw_attr_val's padding and similarly move > dw_loc_opc/dtprel/frame_offset_rel members into dw_loc_oprnd1 padding > and dw_loc_addr into dw_loc_oprnd2 padding. > All we need to ensure is that nothing tries to copy whole dw_val_node > structs unless it is copied as part of whole dw_loc_descr_node or > dw_attr_struct copy. > > To verify that wasn't the case, I've temporarily added a deleted copy ctor > to dw_val_node and then looked at all the errors/warnings caused by that, > and those were just from memcpy/memmove or structure assignments of whole > dw_loc_descr_node/dw_attr_struct. > > Bootstrapped/regtested on x86_64-linux and i686-linux. > Ok for stage1?
LGTM. I agree it's not the prettiest thing, and possibly fragile with respect to copying. But I don't have a better solution (for dw_attr I suggested packed/aligned on IRC, but that would not work for the dw_loc_descr_node case). So let's go with well-placed comments indicating the situation. I also noted that the struct addr_table_entry * GTY(()) val_entry; is only used with -gsplit-dwarf and that we might want to find another (temporary) home for it. Thanks, Richard. > 2025-04-10 Jakub Jelinek <ja...@redhat.com> > > PR debug/119711 > * dwarf2out.h (struct dw_val_node): Add u member. > (struct dw_loc_descr_node): Remove dw_loc_opc, dtprel, > frame_offset_rel and dw_loc_addr members. > (dw_loc_opc, dw_loc_dtprel, dw_loc_frame_offset_rel, dw_loc_addr): > Define. > (struct dw_attr_struct): Remove dw_attr member. > (dw_attr): Define. > * dwarf2out.cc (loc_descr_equal_p_1): Use dw_loc_dtprel instead of > dtprel. > (output_loc_operands, new_addr_loc_descr, loc_checksum, > loc_checksum_ordered): Likewise. > (resolve_args_picking_1): Use dw_loc_frame_offset_rel instead of > frame_offset_rel. > (loc_list_from_tree_1): Likewise. > (resolve_addr_in_expr): Use dw_loc_dtprel instead of dtprel. > (copy_deref_exprloc): Copy val_class, val_entry and v members > instead of whole dw_loc_oprnd1 and dw_loc_oprnd2. > (optimize_string_length): Copy val_class, val_entry and v members > instead of whole dw_attr_val. > (hash_loc_operands): Use dw_loc_dtprel instead of dtprel. > (compare_loc_operands, compare_locs): Likewise. > > --- gcc/dwarf2out.h.jj 2025-04-08 14:08:51.222282269 +0200 > +++ gcc/dwarf2out.h 2025-04-10 19:07:32.535018798 +0200 > @@ -276,6 +276,25 @@ typedef struct GTY(()) dw_loc_list_struc > > struct GTY(()) dw_val_node { > enum dw_val_class val_class; > + /* On 64-bit host, there are 4 bytes of padding between val_class > + and val_entry. Reuse the padding for other content of > + dw_loc_descr_node and dw_attr_struct. */ > + union dw_val_node_parent > + { > + struct dw_val_loc_descr_node > + { > + ENUM_BITFIELD (dwarf_location_atom) dw_loc_opc_v : 8; > + /* Used to distinguish DW_OP_addr with a direct symbol relocation > + from DW_OP_addr with a dtp-relative symbol relocation. */ > + unsigned int dw_loc_dtprel_v : 1; > + /* For DW_OP_pick, DW_OP_dup and DW_OP_over operations: true iff. > + it targets a DWARF prodecure argument. In this case, it needs to > be > + relocated according to the current frame offset. */ > + unsigned int dw_loc_frame_offset_rel_v : 1; > + } u1; > + int u2; > + enum dwarf_attribute u3; > + } GTY((skip)) u; > struct addr_table_entry * GTY(()) val_entry; > union dw_val_struct_union > { > @@ -321,15 +340,15 @@ struct GTY(()) dw_val_node { > > struct GTY((chain_next ("%h.dw_loc_next"))) dw_loc_descr_node { > dw_loc_descr_ref dw_loc_next; > - ENUM_BITFIELD (dwarf_location_atom) dw_loc_opc : 8; > +#define dw_loc_opc dw_loc_oprnd1.u.u1.dw_loc_opc_v > /* Used to distinguish DW_OP_addr with a direct symbol relocation > from DW_OP_addr with a dtp-relative symbol relocation. */ > - unsigned int dtprel : 1; > +#define dw_loc_dtprel dw_loc_oprnd1.u.u1.dw_loc_dtprel_v > /* For DW_OP_pick, DW_OP_dup and DW_OP_over operations: true iff. > it targets a DWARF prodecure argument. In this case, it needs to be > relocated according to the current frame offset. */ > - unsigned int frame_offset_rel : 1; > - int dw_loc_addr; > +#define dw_loc_frame_offset_rel dw_loc_oprnd1.u.u1.dw_loc_frame_offset_rel_v > +#define dw_loc_addr dw_loc_oprnd2.u.u2 > dw_val_node dw_loc_oprnd1; > dw_val_node dw_loc_oprnd2; > }; > @@ -493,7 +512,7 @@ void dwarf2out_cc_finalize (void); > Attributes are typically linked below the DIE they modify. */ > > typedef struct GTY(()) dw_attr_struct { > - enum dwarf_attribute dw_attr; > +#define dw_attr dw_attr_val.u.u3 > dw_val_node dw_attr_val; > } > dw_attr_node; > --- gcc/dwarf2out.cc.jj 2025-04-08 14:08:51.222282269 +0200 > +++ gcc/dwarf2out.cc 2025-04-10 19:07:51.301760436 +0200 > @@ -1536,7 +1536,7 @@ loc_descr_equal_p_1 (dw_loc_descr_ref a, > /* ??? This is only ever set for DW_OP_constNu, for N equal to the > address size, but since we always allocate cleared storage it > should be zero for other types of locations. */ > - if (a->dtprel != b->dtprel) > + if (a->dw_loc_dtprel != b->dw_loc_dtprel) > return false; > > return (dw_val_equal_p (&a->dw_loc_oprnd1, &b->dw_loc_oprnd1) > @@ -2115,7 +2115,7 @@ output_loc_operands (dw_loc_descr_ref lo > dw2_asm_output_data (2, val1->v.val_int, NULL); > break; > case DW_OP_const4u: > - if (loc->dtprel) > + if (loc->dw_loc_dtprel) > { > gcc_assert (targetm.asm_out.output_dwarf_dtprel); > targetm.asm_out.output_dwarf_dtprel (asm_out_file, 4, > @@ -2128,7 +2128,7 @@ output_loc_operands (dw_loc_descr_ref lo > dw2_asm_output_data (4, val1->v.val_int, NULL); > break; > case DW_OP_const8u: > - if (loc->dtprel) > + if (loc->dw_loc_dtprel) > { > gcc_assert (targetm.asm_out.output_dwarf_dtprel); > targetm.asm_out.output_dwarf_dtprel (asm_out_file, 8, > @@ -2323,7 +2323,7 @@ output_loc_operands (dw_loc_descr_ref lo > break; > > case DW_OP_addr: > - if (loc->dtprel) > + if (loc->dw_loc_dtprel) > { > if (targetm.asm_out.output_dwarf_dtprel) > { > @@ -4028,7 +4028,7 @@ new_addr_loc_descr (rtx addr, enum dtpre > > ref->dw_loc_oprnd1.val_class = dw_val_class_addr; > ref->dw_loc_oprnd1.v.val_addr = addr; > - ref->dtprel = dtprel; > + ref->dw_loc_dtprel = dtprel; > if (dwarf_split_debug_info) > ref->dw_loc_oprnd1.val_entry > = add_addr_table_entry (addr, > @@ -7036,7 +7036,7 @@ loc_checksum (dw_loc_descr_ref loc, stru > inchash::hash hstate; > hashval_t hash; > > - tem = (loc->dtprel << 8) | ((unsigned int) loc->dw_loc_opc); > + tem = (loc->dw_loc_dtprel << 8) | ((unsigned int) loc->dw_loc_opc); > CHECKSUM (tem); > hash_loc_operands (loc, hstate); > hash = hstate.end(); > @@ -7259,7 +7259,7 @@ loc_checksum_ordered (dw_loc_descr_ref l > inchash::hash hstate; > hashval_t hash; > > - CHECKSUM_ULEB128 (loc->dtprel); > + CHECKSUM_ULEB128 (loc->dw_loc_dtprel); > CHECKSUM_ULEB128 (loc->dw_loc_opc); > hash_loc_operands (loc, hstate); > hash = hstate.end (); > @@ -18310,7 +18310,7 @@ resolve_args_picking_1 (dw_loc_descr_ref > > /* If needed, relocate the picking offset with respect to the frame > offset. */ > - if (l->frame_offset_rel) > + if (l->dw_loc_frame_offset_rel) > { > unsigned HOST_WIDE_INT off; > switch (l->dw_loc_opc) > @@ -18826,7 +18826,7 @@ loc_list_from_tree_1 (tree loc, int want > && want_address == 0) > { > ret = new_loc_descr (DW_OP_pick, 0, 0); > - ret->frame_offset_rel = 1; > + ret->dw_loc_frame_offset_rel = 1; > context->placeholder_seen = true; > break; > } > @@ -18993,7 +18993,7 @@ loc_list_from_tree_1 (tree loc, int want > gcc_assert (cursor != NULL_TREE); > > ret = new_loc_descr (DW_OP_pick, i, 0); > - ret->frame_offset_rel = 1; > + ret->dw_loc_frame_offset_rel = 1; > break; > } > /* FALLTHRU */ > @@ -31061,7 +31061,7 @@ resolve_addr_in_expr (dw_attr_node *a, d > || loc->dw_loc_opc == DW_OP_addrx) > || ((loc->dw_loc_opc == DW_OP_GNU_const_index > || loc->dw_loc_opc == DW_OP_constx) > - && loc->dtprel)) > + && loc->dw_loc_dtprel)) > { > rtx rtl = loc->dw_loc_oprnd1.val_entry->addr.rtl; > if (!resolve_one_addr (&rtl)) > @@ -31073,7 +31073,7 @@ resolve_addr_in_expr (dw_attr_node *a, d > break; > case DW_OP_const4u: > case DW_OP_const8u: > - if (loc->dtprel > + if (loc->dw_loc_dtprel > && !resolve_one_addr (&loc->dw_loc_oprnd1.v.val_addr)) > return false; > break; > @@ -31359,8 +31359,12 @@ copy_deref_exprloc (dw_loc_descr_ref exp > while (expr != l) > { > *p = new_loc_descr (expr->dw_loc_opc, 0, 0); > - (*p)->dw_loc_oprnd1 = expr->dw_loc_oprnd1; > - (*p)->dw_loc_oprnd2 = expr->dw_loc_oprnd2; > + (*p)->dw_loc_oprnd1.val_class = expr->dw_loc_oprnd1.val_class; > + (*p)->dw_loc_oprnd1.val_entry = expr->dw_loc_oprnd1.val_entry; > + (*p)->dw_loc_oprnd1.v = expr->dw_loc_oprnd1.v; > + (*p)->dw_loc_oprnd2.val_class = expr->dw_loc_oprnd2.val_class; > + (*p)->dw_loc_oprnd2.val_entry = expr->dw_loc_oprnd2.val_entry; > + (*p)->dw_loc_oprnd2.v = expr->dw_loc_oprnd2.v; > p = &(*p)->dw_loc_next; > expr = expr->dw_loc_next; > } > @@ -31451,7 +31455,9 @@ optimize_string_length (dw_attr_node *a) > copy over the DW_AT_location attribute from die to a. */ > if (l->dw_loc_next != NULL) > { > - a->dw_attr_val = av->dw_attr_val; > + a->dw_attr_val.val_class = av->dw_attr_val.val_class; > + a->dw_attr_val.val_entry = av->dw_attr_val.val_entry; > + a->dw_attr_val.v = av->dw_attr_val.v; > return 1; > } > > @@ -31737,7 +31743,7 @@ hash_loc_operands (dw_loc_descr_ref loc, > { > case DW_OP_const4u: > case DW_OP_const8u: > - if (loc->dtprel) > + if (loc->dw_loc_dtprel) > goto hash_addr; > /* FALLTHRU */ > case DW_OP_const1u: > @@ -31839,7 +31845,7 @@ hash_loc_operands (dw_loc_descr_ref loc, > break; > case DW_OP_addr: > hash_addr: > - if (loc->dtprel) > + if (loc->dw_loc_dtprel) > { > unsigned char dtprel = 0xd1; > hstate.add_object (dtprel); > @@ -31851,7 +31857,7 @@ hash_loc_operands (dw_loc_descr_ref loc, > case DW_OP_GNU_const_index: > case DW_OP_constx: > { > - if (loc->dtprel) > + if (loc->dw_loc_dtprel) > { > unsigned char dtprel = 0xd1; > hstate.add_object (dtprel); > @@ -31998,7 +32004,7 @@ compare_loc_operands (dw_loc_descr_ref x > { > case DW_OP_const4u: > case DW_OP_const8u: > - if (x->dtprel) > + if (x->dw_loc_dtprel) > goto hash_addr; > /* FALLTHRU */ > case DW_OP_const1u: > @@ -32162,7 +32168,7 @@ compare_locs (dw_loc_descr_ref x, dw_loc > { > for (; x != NULL && y != NULL; x = x->dw_loc_next, y = y->dw_loc_next) > if (x->dw_loc_opc != y->dw_loc_opc > - || x->dtprel != y->dtprel > + || x->dw_loc_dtprel != y->dw_loc_dtprel > || !compare_loc_operands (x, y)) > break; > return x == NULL && y == NULL; > > Jakub > > -- Richard Biener <rguent...@suse.de> SUSE Software Solutions Germany GmbH, Frankenstrasse 146, 90461 Nuernberg, Germany; GF: Ivo Totev, Andrew McDonald, Werner Knoblich; (HRB 36809, AG Nuernberg)