Hi! As noted by Richi on a large testcase, there are unnecessary paddings in some heavily used dwarf2out.{h,cc} structures on 64-bit hosts.
struct dw_val_node { enum dw_val_class val_class; /* 0 4 */ /* XXX 4 bytes hole, try to pack */ struct addr_table_entry * val_entry; /* 8 8 */ union dw_val_struct_union v; /* 16 16 */ /* size: 32, cachelines: 1, members: 3 */ /* sum members: 28, holes: 1, sum holes: 4 */ /* last cacheline: 32 bytes */ }; struct dw_loc_descr_node { dw_loc_descr_ref dw_loc_next; /* 0 8 */ enum dwarf_location_atom dw_loc_opc:8; /* 8: 0 4 */ unsigned int dtprel:1; /* 8: 8 4 */ unsigned int frame_offset_rel:1; /* 8: 9 4 */ /* XXX 22 bits hole, try to pack */ int dw_loc_addr; /* 12 4 */ struct dw_val_node dw_loc_oprnd1; /* 16 32 */ struct dw_val_node dw_loc_oprnd2; /* 48 32 */ /* size: 80, cachelines: 2, members: 7 */ /* sum members: 76 */ /* sum bitfield members: 10 bits, bit holes: 1, sum bit holes: 22 bits */ /* last cacheline: 16 bytes */ }; struct dw_attr_struct { enum dwarf_attribute dw_attr; /* 0 4 */ /* XXX 4 bytes hole, try to pack */ struct dw_val_node dw_attr_val; /* 8 32 */ /* size: 40, cachelines: 1, members: 2 */ /* sum members: 36, holes: 1, sum holes: 4 */ /* last cacheline: 40 bytes */ }; The following patch is an (not very clean admittedly) attempt to decrease size of dw_loc_descr_node from 80 bytes to 72 and (more importantly) dw_attr_struct from 40 bytes to 32 by moving the dw_attr member from dw_attr_struct into dw_attr_val's padding and similarly move dw_loc_opc/dtprel/frame_offset_rel members into dw_loc_oprnd1 padding and dw_loc_addr into dw_loc_oprnd2 padding. All we need to ensure is that nothing tries to copy whole dw_val_node structs unless it is copied as part of whole dw_loc_descr_node or dw_attr_struct copy. To verify that wasn't the case, I've temporarily added a deleted copy ctor to dw_val_node and then looked at all the errors/warnings caused by that, and those were just from memcpy/memmove or structure assignments of whole dw_loc_descr_node/dw_attr_struct. Bootstrapped/regtested on x86_64-linux and i686-linux. Ok for stage1? 2025-04-10 Jakub Jelinek <ja...@redhat.com> PR debug/119711 * dwarf2out.h (struct dw_val_node): Add u member. (struct dw_loc_descr_node): Remove dw_loc_opc, dtprel, frame_offset_rel and dw_loc_addr members. (dw_loc_opc, dw_loc_dtprel, dw_loc_frame_offset_rel, dw_loc_addr): Define. (struct dw_attr_struct): Remove dw_attr member. (dw_attr): Define. * dwarf2out.cc (loc_descr_equal_p_1): Use dw_loc_dtprel instead of dtprel. (output_loc_operands, new_addr_loc_descr, loc_checksum, loc_checksum_ordered): Likewise. (resolve_args_picking_1): Use dw_loc_frame_offset_rel instead of frame_offset_rel. (loc_list_from_tree_1): Likewise. (resolve_addr_in_expr): Use dw_loc_dtprel instead of dtprel. (copy_deref_exprloc): Copy val_class, val_entry and v members instead of whole dw_loc_oprnd1 and dw_loc_oprnd2. (optimize_string_length): Copy val_class, val_entry and v members instead of whole dw_attr_val. (hash_loc_operands): Use dw_loc_dtprel instead of dtprel. (compare_loc_operands, compare_locs): Likewise. --- gcc/dwarf2out.h.jj 2025-04-08 14:08:51.222282269 +0200 +++ gcc/dwarf2out.h 2025-04-10 19:07:32.535018798 +0200 @@ -276,6 +276,25 @@ typedef struct GTY(()) dw_loc_list_struc struct GTY(()) dw_val_node { enum dw_val_class val_class; + /* On 64-bit host, there are 4 bytes of padding between val_class + and val_entry. Reuse the padding for other content of + dw_loc_descr_node and dw_attr_struct. */ + union dw_val_node_parent + { + struct dw_val_loc_descr_node + { + ENUM_BITFIELD (dwarf_location_atom) dw_loc_opc_v : 8; + /* Used to distinguish DW_OP_addr with a direct symbol relocation + from DW_OP_addr with a dtp-relative symbol relocation. */ + unsigned int dw_loc_dtprel_v : 1; + /* For DW_OP_pick, DW_OP_dup and DW_OP_over operations: true iff. + it targets a DWARF prodecure argument. In this case, it needs to be + relocated according to the current frame offset. */ + unsigned int dw_loc_frame_offset_rel_v : 1; + } u1; + int u2; + enum dwarf_attribute u3; + } GTY((skip)) u; struct addr_table_entry * GTY(()) val_entry; union dw_val_struct_union { @@ -321,15 +340,15 @@ struct GTY(()) dw_val_node { struct GTY((chain_next ("%h.dw_loc_next"))) dw_loc_descr_node { dw_loc_descr_ref dw_loc_next; - ENUM_BITFIELD (dwarf_location_atom) dw_loc_opc : 8; +#define dw_loc_opc dw_loc_oprnd1.u.u1.dw_loc_opc_v /* Used to distinguish DW_OP_addr with a direct symbol relocation from DW_OP_addr with a dtp-relative symbol relocation. */ - unsigned int dtprel : 1; +#define dw_loc_dtprel dw_loc_oprnd1.u.u1.dw_loc_dtprel_v /* For DW_OP_pick, DW_OP_dup and DW_OP_over operations: true iff. it targets a DWARF prodecure argument. In this case, it needs to be relocated according to the current frame offset. */ - unsigned int frame_offset_rel : 1; - int dw_loc_addr; +#define dw_loc_frame_offset_rel dw_loc_oprnd1.u.u1.dw_loc_frame_offset_rel_v +#define dw_loc_addr dw_loc_oprnd2.u.u2 dw_val_node dw_loc_oprnd1; dw_val_node dw_loc_oprnd2; }; @@ -493,7 +512,7 @@ void dwarf2out_cc_finalize (void); Attributes are typically linked below the DIE they modify. */ typedef struct GTY(()) dw_attr_struct { - enum dwarf_attribute dw_attr; +#define dw_attr dw_attr_val.u.u3 dw_val_node dw_attr_val; } dw_attr_node; --- gcc/dwarf2out.cc.jj 2025-04-08 14:08:51.222282269 +0200 +++ gcc/dwarf2out.cc 2025-04-10 19:07:51.301760436 +0200 @@ -1536,7 +1536,7 @@ loc_descr_equal_p_1 (dw_loc_descr_ref a, /* ??? This is only ever set for DW_OP_constNu, for N equal to the address size, but since we always allocate cleared storage it should be zero for other types of locations. */ - if (a->dtprel != b->dtprel) + if (a->dw_loc_dtprel != b->dw_loc_dtprel) return false; return (dw_val_equal_p (&a->dw_loc_oprnd1, &b->dw_loc_oprnd1) @@ -2115,7 +2115,7 @@ output_loc_operands (dw_loc_descr_ref lo dw2_asm_output_data (2, val1->v.val_int, NULL); break; case DW_OP_const4u: - if (loc->dtprel) + if (loc->dw_loc_dtprel) { gcc_assert (targetm.asm_out.output_dwarf_dtprel); targetm.asm_out.output_dwarf_dtprel (asm_out_file, 4, @@ -2128,7 +2128,7 @@ output_loc_operands (dw_loc_descr_ref lo dw2_asm_output_data (4, val1->v.val_int, NULL); break; case DW_OP_const8u: - if (loc->dtprel) + if (loc->dw_loc_dtprel) { gcc_assert (targetm.asm_out.output_dwarf_dtprel); targetm.asm_out.output_dwarf_dtprel (asm_out_file, 8, @@ -2323,7 +2323,7 @@ output_loc_operands (dw_loc_descr_ref lo break; case DW_OP_addr: - if (loc->dtprel) + if (loc->dw_loc_dtprel) { if (targetm.asm_out.output_dwarf_dtprel) { @@ -4028,7 +4028,7 @@ new_addr_loc_descr (rtx addr, enum dtpre ref->dw_loc_oprnd1.val_class = dw_val_class_addr; ref->dw_loc_oprnd1.v.val_addr = addr; - ref->dtprel = dtprel; + ref->dw_loc_dtprel = dtprel; if (dwarf_split_debug_info) ref->dw_loc_oprnd1.val_entry = add_addr_table_entry (addr, @@ -7036,7 +7036,7 @@ loc_checksum (dw_loc_descr_ref loc, stru inchash::hash hstate; hashval_t hash; - tem = (loc->dtprel << 8) | ((unsigned int) loc->dw_loc_opc); + tem = (loc->dw_loc_dtprel << 8) | ((unsigned int) loc->dw_loc_opc); CHECKSUM (tem); hash_loc_operands (loc, hstate); hash = hstate.end(); @@ -7259,7 +7259,7 @@ loc_checksum_ordered (dw_loc_descr_ref l inchash::hash hstate; hashval_t hash; - CHECKSUM_ULEB128 (loc->dtprel); + CHECKSUM_ULEB128 (loc->dw_loc_dtprel); CHECKSUM_ULEB128 (loc->dw_loc_opc); hash_loc_operands (loc, hstate); hash = hstate.end (); @@ -18310,7 +18310,7 @@ resolve_args_picking_1 (dw_loc_descr_ref /* If needed, relocate the picking offset with respect to the frame offset. */ - if (l->frame_offset_rel) + if (l->dw_loc_frame_offset_rel) { unsigned HOST_WIDE_INT off; switch (l->dw_loc_opc) @@ -18826,7 +18826,7 @@ loc_list_from_tree_1 (tree loc, int want && want_address == 0) { ret = new_loc_descr (DW_OP_pick, 0, 0); - ret->frame_offset_rel = 1; + ret->dw_loc_frame_offset_rel = 1; context->placeholder_seen = true; break; } @@ -18993,7 +18993,7 @@ loc_list_from_tree_1 (tree loc, int want gcc_assert (cursor != NULL_TREE); ret = new_loc_descr (DW_OP_pick, i, 0); - ret->frame_offset_rel = 1; + ret->dw_loc_frame_offset_rel = 1; break; } /* FALLTHRU */ @@ -31061,7 +31061,7 @@ resolve_addr_in_expr (dw_attr_node *a, d || loc->dw_loc_opc == DW_OP_addrx) || ((loc->dw_loc_opc == DW_OP_GNU_const_index || loc->dw_loc_opc == DW_OP_constx) - && loc->dtprel)) + && loc->dw_loc_dtprel)) { rtx rtl = loc->dw_loc_oprnd1.val_entry->addr.rtl; if (!resolve_one_addr (&rtl)) @@ -31073,7 +31073,7 @@ resolve_addr_in_expr (dw_attr_node *a, d break; case DW_OP_const4u: case DW_OP_const8u: - if (loc->dtprel + if (loc->dw_loc_dtprel && !resolve_one_addr (&loc->dw_loc_oprnd1.v.val_addr)) return false; break; @@ -31359,8 +31359,12 @@ copy_deref_exprloc (dw_loc_descr_ref exp while (expr != l) { *p = new_loc_descr (expr->dw_loc_opc, 0, 0); - (*p)->dw_loc_oprnd1 = expr->dw_loc_oprnd1; - (*p)->dw_loc_oprnd2 = expr->dw_loc_oprnd2; + (*p)->dw_loc_oprnd1.val_class = expr->dw_loc_oprnd1.val_class; + (*p)->dw_loc_oprnd1.val_entry = expr->dw_loc_oprnd1.val_entry; + (*p)->dw_loc_oprnd1.v = expr->dw_loc_oprnd1.v; + (*p)->dw_loc_oprnd2.val_class = expr->dw_loc_oprnd2.val_class; + (*p)->dw_loc_oprnd2.val_entry = expr->dw_loc_oprnd2.val_entry; + (*p)->dw_loc_oprnd2.v = expr->dw_loc_oprnd2.v; p = &(*p)->dw_loc_next; expr = expr->dw_loc_next; } @@ -31451,7 +31455,9 @@ optimize_string_length (dw_attr_node *a) copy over the DW_AT_location attribute from die to a. */ if (l->dw_loc_next != NULL) { - a->dw_attr_val = av->dw_attr_val; + a->dw_attr_val.val_class = av->dw_attr_val.val_class; + a->dw_attr_val.val_entry = av->dw_attr_val.val_entry; + a->dw_attr_val.v = av->dw_attr_val.v; return 1; } @@ -31737,7 +31743,7 @@ hash_loc_operands (dw_loc_descr_ref loc, { case DW_OP_const4u: case DW_OP_const8u: - if (loc->dtprel) + if (loc->dw_loc_dtprel) goto hash_addr; /* FALLTHRU */ case DW_OP_const1u: @@ -31839,7 +31845,7 @@ hash_loc_operands (dw_loc_descr_ref loc, break; case DW_OP_addr: hash_addr: - if (loc->dtprel) + if (loc->dw_loc_dtprel) { unsigned char dtprel = 0xd1; hstate.add_object (dtprel); @@ -31851,7 +31857,7 @@ hash_loc_operands (dw_loc_descr_ref loc, case DW_OP_GNU_const_index: case DW_OP_constx: { - if (loc->dtprel) + if (loc->dw_loc_dtprel) { unsigned char dtprel = 0xd1; hstate.add_object (dtprel); @@ -31998,7 +32004,7 @@ compare_loc_operands (dw_loc_descr_ref x { case DW_OP_const4u: case DW_OP_const8u: - if (x->dtprel) + if (x->dw_loc_dtprel) goto hash_addr; /* FALLTHRU */ case DW_OP_const1u: @@ -32162,7 +32168,7 @@ compare_locs (dw_loc_descr_ref x, dw_loc { for (; x != NULL && y != NULL; x = x->dw_loc_next, y = y->dw_loc_next) if (x->dw_loc_opc != y->dw_loc_opc - || x->dtprel != y->dtprel + || x->dw_loc_dtprel != y->dw_loc_dtprel || !compare_loc_operands (x, y)) break; return x == NULL && y == NULL; Jakub