On Aug 15, 2024, Alexandre Oliva <ol...@adacore.com> wrote: > I can't quite envision what to check for in a target-independent test.
Got it. Also dropped some occurrences of CONST_CAST_TREE that I added, then changed function signatures but failed to remove them. Retested on x86_64-linux-gnu. Ok to install? When small objects containing padding bits (or bytes) are fully initialized, we will often store them in registers, and setting bitfields and other small fields will attempt to preserve the uninitialized padding bits, which tends to be expensive. Zero-initializing registers, OTOH, tends to be cheap. So, if we're optimizing, zero-initialize such small padded objects even if that's not needed for correctness. We can't zero-initialize all such padding objects, though: if there's no padding whatsoever, and all fields are initialized with nonzero, the zero initialization would be flagged as dead. That's why we introduce machinery to detect whether objects have padding bits. I considered distinguishing between bitfields, units and larger padding elements, but I didn't pursue that distinction. Since the object's zero-initialization subsumes fields' zero-initialization, the empty string test in builtin-snprintf-6.c's test_assign_aggregate would regress without the addition of native_encode_constructor. for gcc/ChangeLog * expr.cc (categorize_ctor_elements_1): Change p_complete to int, to distinguish complete initialization in presence or absence of uninitialized padding bits. (categorize_ctor_elements): Likewise. Adjust all callers... * expr.h (categorize_ctor_elements): ... and declaration. (type_has_padding_at_level_p): New. * gimple-fold.cc (type_has_padding_at_level_p): New. * fold-const.cc (native_encode_constructor): New. (native_encode_expr): Call it. * gimplify.cc (gimplify_init_constructor): Clear small non-addressable non-volatile objects with padding or other uninitialized fields as an optimization. for gcc/testsuite/ChangeLog * gcc.dg/init-pad-1.c: New. --- gcc/expr.cc | 20 ++++++++++----- gcc/expr.h | 3 +- gcc/fold-const.cc | 33 ++++++++++++++++++++++++ gcc/gimple-fold.cc | 50 +++++++++++++++++++++++++++++++++++++ gcc/gimplify.cc | 14 ++++++++++ gcc/testsuite/gcc.dg/init-pad-1.c | 18 +++++++++++++ 6 files changed, 129 insertions(+), 9 deletions(-) create mode 100644 gcc/testsuite/gcc.dg/init-pad-1.c diff --git a/gcc/expr.cc b/gcc/expr.cc index 2089c2b86a98a..a701c67b3485d 100644 --- a/gcc/expr.cc +++ b/gcc/expr.cc @@ -7096,7 +7096,7 @@ count_type_elements (const_tree type, bool for_ctor_p) static bool categorize_ctor_elements_1 (const_tree ctor, HOST_WIDE_INT *p_nz_elts, HOST_WIDE_INT *p_unique_nz_elts, - HOST_WIDE_INT *p_init_elts, bool *p_complete) + HOST_WIDE_INT *p_init_elts, int *p_complete) { unsigned HOST_WIDE_INT idx; HOST_WIDE_INT nz_elts, unique_nz_elts, init_elts, num_fields; @@ -7218,7 +7218,10 @@ categorize_ctor_elements_1 (const_tree ctor, HOST_WIDE_INT *p_nz_elts, if (*p_complete && !complete_ctor_at_level_p (TREE_TYPE (ctor), num_fields, elt_type)) - *p_complete = false; + *p_complete = 0; + else if (*p_complete > 0 + && type_has_padding_at_level_p (TREE_TYPE (ctor))) + *p_complete = -1; *p_nz_elts += nz_elts; *p_unique_nz_elts += unique_nz_elts; @@ -7239,7 +7242,10 @@ categorize_ctor_elements_1 (const_tree ctor, HOST_WIDE_INT *p_nz_elts, and place it in *P_ELT_COUNT. * whether the constructor is complete -- in the sense that every meaningful byte is explicitly given a value -- - and place it in *P_COMPLETE. + and place it in *P_COMPLETE: + - 0 if any field is missing + - 1 if all fields are initialized, and there's no padding + - -1 if all fields are initialized, but there's padding Return whether or not CTOR is a valid static constant initializer, the same as "initializer_constant_valid_p (CTOR, TREE_TYPE (CTOR)) != 0". */ @@ -7247,12 +7253,12 @@ categorize_ctor_elements_1 (const_tree ctor, HOST_WIDE_INT *p_nz_elts, bool categorize_ctor_elements (const_tree ctor, HOST_WIDE_INT *p_nz_elts, HOST_WIDE_INT *p_unique_nz_elts, - HOST_WIDE_INT *p_init_elts, bool *p_complete) + HOST_WIDE_INT *p_init_elts, int *p_complete) { *p_nz_elts = 0; *p_unique_nz_elts = 0; *p_init_elts = 0; - *p_complete = true; + *p_complete = 1; return categorize_ctor_elements_1 (ctor, p_nz_elts, p_unique_nz_elts, p_init_elts, p_complete); @@ -7313,7 +7319,7 @@ mostly_zeros_p (const_tree exp) if (TREE_CODE (exp) == CONSTRUCTOR) { HOST_WIDE_INT nz_elts, unz_elts, init_elts; - bool complete_p; + int complete_p; categorize_ctor_elements (exp, &nz_elts, &unz_elts, &init_elts, &complete_p); @@ -7331,7 +7337,7 @@ all_zeros_p (const_tree exp) if (TREE_CODE (exp) == CONSTRUCTOR) { HOST_WIDE_INT nz_elts, unz_elts, init_elts; - bool complete_p; + int complete_p; categorize_ctor_elements (exp, &nz_elts, &unz_elts, &init_elts, &complete_p); diff --git a/gcc/expr.h b/gcc/expr.h index 533ae0af3871a..04782b15f192c 100644 --- a/gcc/expr.h +++ b/gcc/expr.h @@ -361,7 +361,8 @@ extern unsigned HOST_WIDE_INT highest_pow2_factor (const_tree); extern bool categorize_ctor_elements (const_tree, HOST_WIDE_INT *, HOST_WIDE_INT *, HOST_WIDE_INT *, - bool *); + int *); +extern bool type_has_padding_at_level_p (tree); extern bool immediate_const_ctor_p (const_tree, unsigned int words = 1); extern void store_constructor (tree, rtx, int, poly_int64, bool); extern HOST_WIDE_INT int_expr_size (const_tree exp); diff --git a/gcc/fold-const.cc b/gcc/fold-const.cc index 8908e7381e72c..ff2c8f35303b8 100644 --- a/gcc/fold-const.cc +++ b/gcc/fold-const.cc @@ -8193,6 +8193,36 @@ native_encode_string (const_tree expr, unsigned char *ptr, int len, int off) return len; } +/* Subroutine of native_encode_expr. Encode the CONSTRUCTOR + specified by EXPR into the buffer PTR of length LEN bytes. + Return the number of bytes placed in the buffer, or zero + upon failure. */ + +static int +native_encode_constructor (const_tree expr, unsigned char *ptr, int len, int off) +{ + /* We are only concerned with zero-initialization constructors here. */ + if (CONSTRUCTOR_NELTS (expr)) + return 0; + + /* Wide-char strings are encoded in target byte-order so native + encoding them is trivial. */ + if (BITS_PER_UNIT != CHAR_BIT + || !tree_fits_shwi_p (TYPE_SIZE_UNIT (TREE_TYPE (expr)))) + return 0; + + HOST_WIDE_INT total_bytes = tree_to_shwi (TYPE_SIZE_UNIT (TREE_TYPE (expr))); + if ((off == -1 && total_bytes > len) || off >= total_bytes) + return 0; + if (off == -1) + off = 0; + len = MIN (total_bytes - off, len); + if (ptr == NULL) + /* Dry run. */; + else + memset (ptr, 0, len); + return len; +} /* Subroutine of fold_view_convert_expr. Encode the INTEGER_CST, REAL_CST, FIXED_CST, COMPLEX_CST, STRING_CST, or VECTOR_CST specified by EXPR into @@ -8229,6 +8259,9 @@ native_encode_expr (const_tree expr, unsigned char *ptr, int len, int off) case STRING_CST: return native_encode_string (expr, ptr, len, off); + case CONSTRUCTOR: + return native_encode_constructor (expr, ptr, len, off); + default: return 0; } diff --git a/gcc/gimple-fold.cc b/gcc/gimple-fold.cc index 18d7a6b176db7..f955a006ebe4e 100644 --- a/gcc/gimple-fold.cc +++ b/gcc/gimple-fold.cc @@ -4661,6 +4661,56 @@ clear_padding_type_may_have_padding_p (tree type) } } +/* Return true if TYPE has padding bits aside from those in fields, + elements, etc. */ + +bool +type_has_padding_at_level_p (tree type) +{ + switch (TREE_CODE (type)) + { + case RECORD_TYPE: + { + tree bitpos = size_zero_node; + /* Expect fields to be sorted by bit position. */ + for (tree f = TYPE_FIELDS (type); f; f = DECL_CHAIN (f)) + if (TREE_CODE (f) == FIELD_DECL) + { + if (DECL_PADDING_P (f)) + return true; + tree pos = bit_position (f); + if (simple_cst_equal (bitpos, pos) != 1) + return true; + if (!DECL_SIZE (f)) + return true; + bitpos = int_const_binop (PLUS_EXPR, pos, DECL_SIZE (f)); + } + if (simple_cst_equal (bitpos, TYPE_SIZE (type)) != 1) + return true; + return false; + } + case UNION_TYPE: + /* If any of the fields is smaller than the whole, there is padding. */ + for (tree f = TYPE_FIELDS (type); f; f = DECL_CHAIN (f)) + if (TREE_CODE (f) == FIELD_DECL) + if (simple_cst_equal (TYPE_SIZE (TREE_TYPE (f)), + TREE_TYPE (type)) != 1) + return true; + return false; + case ARRAY_TYPE: + case COMPLEX_TYPE: + case VECTOR_TYPE: + /* No recursing here, no padding at this level. */ + return false; + case REAL_TYPE: + return clear_padding_real_needs_padding_p (type); + case BITINT_TYPE: + return clear_padding_bitint_needs_padding_p (type); + default: + return false; + } +} + /* Emit a runtime loop: for (; buf.base != end; buf.base += sz) __builtin_clear_padding (buf.base); */ diff --git a/gcc/gimplify.cc b/gcc/gimplify.cc index 26a216e151d6f..354a6f53a7216 100644 --- a/gcc/gimplify.cc +++ b/gcc/gimplify.cc @@ -5564,7 +5564,8 @@ gimplify_init_constructor (tree *expr_p, gimple_seq *pre_p, gimple_seq *post_p, struct gimplify_init_ctor_preeval_data preeval_data; HOST_WIDE_INT num_ctor_elements, num_nonzero_elements; HOST_WIDE_INT num_unique_nonzero_elements; - bool complete_p, valid_const_initializer; + int complete_p; + bool valid_const_initializer; /* Aggregate types must lower constructors to initialization of individual elements. The exception is that a CONSTRUCTOR node @@ -5668,6 +5669,17 @@ gimplify_init_constructor (tree *expr_p, gimple_seq *pre_p, gimple_seq *post_p, /* If a single access to the target must be ensured and all elements are zero, then it's optimal to clear whatever their number. */ cleared = true; + /* If the object is small enough to go in registers, and it's + not required to be constructed in memory, clear it first. + That will avoid wasting cycles preserving any padding bits + that might be there, and if there aren't any, the compiler + is smart enough to optimize the clearing out. */ + else if (complete_p <= 0 + && !TREE_ADDRESSABLE (ctor) && !TREE_THIS_VOLATILE (object) + && (TYPE_MODE (type) != BLKmode || TYPE_NO_FORCE_BLK (type)) + && (opt_for_fn (cfun->decl, optimize) + || opt_for_fn (cfun->decl, optimize_size))) + cleared = true; else cleared = false; diff --git a/gcc/testsuite/gcc.dg/init-pad-1.c b/gcc/testsuite/gcc.dg/init-pad-1.c new file mode 100644 index 0000000000000..801f93813e3ad --- /dev/null +++ b/gcc/testsuite/gcc.dg/init-pad-1.c @@ -0,0 +1,18 @@ +/* { dg-do compile } */ +/* { dg-options "-Og -fdump-tree-gimple" } */ + +struct s { + short a : 3; + short b : 3; + char c; +}; + +extern void g(struct s *); + +void f() { + struct s x = { 0, 0, 1 }; + g (&x); +} + +/* { dg-final { scan-tree-dump-times "= {};" 1 "gimple" } } */ +/* { dg-final { scan-tree-dump-not "= 0;" "gimple" } } */ -- Alexandre Oliva, happy hacker https://FSFLA.org/blogs/lxo/ Free Software Activist GNU Toolchain Engineer More tolerance and less prejudice are key for inclusion and diversity Excluding neuro-others for not behaving ""normal"" is *not* inclusive