Hi, PR10228 1https://gcc.gnu.org/bugzilla/show_bug.cgi?id=102281 Exposed an issue in the current implementation of the padding initialization of -ftrivial-auto-var-init.
Currently, we add __builtin_clear_padding call _AFTER_ every explicit initialization of an auto variable: var_decl = {init_constructor}; __builtin_clear_padding (&var_decl, 0B, 1); the reason I added the call to EVERY auto variable that has explicit initialization is, the folding of __builtin_clear_padding will automatically turn this call to a NOP when there is no padding in the variable. So, we don't need to check whether the variable has padding explicitly. However, always adding the call to __builtin_clear_padding (&var_decl,…) might introduce invalid IR when VAR_DECL cannot be addressable. In order to resolve this issue, I propose the following solution: Instead of adding the call to __builtin_clear_padding _AFTER_ the explicit initialization, Using zero to initialize the whole variable BEFORE explicit fields initialization when VAR_DECL has padding, i.e: If (had_padding_p (var_decl)) var_decl = ZERO; var_decl = {init_constructor}; This should resolve the invalid IR issue. However, there might be more run time overhead from such padding initialization since the whole variable is set to zero instead of only the paddings. Please let me know you comments on this. Thanks. Qing ==================================== The complete patch is : From cb2ef83e8f53c13694c70ac4bc1df6e09b15f1c7 Mon Sep 17 00:00:00 2001 From: Qing Zhao <qing.z...@oracle.com> Date: Tue, 12 Oct 2021 22:33:06 +0000 Subject: [PATCH] Fix pr102281 --- gcc/gimple-fold.c | 25 ++++++++++++++ gcc/gimple-fold.h | 1 + gcc/gimplify.c | 49 +++++++++++++++++---------- gcc/testsuite/c-c++-common/pr102281.c | 15 ++++++++ 4 files changed, 72 insertions(+), 18 deletions(-) create mode 100644 gcc/testsuite/c-c++-common/pr102281.c diff --git a/gcc/gimple-fold.c b/gcc/gimple-fold.c index 7fcfef41f72..de4feb27dbc 100644 --- a/gcc/gimple-fold.c +++ b/gcc/gimple-fold.c @@ -4651,6 +4651,31 @@ clear_padding_type_may_have_padding_p (tree type) } } +/* Return true if TYPE contains any padding bits. */ + +bool +clear_padding_type_has_padding_p (tree type) +{ + bool has_padding = false; + if (BITS_PER_UNIT == 8 + && CHAR_BIT == 8 + && clear_padding_type_may_have_padding_p (type)) + { + HOST_WIDE_INT sz = int_size_in_bytes (type), i; + gcc_assert (sz > 0); + unsigned char *buf = XALLOCAVEC (unsigned char, sz); + memset (buf, ~0, sz); + clear_type_padding_in_mask (type, buf); + for (i = 0; i < sz; i++) + if (buf[i] != (unsigned char) ~0) + { + has_padding = true; + break; + } + } + return has_padding; +} + /* Emit a runtime loop: for (; buf.base != end; buf.base += sz) __builtin_clear_padding (buf.base); */ diff --git a/gcc/gimple-fold.h b/gcc/gimple-fold.h index 397f4aeb7cf..eb750a68eca 100644 --- a/gcc/gimple-fold.h +++ b/gcc/gimple-fold.h @@ -37,6 +37,7 @@ extern tree maybe_fold_and_comparisons (tree, enum tree_code, tree, tree, extern tree maybe_fold_or_comparisons (tree, enum tree_code, tree, tree, enum tree_code, tree, tree); extern bool clear_padding_type_may_have_padding_p (tree); +extern bool clear_padding_type_has_padding_p (tree); extern void clear_type_padding_in_mask (tree, unsigned char *); extern bool optimize_atomic_compare_exchange_p (gimple *); extern void fold_builtin_atomic_compare_exchange (gimple_stmt_iterator *); diff --git a/gcc/gimplify.c b/gcc/gimplify.c index d8e4b139349..4cc3ca3ae4e 100644 --- a/gcc/gimplify.c +++ b/gcc/gimplify.c @@ -1955,7 +1955,8 @@ gimplify_decl_expr (tree *stmt_p, gimple_seq *seq_p) In order to make the paddings as zeroes for pattern init, We should add a call to __builtin_clear_padding to clear the paddings to zero in compatiple with CLANG. */ - if (flag_auto_var_init == AUTO_INIT_PATTERN) + if (flag_auto_var_init == AUTO_INIT_PATTERN + && clear_padding_type_has_padding_p (TREE_TYPE (decl))) gimple_add_padding_init_for_auto_var (decl, is_vla, seq_p); } } @@ -4994,9 +4995,7 @@ gimplify_init_constructor (tree *expr_p, gimple_seq *pre_p, gimple_seq *post_p, tree object, ctor, type; enum gimplify_status ret; vec<constructor_elt, va_gc> *elts; - bool cleared = false; - bool is_empty_ctor = false; - bool is_init_expr = (TREE_CODE (*expr_p) == INIT_EXPR); + bool need_clear_padding = false; gcc_assert (TREE_CODE (TREE_OPERAND (*expr_p, 1)) == CONSTRUCTOR); @@ -5015,6 +5014,13 @@ gimplify_init_constructor (tree *expr_p, gimple_seq *pre_p, gimple_seq *post_p, elts = CONSTRUCTOR_ELTS (ctor); ret = GS_ALL_DONE; + /* If the user requests to initialize automatic variables, we + should initialize paddings inside the variable. */ + if ((TREE_CODE (*expr_p) == INIT_EXPR) + && clear_padding_type_has_padding_p (type) + && is_var_need_auto_init (object)) + need_clear_padding = true; + switch (TREE_CODE (type)) { case RECORD_TYPE: @@ -5036,6 +5042,7 @@ gimplify_init_constructor (tree *expr_p, gimple_seq *pre_p, gimple_seq *post_p, = TREE_THIS_VOLATILE (object) && !TREE_ADDRESSABLE (type) && vec_safe_length (elts) > 1; + bool cleared = false; struct gimplify_init_ctor_preeval_data preeval_data; HOST_WIDE_INT num_ctor_elements, num_nonzero_elements; HOST_WIDE_INT num_unique_nonzero_elements; @@ -5048,7 +5055,6 @@ gimplify_init_constructor (tree *expr_p, gimple_seq *pre_p, gimple_seq *post_p, { if (notify_temp_creation) return GS_OK; - is_empty_ctor = true; break; } @@ -5131,6 +5137,10 @@ gimplify_init_constructor (tree *expr_p, gimple_seq *pre_p, gimple_seq *post_p, /* If a single access to the target must be ensured and all elements are zero, then it's optimal to clear whatever their number. */ cleared = true; + else if (need_clear_padding) + /* if the padding need to be cleared, clear the whole variable + first. */ + cleared = true; else cleared = false; @@ -5266,6 +5276,14 @@ gimplify_init_constructor (tree *expr_p, gimple_seq *pre_p, gimple_seq *post_p, if (notify_temp_creation) return GS_OK; + /* If the padding need be cleared, clear the whole variable first. */ + if (need_clear_padding) + { + tree rhs = build_zero_cst (type); + tree lhs = unshare_expr (object); + gimplify_assign (lhs, rhs, pre_p); + } + /* Extract the real and imaginary parts out of the ctor. */ gcc_assert (elts->length () == 2); r = (*elts)[0].value; @@ -5307,6 +5325,14 @@ gimplify_init_constructor (tree *expr_p, gimple_seq *pre_p, gimple_seq *post_p, if (notify_temp_creation) return GS_OK; + /* If the padding need be cleared, clear the whole variable first. */ + if (need_clear_padding) + { + tree rhs = build_zero_cst (type); + tree lhs = unshare_expr (object); + gimplify_assign (lhs, rhs, pre_p); + } + /* Go ahead and simplify constant constructors to VECTOR_CST. */ if (TREE_CONSTANT (ctor)) { @@ -5382,19 +5408,6 @@ gimplify_init_constructor (tree *expr_p, gimple_seq *pre_p, gimple_seq *post_p, ret = GS_ALL_DONE; } - /* If the user requests to initialize automatic variables, we - should initialize paddings inside the variable. Add a call to - __BUILTIN_CLEAR_PADDING (&object, 0, for_auto_init = true) to - initialize paddings of object always to zero regardless of - INIT_TYPE. Note, we will not insert this call if the aggregate - variable has be completely cleared already or it's initialized - with an empty constructor. */ - if (is_init_expr - && ((AGGREGATE_TYPE_P (type) && !cleared && !is_empty_ctor) - || !AGGREGATE_TYPE_P (type)) - && is_var_need_auto_init (object)) - gimple_add_padding_init_for_auto_var (object, false, pre_p); - return ret; } diff --git a/gcc/testsuite/c-c++-common/pr102281.c b/gcc/testsuite/c-c++-common/pr102281.c new file mode 100644 index 00000000000..bfe9b08524b --- /dev/null +++ b/gcc/testsuite/c-c++-common/pr102281.c @@ -0,0 +1,15 @@ +/* PR102281 */ +/* { dg-do compile } */ +/* { dg-options "-ftrivial-auto-var-init=zero" } */ +long _mm_set_epi64x___q0; +__attribute__((__vector_size__(2 * sizeof(long)))) long long _mm_set_epi64x() { + return (__attribute__((__vector_size__(2 * sizeof(long)))) long long){ + _mm_set_epi64x___q0}; +} + +float _mm_set1_ps___F; +__attribute__((__vector_size__(4 * sizeof(float)))) float +__attribute___mm_set1_ps() { + return (__attribute__((__vector_size__(4 * sizeof(float)))) float){ + _mm_set1_ps___F}; +} -- 2.27.0