On Thu, Aug 21, 2025 at 9:30 AM Andrew Pinski <andrew.pin...@oss.qualcomm.com> wrote: > > Just like r16-465-gf2bb7ffe84840d8 but this time > instead of a VCE there is a full on load from a boolean. > This showed up when trying to remove the extra copy > in the testcase from the revision mentioned above (pr120122-1.c). > So when moving loads from a boolean type from being conditional > to non-conditional, the load needs to become a full load and then > casted into a bool so that the upper bits are correct. > > Bitfields loads will always do the truncation so they don't need to > be rewritten. Non boolean types always do the truncation too. > > What we do is wrap the original reference with a VCE which causes > the full load and then do a casting to do the truncation. Using > fold_build1 with VCE will do the correct thing if there is a secondary > VCE and will also fold if this was just a plain MEM_REF so there is > no need to handle those 2 cases special either. > > Changes since v1: > * v2: Use VIEW_CONVERT_EXPR instead of doing a manual load. > Accept all non mode precision loads rather than just > boolean ones. > * v3: Move back to checking boolean type. Don't handle BIT_FIELD_REF. > Add asserts for IMAG/REAL_PART_EXPR. > > Bootstrapped and tested on x86_64-linux-gnu.
OK. Thanks, Richard. > PR tree-optimization/121279 > > gcc/ChangeLog: > > * gimple-fold.cc (gimple_needing_rewrite_undefined): Return > true for non mode precision boolean loads. > (rewrite_to_defined_unconditional): Handle non mode precision loads. > > gcc/testsuite/ChangeLog: > > * gcc.dg/torture/pr121279-1.c: New test. > > Signed-off-by: Andrew Pinski <andrew.pin...@oss.qualcomm.com> > --- > gcc/gimple-fold.cc | 66 ++++++++++++++++++++++- > gcc/testsuite/gcc.dg/torture/pr121279-1.c | 49 +++++++++++++++++ > 2 files changed, 114 insertions(+), 1 deletion(-) > create mode 100644 gcc/testsuite/gcc.dg/torture/pr121279-1.c > > diff --git a/gcc/gimple-fold.cc b/gcc/gimple-fold.cc > index 85319b33492..e475f2ba5d1 100644 > --- a/gcc/gimple-fold.cc > +++ b/gcc/gimple-fold.cc > @@ -10520,6 +10520,20 @@ gimple_needing_rewrite_undefined (gimple *stmt) > && !POINTER_TYPE_P (lhs_type)) > return false; > tree rhs = gimple_assign_rhs1 (stmt); > + /* Boolean loads need special handling as they are treated as a full MODE > load > + and don't mask off the bits for the precision. */ > + if (gimple_assign_load_p (stmt) > + /* Booleans are the integral type which has this non-masking issue. */ > + && TREE_CODE (lhs_type) == BOOLEAN_TYPE > + /* Only non mode precision booleans are need the masking. */ > + && !type_has_mode_precision_p (lhs_type) > + /* BFR should be the correct thing and just grab the precision. */ > + && TREE_CODE (rhs) != BIT_FIELD_REF > + /* Bit-fields loads don't need a rewrite as the masking > + happens for them. */ > + && (TREE_CODE (rhs) != COMPONENT_REF > + || !DECL_BIT_FIELD (TREE_OPERAND (rhs, 1)))) > + return true; > /* VCE from integral types to a integral types but with > a smaller precision need to be changed into casts > to be well defined. */ > @@ -10558,6 +10572,57 @@ rewrite_to_defined_unconditional > (gimple_stmt_iterator *gsi, gimple *stmt, > print_gimple_stmt (dump_file, stmt, 0, TDF_SLIM); > } > gimple_seq stmts = NULL; > + tree lhs = gimple_assign_lhs (stmt); > + > + /* Boolean loads need to be rewritten to be a load from the same mode > + and then a cast to the other type so the other bits are masked off > + correctly since the load was done conditionally. It is similar to the > VCE > + case below. */ > + if (gimple_assign_load_p (stmt) > + && TREE_CODE (TREE_TYPE (lhs)) == BOOLEAN_TYPE) > + { > + tree rhs = gimple_assign_rhs1 (stmt); > + > + /* Double check that gimple_needing_rewrite_undefined was called. */ > + /* Bit-fields loads will do the masking so don't need the rewriting. > */ > + gcc_assert (TREE_CODE (rhs) != COMPONENT_REF > + || !DECL_BIT_FIELD (TREE_OPERAND (rhs, 1))); > + /* BFR is like a bit field load and will do the correct thing. */ > + gcc_assert (TREE_CODE (lhs) != BIT_FIELD_REF); > + /* Complex boolean types are not valid so REAL/IMAG part will > + never show up. */ > + gcc_assert (TREE_CODE (rhs) != REALPART_EXPR > + && TREE_CODE (lhs) != IMAGPART_EXPR); > + > + auto bits = GET_MODE_BITSIZE (SCALAR_TYPE_MODE (TREE_TYPE (rhs))); > + tree new_type = build_nonstandard_integer_type (bits, true); > + location_t loc = gimple_location (stmt); > + tree mem_ref = fold_build1_loc (loc, VIEW_CONVERT_EXPR, new_type, rhs); > + /* Replace the original load with a new load and a new lhs. */ > + tree new_lhs = make_ssa_name (new_type); > + gimple_assign_set_rhs1 (stmt, mem_ref); > + gimple_assign_set_lhs (stmt, new_lhs); > + > + if (in_place) > + update_stmt (stmt); > + else > + { > + gimple_set_modified (stmt, true); > + gimple_seq_add_stmt (&stmts, stmt); > + } > + > + /* Build the conversion statement. */ > + gimple *cvt = gimple_build_assign (lhs, NOP_EXPR, new_lhs); > + if (in_place) > + { > + gsi_insert_after (gsi, cvt, GSI_SAME_STMT); > + update_stmt (stmt); > + } > + else > + gimple_seq_add_stmt (&stmts, cvt); > + return stmts; > + } > + > /* VCE from integral types to another integral types but with > smaller precisions need to be changed into casts > to be well defined. */ > @@ -10579,7 +10644,6 @@ rewrite_to_defined_unconditional > (gimple_stmt_iterator *gsi, gimple *stmt, > } > return stmts; > } > - tree lhs = gimple_assign_lhs (stmt); > tree type = unsigned_type_for (TREE_TYPE (lhs)); > if (gimple_assign_rhs_code (stmt) == ABS_EXPR) > gimple_assign_set_rhs_code (stmt, ABSU_EXPR); > diff --git a/gcc/testsuite/gcc.dg/torture/pr121279-1.c > b/gcc/testsuite/gcc.dg/torture/pr121279-1.c > new file mode 100644 > index 00000000000..516b887fe65 > --- /dev/null > +++ b/gcc/testsuite/gcc.dg/torture/pr121279-1.c > @@ -0,0 +1,49 @@ > +/* { dg-do run } */ > +/* PR tree-optimization/121279 */ > + > +#include <stdbool.h> > + > +struct Value { > + int type; > + union { > + bool boolean; > + long long t; > + }; > +}; > + > +static struct Value s_item_mem; > + > +/* truthy was being miscompiled for the value.type==2 case, > + because the bool load from the union is pulled out of the > + loop but that was conditional before and now it is not, > + so turns it into `s_item_mem.type == 1 | bool` which is > + not valid if `s_item_mem.type == 2` . */ > +static bool truthy(void) __attribute__((noipa)); > +static bool > +truthy(void) > +{ > + bool tt = false; > + for(int i = 0; i < 10; i++) > + { > + if (s_item_mem.type == 0) > + tt = tt | 0; > + else if (s_item_mem.type == 1) > + tt = tt | s_item_mem.boolean; > + else > + tt = tt | 1; > + } > + return tt; > +} > + > +int > +main(void) > +{ > + s_item_mem.type = 2; > + s_item_mem.t = -1; > + bool b1 = !truthy(); > + s_item_mem.type = 1; > + s_item_mem.boolean = b1; > + bool b = truthy(); > + if (b1 != b) __builtin_abort(); > + if (b) __builtin_abort(); > +} > -- > 2.43.0 >