On Thu, Aug 21, 2025 at 9:30 AM Andrew Pinski
<andrew.pin...@oss.qualcomm.com> wrote:
>
> Just like r16-465-gf2bb7ffe84840d8 but this time
> instead of a VCE there is a full on load from a boolean.
> This showed up when trying to remove the extra copy
> in the testcase from the revision mentioned above (pr120122-1.c).
> So when moving loads from a boolean type from being conditional
> to non-conditional, the load needs to become a full load and then
> casted into a bool so that the upper bits are correct.
>
> Bitfields loads will always do the truncation so they don't need to
> be rewritten. Non boolean types always do the truncation too.
>
> What we do is wrap the original reference with a VCE which causes
> the full load and then do a casting to do the truncation. Using
> fold_build1 with VCE will do the correct thing if there is a secondary
> VCE and will also fold if this was just a plain MEM_REF so there is
> no need to handle those 2 cases special either.
>
> Changes since v1:
> * v2: Use VIEW_CONVERT_EXPR instead of doing a manual load.
>       Accept all non mode precision loads rather than just
>       boolean ones.
> * v3: Move back to checking boolean type. Don't handle BIT_FIELD_REF.
>       Add asserts for IMAG/REAL_PART_EXPR.
>
> Bootstrapped and tested on x86_64-linux-gnu.

OK.

Thanks,
Richard.

>         PR tree-optimization/121279
>
> gcc/ChangeLog:
>
>         * gimple-fold.cc (gimple_needing_rewrite_undefined): Return
>         true for non mode precision boolean loads.
>         (rewrite_to_defined_unconditional): Handle non mode precision loads.
>
> gcc/testsuite/ChangeLog:
>
>         * gcc.dg/torture/pr121279-1.c: New test.
>
> Signed-off-by: Andrew Pinski <andrew.pin...@oss.qualcomm.com>
> ---
>  gcc/gimple-fold.cc                        | 66 ++++++++++++++++++++++-
>  gcc/testsuite/gcc.dg/torture/pr121279-1.c | 49 +++++++++++++++++
>  2 files changed, 114 insertions(+), 1 deletion(-)
>  create mode 100644 gcc/testsuite/gcc.dg/torture/pr121279-1.c
>
> diff --git a/gcc/gimple-fold.cc b/gcc/gimple-fold.cc
> index 85319b33492..e475f2ba5d1 100644
> --- a/gcc/gimple-fold.cc
> +++ b/gcc/gimple-fold.cc
> @@ -10520,6 +10520,20 @@ gimple_needing_rewrite_undefined (gimple *stmt)
>        && !POINTER_TYPE_P (lhs_type))
>      return false;
>    tree rhs = gimple_assign_rhs1 (stmt);
> +  /* Boolean loads need special handling as they are treated as a full MODE 
> load
> +     and don't mask off the bits for the precision.  */
> +  if (gimple_assign_load_p (stmt)
> +      /* Booleans are the integral type which has this non-masking issue. */
> +      && TREE_CODE (lhs_type) == BOOLEAN_TYPE
> +      /* Only non mode precision booleans are need the masking.  */
> +      && !type_has_mode_precision_p (lhs_type)
> +      /* BFR should be the correct thing and just grab the precision.  */
> +      && TREE_CODE (rhs) != BIT_FIELD_REF
> +      /* Bit-fields loads don't need a rewrite as the masking
> +        happens for them.  */
> +      && (TREE_CODE (rhs) != COMPONENT_REF
> +         || !DECL_BIT_FIELD (TREE_OPERAND (rhs, 1))))
> +    return true;
>    /* VCE from integral types to a integral types but with
>       a smaller precision need to be changed into casts
>       to be well defined. */
> @@ -10558,6 +10572,57 @@ rewrite_to_defined_unconditional 
> (gimple_stmt_iterator *gsi, gimple *stmt,
>        print_gimple_stmt (dump_file, stmt, 0, TDF_SLIM);
>      }
>    gimple_seq stmts = NULL;
> +  tree lhs = gimple_assign_lhs (stmt);
> +
> +  /* Boolean loads need to be rewritten to be a load from the same mode
> +     and then a cast to the other type so the other bits are masked off
> +     correctly since the load was done conditionally. It is similar to the 
> VCE
> +     case below.  */
> +  if (gimple_assign_load_p (stmt)
> +      && TREE_CODE (TREE_TYPE (lhs)) == BOOLEAN_TYPE)
> +    {
> +      tree rhs = gimple_assign_rhs1 (stmt);
> +
> +      /* Double check that gimple_needing_rewrite_undefined was called.  */
> +      /* Bit-fields loads will do the masking so don't need the rewriting.  
> */
> +      gcc_assert (TREE_CODE (rhs) != COMPONENT_REF
> +                 || !DECL_BIT_FIELD (TREE_OPERAND (rhs, 1)));
> +      /* BFR is like a bit field load and will do the correct thing.  */
> +      gcc_assert (TREE_CODE (lhs) != BIT_FIELD_REF);
> +      /* Complex boolean types are not valid so REAL/IMAG part will
> +        never show up. */
> +      gcc_assert (TREE_CODE (rhs) != REALPART_EXPR
> +                 && TREE_CODE (lhs) != IMAGPART_EXPR);
> +
> +      auto bits = GET_MODE_BITSIZE (SCALAR_TYPE_MODE (TREE_TYPE (rhs)));
> +      tree new_type = build_nonstandard_integer_type (bits, true);
> +      location_t loc = gimple_location (stmt);
> +      tree mem_ref = fold_build1_loc (loc, VIEW_CONVERT_EXPR, new_type, rhs);
> +      /* Replace the original load with a new load and a new lhs. */
> +      tree new_lhs = make_ssa_name (new_type);
> +      gimple_assign_set_rhs1 (stmt, mem_ref);
> +      gimple_assign_set_lhs (stmt, new_lhs);
> +
> +      if (in_place)
> +         update_stmt (stmt);
> +      else
> +       {
> +         gimple_set_modified (stmt, true);
> +         gimple_seq_add_stmt (&stmts, stmt);
> +       }
> +
> +      /* Build the conversion statement.  */
> +      gimple *cvt = gimple_build_assign (lhs, NOP_EXPR, new_lhs);
> +      if (in_place)
> +       {
> +         gsi_insert_after (gsi, cvt, GSI_SAME_STMT);
> +         update_stmt (stmt);
> +       }
> +      else
> +       gimple_seq_add_stmt (&stmts, cvt);
> +      return stmts;
> +    }
> +
>    /* VCE from integral types to another integral types but with
>       smaller precisions need to be changed into casts
>       to be well defined. */
> @@ -10579,7 +10644,6 @@ rewrite_to_defined_unconditional 
> (gimple_stmt_iterator *gsi, gimple *stmt,
>         }
>        return stmts;
>      }
> -  tree lhs = gimple_assign_lhs (stmt);
>    tree type = unsigned_type_for (TREE_TYPE (lhs));
>    if (gimple_assign_rhs_code (stmt) == ABS_EXPR)
>      gimple_assign_set_rhs_code (stmt, ABSU_EXPR);
> diff --git a/gcc/testsuite/gcc.dg/torture/pr121279-1.c 
> b/gcc/testsuite/gcc.dg/torture/pr121279-1.c
> new file mode 100644
> index 00000000000..516b887fe65
> --- /dev/null
> +++ b/gcc/testsuite/gcc.dg/torture/pr121279-1.c
> @@ -0,0 +1,49 @@
> +/* { dg-do run } */
> +/* PR tree-optimization/121279 */
> +
> +#include <stdbool.h>
> +
> +struct Value {
> +    int type;
> +    union {
> +        bool boolean;
> +        long long t;
> +    };
> +};
> +
> +static struct Value s_item_mem;
> +
> +/* truthy was being miscompiled for the value.type==2 case,
> +   because the bool load from the union is pulled out of the
> +   loop but that was conditional before and now it is not,
> +   so turns it into  `s_item_mem.type == 1 | bool` which is
> +   not valid if `s_item_mem.type == 2` . */
> +static bool truthy(void) __attribute__((noipa));
> +static bool
> +truthy(void)
> +{
> +    bool tt = false;
> +    for(int i = 0; i < 10; i++)
> +    {
> +      if (s_item_mem.type == 0)
> +        tt = tt | 0;
> +      else if (s_item_mem.type == 1)
> +        tt = tt | s_item_mem.boolean;
> +      else
> +        tt = tt |  1;
> +    }
> +    return tt;
> +}
> +
> +int
> +main(void)
> +{
> +    s_item_mem.type = 2;
> +    s_item_mem.t = -1;
> +    bool b1 = !truthy();
> +    s_item_mem.type = 1;
> +    s_item_mem.boolean = b1;
> +    bool b = truthy();
> +    if (b1 != b)  __builtin_abort();
> +    if (b) __builtin_abort();
> +}
> --
> 2.43.0
>

Reply via email to