On Fri, Dec 12, 2025 at 12:57 PM Robin Dapp <[email protected]> wrote:
>
> Hi,
>
> In the PR we extract non-existent bits/elements from a vector.  This is
> because we use TYPE_SIZE (vectype) for a boolean vector, which returns
> 8 bits instead of 4 for RVV's vector (4) <signed-boolean:1>.
>
> The patch uses TYPE_VECTOR_SUBPARTS instead to get the proper number of
> elements and multiplies it by vector_element_bits to get the proper
> vector size in bits.
>
> Bootstrapped and regtested on x86 and power10.  Regtested on riscv64 and
> aarch64.

OK.

> Regards
>  Robin
>
>         PR tree-optimization/123097
>
> gcc/ChangeLog:
>
>         * tree-vect-loop.cc (vect_create_epilog_for_reduction):
>         Calculate vector size by number of elements * bit size per
>         element.
>
> gcc/testsuite/ChangeLog:
>
>         * gcc.target/riscv/rvv/autovec/pr123097-run.c: New test.
>         * gcc.target/riscv/rvv/autovec/pr123097.c: New test.
> ---
>  .../riscv/rvv/autovec/pr123097-run.c          | 19 ++++++++++++++++++
>  .../gcc.target/riscv/rvv/autovec/pr123097.c   | 20 +++++++++++++++++++
>  gcc/tree-vect-loop.cc                         | 12 +++++------
>  3 files changed, 45 insertions(+), 6 deletions(-)
>  create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/autovec/pr123097-run.c
>  create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/autovec/pr123097.c
>
> diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/pr123097-run.c b/gcc/testsuite/gcc.target/riscv/rvv/autovec/pr123097-run.c
> new file mode 100644
> index 00000000000..f536c82bcb9
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/pr123097-run.c
> @@ -0,0 +1,19 @@
> +/* { dg-do run } */
> +/* { dg-require-effective-target riscv_v_ok } */
> +/* { dg-options "-O3 -march=rv64gcv -mabi=lp64d" } */
> +
> +long long a;
> +int b[22];
> +_Bool c[22];
> +
> +int main() {
> +    for (long f=0; f<10; ++f)
> +      b[f] = c[f] = 1;
> +    for (int f=0; f<5; f++)
> +      for( int g=0; g<5; g++)
> +        c[g*2] &= (_Bool)b[2*f];
> +    for (long f=0; f<2; ++f)
> +      a ^= c[f];
> +    if (a != 0)
> +      __builtin_abort ();
> +}
> diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/pr123097.c b/gcc/testsuite/gcc.target/riscv/rvv/autovec/pr123097.c
> new file mode 100644
> index 00000000000..7f1538940d2
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/pr123097.c
> @@ -0,0 +1,20 @@
> +/* { dg-do compile } */
> +/* { dg-options "-O3 -march=rv64gcv -mabi=lp64d -fdump-tree-optimized" } */
> +
> +long long a;
> +int b[22];
> +_Bool c[22];
> +
> +int main() {
> +    for (long f=0; f<10; ++f)
> +      b[f] = c[f] = 1;
> +    for (int f=0; f<5; f++)
> +      for( int g=0; g<5; g++)
> +        c[g*2] &= (_Bool)b[2*f];
> +    for (long f=0; f<2; ++f)
> +      a ^= c[f];
> +    if (a != 0)
> +      __builtin_abort ();
> +}
> +
> +/* { dg-final { scan-tree-dump-not "BIT_FIELD_REF <\\{ -1, -1, -1, -1 \\}," "optimized" } } */
> diff --git a/gcc/tree-vect-loop.cc b/gcc/tree-vect-loop.cc
> index f9dd88ed824..02ff28d522f 100644
> --- a/gcc/tree-vect-loop.cc
> +++ b/gcc/tree-vect-loop.cc
> @@ -5972,13 +5972,11 @@ vect_create_epilog_for_reduction (loop_vec_info loop_vinfo,
>
>        if (reduce_with_shift && (!slp_reduc || group_size == 1))
>         {
> -         tree bitsize = TYPE_SIZE (TREE_TYPE (vectype1));
> -         int element_bitsize = tree_to_uhwi (bitsize);
> +         int element_bitsize = vector_element_bits (vectype1);
>           /* Enforced by vectorizable_reduction, which disallows SLP reductions
>              for variable-length vectors and also requires direct target support
>              for loop reductions.  */
> -         int vec_size_in_bits = tree_to_uhwi (TYPE_SIZE (vectype1));
> -         int nelements = vec_size_in_bits / element_bitsize;
> +         int nelements = TYPE_VECTOR_SUBPARTS (vectype1).to_constant ();
>           vec_perm_builder sel;
>           vec_perm_indices indices;
>
> @@ -6019,7 +6017,8 @@ vect_create_epilog_for_reduction (loop_vec_info loop_vinfo,
>                              "extract scalar result\n");
>
>           new_temp = gimple_build (&stmts, BIT_FIELD_REF, TREE_TYPE (vectype1),
> -                                  new_temp, bitsize, bitsize_zero_node);
> +                                  new_temp, bitsize_int (element_bitsize),
> +                                  bitsize_zero_node);
>           new_temp = gimple_convert (&stmts, scalar_type, new_temp);
>           gsi_insert_seq_before (&exit_gsi, stmts, GSI_SAME_STMT);
>           scalar_results.safe_push (new_temp);
> @@ -6041,8 +6040,9 @@ vect_create_epilog_for_reduction (loop_vec_info loop_vinfo,
>                              "Reduce using scalar code.\n");
>
>           tree compute_type = TREE_TYPE (vectype1);
> -         unsigned vec_size_in_bits = tree_to_uhwi (TYPE_SIZE (vectype1));
>           unsigned element_bitsize = vector_element_bits (vectype1);
> +         unsigned vec_size_in_bits = element_bitsize
> +           * TYPE_VECTOR_SUBPARTS (vectype1).to_constant ();
>           tree bitsize = bitsize_int (element_bitsize);
>           gimple_seq stmts = NULL;
>           FOR_EACH_VEC_ELT (reduc_inputs, i, vec_temp)
> --
> 2.51.1
>

Reply via email to