> Am 18.10.2023 um 16:19 schrieb Robin Dapp <rdapp....@gmail.com>:
>
> Hi,
>
> even though there was no full conclusion yet, I took the liberty of
> posting this as a patch to allow for further discussion.
>
> In PR/111794 we miss a vectorization because on riscv type precision and
> mode precision differ for mask types. We can still vectorize when
> allowing assignments with the same precision for dest and source which
> is what this patch does.
>
> Bootstrapped and regtested on x86, aarch64 and power10. No new failures
> on riscv.
It looks safe, thus OK.
Richard.
> Regards
> Robin
>
> gcc/ChangeLog:
>
> PR/111794
>
> * tree-vect-stmts.cc (vectorizable_assignment): Add
> same-precision exception for dest and source.
>
> gcc/testsuite/ChangeLog:
>
> * gcc.target/riscv/rvv/autovec/slp-mask-1.c: New test.
> * gcc.target/riscv/rvv/autovec/slp-mask-run-1.c: New test.
> ---
> .../gcc.target/riscv/rvv/autovec/slp-mask-1.c | 18 +++++++++++
> .../riscv/rvv/autovec/slp-mask-run-1.c | 31 +++++++++++++++++++
> gcc/tree-vect-stmts.cc | 12 ++++---
> 3 files changed, 56 insertions(+), 5 deletions(-)
> create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/autovec/slp-mask-1.c
> create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/autovec/slp-mask-run-1.c
>
> diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/slp-mask-1.c b/gcc/testsuite/gcc.target/riscv/rvv/autovec/slp-mask-1.c
> new file mode 100644
> index 00000000000..ee1baa58d63
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/slp-mask-1.c
> @@ -0,0 +1,18 @@
> +/* { dg-do compile } */
> +/* { dg-additional-options "-std=gnu99 -O3 -march=rv64gcv -mabi=lp64d --param=riscv-autovec-preference=scalable -fdump-tree-slp-details" } */
> +
> +void
> +__attribute__ ((noipa))
> +f (int *restrict x, short *restrict y, int *restrict res)
> +{
> + res[0] = x[0] == 1 & y[0] == 2;
> + res[1] = x[1] == 1 & y[1] == 2;
> + res[2] = x[2] == 1 & y[2] == 2;
> + res[3] = x[3] == 1 & y[3] == 2;
> + res[4] = x[4] == 1 & y[4] == 2;
> + res[5] = x[5] == 1 & y[5] == 2;
> + res[6] = x[6] == 1 & y[6] == 2;
> + res[7] = x[7] == 1 & y[7] == 2;
> +}
> +
> +/* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 1 "slp2" } } */
> diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/slp-mask-run-1.c b/gcc/testsuite/gcc.target/riscv/rvv/autovec/slp-mask-run-1.c
> new file mode 100644
> index 00000000000..b3469c41c87
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/slp-mask-run-1.c
> @@ -0,0 +1,31 @@
> +/* { dg-do run { target { riscv_v } } } */
> +/* { dg-additional-options "-std=gnu99 -O3 -march=rv64gcv -mabi=lp64d --param=riscv-autovec-preference=scalable" } */
> +
> +#include <malloc.h>
> +#include <stdio.h>
> +
> +#include "slp-mask-1.c"
> +
> +#define SZ 8
> +
> +__attribute__ ((optimize ("1")))
> +int main ()
> +{
> + int *a = malloc (SZ * sizeof (*a));
> + short *b = malloc (SZ * sizeof (*b));
> + int *res = malloc (SZ * sizeof (*res));
> + int *ref = malloc (SZ * sizeof (*ref));
> +
> + for (int i = 0; i < SZ; i++)
> + {
> + a[i] = i & 1;
> + b[i] = 2;
> + ref[i] = a[i] == 1 & b[i] == 2;
> + }
> +
> + f (a, b, res);
> +
> + for (int i = 0; i < SZ; i++)
> + if (res[i] != ref[i])
> + __builtin_abort ();
> +}
> diff --git a/gcc/tree-vect-stmts.cc b/gcc/tree-vect-stmts.cc
> index cd7c1090d88..e612da6c492 100644
> --- a/gcc/tree-vect-stmts.cc
> +++ b/gcc/tree-vect-stmts.cc
> @@ -6084,14 +6084,16 @@ vectorizable_assignment (vec_info *vinfo,
> /* But a conversion that does not change the bit-pattern is ok. */
> && !(INTEGRAL_TYPE_P (TREE_TYPE (scalar_dest))
> && INTEGRAL_TYPE_P (TREE_TYPE (op))
> - && (TYPE_PRECISION (TREE_TYPE (scalar_dest))
> + && (((TYPE_PRECISION (TREE_TYPE (scalar_dest))
> > TYPE_PRECISION (TREE_TYPE (op)))
> - && TYPE_UNSIGNED (TREE_TYPE (op))))
> + && TYPE_UNSIGNED (TREE_TYPE (op)))
> + || (TYPE_PRECISION (TREE_TYPE (scalar_dest))
> + == TYPE_PRECISION (TREE_TYPE (op))))))
> {
> if (dump_enabled_p ())
> - dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
> - "type conversion to/from bit-precision "
> - "unsupported.\n");
> + dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
> + "type conversion to/from bit-precision "
> + "unsupported.\n");
> return false;
> }
>
> --
> 2.41.0
>