[gcc r14-9344] RISC-V: Adjust vec unit-stride load/store costs.

2024-03-06 Thread Robin Dapp via Gcc-cvs
https://gcc.gnu.org/g:9ae83078fe45d093bbaa02b8348f2407fe0c62d6

commit r14-9344-g9ae83078fe45d093bbaa02b8348f2407fe0c62d6
Author: Robin Dapp 
Date:   Mon Jan 15 17:34:58 2024 +0100

RISC-V: Adjust vec unit-stride load/store costs.

Scalar loads provide offset addressing while unit-stride vector
instructions cannot.  The offset must be loaded into a general-purpose
register before it can be used.  In order to account for this, this
patch adds an address arithmetic heuristic that keeps track of data
reference operands.  If we haven't seen the operand before we add the
cost of a scalar statement.

This helps to get rid of an lbm regression when vectorizing (roughly
0.5% fewer dynamic instructions).  gcc5 improves by 0.2% and deepsjeng
by 0.25%.  wrf and nab degrade by 0.1%.  This is because we now
adjust the cost of SLP as well as loop-vectorized instructions whereas
we would only adjust loop-vectorized instructions before.
Considering higher scalar_to_vec costs (3 vs 1) for all vectorization
types causes some snippets not to get vectorized anymore.  Given these
costs the decision looks correct but appears worse when just counting
dynamic instructions.

In total SPECint 2017 has 4 billion fewer dynamic instructions and SPECfp
0.7 billion fewer.

gcc/ChangeLog:

* config/riscv/riscv-vector-costs.cc (adjust_stmt_cost): Move...
(costs::adjust_stmt_cost): ... to here and add vec_load/vec_store
offset handling.
(costs::add_stmt_cost): Also adjust cost for statements without
stmt_info.
* config/riscv/riscv-vector-costs.h: Define zero constant.

gcc/testsuite/ChangeLog:

* gcc.dg/vect/costmodel/riscv/rvv/vse-slp-1.c: New test.
* gcc.dg/vect/costmodel/riscv/rvv/vse-slp-2.c: New test.

Diff:
---
 gcc/config/riscv/riscv-vector-costs.cc | 86 +++---
 gcc/config/riscv/riscv-vector-costs.h  | 10 +++
 .../gcc.dg/vect/costmodel/riscv/rvv/vse-slp-1.c| 51 +
 .../gcc.dg/vect/costmodel/riscv/rvv/vse-slp-2.c| 51 +
 4 files changed, 188 insertions(+), 10 deletions(-)

diff --git a/gcc/config/riscv/riscv-vector-costs.cc 
b/gcc/config/riscv/riscv-vector-costs.cc
index 7c9840df4e9..adf9c197df5 100644
--- a/gcc/config/riscv/riscv-vector-costs.cc
+++ b/gcc/config/riscv/riscv-vector-costs.cc
@@ -42,6 +42,7 @@ along with GCC; see the file COPYING3.  If not see
 #include "backend.h"
 #include "tree-data-ref.h"
 #include "tree-ssa-loop-niter.h"
+#include "tree-hash-traits.h"
 
 /* This file should be included last.  */
 #include "riscv-vector-costs.h"
@@ -1047,18 +1048,81 @@ costs::better_main_loop_than_p (const vector_costs 
*uncast_other) const
top of riscv_builtin_vectorization_cost handling which doesn't have any
information on statement operation codes etc.  */
 
-static unsigned
-adjust_stmt_cost (enum vect_cost_for_stmt kind, tree vectype, int stmt_cost)
+unsigned
+costs::adjust_stmt_cost (enum vect_cost_for_stmt kind, loop_vec_info loop,
+stmt_vec_info stmt_info,
+slp_tree, tree vectype, int stmt_cost)
 {
   const cpu_vector_cost *costs = get_vector_costs ();
   switch (kind)
 {
 case scalar_to_vec:
-  return stmt_cost += (FLOAT_TYPE_P (vectype) ? costs->regmove->FR2VR
- : costs->regmove->GR2VR);
+  stmt_cost += (FLOAT_TYPE_P (vectype) ? costs->regmove->FR2VR
+   : costs->regmove->GR2VR);
+  break;
 case vec_to_scalar:
-  return stmt_cost += (FLOAT_TYPE_P (vectype) ? costs->regmove->VR2FR
- : costs->regmove->VR2GR);
+  stmt_cost += (FLOAT_TYPE_P (vectype) ? costs->regmove->VR2FR
+   : costs->regmove->VR2GR);
+  break;
+case vector_load:
+case vector_store:
+   {
+ /* Unit-stride vector loads and stores do not have offset addressing
+as opposed to scalar loads and stores.
+If the address depends on a variable we need an additional
+add/sub for each load/store in the worst case.  */
+ if (stmt_info && stmt_info->stmt)
+   {
+ data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
+ class loop *father = stmt_info->stmt->bb->loop_father;
+ if (!loop && father && !father->inner && father->superloops)
+   {
+ tree ref;
+ if (TREE_CODE (dr->ref) != MEM_REF
+ || !(ref = TREE_OPERAND (dr->ref, 0))
+ || TREE_CODE (ref) != SSA_NAME)
+   break;
+
+ if (SSA_NAME_IS_DEFAULT_DEF (ref))
+   break;
+
+ if (memrefs.contains ({ref, cst0}))
+   break;
+
+ memrefs.add ({ref, cst0});
+
+  

[gcc r14-9345] RISC-V: Use vmv1r.v instead of vmv.v.v for fma output reloads [PR114200].

2024-03-06 Thread Robin Dapp via Gcc-cvs
https://gcc.gnu.org/g:59554a50be8ebbd52e8a6348a92110af182e1874

commit r14-9345-g59554a50be8ebbd52e8a6348a92110af182e1874
Author: Robin Dapp 
Date:   Wed Mar 6 12:15:40 2024 +0100

RISC-V: Use vmv1r.v instead of vmv.v.v for fma output reloads [PR114200].

Three-operand instructions like vmacc are modeled with an implicit
output reload when the output does not match one of the operands.  For
this we use vmv.v.v which is subject to length masking.

In a situation where the current vl is less than the full vlenb
and the fma's result value is used as input for a vector reduction
(which is never length masked) we effectively only reduce vl
elements.  The masked-out elements are relevant for the
reduction, though, leading to a wrong result.

This patch replaces the vmv reloads by full-register reloads.

gcc/ChangeLog:

PR target/114200
PR target/114202

* config/riscv/vector.md: Use vmv[1248]r.v instead of vmv.v.v.

gcc/testsuite/ChangeLog:

* gcc.target/riscv/rvv/autovec/pr114200.c: New test.
* gcc.target/riscv/rvv/autovec/pr114202.c: New test.

Diff:
---
 gcc/config/riscv/vector.md | 96 +++---
 .../gcc.target/riscv/rvv/autovec/pr114200.c| 18 
 .../gcc.target/riscv/rvv/autovec/pr114202.c| 20 +
 3 files changed, 86 insertions(+), 48 deletions(-)

diff --git a/gcc/config/riscv/vector.md b/gcc/config/riscv/vector.md
index f89f9c2fa86..8b1c24c5d79 100644
--- a/gcc/config/riscv/vector.md
+++ b/gcc/config/riscv/vector.md
@@ -5351,10 +5351,10 @@
   "@
vmadd.vv\t%0,%4,%5%p1
vmacc.vv\t%0,%3,%4%p1
-   vmv.v.v\t%0,%4\;vmacc.vv\t%0,%3,%4%p1
+   vmv%m4r.v\t%0,%4\;vmacc.vv\t%0,%3,%4%p1
vmadd.vv\t%0,%4,%5%p1
vmacc.vv\t%0,%3,%4%p1
-   vmv.v.v\t%0,%5\;vmacc.vv\t%0,%3,%4%p1"
+   vmv%m5r.v\t%0,%5\;vmacc.vv\t%0,%3,%4%p1"
   [(set_attr "type" "vimuladd")
(set_attr "mode" "")])
 
@@ -5378,9 +5378,9 @@
   "TARGET_VECTOR"
   "@
vmadd.vv\t%0,%3,%4%p1
-   vmv.v.v\t%0,%2\;vmadd.vv\t%0,%3,%4%p1
+   vmv%m2r.v\t%0,%2\;vmadd.vv\t%0,%3,%4%p1
vmadd.vv\t%0,%3,%4%p1
-   vmv.v.v\t%0,%2\;vmadd.vv\t%0,%3,%4%p1"
+   vmv%m2r.v\t%0,%2\;vmadd.vv\t%0,%3,%4%p1"
   [(set_attr "type" "vimuladd")
(set_attr "mode" "")
(set_attr "merge_op_idx" "2")
@@ -5409,9 +5409,9 @@
   "TARGET_VECTOR"
   "@
vmacc.vv\t%0,%2,%3%p1
-   vmv.v.v\t%0,%4\;vmacc.vv\t%0,%2,%3%p1
+   vmv%m4r.v\t%0,%4;vmacc.vv\t%0,%2,%3%p1
vmacc.vv\t%0,%2,%3%p1
-   vmv.v.v\t%0,%4\;vmacc.vv\t%0,%2,%3%p1"
+   vmv%m4r.v\t%0,%4\;vmacc.vv\t%0,%2,%3%p1"
   [(set_attr "type" "vimuladd")
(set_attr "mode" "")
(set_attr "merge_op_idx" "4")
@@ -5462,9 +5462,9 @@
   "TARGET_VECTOR"
   "@
vmadd.vx\t%0,%2,%4%p1
-   vmv.v.v\t%0,%3\;vmadd.vx\t%0,%2,%4%p1
+   vmv%m3r.v\t%0,%3\;vmadd.vx\t%0,%2,%4%p1
vmadd.vx\t%0,%2,%4%p1
-   vmv.v.v\t%0,%3\;vmadd.vx\t%0,%2,%4%p1"
+   vmv%m3r.v\t%0,%3\;vmadd.vx\t%0,%2,%4%p1"
   [(set_attr "type" "vimuladd")
(set_attr "mode" "")
(set_attr "merge_op_idx" "3")
@@ -5494,9 +5494,9 @@
   "TARGET_VECTOR"
   "@
vmacc.vx\t%0,%2,%3%p1
-   vmv.v.v\t%0,%4\;vmacc.vx\t%0,%2,%3%p1
+   vmv%m4r.v\t%0,%4\;vmacc.vx\t%0,%2,%3%p1
vmacc.vx\t%0,%2,%3%p1
-   vmv.v.v\t%0,%4\;vmacc.vx\t%0,%2,%3%p1"
+   vmv%m4r.v\t%0,%4\;vmacc.vx\t%0,%2,%3%p1"
   [(set_attr "type" "vimuladd")
(set_attr "mode" "")
(set_attr "merge_op_idx" "4")
@@ -5562,9 +5562,9 @@
   "TARGET_VECTOR && !TARGET_64BIT"
   "@
vmadd.vx\t%0,%2,%4%p1
-   vmv.v.v\t%0,%2\;vmadd.vx\t%0,%2,%4%p1
+   vmv%m2r.v\t%0,%2\;vmadd.vx\t%0,%2,%4%p1
vmadd.vx\t%0,%2,%4%p1
-   vmv.v.v\t%0,%2\;vmadd.vx\t%0,%2,%4%p1"
+   vmv%m2r.v\t%0,%2\;vmadd.vx\t%0,%2,%4%p1"
   [(set_attr "type" "vimuladd")
(set_attr "mode" "")
(set_attr "merge_op_idx" "3")
@@ -5595,9 +5595,9 @@
   "TARGET_VECTOR && !TARGET_64BIT"
   "@
vmacc.vx\t%0,%2,%3%p1
-   vmv.v.v\t%0,%4\;vmacc.vx\t%0,%2,%3%p1
+   vmv%m4r.v\t%0,%4\;vmacc.vx\t%0,%2,%3%p1
vmacc.vx\t%0,%2,%3%p1
-   vmv.v.v\t%0,%4\;vmacc.vx\t%0,%2,%3%p1"
+   vmv%m4r.v\t%0,%4\;vmacc.vx\t%0,%2,%3%p1"
   [(set_attr "type" "vimuladd")
(set_attr "mode" "")
(set_attr "merge_op_idx" "4")
@@ -5649,10 +5649,10 @@
   "@
vnmsub.vv\t%0,%4,%5%p1
vnmsac.vv\t%0,%3,%4%p1
-   vmv.v.v\t%0,%3\;vnmsub.vv\t%0,%4,%5%p1
+   vmv%m3r.v\t%0,%3\;vnmsub.vv\t%0,%4,%5%p1
vnmsub.vv\t%0,%4,%5%p1
vnmsac.vv\t%0,%3,%4%p1
-   vmv.v.v\t%0,%3\;vnmsub.vv\t%0,%4,%5%p1"
+   vmv%m3r.v\t%0,%3\;vnmsub.vv\t%0,%4,%5%p1"
   [(set_attr "type" "vimuladd")
(set_attr "mode" "")])
 
@@ -5676,9 +5676,9 @@
   "TARGET_VECTOR"
   "@
vnmsub.vv\t%0,%3,%4%p1
-   vmv.v.v\t%0,%2\;vnmsub.vv\t%0,%3,%4%p1
+   vmv%m2r.v\t%0,%2\;vnmsub.vv\t%0,%3,%4%p1
vnmsub.vv\t%0,%3,%4%p1
-   vmv.v.v\t%0,%2\;vnmsub.vv\t%0,%3,%4%p1"
+   vmv%m2r.v\t%0,%2\;vnmsub.vv\t%0,%3,%4%p1"
   [(set_attr "type" "vimuladd")
(set_attr "mode" "")
(set_attr "merge_op_idx" "2")
@@ -5707,9 +5707,9 

[gcc r14-9366] vect: Do not peel epilogue for partial vectors.

2024-03-07 Thread Robin Dapp via Gcc-cvs
https://gcc.gnu.org/g:226043a4d8fb23c7fe7bf16e485b3cfaa094db21

commit r14-9366-g226043a4d8fb23c7fe7bf16e485b3cfaa094db21
Author: Robin Dapp 
Date:   Wed Mar 6 16:54:35 2024 +0100

vect: Do not peel epilogue for partial vectors.

r14-7036-gcbf569486b2dec added an epilogue vectorization guard for early
break but PR114196 shows that we also run into the problem without early
break.  Therefore merge the condition into the topmost vectorization
guard.

gcc/ChangeLog:

PR middle-end/114196

* tree-vect-loop-manip.cc (vect_can_peel_nonlinear_iv_p): Merge
vectorization guards.

gcc/testsuite/ChangeLog:

* gcc.target/aarch64/pr114196.c: New test.
* gcc.target/riscv/rvv/autovec/pr114196.c: New test.

Diff:
---
 gcc/testsuite/gcc.target/aarch64/pr114196.c| 19 ++
 .../gcc.target/riscv/rvv/autovec/pr114196.c| 19 ++
 gcc/tree-vect-loop-manip.cc| 30 +-
 3 files changed, 45 insertions(+), 23 deletions(-)

diff --git a/gcc/testsuite/gcc.target/aarch64/pr114196.c 
b/gcc/testsuite/gcc.target/aarch64/pr114196.c
new file mode 100644
index 000..15e4b0e31b8
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/pr114196.c
@@ -0,0 +1,19 @@
+/* { dg-do compile } */
+/* { dg-options { -O3 -fno-vect-cost-model -march=armv9-a 
-msve-vector-bits=256 } } */
+
+unsigned a;
+int b;
+long *c;
+
+int
+main ()
+{
+  for (int d = 0; d < 22; d += 4) {
+  b = ({
+   int e = c[d];
+   e;
+   })
+  ? 0 : -c[d];
+  a *= 3;
+  }
+}
diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/pr114196.c 
b/gcc/testsuite/gcc.target/riscv/rvv/autovec/pr114196.c
new file mode 100644
index 000..7ba9cbbed70
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/pr114196.c
@@ -0,0 +1,19 @@
+/* { dg-do compile } */
+/* { dg-options { -O3 -fno-vect-cost-model -march=rv64gcv_zvl256b -mabi=lp64d 
-mrvv-vector-bits=zvl } } */
+
+unsigned a;
+int b;
+long *c;
+
+int
+main ()
+{
+  for (int d = 0; d < 22; d += 4) {
+  b = ({
+   int e = c[d];
+   e;
+   })
+  ? 0 : -c[d];
+  a *= 3;
+  }
+}
diff --git a/gcc/tree-vect-loop-manip.cc b/gcc/tree-vect-loop-manip.cc
index f72da915103..56a6d8e4a8d 100644
--- a/gcc/tree-vect-loop-manip.cc
+++ b/gcc/tree-vect-loop-manip.cc
@@ -2129,16 +2129,19 @@ vect_can_peel_nonlinear_iv_p (loop_vec_info loop_vinfo,
  For mult, don't known how to generate
  init_expr * pow (step, niters) for variable niters.
  For neg, it should be ok, since niters of vectorized main loop
- will always be multiple of 2.  */
-  if ((!LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo)
-   || !LOOP_VINFO_VECT_FACTOR (loop_vinfo).is_constant ())
+ will always be multiple of 2.
+ See also PR113163 and PR114196.  */
+  if ((!LOOP_VINFO_VECT_FACTOR (loop_vinfo).is_constant ()
+   || LOOP_VINFO_USING_PARTIAL_VECTORS_P (loop_vinfo)
+   || !LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo))
   && induction_type != vect_step_op_neg)
 {
   if (dump_enabled_p ())
dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
 "Peeling for epilogue is not supported"
 " for nonlinear induction except neg"
-" when iteration count is unknown.\n");
+" when iteration count is unknown or"
+" when using partial vectorization.\n");
   return false;
 }
 
@@ -2178,25 +2181,6 @@ vect_can_peel_nonlinear_iv_p (loop_vec_info loop_vinfo,
   return false;
 }
 
-  /* We can't support partial vectors and early breaks with an induction
- type other than add or neg since we require the epilog and can't
- perform the peeling.  The below condition mirrors that of
- vect_gen_vector_loop_niters  where niters_vector_mult_vf_var then sets
- step_vector to VF rather than 1.  This is what creates the nonlinear
- IV.  PR113163.  */
-  if (LOOP_VINFO_EARLY_BREAKS (loop_vinfo)
-  && LOOP_VINFO_VECT_FACTOR (loop_vinfo).is_constant ()
-  && LOOP_VINFO_USING_PARTIAL_VECTORS_P (loop_vinfo)
-  && induction_type != vect_step_op_neg)
-{
-  if (dump_enabled_p ())
-   dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
-"Peeling for epilogue is not supported"
-" for nonlinear induction except neg"
-" when VF is known and early breaks.\n");
-  return false;
-}
-
   return true;
 }


[gcc r15-638] internal-fn: Do not force vcond_mask operands to reg.

2024-05-17 Thread Robin Dapp via Gcc-cvs
https://gcc.gnu.org/g:7ca35f2e430081d6ec91e910002f92d9713350fa

commit r15-638-g7ca35f2e430081d6ec91e910002f92d9713350fa
Author: Robin Dapp 
Date:   Fri May 10 12:44:44 2024 +0200

internal-fn: Do not force vcond_mask operands to reg.

In order to directly use constants this patch removes force_regs
in the vcond_mask expander.

gcc/ChangeLog:

PR middle-end/113474

* internal-fn.cc (expand_vec_cond_mask_optab_fn):  Remove
force_regs.

gcc/testsuite/ChangeLog:

* gcc.target/riscv/rvv/autovec/pr113474.c: New test.

Diff:
---
 gcc/internal-fn.cc|  3 ---
 gcc/testsuite/gcc.target/riscv/rvv/autovec/pr113474.c | 13 +
 2 files changed, 13 insertions(+), 3 deletions(-)

diff --git a/gcc/internal-fn.cc b/gcc/internal-fn.cc
index 73045ca8c8c1..9c09026793fa 100644
--- a/gcc/internal-fn.cc
+++ b/gcc/internal-fn.cc
@@ -3165,9 +3165,6 @@ expand_vec_cond_mask_optab_fn (internal_fn, gcall *stmt, 
convert_optab optab)
   rtx_op1 = expand_normal (op1);
   rtx_op2 = expand_normal (op2);
 
-  mask = force_reg (mask_mode, mask);
-  rtx_op1 = force_reg (mode, rtx_op1);
-
   rtx target = expand_expr (lhs, NULL_RTX, VOIDmode, EXPAND_WRITE);
   create_output_operand (&ops[0], target, mode);
   create_input_operand (&ops[1], rtx_op1, mode);
diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/pr113474.c 
b/gcc/testsuite/gcc.target/riscv/rvv/autovec/pr113474.c
new file mode 100644
index ..0364bf9f5e38
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/pr113474.c
@@ -0,0 +1,13 @@
+/* { dg-do compile { target riscv_v } }  */
+/* { dg-additional-options "-std=c99" }  */
+
+void
+foo (int n, int **a)
+{
+  int b;
+  for (b = 0; b < n; b++)
+for (long e = 8; e > 0; e--)
+  a[b][e] = a[b][e] == 15;
+}
+
+/* { dg-final { scan-assembler "vmerge.vim" } }  */


[gcc r15-639] RISC-V: Add initial cost handling for segment loads/stores.

2024-05-17 Thread Robin Dapp via Gcc-cvs
https://gcc.gnu.org/g:e0b9c8ad7098fb08a25a61fe17d4274dd73e5145

commit r15-639-ge0b9c8ad7098fb08a25a61fe17d4274dd73e5145
Author: Robin Dapp 
Date:   Mon Feb 26 13:09:15 2024 +0100

RISC-V: Add initial cost handling for segment loads/stores.

This patch makes segment loads and stores more expensive.  It adds
segment_permute_2 as well as 3 to 8 cost fields to the common vector
costs and adds handling to adjust_stmt_cost.

gcc/ChangeLog:

* config/riscv/riscv-protos.h (struct common_vector_cost): Add
segment_permute cost.
* config/riscv/riscv-vector-costs.cc (costs::adjust_stmt_cost):
Handle segment loads/stores.
* config/riscv/riscv.cc: Initialize segment_permute_[2-8] to 1.

gcc/testsuite/ChangeLog:

* gcc.dg/vect/costmodel/riscv/rvv/pr113112-4.c: Adjust test.

Diff:
---
 gcc/config/riscv/riscv-protos.h|   9 ++
 gcc/config/riscv/riscv-vector-costs.cc | 163 +++--
 gcc/config/riscv/riscv.cc  |  14 ++
 .../gcc.dg/vect/costmodel/riscv/rvv/pr113112-4.c   |   4 +-
 4 files changed, 146 insertions(+), 44 deletions(-)

diff --git a/gcc/config/riscv/riscv-protos.h b/gcc/config/riscv/riscv-protos.h
index 565ead1382a7..004ceb1031b8 100644
--- a/gcc/config/riscv/riscv-protos.h
+++ b/gcc/config/riscv/riscv-protos.h
@@ -222,6 +222,15 @@ struct common_vector_cost
   const int gather_load_cost;
   const int scatter_store_cost;
 
+  /* Segment load/store permute cost.  */
+  const int segment_permute_2;
+  const int segment_permute_3;
+  const int segment_permute_4;
+  const int segment_permute_5;
+  const int segment_permute_6;
+  const int segment_permute_7;
+  const int segment_permute_8;
+
   /* Cost of a vector-to-scalar operation.  */
   const int vec_to_scalar_cost;
 
diff --git a/gcc/config/riscv/riscv-vector-costs.cc 
b/gcc/config/riscv/riscv-vector-costs.cc
index 4582b0db4250..0a88e142a934 100644
--- a/gcc/config/riscv/riscv-vector-costs.cc
+++ b/gcc/config/riscv/riscv-vector-costs.cc
@@ -1052,6 +1052,25 @@ costs::better_main_loop_than_p (const vector_costs 
*uncast_other) const
   return vector_costs::better_main_loop_than_p (other);
 }
 
+/* Returns the group size i.e. the number of vectors to be loaded by a
+   segmented load/store instruction.  Return 0 if it is no segmented
+   load/store.  */
+static int
+segment_loadstore_group_size (enum vect_cost_for_stmt kind,
+ stmt_vec_info stmt_info)
+{
+  if (stmt_info
+  && (kind == vector_load || kind == vector_store)
+  && STMT_VINFO_DATA_REF (stmt_info))
+{
+  stmt_info = DR_GROUP_FIRST_ELEMENT (stmt_info);
+  if (stmt_info
+ && STMT_VINFO_MEMORY_ACCESS_TYPE (stmt_info) == VMAT_LOAD_STORE_LANES)
+   return DR_GROUP_SIZE (stmt_info);
+}
+  return 0;
+}
+
 /* Adjust vectorization cost after calling riscv_builtin_vectorization_cost.
For some statement, we would like to further fine-grain tweak the cost on
top of riscv_builtin_vectorization_cost handling which doesn't have any
@@ -1076,55 +1095,115 @@ costs::adjust_stmt_cost (enum vect_cost_for_stmt kind, 
loop_vec_info loop,
 case vector_load:
 case vector_store:
{
- /* Unit-stride vector loads and stores do not have offset addressing
-as opposed to scalar loads and stores.
-If the address depends on a variable we need an additional
-add/sub for each load/store in the worst case.  */
- if (stmt_info && stmt_info->stmt)
+ if (stmt_info && stmt_info->stmt && STMT_VINFO_DATA_REF (stmt_info))
{
- data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
- class loop *father = stmt_info->stmt->bb->loop_father;
- if (!loop && father && !father->inner && father->superloops)
+ /* Segment loads and stores.  When the group size is > 1
+the vectorizer will add a vector load/store statement for
+each vector in the group.  Here we additionally add permute
+costs for each.  */
+ /* TODO: Indexed and ordered/unordered cost.  */
+ int group_size = segment_loadstore_group_size (kind, stmt_info);
+ if (group_size > 1)
+   {
+ switch (group_size)
+   {
+   case 2:
+ if (riscv_v_ext_vector_mode_p (loop->vector_mode))
+   stmt_cost += costs->vla->segment_permute_2;
+ else
+   stmt_cost += costs->vls->segment_permute_2;
+ break;
+   case 3:
+ if (riscv_v_ext_vector_mode_p (loop->vector_mode))
+   stmt_cost += costs->vla->segment_permute_3;
+ else
+   stmt_cost += costs->vls->segment_permute_3;
+ brea

[gcc r15-3119] RISC-V: Expand vec abs without masking.

2024-08-23 Thread Robin Dapp via Gcc-cvs
https://gcc.gnu.org/g:c22d57cdc52d990eb7d353fa82c67882bc824d40

commit r15-3119-gc22d57cdc52d990eb7d353fa82c67882bc824d40
Author: Robin Dapp 
Date:   Fri Aug 9 15:05:39 2024 +0200

RISC-V: Expand vec abs without masking.

Standard abs synthesis during expand is max (a, -a).  This
expansion has the advantage of avoiding masking and is thus potentially
faster than the a < 0 ? -a : a synthesis.

gcc/ChangeLog:

* config/riscv/autovec.md (abs<mode>2): Expand via max (a, -a).

gcc/testsuite/ChangeLog:

* gcc.target/riscv/rvv/autovec/unop/abs-rv32gcv.c: Adjust test
expectation.
* gcc.target/riscv/rvv/autovec/unop/abs-rv64gcv.c: Ditto.
* gcc.target/riscv/rvv/autovec/vls/abs-2.c: Ditto.
* gcc.target/riscv/rvv/autovec/cond/cond_unary-1.c: Ditto.
* gcc.target/riscv/rvv/autovec/cond/cond_unary-2.c: Ditto.
* gcc.target/riscv/rvv/autovec/cond/cond_unary-3.c: Ditto.
* gcc.target/riscv/rvv/autovec/cond/cond_unary-4.c: Ditto.
* gcc.target/riscv/rvv/autovec/cond/cond_unary-5.c: Ditto.
* gcc.target/riscv/rvv/autovec/cond/cond_unary-6.c: Ditto.
* gcc.target/riscv/rvv/autovec/cond/cond_unary-7.c: Ditto.
* gcc.target/riscv/rvv/autovec/cond/cond_unary-8.c: Ditto.

Diff:
---
 gcc/config/riscv/autovec.md| 26 +++---
 .../riscv/rvv/autovec/cond/cond_unary-1.c  |  6 +++--
 .../riscv/rvv/autovec/cond/cond_unary-2.c  |  6 +++--
 .../riscv/rvv/autovec/cond/cond_unary-3.c  |  6 +++--
 .../riscv/rvv/autovec/cond/cond_unary-4.c  |  6 +++--
 .../riscv/rvv/autovec/cond/cond_unary-5.c  |  6 +++--
 .../riscv/rvv/autovec/cond/cond_unary-6.c  |  6 +++--
 .../riscv/rvv/autovec/cond/cond_unary-7.c  |  6 +++--
 .../riscv/rvv/autovec/cond/cond_unary-8.c  |  6 +++--
 .../riscv/rvv/autovec/unop/abs-rv32gcv.c   |  6 ++---
 .../riscv/rvv/autovec/unop/abs-rv64gcv.c   |  6 ++---
 .../gcc.target/riscv/rvv/autovec/vls/abs-2.c   |  2 +-
 12 files changed, 47 insertions(+), 41 deletions(-)

diff --git a/gcc/config/riscv/autovec.md b/gcc/config/riscv/autovec.md
index decfe2bf8cc8..4decaedbd826 100644
--- a/gcc/config/riscv/autovec.md
+++ b/gcc/config/riscv/autovec.md
@@ -1073,29 +1073,19 @@
 [(set_attr "type" "vialu")])
 
 ;; 
---
-;; - [INT] ABS expansion to vmslt and vneg.
+;; - [INT] ABS expansion to vneg and vmax.
 ;; 
---
 
-(define_insn_and_split "abs2"
+(define_expand "abs2"
   [(set (match_operand:V_VLSI 0 "register_operand")
- (abs:V_VLSI
-   (match_operand:V_VLSI 1 "register_operand")))]
-  "TARGET_VECTOR && can_create_pseudo_p ()"
-  "#"
-  "&& 1"
-  [(const_int 0)]
+(smax:V_VLSI
+ (match_dup 0)
+ (neg:V_VLSI
+   (match_operand:V_VLSI 1 "register_operand"]
+  "TARGET_VECTOR"
 {
-  rtx zero = gen_const_vec_duplicate (mode, GEN_INT (0));
-  machine_mode mask_mode = riscv_vector::get_mask_mode (mode);
-  rtx mask = gen_reg_rtx (mask_mode);
-  riscv_vector::expand_vec_cmp (mask, LT, operands[1], zero);
-
-  rtx ops[] = {operands[0], mask, operands[1], operands[1]};
-  riscv_vector::emit_vlmax_insn (code_for_pred (NEG, mode),
-  riscv_vector::UNARY_OP_TAMU, ops);
   DONE;
-}
-[(set_attr "type" "vector")])
+})
 
 ;; 
---
 ;;  [FP] Unary operations
diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/cond/cond_unary-1.c 
b/gcc/testsuite/gcc.target/riscv/rvv/autovec/cond/cond_unary-1.c
index 2233c6eeecb9..4866b221ca4a 100644
--- a/gcc/testsuite/gcc.target/riscv/rvv/autovec/cond/cond_unary-1.c
+++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/cond/cond_unary-1.c
@@ -36,8 +36,10 @@
 
 TEST_ALL (DEF_LOOP)
 
-/* NOTE: int abs operator is converted to vmslt + vneg.v */
-/* { dg-final { scan-assembler-times {\tvneg\.v\tv[0-9]+,v[0-9]+,v0\.t} 8 } } 
*/
+/* NOTE: int abs operator is converted to vneg.v + vmax.vv */
+/* { dg-final { scan-assembler-times {\tvneg\.v\tv[0-9]+,v[0-9]+} 8 } } */
+/* { dg-final { scan-assembler-times {\tvmax\.vv\tv[0-9]+,v[0-9]+,v[0-9]+} 4 } 
} */
+/* { dg-final { scan-assembler-times {\tvneg\.v\tv[0-9]+,v[0-9]+,v0\.t} 4 } } 
*/
 /* { dg-final { scan-assembler-times {\tvnot\.v\tv[0-9]+,v[0-9]+,v0\.t} 4 } } 
*/
 /* { dg-final { scan-assembler-times {\tvfabs\.v\tv[0-9]+,v[0-9]+,v0\.t} 3 } } 
*/
 /* { dg-final { scan-assembler-times {\tvfneg\.v\tv[0-9]+,v[0-9]+,v0\.t} 3 } } 
*/
diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/cond/cond_unary-2.c 
b/gcc/testsuite/gcc.target/riscv/rvv/autovec/cond/cond_unary-2.c
index 4886bff67d86..651df9f86461 100644
--- a/gcc/testsuite/gcc.target/riscv/rvv/autovec/cond/cond_unary-2.c
+++ b/gcc/testsu

[gcc r15-3120] optabs-query: Use opt_machine_mode for smallest_int_mode_for_size [PR115495].

2024-08-23 Thread Robin Dapp via Gcc-cvs
https://gcc.gnu.org/g:96fe95bac67c7303dc811c04f5e99cc959a7182a

commit r15-3120-g96fe95bac67c7303dc811c04f5e99cc959a7182a
Author: Robin Dapp 
Date:   Tue Aug 20 14:02:09 2024 +0200

optabs-query: Use opt_machine_mode for smallest_int_mode_for_size 
[PR115495].

In get_best_extraction_insn we use smallest_int_mode_for_size with
struct_bits as size argument.  PR115495 has struct_bits = 256 and we
don't have a mode for that.  This patch makes smallest_mode_for_size
and smallest_int_mode_for_size return opt modes so we can just skip
over the loop when there is no mode.

PR middle-end/115495

gcc/ChangeLog:

* cfgexpand.cc (expand_debug_expr): Require mode.
* combine.cc (make_extraction): Ditto.
* config/aarch64/aarch64.cc (aarch64_expand_cpymem): Ditto.
(aarch64_expand_setmem): Ditto.
* config/arc/arc.cc (arc_expand_cpymem): Ditto.
* config/arm/arm.cc (arm_expand_divmod_libfunc): Ditto.
* config/i386/i386.cc (ix86_get_mask_mode): Ditto.
* config/rs6000/predicates.md: Ditto.
* config/rs6000/rs6000.cc (vspltis_constant): Ditto.
* config/s390/s390.cc (s390_expand_insv): Ditto.
* config/sparc/sparc.cc (assign_int_registers): Ditto.
* coverage.cc (get_gcov_type): Ditto.
(get_gcov_unsigned_t): Ditto.
* dse.cc (find_shift_sequence): Ditto.
* expmed.cc (store_integral_bit_field): Ditto.
* expr.cc (convert_mode_scalar): Ditto.
(op_by_pieces_d::smallest_fixed_size_mode_for_size): Ditto.
(emit_block_move_via_oriented_loop): Ditto.
(copy_blkmode_to_reg): Ditto.
(store_field): Ditto.
* internal-fn.cc (expand_arith_overflow): Ditto.
* machmode.h (HAVE_MACHINE_MODES): Ditto.
(smallest_mode_for_size): Use opt_machine_mode.
(smallest_int_mode_for_size): Use opt_scalar_int_mode.
* optabs-query.cc (get_best_extraction_insn): Require mode.
* optabs.cc (expand_twoval_binop_libfunc): Ditto.
* stor-layout.cc (smallest_mode_for_size): Return
opt_machine_mode.
(layout_type): Require mode.
(initialize_sizetypes): Ditto.
* tree-ssa-loop-manip.cc (canonicalize_loop_ivs): Ditto.

gcc/testsuite/ChangeLog:

* gcc.target/riscv/rvv/autovec/pr115495.c: New test.

gcc/ada/ChangeLog:

* gcc-interface/utils2.cc (fast_modulo_reduction): Require mode.
(nonbinary_modular_operation): Ditto.

Diff:
---
 gcc/ada/gcc-interface/utils2.cc   |  5 +++--
 gcc/cfgexpand.cc  |  2 +-
 gcc/combine.cc|  2 +-
 gcc/config/aarch64/aarch64.cc |  6 --
 gcc/config/arc/arc.cc |  2 +-
 gcc/config/arm/arm.cc |  2 +-
 gcc/config/i386/i386.cc   |  4 ++--
 gcc/config/rs6000/predicates.md   |  2 +-
 gcc/config/rs6000/rs6000.cc   |  2 +-
 gcc/config/s390/s390.cc   |  2 +-
 gcc/config/sparc/sparc.cc |  2 +-
 gcc/coverage.cc   |  5 +++--
 gcc/dse.cc|  3 ++-
 gcc/expmed.cc |  3 ++-
 gcc/expr.cc   | 17 +
 gcc/internal-fn.cc|  4 ++--
 gcc/machmode.h| 10 +-
 gcc/optabs-query.cc   |  1 +
 gcc/optabs.cc |  3 ++-
 gcc/stor-layout.cc| 16 +---
 gcc/testsuite/gcc.target/riscv/rvv/autovec/pr115495.c |  9 +
 gcc/tree-ssa-loop-manip.cc|  2 +-
 22 files changed, 62 insertions(+), 42 deletions(-)

diff --git a/gcc/ada/gcc-interface/utils2.cc b/gcc/ada/gcc-interface/utils2.cc
index 0d7e03ec6b07..8eebf5935960 100644
--- a/gcc/ada/gcc-interface/utils2.cc
+++ b/gcc/ada/gcc-interface/utils2.cc
@@ -661,7 +661,7 @@ fast_modulo_reduction (tree op, tree modulus, unsigned int 
precision)
  if (type_precision < BITS_PER_WORD)
{
  const scalar_int_mode m
-   = smallest_int_mode_for_size (type_precision + 1);
+   = smallest_int_mode_for_size (type_precision + 1).require ();
  tree new_type = gnat_type_for_mode (m, 1);
  op = fold_convert (new_type, op);
  modulus = fold_convert (new_type, modulus);
@@ -721,7 +721,8 @@ nonbinary_modular_operation (enum tree_code op_code, tree 
type, tree lhs,
  for its mode since ope

[gcc r15-3282] RISC-V: Fix subreg of VLS modes larger than a vector [PR116086].

2024-08-29 Thread Robin Dapp via Gcc-cvs
https://gcc.gnu.org/g:4ff4875a79ccb302dc2401c32fe0af2187b61b99

commit r15-3282-g4ff4875a79ccb302dc2401c32fe0af2187b61b99
Author: Robin Dapp 
Date:   Tue Aug 27 10:25:34 2024 +0200

RISC-V: Fix subreg of VLS modes larger than a vector [PR116086].

When the source mode is potentially larger than one vector (e.g. an
LMUL2 mode for VLEN=128) we don't know which vector the subreg actually
refers to.  For zvl128b and LMUL=2 the subreg in (subreg:V2DI (reg:V4DI))
could actually be a full (high) vector register of a two-register
group (at VLEN=128) or the higher part of a single register (at VLEN>128).

As the subreg is statically ambiguous we prevent such situations in
can_change_mode_class.

The culprit in PR116086 is

 _12 = BIT_FIELD_REF ;

which can be expanded with a vector-vector extract (from V4DI to V2DI).
This patch adds a VLS-mode vector-vector extract that handles "halving"
cases like this one by sliding down the source vector, thus making sure
the correct part is used.

PR target/116086

gcc/ChangeLog:

* config/riscv/autovec.md (vec_extract): Add
vector-vector extract for VLS modes.
* config/riscv/riscv.cc (riscv_can_change_mode_class): Forbid
VLS modes larger than one vector.
* config/riscv/vector-iterators.md: Add vector-vector extract
iterators.

gcc/testsuite/ChangeLog:

* lib/target-supports.exp: Add effective target checks for
zvl256b and zvl512b.
* gcc.target/riscv/rvv/autovec/pr116086-2-run.c: New test.
* gcc.target/riscv/rvv/autovec/pr116086-2.c: New test.
* gcc.target/riscv/rvv/autovec/pr116086.c: New test.

Diff:
---
 gcc/config/riscv/autovec.md|  35 
 gcc/config/riscv/riscv.cc  |  11 ++
 gcc/config/riscv/vector-iterators.md   | 202 +
 .../gcc.target/riscv/rvv/autovec/pr116086-2-run.c  |   6 +
 .../gcc.target/riscv/rvv/autovec/pr116086-2.c  |  18 ++
 .../gcc.target/riscv/rvv/autovec/pr116086.c|  76 
 gcc/testsuite/lib/target-supports.exp  |  37 
 7 files changed, 385 insertions(+)

diff --git a/gcc/config/riscv/autovec.md b/gcc/config/riscv/autovec.md
index 4decaedbd826..a4e108268b44 100644
--- a/gcc/config/riscv/autovec.md
+++ b/gcc/config/riscv/autovec.md
@@ -1453,6 +1453,41 @@
   DONE;
 })
 
+;; -
+;;  [INT,FP] Extract a vector from a vector.
+;; -
+;; TODO: This can be extended to allow basically any extract mode.
+;; For now this helps optimize VLS subregs like (subreg:V2DI (reg:V4DI) 16)
+;; that would otherwise need to go via memory.
+
+(define_expand "vec_extract"
+  [(set (match_operand:   0 "nonimmediate_operand")
+ (vec_select:
+   (match_operand:VLS_HAS_HALF  1 "register_operand")
+   (parallel
+[(match_operand 2 "immediate_operand")])))]
+  "TARGET_VECTOR"
+{
+  int sz = GET_MODE_NUNITS (mode).to_constant ();
+  int part = INTVAL (operands[2]);
+
+  rtx start = GEN_INT (part * sz);
+  rtx tmp = operands[1];
+
+  if (part != 0)
+{
+  tmp = gen_reg_rtx (mode);
+
+  rtx ops[] = {tmp, operands[1], start};
+  riscv_vector::emit_vlmax_insn
+   (code_for_pred_slide (UNSPEC_VSLIDEDOWN, mode),
+riscv_vector::BINARY_OP, ops);
+}
+
+  emit_move_insn (operands[0], gen_lowpart (mode, tmp));
+  DONE;
+})
+
 ;; -
 ;;  [FP] Binary operations
 ;; -
diff --git a/gcc/config/riscv/riscv.cc b/gcc/config/riscv/riscv.cc
index e9b1b9bc3add..3f5dfb838425 100644
--- a/gcc/config/riscv/riscv.cc
+++ b/gcc/config/riscv/riscv.cc
@@ -10670,6 +10670,17 @@ riscv_can_change_mode_class (machine_mode from, 
machine_mode to,
   if (reg_classes_intersect_p (V_REGS, rclass)
   && !ordered_p (GET_MODE_PRECISION (from), GET_MODE_PRECISION (to)))
 return false;
+
+  /* Subregs of modes larger than one vector are ambiguous.
+ A V4DImode with rv64gcv_zvl128b could, for example, span two registers/one
+ register group of two at VLEN = 128 or one register at VLEN >= 256 and
+ we cannot, statically, determine which part of it to extract.
+ Therefore prevent that.  */
+  if (reg_classes_intersect_p (V_REGS, rclass)
+  && riscv_v_ext_vls_mode_p (from)
+  && !ordered_p (BITS_PER_RISCV_VECTOR, GET_MODE_PRECISION (from)))
+  return false;
+
   return !reg_classes_intersect_p (FP_REGS, rclass);
 }
 
diff --git a/gcc/config/riscv/vector-iterators.md 
b/gcc/config/riscv/vector-iterators.md
index cbbd248c9bb3..a00b5c3feddd 100644
--- a/gcc/config/riscv/vector-iterat

[gcc r15-1861] RISC-V: Use tu policy for first-element vec_set [PR115725].

2024-07-05 Thread Robin Dapp via Gcc-cvs
https://gcc.gnu.org/g:acc3b703c05debc6276451f9daae5d0ffc797eac

commit r15-1861-gacc3b703c05debc6276451f9daae5d0ffc797eac
Author: Robin Dapp 
Date:   Mon Jul 1 13:37:17 2024 +0200

RISC-V: Use tu policy for first-element vec_set [PR115725].

This patch changes the tail policy for vmv.s.x from ta to tu.
By default the bug does not show up with qemu because qemu's
current vmv.s.x implementation always uses the tail-undisturbed
policy.  With a local qemu version that overwrites the tail
with ones when the tail-agnostic policy is specified, the bug
shows.

gcc/ChangeLog:

* config/riscv/autovec.md: Add TU policy.
* config/riscv/riscv-protos.h (enum insn_type): Define
SCALAR_MOVE_MERGED_OP_TU.

gcc/testsuite/ChangeLog:

PR target/115725

* gcc.target/riscv/rvv/autovec/vls-vlmax/vec_set-1.c: Adjust
test expectation.
* gcc.target/riscv/rvv/autovec/vls-vlmax/vec_set-2.c: Ditto.
* gcc.target/riscv/rvv/autovec/vls-vlmax/vec_set-3.c: Ditto.
* gcc.target/riscv/rvv/autovec/vls-vlmax/vec_set-4.c: Ditto.

Diff:
---
 gcc/config/riscv/autovec.md  |  3 ++-
 gcc/config/riscv/riscv-protos.h  |  4 
 .../gcc.target/riscv/rvv/autovec/vls-vlmax/vec_set-1.c   | 12 
 .../gcc.target/riscv/rvv/autovec/vls-vlmax/vec_set-2.c   | 12 
 .../gcc.target/riscv/rvv/autovec/vls-vlmax/vec_set-3.c   | 12 
 .../gcc.target/riscv/rvv/autovec/vls-vlmax/vec_set-4.c   | 12 
 6 files changed, 22 insertions(+), 33 deletions(-)

diff --git a/gcc/config/riscv/autovec.md b/gcc/config/riscv/autovec.md
index 66d70f678a6..0fb6316a2cf 100644
--- a/gcc/config/riscv/autovec.md
+++ b/gcc/config/riscv/autovec.md
@@ -1341,7 +1341,8 @@
 {
   rtx ops[] = {operands[0], operands[0], operands[1]};
   riscv_vector::emit_nonvlmax_insn (code_for_pred_broadcast (mode),
-   riscv_vector::SCALAR_MOVE_MERGED_OP, 
ops, CONST1_RTX (Pmode));
+   riscv_vector::SCALAR_MOVE_MERGED_OP_TU,
+   ops, CONST1_RTX (Pmode));
 }
   else
 {
diff --git a/gcc/config/riscv/riscv-protos.h b/gcc/config/riscv/riscv-protos.h
index a8b76173fa0..abf6e34b5cc 100644
--- a/gcc/config/riscv/riscv-protos.h
+++ b/gcc/config/riscv/riscv-protos.h
@@ -524,6 +524,10 @@ enum insn_type : unsigned int
   SCALAR_MOVE_MERGED_OP = HAS_DEST_P | HAS_MASK_P | USE_ONE_TRUE_MASK_P
  | HAS_MERGE_P | TDEFAULT_POLICY_P | MDEFAULT_POLICY_P
  | UNARY_OP_P,
+
+  SCALAR_MOVE_MERGED_OP_TU = HAS_DEST_P | HAS_MASK_P | USE_ONE_TRUE_MASK_P
+ | HAS_MERGE_P | TU_POLICY_P | MDEFAULT_POLICY_P
+ | UNARY_OP_P,
 };
 
 enum vlmul_type
diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/vls-vlmax/vec_set-1.c 
b/gcc/testsuite/gcc.target/riscv/rvv/autovec/vls-vlmax/vec_set-1.c
index ecb160933d6..99b0f625c83 100644
--- a/gcc/testsuite/gcc.target/riscv/rvv/autovec/vls-vlmax/vec_set-1.c
+++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/vls-vlmax/vec_set-1.c
@@ -64,14 +64,10 @@ typedef double vnx2df __attribute__((vector_size (16)));
 TEST_ALL1 (VEC_SET)
 TEST_ALL_VAR1 (VEC_SET_VAR1)
 
-/* { dg-final { scan-assembler-times 
{vset[i]*vli\s+[a-z0-9,]+,\s*e8,\s*m1,\s*ta,\s*ma} 1 } } */
-/* { dg-final { scan-assembler-times 
{vset[i]*vli\s+[a-z0-9,]+,\s*e8,\s*m1,\s*tu,\s*ma} 5 } } */
-/* { dg-final { scan-assembler-times 
{vset[i]*vli\s+[a-z0-9,]+,\s*e16,\s*m1,\s*ta,\s*ma} 2 } } */
-/* { dg-final { scan-assembler-times 
{vset[i]*vli\s+[a-z0-9,]+,\s*e16,\s*m1,\s*tu,\s*ma} 6 } } */
-/* { dg-final { scan-assembler-times 
{vset[i]*vli\s+[a-z0-9,]+,\s*e32,\s*m1,\s*ta,\s*ma} 2 } } */
-/* { dg-final { scan-assembler-times 
{vset[i]*vli\s+[a-z0-9,]+,\s*e32,\s*m1,\s*tu,\s*ma} 6 } } */
-/* { dg-final { scan-assembler-times 
{vset[i]*vli\s+[a-z0-9,]+,\s*e64,\s*m1,\s*ta,\s*ma} 2 } } */
-/* { dg-final { scan-assembler-times 
{vset[i]*vli\s+[a-z0-9,]+,\s*e64,\s*m1,\s*tu,\s*ma} 4 } } */
+/* { dg-final { scan-assembler-times 
{vset[i]*vli\s+[a-z0-9,]+,\s*e8,\s*m1,\s*tu,\s*ma} 6 } } */
+/* { dg-final { scan-assembler-times 
{vset[i]*vli\s+[a-z0-9,]+,\s*e16,\s*m1,\s*tu,\s*ma} 8 } } */
+/* { dg-final { scan-assembler-times 
{vset[i]*vli\s+[a-z0-9,]+,\s*e32,\s*m1,\s*tu,\s*ma} 8 } } */
+/* { dg-final { scan-assembler-times 
{vset[i]*vli\s+[a-z0-9,]+,\s*e64,\s*m1,\s*tu,\s*ma} 6 } } */
 
 /* { dg-final { scan-assembler-times {\tvmv.v.x} 13 } } */
 /* { dg-final { scan-assembler-times {\tvfmv.v.f} 8 } } */
diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/vls-vlmax/vec_set-2.c 
b/gcc/testsuite/gcc.target/riscv/rvv/autovec/vls-vlmax/vec_set-2.c
index 194abff77cc..64a40308eb1 100644
--- a/gcc/testsuite/gcc.target/riscv/rvv/autovec/vls-vlmax/vec_set-2.c
+++ b/

[gcc r15-2300] RISC-V: Allow LICM hoist POLY_INT configuration code sequence

2024-07-25 Thread Robin Dapp via Gcc-cvs
https://gcc.gnu.org/g:4cbbce045681c234387d8d56376ea179dc869229

commit r15-2300-g4cbbce045681c234387d8d56376ea179dc869229
Author: Juzhe-Zhong 
Date:   Thu Feb 1 23:45:50 2024 +0800

RISC-V: Allow LICM hoist POLY_INT configuration code sequence

Observed in a recent benchmark evaluation (coremark-pro zip-test):

vid.v   v2
vmv.v.i v5,0
.L9:
vle16.v v3,0(a4)
vrsub.vx v4,v2,a6   ---> LICM failed to hoist it outside the
loop.

The root cause is:

(insn 56 47 57 4 (set (subreg:DI (reg:HI 220) 0)
(reg:DI 223)) "rvv.c":11:9 208 {*movdi_64bit}  -> Its result is used
by the following vrsub.vx, which suppresses the hoisting of the vrsub.vx
 (nil))

(insn 57 56 59 4 (set (reg:RVVMF2HI 216)
(if_then_else:RVVMF2HI (unspec:RVVMF32BI [
(const_vector:RVVMF32BI repeat [
(const_int 1 [0x1])
])
(reg:DI 350)
(const_int 2 [0x2]) repeated x2
(const_int 1 [0x1])
(reg:SI 66 vl)
(reg:SI 67 vtype)
] UNSPEC_VPREDICATE)
(minus:RVVMF2HI (vec_duplicate:RVVMF2HI (reg:HI 220))
(reg:RVVMF2HI 217))
(unspec:RVVMF2HI [
(reg:DI 0 zero)
] UNSPEC_VUNDEF))) "rvv.c":11:9 6938 
{pred_subrvvmf2hi_reverse_scalar}
 (expr_list:REG_DEAD (reg:HI 220)
(nil)))

This patch fixes it by generating (set (reg:HI) (subreg:HI (reg:DI))) instead of
(set (subreg:DI (reg:HI)) (reg:DI)).

After this patch:

vid.v   v2
vrsub.vx v2,v2,a7
vmv.v.i v4,0
.L3:
vle16.v v3,0(a4)

Tested on both RV32 and RV64 with no regressions.

gcc/ChangeLog:

* config/riscv/riscv.cc (riscv_legitimize_move): Fix poly_int dest 
generation.

gcc/testsuite/ChangeLog:

* gcc.target/riscv/rvv/autovec/poly_licm-1.c: New test.
* gcc.target/riscv/rvv/autovec/poly_licm-2.c: New test.
* gcc.target/riscv/rvv/autovec/poly_licm-3.c: New test.

Diff:
---
 gcc/config/riscv/riscv.cc  |  9 
 .../gcc.target/riscv/rvv/autovec/poly_licm-1.c | 18 +++
 .../gcc.target/riscv/rvv/autovec/poly_licm-2.c | 27 ++
 .../gcc.target/riscv/rvv/autovec/poly_licm-3.c | 26 +
 4 files changed, 76 insertions(+), 4 deletions(-)

diff --git a/gcc/config/riscv/riscv.cc b/gcc/config/riscv/riscv.cc
index 1696fa296482..96c4ab65 100644
--- a/gcc/config/riscv/riscv.cc
+++ b/gcc/config/riscv/riscv.cc
@@ -3230,16 +3230,17 @@ riscv_legitimize_move (machine_mode mode, rtx dest, rtx 
src)
(const_poly_int:HI [m, n])
(const_poly_int:SI [m, n]).  */
  rtx tmp = gen_reg_rtx (Pmode);
- riscv_legitimize_poly_move (Pmode, gen_lowpart (Pmode, dest), tmp,
- src);
+ rtx tmp2 = gen_reg_rtx (Pmode);
+ riscv_legitimize_poly_move (Pmode, tmp2, tmp, src);
+ emit_move_insn (dest, gen_lowpart (mode, tmp2));
}
   else
{
  /* In RV32 system, handle (const_poly_int:SI [m, n])
(const_poly_int:DI [m, n]).
 In RV64 system, handle (const_poly_int:DI [m, n]).
-   FIXME: Maybe we could gen SImode in RV32 and then sign-extend to DImode,
-   the offset should not exceed 4GiB in general.  */
+FIXME: Maybe we could gen SImode in RV32 and then sign-extend to
+DImode, the offset should not exceed 4GiB in general.  */
  rtx tmp = gen_reg_rtx (mode);
  riscv_legitimize_poly_move (mode, dest, tmp, src);
}
diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/poly_licm-1.c 
b/gcc/testsuite/gcc.target/riscv/rvv/autovec/poly_licm-1.c
new file mode 100644
index ..b7da65f09964
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/poly_licm-1.c
@@ -0,0 +1,18 @@
+/* { dg-do compile } */
+/* { dg-options "-march=rv64gcv -mabi=lp64d -O3 -fno-schedule-insns 
-fno-schedule-insns2" } */
+
+extern int wsize;
+
+typedef unsigned short Posf;
+#define NIL 0
+
+void foo (Posf *p)
+{
+  register unsigned n, m;
+  do {
+  m = *--p;
+  *p = (Posf)(m >= wsize ? m-wsize : NIL);
+  } while (--n);
+}
+
+/* { dg-final { scan-assembler-times 
{vid\.v\s+v[0-9]+\s+addi\s+\s*[a-x0-9]+,\s*[a-x0-9]+,\s*-1\s+vrsub\.vx\s+} 1 } 
} */
diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/poly_licm-2.c 
b/gcc/testsuite/gcc.target/riscv/rvv/autovec/poly_licm-2.c
new file mode 100644
index ..ffb3c63149f7
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/poly_licm-2.c
@@ -0,0 +1,27 @@
+/

[gcc r15-2301] RISC-V: Error early with V and no M extension.

2024-07-25 Thread Robin Dapp via Gcc-cvs
https://gcc.gnu.org/g:e589ffb6d78881572ddea21df0d9b6c2641d574d

commit r15-2301-ge589ffb6d78881572ddea21df0d9b6c2641d574d
Author: Robin Dapp 
Date:   Wed Jul 24 09:08:00 2024 +0200

RISC-V: Error early with V and no M extension.

For calculating the value of a poly_int at runtime we use a
multiplication instruction that requires the M extension.
Instead of just asserting and ICEing this patch emits an early
error at option-parsing time.

gcc/ChangeLog:

PR target/116036

* config/riscv/riscv.cc (riscv_override_options_internal): Error
with TARGET_VECTOR && !TARGET_MUL.

gcc/testsuite/ChangeLog:

* gcc.target/riscv/arch-31.c: Add m to arch string and expect it.
* gcc.target/riscv/arch-32.c: Ditto.
* gcc.target/riscv/arch-37.c: Ditto.
* gcc.target/riscv/arch-38.c: Ditto.
* gcc.target/riscv/predef-14.c: Ditto.
* gcc.target/riscv/predef-15.c: Ditto.
* gcc.target/riscv/predef-16.c: Ditto.
* gcc.target/riscv/predef-26.c: Ditto.
* gcc.target/riscv/predef-27.c: Ditto.
* gcc.target/riscv/predef-32.c: Ditto.
* gcc.target/riscv/predef-33.c: Ditto.
* gcc.target/riscv/predef-36.c: Ditto.
* gcc.target/riscv/predef-37.c: Ditto.
* gcc.target/riscv/rvv/autovec/pr111486.c: Add m to arch string.
* gcc.target/riscv/compare-debug-1.c: Ditto.
* gcc.target/riscv/compare-debug-2.c: Ditto.
* gcc.target/riscv/rvv/base/pr116036.c: New test.

Diff:
---
 gcc/config/riscv/riscv.cc |  5 +
 gcc/testsuite/gcc.target/riscv/arch-31.c  |  2 +-
 gcc/testsuite/gcc.target/riscv/arch-32.c  |  2 +-
 gcc/testsuite/gcc.target/riscv/arch-37.c  |  2 +-
 gcc/testsuite/gcc.target/riscv/arch-38.c  |  2 +-
 gcc/testsuite/gcc.target/riscv/compare-debug-1.c  |  2 +-
 gcc/testsuite/gcc.target/riscv/compare-debug-2.c  |  2 +-
 gcc/testsuite/gcc.target/riscv/predef-14.c|  6 +++---
 gcc/testsuite/gcc.target/riscv/predef-15.c|  4 ++--
 gcc/testsuite/gcc.target/riscv/predef-16.c|  4 ++--
 gcc/testsuite/gcc.target/riscv/predef-26.c|  6 +-
 gcc/testsuite/gcc.target/riscv/predef-27.c|  6 +-
 gcc/testsuite/gcc.target/riscv/predef-32.c|  6 +-
 gcc/testsuite/gcc.target/riscv/predef-33.c|  6 +-
 gcc/testsuite/gcc.target/riscv/predef-36.c|  6 +-
 gcc/testsuite/gcc.target/riscv/predef-37.c|  6 +-
 gcc/testsuite/gcc.target/riscv/rvv/autovec/pr111486.c |  2 +-
 gcc/testsuite/gcc.target/riscv/rvv/base/pr116036.c| 11 +++
 18 files changed, 60 insertions(+), 20 deletions(-)

diff --git a/gcc/config/riscv/riscv.cc b/gcc/config/riscv/riscv.cc
index 96c4ab65..2bb7f2aace1b 100644
--- a/gcc/config/riscv/riscv.cc
+++ b/gcc/config/riscv/riscv.cc
@@ -9691,6 +9691,11 @@ riscv_override_options_internal (struct gcc_options 
*opts)
   else if (!TARGET_MUL_OPTS_P (opts) && TARGET_DIV_OPTS_P (opts))
     error ("%<-mdiv%> requires %<-march%> to subsume the %<M%> extension");
 
+  /* We might use a multiplication to calculate the scalable vector length at
+ runtime.  Therefore, require the M extension.  */
+  if (TARGET_VECTOR && !TARGET_MUL)
+sorry ("GCC's current %<V%> implementation requires the %<M%> extension");
+
   /* Likewise floating-point division and square root.  */
   if ((TARGET_HARD_FLOAT_OPTS_P (opts) || TARGET_ZFINX_OPTS_P (opts))
   && ((target_flags_explicit & MASK_FDIV) == 0))
diff --git a/gcc/testsuite/gcc.target/riscv/arch-31.c 
b/gcc/testsuite/gcc.target/riscv/arch-31.c
index 5180753b9057..9b867c5ecd20 100644
--- a/gcc/testsuite/gcc.target/riscv/arch-31.c
+++ b/gcc/testsuite/gcc.target/riscv/arch-31.c
@@ -1,5 +1,5 @@
 /* { dg-do compile } */
-/* { dg-options "-march=rv32i_zvfbfmin -mabi=ilp32f" } */
+/* { dg-options "-march=rv32im_zvfbfmin -mabi=ilp32f" } */
 int foo()
 {
 }
diff --git a/gcc/testsuite/gcc.target/riscv/arch-32.c 
b/gcc/testsuite/gcc.target/riscv/arch-32.c
index 496168325129..49a3db794892 100644
--- a/gcc/testsuite/gcc.target/riscv/arch-32.c
+++ b/gcc/testsuite/gcc.target/riscv/arch-32.c
@@ -1,5 +1,5 @@
 /* { dg-do compile } */
-/* { dg-options "-march=rv64iv_zvfbfmin -mabi=lp64d" } */
+/* { dg-options "-march=rv64imv_zvfbfmin -mabi=lp64d" } */
 int foo()
 {
 }
diff --git a/gcc/testsuite/gcc.target/riscv/arch-37.c 
b/gcc/testsuite/gcc.target/riscv/arch-37.c
index 5b19a73c5567..b56ba77b973e 100644
--- a/gcc/testsuite/gcc.target/riscv/arch-37.c
+++ b/gcc/testsuite/gcc.target/riscv/arch-37.c
@@ -1,5 +1,5 @@
 /* { dg-do compile } */
-/* { dg-options "-march=rv32i_zvfbfwma -mabi=ilp32f" } */
+/* { dg-options "-march=rv32im_zvfbfwma -mabi=ilp32f" } */
 int
 foo ()
 {}
diff --git a/gcc/testsuite/gcc.target/riscv/arch-38.c 
b/gcc/testsuite/gcc

[gcc r14-9972] RISC-V: Add VLS to mask vec_extract [PR114668].

2024-04-15 Thread Robin Dapp via Gcc-cvs
https://gcc.gnu.org/g:02cc8f3e68f9af96d484d9946ceaa9e3eed38151

commit r14-9972-g02cc8f3e68f9af96d484d9946ceaa9e3eed38151
Author: Robin Dapp 
Date:   Mon Apr 15 12:44:56 2024 +0200

RISC-V: Add VLS to mask vec_extract [PR114668].

This adds the missing VLS modes to the mask extract expanders.

gcc/ChangeLog:

PR target/114668

* config/riscv/autovec.md: Add VLS.

gcc/testsuite/ChangeLog:

* gcc.target/riscv/rvv/autovec/pr114668.c: New test.

Diff:
---
 gcc/config/riscv/autovec.md|  4 +--
 .../gcc.target/riscv/rvv/autovec/pr114668.c| 35 ++
 2 files changed, 37 insertions(+), 2 deletions(-)

diff --git a/gcc/config/riscv/autovec.md b/gcc/config/riscv/autovec.md
index 3b32369f68c..aa1ae0fe075 100644
--- a/gcc/config/riscv/autovec.md
+++ b/gcc/config/riscv/autovec.md
@@ -1427,7 +1427,7 @@
 (define_expand "vec_extractqi"
   [(set (match_operand:QI0 "register_operand")
  (vec_select:QI
-   (match_operand:VB 1 "register_operand")
+   (match_operand:VB_VLS 1 "register_operand")
(parallel
 [(match_operand  2 "nonmemory_operand")])))]
   "TARGET_VECTOR"
@@ -1453,7 +1453,7 @@
 (define_expand "vec_extractbi"
   [(set (match_operand:QI0 "register_operand")
  (vec_select:QI
-   (match_operand:VB 1 "register_operand")
+   (match_operand:VB_VLS 1 "register_operand")
(parallel
 [(match_operand  2 "nonmemory_operand")])))]
   "TARGET_VECTOR"
diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/pr114668.c 
b/gcc/testsuite/gcc.target/riscv/rvv/autovec/pr114668.c
new file mode 100644
index 000..3a13c3c0012
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/pr114668.c
@@ -0,0 +1,35 @@
+/* { dg-do run } */
+/* { dg-require-effective-target riscv_v } */
+/* { dg-options { -O3 -fno-vect-cost-model -march=rv64gcv -mabi=lp64d  } } */
+
+char a;
+int b;
+short e[14];
+char f[4][12544];
+_Bool c[4][5];
+
+__attribute__ ((noipa))
+void foo (int a)
+{
+  if (a != 1)
+__builtin_abort ();
+}
+
+int main ()
+{
+  for (int i = 0; i < 4; ++i)
+for (int l = 0; l < 15; ++l)
+  for (int m = 0; m < 15; ++m)
+   f[i][l * m] = 3;
+  for (int j = 0; j < 4; j += 1)
+for (int k = 3; k < 13; k += 3)
+  for (_Bool l = 0; l < 1; l = 1)
+   for (int m = 0; m < 4; m += 1)
+ {
+   a = 0;
+   b -= e[k];
+   c[j][m] = f[j][6];
+ }
+  for (long i = 2; i < 4; ++i)
+foo (c[3][3]);
+}


[gcc r15-2337] RISC-V: Work around bare apostrophe in error string.

2024-07-26 Thread Robin Dapp via Gcc-cvs
https://gcc.gnu.org/g:3f2bf415b447a0f6bc424c688b06e1f5946688a0

commit r15-2337-g3f2bf415b447a0f6bc424c688b06e1f5946688a0
Author: Robin Dapp 
Date:   Fri Jul 26 12:58:38 2024 +0200

RISC-V: Work around bare apostrophe in error string.

An unquoted apostrophe slipped through when testing the recent
V/M extension patch.  This, again, re-words the message to
"Currently the 'V' implementation requires the 'M' extension".

Going to commit as obvious after testing.

gcc/ChangeLog:

* config/riscv/riscv.cc (riscv_override_options_internal):
Reword error string without apostrophe.

gcc/testsuite/ChangeLog:

* gcc.target/riscv/rvv/base/pr116036.c: Adjust expected error
string.

Diff:
---
 gcc/config/riscv/riscv.cc  | 2 +-
 gcc/testsuite/gcc.target/riscv/rvv/base/pr116036.c | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/gcc/config/riscv/riscv.cc b/gcc/config/riscv/riscv.cc
index 2bb7f2aace1b..a490b9598b04 100644
--- a/gcc/config/riscv/riscv.cc
+++ b/gcc/config/riscv/riscv.cc
@@ -9694,7 +9694,7 @@ riscv_override_options_internal (struct gcc_options *opts)
   /* We might use a multiplication to calculate the scalable vector length at
  runtime.  Therefore, require the M extension.  */
   if (TARGET_VECTOR && !TARGET_MUL)
-sorry ("GCC's current %<V%> implementation requires the %<M%> extension");
+sorry ("Currently the %<V%> implementation requires the %<M%> extension");
 
   /* Likewise floating-point division and square root.  */
   if ((TARGET_HARD_FLOAT_OPTS_P (opts) || TARGET_ZFINX_OPTS_P (opts))
diff --git a/gcc/testsuite/gcc.target/riscv/rvv/base/pr116036.c 
b/gcc/testsuite/gcc.target/riscv/rvv/base/pr116036.c
index a72209593f39..7b39291a91ad 100644
--- a/gcc/testsuite/gcc.target/riscv/rvv/base/pr116036.c
+++ b/gcc/testsuite/gcc.target/riscv/rvv/base/pr116036.c
@@ -8,4 +8,4 @@ void init() {
   a[i_0][i_1] = 1;
 }
 
-/* { dg-excess-errors "sorry, unimplemented: GCC's current 'V' implementation 
requires the 'M' extension" } */
+/* { dg-excess-errors "sorry, unimplemented: Currently the 'V' implementation 
requires the 'M' extension" } */


[gcc r15-2649] RISC-V: Correct mode_idx attribute for viwalu wx variants [PR116149].

2024-08-01 Thread Robin Dapp via Gcc-cvs
https://gcc.gnu.org/g:f15cd1802129454029f7fcc8ee3ddd56a86cdad8

commit r15-2649-gf15cd1802129454029f7fcc8ee3ddd56a86cdad8
Author: Robin Dapp 
Date:   Wed Jul 31 16:54:03 2024 +0200

RISC-V: Correct mode_idx attribute for viwalu wx variants [PR116149].

In PR116149 we choose a wrong vector length which causes wrong values in
a reduction.  The problem happens in avlprop where we choose the
number of units in the instruction's mode as vector length.  For the
non-scalar variants the respective operand has the correct non-widened
mode.  For the scalar variants, however, the same operand has a scalar
mode which obviously only has one unit.  This makes us choose VL = 1
leaving three elements undisturbed (so potentially -1).  Those end up
in the reduction causing the wrong result.

This patch adjusts the mode_idx just for the scalar variants of the
affected instruction patterns.

gcc/ChangeLog:

PR target/116149

* config/riscv/vector.md: Fix mode_idx attribute of scalar
widen add/sub variants.

gcc/testsuite/ChangeLog:

* gcc.target/riscv/rvv/autovec/pr116149.c: New test.

Diff:
---
 gcc/config/riscv/vector.md|  2 ++
 gcc/testsuite/gcc.target/riscv/rvv/autovec/pr116149.c | 18 ++
 2 files changed, 20 insertions(+)

diff --git a/gcc/config/riscv/vector.md b/gcc/config/riscv/vector.md
index bcedf3d79e26..d4d9bd87e91d 100644
--- a/gcc/config/riscv/vector.md
+++ b/gcc/config/riscv/vector.md
@@ -4016,6 +4016,7 @@
   "TARGET_VECTOR"
   "vwadd.wx\t%0,%3,%z4%p1"
   [(set_attr "type" "viwalu")
+   (set_attr "mode_idx" "3")
(set_attr "mode" "")])
 
 (define_insn "@pred_single_widen_sub_extended_scalar"
@@ -4038,6 +4039,7 @@
   "TARGET_VECTOR"
   "vwsub.wx\t%0,%3,%z4%p1"
   [(set_attr "type" "viwalu")
+   (set_attr "mode_idx" "3")
(set_attr "mode" "")])
 
 (define_insn "@pred_widen_mulsu"
diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/pr116149.c 
b/gcc/testsuite/gcc.target/riscv/rvv/autovec/pr116149.c
new file mode 100644
index ..4f5927b96fea
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/pr116149.c
@@ -0,0 +1,18 @@
+/* { dg-do compile } */
+/* { dg-options "-O3 -march=rv64gcv_zvl256b -mabi=lp64d -mrvv-vector-bits=zvl" 
} */
+
+long a;
+short b[6];
+short c[20];
+int main() {
+  for (short d = 0; d < 20; d += 3) {
+c[d] = 0;
+for (int e = 0; e < 20; e += 2)
+  for (int f = 1; f < 20; f += 2)
+a += (unsigned)b[f + e];
+  }
+  if (a != 0)
+__builtin_abort ();
+}
+
+/* { dg-final { scan-assembler-times "vsetivli\tzero,1" 0 } } */


[gcc r15-951] RISC-V: Do not allow v0 as dest when merging [PR115068].

2024-05-31 Thread Robin Dapp via Gcc-cvs
https://gcc.gnu.org/g:a2fd0812a54cf51520f15e900df4cfb5874b75ed

commit r15-951-ga2fd0812a54cf51520f15e900df4cfb5874b75ed
Author: Robin Dapp 
Date:   Mon May 13 13:49:57 2024 +0200

RISC-V: Do not allow v0 as dest when merging [PR115068].

This patch splits the vfw...wf pattern so we do not emit e.g. vfwadd.wf
v0,v8,fa5,v0.t anymore.

gcc/ChangeLog:

PR target/115068

* config/riscv/vector.md:  Split vfw.wf pattern.

gcc/testsuite/ChangeLog:

* gcc.target/riscv/rvv/base/pr115068-run.c: New test.
* gcc.target/riscv/rvv/base/pr115068.c: New test.

Diff:
---
 gcc/config/riscv/vector.md | 20 +++
 .../gcc.target/riscv/rvv/base/pr115068-run.c   | 28 +
 gcc/testsuite/gcc.target/riscv/rvv/base/pr115068.c | 29 ++
 3 files changed, 67 insertions(+), 10 deletions(-)

diff --git a/gcc/config/riscv/vector.md b/gcc/config/riscv/vector.md
index c8c9667eaa2..92bbb8ce6ae 100644
--- a/gcc/config/riscv/vector.md
+++ b/gcc/config/riscv/vector.md
@@ -7178,24 +7178,24 @@
(symbol_ref "riscv_vector::get_frm_mode (operands[9])"))])
 
 (define_insn "@pred_single_widen__scalar"
-  [(set (match_operand:VWEXTF 0 "register_operand"   "=vr,   
vr")
+  [(set (match_operand:VWEXTF 0 "register_operand""=vd, vd, 
vr, vr")
(if_then_else:VWEXTF
  (unspec:
-   [(match_operand: 1 "vector_mask_operand"   
"vmWc1,vmWc1")
-(match_operand 5 "vector_length_operand"  "   rK,   
rK")
-(match_operand 6 "const_int_operand"  "i,
i")
-(match_operand 7 "const_int_operand"  "i,
i")
-(match_operand 8 "const_int_operand"  "i,
i")
-(match_operand 9 "const_int_operand"  "i,
i")
+   [(match_operand: 1 "vector_mask_operand"  " vm, 
vm,Wc1,Wc1")
+(match_operand 5 "vector_length_operand" " rK, rK, rK, 
rK")
+(match_operand 6 "const_int_operand" "  i,  i,  i, 
 i")
+(match_operand 7 "const_int_operand" "  i,  i,  i, 
 i")
+(match_operand 8 "const_int_operand" "  i,  i,  i, 
 i")
+(match_operand 9 "const_int_operand" "  i,  i,  i, 
 i")
 (reg:SI VL_REGNUM)
 (reg:SI VTYPE_REGNUM)
 (reg:SI FRM_REGNUM)] UNSPEC_VPREDICATE)
  (plus_minus:VWEXTF
-   (match_operand:VWEXTF 3 "register_operand" "   vr,   
vr")
+   (match_operand:VWEXTF 3 "register_operand"" vr, vr, vr, 
vr")
(float_extend:VWEXTF
  (vec_duplicate:
-   (match_operand: 4 "register_operand"   "f,
f"
- (match_operand:VWEXTF 2 "vector_merge_operand"   "   vu,
0")))]
+   (match_operand: 4 "register_operand"  "  f,  f,  f, 
 f"
+ (match_operand:VWEXTF 2 "vector_merge_operand"  " vu,  0, vu, 
 0")))]
   "TARGET_VECTOR"
   "vfw.wf\t%0,%3,%4%p1"
   [(set_attr "type" "vf")
diff --git a/gcc/testsuite/gcc.target/riscv/rvv/base/pr115068-run.c 
b/gcc/testsuite/gcc.target/riscv/rvv/base/pr115068-run.c
new file mode 100644
index 000..95ec8e06021
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/rvv/base/pr115068-run.c
@@ -0,0 +1,28 @@
+/* { dg-do run } */
+/* { dg-require-effective-target riscv_v_ok } */
+/* { dg-add-options riscv_v } */
+/* { dg-additional-options "-std=gnu99" } */
+
+#include 
+#include 
+
+vfloat64m8_t
+test_vfwadd_wf_f64m8_m (vbool8_t vm, vfloat64m8_t vs2, float rs1, size_t vl)
+{
+  return __riscv_vfwadd_wf_f64m8_m (vm, vs2, rs1, vl);
+}
+
+char global_memory[1024];
+void *fake_memory = (void *) global_memory;
+
+int
+main ()
+{
+  asm volatile ("fence" ::: "memory");
+  vfloat64m8_t vfwadd_wf_f64m8_m_vd = test_vfwadd_wf_f64m8_m (
+__riscv_vreinterpret_v_i8m1_b8 (__riscv_vundefined_i8m1 ()),
+__riscv_vundefined_f64m8 (), 1.0, __riscv_vsetvlmax_e64m8 ());
+  asm volatile ("" ::"vr"(vfwadd_wf_f64m8_m_vd) : "memory");
+
+  return 0;
+}
diff --git a/gcc/testsuite/gcc.target/riscv/rvv/base/pr115068.c 
b/gcc/testsuite/gcc.target/riscv/rvv/base/pr115068.c
new file mode 100644
index 000..6d680037aa1
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/rvv/base/pr115068.c
@@ -0,0 +1,29 @@
+/* { dg-do compile } */
+/* { dg-add-options riscv_v } */
+/* { dg-additional-options "-std=gnu99" } */
+
+#include 
+#include 
+
+vfloat64m8_t
+test_vfwadd_wf_f64m8_m (vbool8_t vm, vfloat64m8_t vs2, float rs1, size_t vl)
+{
+  return __riscv_vfwadd_wf_f64m8_m (vm, vs2, rs1, vl);
+}
+
+char global_memory[1024];
+void *fake_memory = (void *) global_memory;
+
+int
+main ()
+{
+  asm volatile ("fence" ::: "memory");
+  vfloat64m8_t vfwadd_wf_f64m8_m_vd = test_vfwadd_wf

[gcc r15-952] RISC-V: Split vwadd.wx and vwsub.wx and add helpers.

2024-05-31 Thread Robin Dapp via Gcc-cvs
https://gcc.gnu.org/g:9781885a624f3e29634d95c14cd10940cefb1a5a

commit r15-952-g9781885a624f3e29634d95c14cd10940cefb1a5a
Author: Robin Dapp 
Date:   Thu May 16 12:43:43 2024 +0200

RISC-V: Split vwadd.wx and vwsub.wx and add helpers.

vwadd.wx and vwsub.wx have the same problem vfwadd.wf had.  This patch
splits the insn pattern in the same way vfwadd.wf was split.

It also adds two patterns to recognize extended scalars.  In practice
those do not provide a lot of improvement over what we already have but
in some instances we can get rid of redundant extensions.

gcc/ChangeLog:

* config/riscv/vector.md: Split vwadd.wx/vwsub.wx pattern and
add extended_scalar patterns.

gcc/testsuite/ChangeLog:

* gcc.target/riscv/rvv/base/pr115068.c: Add vwadd.wx/vwsub.wx
tests.
* gcc.target/riscv/rvv/base/pr115068-run.c: Include pr115068.c.
* gcc.target/riscv/rvv/base/vwaddsub-1.c: New test.

Diff:
---
 gcc/config/riscv/vector.md | 62 ++
 .../gcc.target/riscv/rvv/base/pr115068-run.c   | 24 +
 gcc/testsuite/gcc.target/riscv/rvv/base/pr115068.c | 26 +
 .../gcc.target/riscv/rvv/base/vwaddsub-1.c | 48 +
 4 files changed, 128 insertions(+), 32 deletions(-)

diff --git a/gcc/config/riscv/vector.md b/gcc/config/riscv/vector.md
index 92bbb8ce6ae..dccf76f0003 100644
--- a/gcc/config/riscv/vector.md
+++ b/gcc/config/riscv/vector.md
@@ -3877,27 +3877,71 @@
(set_attr "mode" "")])
 
 (define_insn 
"@pred_single_widen__scalar"
-  [(set (match_operand:VWEXTI 0 "register_operand"   "=vr,   
vr")
+  [(set (match_operand:VWEXTI 0 "register_operand" "=vd,vd, 
vr, vr")
(if_then_else:VWEXTI
  (unspec:
-   [(match_operand: 1 "vector_mask_operand"   
"vmWc1,vmWc1")
-(match_operand 5 "vector_length_operand"  "   rK,   
rK")
-(match_operand 6 "const_int_operand"  "i,
i")
-(match_operand 7 "const_int_operand"  "i,
i")
-(match_operand 8 "const_int_operand"  "i,
i")
+   [(match_operand: 1 "vector_mask_operand"   " 
vm,vm,Wc1,Wc1")
+(match_operand 5 "vector_length_operand"  " rK,rK, rK, 
rK")
+(match_operand 6 "const_int_operand"  "  i, i,  i, 
 i")
+(match_operand 7 "const_int_operand"  "  i, i,  i, 
 i")
+(match_operand 8 "const_int_operand"  "  i, i,  i, 
 i")
 (reg:SI VL_REGNUM)
 (reg:SI VTYPE_REGNUM)] UNSPEC_VPREDICATE)
  (plus_minus:VWEXTI
-   (match_operand:VWEXTI 3 "register_operand" "   vr,   
vr")
+   (match_operand:VWEXTI 3 "register_operand" " vr,vr, vr, 
vr")
(any_extend:VWEXTI
  (vec_duplicate:
-   (match_operand: 4 "reg_or_0_operand"   "   rJ,   
rJ"
- (match_operand:VWEXTI 2 "vector_merge_operand"   "   vu,
0")))]
+   (match_operand: 4 "reg_or_0_operand"   " rJ,rJ, rJ, 
rJ"
+ (match_operand:VWEXTI 2 "vector_merge_operand"   " vu, 0, vu, 
 0")))]
   "TARGET_VECTOR"
   "vw.wx\t%0,%3,%z4%p1"
   [(set_attr "type" "vi")
(set_attr "mode" "")])
 
+(define_insn "@pred_single_widen_add_extended_scalar"
+  [(set (match_operand:VWEXTI 0 "register_operand" "=vd,vd, 
vr, vr")
+   (if_then_else:VWEXTI
+ (unspec:
+   [(match_operand: 1 "vector_mask_operand"   " 
vm,vm,Wc1,Wc1")
+(match_operand 5 "vector_length_operand"  " rK,rK, rK, 
rK")
+(match_operand 6 "const_int_operand"  "  i, i,  i, 
 i")
+(match_operand 7 "const_int_operand"  "  i, i,  i, 
 i")
+(match_operand 8 "const_int_operand"  "  i, i,  i, 
 i")
+(reg:SI VL_REGNUM)
+(reg:SI VTYPE_REGNUM)] UNSPEC_VPREDICATE)
+ (plus:VWEXTI
+   (vec_duplicate:VWEXTI
+ (any_extend:
+   (match_operand: 4 "reg_or_0_operand"   " rJ,rJ, rJ, 
rJ")))
+   (match_operand:VWEXTI 3 "register_operand" " vr,vr, vr, 
vr"))
+ (match_operand:VWEXTI 2 "vector_merge_operand"   " vu, 0, vu, 
 0")))]
+  "TARGET_VECTOR"
+  "vwadd.wx\t%0,%3,%z4%p1"
+  [(set_attr "type" "viwalu")
+   (set_attr "mode" "")])
+
+(define_insn "@pred_single_widen_sub_extended_scalar"
+  [(set (match_operand:VWEXTI 0 "register_operand" "=vd,vd, 
vr, vr")
+   (if_then_else:VWEXTI
+ (unspec:
+   [(match_operand: 1 "vector_mask_operand"   " 
vm,vm,Wc1,Wc1")
+(match_operand 5 "vector_length_operand"  " rK,rK, rK, 
rK")
+(

[gcc r15-953] RISC-V: Add vwsll combine helpers.

2024-05-31 Thread Robin Dapp via Gcc-cvs
https://gcc.gnu.org/g:af4bf422a699de0e7af5a26e02997d313e7301a6

commit r15-953-gaf4bf422a699de0e7af5a26e02997d313e7301a6
Author: Robin Dapp 
Date:   Mon May 13 22:09:35 2024 +0200

RISC-V: Add vwsll combine helpers.

This patch enables the usage of vwsll in autovec context by adding the
necessary combine patterns and tests.

gcc/ChangeLog:

* config/riscv/autovec-opt.md (*vwsll_zext1_): New
pattern.
(*vwsll_zext2_): Ditto.
(*vwsll_zext1_scalar_): Ditto.
(*vwsll_zext1_trunc_): Ditto.
(*vwsll_zext2_trunc_): Ditto.
(*vwsll_zext1_trunc_scalar_): Ditto.
* config/riscv/vector-crypto.md: Make pattern similar to other
narrowing/widening patterns.

gcc/testsuite/ChangeLog:

* gcc.target/riscv/rvv/autovec/binop/vwsll-1.c: New test.
* gcc.target/riscv/rvv/autovec/binop/vwsll-run.c: New test.
* gcc.target/riscv/rvv/autovec/binop/vwsll-template.h: New test.

Diff:
---
 gcc/config/riscv/autovec-opt.md| 126 -
 gcc/config/riscv/vector-crypto.md  |   2 +-
 .../gcc.target/riscv/rvv/autovec/binop/vwsll-1.c   |  10 ++
 .../gcc.target/riscv/rvv/autovec/binop/vwsll-run.c |  67 +++
 .../riscv/rvv/autovec/binop/vwsll-template.h   |  49 
 5 files changed, 251 insertions(+), 3 deletions(-)

diff --git a/gcc/config/riscv/autovec-opt.md b/gcc/config/riscv/autovec-opt.md
index 04f85d8e455..bc6af042bcf 100644
--- a/gcc/config/riscv/autovec-opt.md
+++ b/gcc/config/riscv/autovec-opt.md
@@ -1467,5 +1467,127 @@
operands, operands[4]);
 DONE;
   }
-  [(set_attr "type" "vector")]
-)
+  [(set_attr "type" "vector")])
+
+;; vzext.vf2 + vsll = vwsll.
+(define_insn_and_split "*vwsll_zext1_"
+  [(set (match_operand:VWEXTI 0"register_operand" "=vr 
")
+  (ashift:VWEXTI
+   (zero_extend:VWEXTI
+ (match_operand: 1 "register_operand" " vr "))
+ (match_operand: 2 "vector_shift_operand" "vrvk")))]
+  "TARGET_ZVBB && can_create_pseudo_p ()"
+  "#"
+  "&& 1"
+  [(const_int 0)]
+  {
+insn_code icode = code_for_pred_vwsll (mode);
+riscv_vector::emit_vlmax_insn (icode, riscv_vector::BINARY_OP, operands);
+DONE;
+  }
+  [(set_attr "type" "vwsll")])
+
+(define_insn_and_split "*vwsll_zext2_"
+  [(set (match_operand:VWEXTI 0"register_operand" "=vr 
")
+  (ashift:VWEXTI
+   (zero_extend:VWEXTI
+ (match_operand: 1 "register_operand" " vr "))
+   (zero_extend:VWEXTI
+ (match_operand: 2 "vector_shift_operand" "vrvk"]
+  "TARGET_ZVBB && can_create_pseudo_p ()"
+  "#"
+  "&& 1"
+  [(const_int 0)]
+  {
+insn_code icode = code_for_pred_vwsll (mode);
+riscv_vector::emit_vlmax_insn (icode, riscv_vector::BINARY_OP, operands);
+DONE;
+  }
+  [(set_attr "type" "vwsll")])
+
+
+(define_insn_and_split "*vwsll_zext1_scalar_"
+  [(set (match_operand:VWEXTI 0"register_operand"  
  "=vr")
+  (ashift:VWEXTI
+   (zero_extend:VWEXTI
+ (match_operand: 1 "register_operand"" 
vr"))
+ (match_operand:2 "vector_scalar_shift_operand" " 
rK")))]
+  "TARGET_ZVBB && can_create_pseudo_p ()"
+  "#"
+  "&& 1"
+  [(const_int 0)]
+  {
+if (GET_CODE (operands[2]) == SUBREG)
+  operands[2] = SUBREG_REG (operands[2]);
+insn_code icode = code_for_pred_vwsll_scalar (mode);
+riscv_vector::emit_vlmax_insn (icode, riscv_vector::BINARY_OP, operands);
+DONE;
+  }
+  [(set_attr "type" "vwsll")])
+
+;; For
+;;   uint16_t dst;
+;;   uint8_t a, b;
+;;   dst = vwsll (a, b)
+;; we seem to create
+;;   aa = (int) a;
+;;   bb = (int) b;
+;;   dst = (short) vwsll (aa, bb);
+;; The following patterns help to combine this idiom into one vwsll.
+
+(define_insn_and_split "*vwsll_zext1_trunc_"
+  [(set (match_operand: 0   "register_operand""=vr ")
+(truncate:
+  (ashift:VQEXTI
+   (zero_extend:VQEXTI
+ (match_operand: 1   "register_operand" " vr "))
+   (match_operand:VQEXTI   2   "vector_shift_operand" "vrvk"]
+  "TARGET_ZVBB && can_create_pseudo_p ()"
+  "#"
+  "&& 1"
+  [(const_int 0)]
+  {
+insn_code icode = code_for_pred_vwsll (mode);
+riscv_vector::emit_vlmax_insn (icode, riscv_vector::BINARY_OP, operands);
+DONE;
+  }
+  [(set_attr "type" "vwsll")])
+
+(define_insn_and_split "*vwsll_zext2_trunc_"
+  [(set (match_operand: 0   "register_operand""=vr ")
+(truncate:
+  (ashift:VQEXTI
+   (zero_extend:VQEXTI
+ (match_operand: 1   "register_operand" " vr "))
+   (zero_extend:VQEXTI
+ (match_operand: 2   "vector_shift_operand" "vrvk")]
+  "TARGET_ZVBB && can_create_pseudo_p ()"
+  "#"
+  "&& 1"
+  [(const_int 0)]
+  {
+insn_code icode = code_for_pred_vwsll (mode);
+riscv_vector::emit_vlmax_insn (icode, riscv_vector::BINARY

[gcc r15-954] RISC-V: Use widening shift for scatter/gather if applicable.

2024-05-31 Thread Robin Dapp via Gcc-cvs
https://gcc.gnu.org/g:309ee005aa871286c8daccbce7586f82be347440

commit r15-954-g309ee005aa871286c8daccbce7586f82be347440
Author: Robin Dapp 
Date:   Fri May 10 13:37:03 2024 +0200

RISC-V: Use widening shift for scatter/gather if applicable.

With the zvbb extension we can emit a widening shift for scatter/gather
index preparation in case we need to multiply by 2 and zero extend.

The patch also adds vwsll to the mode_idx attribute and removes the
mode from the shift-count operand of the insn pattern.

gcc/ChangeLog:

* config/riscv/riscv-v.cc (expand_gather_scatter): Use vwsll if
applicable.
* config/riscv/vector-crypto.md: Remove mode from vwsll shift
count operator.
* config/riscv/vector.md: Add vwsll to mode iterator.

gcc/testsuite/ChangeLog:

* lib/target-supports.exp: Add zvbb.
* gcc.target/riscv/rvv/autovec/gather-scatter/gather_load_64-12-zvbb.c: New test.

Diff:
---
 gcc/config/riscv/riscv-v.cc|  42 +---
 gcc/config/riscv/vector-crypto.md  |   4 +-
 gcc/config/riscv/vector.md |   4 +-
 .../gather-scatter/gather_load_64-12-zvbb.c| 113 +
 gcc/testsuite/lib/target-supports.exp  |  48 -
 5 files changed, 193 insertions(+), 18 deletions(-)

diff --git a/gcc/config/riscv/riscv-v.cc b/gcc/config/riscv/riscv-v.cc
index f105f470495..9428beca268 100644
--- a/gcc/config/riscv/riscv-v.cc
+++ b/gcc/config/riscv/riscv-v.cc
@@ -4016,7 +4016,7 @@ expand_gather_scatter (rtx *ops, bool is_load)
 {
   rtx ptr, vec_offset, vec_reg;
   bool zero_extend_p;
-  int scale_log2;
+  int shift;
   rtx mask = ops[5];
   rtx len = ops[6];
   if (is_load)
@@ -4025,7 +4025,7 @@ expand_gather_scatter (rtx *ops, bool is_load)
   ptr = ops[1];
   vec_offset = ops[2];
   zero_extend_p = INTVAL (ops[3]);
-  scale_log2 = exact_log2 (INTVAL (ops[4]));
+  shift = exact_log2 (INTVAL (ops[4]));
 }
   else
 {
@@ -4033,7 +4033,7 @@ expand_gather_scatter (rtx *ops, bool is_load)
   ptr = ops[0];
   vec_offset = ops[1];
   zero_extend_p = INTVAL (ops[2]);
-  scale_log2 = exact_log2 (INTVAL (ops[3]));
+  shift = exact_log2 (INTVAL (ops[3]));
 }
 
   machine_mode vec_mode = GET_MODE (vec_reg);
@@ -4043,9 +4043,12 @@ expand_gather_scatter (rtx *ops, bool is_load)
   poly_int64 nunits = GET_MODE_NUNITS (vec_mode);
   bool is_vlmax = is_vlmax_len_p (vec_mode, len);
 
+  bool use_widening_shift = false;
+
   /* Extend the offset element to address width.  */
   if (inner_offsize < BITS_PER_WORD)
 {
+  use_widening_shift = TARGET_ZVBB && zero_extend_p && shift == 1;
   /* 7.2. Vector Load/Store Addressing Modes.
 If the vector offset elements are narrower than XLEN, they are
 zero-extended to XLEN before adding to the ptr effective address. If
@@ -4054,8 +4057,8 @@ expand_gather_scatter (rtx *ops, bool is_load)
 raise an illegal instruction exception if the EEW is not supported for
 offset elements.
 
-RVV spec only refers to the scale_log == 0 case.  */
-  if (!zero_extend_p || scale_log2 != 0)
+RVV spec only refers to the shift == 0 case.  */
+  if (!zero_extend_p || shift)
{
  if (zero_extend_p)
inner_idx_mode
@@ -4064,19 +4067,32 @@ expand_gather_scatter (rtx *ops, bool is_load)
inner_idx_mode = int_mode_for_size (BITS_PER_WORD, 0).require ();
  machine_mode new_idx_mode
= get_vector_mode (inner_idx_mode, nunits).require ();
- rtx tmp = gen_reg_rtx (new_idx_mode);
- emit_insn (gen_extend_insn (tmp, vec_offset, new_idx_mode, idx_mode,
- zero_extend_p ? true : false));
- vec_offset = tmp;
+ if (!use_widening_shift)
+   {
+ rtx tmp = gen_reg_rtx (new_idx_mode);
+ emit_insn (gen_extend_insn (tmp, vec_offset, new_idx_mode, 
idx_mode,
+ zero_extend_p ? true : false));
+ vec_offset = tmp;
+   }
  idx_mode = new_idx_mode;
}
 }
 
-  if (scale_log2 != 0)
+  if (shift)
 {
-  rtx tmp = expand_binop (idx_mode, ashl_optab, vec_offset,
- gen_int_mode (scale_log2, Pmode), NULL_RTX, 0,
- OPTAB_DIRECT);
+  rtx tmp;
+  if (!use_widening_shift)
+   tmp = expand_binop (idx_mode, ashl_optab, vec_offset,
+   gen_int_mode (shift, Pmode), NULL_RTX, 0,
+   OPTAB_DIRECT);
+  else
+   {
+ tmp = gen_reg_rtx (idx_mode);
+ insn_code icode = code_for_pred_vwsll_scalar (idx_mode);
+ rtx ops[] = {tmp, vec_offset, const1_rtx};
+ emit_vlmax_insn (icode, BINARY_OP, ops);
+   }
+
   vec_offset = tmp;
 }
 
diff -

[gcc r15-955] RISC-V: Add vandn combine helper.

2024-05-31 Thread Robin Dapp via Gcc-cvs
https://gcc.gnu.org/g:f48448276f29a3823827292c72b7fc8e9cd39e1e

commit r15-955-gf48448276f29a3823827292c72b7fc8e9cd39e1e
Author: Robin Dapp 
Date:   Wed May 15 15:01:35 2024 +0200

RISC-V: Add vandn combine helper.

This patch adds a combine pattern for vandn as well as tests for it.

gcc/ChangeLog:

* config/riscv/autovec-opt.md (*vandn_): New pattern.
* config/riscv/vector.md: Add vandn to mode_idx.

gcc/testsuite/ChangeLog:

* gcc.target/riscv/rvv/autovec/binop/vandn-1.c: New test.
* gcc.target/riscv/rvv/autovec/binop/vandn-run.c: New test.
* gcc.target/riscv/rvv/autovec/binop/vandn-template.h: New test.

Diff:
---
 gcc/config/riscv/autovec-opt.md| 18 
 gcc/config/riscv/vector.md |  2 +-
 .../gcc.target/riscv/rvv/autovec/binop/vandn-1.c   |  8 
 .../gcc.target/riscv/rvv/autovec/binop/vandn-run.c | 54 ++
 .../riscv/rvv/autovec/binop/vandn-template.h   | 38 +++
 5 files changed, 119 insertions(+), 1 deletion(-)

diff --git a/gcc/config/riscv/autovec-opt.md b/gcc/config/riscv/autovec-opt.md
index bc6af042bcf..6a2eabbd854 100644
--- a/gcc/config/riscv/autovec-opt.md
+++ b/gcc/config/riscv/autovec-opt.md
@@ -1591,3 +1591,21 @@
 DONE;
   }
   [(set_attr "type" "vwsll")])
+
+;; vnot + vand = vandn.
+(define_insn_and_split "*vandn_"
+ [(set (match_operand:V_VLSI 0 "register_operand" "=vr")
+   (and:V_VLSI
+(not:V_VLSI
+  (match_operand:V_VLSI  2 "register_operand"  "vr"))
+(match_operand:V_VLSI1 "register_operand"  "vr")))]
+  "TARGET_ZVBB && can_create_pseudo_p ()"
+  "#"
+  "&& 1"
+  [(const_int 0)]
+  {
+insn_code icode = code_for_pred_vandn (mode);
+riscv_vector::emit_vlmax_insn (icode, riscv_vector::BINARY_OP, operands);
+DONE;
+  }
+  [(set_attr "type" "vandn")])
diff --git a/gcc/config/riscv/vector.md b/gcc/config/riscv/vector.md
index 69423be6917..c15af17ec62 100644
--- a/gcc/config/riscv/vector.md
+++ b/gcc/config/riscv/vector.md
@@ -743,7 +743,7 @@
vfcmp,vfminmax,vfsgnj,vfclass,vfmerge,vfmov,\

vfcvtitof,vfncvtitof,vfncvtftoi,vfncvtftof,vmalu,vmiota,vmidx,\

vimovxv,vfmovfv,vslideup,vslidedown,vislide1up,vislide1down,vfslide1up,vfslide1down,\
-   vgather,vcompress,vmov,vnclip,vnshift")
+   vgather,vcompress,vmov,vnclip,vnshift,vandn")
   (const_int 0)
 
   (eq_attr "type" "vimovvx,vfmovvf")
diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/binop/vandn-1.c 
b/gcc/testsuite/gcc.target/riscv/rvv/autovec/binop/vandn-1.c
new file mode 100644
index 000..3bb5bf8dd5b
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/binop/vandn-1.c
@@ -0,0 +1,8 @@
+/* { dg-do compile } */
+/* { dg-add-options "riscv_v" } */
+/* { dg-add-options "riscv_zvbb" } */
+/* { dg-additional-options "-std=c99 -fno-vect-cost-model" } */
+
+#include "vandn-template.h"
+
+/* { dg-final { scan-assembler-times {\tvandn\.vv} 8 } } */
diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/binop/vandn-run.c 
b/gcc/testsuite/gcc.target/riscv/rvv/autovec/binop/vandn-run.c
new file mode 100644
index 000..243c5975068
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/binop/vandn-run.c
@@ -0,0 +1,54 @@
+/* { dg-do run } */
+/* { dg-require-effective-target "riscv_zvbb_ok" } */
+/* { dg-add-options "riscv_v" } */
+/* { dg-add-options "riscv_zvbb" } */
+/* { dg-additional-options "-std=c99 -fno-vect-cost-model" } */
+
+#include "vandn-template.h"
+
+#include 
+
+#define SZ 512
+
+#define RUN(TYPE, VAL) 
\
+  TYPE a##TYPE[SZ];
\
+  TYPE b##TYPE[SZ];
\
+  for (int i = 0; i < SZ; i++) 
\
+{  
\
+  a##TYPE[i] = 123;
\
+  b##TYPE[i] = VAL;
\
+}  
\
+  vandn_##TYPE (a##TYPE, a##TYPE, b##TYPE, SZ);
\
+  for (int i = 0; i < SZ; i++) 
\
+assert (a##TYPE[i] == (TYPE) (123 & ~VAL));
+
+#define RUN2(TYPE, VAL)
\
+  TYPE as##TYPE[SZ];   
\
+  for (int i = 0; i < SZ; i++) 
\
+as##TYPE[i] = 123; 
\
+  vandns_##TYPE (as##TYPE, as##TYPE, VAL, SZ);

[gcc r15-956] RISC-V: Add vector popcount, clz, ctz.

2024-05-31 Thread Robin Dapp via Gcc-cvs
https://gcc.gnu.org/g:6fa4b0135439d64c0ea1816594d7dc830e836376

commit r15-956-g6fa4b0135439d64c0ea1816594d7dc830e836376
Author: Robin Dapp 
Date:   Wed May 15 17:41:07 2024 +0200

RISC-V: Add vector popcount, clz, ctz.

This patch adds the zvbb vcpop, vclz and vctz to the autovec machinery
as well as tests for them.

gcc/ChangeLog:

* config/riscv/autovec.md (ctz2): New expander.
(clz2): Ditto.
* config/riscv/generic-vector-ooo.md: Add bitmanip ops to insn
reservation.
* config/riscv/vector-crypto.md: Add VLS modes to insns.
* config/riscv/vector.md: Add bitmanip ops to mode_idx and other
attributes.

gcc/testsuite/ChangeLog:

* gcc.target/riscv/rvv/autovec/unop/popcount-1.c: Adjust check
for zvbb.
* gcc.target/riscv/rvv/autovec/unop/popcount-run-1.c: Ditto.
* gcc.target/riscv/rvv/autovec/unop/popcount-2.c: Ditto.
* gcc.target/riscv/rvv/autovec/unop/popcount-3.c: New test.
* gcc.target/riscv/rvv/autovec/unop/popcount-template.h: New test.
* gcc.target/riscv/rvv/autovec/unop/clz-1.c: New test.
* gcc.target/riscv/rvv/autovec/unop/clz-run.c: New test.
* gcc.target/riscv/rvv/autovec/unop/clz-template.h: New test.
* gcc.target/riscv/rvv/autovec/unop/ctz-1.c: New test.
* gcc.target/riscv/rvv/autovec/unop/ctz-run.c: New test.
* gcc.target/riscv/rvv/autovec/unop/ctz-template.h: New test.

Diff:
---
 gcc/config/riscv/autovec.md|  30 -
 gcc/config/riscv/generic-vector-ooo.md |   2 +-
 gcc/config/riscv/vector-crypto.md  | 137 +++--
 gcc/config/riscv/vector.md |  14 +--
 .../gcc.target/riscv/rvv/autovec/unop/clz-1.c  |   8 ++
 .../gcc.target/riscv/rvv/autovec/unop/clz-run.c|  36 ++
 .../riscv/rvv/autovec/unop/clz-template.h  |  21 
 .../gcc.target/riscv/rvv/autovec/unop/ctz-1.c  |   8 ++
 .../gcc.target/riscv/rvv/autovec/unop/ctz-run.c|  36 ++
 .../riscv/rvv/autovec/unop/ctz-template.h  |  21 
 .../gcc.target/riscv/rvv/autovec/unop/popcount-1.c |   4 +-
 .../gcc.target/riscv/rvv/autovec/unop/popcount-2.c |   4 +-
 .../gcc.target/riscv/rvv/autovec/unop/popcount-3.c |   8 ++
 .../riscv/rvv/autovec/unop/popcount-run-1.c|   3 +-
 .../riscv/rvv/autovec/unop/popcount-template.h |  21 
 15 files changed, 272 insertions(+), 81 deletions(-)

diff --git a/gcc/config/riscv/autovec.md b/gcc/config/riscv/autovec.md
index 87d4171bc89..15db26d52c6 100644
--- a/gcc/config/riscv/autovec.md
+++ b/gcc/config/riscv/autovec.md
@@ -1566,7 +1566,7 @@
 })
 
 ;; 
---
-;; - [INT] POPCOUNT.
+;; - [INT] POPCOUNT, CTZ and CLZ.
 ;; 
---
 
 (define_expand "popcount2"
@@ -1574,10 +1574,36 @@
(match_operand:V_VLSI 1 "register_operand")]
   "TARGET_VECTOR"
 {
-  riscv_vector::expand_popcount (operands);
+  if (!TARGET_ZVBB)
+riscv_vector::expand_popcount (operands);
+  else
+{
+  riscv_vector::emit_vlmax_insn (code_for_pred_v (POPCOUNT, mode),
+riscv_vector::CPOP_OP, operands);
+}
   DONE;
 })
 
+(define_expand "ctz2"
+  [(match_operand:V_VLSI 0 "register_operand")
+   (match_operand:V_VLSI 1 "register_operand")]
+  "TARGET_ZVBB"
+  {
+riscv_vector::emit_vlmax_insn (code_for_pred_v (CTZ, mode),
+  riscv_vector::CPOP_OP, operands);
+DONE;
+})
+
+(define_expand "clz2"
+  [(match_operand:V_VLSI 0 "register_operand")
+   (match_operand:V_VLSI 1 "register_operand")]
+  "TARGET_ZVBB"
+  {
+riscv_vector::emit_vlmax_insn (code_for_pred_v (CLZ, mode),
+  riscv_vector::CPOP_OP, operands);
+DONE;
+})
+
 
 ;; -
 ;;  [INT] Highpart multiplication
diff --git a/gcc/config/riscv/generic-vector-ooo.md 
b/gcc/config/riscv/generic-vector-ooo.md
index 96cb1a0be29..5e933c83841 100644
--- a/gcc/config/riscv/generic-vector-ooo.md
+++ b/gcc/config/riscv/generic-vector-ooo.md
@@ -74,7 +74,7 @@
 
 ;; Vector crypto, assumed to be a generic operation for now.
 (define_insn_reservation "vec_crypto" 4
-  (eq_attr "type" "crypto")
+  (eq_attr "type" "crypto,vclz,vctz,vcpop")
   "vxu_ooo_issue,vxu_ooo_alu")
 
 ;; Vector crypto, AES
diff --git a/gcc/config/riscv/vector-crypto.md 
b/gcc/config/riscv/vector-crypto.md
index 0ddc2f3f3c6..17432b15815 100755
--- a/gcc/config/riscv/vector-crypto.md
+++ b/gcc/config/riscv/vector-crypto.md
@@ -99,42 +99,43 @@
 ;; vror.vv vror.vx vror.vi
 ;; vwsll.vv vwsll.vx vwsll.vi
 (define_insn "@pred_vandn"
-  [(set (match_operand:VI 0 "register_operand" "=vd, vr, vd, v

[gcc r15-957] RISC-V: Remove dead perm series code and document.

2024-05-31 Thread Robin Dapp via Gcc-cvs
https://gcc.gnu.org/g:30cfdd6ff56972d9d1b9dbdd43a8333c85618775

commit r15-957-g30cfdd6ff56972d9d1b9dbdd43a8333c85618775
Author: Robin Dapp 
Date:   Fri May 17 12:48:52 2024 +0200

RISC-V: Remove dead perm series code and document.

With the introduction of shuffle_series_patterns the explicit handler
code for a perm series is dead.  This patch removes it and also adds
a function-level comment to shuffle_series_patterns.

gcc/ChangeLog:

* config/riscv/riscv-v.cc (expand_const_vector): Document.
(shuffle_extract_and_slide1up_patterns): Remove.

Diff:
---
 gcc/config/riscv/riscv-v.cc | 26 --
 1 file changed, 4 insertions(+), 22 deletions(-)

diff --git a/gcc/config/riscv/riscv-v.cc b/gcc/config/riscv/riscv-v.cc
index 9428beca268..948aaf7d8dd 100644
--- a/gcc/config/riscv/riscv-v.cc
+++ b/gcc/config/riscv/riscv-v.cc
@@ -1485,28 +1485,6 @@ expand_const_vector (rtx target, rtx src)
  emit_vlmax_insn (code_for_pred_merge (mode), MERGE_OP, ops);
}
}
-  else if (npatterns == 1 && nelts_per_pattern == 3)
-   {
- /* Generate the following CONST_VECTOR:
-{ base0, base1, base1 + step, base1 + step * 2, ... }  */
- rtx base0 = builder.elt (0);
- rtx base1 = builder.elt (1);
- rtx base2 = builder.elt (2);
-
- rtx step = simplify_binary_operation (MINUS, builder.inner_mode (),
-   base2, base1);
-
- /* Step 1 - { base1, base1 + step, base1 + step * 2, ... }  */
- rtx tmp = gen_reg_rtx (mode);
- expand_vec_series (tmp, base1, step);
- /* Step 2 - { base0, base1, base1 + step, base1 + step * 2, ... }  */
- if (!rtx_equal_p (base0, const0_rtx))
-   base0 = force_reg (builder.inner_mode (), base0);
-
- insn_code icode = optab_handler (vec_shl_insert_optab, mode);
- gcc_assert (icode != CODE_FOR_nothing);
- emit_insn (GEN_FCN (icode) (target, tmp, base0));
-   }
   else
/* TODO: We will enable more variable-length vector in the future.  */
gcc_unreachable ();
@@ -3580,6 +3558,10 @@ shuffle_extract_and_slide1up_patterns (struct 
expand_vec_perm_d *d)
   return true;
 }
 
+/* This looks for a series pattern in the provided vector permute structure D.
+   If successful it emits a series insn as well as a gather to implement it.
+   Return true if successful, false otherwise.  */
+
 static bool
 shuffle_series_patterns (struct expand_vec_perm_d *d)
 {


[gcc r15-1042] RISC-V: Introduce -mvector-strict-align.

2024-06-05 Thread Robin Dapp via Gcc-cvs
https://gcc.gnu.org/g:68b0742a49de7122d5023f0bf46460ff2fb3e3dd

commit r15-1042-g68b0742a49de7122d5023f0bf46460ff2fb3e3dd
Author: Robin Dapp 
Date:   Tue May 28 21:19:26 2024 +0200

RISC-V: Introduce -mvector-strict-align.

This patch disables movmisalign by default and introduces
the -mno-vector-strict-align option to override it and re-enable
movmisalign.  For now, generic-ooo is the only uarch that supports
misaligned vector access.

The patch also adds a check_effective_target_riscv_v_misalign_ok to
the testsuite which enables or disables the vector misalignment tests
depending on whether the target under test can execute a misaligned
vle32.

Changes from v3:
 - Addressed Kito's comments.
 - Made -mscalar-strict-align a real alias.

gcc/ChangeLog:

* config/riscv/riscv-opts.h (TARGET_VECTOR_MISALIGN_SUPPORTED):
Move from here...
* config/riscv/riscv.h (TARGET_VECTOR_MISALIGN_SUPPORTED):
...to here and map to riscv_vector_unaligned_access_p.
* config/riscv/riscv.opt: Add -mvector-strict-align.
* config/riscv/riscv.cc (struct riscv_tune_param): Add
vector_unaligned_access.
(riscv_override_options_internal): Set
riscv_vector_unaligned_access_p.
* doc/invoke.texi: Document -mvector-strict-align.

gcc/testsuite/ChangeLog:

* lib/target-supports.exp: Add
check_effective_target_riscv_v_misalign_ok.
* gcc.dg/vect/costmodel/riscv/rvv/dynamic-lmul2-7.c: Add
-mno-vector-strict-align.
* gcc.dg/vect/costmodel/riscv/rvv/vla_vs_vls-10.c: Ditto.
* gcc.dg/vect/costmodel/riscv/rvv/vla_vs_vls-11.c: Ditto.
* gcc.dg/vect/costmodel/riscv/rvv/vla_vs_vls-12.c: Ditto.
* gcc.dg/vect/costmodel/riscv/rvv/vla_vs_vls-8.c: Ditto.
* gcc.dg/vect/costmodel/riscv/rvv/vla_vs_vls-9.c: Ditto.
* gcc.target/riscv/rvv/autovec/vls/misalign-1.c: Ditto.

Diff:
---
 gcc/config/riscv/riscv-opts.h  |  3 --
 gcc/config/riscv/riscv.cc  | 19 
 gcc/config/riscv/riscv.h   |  5 
 gcc/config/riscv/riscv.opt |  8 +
 gcc/doc/invoke.texi| 17 +++
 .../vect/costmodel/riscv/rvv/dynamic-lmul2-7.c |  2 +-
 .../vect/costmodel/riscv/rvv/vla_vs_vls-10.c   |  2 +-
 .../vect/costmodel/riscv/rvv/vla_vs_vls-11.c   |  2 +-
 .../vect/costmodel/riscv/rvv/vla_vs_vls-12.c   |  2 +-
 .../gcc.dg/vect/costmodel/riscv/rvv/vla_vs_vls-8.c |  2 +-
 .../gcc.dg/vect/costmodel/riscv/rvv/vla_vs_vls-9.c |  2 +-
 .../gcc.target/riscv/rvv/autovec/vls/misalign-1.c  |  2 +-
 gcc/testsuite/lib/target-supports.exp  | 35 --
 13 files changed, 89 insertions(+), 12 deletions(-)

diff --git a/gcc/config/riscv/riscv-opts.h b/gcc/config/riscv/riscv-opts.h
index 1b2dd5757a8..f58a07abffc 100644
--- a/gcc/config/riscv/riscv-opts.h
+++ b/gcc/config/riscv/riscv-opts.h
@@ -147,9 +147,6 @@ enum rvv_vector_bits_enum {
  ? 0   
\
  : 32 << (__builtin_popcount (opts->x_riscv_zvl_flags) - 1))
 
-/* TODO: Enable RVV movmisalign by default for now.  */
-#define TARGET_VECTOR_MISALIGN_SUPPORTED 1
-
 /* The maximmum LMUL according to user configuration.  */
 #define TARGET_MAX_LMUL
\
   (int) (rvv_max_lmul == RVV_DYNAMIC ? RVV_M8 : rvv_max_lmul)
diff --git a/gcc/config/riscv/riscv.cc b/gcc/config/riscv/riscv.cc
index c5c4c777349..9704ff9c6a0 100644
--- a/gcc/config/riscv/riscv.cc
+++ b/gcc/config/riscv/riscv.cc
@@ -288,6 +288,7 @@ struct riscv_tune_param
   unsigned short memory_cost;
   unsigned short fmv_cost;
   bool slow_unaligned_access;
+  bool vector_unaligned_access;
   bool use_divmod_expansion;
   bool overlap_op_by_pieces;
   unsigned int fusible_ops;
@@ -300,6 +301,10 @@ struct riscv_tune_param
 /* Whether unaligned accesses execute very slowly.  */
 bool riscv_slow_unaligned_access_p;
 
+/* Whether misaligned vector accesses are supported (i.e. do not
+   throw an exception).  */
+bool riscv_vector_unaligned_access_p;
+
 /* Whether user explicitly passed -mstrict-align.  */
 bool riscv_user_wants_strict_align;
 
@@ -442,6 +447,7 @@ static const struct riscv_tune_param rocket_tune_info = {
   5,   /* memory_cost */
   8,   /* fmv_cost */
   true,/* 
slow_unaligned_access */
+  false,   /* vector_unaligned_access */
   false,   /* use_divmod_expansion */
   false,   /* overlap_op_by_pieces */
   RISCV_FUSE_NOTHING,  

[gcc r15-1043] check_GNU_style: Use raw strings.

2024-06-05 Thread Robin Dapp via Gcc-cvs
https://gcc.gnu.org/g:03e1a7270314800eb33632f778401570e65345bd

commit r15-1043-g03e1a7270314800eb33632f778401570e65345bd
Author: Robin Dapp 
Date:   Mon May 13 22:05:57 2024 +0200

check_GNU_style: Use raw strings.

This silences some warnings when using check_GNU_style.

contrib/ChangeLog:

* check_GNU_style_lib.py: Use raw strings for regexps.

Diff:
---
 contrib/check_GNU_style_lib.py | 20 ++--
 1 file changed, 10 insertions(+), 10 deletions(-)

diff --git a/contrib/check_GNU_style_lib.py b/contrib/check_GNU_style_lib.py
index f1a120fa8d3..6dbe4b53559 100755
--- a/contrib/check_GNU_style_lib.py
+++ b/contrib/check_GNU_style_lib.py
@@ -103,7 +103,7 @@ class SpacesCheck:
 
 class SpacesAndTabsMixedCheck:
 def __init__(self):
-self.re = re.compile('\ \t')
+self.re = re.compile(r'\ \t')
 
 def check(self, filename, lineno, line):
 stripped = line.lstrip()
@@ -115,7 +115,7 @@ class SpacesAndTabsMixedCheck:
 
 class TrailingWhitespaceCheck:
 def __init__(self):
-self.re = re.compile('(\s+)$')
+self.re = re.compile(r'(\s+)$')
 
 def check(self, filename, lineno, line):
 assert(len(line) == 0 or line[-1] != '\n')
@@ -128,7 +128,7 @@ class TrailingWhitespaceCheck:
 
 class SentenceSeparatorCheck:
 def __init__(self):
-self.re = re.compile('\w\.(\s|\s{3,})\w')
+self.re = re.compile(r'\w\.(\s|\s{3,})\w')
 
 def check(self, filename, lineno, line):
 m = self.re.search(line)
@@ -140,7 +140,7 @@ class SentenceSeparatorCheck:
 
 class SentenceEndOfCommentCheck:
 def __init__(self):
-self.re = re.compile('\w\.(\s{0,1}|\s{3,})\*/')
+self.re = re.compile(r'\w\.(\s{0,1}|\s{3,})\*/')
 
 def check(self, filename, lineno, line):
 m = self.re.search(line)
@@ -152,7 +152,7 @@ class SentenceEndOfCommentCheck:
 
 class SentenceDotEndCheck:
 def __init__(self):
-self.re = re.compile('\w(\s*\*/)')
+self.re = re.compile(r'\w(\s*\*/)')
 
 def check(self, filename, lineno, line):
 m = self.re.search(line)
@@ -164,7 +164,7 @@ class SentenceDotEndCheck:
 class FunctionParenthesisCheck:
 # TODO: filter out GTY stuff
 def __init__(self):
-self.re = re.compile('\w(\s{2,})?(\()')
+self.re = re.compile(r'\w(\s{2,})?(\()')
 
 def check(self, filename, lineno, line):
 if '#define' in line:
@@ -179,7 +179,7 @@ class FunctionParenthesisCheck:
 
 class SquareBracketCheck:
 def __init__(self):
-self.re = re.compile('\w\s+(\[)')
+self.re = re.compile(r'\w\s+(\[)')
 
 def check(self, filename, lineno, line):
 if filename.endswith('.md'):
@@ -194,7 +194,7 @@ class SquareBracketCheck:
 
 class ClosingParenthesisCheck:
 def __init__(self):
-self.re = re.compile('\S\s+(\))')
+self.re = re.compile(r'\S\s+(\))')
 
 def check(self, filename, lineno, line):
 m = self.re.search(line)
@@ -208,7 +208,7 @@ class BracesOnSeparateLineCheck:
 # This will give false positives for C99 compound literals.
 
 def __init__(self):
-self.re = re.compile('(\)|else)\s*({)')
+self.re = re.compile(r'(\)|else)\s*({)')
 
 def check(self, filename, lineno, line):
 m = self.re.search(line)
@@ -219,7 +219,7 @@ class BracesOnSeparateLineCheck:
 
 class TrailinigOperatorCheck:
 def __init__(self):
-regex = '^\s.*(([^a-zA-Z_]\*)|([-%<=&|^?])|([^*]/)|([^:][+]))$'
+regex = r'^\s.*(([^a-zA-Z_]\*)|([-%<=&|^?])|([^*]/)|([^:][+]))$'
 self.re = re.compile(regex)
 
 def check(self, filename, lineno, line):


[gcc r15-1061] RISC-V: Regenerate opt urls.

2024-06-06 Thread Robin Dapp via Gcc-cvs
https://gcc.gnu.org/g:037fc4d1012dc9d533862ef7e2c946249877dd71

commit r15-1061-g037fc4d1012dc9d533862ef7e2c946249877dd71
Author: Robin Dapp 
Date:   Thu Jun 6 09:32:28 2024 +0200

RISC-V: Regenerate opt urls.

I wasn't aware that I needed to regenerate the opt urls when
adding an option.  This patch does that.

gcc/ChangeLog:

* config/riscv/riscv.opt.urls: Regenerate.

Diff:
---
 gcc/config/riscv/riscv.opt.urls | 6 ++
 1 file changed, 6 insertions(+)

diff --git a/gcc/config/riscv/riscv.opt.urls b/gcc/config/riscv/riscv.opt.urls
index d87e9d5c9a8..622cb6e7b44 100644
--- a/gcc/config/riscv/riscv.opt.urls
+++ b/gcc/config/riscv/riscv.opt.urls
@@ -47,6 +47,12 @@ UrlSuffix(gcc/RISC-V-Options.html#index-mcmodel_003d-4)
 mstrict-align
 UrlSuffix(gcc/RISC-V-Options.html#index-mstrict-align-4)
 
+mscalar-strict-align
+UrlSuffix(gcc/RISC-V-Options.html#index-mscalar-strict-align)
+
+mvector-strict-align
+UrlSuffix(gcc/RISC-V-Options.html#index-mvector-strict-align)
+
 ; skipping UrlSuffix for 'mexplicit-relocs' due to finding no URLs
 
 mrelax


[gcc r15-1187] vect: Merge loop mask and cond_op mask in fold-left reduction [PR115382].

2024-06-11 Thread Robin Dapp via Gcc-cvs
https://gcc.gnu.org/g:2b438a0d2aa80f051a09b245a58f643540d4004b

commit r15-1187-g2b438a0d2aa80f051a09b245a58f643540d4004b
Author: Robin Dapp 
Date:   Fri Jun 7 14:36:41 2024 +0200

vect: Merge loop mask and cond_op mask in fold-left reduction [PR115382].

Currently we discard the cond-op mask when the loop is fully masked
which causes wrong code in
gcc.dg/vect/vect-cond-reduc-in-order-2-signed-zero.c
when compiled with
-O3 -march=cascadelake --param vect-partial-vector-usage=2.

This patch ANDs both masks.

gcc/ChangeLog:

PR tree-optimization/115382

* tree-vect-loop.cc (vectorize_fold_left_reduction): Use
prepare_vec_mask.
* tree-vect-stmts.cc (check_load_store_for_partial_vectors):
Remove static of prepare_vec_mask.
* tree-vectorizer.h (prepare_vec_mask): Export.

Diff:
---
 gcc/tree-vect-loop.cc  | 10 +-
 gcc/tree-vect-stmts.cc |  2 +-
 gcc/tree-vectorizer.h  |  3 +++
 3 files changed, 13 insertions(+), 2 deletions(-)

diff --git a/gcc/tree-vect-loop.cc b/gcc/tree-vect-loop.cc
index c471f1564a72..5b1ad06eca66 100644
--- a/gcc/tree-vect-loop.cc
+++ b/gcc/tree-vect-loop.cc
@@ -7204,7 +7204,15 @@ vectorize_fold_left_reduction (loop_vec_info loop_vinfo,
   tree len = NULL_TREE;
   tree bias = NULL_TREE;
   if (LOOP_VINFO_FULLY_MASKED_P (loop_vinfo))
-   mask = vect_get_loop_mask (loop_vinfo, gsi, masks, vec_num, vectype_in, 
i);
+   {
+ tree loop_mask = vect_get_loop_mask (loop_vinfo, gsi, masks,
+  vec_num, vectype_in, i);
+ if (is_cond_op)
+   mask = prepare_vec_mask (loop_vinfo, TREE_TYPE (loop_mask),
+loop_mask, vec_opmask[i], gsi);
+ else
+   mask = loop_mask;
+   }
   else if (is_cond_op)
mask = vec_opmask[i];
   if (LOOP_VINFO_FULLY_WITH_LENGTH_P (loop_vinfo))
diff --git a/gcc/tree-vect-stmts.cc b/gcc/tree-vect-stmts.cc
index 05a169ecb2dd..831f18253765 100644
--- a/gcc/tree-vect-stmts.cc
+++ b/gcc/tree-vect-stmts.cc
@@ -1643,7 +1643,7 @@ check_load_store_for_partial_vectors (loop_vec_info 
loop_vinfo, tree vectype,
MASK_TYPE is the type of both masks.  If new statements are needed,
insert them before GSI.  */
 
-static tree
+tree
 prepare_vec_mask (loop_vec_info loop_vinfo, tree mask_type, tree loop_mask,
  tree vec_mask, gimple_stmt_iterator *gsi)
 {
diff --git a/gcc/tree-vectorizer.h b/gcc/tree-vectorizer.h
index 97ec9c341e7d..6bb0f5c3a56f 100644
--- a/gcc/tree-vectorizer.h
+++ b/gcc/tree-vectorizer.h
@@ -2508,6 +2508,9 @@ extern void vect_free_slp_tree (slp_tree);
 extern bool compatible_calls_p (gcall *, gcall *);
 extern int vect_slp_child_index_for_operand (const gimple *, int op, bool);
 
+extern tree prepare_vec_mask (loop_vec_info, tree, tree, tree,
+ gimple_stmt_iterator *);
+
 /* In tree-vect-patterns.cc.  */
 extern void
 vect_mark_pattern_stmts (vec_info *, stmt_vec_info, gimple *, tree);


[gcc r15-4378] RISC-V: Use biggest_mode as mode for constants.

2024-10-16 Thread Robin Dapp via Gcc-cvs
https://gcc.gnu.org/g:cc217a1ecb04c9234b2cce7ba3c27701a050e402

commit r15-4378-gcc217a1ecb04c9234b2cce7ba3c27701a050e402
Author: Robin Dapp 
Date:   Tue Oct 15 12:10:48 2024 +0200

RISC-V: Use biggest_mode as mode for constants.

In compute_nregs_for_mode we expect that the current variable's mode is
at most as large as the biggest mode to be used for vectorization.

This might not be true for constants as they don't actually have a mode.
In that case, just use the biggest mode so max_number_of_live_regs
returns 1.

This fixes several test cases in the test suite.

gcc/ChangeLog:

PR target/116655

* config/riscv/riscv-vector-costs.cc (max_number_of_live_regs):
Use biggest mode instead of constant's saved mode.

gcc/testsuite/ChangeLog:

* gcc.target/riscv/rvv/autovec/pr116655.c: New test.

Diff:
---
 gcc/config/riscv/riscv-vector-costs.cc| 14 ++
 gcc/testsuite/gcc.target/riscv/rvv/autovec/pr116655.c | 11 +++
 2 files changed, 21 insertions(+), 4 deletions(-)

diff --git a/gcc/config/riscv/riscv-vector-costs.cc 
b/gcc/config/riscv/riscv-vector-costs.cc
index 25570bd40040..67b9e3e8f413 100644
--- a/gcc/config/riscv/riscv-vector-costs.cc
+++ b/gcc/config/riscv/riscv-vector-costs.cc
@@ -194,7 +194,7 @@ compute_local_program_points (
   /* Collect the stmts that is vectorized and mark their program point.  */
   for (i = 0; i < nbbs; i++)
{
- int point = 1;
+ unsigned int point = 1;
  basic_block bb = bbs[i];
  vec program_points = vNULL;
  if (dump_enabled_p ())
@@ -489,9 +489,15 @@ max_number_of_live_regs (loop_vec_info loop_vinfo, const 
basic_block bb,
   pair live_range = (*iter).second;
   for (i = live_range.first + 1; i <= live_range.second; i++)
{
- machine_mode mode = TREE_CODE (TREE_TYPE (var)) == BOOLEAN_TYPE
-   ? BImode
-   : TYPE_MODE (TREE_TYPE (var));
+ machine_mode mode;
+ if (TREE_CODE (TREE_TYPE (var)) == BOOLEAN_TYPE)
+   mode = BImode;
+ /* Constants do not have a mode, just use the biggest so
+compute_nregs will return 1.  */
+ else if (TREE_CODE (var) == INTEGER_CST)
+   mode = biggest_mode;
+ else
+   mode = TYPE_MODE (TREE_TYPE (var));
  unsigned int nregs
= compute_nregs_for_mode (loop_vinfo, mode, biggest_mode, lmul);
  live_vars_vec[i] += nregs;
diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/pr116655.c 
b/gcc/testsuite/gcc.target/riscv/rvv/autovec/pr116655.c
new file mode 100644
index ..36768e37d005
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/pr116655.c
@@ -0,0 +1,11 @@
+/* { dg-do compile } */
+/* { dg-options "-O3 -march=rv64imv -mabi=lp64d -mrvv-max-lmul=dynamic" } */
+
+short a[5];
+int b() {
+  int c = 0;
+  for (; c <= 4; c++)
+if (a[c])
+  break;
+  return c;
+}


[gcc r15-3829] RISC-V: Add more vector-vector extract cases.

2024-09-24 Thread Robin Dapp via Gcc-cvs
https://gcc.gnu.org/g:be50c763a07893416419b82538f259f43e0773d4

commit r15-3829-gbe50c763a07893416419b82538f259f43e0773d4
Author: Robin Dapp 
Date:   Tue Sep 3 17:53:34 2024 +0200

RISC-V: Add more vector-vector extract cases.

This adds a V16SI -> V4SI (and related), i.e. "quartering", vector-vector
extract expander for VLS modes.  It helps with spills in x264 that may
cause a load-hit-store.

gcc/ChangeLog:

* config/riscv/autovec.md (vec_extract):
Add quarter vec-vec extract.
* config/riscv/vector-iterators.md: New iterators.

Diff:
---
 gcc/config/riscv/autovec.md  |  28 ++
 gcc/config/riscv/vector-iterators.md | 184 +++
 2 files changed, 212 insertions(+)

diff --git a/gcc/config/riscv/autovec.md b/gcc/config/riscv/autovec.md
index a53c44659f0f..836cdd4491f6 100644
--- a/gcc/config/riscv/autovec.md
+++ b/gcc/config/riscv/autovec.md
@@ -1488,6 +1488,34 @@
   DONE;
 })
 
+(define_expand "vec_extract"
+  [(set (match_operand:0 "nonimmediate_operand")
+ (vec_select:
+   (match_operand:VLS_HAS_QUARTER   1 "register_operand")
+   (parallel
+[(match_operand 2 "immediate_operand")])))]
+  "TARGET_VECTOR"
+{
+  int sz = GET_MODE_NUNITS (mode).to_constant ();
+  int part = INTVAL (operands[2]);
+
+  rtx start = GEN_INT (part * sz);
+  rtx tmp = operands[1];
+
+  if (part != 0)
+{
+  tmp = gen_reg_rtx (mode);
+
+  rtx ops[] = {tmp, operands[1], start};
+  riscv_vector::emit_vlmax_insn
+   (code_for_pred_slide (UNSPEC_VSLIDEDOWN, mode),
+riscv_vector::BINARY_OP, ops);
+}
+
+  emit_move_insn (operands[0], gen_lowpart (mode, tmp));
+  DONE;
+})
+
 ;; -
 ;;  [FP] Binary operations
 ;; -
diff --git a/gcc/config/riscv/vector-iterators.md 
b/gcc/config/riscv/vector-iterators.md
index a00b5c3feddd..43325d1ba87a 100644
--- a/gcc/config/riscv/vector-iterators.md
+++ b/gcc/config/riscv/vector-iterators.md
@@ -4328,3 +4328,187 @@
   (V256DF "v128df")
   (V512DF "v256df")
 ])
+
+(define_mode_iterator VLS_HAS_QUARTER [
+  (V4QI "riscv_vector::vls_mode_valid_p (V4QImode)")
+  (V8QI "riscv_vector::vls_mode_valid_p (V8QImode)")
+  (V16QI "riscv_vector::vls_mode_valid_p (V16QImode)")
+  (V4HI "riscv_vector::vls_mode_valid_p (V4HImode)")
+  (V8HI "riscv_vector::vls_mode_valid_p (V8HImode)")
+  (V16HI "riscv_vector::vls_mode_valid_p (V16HImode)")
+  (V4SI "riscv_vector::vls_mode_valid_p (V4SImode)")
+  (V8SI "riscv_vector::vls_mode_valid_p (V8SImode)")
+  (V16SI "riscv_vector::vls_mode_valid_p (V16SImode) && TARGET_MIN_VLEN >= 64")
+  (V4DI "riscv_vector::vls_mode_valid_p (V4DImode) && TARGET_VECTOR_ELEN_64")
+  (V8DI "riscv_vector::vls_mode_valid_p (V8DImode) && TARGET_VECTOR_ELEN_64 && 
TARGET_MIN_VLEN >= 64")
+  (V16DI "riscv_vector::vls_mode_valid_p (V16DImode) && TARGET_VECTOR_ELEN_64 
&& TARGET_MIN_VLEN >= 128")
+  (V4SF "riscv_vector::vls_mode_valid_p (V4SFmode) && 
TARGET_VECTOR_ELEN_FP_32")
+  (V8SF "riscv_vector::vls_mode_valid_p (V8SFmode) && 
TARGET_VECTOR_ELEN_FP_32")
+  (V16SF "riscv_vector::vls_mode_valid_p (V16SFmode) && 
TARGET_VECTOR_ELEN_FP_32 && TARGET_MIN_VLEN >= 64")
+  (V4DF "riscv_vector::vls_mode_valid_p (V4DFmode) && 
TARGET_VECTOR_ELEN_FP_64")
+  (V8DF "riscv_vector::vls_mode_valid_p (V8DFmode) && TARGET_VECTOR_ELEN_FP_64 
&& TARGET_MIN_VLEN >= 64")
+  (V16DF "riscv_vector::vls_mode_valid_p (V16DFmode) && 
TARGET_VECTOR_ELEN_FP_64 && TARGET_MIN_VLEN >= 128")
+  (V32QI "riscv_vector::vls_mode_valid_p (V32QImode)")
+  (V64QI "riscv_vector::vls_mode_valid_p (V64QImode) && TARGET_MIN_VLEN >= 64")
+  (V128QI "riscv_vector::vls_mode_valid_p (V128QImode) && TARGET_MIN_VLEN >= 
128")
+  (V256QI "riscv_vector::vls_mode_valid_p (V256QImode) && TARGET_MIN_VLEN >= 
256")
+  (V512QI "riscv_vector::vls_mode_valid_p (V512QImode) && TARGET_MIN_VLEN >= 
512")
+  (V1024QI "riscv_vector::vls_mode_valid_p (V1024QImode) && TARGET_MIN_VLEN >= 
1024")
+  (V2048QI "riscv_vector::vls_mode_valid_p (V2048QImode) && TARGET_MIN_VLEN >= 
2048")
+  (V4096QI "riscv_vector::vls_mode_valid_p (V4096QImode) && TARGET_MIN_VLEN >= 
4096")
+  (V32HI "riscv_vector::vls_mode_valid_p (V32HImode) && TARGET_MIN_VLEN >= 64")
+  (V64HI "riscv_vector::vls_mode_valid_p (V64HImode) && TARGET_MIN_VLEN >= 
128")
+  (V128HI "riscv_vector::vls_mode_valid_p (V128HImode) && TARGET_MIN_VLEN >= 
256")
+  (V256HI "riscv_vector::vls_mode_valid_p (V256HImode) && TARGET_MIN_VLEN >= 
512")
+  (V512HI "riscv_vector::vls_mode_valid_p (V512HImode) && TARGET_MIN_VLEN >= 
1024")
+  (V1024HI "riscv_vector::vls_mode_valid_p (V1024HImode) && TARGET_MIN_VLEN >= 
2048")
+  (V2048HI "riscv_vector::vls_mode_valid_p (V2048HImode) && TARGET_MIN_VLEN >= 
4096")
+  (V32SI "riscv_vector::vls_mode_valid_p (V32SImode) && TARGET_MIN_VLEN >= 
128")
+  (V64SI "riscv

[gcc r15-3828] RISC-V: Fix effective target check.

2024-09-24 Thread Robin Dapp via Gcc-cvs
https://gcc.gnu.org/g:e45537f56250f19cdf2ec09a744c6b11170c1001

commit r15-3828-ge45537f56250f19cdf2ec09a744c6b11170c1001
Author: Robin Dapp 
Date:   Fri Aug 30 14:35:08 2024 +0200

RISC-V: Fix effective target check.

The return value is inverted in check_effective_target_rvv_zvl256b_ok
and check_effective_target_rvv_zvl512b_ok.  Fix this and also just use
the current march.

gcc/testsuite/ChangeLog:

* lib/target-supports.exp: Fix effective target check.

Diff:
---
 gcc/testsuite/lib/target-supports.exp | 12 ++--
 1 file changed, 6 insertions(+), 6 deletions(-)

diff --git a/gcc/testsuite/lib/target-supports.exp 
b/gcc/testsuite/lib/target-supports.exp
index 8f2afe866c7c..05a63c4e9a55 100644
--- a/gcc/testsuite/lib/target-supports.exp
+++ b/gcc/testsuite/lib/target-supports.exp
@@ -1978,15 +1978,15 @@ proc check_effective_target_riscv_v { } {
 
 proc check_effective_target_rvv_zvl256b_ok { } {
 # Check if the target has a VLENB of 32.
-set gcc_march [regsub {[[:alnum:]]*} [riscv_get_arch] &v]
+set gcc_march [riscv_get_arch]
 return [check_runtime ${gcc_march}_exec {
int main()
{
  int vlenb = 0;
  asm ("csrr %0,vlenb" : "=r" (vlenb) : : );
  if (vlenb == 32)
-   return 1;
- return 0;
+   return 0;
+ return 1;
}
   } "-march=${gcc_march}"]
 }
@@ -1996,15 +1996,15 @@ proc check_effective_target_rvv_zvl256b_ok { } {
 
 proc check_effective_target_rvv_zvl512b_ok { } {
 # Check if the target has a VLENB of 64.
-set gcc_march [regsub {[[:alnum:]]*} [riscv_get_arch] &v]
+set gcc_march [riscv_get_arch]
 return [check_runtime ${gcc_march}_exec {
int main()
{
  int vlenb = 0;
  asm ("csrr %0,vlenb" : "=r" (vlenb) : : );
  if (vlenb == 64)
-   return 1;
- return 0;
+   return 0;
+ return 1;
}
   } "-march=${gcc_march}"]
 }


[gcc r15-3830] RISC-V: testsuite: Fix SELECT_VL SLP fallout.

2024-09-24 Thread Robin Dapp via Gcc-cvs
https://gcc.gnu.org/g:4bd3ccae58d40fad6bd99ed08ef4e1e4d70fefd0

commit r15-3830-g4bd3ccae58d40fad6bd99ed08ef4e1e4d70fefd0
Author: Robin Dapp 
Date:   Thu Sep 19 05:08:47 2024 -0700

RISC-V: testsuite: Fix SELECT_VL SLP fallout.

This fixes asm-scan fallout from r15-3712-g5e3a4a01785e2d where we allow
SLP with SELECT_VL.

Assisted by sed and regtested on rv64gcv_zvfh_zvbb.

gcc/testsuite/ChangeLog:

* gcc.target/riscv/rvv/autovec/binop/vec_sat_s_add-1.c: Expect
length-controlled loop.
* gcc.target/riscv/rvv/autovec/binop/vec_sat_s_add-2.c: Ditto.
* gcc.target/riscv/rvv/autovec/binop/vec_sat_s_add-3.c: Ditto.
* gcc.target/riscv/rvv/autovec/binop/vec_sat_s_add-4.c: Ditto.
* gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-1.c: Ditto.
* gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-10.c: Ditto.
* gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-11.c: Ditto.
* gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-12.c: Ditto.
* gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-13.c: Ditto.
* gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-14.c: Ditto.
* gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-15.c: Ditto.
* gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-16.c: Ditto.
* gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-17.c: Ditto.
* gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-18.c: Ditto.
* gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-19.c: Ditto.
* gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-2.c: Ditto.
* gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-20.c: Ditto.
* gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-21.c: Ditto.
* gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-22.c: Ditto.
* gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-23.c: Ditto.
* gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-24.c: Ditto.
* gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-25.c: Ditto.
* gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-26.c: Ditto.
* gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-27.c: Ditto.
* gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-28.c: Ditto.
* gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-29.c: Ditto.
* gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-3.c: Ditto.
* gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-30.c: Ditto.
* gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-31.c: Ditto.
* gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-32.c: Ditto.
* gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-4.c: Ditto.
* gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-5.c: Ditto.
* gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-6.c: Ditto.
* gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-7.c: Ditto.
* gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-8.c: Ditto.
* gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-9.c: Ditto.
* gcc.target/riscv/rvv/autovec/binop/vec_sat_u_sub-1.c: Ditto.
* gcc.target/riscv/rvv/autovec/binop/vec_sat_u_sub-10.c: Ditto.
* gcc.target/riscv/rvv/autovec/binop/vec_sat_u_sub-11.c: Ditto.
* gcc.target/riscv/rvv/autovec/binop/vec_sat_u_sub-12.c: Ditto.
* gcc.target/riscv/rvv/autovec/binop/vec_sat_u_sub-13.c: Ditto.
* gcc.target/riscv/rvv/autovec/binop/vec_sat_u_sub-14.c: Ditto.
* gcc.target/riscv/rvv/autovec/binop/vec_sat_u_sub-15.c: Ditto.
* gcc.target/riscv/rvv/autovec/binop/vec_sat_u_sub-16.c: Ditto.
* gcc.target/riscv/rvv/autovec/binop/vec_sat_u_sub-17.c: Ditto.
* gcc.target/riscv/rvv/autovec/binop/vec_sat_u_sub-18.c: Ditto.
* gcc.target/riscv/rvv/autovec/binop/vec_sat_u_sub-19.c: Ditto.
* gcc.target/riscv/rvv/autovec/binop/vec_sat_u_sub-2.c: Ditto.
* gcc.target/riscv/rvv/autovec/binop/vec_sat_u_sub-20.c: Ditto.
* gcc.target/riscv/rvv/autovec/binop/vec_sat_u_sub-21.c: Ditto.
* gcc.target/riscv/rvv/autovec/binop/vec_sat_u_sub-22.c: Ditto.
* gcc.target/riscv/rvv/autovec/binop/vec_sat_u_sub-23.c: Ditto.
* gcc.target/riscv/rvv/autovec/binop/vec_sat_u_sub-24.c: Ditto.
* gcc.target/riscv/rvv/autovec/binop/vec_sat_u_sub-25.c: Ditto.
* gcc.target/riscv/rvv/autovec/binop/vec_sat_u_sub-26.c: Ditto.
* gcc.target/riscv/rvv/autovec/binop/vec_sat_u_sub-27.c: Ditto.
* gcc.target/riscv/rvv/autovec/binop/vec_sat_u_sub-28.c: Ditto.
* gcc.target/riscv/rvv/autovec/binop/vec_sat_u_sub-29.c: Ditto.
* gcc.target/riscv/rvv/autovec/binop/vec_sat_u_sub-3.c: Ditto.
* gcc.target/riscv/rvv/autovec/binop/vec_sat_

[gcc r15-5389] RISC-V: Add else operand to masked loads [PR115336].

2024-11-18 Thread Robin Dapp via Gcc-cvs
https://gcc.gnu.org/g:b89273a049a76ffc29dd43a536ad329f0d994c05

commit r15-5389-gb89273a049a76ffc29dd43a536ad329f0d994c05
Author: Robin Dapp 
Date:   Thu Aug 8 10:31:22 2024 +0200

RISC-V: Add else operand to masked loads [PR115336].

This patch adds else operands to masked loads.  Currently the default
else operand predicate just accepts "undefined" (i.e. SCRATCH) values.

PR middle-end/115336
PR middle-end/116059

gcc/ChangeLog:

* config/riscv/autovec.md: Add else operand.
* config/riscv/predicates.md (maskload_else_operand): New
predicate.
* config/riscv/riscv-v.cc (get_else_operand): Remove static.
(expand_load_store): Use get_else_operand and adjust index.
(expand_gather_scatter): Ditto.
(expand_lanes_load_store): Ditto.

gcc/testsuite/ChangeLog:

* gcc.target/riscv/rvv/autovec/pr115336.c: New test.
* gcc.target/riscv/rvv/autovec/pr116059.c: New test.

Diff:
---
 gcc/config/riscv/autovec.md| 50 +-
 gcc/config/riscv/predicates.md |  3 ++
 gcc/config/riscv/riscv-v.cc| 30 -
 .../gcc.target/riscv/rvv/autovec/pr115336.c| 20 +
 .../gcc.target/riscv/rvv/autovec/pr116059.c| 15 +++
 5 files changed, 88 insertions(+), 30 deletions(-)

diff --git a/gcc/config/riscv/autovec.md b/gcc/config/riscv/autovec.md
index b5fbe98b5fc5..c64ef5a12b43 100644
--- a/gcc/config/riscv/autovec.md
+++ b/gcc/config/riscv/autovec.md
@@ -26,8 +26,9 @@
   [(match_operand:V 0 "register_operand")
(match_operand:V 1 "memory_operand")
(match_operand: 2 "vector_mask_operand")
-   (match_operand 3 "autovec_length_operand")
-   (match_operand 4 "const_0_operand")]
+   (match_operand:V 3 "maskload_else_operand")
+   (match_operand 4 "autovec_length_operand")
+   (match_operand 5 "const_0_operand")]
   "TARGET_VECTOR"
 {
   riscv_vector::expand_load_store (operands, true);
@@ -57,8 +58,9 @@
(match_operand 3 "")
(match_operand 4 "")
(match_operand: 5 "vector_mask_operand")
-   (match_operand 6 "autovec_length_operand")
-   (match_operand 7 "const_0_operand")]
+   (match_operand 6 "maskload_else_operand")
+   (match_operand 7 "autovec_length_operand")
+   (match_operand 8 "const_0_operand")]
   "TARGET_VECTOR && riscv_vector::gather_scatter_valid_offset_p 
(mode)"
 {
   riscv_vector::expand_gather_scatter (operands, true);
@@ -72,8 +74,9 @@
(match_operand 3 "")
(match_operand 4 "")
(match_operand: 5 "vector_mask_operand")
-   (match_operand 6 "autovec_length_operand")
-   (match_operand 7 "const_0_operand")]
+   (match_operand 6 "maskload_else_operand")
+   (match_operand 7 "autovec_length_operand")
+   (match_operand 8 "const_0_operand")]
   "TARGET_VECTOR && riscv_vector::gather_scatter_valid_offset_p 
(mode)"
 {
   riscv_vector::expand_gather_scatter (operands, true);
@@ -87,8 +90,9 @@
(match_operand 3 "")
(match_operand 4 "")
(match_operand: 5 "vector_mask_operand")
-   (match_operand 6 "autovec_length_operand")
-   (match_operand 7 "const_0_operand")]
+   (match_operand 6 "maskload_else_operand")
+   (match_operand 7 "autovec_length_operand")
+   (match_operand 8 "const_0_operand")]
   "TARGET_VECTOR && riscv_vector::gather_scatter_valid_offset_p 
(mode)"
 {
   riscv_vector::expand_gather_scatter (operands, true);
@@ -102,8 +106,9 @@
(match_operand 3 "")
(match_operand 4 "")
(match_operand: 5 "vector_mask_operand")
-   (match_operand 6 "autovec_length_operand")
-   (match_operand 7 "const_0_operand")]
+   (match_operand 6 "maskload_else_operand")
+   (match_operand 7 "autovec_length_operand")
+   (match_operand 8 "const_0_operand")]
   "TARGET_VECTOR && riscv_vector::gather_scatter_valid_offset_p 
(mode)"
 {
   riscv_vector::expand_gather_scatter (operands, true);
@@ -117,8 +122,9 @@
(match_operand 3 "")
(match_operand 4 "")
(match_operand: 5 "vector_mask_operand")
-   (match_operand 6 "autovec_length_operand")
-   (match_operand 7 "const_0_operand")]
+   (match_operand 6 "maskload_else_operand")
+   (match_operand 7 "autovec_length_operand")
+   (match_operand 8 "const_0_operand")]
   "TARGET_VECTOR && riscv_vector::gather_scatter_valid_offset_p 
(mode)"
 {
   riscv_vector::expand_gather_scatter (operands, true);
@@ -132,8 +138,9 @@
(match_operand 3 "")
(match_operand 4 "")
(match_operand: 5 "vector_mask_operand")
-   (match_operand 6 "autovec_length_operand")
-   (match_operand 7 "const_0_operand")]
+   (match_operand 6 "maskload_else_operand")
+   (match_operand 7 "autovec_length_operand")
+   (match_operand 8 "const_0_operand")]
   "TARGET_VECTOR && riscv_vector::gather_scatter_valid_offset_p 
(mode)"
 {
   riscv_vector::expand_gather_scatter (operands, true);
@@ -151,8 +158,9 @@
(match_operand 3 "")
(match_operand 4 "")
(match_operand: 5 "vector_m

[gcc r15-5385] vect: Add maskload else value support.

2024-11-18 Thread Robin Dapp via Gcc-cvs
https://gcc.gnu.org/g:634ae740f5a839df6567c58cfdcd32a3833c4626

commit r15-5385-g634ae740f5a839df6567c58cfdcd32a3833c4626
Author: Robin Dapp 
Date:   Thu Aug 8 14:29:05 2024 +0200

vect: Add maskload else value support.

This patch adds an else operand to vectorized masked load calls.
The current implementation adds else-value arguments to the respective
target-querying functions that are used to supply the vectorizer with the
proper else value.

We query the target for its supported else operand and use that for the
maskload call.  If necessary, i.e. if the mode has padding bits and if
the else operand is nonzero, a VEC_COND enforcing a zero else value is
emitted.

gcc/ChangeLog:

* optabs-query.cc (supports_vec_convert_optab_p): Return icode.
(get_supported_else_val): Return supported else value for
optab's operand at index.
(supports_vec_gather_load_p): Add else argument.
(supports_vec_scatter_store_p): Ditto.
* optabs-query.h (supports_vec_gather_load_p): Ditto.
(get_supported_else_val): Ditto.
* optabs-tree.cc (target_supports_mask_load_store_p): Ditto.
(can_vec_mask_load_store_p): Ditto.
(target_supports_len_load_store_p): Ditto.
(get_len_load_store_mode): Ditto.
* optabs-tree.h (target_supports_mask_load_store_p): Ditto.
(can_vec_mask_load_store_p): Ditto.
* tree-vect-data-refs.cc (vect_lanes_optab_supported_p): Ditto.
(vect_gather_scatter_fn_p): Ditto.
(vect_check_gather_scatter): Ditto.
(vect_load_lanes_supported): Ditto.
* tree-vect-patterns.cc (vect_recog_gather_scatter_pattern):
Ditto.
* tree-vect-slp.cc (vect_get_operand_map): Adjust indices for
else operand.
(vect_slp_analyze_node_operations): Skip undefined else operand.
* tree-vect-stmts.cc (exist_non_indexing_operands_for_use_p):
Add else operand handling.
(vect_get_vec_defs_for_operand): Handle undefined else operand.
(check_load_store_for_partial_vectors): Add else argument.
(vect_truncate_gather_scatter_offset): Ditto.
(vect_use_strided_gather_scatters_p): Ditto.
(get_group_load_store_type): Ditto.
(get_load_store_type): Ditto.
(vect_get_mask_load_else): Ditto.
(vect_get_else_val_from_tree): Ditto.
(vect_build_one_gather_load_call): Add zero else operand.
(vectorizable_load): Use else operand.
* tree-vectorizer.h (vect_gather_scatter_fn_p): Add else
argument.
(vect_load_lanes_supported): Ditto.
(vect_get_mask_load_else): Ditto.
(vect_get_else_val_from_tree): Ditto.

Diff:
---
 gcc/optabs-query.cc|  70 +++---
 gcc/optabs-query.h |   3 +-
 gcc/optabs-tree.cc |  66 ++---
 gcc/optabs-tree.h  |   8 +-
 gcc/tree-vect-data-refs.cc |  74 +++---
 gcc/tree-vect-patterns.cc  |  12 +-
 gcc/tree-vect-slp.cc   |  22 ++-
 gcc/tree-vect-stmts.cc | 326 +
 gcc/tree-vectorizer.h  |  10 +-
 9 files changed, 466 insertions(+), 125 deletions(-)

diff --git a/gcc/optabs-query.cc b/gcc/optabs-query.cc
index cc52bc0f5ea7..c1f3558af920 100644
--- a/gcc/optabs-query.cc
+++ b/gcc/optabs-query.cc
@@ -29,6 +29,9 @@ along with GCC; see the file COPYING3.  If not see
 #include "rtl.h"
 #include "recog.h"
 #include "vec-perm-indices.h"
+#include "internal-fn.h"
+#include "memmodel.h"
+#include "optabs.h"
 
 struct target_optabs default_target_optabs;
 struct target_optabs *this_fn_optabs = &default_target_optabs;
@@ -672,34 +675,57 @@ lshift_cheap_p (bool speed_p)
that mode, given that the second mode is always an integer vector.
If MODE is VOIDmode, return true if OP supports any vector mode.  */
 
-static bool
-supports_vec_convert_optab_p (optab op, machine_mode mode)
+static enum insn_code
+supported_vec_convert_optab (optab op, machine_mode mode)
 {
   int start = mode == VOIDmode ? 0 : mode;
   int end = mode == VOIDmode ? MAX_MACHINE_MODE - 1 : mode;
+  enum insn_code icode = CODE_FOR_nothing;
   for (int i = start; i <= end; ++i)
 if (VECTOR_MODE_P ((machine_mode) i))
   for (int j = MIN_MODE_VECTOR_INT; j < MAX_MODE_VECTOR_INT; ++j)
-   if (convert_optab_handler (op, (machine_mode) i,
-  (machine_mode) j) != CODE_FOR_nothing)
- return true;
+   {
+ if ((icode
+  = convert_optab_handler (op, (machine_mode) i,
+   (machine_mode) j)) != CODE_FOR_nothing)
+   return icode;
+   }
 
-  return false;
+  return icode;
 }
 
 /* If MODE is not VOIDmode, return true if vec_gather_load is available for
that mode.  If MODE is V

[gcc r15-5387] gcn: Add else operand to masked loads.

2024-11-18 Thread Robin Dapp via Gcc-cvs
https://gcc.gnu.org/g:4a39addb4921ca1f7aa013835cd1351226e5e6b6

commit r15-5387-g4a39addb4921ca1f7aa013835cd1351226e5e6b6
Author: Robin Dapp 
Date:   Thu Aug 8 10:31:05 2024 +0200

gcn: Add else operand to masked loads.

This patch adds an undefined else operand to the masked loads.

gcc/ChangeLog:

* config/gcn/predicates.md (maskload_else_operand): New
predicate.
* config/gcn/gcn-valu.md: Use new predicate.

Diff:
---
 gcc/config/gcn/gcn-valu.md   | 23 +++
 gcc/config/gcn/predicates.md |  2 ++
 2 files changed, 13 insertions(+), 12 deletions(-)

diff --git a/gcc/config/gcn/gcn-valu.md b/gcc/config/gcn/gcn-valu.md
index cb2f4a780355..ce7a68f0e2d3 100644
--- a/gcc/config/gcn/gcn-valu.md
+++ b/gcc/config/gcn/gcn-valu.md
@@ -3989,7 +3989,8 @@
 (define_expand "maskloaddi"
   [(match_operand:V_MOV 0 "register_operand")
(match_operand:V_MOV 1 "memory_operand")
-   (match_operand 2 "")]
+   (match_operand 2 "")
+   (match_operand:V_MOV 3 "maskload_else_operand")]
   ""
   {
 rtx exec = force_reg (DImode, operands[2]);
@@ -3998,11 +3999,8 @@
 rtx as = gen_rtx_CONST_INT (VOIDmode, MEM_ADDR_SPACE (operands[1]));
 rtx v = gen_rtx_CONST_INT (VOIDmode, MEM_VOLATILE_P (operands[1]));
 
-/* Masked lanes are required to hold zero.  */
-emit_move_insn (operands[0], gcn_vec_constant (mode, 0));
-
 emit_insn (gen_gather_expr_exec (operands[0], addr, as, v,
-  operands[0], exec));
+  gcn_gen_undef (mode), exec));
 DONE;
   })
 
@@ -4027,7 +4025,8 @@
(match_operand: 2 "register_operand")
(match_operand 3 "immediate_operand")
(match_operand:SI 4 "gcn_alu_operand")
-   (match_operand:DI 5 "")]
+   (match_operand:DI 5 "")
+   (match_operand:V_MOV 6 "maskload_else_operand")]
   ""
   {
 rtx exec = force_reg (DImode, operands[5]);
@@ -4036,18 +4035,18 @@
  operands[2], operands[4],
  INTVAL (operands[3]), exec);
 
-/* Masked lanes are required to hold zero.  */
-emit_move_insn (operands[0], gcn_vec_constant (mode, 0));
-
 if (GET_MODE (addr) == mode)
   emit_insn (gen_gather_insn_1offset_exec (operands[0], addr,
 const0_rtx, const0_rtx,
-const0_rtx, operands[0],
-exec));
+gcn_gen_undef
+   (mode),
+operands[0], exec));
 else
   emit_insn (gen_gather_insn_2offsets_exec (operands[0], operands[1],
  addr, const0_rtx,
- const0_rtx, const0_rtx,
+ const0_rtx,
+ gcn_gen_undef
+   (mode),
  operands[0], exec));
 DONE;
   })
diff --git a/gcc/config/gcn/predicates.md b/gcc/config/gcn/predicates.md
index 3f59396a6498..21beeb586a44 100644
--- a/gcc/config/gcn/predicates.md
+++ b/gcc/config/gcn/predicates.md
@@ -228,3 +228,5 @@
   return gcn_stepped_zero_int_parallel_p (op, 1);
 })
 
+(define_predicate "maskload_else_operand"
+  (match_operand 0 "scratch_operand"))


[gcc r15-5386] aarch64: Add masked-load else operands.

2024-11-18 Thread Robin Dapp via Gcc-cvs
https://gcc.gnu.org/g:a166a6ccdc6c3d6532a24ba3a2057a177ce44752

commit r15-5386-ga166a6ccdc6c3d6532a24ba3a2057a177ce44752
Author: Robin Dapp 
Date:   Thu Aug 8 10:30:58 2024 +0200

aarch64: Add masked-load else operands.

This adds zero else operands to masked loads and their intrinsics.
I needed to adjust more than initially thought because we rely on
combine for several instructions and a change in a "base" pattern
needs to propagate to all those.

gcc/ChangeLog:

* config/aarch64/aarch64-sve-builtins-base.cc: Add else
handling.
* config/aarch64/aarch64-sve-builtins.cc 
(function_expander::use_contiguous_load_insn):
Ditto.
* config/aarch64/aarch64-sve-builtins.h: Add else operand to
contiguous load.
* config/aarch64/aarch64-sve.md 
(@aarch64_load
_):
Split and add else operand.

(@aarch64_load_):
Ditto.

(*aarch64_load__mov):
Ditto.
* config/aarch64/aarch64-sve2.md: Ditto.
* config/aarch64/iterators.md: Remove unused iterators.
* config/aarch64/predicates.md (aarch64_maskload_else_operand):
Add zero else operand.

Diff:
---
 gcc/config/aarch64/aarch64-sve-builtins-base.cc | 24 +++-
 gcc/config/aarch64/aarch64-sve-builtins.cc  | 12 +-
 gcc/config/aarch64/aarch64-sve-builtins.h   |  2 +-
 gcc/config/aarch64/aarch64-sve.md   | 52 +
 gcc/config/aarch64/aarch64-sve2.md  |  3 +-
 gcc/config/aarch64/iterators.md |  4 --
 gcc/config/aarch64/predicates.md|  4 ++
 7 files changed, 77 insertions(+), 24 deletions(-)

diff --git a/gcc/config/aarch64/aarch64-sve-builtins-base.cc 
b/gcc/config/aarch64/aarch64-sve-builtins-base.cc
index 2117eceb6063..20820fb1985c 100644
--- a/gcc/config/aarch64/aarch64-sve-builtins-base.cc
+++ b/gcc/config/aarch64/aarch64-sve-builtins-base.cc
@@ -1524,11 +1524,12 @@ public:
 gimple_seq stmts = NULL;
 tree pred = f.convert_pred (stmts, vectype, 0);
 tree base = f.fold_contiguous_base (stmts, vectype);
+tree els = build_zero_cst (vectype);
 gsi_insert_seq_before (f.gsi, stmts, GSI_SAME_STMT);
 
 tree cookie = f.load_store_cookie (TREE_TYPE (vectype));
-gcall *new_call = gimple_build_call_internal (IFN_MASK_LOAD, 3,
- base, cookie, pred);
+gcall *new_call = gimple_build_call_internal (IFN_MASK_LOAD, 4,
+ base, cookie, pred, els);
 gimple_call_set_lhs (new_call, f.lhs);
 return new_call;
   }
@@ -1542,7 +1543,7 @@ public:
 e.vector_mode (0), e.gp_mode (0));
 else
   icode = code_for_aarch64 (UNSPEC_LD1_COUNT, e.tuple_mode (0));
-return e.use_contiguous_load_insn (icode);
+return e.use_contiguous_load_insn (icode, true);
   }
 };
 
@@ -1555,10 +1556,10 @@ public:
   rtx
   expand (function_expander &e) const override
   {
-insn_code icode = code_for_aarch64_load (UNSPEC_LD1_SVE, extend_rtx_code 
(),
+insn_code icode = code_for_aarch64_load (extend_rtx_code (),
 e.vector_mode (0),
 e.memory_vector_mode ());
-return e.use_contiguous_load_insn (icode);
+return e.use_contiguous_load_insn (icode, true);
   }
 };
 
@@ -1577,6 +1578,8 @@ public:
 e.prepare_gather_address_operands (1);
 /* Put the predicate last, as required by mask_gather_load_optab.  */
 e.rotate_inputs_left (0, 5);
+/* Add the else operand.  */
+e.args.quick_push (CONST0_RTX (e.vector_mode (0)));
 machine_mode mem_mode = e.memory_vector_mode ();
 machine_mode int_mode = aarch64_sve_int_mode (mem_mode);
 insn_code icode = convert_optab_handler (mask_gather_load_optab,
@@ -1600,6 +1603,8 @@ public:
 e.rotate_inputs_left (0, 5);
 /* Add a constant predicate for the extension rtx.  */
 e.args.quick_push (CONSTM1_RTX (VNx16BImode));
+/* Add the else operand.  */
+e.args.quick_push (CONST0_RTX (e.vector_mode (1)));
 insn_code icode = code_for_aarch64_gather_load (extend_rtx_code (),
e.vector_mode (0),
e.memory_vector_mode ());
@@ -1742,6 +1747,7 @@ public:
 /* Get the predicate and base pointer.  */
 gimple_seq stmts = NULL;
 tree pred = f.convert_pred (stmts, vectype, 0);
+tree els = build_zero_cst (vectype);
 tree base = f.fold_contiguous_base (stmts, vectype);
 gsi_insert_seq_before (f.gsi, stmts, GSI_SAME_STMT);
 
@@ -1760,8 +1766,8 @@ public:
 
 /* Emit the load itself.  */
 tree cookie = f.load_store_cookie (TREE_TYPE (vectype));
-gcall *new_call = gimple_build_call_internal (IFN_MASK_LOAD_LANES, 3,
- 

[gcc r15-5384] tree-ifcvt: Add zero maskload else value.

2024-11-18 Thread Robin Dapp via Gcc-cvs
https://gcc.gnu.org/g:6b6bd53619fd11bab3def8dee737711a7ee539ea

commit r15-5384-g6b6bd53619fd11bab3def8dee737711a7ee539ea
Author: Robin Dapp 
Date:   Thu Aug 8 12:54:36 2024 +0200

tree-ifcvt: Add zero maskload else value.

When predicating a load we implicitly assume that the else value is
zero.  This matters in case the loaded value is padded (e.g.
a Bool) and we must ensure that the padding bytes are zero on targets
that don't implicitly zero inactive elements.

A former version of this patch still had this handling in ifcvt but
the latest version defers it to the vectorizer.

gcc/ChangeLog:

* tree-if-conv.cc (predicate_load_or_store): Add zero else
operand and comment.

Diff:
---
 gcc/tree-if-conv.cc | 12 ++--
 1 file changed, 10 insertions(+), 2 deletions(-)

diff --git a/gcc/tree-if-conv.cc b/gcc/tree-if-conv.cc
index eb981642bae1..f1a1f8fd0d35 100644
--- a/gcc/tree-if-conv.cc
+++ b/gcc/tree-if-conv.cc
@@ -2555,9 +2555,17 @@ predicate_load_or_store (gimple_stmt_iterator *gsi, 
gassign *stmt, tree mask)
   ref);
   if (TREE_CODE (lhs) == SSA_NAME)
 {
+  /* Get a zero else value.  This might not be what a target actually uses
+but we cannot be sure about which vector mode the vectorizer will
+choose.  Therefore, leave the decision whether we need to force the
+inactive elements to zero to the vectorizer.  */
+  tree els = vect_get_mask_load_else (MASK_LOAD_ELSE_ZERO,
+ TREE_TYPE (lhs));
+
   new_stmt
-   = gimple_build_call_internal (IFN_MASK_LOAD, 3, addr,
- ptr, mask);
+   = gimple_build_call_internal (IFN_MASK_LOAD, 4, addr,
+ ptr, mask, els);
+
   gimple_call_set_lhs (new_stmt, lhs);
   gimple_set_vuse (new_stmt, gimple_vuse (stmt));
 }


[gcc r15-5382] docs: Document maskload else operand and behavior.

2024-11-18 Thread Robin Dapp via Gcc-cvs
https://gcc.gnu.org/g:5214ddb464aab6c98b6eb6a267dcc9952f030d2f

commit r15-5382-g5214ddb464aab6c98b6eb6a267dcc9952f030d2f
Author: Robin Dapp 
Date:   Thu Aug 8 10:32:25 2024 +0200

docs: Document maskload else operand and behavior.

This patch amends the documentation for masked loads (maskload,
vec_mask_load_lanes, and mask_gather_load as well as their len
counterparts) with an else operand.

gcc/ChangeLog:

* doc/md.texi: Document masked load else operand.

Diff:
---
 gcc/doc/md.texi | 63 +
 1 file changed, 41 insertions(+), 22 deletions(-)

diff --git a/gcc/doc/md.texi b/gcc/doc/md.texi
index 25ded86f0d14..c8f1424a0424 100644
--- a/gcc/doc/md.texi
+++ b/gcc/doc/md.texi
@@ -5014,8 +5014,10 @@ This pattern is not allowed to @code{FAIL}.
 @item @samp{vec_mask_load_lanes@var{m}@var{n}}
 Like @samp{vec_load_lanes@var{m}@var{n}}, but takes an additional
 mask operand (operand 2) that specifies which elements of the destination
-vectors should be loaded.  Other elements of the destination
-vectors are set to zero.  The operation is equivalent to:
+vectors should be loaded.  Other elements of the destination vectors are
+taken from operand 3, which is an else operand similar to the one in
+@code{maskload}.
+The operation is equivalent to:
 
 @smallexample
 int c = GET_MODE_SIZE (@var{m}) / GET_MODE_SIZE (@var{n});
@@ -5025,7 +5027,7 @@ for (j = 0; j < GET_MODE_NUNITS (@var{n}); j++)
   operand0[i][j] = operand1[j * c + i];
   else
 for (i = 0; i < c; i++)
-  operand0[i][j] = 0;
+  operand0[i][j] = operand3[j];
 @end smallexample
 
 This pattern is not allowed to @code{FAIL}.
@@ -5033,16 +5035,20 @@ This pattern is not allowed to @code{FAIL}.
 @cindex @code{vec_mask_len_load_lanes@var{m}@var{n}} instruction pattern
 @item @samp{vec_mask_len_load_lanes@var{m}@var{n}}
 Like @samp{vec_load_lanes@var{m}@var{n}}, but takes an additional
-mask operand (operand 2), length operand (operand 3) as well as bias operand 
(operand 4)
-that specifies which elements of the destination vectors should be loaded.
-Other elements of the destination vectors are undefined.  The operation is 
equivalent to:
+mask operand (operand 2), length operand (operand 4) as well as bias operand
+(operand 5) that specifies which elements of the destination vectors should be
+loaded.  Other elements of the destination vectors are taken from operand 3,
+which is an else operand similar to the one in @code{maskload}.
+The operation is equivalent to:
 
 @smallexample
 int c = GET_MODE_SIZE (@var{m}) / GET_MODE_SIZE (@var{n});
-for (j = 0; j < operand3 + operand4; j++)
-  if (operand2[j])
-for (i = 0; i < c; i++)
+for (j = 0; j < operand4 + operand5; j++)
+  for (i = 0; i < c; i++)
+if (operand2[j])
   operand0[i][j] = operand1[j * c + i];
+else
+  operand0[i][j] = operand3[j];
 @end smallexample
 
 This pattern is not allowed to @code{FAIL}.
@@ -5122,18 +5128,25 @@ address width.
 @cindex @code{mask_gather_load@var{m}@var{n}} instruction pattern
 @item @samp{mask_gather_load@var{m}@var{n}}
 Like @samp{gather_load@var{m}@var{n}}, but takes an extra mask operand as
-operand 5.  Bit @var{i} of the mask is set if element @var{i}
+operand 5.
+Other elements of the destination vectors are taken from operand 6,
+which is an else operand similar to the one in @code{maskload}.
+Bit @var{i} of the mask is set if element @var{i}
 of the result should be loaded from memory and clear if element @var{i}
-of the result should be set to zero.
+of the result should be set to operand 6.
 
 @cindex @code{mask_len_gather_load@var{m}@var{n}} instruction pattern
 @item @samp{mask_len_gather_load@var{m}@var{n}}
-Like @samp{gather_load@var{m}@var{n}}, but takes an extra mask operand 
(operand 5),
-a len operand (operand 6) as well as a bias operand (operand 7).  Similar to 
mask_len_load,
-the instruction loads at most (operand 6 + operand 7) elements from memory.
+Like @samp{gather_load@var{m}@var{n}}, but takes an extra mask operand
+(operand 5) and an else operand (operand 6) as well as a len operand
+(operand 7) and a bias operand (operand 8).
+
+Similar to mask_len_load the instruction loads at
+most (operand 7 + operand 8) elements from memory.
 Bit @var{i} of the mask is set if element @var{i} of the result should
-be loaded from memory and clear if element @var{i} of the result should be 
undefined.
-Mask elements @var{i} with @var{i} > (operand 6 + operand 7) are ignored.
+be loaded from memory and clear if element @var{i} of the result should
+be set to element @var{i} of operand 6.
+Mask elements @var{i} with @var{i} > (operand 7 + operand 8) are ignored.
 
 @cindex @code{mask_len_strided_load@var{m}} instruction pattern
 @item @samp{mask_len_strided_load@var{m}}
@@ -5392,8 +5405,13 @@ Operands 4 and 5 have a target-dependent scalar integer 
mode.
 @cindex @code{maskload@var{m}@var{n}} instruction pattern
 @item @samp{maskload@v

[gcc r15-5390] RISC-V: Add VLS modes to strided loads.

2024-11-18 Thread Robin Dapp via Gcc-cvs
https://gcc.gnu.org/g:52a392b8b797d01a7b0b06c8f20b0bf8374d489e

commit r15-5390-g52a392b8b797d01a7b0b06c8f20b0bf8374d489e
Author: Robin Dapp 
Date:   Mon Nov 4 15:34:50 2024 +0100

RISC-V: Add VLS modes to strided loads.

This patch adds VLS modes to the strided load expanders.

gcc/ChangeLog:

* config/riscv/autovec.md: Add VLS modes.
* config/riscv/vector-iterators.md: Ditto.
* config/riscv/vector.md: Ditto.

Diff:
---
 gcc/config/riscv/autovec.md  |   4 +-
 gcc/config/riscv/vector-iterators.md | 243 +++
 gcc/config/riscv/vector.md   |  22 ++--
 3 files changed, 256 insertions(+), 13 deletions(-)

diff --git a/gcc/config/riscv/autovec.md b/gcc/config/riscv/autovec.md
index c64ef5a12b43..2529dc77f221 100644
--- a/gcc/config/riscv/autovec.md
+++ b/gcc/config/riscv/autovec.md
@@ -2903,7 +2903,7 @@
 ;; == Strided Load/Store
 ;; =
 (define_expand "mask_len_strided_load_"
-  [(match_operand:V 0 "register_operand")
+  [(match_operand:V_VLS 0 "register_operand")
(match_operand   1 "pmode_reg_or_0_operand")
(match_operand   2 "pmode_reg_or_0_operand")
(match_operand:  3 "vector_mask_operand")
@@ -2919,7 +2919,7 @@
 (define_expand "mask_len_strided_store_"
   [(match_operand   0 "pmode_reg_or_0_operand")
(match_operand   1 "pmode_reg_or_0_operand")
-   (match_operand:V 2 "register_operand")
+   (match_operand:V_VLS 2 "register_operand")
(match_operand:  3 "vector_mask_operand")
(match_operand   4 "autovec_length_operand")
(match_operand   5 "const_0_operand")]
diff --git a/gcc/config/riscv/vector-iterators.md 
b/gcc/config/riscv/vector-iterators.md
index 43325d1ba87a..6a621459cc4a 100644
--- a/gcc/config/riscv/vector-iterators.md
+++ b/gcc/config/riscv/vector-iterators.md
@@ -3524,6 +3524,87 @@
 
   (RVVM8DF "vector_eew64_stride_operand") (RVVM4DF 
"vector_eew64_stride_operand")
   (RVVM2DF "vector_eew64_stride_operand") (RVVM1DF 
"vector_eew64_stride_operand")
+
+  (V1QI "vector_eew8_stride_operand")
+  (V2QI "vector_eew8_stride_operand")
+  (V4QI "vector_eew8_stride_operand")
+  (V8QI "vector_eew8_stride_operand")
+  (V16QI "vector_eew8_stride_operand")
+  (V32QI "vector_eew8_stride_operand")
+  (V64QI "vector_eew8_stride_operand")
+  (V128QI "vector_eew8_stride_operand")
+  (V256QI "vector_eew8_stride_operand")
+  (V512QI "vector_eew8_stride_operand")
+  (V1024QI "vector_eew8_stride_operand")
+  (V2048QI "vector_eew8_stride_operand")
+  (V4096QI "vector_eew8_stride_operand")
+  (V1HI "vector_eew16_stride_operand")
+  (V2HI "vector_eew16_stride_operand")
+  (V4HI "vector_eew16_stride_operand")
+  (V8HI "vector_eew16_stride_operand")
+  (V16HI "vector_eew16_stride_operand")
+  (V32HI "vector_eew16_stride_operand")
+  (V64HI "vector_eew16_stride_operand")
+  (V128HI "vector_eew16_stride_operand")
+  (V256HI "vector_eew16_stride_operand")
+  (V512HI "vector_eew16_stride_operand")
+  (V1024HI "vector_eew16_stride_operand")
+  (V2048HI "vector_eew16_stride_operand")
+  (V1SI "vector_eew32_stride_operand")
+  (V2SI "vector_eew32_stride_operand")
+  (V4SI "vector_eew32_stride_operand")
+  (V8SI "vector_eew32_stride_operand")
+  (V16SI "vector_eew32_stride_operand")
+  (V32SI "vector_eew32_stride_operand")
+  (V64SI "vector_eew32_stride_operand")
+  (V128SI "vector_eew32_stride_operand")
+  (V256SI "vector_eew32_stride_operand")
+  (V512SI "vector_eew32_stride_operand")
+  (V1024SI "vector_eew32_stride_operand")
+  (V1DI "vector_eew64_stride_operand")
+  (V2DI "vector_eew64_stride_operand")
+  (V4DI "vector_eew64_stride_operand")
+  (V8DI "vector_eew64_stride_operand")
+  (V16DI "vector_eew64_stride_operand")
+  (V32DI "vector_eew64_stride_operand")
+  (V64DI "vector_eew64_stride_operand")
+  (V128DI "vector_eew64_stride_operand")
+  (V256DI "vector_eew64_stride_operand")
+  (V512DI "vector_eew64_stride_operand")
+
+  (V1HF "vector_eew16_stride_operand")
+  (V2HF "vector_eew16_stride_operand")
+  (V4HF "vector_eew16_stride_operand")
+  (V8HF "vector_eew16_stride_operand")
+  (V16HF "vector_eew16_stride_operand")
+  (V32HF "vector_eew16_stride_operand")
+  (V64HF "vector_eew16_stride_operand")
+  (V128HF "vector_eew16_stride_operand")
+  (V256HF "vector_eew16_stride_operand")
+  (V512HF "vector_eew16_stride_operand")
+  (V1024HF "vector_eew16_stride_operand")
+  (V2048HF "vector_eew16_stride_operand")
+  (V1SF "vector_eew32_stride_operand")
+  (V2SF "vector_eew32_stride_operand")
+  (V4SF "vector_eew32_stride_operand")
+  (V8SF "vector_eew32_stride_operand")
+  (V16SF "vector_eew32_stride_operand")
+  (V32SF "vector_eew32_stride_operand")
+  (V64SF "vector_eew32_stride_operand")
+  (V128SF "vector_eew32_stride_operand")
+  (V256SF "vector_eew32_stride_operand")
+  (V512SF "vector_eew32_stride_operand")
+  (V1024SF "vector_eew32_stride_operand")
+  (V1DF "vector_eew64_stride_ope

[gcc r15-5388] i386: Add zero maskload else operand.

2024-11-18 Thread Robin Dapp via Gcc-cvs
https://gcc.gnu.org/g:ebf30772415cfd3fa544fc7262b28b948591538f

commit r15-5388-gebf30772415cfd3fa544fc7262b28b948591538f
Author: Robin Dapp 
Date:   Tue Nov 5 14:47:07 2024 +0100

i386: Add zero maskload else operand.

gcc/ChangeLog:

* config/i386/sse.md (maskload):
Call maskload..._1.
(maskload_1): Rename.

Diff:
---
 gcc/config/i386/sse.md | 21 ++---
 1 file changed, 18 insertions(+), 3 deletions(-)

diff --git a/gcc/config/i386/sse.md b/gcc/config/i386/sse.md
index efe32e5149fc..72acd5bde5e4 100644
--- a/gcc/config/i386/sse.md
+++ b/gcc/config/i386/sse.md
@@ -28650,7 +28650,7 @@
(set_attr "btver2_decode" "vector") 
(set_attr "mode" "")])
 
-(define_expand "maskload"
+(define_expand "maskload_1"
   [(set (match_operand:V48_128_256 0 "register_operand")
(unspec:V48_128_256
  [(match_operand: 2 "register_operand")
@@ -28658,13 +28658,28 @@
  UNSPEC_MASKMOV))]
   "TARGET_AVX")
 
+(define_expand "maskload"
+  [(set (match_operand:V48_128_256 0 "register_operand")
+   (unspec:V48_128_256
+ [(match_operand: 2 "register_operand")
+  (match_operand:V48_128_256 1 "memory_operand")
+  (match_operand:V48_128_256 3 "const0_operand")]
+ UNSPEC_MASKMOV))]
+  "TARGET_AVX"
+{
+  emit_insn (gen_maskload_1 (operands[0],
+  operands[1],
+  operands[2]));
+  DONE;
+})
+
 (define_expand "maskload"
   [(set (match_operand:V48_AVX512VL 0 "register_operand")
(vec_merge:V48_AVX512VL
  (unspec:V48_AVX512VL
[(match_operand:V48_AVX512VL 1 "memory_operand")]
UNSPEC_MASKLOAD)
- (match_dup 0)
+  (match_operand:V48_AVX512VL 3 "const0_operand")
  (match_operand: 2 "register_operand")))]
   "TARGET_AVX512F")
 
@@ -28674,7 +28689,7 @@
  (unspec:VI12HFBF_AVX512VL
[(match_operand:VI12HFBF_AVX512VL 1 "memory_operand")]
UNSPEC_MASKLOAD)
- (match_dup 0)
+  (match_operand:VI12HFBF_AVX512VL 3 "const0_operand")
  (match_operand: 2 "register_operand")))]
   "TARGET_AVX512BW")


[gcc r15-5383] ifn: Add else-operand handling.

2024-11-18 Thread Robin Dapp via Gcc-cvs
https://gcc.gnu.org/g:8f68d9cb7897df188f7dcd733d8c385f77fd8011

commit r15-5383-g8f68d9cb7897df188f7dcd733d8c385f77fd8011
Author: Robin Dapp 
Date:   Thu Aug 8 10:54:35 2024 +0200

ifn: Add else-operand handling.

This patch adds else-operand handling to the internal functions.

gcc/ChangeLog:

* internal-fn.cc (add_mask_and_len_args): Rename...
(add_mask_else_and_len_args): ...to this and add else handling.
(expand_partial_load_optab_fn): Use adjusted function.
(expand_partial_store_optab_fn): Ditto.
(expand_scatter_store_optab_fn): Ditto.
(expand_gather_load_optab_fn): Ditto.
(internal_fn_len_index): Add else handling.
(internal_fn_else_index): Ditto.
(internal_fn_mask_index): Ditto.
(get_supported_else_vals): New function.
(supported_else_val_p): New function.
(internal_gather_scatter_fn_supported_p): Add else operand.
* internal-fn.h (internal_gather_scatter_fn_supported_p): Define
else constants.
(MASK_LOAD_ELSE_ZERO): Ditto.
(MASK_LOAD_ELSE_M1): Ditto.
(MASK_LOAD_ELSE_UNDEFINED): Ditto.
(get_supported_else_vals): Declare.
(supported_else_val_p): Ditto.

Diff:
---
 gcc/internal-fn.cc | 148 +
 gcc/internal-fn.h  |  13 -
 2 files changed, 139 insertions(+), 22 deletions(-)

diff --git a/gcc/internal-fn.cc b/gcc/internal-fn.cc
index 0ee5f5bc7c55..c522a53a4334 100644
--- a/gcc/internal-fn.cc
+++ b/gcc/internal-fn.cc
@@ -333,17 +333,18 @@ get_multi_vector_move (tree array_type, convert_optab 
optab)
   return convert_optab_handler (optab, imode, vmode);
 }
 
-/* Add mask and len arguments according to the STMT.  */
+/* Add mask, else, and len arguments according to the STMT.  */
 
 static unsigned int
-add_mask_and_len_args (expand_operand *ops, unsigned int opno, gcall *stmt)
+add_mask_else_and_len_args (expand_operand *ops, unsigned int opno, gcall 
*stmt)
 {
   internal_fn ifn = gimple_call_internal_fn (stmt);
   int len_index = internal_fn_len_index (ifn);
   /* BIAS is always consecutive next of LEN.  */
   int bias_index = len_index + 1;
   int mask_index = internal_fn_mask_index (ifn);
-  /* The order of arguments are always {len,bias,mask}.  */
+
+  /* The order of arguments is always {mask, else, len, bias}.  */
   if (mask_index >= 0)
 {
   tree mask = gimple_call_arg (stmt, mask_index);
@@ -365,6 +366,22 @@ add_mask_and_len_args (expand_operand *ops, unsigned int 
opno, gcall *stmt)
   create_input_operand (&ops[opno++], mask_rtx,
TYPE_MODE (TREE_TYPE (mask)));
 }
+
+  int els_index = internal_fn_else_index (ifn);
+  if (els_index >= 0)
+{
+  tree els = gimple_call_arg (stmt, els_index);
+  tree els_type = TREE_TYPE (els);
+  if (TREE_CODE (els) == SSA_NAME
+ && SSA_NAME_IS_DEFAULT_DEF (els)
+ && VAR_P (SSA_NAME_VAR (els)))
+   create_undefined_input_operand (&ops[opno++], TYPE_MODE (els_type));
+  else
+   {
+ rtx els_rtx = expand_normal (els);
+ create_input_operand (&ops[opno++], els_rtx, TYPE_MODE (els_type));
+   }
+}
   if (len_index >= 0)
 {
   tree len = gimple_call_arg (stmt, len_index);
@@ -3024,7 +3041,7 @@ static void
 expand_partial_load_optab_fn (internal_fn ifn, gcall *stmt, convert_optab 
optab)
 {
   int i = 0;
-  class expand_operand ops[5];
+  class expand_operand ops[6];
   tree type, lhs, rhs, maskt;
   rtx mem, target;
   insn_code icode;
@@ -3054,7 +3071,7 @@ expand_partial_load_optab_fn (internal_fn ifn, gcall 
*stmt, convert_optab optab)
   target = expand_expr (lhs, NULL_RTX, VOIDmode, EXPAND_WRITE);
   create_call_lhs_operand (&ops[i++], target, TYPE_MODE (type));
   create_fixed_operand (&ops[i++], mem);
-  i = add_mask_and_len_args (ops, i, stmt);
+  i = add_mask_else_and_len_args (ops, i, stmt);
   expand_insn (icode, i, ops);
 
   assign_call_lhs (lhs, target, &ops[0]);
@@ -3100,7 +3117,7 @@ expand_partial_store_optab_fn (internal_fn ifn, gcall 
*stmt, convert_optab optab
   reg = expand_normal (rhs);
   create_fixed_operand (&ops[i++], mem);
   create_input_operand (&ops[i++], reg, TYPE_MODE (type));
-  i = add_mask_and_len_args (ops, i, stmt);
+  i = add_mask_else_and_len_args (ops, i, stmt);
   expand_insn (icode, i, ops);
 }
 
@@ -3686,7 +3703,7 @@ expand_scatter_store_optab_fn (internal_fn, gcall *stmt, 
direct_optab optab)
   create_integer_operand (&ops[i++], TYPE_UNSIGNED (TREE_TYPE (offset)));
   create_integer_operand (&ops[i++], scale_int);
   create_input_operand (&ops[i++], rhs_rtx, TYPE_MODE (TREE_TYPE (rhs)));
-  i = add_mask_and_len_args (ops, i, stmt);
+  i = add_mask_else_and_len_args (ops, i, stmt);
 
   insn_code icode = convert_optab_handler (optab, TYPE_MODE (TREE_TYPE (rhs)),
   TYPE

[gcc r15-5444] RISC-V: Load VLS perm indices directly from memory.

2024-11-19 Thread Robin Dapp via Gcc-cvs
https://gcc.gnu.org/g:a18592e1c30f0f539c71fa632c49cb82008ec45a

commit r15-5444-ga18592e1c30f0f539c71fa632c49cb82008ec45a
Author: Robin Dapp 
Date:   Thu Sep 26 11:56:08 2024 +0200

RISC-V: Load VLS perm indices directly from memory.

Instead of loading the permutation indices and using vmslt in order to
determine which elements belong to which source vector we can compute
the proper mask at compile time.  That way we can emit vlm instead of
vle + vmslt.

gcc/ChangeLog:

* config/riscv/riscv-v.cc (shuffle_merge_patterns): Load VLS
indices directly.

gcc/testsuite/ChangeLog:

* gcc.target/riscv/rvv/autovec/vls/merge-1.c: Check for vlm and
no vmsleu etc.
* gcc.target/riscv/rvv/autovec/vls/merge-2.c: Ditto.
* gcc.target/riscv/rvv/autovec/vls/merge-3.c: Ditto.
* gcc.target/riscv/rvv/autovec/vls/merge-4.c: Ditto.
* gcc.target/riscv/rvv/autovec/vls/merge-5.c: Ditto.
* gcc.target/riscv/rvv/autovec/vls/merge-6.c: Ditto.

Diff:
---
 gcc/config/riscv/riscv-v.cc| 22 --
 .../gcc.target/riscv/rvv/autovec/vls/merge-1.c |  2 ++
 .../gcc.target/riscv/rvv/autovec/vls/merge-2.c |  2 ++
 .../gcc.target/riscv/rvv/autovec/vls/merge-3.c |  2 ++
 .../gcc.target/riscv/rvv/autovec/vls/merge-4.c |  2 ++
 .../gcc.target/riscv/rvv/autovec/vls/merge-5.c |  2 ++
 .../gcc.target/riscv/rvv/autovec/vls/merge-6.c |  2 ++
 7 files changed, 32 insertions(+), 2 deletions(-)

diff --git a/gcc/config/riscv/riscv-v.cc b/gcc/config/riscv/riscv-v.cc
index a0e22b6454b7..ee7a0128c0ed 100644
--- a/gcc/config/riscv/riscv-v.cc
+++ b/gcc/config/riscv/riscv-v.cc
@@ -3101,9 +3101,27 @@ shuffle_merge_patterns (struct expand_vec_perm_d *d)
   machine_mode mask_mode = get_mask_mode (vmode);
   rtx mask = gen_reg_rtx (mask_mode);
 
-  if (indices_fit_selector_p)
+  if (indices_fit_selector_p && vec_len.is_constant ())
 {
-  /* MASK = SELECTOR < NUNITS ? 1 : 0.  */
+  /* For a constant vector length we can generate the needed mask at
+compile time and load it as mask at runtime.
+This saves a compare at runtime.  */
+  rtx_vector_builder sel (mask_mode, d->perm.encoding ().npatterns (),
+ d->perm.encoding ().nelts_per_pattern ());
+  unsigned int encoded_nelts = sel.encoded_nelts ();
+  for (unsigned int i = 0; i < encoded_nelts; i++)
+   sel.quick_push (gen_int_mode (d->perm[i].to_constant ()
+ < vec_len.to_constant (),
+ GET_MODE_INNER (mask_mode)));
+  mask = sel.build ();
+}
+  else if (indices_fit_selector_p)
+{
+  /* For a dynamic vector length < 256 we keep the permutation
+indices in the literal pool, load it at runtime and create the
+mask by selecting either OP0 or OP1 by
+
+   INDICES < NUNITS ? 1 : 0.  */
   rtx sel = vec_perm_indices_to_rtx (sel_mode, d->perm);
   rtx x = gen_int_mode (vec_len, GET_MODE_INNER (sel_mode));
   insn_code icode = code_for_pred_cmp_scalar (sel_mode);
diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/vls/merge-1.c 
b/gcc/testsuite/gcc.target/riscv/rvv/autovec/vls/merge-1.c
index cd24922d0ad4..c34734cff6d2 100644
--- a/gcc/testsuite/gcc.target/riscv/rvv/autovec/vls/merge-1.c
+++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/vls/merge-1.c
@@ -4,3 +4,5 @@
 #include "../vls-vlmax/merge-1.c"
 
 /* { dg-final { scan-assembler-times {\tvmerge.vvm} 11 } } */
+/* { dg-final { scan-assembler-not {\tvms} } } */
+/* { dg-final { scan-assembler-times {\tvlm.v} 11 } } */
diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/vls/merge-2.c 
b/gcc/testsuite/gcc.target/riscv/rvv/autovec/vls/merge-2.c
index 52d91244f51b..68f7b62e62ff 100644
--- a/gcc/testsuite/gcc.target/riscv/rvv/autovec/vls/merge-2.c
+++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/vls/merge-2.c
@@ -4,3 +4,5 @@
 #include "../vls-vlmax/merge-2.c"
 
 /* { dg-final { scan-assembler-times {\tvmerge.vvm} 11 } } */
+/* { dg-final { scan-assembler-not {\tvms} } } */
+/* { dg-final { scan-assembler-times {\tvlm.v} 11 } } */
diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/vls/merge-3.c 
b/gcc/testsuite/gcc.target/riscv/rvv/autovec/vls/merge-3.c
index 4931d2a36047..1250dca65d11 100644
--- a/gcc/testsuite/gcc.target/riscv/rvv/autovec/vls/merge-3.c
+++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/vls/merge-3.c
@@ -4,3 +4,5 @@
 #include "../vls-vlmax/merge-3.c"
 
 /* { dg-final { scan-assembler-times {\tvmerge.vvm} 11 } } */
+/* { dg-final { scan-assembler-not {\tvms} } } */
+/* { dg-final { scan-assembler-times {\tvlm.v} 11 } } */
diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/vls/merge-4.c 
b/gcc/testsuite/gcc.target/riscv/rvv/autovec/vls/merge-4.c
index f22a18f8ef3b..1dfd8287b7f2 100644
--- a/gcc/testsuite/gcc.target/riscv/rvv/autovec/vls/merge

[gcc r15-5653] RISC-V: Ensure vtype for full-register moves [PR117544].

2024-11-25 Thread Robin Dapp via Gcc-cvs
https://gcc.gnu.org/g:b82a5810e7bcc82b933e16f9067879b9d22b85c7

commit r15-5653-gb82a5810e7bcc82b933e16f9067879b9d22b85c7
Author: Robin Dapp 
Date:   Thu Nov 21 14:49:53 2024 +0100

RISC-V: Ensure vtype for full-register moves [PR117544].

As discussed in PR117544 the VTYPE register is not preserved across
function calls.  Even though vmv1r-like instructions operate
independently of the actual vtype they still require a valid vtype.  As
we cannot guarantee that the vtype is valid we must make sure to emit a
vsetvl between a function call and a vmv1r.v.

This patch makes the necessary changes by splitting the full-reg-move
insns into patterns that use the vtype register and adding vmov to the
types of instructions requiring a vset.

PR target/117544

gcc/ChangeLog:

* config/riscv/vector.md (*mov_whole): Split.
(*mov_fract): Ditto.
(*mov): Ditto.
(*mov_vls): Ditto.
(*mov_reg_whole_vtype): New pattern with vtype use.
(*mov_fract_vtype): Ditto.
(*mov_vtype): Ditto.
(*mov_vls_vtype): Ditto.

gcc/testsuite/ChangeLog:

* gcc.target/riscv/rvv/base/abi-call-args-4.c: Expect vsetvl.
* gcc.target/riscv/rvv/base/pr117544.c: New test.

Diff:
---
 gcc/config/riscv/vector.md | 91 --
 .../gcc.target/riscv/rvv/base/abi-call-args-4.c|  1 +
 gcc/testsuite/gcc.target/riscv/rvv/base/pr117544.c | 14 
 3 files changed, 99 insertions(+), 7 deletions(-)

diff --git a/gcc/config/riscv/vector.md b/gcc/config/riscv/vector.md
index 02cbd2f56f19..57e3c34c1c5a 100644
--- a/gcc/config/riscv/vector.md
+++ b/gcc/config/riscv/vector.md
@@ -42,7 +42,8 @@
   (cond [(eq_attr "type" "vlde,vste,vldm,vstm,vlds,vsts,\
  vldux,vldox,vstux,vstox,vldff,\
  
vialu,viwalu,vext,vicalu,vshift,vnshift,vicmp,viminmax,\
- vimul,vidiv,viwmul,vimuladd,viwmuladd,vimerge,vimov,\
+ vimul,vidiv,viwmul,vimuladd,viwmuladd,vimerge,
+ vmov,vimov,\
  vsalu,vaalu,vsmul,vsshift,vnclip,\
  
vfalu,vfwalu,vfmul,vfdiv,vfwmul,vfmuladd,vfwmuladd,vfsqrt,vfrecp,\
  vfcmp,vfminmax,vfsgnj,vfclass,vfmerge,vfmov,\
@@ -1214,21 +1215,58 @@
 ;; which is not the pattern we want.
 ;; According the facts above, we make "*mov_whole" includes 
load/store/move for whole
 ;; vector modes according to '-march' and "*mov_fract" only include 
fractional vector modes.
-(define_insn "*mov_whole"
+(define_insn_and_split "*mov_whole"
   [(set (match_operand:V_WHOLE 0 "reg_or_mem_operand" "=vr, m,vr")
(match_operand:V_WHOLE 1 "reg_or_mem_operand" "  m,vr,vr"))]
   "TARGET_VECTOR && !TARGET_XTHEADVECTOR"
   "@
vl%m1re.v\t%0,%1
vs%m1r.v\t%1,%0
-   vmv%m1r.v\t%0,%1"
+   #"
+  "&& !memory_operand (operands[0], mode)
+   && !memory_operand (operands[1], mode)"
+  [(parallel [(set (match_dup 0) (match_dup 1))
+ (use (reg:SI VTYPE_REGNUM))])]
+  ""
   [(set_attr "type" "vldr,vstr,vmov")
(set_attr "mode" "")])
 
-(define_insn "*mov_fract"
+;; Full-register moves like vmv1r.v require a valid vtype.
+;; The ABI does not guarantee that the vtype is valid after a function
+;; call so we need to make it dependent on the vtype and have
+;; the vsetvl pass insert a vsetvl if necessary.
+;; To facilitate optimization we keep the reg-reg move patterns "regular"
+;; until split time and only then switch to a pattern like below that
+;; uses the vtype register.
+;; As the use of these patterns is limited (in the general context)
+;; there is no need for helper functions and we can just create the RTX
+;; directly.
+(define_insn "*mov_reg_whole_vtype"
+  [(set (match_operand:V_WHOLE 0 "reg_or_mem_operand" "=vr")
+   (match_operand:V_WHOLE 1 "reg_or_mem_operand" " vr"))
+   (use (reg:SI VTYPE_REGNUM))]
+  "TARGET_VECTOR && !TARGET_XTHEADVECTOR"
+  "vmv%m1r.v\t%0,%1"
+  [(set_attr "type" "vmov")
+   (set_attr "mode" "")])
+
+(define_insn_and_split "*mov_fract"
   [(set (match_operand:V_FRACT 0 "register_operand" "=vr")
(match_operand:V_FRACT 1 "register_operand" " vr"))]
   "TARGET_VECTOR"
+  "#"
+  "&& 1"
+  [(parallel [(set (match_dup 0) (match_dup 1))
+ (use (reg:SI VTYPE_REGNUM))])]
+  ""
+  [(set_attr "type" "vmov")
+   (set_attr "mode" "")])
+
+(define_insn "*mov_fract_vtype"
+  [(set (match_operand:V_FRACT 0 "register_operand" "=vr")
+   (match_operand:V_FRACT 1 "register_operand" " vr"))
+   (use (reg:SI VTYPE_REGNUM))]
+  "TARGET_VECTOR"
   "vmv1r.v\t%0,%1"
   [(set_attr "type" "vmov")
(set_attr "mode" "")])
@@ -1249,10 +1287,23 @@
 DONE;
 })
 
-(define_insn "*mov"
+(define_insn_and_split "*mov"
   [(set (match_operand:VB 0 "register_operand" "=vr")
(match_operand:VB 1 "register_operand" " vr"))]

[gcc r15-5652] genemit: Distribute evenly to files [PR111600].

2024-11-25 Thread Robin Dapp via Gcc-cvs
https://gcc.gnu.org/g:2e6b3308af6ddf87925321ddd2d387bfd352e410

commit r15-5652-g2e6b3308af6ddf87925321ddd2d387bfd352e410
Author: Robin Dapp 
Date:   Thu Nov 21 15:34:37 2024 +0100

genemit: Distribute evenly to files [PR111600].

Currently we distribute insn patterns in genemit, partitioning them
by the number of patterns per file.  The first 100 into file 1, the
next 100 into file 2, and so on.  Depending on the patterns this
can lead to files of very uneven sizes.

Similar to the genmatch split, this patch introduces a dynamic
choose_output () which considers the size of the output files
and selects the shortest one for the next pattern.

gcc/ChangeLog:

PR target/111600

* genemit.cc (handle_arg): Use files instead of filenames.
(main): Ditto.
* gensupport.cc (SIZED_BASED_CHUNKS): Define.
(choose_output): New function.
* gensupport.h (choose_output): Declare.

Diff:
---
 gcc/genemit.cc| 54 +-
 gcc/gensupport.cc | 33 +
 gcc/gensupport.h  |  1 +
 3 files changed, 51 insertions(+), 37 deletions(-)

diff --git a/gcc/genemit.cc b/gcc/genemit.cc
index 5d3d10f5061a..ee2f06cb7c2b 100644
--- a/gcc/genemit.cc
+++ b/gcc/genemit.cc
@@ -905,14 +905,15 @@ from the machine description file `md'.  */\n\n");
   fprintf (file, "#include \"target.h\"\n\n");
 }
 
-auto_vec output_files;
+auto_vec output_files;
 
 static bool
 handle_arg (const char *arg)
 {
   if (arg[1] == 'O')
 {
-  output_files.safe_push (&arg[2]);
+  FILE *file = fopen (&arg[2], "w");
+  output_files.safe_push (file);
   return true;
 }
   return false;
@@ -933,47 +934,21 @@ main (int argc, const char **argv)
   /* Assign sequential codes to all entries in the machine description
  in parallel with the tables in insn-output.cc.  */
 
-  int npatterns = count_patterns ();
   md_rtx_info info;
 
-  bool to_stdout = false;
-  int npatterns_per_file = npatterns;
-  if (!output_files.is_empty ())
-npatterns_per_file = npatterns / output_files.length () + 1;
-  else
-to_stdout = true;
-
-  gcc_assert (npatterns_per_file > 1);
+  if (output_files.is_empty ())
+output_files.safe_push (stdout);
 
-  /* Reverse so we can pop the first-added element.  */
-  output_files.reverse ();
+  for (auto f : output_files)
+print_header (f);
 
-  int count = 0;
   FILE *file = NULL;
+  unsigned file_idx;
 
   /* Read the machine description.  */
   while (read_md_rtx (&info))
 {
-  if (count == 0 || count == npatterns_per_file)
-   {
- bool is_last = !to_stdout && output_files.is_empty ();
- if (file && !is_last)
-   if (fclose (file) != 0)
- return FATAL_EXIT_CODE;
-
- if (!output_files.is_empty ())
-   {
- const char *const filename = output_files.pop ();
- file = fopen (filename, "w");
-   }
- else if (to_stdout)
-   file = stdout;
- else
-   break;
-
- print_header (file);
- count = 0;
-   }
+  file = choose_output (output_files, file_idx);
 
   switch (GET_CODE (info.def))
{
@@ -999,10 +974,10 @@ main (int argc, const char **argv)
default:
  break;
}
-
-  count++;
 }
 
+  file = choose_output (output_files, file_idx);
+
   /* Write out the routines to add CLOBBERs to a pattern and say whether they
  clobber a hard reg.  */
   output_add_clobbers (&info, file);
@@ -1015,5 +990,10 @@ main (int argc, const char **argv)
   handle_overloaded_gen (oname, file);
 }
 
-  return (fclose (file) != 0 ? FATAL_EXIT_CODE : SUCCESS_EXIT_CODE);
+  int ret = SUCCESS_EXIT_CODE;
+  for (FILE *f : output_files)
+if (fclose (f) != 0)
+  ret = FATAL_EXIT_CODE;
+
+  return ret;
 }
diff --git a/gcc/gensupport.cc b/gcc/gensupport.cc
index 3a02132c8761..e0adf0c1bc54 100644
--- a/gcc/gensupport.cc
+++ b/gcc/gensupport.cc
@@ -3913,3 +3913,36 @@ find_optab (optab_pattern *p, const char *name)
 }
   return false;
 }
+
+/* Find the file to write into next.  We try to evenly distribute the contents
+   over the different files.  */
+
+#define SIZED_BASED_CHUNKS 1
+
+FILE *
+choose_output (const vec &parts, unsigned &idx)
+{
+  if (parts.length () == 0)
+gcc_unreachable ();
+#ifdef SIZED_BASED_CHUNKS
+  FILE *shortest = NULL;
+  long min = 0;
+  idx = 0;
+  for (unsigned i = 0; i < parts.length (); i++)
+{
+  FILE *part  = parts[i];
+  long len = ftell (part);
+  if (!shortest || min > len)
+   {
+ shortest = part;
+ min = len;
+ idx = i;
+   }
+}
+  return shortest;
+#else
+  static int current_file;
+  idx = current_file++ % parts.length ();
+  return parts[idx];
+#endif
+}
diff --git a/gcc/gensupport.h b/gcc/gensupport.h
index b7a1da34518c..781c9e9ffcea 100644
--- a/gcc/g

[gcc r15-5673] RISC-V: avlprop: Do not propagate VL from slidedown.

2024-11-26 Thread Robin Dapp via Gcc-cvs
https://gcc.gnu.org/g:9c82afd42e7b5c3bdb849c66879138e59d8eb866

commit r15-5673-g9c82afd42e7b5c3bdb849c66879138e59d8eb866
Author: Robin Dapp 
Date:   Mon Nov 25 12:40:53 2024 +0100

RISC-V: avlprop: Do not propagate VL from slidedown.

In the following situation (found in the
rvv/autovec/vls-vlmax/shuffle-slide.c test which is not yet pushed)

vsetivli        zero,4,e8,mf4,ta,ma
vle8.v          v2,0(a1)        # (1)
vle8.v          v1,0(a2)        # (2)
vsetivli        zero,2,e8,mf4,tu,ma
vslidedown.vi   v1,v2,2
vsetivli        zero,4,e8,mf4,ta,ma
vse8.v          v1,0(a2)

we wrongly "propagate" VL=2 from vslidedown into the load.

Although we check whether the "target" instruction has a merge operand
the check only handles cases where the merge operand itself is
loaded, like (2) in the snippet above.  For (1) we load the non-merged
operand, assume propagation is valid and continue despite (2).

This patch just re-uses avl_can_be_propagated_p in order to disable
slides altogether in such situations.

gcc/ChangeLog:

* config/riscv/riscv-avlprop.cc (pass_avlprop::get_vlmax_ta_preferred_avl):
Check whether the use insn is valid for propagation.

Diff:
---
 gcc/config/riscv/riscv-avlprop.cc | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/gcc/config/riscv/riscv-avlprop.cc 
b/gcc/config/riscv/riscv-avlprop.cc
index 91d80aa00d62..62491f9be2d4 100644
--- a/gcc/config/riscv/riscv-avlprop.cc
+++ b/gcc/config/riscv/riscv-avlprop.cc
@@ -351,7 +351,8 @@ pass_avlprop::get_vlmax_ta_preferred_avl (insn_info *insn) 
const
  if (!use_insn->can_be_optimized () || use_insn->is_asm ()
  || use_insn->is_call () || use_insn->has_volatile_refs ()
  || use_insn->has_pre_post_modify ()
- || !has_vl_op (use_insn->rtl ()))
+ || !has_vl_op (use_insn->rtl ())
+ || !avl_can_be_propagated_p (use_insn->rtl ()))
return NULL_RTX;
 
  /* We should only propagate non-VLMAX AVL into VLMAX insn when


[gcc r15-6278] RISC-V: Fix compress shuffle pattern [PR117383].

2024-12-16 Thread Robin Dapp via Gcc-cvs
https://gcc.gnu.org/g:ec870d3b5f378172006104bad674d7875463da18

commit r15-6278-gec870d3b5f378172006104bad674d7875463da18
Author: Robin Dapp 
Date:   Wed Dec 11 20:48:30 2024 +0100

RISC-V: Fix compress shuffle pattern [PR117383].

This patch makes vcompress use the tail-undisturbed policy by default
and also uses the proper VL.

PR target/117383

gcc/ChangeLog:

* config/riscv/riscv-protos.h (enum insn_type): Use TU policy.
* config/riscv/riscv-v.cc (shuffle_compress_patterns): Set VL.

gcc/testsuite/ChangeLog:

* gcc.target/riscv/rvv/autovec/binop/vcompress-avlprop-1.c:
Expect tu.
* gcc.target/riscv/rvv/autovec/pr117383.c: New test.

Diff:
---
 gcc/config/riscv/riscv-protos.h|  4 +-
 gcc/config/riscv/riscv-v.cc|  3 +-
 .../riscv/rvv/autovec/binop/vcompress-avlprop-1.c  |  2 +-
 .../gcc.target/riscv/rvv/autovec/pr117383.c| 48 ++
 4 files changed, 53 insertions(+), 4 deletions(-)

diff --git a/gcc/config/riscv/riscv-protos.h b/gcc/config/riscv/riscv-protos.h
index 98af41c6e742..e36309bd7288 100644
--- a/gcc/config/riscv/riscv-protos.h
+++ b/gcc/config/riscv/riscv-protos.h
@@ -509,9 +509,9 @@ enum insn_type : unsigned int
 
   /* For vcompress.vm */
   COMPRESS_OP = __NORMAL_OP_TA2 | BINARY_OP_P,
-  /* has merge operand but use ta.  */
+  /* has merge operand but use tu.  */
   COMPRESS_OP_MERGE
-  = HAS_DEST_P | HAS_MERGE_P | TDEFAULT_POLICY_P | BINARY_OP_P,
+  = HAS_DEST_P | HAS_MERGE_P | TU_POLICY_P | BINARY_OP_P,
 
   /* For vslideup.up has merge operand but use ta.  */
   SLIDEUP_OP_MERGE = HAS_DEST_P | HAS_MASK_P | USE_ALL_TRUES_MASK_P
diff --git a/gcc/config/riscv/riscv-v.cc b/gcc/config/riscv/riscv-v.cc
index 5fcdaca1fd0f..417c36a7587c 100644
--- a/gcc/config/riscv/riscv-v.cc
+++ b/gcc/config/riscv/riscv-v.cc
@@ -3393,7 +3393,8 @@ shuffle_compress_patterns (struct expand_vec_perm_d *d)
 
   insn_code icode = code_for_pred_compress (vmode);
   rtx ops[] = {d->target, merge, d->op0, mask};
-  emit_vlmax_insn (icode, COMPRESS_OP_MERGE, ops);
+  emit_nonvlmax_insn (icode, COMPRESS_OP_MERGE, ops,
+ gen_int_mode (vlen, Pmode));
   return true;
 }
 
diff --git 
a/gcc/testsuite/gcc.target/riscv/rvv/autovec/binop/vcompress-avlprop-1.c 
b/gcc/testsuite/gcc.target/riscv/rvv/autovec/binop/vcompress-avlprop-1.c
index 3654b03e8ed6..98e53b38f094 100644
--- a/gcc/testsuite/gcc.target/riscv/rvv/autovec/binop/vcompress-avlprop-1.c
+++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/binop/vcompress-avlprop-1.c
@@ -11,7 +11,7 @@ struct s sss[MAX];
 /*
 ** build_linked_list:
 **   ...
-**   vsetivli\s+zero,\s*8,\s*e64,\s*m1,\s*ta,\s*ma
+**   vsetivli\s+zero,\s*8,\s*e64,\s*m1,\s*tu,\s*ma
 **   ...
 **   vcompress\.vm\s+v[0-9]+,\s*v[0-9]+,\s*v0
 **   ...
diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/pr117383.c 
b/gcc/testsuite/gcc.target/riscv/rvv/autovec/pr117383.c
new file mode 100644
index 000000000000..c01612f29028
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/pr117383.c
@@ -0,0 +1,48 @@
+/* { dg-do run } */
+/* { dg-require-effective-target "riscv_v_ok" } */
+/* { dg-add-options "riscv_v" } */
+/* { dg-additional-options "-std=c99 -mrvv-vector-bits=zvl" } */
+
+typedef signed char int8_t;
+typedef int8_t vnx64i __attribute__ ((vector_size (64)));
+
+#define MASK_64 \
+  1, 2, 3, 5, 7, 9, 10, 11, 12, 14, 15, 17, 19, 21, 22, 23, 26, 28, 30, 31, \
+    37, 38, 41, 46, 47, 53, 54, 55, 60, 61, 62, 63, 76, 77, 78, 79, 80, 81, \
+    82, 83, 84, 85, 86, 87, 88, 89, 90, 91, 92, 93, 94, 95, 96, 97, 98, 99, \
+    100, 101, 102, 103, 104, 105, 106, 107
+
+void __attribute__ ((noipa))
+test_1 (int8_t *x, int8_t *y, int8_t *out)
+{
+  vnx64i v1 = *(vnx64i *) x;
+  vnx64i v2 = *(vnx64i *) y;
+  vnx64i v3 = __builtin_shufflevector (v1, v2, MASK_64);
+  *(vnx64i *) out = v3;
+}
+
+int
+main (void)
+{
+  int8_t x[64];
+  int8_t y[64];
+  int8_t out[64];
+
+  for (int i = 0; i < 64; i++)
+{
+  x[i] = -i;
+  y[i] = i;
+}
+
+  test_1 (x, y, out);
+
+  int mask[] = {MASK_64};
+#pragma GCC novector
+  for (int i = 0; i < 64; i++)
+{
+  int idx = mask[i] < 64 ? mask[i] : mask[i] - 64;
+  int ref = mask[i] < 64 ? x[idx] : y[idx];
+  if (ref != out[i])
+__builtin_abort ();
+}
+}


[gcc r15-6277] RISC-V: Increase cost for vec_construct [PR118019].

2024-12-16 Thread Robin Dapp via Gcc-cvs
https://gcc.gnu.org/g:ce199a952bfef3e27354a4586a17bc55274c1d3c

commit r15-6277-gce199a952bfef3e27354a4586a17bc55274c1d3c
Author: Robin Dapp 
Date:   Fri Dec 13 11:23:03 2024 +0100

RISC-V: Increase cost for vec_construct [PR118019].

For a generic vec_construct from scalar elements we need to load each
scalar element and move it over to a vector register.
Right now we only use a cost of 1 per element.

This patch uses register-move cost as well as scalar_to_vec and
multiplies it with the number of elements in the vector instead.

PR target/118019

gcc/ChangeLog:

* config/riscv/riscv.cc (riscv_builtin_vectorization_cost):
Increase vec_construct cost.

gcc/testsuite/ChangeLog:

* gcc.target/riscv/rvv/autovec/pr118019.c: New test.

Diff:
---
 gcc/config/riscv/riscv.cc  |  8 +++-
 .../gcc.target/riscv/rvv/autovec/pr118019.c| 52 ++
 2 files changed, 59 insertions(+), 1 deletion(-)

diff --git a/gcc/config/riscv/riscv.cc b/gcc/config/riscv/riscv.cc
index be2ebf9d9c09..aa8a4562d9af 100644
--- a/gcc/config/riscv/riscv.cc
+++ b/gcc/config/riscv/riscv.cc
@@ -12263,7 +12263,13 @@ riscv_builtin_vectorization_cost (enum 
vect_cost_for_stmt type_of_cost,
   return fp ? common_costs->fp_stmt_cost : common_costs->int_stmt_cost;
 
 case vec_construct:
-  return estimated_poly_value (TYPE_VECTOR_SUBPARTS (vectype));
+   {
+ /* TODO: This is too pessimistic in case we can splat.  */
+ int regmove_cost = fp ? costs->regmove->FR2VR
+   : costs->regmove->GR2VR;
+ return (regmove_cost + common_costs->scalar_to_vec_cost)
+   * estimated_poly_value (TYPE_VECTOR_SUBPARTS (vectype));
+   }
 
 default:
   gcc_unreachable ();
diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/pr118019.c 
b/gcc/testsuite/gcc.target/riscv/rvv/autovec/pr118019.c
new file mode 100644
index 000000000000..02b3ab44e7cb
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/pr118019.c
@@ -0,0 +1,52 @@
+/* { dg-do compile } */
+/* { dg-options "-O3 -march=rv64gcv_zvl512b -mabi=lp64d -mstrict-align -mvector-strict-align" } */
+
+/* Make sure we do not construct the vector element-wise despite
+   slow misaligned scalar and vector accesses.  */
+
+typedef unsigned char uint8_t;
+typedef unsigned short uint16_t;
+typedef unsigned int uint32_t;
+
+#define HADAMARD4(d0, d1, d2, d3, s0, s1, s2, s3) \
+  { \
+    int t0 = s0 + s1; \
+    int t1 = s0 - s1; \
+    int t2 = s2 + s3; \
+    int t3 = s2 - s3; \
+    d0 = t0 + t2; \
+    d2 = t0 - t2; \
+    d1 = t1 + t3; \
+    d3 = t1 - t3; \
+  }
+
+uint32_t
+abs2 (uint32_t a)
+{
+  uint32_t s = ((a >> 15) & 0x10001) * 0xffff;
+  return (a + s) ^ s;
+}
+
+int
+x264_pixel_satd_8x4 (uint8_t *pix1, int i_pix1, uint8_t *pix2, int i_pix2)
+{
+  uint32_t tmp[4][4];
+  uint32_t a0, a1, a2, a3;
+  int sum = 0;
+  for (int i = 0; i < 4; i++, pix1 += i_pix1, pix2 += i_pix2)
+{
+  a0 = (pix1[0] - pix2[0]) + ((pix1[4] - pix2[4]) << 16);
+  a1 = (pix1[1] - pix2[1]) + ((pix1[5] - pix2[5]) << 16);
+  a2 = (pix1[2] - pix2[2]) + ((pix1[6] - pix2[6]) << 16);
+  a3 = (pix1[3] - pix2[3]) + ((pix1[7] - pix2[7]) << 16);
+  HADAMARD4 (tmp[i][0], tmp[i][1], tmp[i][2], tmp[i][3], a0, a1, a2, a3);
+}
+  for (int i = 0; i < 4; i++)
+{
+  HADAMARD4 (a0, a1, a2, a3, tmp[0][i], tmp[1][i], tmp[2][i], tmp[3][i]);
+  sum += abs2 (a0) + abs2 (a1) + abs2 (a2) + abs2 (a3);
+}
+  return (((uint16_t) sum) + ((uint32_t) sum >> 16)) >> 1;
+}
+
+/* { dg-final { scan-assembler-not "lbu" } } */


[gcc r15-6279] vect: Do not try to duplicate_and_interleave one-element mode.

2024-12-16 Thread Robin Dapp via Gcc-cvs
https://gcc.gnu.org/g:99eef0cfa56573c32b9c0a1e43519ee4300ac63f

commit r15-6279-g99eef0cfa56573c32b9c0a1e43519ee4300ac63f
Author: Robin Dapp 
Date:   Fri Sep 6 16:04:03 2024 +0200

vect: Do not try to duplicate_and_interleave one-element mode.

PR112694 shows that we try to create sub-vectors of single-element
vectors because can_duplicate_and_interleave_p returns true.
The problem resurfaced in PR116611.

This patch makes can_duplicate_and_interleave_p return false
if count is not a multiple of nvectors (i.e. we cannot fuse
count / nvectors elements together) and removes the corresponding check
in the riscv backend.

This partially gets rid of the FAIL in slp-19a.c.  At least when built
with cost model we don't have LOAD_LANES anymore.  Without cost model,
as in the test suite, we choose a different path and still end up with
LOAD_LANES.

Bootstrapped and regtested on x86 and power10, regtested on
rv64gcv_zvfh_zvbb.  Still waiting for the aarch64 results.

Regards
 Robin

gcc/ChangeLog:

PR target/112694
PR target/116611

* config/riscv/riscv-v.cc (expand_vec_perm_const): Remove early
return.
* tree-vect-slp.cc (can_duplicate_and_interleave_p): Return
false when we cannot create sub-elements.

Diff:
---
 gcc/config/riscv/riscv-v.cc | 9 -
 gcc/tree-vect-slp.cc| 3 +++
 2 files changed, 3 insertions(+), 9 deletions(-)

diff --git a/gcc/config/riscv/riscv-v.cc b/gcc/config/riscv/riscv-v.cc
index 417c36a7587c..b0de4c52b83c 100644
--- a/gcc/config/riscv/riscv-v.cc
+++ b/gcc/config/riscv/riscv-v.cc
@@ -4011,15 +4011,6 @@ expand_vec_perm_const (machine_mode vmode, machine_mode 
op_mode, rtx target,
  mask to do the iteration loop control. Just disable it directly.  */
   if (GET_MODE_CLASS (vmode) == MODE_VECTOR_BOOL)
 return false;
-  /* FIXME: Explicitly disable VLA interleave SLP vectorization when we
- may encounter ICE for poly size (1, 1) vectors in loop vectorizer.
- Ideally, middle-end loop vectorizer should be able to disable it
- itself, We can remove the codes here when middle-end code is able
- to disable VLA SLP vectorization for poly size (1, 1) VF.  */
-  if (!BYTES_PER_RISCV_VECTOR.is_constant ()
-  && maybe_lt (BYTES_PER_RISCV_VECTOR * TARGET_MAX_LMUL,
-  poly_int64 (16, 16)))
-return false;
 
   struct expand_vec_perm_d d;
 
diff --git a/gcc/tree-vect-slp.cc b/gcc/tree-vect-slp.cc
index 9ad95104ec7d..7bad268d406a 100644
--- a/gcc/tree-vect-slp.cc
+++ b/gcc/tree-vect-slp.cc
@@ -490,6 +490,9 @@ can_duplicate_and_interleave_p (vec_info *vinfo, unsigned 
int count,
   if (!multiple_p (elt_bytes, 2, &elt_bytes))
return false;
   nvectors *= 2;
+  /* We need to be able to fuse COUNT / NVECTORS elements together.  */
+  if (!multiple_p (count, nvectors))
+   return false;
 }
 }


[gcc r15-6280] docs: Fix [us]abd pattern name.

2024-12-16 Thread Robin Dapp via Gcc-cvs
https://gcc.gnu.org/g:65c09538420ed820a24aac982a0299226b879d91

commit r15-6280-g65c09538420ed820a24aac982a0299226b879d91
Author: Robin Dapp 
Date:   Thu Dec 12 11:46:32 2024 +0100

docs: Fix [us]abd pattern name.

The uabd and sabd optab names are missing the "3" suffix (for their three
arguments).  This patch adds it.

gcc/ChangeLog:

* doc/md.texi: Add "3" suffix.

Diff:
---
 gcc/doc/md.texi | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/gcc/doc/md.texi b/gcc/doc/md.texi
index f0b63a144ad2..523ce9bce17e 100644
--- a/gcc/doc/md.texi
+++ b/gcc/doc/md.texi
@@ -6208,8 +6208,8 @@ Other shift and rotate instructions, analogous to the
 Vector shift and rotate instructions that take vectors as operand 2
 instead of a scalar type.
 
-@cindex @code{uabd@var{m}} instruction pattern
-@cindex @code{sabd@var{m}} instruction pattern
+@cindex @code{uabd@var{m}3} instruction pattern
+@cindex @code{sabd@var{m}3} instruction pattern
 @item @samp{uabd@var{m}}, @samp{sabd@var{m}}
 Signed and unsigned absolute difference instructions.  These
 instructions find the difference between operands 1 and 2


[gcc r15-6212] RISC-V: Add interleave pattern.

2024-12-13 Thread Robin Dapp via Gcc-cvs
https://gcc.gnu.org/g:cff3050a4fbec323629563b87c9a83bf3e7be908

commit r15-6212-gcff3050a4fbec323629563b87c9a83bf3e7be908
Author: Robin Dapp 
Date:   Wed Oct 16 22:39:08 2024 +0200

RISC-V: Add interleave pattern.

This patch adds efficient handling of interleaving patterns like
[0 4 1 5] to vec_perm_const.  It is implemented by a slideup and a
gather.

gcc/ChangeLog:

* config/riscv/riscv-v.cc (shuffle_interleave_patterns): New
function.
(expand_vec_perm_const_1): Use new function.

gcc/testsuite/ChangeLog:

* gcc.target/riscv/rvv/autovec/vls-vlmax/shuffle-interleave-run.c: 
New test.
* gcc.target/riscv/rvv/autovec/vls-vlmax/shuffle-interleave.c: New 
test.

Diff:
---
 gcc/config/riscv/riscv-v.cc|  80 ++
 .../rvv/autovec/vls-vlmax/shuffle-interleave-run.c | 122 +
 .../rvv/autovec/vls-vlmax/shuffle-interleave.c |  69 
 3 files changed, 271 insertions(+)

diff --git a/gcc/config/riscv/riscv-v.cc b/gcc/config/riscv/riscv-v.cc
index 42c4e7d0f9ec..d58632b0a095 100644
--- a/gcc/config/riscv/riscv-v.cc
+++ b/gcc/config/riscv/riscv-v.cc
@@ -3494,6 +3494,84 @@ shuffle_slide_patterns (struct expand_vec_perm_d *d)
   return true;
 }
 
+/* Recognize interleaving patterns like [0 4 1 5].  */
+
+static bool
+shuffle_interleave_patterns (struct expand_vec_perm_d *d)
+{
+  machine_mode vmode = d->vmode;
+  machine_mode sel_mode = related_int_vector_mode (vmode).require ();
+  poly_int64 vec_len = d->perm.length ();
+  int n_patterns = d->perm.encoding ().npatterns ();
+
+  if (!vec_len.is_constant ())
+return false;
+
+  if (n_patterns != 2)
+return false;
+
+  unsigned vlen = vec_len.to_constant ();
+
+  if (vlen < 4 || vlen > 64)
+return false;
+
+  if (d->one_vector_p)
+return false;
+
+  bool low = true;
+  if (d->perm.series_p (0, 2, 0, 1)
+  && d->perm.series_p (1, 2, vlen, 1))
+low = true;
+  else if (d->perm.series_p (0, 2, vlen / 2, 1)
+  && d->perm.series_p (1, 2, vlen + vlen / 2, 1))
+low = false;
+  else
+return false;
+
+  vec_perm_builder sel (vlen, 2, 1);
+  sel.safe_grow (vlen);
+  int cnt = 0;
+  for (unsigned i = 0; i < vlen; i += 2)
+{
+  sel[i] = cnt;
+  sel[i + 1] = cnt + vlen / 2;
+  cnt++;
+}
+
+  vec_perm_indices indices (sel, 2, vlen);
+
+  if (vlen != indices.length ().to_constant ())
+return false;
+
+  /* Success!  */
+  if (d->testing_p)
+return true;
+
+  int slide_cnt = vlen / 2;
+  rtx tmp = gen_reg_rtx (vmode);
+
+  if (low)
+{
+  /* No need for a vector length because we slide up until the
+end of OP1 anyway.  */
+  rtx ops[] = {tmp, d->op0, d->op1, gen_int_mode (slide_cnt, Pmode)};
+  insn_code icode = code_for_pred_slide (UNSPEC_VSLIDEUP, vmode);
+  emit_vlmax_insn (icode, SLIDEUP_OP_MERGE, ops);
+}
+  else
+{
+  rtx ops[] = {tmp, d->op1, d->op0, gen_int_mode (slide_cnt, Pmode)};
+  insn_code icode = code_for_pred_slide (UNSPEC_VSLIDEDOWN, vmode);
+  emit_nonvlmax_insn (icode, BINARY_OP_TUMA, ops,
+ gen_int_mode (slide_cnt, Pmode));
+}
+
+  rtx sel_rtx = vec_perm_indices_to_rtx (sel_mode, indices);
+  emit_vlmax_gather_insn (gen_lowpart (vmode, d->target), tmp, sel_rtx);
+
+  return true;
+}
+
 /* Recognize decompress patterns:
 
1. VEC_PERM_EXPR op0 and op1
@@ -3810,6 +3888,8 @@ expand_vec_perm_const_1 (struct expand_vec_perm_d *d)
return true;
  if (shuffle_slide_patterns (d))
return true;
+ if (shuffle_interleave_patterns (d))
+   return true;
  if (shuffle_compress_patterns (d))
return true;
  if (shuffle_decompress_patterns (d))
diff --git 
a/gcc/testsuite/gcc.target/riscv/rvv/autovec/vls-vlmax/shuffle-interleave-run.c 
b/gcc/testsuite/gcc.target/riscv/rvv/autovec/vls-vlmax/shuffle-interleave-run.c
new file mode 100644
index 000000000000..57748d95362f
--- /dev/null
+++ 
b/gcc/testsuite/gcc.target/riscv/rvv/autovec/vls-vlmax/shuffle-interleave-run.c
@@ -0,0 +1,122 @@
+/* { dg-do compile } */
+/* { dg-require-effective-target riscv_v_ok } */
+/* { dg-add-options riscv_v } */
+/* { dg-additional-options "-O3 -mrvv-max-lmul=m8 -std=gnu99" } */
+
+#include "shuffle-interleave.c"
+
+#define SERIES_2(x, y) (x), (x + 1)
+#define SERIES_4(x, y) SERIES_2 (x, y), SERIES_2 (x + 2, y)
+#define SERIES_8(x, y) SERIES_4 (x, y), SERIES_4 (x + 4, y)
+#define SERIES_16(x, y) SERIES_8 (x, y), SERIES_8 (x + 8, y)
+#define SERIES_32(x, y) SERIES_16 (x, y), SERIES_16 (x + 16, y)
+#define SERIES_64(x, y) SERIES_32 (x, y), SERIES_32 (x + 32, y)
+
+#define comp(a, b, n)  
\
+  for (unsigned i = 0; i < n; ++i) 
\
+if ((a)[i] != (b)[i])  
\
+  __builtin_abort

[gcc r15-6210] RISC-V: Emit vector shift pattern for const_vector [PR117353].

2024-12-13 Thread Robin Dapp via Gcc-cvs
https://gcc.gnu.org/g:cfdab86f20f6e77d9c8bf982989f78ef975c7611

commit r15-6210-gcfdab86f20f6e77d9c8bf982989f78ef975c7611
Author: Robin Dapp 
Date:   Thu Dec 12 10:33:28 2024 +0100

RISC-V: Emit vector shift pattern for const_vector [PR117353].

In PR117353 and PR117878 we expand a const vector during reload.  For
this we use an unpredicated left shift.  Normally an insn like this is
split but as we introduce it late and cannot create pseudos anymore
it remains unpredicated and is not recognized by the vsetvl pass (where
we expect all insns to be in predicated RVV format).

This patch directly emits a predicated shift instead.  We could
distinguish between !lra_in_progress and lra_in_progress and emit
an unpredicated shift in the former case but we're not very likely
to optimize it anyway so it doesn't seem worth it.

PR target/117353
PR target/117878

gcc/ChangeLog:

* config/riscv/riscv-v.cc (expand_const_vector): Use predicated
instead of simple shift.

gcc/testsuite/ChangeLog:

* gcc.target/riscv/rvv/autovec/pr117353.c: New test.

Diff:
---
 gcc/config/riscv/riscv-v.cc|  8 +++---
 .../gcc.target/riscv/rvv/autovec/pr117353.c| 29 ++
 2 files changed, 34 insertions(+), 3 deletions(-)

diff --git a/gcc/config/riscv/riscv-v.cc b/gcc/config/riscv/riscv-v.cc
index 47bc0255aa38..2530fd9c9799 100644
--- a/gcc/config/riscv/riscv-v.cc
+++ b/gcc/config/riscv/riscv-v.cc
@@ -1439,9 +1439,11 @@ expand_const_vector (rtx target, rtx src)
  rtx shift_count
= gen_int_mode (exact_log2 (builder.npatterns ()),
builder.inner_mode ());
- rtx tmp1 = expand_simple_binop (builder.mode (), LSHIFTRT,
-vid, shift_count, NULL_RTX,
-false, OPTAB_DIRECT);
+ rtx tmp1 = gen_reg_rtx (builder.mode ());
+ rtx shift_ops[] = {tmp1, vid, shift_count};
+ emit_vlmax_insn (code_for_pred_scalar
+  (LSHIFTRT, builder.mode ()), BINARY_OP,
+  shift_ops);
 
  /* Step 3: Generate tmp2 = tmp1 * step.  */
  rtx tmp2 = gen_reg_rtx (builder.mode ());
diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/pr117353.c 
b/gcc/testsuite/gcc.target/riscv/rvv/autovec/pr117353.c
new file mode 100644
index ..135a00194c9d
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/pr117353.c
@@ -0,0 +1,29 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -march=rv64gcv_zvl256b -mabi=lp64d" } */
+
+int *b;
+
+inline void c (char *d, int e)
+{
+  d[0] = 0;
+  d[1] = e;
+}
+
+void f ();
+
+void h ()
+{
+  for (;;)
+{
+  char *a;
+  long g = 8;
+  while (g)
+   {
+ c (a, *b);
+ b++;
+ a += 2;
+ g--;
+   }
+  f ();
+}
+}


[gcc r15-6211] RISC-V: Add slide to perm_const strategies.

2024-12-13 Thread Robin Dapp via Gcc-cvs
https://gcc.gnu.org/g:71bfc8c33e63f4a566079d34ed3bc98f45133e96

commit r15-6211-g71bfc8c33e63f4a566079d34ed3bc98f45133e96
Author: Robin Dapp 
Date:   Mon Sep 16 22:22:14 2024 +0200

RISC-V: Add slide to perm_const strategies.

This patch adds a shuffle_slide_patterns to expand_vec_perm_const.
It recognizes permutations like

  {0, 1, 4, 5}
or
  {2, 3, 6, 7}

which can be constructed by a slideup or slidedown of one of the vectors
into the other one.

gcc/ChangeLog:

* config/riscv/riscv-v.cc (shuffle_slide_patterns): New.
(expand_vec_perm_const_1): Call new function.

gcc/testsuite/ChangeLog:

* gcc.target/riscv/rvv/autovec/vls-vlmax/shuffle-slide-run.c: New 
test.
* gcc.target/riscv/rvv/autovec/vls-vlmax/shuffle-slide.c: New test.

Diff:
---
 gcc/config/riscv/riscv-v.cc|  99 
 .../rvv/autovec/vls-vlmax/shuffle-slide-run.c  | 266 +
 .../riscv/rvv/autovec/vls-vlmax/shuffle-slide.c| 207 
 3 files changed, 572 insertions(+)

diff --git a/gcc/config/riscv/riscv-v.cc b/gcc/config/riscv/riscv-v.cc
index 2530fd9c9799..42c4e7d0f9ec 100644
--- a/gcc/config/riscv/riscv-v.cc
+++ b/gcc/config/riscv/riscv-v.cc
@@ -3397,6 +3397,103 @@ shuffle_compress_patterns (struct expand_vec_perm_d *d)
   return true;
 }
 
+/* Recognize patterns like [4 5 6 7 12 13 14 15] where either the lower
+   or the higher parts of both vectors are combined into one.  */
+
+static bool
+shuffle_slide_patterns (struct expand_vec_perm_d *d)
+{
+  machine_mode vmode = d->vmode;
+  poly_int64 vec_len = d->perm.length ();
+
+  if (!vec_len.is_constant ())
+return false;
+
+  int vlen = vec_len.to_constant ();
+  if (vlen < 4)
+return false;
+
+  if (d->one_vector_p)
+return false;
+
+  /* For a slideup OP0 can stay, for a slidedown OP1 can.
+ The former requires that the first element of the permutation
+ is the first element of OP0, the latter that the last permutation
+ element is the last element of OP1.  */
+  bool slideup = false;
+  bool slidedown = false;
+
+  /* For a slideup the permutation must start at OP0's first element.  */
+  if (known_eq (d->perm[0], 0))
+slideup = true;
+
+  /* For a slidedown the permutation must end at OP1's last element.  */
+  if (known_eq (d->perm[vlen - 1], 2 * vlen - 1))
+slidedown = true;
+
+  if (slideup && slidedown)
+return false;
+
+  if (!slideup && !slidedown)
+return false;
+
+  /* Check for a monotonic sequence with one pivot.  */
+  int pivot = -1;
+  for (int i = 0; i < vlen; i++)
+{
+  if (pivot == -1 && known_ge (d->perm[i], vec_len))
+   pivot = i;
+  if (i > 0 && i != pivot
+ && maybe_ne (d->perm[i], d->perm[i - 1] + 1))
+   return false;
+}
+
+  if (pivot == -1)
+return false;
+
+  /* For a slideup OP1's part (to be slid up) must be a low part,
+ i.e. starting with its first element.  */
+  if (slideup && maybe_ne (d->perm[pivot], vlen))
+  return false;
+
+  /* For a slidedown OP0's part (to be slid down) must be a high part,
+ i.e. ending with its last element.  */
+  if (slidedown && maybe_ne (d->perm[pivot - 1], vlen - 1))
+return false;
+
+  /* Success!  */
+  if (d->testing_p)
+return true;
+
+  /* PIVOT is the start of the lower/higher part of OP1 or OP2.
+ For a slideup it indicates how many elements of OP1 to
+ skip/slide over.  For a slidedown it indicates how long
+ OP1's high part is, while VLEN - PIVOT is the amount to slide.  */
+  int slide_cnt = slideup ? pivot : vlen - pivot;
+  insn_code icode;
+  if (slideup)
+{
+  /* No need for a vector length because we slide up until the
+end of OP1 anyway.  */
+  rtx ops[] = {d->target, d->op0, d->op1, gen_int_mode (slide_cnt, Pmode)};
+  icode = code_for_pred_slide (UNSPEC_VSLIDEUP, vmode);
+  emit_vlmax_insn (icode, SLIDEUP_OP_MERGE, ops);
+}
+  else
+{
+  /* Here we need a length because we slide to the beginning of OP1
+leaving the remaining elements undisturbed.  */
+  int len = pivot;
+  rtx ops[] = {d->target, d->op1, d->op0,
+  gen_int_mode (slide_cnt, Pmode)};
+  icode = code_for_pred_slide (UNSPEC_VSLIDEDOWN, vmode);
+  emit_nonvlmax_insn (icode, BINARY_OP_TUMA, ops,
+ gen_int_mode (len, Pmode));
+}
+
+  return true;
+}
+
 /* Recognize decompress patterns:
 
1. VEC_PERM_EXPR op0 and op1
@@ -3711,6 +3808,8 @@ expand_vec_perm_const_1 (struct expand_vec_perm_d *d)
return true;
  if (shuffle_consecutive_patterns (d))
return true;
+ if (shuffle_slide_patterns (d))
+   return true;
  if (shuffle_compress_patterns (d))
return true;
  if (shuffle_decompress_patterns (d))
diff --git 
a/gcc/testsuite/gcc.target/riscv/rvv/autovec/vls-vlmax/shuffle-slide-run.

[gcc r15-6213] RISC-V: Add even/odd vec_perm_const pattern.

2024-12-13 Thread Robin Dapp via Gcc-cvs
https://gcc.gnu.org/g:528567a7b1589735408eaa133206a0683162188e

commit r15-6213-g528567a7b1589735408eaa133206a0683162188e
Author: Robin Dapp 
Date:   Thu Oct 17 11:33:19 2024 +0200

RISC-V: Add even/odd vec_perm_const pattern.

This adds handling for even/odd patterns.

gcc/ChangeLog:

* config/riscv/riscv-v.cc (shuffle_even_odd_patterns): New
function.
(expand_vec_perm_const_1): Use new function.

gcc/testsuite/ChangeLog:

* gcc.target/riscv/rvv/autovec/vls-vlmax/shuffle-evenodd-run.c: New 
test.
* gcc.target/riscv/rvv/autovec/vls-vlmax/shuffle-evenodd.c: New 
test.

Diff:
---
 gcc/config/riscv/riscv-v.cc|  66 +++
 .../rvv/autovec/vls-vlmax/shuffle-evenodd-run.c| 122 +
 .../riscv/rvv/autovec/vls-vlmax/shuffle-evenodd.c  |  68 
 3 files changed, 256 insertions(+)

diff --git a/gcc/config/riscv/riscv-v.cc b/gcc/config/riscv/riscv-v.cc
index d58632b0a095..517a016270b8 100644
--- a/gcc/config/riscv/riscv-v.cc
+++ b/gcc/config/riscv/riscv-v.cc
@@ -3572,6 +3572,70 @@ shuffle_interleave_patterns (struct expand_vec_perm_d *d)
   return true;
 }
 
+
+/* Recognize even/odd patterns like [0 2 4 6].  We use two compress
+   and one slideup.  */
+
+static bool
+shuffle_even_odd_patterns (struct expand_vec_perm_d *d)
+{
+  machine_mode vmode = d->vmode;
+  poly_int64 vec_len = d->perm.length ();
+  int n_patterns = d->perm.encoding ().npatterns ();
+
+  if (n_patterns != 1)
+return false;
+
+  if (!vec_len.is_constant ())
+return false;
+
+  int vlen = vec_len.to_constant ();
+  if (vlen < 4 || vlen > 64)
+return false;
+
+  if (d->one_vector_p)
+return false;
+
+  bool even = true;
+  if (!d->perm.series_p (0, 1, 0, 2))
+{
+  even = false;
+  if (!d->perm.series_p (0, 1, 1, 2))
+   return false;
+}
+
+  /* Success!  */
+  if (d->testing_p)
+return true;
+
+  machine_mode mask_mode = get_mask_mode (vmode);
+  rvv_builder builder (mask_mode, vlen, 1);
+  int bit = even ? 0 : 1;
+  for (int i = 0; i < vlen; i++)
+{
+  bit ^= 1;
+  if (bit)
+   builder.quick_push (CONST1_RTX (BImode));
+  else
+   builder.quick_push (CONST0_RTX (BImode));
+}
+  rtx mask = force_reg (mask_mode, builder.build ());
+
+  insn_code icode = code_for_pred_compress (vmode);
+  rtx ops1[] = {d->target, d->op0, mask};
+  emit_vlmax_insn (icode, COMPRESS_OP, ops1);
+
+  rtx tmp2 = gen_reg_rtx (vmode);
+  rtx ops2[] = {tmp2, d->op1, mask};
+  emit_vlmax_insn (icode, COMPRESS_OP, ops2);
+
+  rtx ops[] = {d->target, d->target, tmp2, gen_int_mode (vlen / 2, Pmode)};
+  icode = code_for_pred_slide (UNSPEC_VSLIDEUP, vmode);
+  emit_vlmax_insn (icode, SLIDEUP_OP_MERGE, ops);
+
+  return true;
+}
+
 /* Recognize decompress patterns:
 
1. VEC_PERM_EXPR op0 and op1
@@ -3890,6 +3954,8 @@ expand_vec_perm_const_1 (struct expand_vec_perm_d *d)
return true;
  if (shuffle_interleave_patterns (d))
return true;
+ if (shuffle_even_odd_patterns (d))
+   return true;
  if (shuffle_compress_patterns (d))
return true;
  if (shuffle_decompress_patterns (d))
diff --git 
a/gcc/testsuite/gcc.target/riscv/rvv/autovec/vls-vlmax/shuffle-evenodd-run.c 
b/gcc/testsuite/gcc.target/riscv/rvv/autovec/vls-vlmax/shuffle-evenodd-run.c
new file mode 100644
index 000000000000..ff1ffa42ee1b
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/vls-vlmax/shuffle-evenodd-run.c
@@ -0,0 +1,122 @@
+/* { dg-do run } */
+/* { dg-require-effective-target riscv_v_ok } */
+/* { dg-add-options riscv_v } */
+/* { dg-additional-options "-O3 -mrvv-max-lmul=m8 -std=gnu99" } */
+
+#include "shuffle-evenodd.c"
+
+#define SERIES_2(x, y) (x), (x + 1)
+#define SERIES_4(x, y) SERIES_2 (x, y), SERIES_2 (x + 2, y)
+#define SERIES_8(x, y) SERIES_4 (x, y), SERIES_4 (x + 4, y)
+#define SERIES_16(x, y) SERIES_8 (x, y), SERIES_8 (x + 8, y)
+#define SERIES_32(x, y) SERIES_16 (x, y), SERIES_16 (x + 16, y)
+#define SERIES_64(x, y) SERIES_32 (x, y), SERIES_32 (x + 32, y)
+
+#define comp(a, b, n)  
\
+  for (unsigned i = 0; i < n; ++i) 
\
+if ((a)[i] != (b)[i])  
\
+  __builtin_abort ();
+
+#define CHECK1(TYPE, NUNITS)   
\
+  __attribute__ ((noipa)) void check1_##TYPE ()
\
+  {
\
+TYPE v0 = (TYPE){SERIES_##NUNITS (0, NUNITS)}; 
\
+TYPE v1 = (TYPE){SERIES_##NUNITS (NUNITS, NUNITS)};
\
+TYPE ref = (TYPE){MASKE_##NUNITS (0, NUNITS)}; 
\
+TYPE res;   

[gcc r15-6214] RISC-V: Improve slide1up pattern.

2024-12-13 Thread Robin Dapp via Gcc-cvs
https://gcc.gnu.org/g:12a5ab146110631edffcd307a0c10773160f2723

commit r15-6214-g12a5ab146110631edffcd307a0c10773160f2723
Author: Robin Dapp 
Date:   Sat Nov 16 15:13:09 2024 +0100

RISC-V: Improve slide1up pattern.

This patch adds a second variant to implement the extract/slide1up
pattern.  In order to do a permutation like
<3, 4, 5, 6> from vectors <0, 1, 2, 3> and <4, 5, 6, 7>
we currently extract <3> from the first vector and re-insert it into the
second vector.  Unless register-file crossing latency is essentially
zero it should be preferable to first slide the second vector up by
one, then slide down the first vector by (nunits - 1).

gcc/ChangeLog:

* config/riscv/riscv-protos.h (riscv_register_move_cost):
Export.
* config/riscv/riscv-v.cc (shuffle_extract_and_slide1up_patterns):
Rename...
(shuffle_off_by_one_patterns): ... to this and add slideup/slidedown
variant.
(expand_vec_perm_const_1): Call renamed function.
* config/riscv/riscv.cc (riscv_secondary_memory_needed): Remove
static.
(riscv_register_move_cost): Add VR<->GR/FR handling.

gcc/testsuite/ChangeLog:

* gcc.target/riscv/rvv/autovec/pr112599-2.c: Adjust test
expectation.

Diff:
---
 gcc/config/riscv/riscv-protos.h|  1 +
 gcc/config/riscv/riscv-v.cc| 52 --
 gcc/config/riscv/riscv.cc  | 18 +++-
 .../gcc.target/riscv/rvv/autovec/pr112599-2.c  |  2 +-
 4 files changed, 57 insertions(+), 16 deletions(-)

diff --git a/gcc/config/riscv/riscv-protos.h b/gcc/config/riscv/riscv-protos.h
index 99ee6ef1e0dc..98af41c6e742 100644
--- a/gcc/config/riscv/riscv-protos.h
+++ b/gcc/config/riscv/riscv-protos.h
@@ -139,6 +139,7 @@ extern void riscv_expand_ussub (rtx, rtx, rtx);
 extern void riscv_expand_sssub (rtx, rtx, rtx);
 extern void riscv_expand_ustrunc (rtx, rtx);
 extern void riscv_expand_sstrunc (rtx, rtx);
+extern int riscv_register_move_cost (machine_mode, reg_class_t, reg_class_t);
 
 #ifdef RTX_CODE
 extern void riscv_expand_int_scc (rtx, enum rtx_code, rtx, rtx, bool *invert_ptr = 0);
diff --git a/gcc/config/riscv/riscv-v.cc b/gcc/config/riscv/riscv-v.cc
index 517a016270b8..5fcdaca1fd0f 100644
--- a/gcc/config/riscv/riscv-v.cc
+++ b/gcc/config/riscv/riscv-v.cc
@@ -3798,11 +3798,13 @@ shuffle_bswap_pattern (struct expand_vec_perm_d *d)
   return true;
 }
 
-/* Recognize the pattern that can be shuffled by vec_extract and slide1up
-   approach.  */
+/* Recognize patterns like [3 4 5 6] where we combine the last element
+   of the first vector and the first n - 1 elements of the second vector.
+   This can be implemented by slides or by extracting and re-inserting
+   (slide1up) the first vector's last element.  */
 
 static bool
-shuffle_extract_and_slide1up_patterns (struct expand_vec_perm_d *d)
+shuffle_off_by_one_patterns (struct expand_vec_perm_d *d)
 {
   poly_int64 nunits = GET_MODE_NUNITS (d->vmode);
 
@@ -3820,17 +3822,39 @@ shuffle_extract_and_slide1up_patterns (struct 
expand_vec_perm_d *d)
   if (d->testing_p)
 return true;
 
-  /* Extract the last element of the first vector.  */
-  scalar_mode smode = GET_MODE_INNER (d->vmode);
-  rtx tmp = gen_reg_rtx (smode);
-  emit_vec_extract (tmp, d->op0, gen_int_mode (nunits - 1, Pmode));
+  int scalar_cost = riscv_register_move_cost (d->vmode, V_REGS, GR_REGS)
++ riscv_register_move_cost (d->vmode, GR_REGS, V_REGS) + 2;
+  int slide_cost = 2;
+
+  if (slide_cost < scalar_cost)
+{
+  /* This variant should always be preferable because we just need two
+slides.  The extract-variant also requires two slides but additionally
+pays the latency for register-file crossing.  */
+  rtx tmp = gen_reg_rtx (d->vmode);
+  rtx ops[] = {tmp, d->op1, gen_int_mode (1, Pmode)};
+  insn_code icode = code_for_pred_slide (UNSPEC_VSLIDEUP, d->vmode);
+  emit_vlmax_insn (icode, BINARY_OP, ops);
+
+  rtx ops2[] = {d->target, tmp, d->op0, gen_int_mode (nunits - 1, Pmode)};
+  icode = code_for_pred_slide (UNSPEC_VSLIDEDOWN, d->vmode);
+  emit_nonvlmax_insn (icode, BINARY_OP_TUMA, ops2, gen_int_mode (1, Pmode));
+}
+  else
+{
+  /* Extract the last element of the first vector.  */
+  scalar_mode smode = GET_MODE_INNER (d->vmode);
+  rtx tmp = gen_reg_rtx (smode);
+  emit_vec_extract (tmp, d->op0, gen_int_mode (nunits - 1, Pmode));
+
+  /* Insert the scalar into element 0.  */
+  unsigned int unspec
+   = FLOAT_MODE_P (d->vmode) ? UNSPEC_VFSLIDE1UP : UNSPEC_VSLIDE1UP;
+  insn_code icode = code_for_pred_slide (unspec, d->vmode);
+  rtx ops[] = {d->target, d->op1, tmp};
+  emit_vlmax_insn (icode, BINARY_OP, ops);
+}
 
-  /* Insert the scalar into element 0.  */
-  unsigned int unspec
-= FLOAT_MODE_P (d->vmode) 

[gcc r15-6223] genrecog: Split into separate partitions [PR111600].

2024-12-13 Thread Robin Dapp via Gcc-cvs
https://gcc.gnu.org/g:6dcfe8743134936db17ffdfd0a5102a87338f494

commit r15-6223-g6dcfe8743134936db17ffdfd0a5102a87338f494
Author: Robin Dapp 
Date:   Tue Nov 26 14:44:17 2024 +0100

genrecog: Split into separate partitions [PR111600].

Hi,

this patch makes genrecog split its output into separate files (10 by
default) in the same vein as genemit does.  The changes are mostly
mechanical again, changing printfs and puts to fprintf.
As insn-recog.cc relies on being able to call other recog functions, a
header insn-recog.h is introduced that pre-declares all of those.

For simplicity the number of files is determined by (re-using)
--with-insnemit-partitions.  Naming suggestions welcome :)

Bootstrapped and regtested on x86 and power10, regtested on riscv.
aarch64 bootstrap is currently blocked because of the
"maybe uninitialized" issue discussed on IRC.

Regards
 Robin

PR target/111600

gcc/ChangeLog:

* Makefile.in:  Add insn-recog split.
* configure: Regenerate.
* configure.ac: Document that the number of insnemit partitions is
used for insn-recog as well.
* genconditions.cc (write_one_condition): Use fprintf.
* genpreds.cc (write_predicate_expr): Ditto.
(write_init_reg_class_start_regs): Ditto.
* genrecog.cc (write_header): Add header file to includes.
(printf_indent): Use fprintf.
(change_state): Ditto.
(print_code): Ditto.
(print_host_wide_int): Ditto.
(print_parameter_value): Ditto.
(print_test_rtx): Ditto.
(print_nonbool_test): Ditto.
(print_label_value): Ditto.
(print_test): Ditto.
(print_decision): Ditto.
(print_state): Ditto.
(print_subroutine_call): Ditto.
(print_acceptance): Ditto.
(print_subroutine_start): Ditto.
(print_pattern): Ditto.
(print_subroutine): Ditto.
(print_subroutine_group): Ditto.
(handle_arg): Add -O and -H for output and header file handling.
(main): Use callback.
* gentarget-def.cc (def_target_insn): Use fprintf.
* read-md.cc (md_reader::print_c_condition): Ditto.
* read-md.h (class md_reader): Ditto.

Diff:
---
 gcc/Makefile.in  |  29 ++-
 gcc/configure|   4 +-
 gcc/configure.ac |   4 +-
 gcc/genconditions.cc |   4 +-
 gcc/genpreds.cc  |   4 +-
 gcc/genrecog.cc  | 552 ++-
 gcc/gentarget-def.cc |   2 +-
 gcc/read-md.cc   |   4 +-
 gcc/read-md.h|   2 +-
 9 files changed, 360 insertions(+), 245 deletions(-)

diff --git a/gcc/Makefile.in b/gcc/Makefile.in
index 27fbbd4bf19e..493ec6a5cb6e 100644
--- a/gcc/Makefile.in
+++ b/gcc/Makefile.in
@@ -239,6 +239,12 @@ INSNEMIT_SEQ_SRC = $(patsubst %, insn-emit-%.cc, 
$(INSNEMIT_SPLITS_SEQ))
 INSNEMIT_SEQ_TMP = $(patsubst %, tmp-emit-%.cc, $(INSNEMIT_SPLITS_SEQ))
 INSNEMIT_SEQ_O = $(patsubst %, insn-emit-%.o, $(INSNEMIT_SPLITS_SEQ))
 
+# Re-use the split number for insn-recog as well.
+INSNRECOG_SPLITS_SEQ = $(wordlist 1,$(NUM_INSNEMIT_SPLITS),$(one_to_))
+INSNRECOG_SEQ_SRC = $(patsubst %, insn-recog-%.cc, $(INSNRECOG_SPLITS_SEQ))
+INSNRECOG_SEQ_TMP = $(patsubst %, tmp-recog-%.cc, $(INSNRECOG_SPLITS_SEQ))
+INSNRECOG_SEQ_O = $(patsubst %, insn-recog-%.o, $(INSNRECOG_SPLITS_SEQ))
+
 # These files are to have specific diagnostics suppressed, or are not to
 # be subject to -Werror:
 # flex output may yield harmless "no previous prototype" warnings
@@ -1385,7 +1391,7 @@ OBJS = \
insn-output.o \
insn-peep.o \
insn-preds.o \
-   insn-recog.o \
+   $(INSNRECOG_SEQ_O) \
insn-enums.o \
ggc-page.o \
adjust-alignment.o \
@@ -1909,8 +1915,8 @@ TREECHECKING = @TREECHECKING@
 FULL_DRIVER_NAME=$(target_noncanonical)-gcc-$(version)$(exeext)
 
 MOSTLYCLEANFILES = insn-flags.h insn-config.h insn-codes.h \
- insn-output.cc insn-recog.cc $(INSNEMIT_SEQ_SRC) \
- insn-extract.cc insn-peep.cc \
+ insn-output.cc $(INSNRECOG_SEQ_SRC) insn-recog.h \
+ $(INSNEMIT_SEQ_SRC) insn-extract.cc insn-peep.cc \
  insn-attr.h insn-attr-common.h insn-attrtab.cc insn-dfatab.cc \
  insn-latencytab.cc insn-opinit.cc insn-opinit.h insn-preds.cc 
insn-constants.h \
  tm-preds.h tm-constrs.h checksum-options $(GIMPLE_MATCH_PD_SEQ_SRC) \
@@ -2677,7 +2683,8 @@ $(common_out_object_file): $(common_out_file)
 # and compile them.
 
 .PRECIOUS: insn-config.h insn-flags.h insn-codes.h insn-constants.h \
-  $(INSNEMIT_SEQ_SRC) insn-recog.cc insn-extract.cc insn-output.cc \
+  $(INSNEMIT_SEQ_SRC) insn-recog.h $(INSNRECOG_SEQ_SRC) \
+  insn-extract.cc insn-output.cc \
   insn-peep.cc insn-attr.h insn-attr-common.h insn-attrtab.cc \
   insn-dfatab.cc insn-latencytab.cc insn-preds.cc \
   $(GIMPLE_MATCH_PD_

[gcc r15-7110] RISC-V: Unbreak bootstrap.

2025-01-21 Thread Robin Dapp via Gcc-cvs
https://gcc.gnu.org/g:e324619281239bb513840600436b735dfbd32416

commit r15-7110-ge324619281239bb513840600436b735dfbd32416
Author: Robin Dapp 
Date:   Tue Jan 21 18:07:41 2025 +0100

RISC-V: Unbreak bootstrap.

This fixes a wrong format specifier and an unused variable which should
re-enable bootstrap.

gcc/ChangeLog:

* config/riscv/riscv.cc (riscv_file_end): Fix format string.
(riscv_lshift_subword): Mark MODE as unused.

Diff:
---
 gcc/config/riscv/riscv.cc | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/gcc/config/riscv/riscv.cc b/gcc/config/riscv/riscv.cc
index f5e672bb7f50..5a3a05041773 100644
--- a/gcc/config/riscv/riscv.cc
+++ b/gcc/config/riscv/riscv.cc
@@ -10375,7 +10375,7 @@ riscv_file_end ()
   fprintf (asm_out_file, "\t.long\t4f - 3f\n");
   fprintf (asm_out_file, "3:\n");
   /* zicfiss, zicfilp.  */
-  fprintf (asm_out_file, "\t.long\t%x\n", feature_1_and);
+  fprintf (asm_out_file, "\t.long\t%lx\n", feature_1_and);
   fprintf (asm_out_file, "4:\n");
   fprintf (asm_out_file, "\t.p2align\t%u\n", p2align);
   fprintf (asm_out_file, "5:\n");
@@ -11959,7 +11959,7 @@ riscv_subword_address (rtx mem, rtx *aligned_mem, rtx 
*shift, rtx *mask,
 /* Leftshift a subword within an SImode register.  */
 
 void
-riscv_lshift_subword (machine_mode mode, rtx value, rtx shift,
+riscv_lshift_subword (machine_mode mode ATTRIBUTE_UNUSED, rtx value, rtx shift,
  rtx *shifted_value)
 {
   rtx value_reg = gen_reg_rtx (SImode);


[gcc r15-7236] RISC-V: testsuite: Fix gather_load_64-12-zvbb.c

2025-01-27 Thread Robin Dapp via Gcc-cvs
https://gcc.gnu.org/g:c0c2304e19be438a64841f6a3c56b134ba02d9a6

commit r15-7236-gc0c2304e19be438a64841f6a3c56b134ba02d9a6
Author: Robin Dapp 
Date:   Wed Jan 22 16:19:49 2025 +0100

RISC-V: testsuite: Fix gather_load_64-12-zvbb.c

The test fails with _zvfh because we vectorize more.  Just adjust the
test expectations.

gcc/testsuite/ChangeLog:

* 
gcc.target/riscv/rvv/autovec/gather-scatter/gather_load_64-12-zvbb.c:
Distinguish between zvfh and !zvfh.

Diff:
---
 .../riscv/rvv/autovec/gather-scatter/gather_load_64-12-zvbb.c  | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git 
a/gcc/testsuite/gcc.target/riscv/rvv/autovec/gather-scatter/gather_load_64-12-zvbb.c
 
b/gcc/testsuite/gcc.target/riscv/rvv/autovec/gather-scatter/gather_load_64-12-zvbb.c
index de5a5ed7d56a..698f0091390e 100644
--- 
a/gcc/testsuite/gcc.target/riscv/rvv/autovec/gather-scatter/gather_load_64-12-zvbb.c
+++ 
b/gcc/testsuite/gcc.target/riscv/rvv/autovec/gather-scatter/gather_load_64-12-zvbb.c
@@ -106,7 +106,8 @@ TEST_LOOP (_Float16, uint64_t)
 TEST_LOOP (float, uint64_t)
 TEST_LOOP (double, uint64_t)
 
-/* { dg-final { scan-tree-dump-times "vectorized 1 loops in function" 80 
"vect" } } */
+/* { dg-final { scan-tree-dump-times "vectorized 1 loops in function" 80 
"vect" { target { ! riscv_zvfh } } } } */
+/* { dg-final { scan-tree-dump-times "vectorized 1 loops in function" 88 
"vect" { target riscv_zvfh } } } */
 /* { dg-final { scan-tree-dump " \.MASK_LEN_GATHER_LOAD" "vect" } } */
 /* { dg-final { scan-tree-dump-not " \.GATHER_LOAD" "vect" } } */
 /* { dg-final { scan-tree-dump-not " \.MASK_GATHER_LOAD" "vect" } } */


[gcc r15-7235] RISC-V: Disable two-source permutes for now [PR117173].

2025-01-27 Thread Robin Dapp via Gcc-cvs
https://gcc.gnu.org/g:006b4e45f3ab169a47146b31d9721c79098236ac

commit r15-7235-g006b4e45f3ab169a47146b31d9721c79098236ac
Author: Robin Dapp 
Date:   Thu Oct 17 18:39:16 2024 +0200

RISC-V: Disable two-source permutes for now [PR117173].

After testing on the BPI (4.2% improvement for x264 input 1, 4.4% for
input 2) and the discussion in PR117173 I figured it's best to disable
the two-source permutes by default for now.

The patch adds a parameter "riscv-two-source-permutes" which restores
the old behavior.

PR target/117173

gcc/ChangeLog:

* config/riscv/riscv-v.cc (shuffle_generic_patterns): Only
support single-source permutes by default.
* config/riscv/riscv.opt: New param "riscv-two-source-permutes".

gcc/testsuite/ChangeLog:

* gcc.dg/fold-perm-2.c: Run with two-source permutes.
* gcc.dg/pr54346.c: Ditto.

Diff:
---
 gcc/config/riscv/riscv-v.cc| 13 -
 gcc/config/riscv/riscv.opt |  4 
 gcc/testsuite/gcc.dg/fold-perm-2.c |  1 +
 gcc/testsuite/gcc.dg/pr54346.c |  1 +
 4 files changed, 18 insertions(+), 1 deletion(-)

diff --git a/gcc/config/riscv/riscv-v.cc b/gcc/config/riscv/riscv-v.cc
index e1172e9c7d2b..9847439ca779 100644
--- a/gcc/config/riscv/riscv-v.cc
+++ b/gcc/config/riscv/riscv-v.cc
@@ -3947,11 +3947,22 @@ shuffle_generic_patterns (struct expand_vec_perm_d *d)
   if (!get_gather_index_mode (d).exists (&sel_mode))
 return false;
 
+  rtx sel = vec_perm_indices_to_rtx (sel_mode, d->perm);
+  poly_uint64 nunits = GET_MODE_NUNITS (sel_mode);
+  rtx elt;
+
+  bool is_simple = d->one_vector_p
+|| const_vec_duplicate_p (sel, &elt)
+|| (nunits.is_constant ()
+   && const_vec_all_in_range_p (sel, 0, nunits - 1));
+
+  if (!is_simple && !riscv_two_source_permutes)
+return false;
+
   /* Success! */
   if (d->testing_p)
 return true;
 
-  rtx sel = vec_perm_indices_to_rtx (sel_mode, d->perm);
   /* Some FIXED-VLMAX/VLS vector permutation situations call targethook
  instead of expand vec_perm, we handle it directly.  */
   expand_vec_perm (d->target, d->op0, d->op1, sel);
diff --git a/gcc/config/riscv/riscv.opt b/gcc/config/riscv/riscv.opt
index f51f8fd1cdf4..7515c8ea13dd 100644
--- a/gcc/config/riscv/riscv.opt
+++ b/gcc/config/riscv/riscv.opt
@@ -622,6 +622,10 @@ Enum(vsetvl_strategy) String(optim-no-fusion) 
Value(VSETVL_OPT_NO_FUSION)
 Target Undocumented RejectNegative Joined Enum(vsetvl_strategy) 
Var(vsetvl_strategy) Init(VSETVL_OPT)
 -param=vsetvl-strategy=Set the optimization level of VSETVL 
insert pass.
 
+-param=riscv-two-source-permutes
+Target Undocumented Uinteger Var(riscv_two_source_permutes) Init(0)
+-param=riscv-two-source-permutes Enable permutes with two source vectors.
+
 Enum
 Name(stringop_strategy) Type(enum stringop_strategy_enum)
 Valid arguments to -mstringop-strategy=:
diff --git a/gcc/testsuite/gcc.dg/fold-perm-2.c 
b/gcc/testsuite/gcc.dg/fold-perm-2.c
index 1a4ab4065de0..9fd809ee2967 100644
--- a/gcc/testsuite/gcc.dg/fold-perm-2.c
+++ b/gcc/testsuite/gcc.dg/fold-perm-2.c
@@ -1,5 +1,6 @@
 /* { dg-do compile } */
 /* { dg-options "-O -fdump-tree-fre1" } */
+/* { dg-additional-options "--param=riscv-two-source-permutes" { target 
riscv*-*-* } } */
 
 typedef int veci __attribute__ ((vector_size (4 * sizeof (int))));
 typedef unsigned int vecu __attribute__ ((vector_size (4 * sizeof (unsigned 
int))));
diff --git a/gcc/testsuite/gcc.dg/pr54346.c b/gcc/testsuite/gcc.dg/pr54346.c
index 5ec0609f1e50..b78e0533ac21 100644
--- a/gcc/testsuite/gcc.dg/pr54346.c
+++ b/gcc/testsuite/gcc.dg/pr54346.c
@@ -1,5 +1,6 @@
 /* { dg-do compile } */
 /* { dg-options "-O -fdump-tree-dse1 -Wno-psabi" } */
+/* { dg-additional-options "--param=riscv-two-source-permutes" { target 
riscv*-*-* } } */
 
 typedef int veci __attribute__ ((vector_size (4 * sizeof (int))));


[gcc r15-7237] RISC-V: testsuite: Fix reduc-8.c and reduc-9.c

2025-01-27 Thread Robin Dapp via Gcc-cvs
https://gcc.gnu.org/g:f7dc4fd62ce4d9287988892b1e94bbdd0ca1c8fa

commit r15-7237-gf7dc4fd62ce4d9287988892b1e94bbdd0ca1c8fa
Author: Robin Dapp 
Date:   Wed Jan 22 18:05:44 2025 +0100

RISC-V: testsuite: Fix reduc-8.c and reduc-9.c

In both tests we expect a VEC_SHL_INSERT expression but we now add the
initial value at the end.  Just remove that scan check.

gcc/testsuite/ChangeLog:

* gcc.target/riscv/rvv/autovec/reduc/reduc-8.c: Remove
VEC_SHL_INSERT check.
* gcc.target/riscv/rvv/autovec/reduc/reduc-9.c: Ditto.

Diff:
---
 gcc/testsuite/gcc.target/riscv/rvv/autovec/reduc/reduc-8.c | 1 -
 gcc/testsuite/gcc.target/riscv/rvv/autovec/reduc/reduc-9.c | 1 -
 2 files changed, 2 deletions(-)

diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/reduc/reduc-8.c 
b/gcc/testsuite/gcc.target/riscv/rvv/autovec/reduc/reduc-8.c
index fe47aa3648dd..518f0c33cc4e 100644
--- a/gcc/testsuite/gcc.target/riscv/rvv/autovec/reduc/reduc-8.c
+++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/reduc/reduc-8.c
@@ -12,5 +12,4 @@ add_loop (int *x, int n, int res)
   return res;
 }
 
-/* { dg-final { scan-tree-dump-times "VEC_SHL_INSERT" 1 "optimized" } } */
 /* { dg-final { scan-assembler-times 
{vslide1up\.vx\s+v[0-9]+,\s*v[0-9]+,\s*[a-x0-9]+} 1 } } */
diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/reduc/reduc-9.c 
b/gcc/testsuite/gcc.target/riscv/rvv/autovec/reduc/reduc-9.c
index 6630d3027210..a5bb8dcccb81 100644
--- a/gcc/testsuite/gcc.target/riscv/rvv/autovec/reduc/reduc-9.c
+++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/reduc/reduc-9.c
@@ -12,5 +12,4 @@ add_loop (float *x, int n, float res)
   return res;
 }
 
-/* { dg-final { scan-tree-dump-times "VEC_SHL_INSERT" 1 "optimized" } } */
 /* { dg-final { scan-assembler-times 
{vfslide1up\.vf\s+v[0-9]+,\s*v[0-9]+,\s*[a-x0-9]+} 1 } } */


[gcc r15-6891] match: Keep conditional in simplification to constant [PR118140].

2025-01-14 Thread Robin Dapp via Gcc-cvs
https://gcc.gnu.org/g:14cb0610559fa33f211e1546260458496fdc5e71

commit r15-6891-g14cb0610559fa33f211e1546260458496fdc5e71
Author: Robin Dapp 
Date:   Fri Dec 27 17:29:25 2024 +0100

match: Keep conditional in simplification to constant [PR118140].

In PR118140 we simplify

  _ifc__33 = .COND_IOR (_41, d_lsm.7_11, _46, d_lsm.7_11);

to 1:

Match-and-simplified .COND_IOR (_41, d_lsm.7_11, _46, d_lsm.7_11) to 1

when _46 == 1.  This happens by removing the conditional and applying
a | 1 = 1.  Normally we re-introduce the conditional and its else value
if needed but that does not happen here as we're not dealing with a
vector type.  For correctness's sake, we must not remove the conditional
even for non-vector types.

This patch re-introduces a COND_EXPR in such cases.  For PR118140 this
results in a non-vectorized loop.

PR middle-end/118140

gcc/ChangeLog:

* gimple-match-exports.cc (maybe_resimplify_conditional_op): Add
COND_EXPR when we simplified to a scalar gimple value but still
have an else value.

gcc/testsuite/ChangeLog:

* gcc.dg/vect/pr118140.c: New test.
* gcc.target/riscv/rvv/autovec/pr118140.c: New test.

Diff:
---
 gcc/gimple-match-exports.cc| 26 +++
 gcc/testsuite/gcc.dg/vect/pr118140.c   | 27 
 .../gcc.target/riscv/rvv/autovec/pr118140.c| 29 ++
 3 files changed, 72 insertions(+), 10 deletions(-)

diff --git a/gcc/gimple-match-exports.cc b/gcc/gimple-match-exports.cc
index e06a8aaa1712..ccba046a1d4f 100644
--- a/gcc/gimple-match-exports.cc
+++ b/gcc/gimple-match-exports.cc
@@ -337,23 +337,29 @@ maybe_resimplify_conditional_op (gimple_seq *seq, 
gimple_match_op *res_op,
 }
 
   /* If the "then" value is a gimple value and the "else" value matters,
- create a VEC_COND_EXPR between them, then see if it can be further
+ create a (VEC_)COND_EXPR between them, then see if it can be further
  simplified.  */
   gimple_match_op new_op;
   if (res_op->cond.else_value
-  && VECTOR_TYPE_P (res_op->type)
   && gimple_simplified_result_is_gimple_val (res_op))
 {
-  tree len = res_op->cond.len;
-  if (!len)
-   new_op.set_op (VEC_COND_EXPR, res_op->type,
-  res_op->cond.cond, res_op->ops[0],
-  res_op->cond.else_value);
+  if (VECTOR_TYPE_P (res_op->type))
+   {
+ tree len = res_op->cond.len;
+ if (!len)
+   new_op.set_op (VEC_COND_EXPR, res_op->type,
+  res_op->cond.cond, res_op->ops[0],
+  res_op->cond.else_value);
+ else
+   new_op.set_op (IFN_VCOND_MASK_LEN, res_op->type,
+  res_op->cond.cond, res_op->ops[0],
+  res_op->cond.else_value,
+  res_op->cond.len, res_op->cond.bias);
+   }
   else
-   new_op.set_op (IFN_VCOND_MASK_LEN, res_op->type,
+   new_op.set_op (COND_EXPR, res_op->type,
   res_op->cond.cond, res_op->ops[0],
-  res_op->cond.else_value,
-  res_op->cond.len, res_op->cond.bias);
+  res_op->cond.else_value);
   *res_op = new_op;
   return gimple_resimplify3 (seq, res_op, valueize);
 }
diff --git a/gcc/testsuite/gcc.dg/vect/pr118140.c 
b/gcc/testsuite/gcc.dg/vect/pr118140.c
new file mode 100644
index ..2dab98bfc913
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/vect/pr118140.c
@@ -0,0 +1,27 @@
+/* { dg-do run { target { aarch64*-*-* || riscv*-*-* } } } */
+/* { dg-additional-options "-std=gnu99" } */
+
+long long a;
+_Bool d;
+char e;
+_Bool f[17];
+_Bool f_3;
+
+int main() {
+  for (char g = 3; g < 16; g++) {
+  d |= ({
+int h = f[g - 1] ? 2 : 0;
+_Bool t;
+if (f[g - 1])
+  t = f_3;
+else
+  t = 0;
+int i = t;
+h > i;
+  });
+e += f[g + 1];
+  }
+
+  if (d != 0)
+__builtin_abort ();
+}
diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/pr118140.c 
b/gcc/testsuite/gcc.target/riscv/rvv/autovec/pr118140.c
new file mode 100644
index ..31134de7b3a3
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/pr118140.c
@@ -0,0 +1,29 @@
+/* { dg-do run } */
+/* { dg-require-effective-target riscv_v_ok } */
+/* { dg-add-options riscv_v } */
+/* { dg-additional-options "-std=gnu99 -Wno-pedantic" } */
+
+long long a;
+_Bool d;
+char e;
+_Bool f[17];
+_Bool f_3;
+
+int main() {
+  for (char g = 3; g < 16; g++) {
+  d |= ({
+int h = f[g - 1] ? 2 : 0;
+_Bool t;
+if (f[g - 1])
+  t = f_3;
+else
+  t = 0;
+int i = t;
+h > i;
+  });
+e += f[g + 1];
+  }
+
+  if (d != 0)
+__builtin_abort ();
+}


[gcc r15-6892] RISC-V: Fix vsetvl compatibility predicate [PR118154].

2025-01-14 Thread Robin Dapp via Gcc-cvs
https://gcc.gnu.org/g:e5e9e50fc6816713d012f1d96ae308a0946d5a14

commit r15-6892-ge5e9e50fc6816713d012f1d96ae308a0946d5a14
Author: Robin Dapp 
Date:   Thu Jan 9 20:45:10 2025 +0100

RISC-V: Fix vsetvl compatibility predicate [PR118154].

In PR118154 we emit strided stores but the first of those does not
always have the proper VTYPE.  That's because we erroneously delete
a necessary vsetvl.

In order to determine whether to elide

(1)
  Expr[7]: VALID (insn 116, bb 17)
Demand fields: demand_ratio_and_ge_sew demand_avl
SEW=8, VLMUL=mf2, RATIO=16, MAX_SEW=64
TAIL_POLICY=agnostic, MASK_POLICY=agnostic
AVL=(reg:DI 0 zero)

when e.g.

(2)
  Expr[3]: VALID (insn 360, bb 15)
Demand fields: demand_sew_lmul demand_avl
SEW=64, VLMUL=m1, RATIO=64, MAX_SEW=64
TAIL_POLICY=agnostic, MASK_POLICY=agnostic
AVL=(reg:DI 0 zero)
VL=(reg:DI 13 a3 [345])

is already available, we use
sew_ge_and_prev_sew_le_next_max_sew_and_next_ratio_valid_for_prev_sew_p.

(1) requires RATIO = SEW/LMUL = 16 and an SEW >= 8.  (2) has ratio = 64,
though, so we cannot directly elide (1).

This patch uses ratio_eq_p instead of next_ratio_valid_for_prev_sew_p.

PR target/118154

gcc/ChangeLog:

* config/riscv/riscv-vsetvl.cc (MAX_LMUL): New define.
(pre_vsetvl::earliest_fuse_vsetvl_info): Use.
(pre_vsetvl::pre_global_vsetvl_info): New predicate with equal
ratio.
* config/riscv/riscv-vsetvl.def: Use.

gcc/testsuite/ChangeLog:

* gcc.target/riscv/rvv/autovec/pr118154-1.c: New test.
* gcc.target/riscv/rvv/autovec/pr118154-2.c: New test.

Diff:
---
 gcc/config/riscv/riscv-vsetvl.cc   | 14 +++---
 gcc/config/riscv/riscv-vsetvl.def  |  4 +--
 .../gcc.target/riscv/rvv/autovec/pr118154-1.c  | 23 
 .../gcc.target/riscv/rvv/autovec/pr118154-2.c  | 31 ++
 4 files changed, 67 insertions(+), 5 deletions(-)

diff --git a/gcc/config/riscv/riscv-vsetvl.cc b/gcc/config/riscv/riscv-vsetvl.cc
index e9de21787dda..a4016beebc0c 100644
--- a/gcc/config/riscv/riscv-vsetvl.cc
+++ b/gcc/config/riscv/riscv-vsetvl.cc
@@ -223,6 +223,8 @@ enum emit_type
   EMIT_AFTER,
 };
 
+static const int MAX_LMUL = 8;
+
 /* dump helper functions */
 static const char *
 vlmul_to_str (vlmul_type vlmul)
@@ -1445,14 +1447,13 @@ private:
   inline bool prev_ratio_valid_for_next_sew_p (const vsetvl_info &prev,
   const vsetvl_info &next)
   {
-return prev.get_ratio () >= (next.get_sew () / 8);
+return prev.get_ratio () >= (next.get_sew () / MAX_LMUL);
   }
   inline bool next_ratio_valid_for_prev_sew_p (const vsetvl_info &prev,
   const vsetvl_info &next)
   {
-return next.get_ratio () >= (prev.get_sew () / 8);
+return next.get_ratio () >= (prev.get_sew () / MAX_LMUL);
   }
-
   inline bool sew_ge_and_ratio_eq_p (const vsetvl_info &prev,
 const vsetvl_info &next)
   {
@@ -1470,6 +1471,13 @@ private:
 return sew_ge_p (prev, next) && prev_sew_le_next_max_sew_p (prev, next)
   && next_ratio_valid_for_prev_sew_p (prev, next);
   }
+  inline bool
+  sew_ge_and_prev_sew_le_next_max_sew_and_ratio_eq_p (
+const vsetvl_info &prev, const vsetvl_info &next)
+  {
+return sew_ge_p (prev, next) && prev_sew_le_next_max_sew_p (prev, next)
+  && ratio_eq_p (prev, next);
+  }
   inline bool sew_le_and_next_sew_le_prev_max_sew_p (const vsetvl_info &prev,
 const vsetvl_info &next)
   {
diff --git a/gcc/config/riscv/riscv-vsetvl.def 
b/gcc/config/riscv/riscv-vsetvl.def
index 2dfff71d987f..d7a5ada772d0 100644
--- a/gcc/config/riscv/riscv-vsetvl.def
+++ b/gcc/config/riscv/riscv-vsetvl.def
@@ -53,8 +53,8 @@ DEF_SEW_LMUL_RULE (sew_lmul, ge_sew, sew_lmul,
   sew_ge_and_prev_sew_le_next_max_sew_p, nop)
 DEF_SEW_LMUL_RULE (
   sew_lmul, ratio_and_ge_sew, sew_lmul,
-  sew_ge_and_prev_sew_le_next_max_sew_and_next_ratio_valid_for_prev_sew_p,
-  sew_ge_and_prev_sew_le_next_max_sew_and_next_ratio_valid_for_prev_sew_p, nop)
+  sew_ge_and_prev_sew_le_next_max_sew_and_ratio_eq_p,
+  sew_ge_and_prev_sew_le_next_max_sew_and_ratio_eq_p, nop)
 
 DEF_SEW_LMUL_RULE (ratio_only, sew_lmul, sew_lmul, ratio_eq_p, always_false,
   use_next_sew_lmul)
diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/pr118154-1.c 
b/gcc/testsuite/gcc.target/riscv/rvv/autovec/pr118154-1.c
new file mode 100644
index ..55386568a5f7
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/pr118154-1.c
@@ -0,0 +1,23 @@
+/* { dg-do run } */
+/* { dg-require-effective-target riscv_v_ok } */
+/* 

[gcc r14-11210] match: Keep conditional in simplification to constant [PR118140].

2025-01-14 Thread Robin Dapp via Gcc-cvs
https://gcc.gnu.org/g:9b8488f662b070933d0427df22811bf1eaac661e

commit r14-11210-g9b8488f662b070933d0427df22811bf1eaac661e
Author: Robin Dapp 
Date:   Fri Dec 27 17:29:25 2024 +0100

match: Keep conditional in simplification to constant [PR118140].

In PR118140 we simplify

  _ifc__33 = .COND_IOR (_41, d_lsm.7_11, _46, d_lsm.7_11);

to 1:

Match-and-simplified .COND_IOR (_41, d_lsm.7_11, _46, d_lsm.7_11) to 1

when _46 == 1.  This happens by removing the conditional and applying
a | 1 = 1.  Normally we re-introduce the conditional and its else value
if needed but that does not happen here as we're not dealing with a
vector type.  For correctness's sake, we must not remove the conditional
even for non-vector types.

This patch re-introduces a COND_EXPR in such cases.  For PR118140 this
results in a non-vectorized loop.

PR middle-end/118140

gcc/ChangeLog:

* gimple-match-exports.cc (maybe_resimplify_conditional_op): Add
COND_EXPR when we simplified to a scalar gimple value but still
have an else value.

gcc/testsuite/ChangeLog:

* gcc.dg/vect/pr118140.c: New test.
* gcc.target/riscv/rvv/autovec/pr118140.c: New test.

(cherry picked from commit 14cb0610559fa33f211e1546260458496fdc5e71)

Diff:
---
 gcc/gimple-match-exports.cc| 26 +++
 gcc/testsuite/gcc.dg/vect/pr118140.c   | 27 
 .../gcc.target/riscv/rvv/autovec/pr118140.c| 29 ++
 3 files changed, 72 insertions(+), 10 deletions(-)

diff --git a/gcc/gimple-match-exports.cc b/gcc/gimple-match-exports.cc
index aacf3ff04145..1fe6c0e38833 100644
--- a/gcc/gimple-match-exports.cc
+++ b/gcc/gimple-match-exports.cc
@@ -323,23 +323,29 @@ maybe_resimplify_conditional_op (gimple_seq *seq, 
gimple_match_op *res_op,
 }
 
   /* If the "then" value is a gimple value and the "else" value matters,
- create a VEC_COND_EXPR between them, then see if it can be further
+ create a (VEC_)COND_EXPR between them, then see if it can be further
  simplified.  */
   gimple_match_op new_op;
   if (res_op->cond.else_value
-  && VECTOR_TYPE_P (res_op->type)
   && gimple_simplified_result_is_gimple_val (res_op))
 {
-  tree len = res_op->cond.len;
-  if (!len)
-   new_op.set_op (VEC_COND_EXPR, res_op->type,
-  res_op->cond.cond, res_op->ops[0],
-  res_op->cond.else_value);
+  if (VECTOR_TYPE_P (res_op->type))
+   {
+ tree len = res_op->cond.len;
+ if (!len)
+   new_op.set_op (VEC_COND_EXPR, res_op->type,
+  res_op->cond.cond, res_op->ops[0],
+  res_op->cond.else_value);
+ else
+   new_op.set_op (IFN_VCOND_MASK_LEN, res_op->type,
+  res_op->cond.cond, res_op->ops[0],
+  res_op->cond.else_value,
+  res_op->cond.len, res_op->cond.bias);
+   }
   else
-   new_op.set_op (IFN_VCOND_MASK_LEN, res_op->type,
+   new_op.set_op (COND_EXPR, res_op->type,
   res_op->cond.cond, res_op->ops[0],
-  res_op->cond.else_value,
-  res_op->cond.len, res_op->cond.bias);
+  res_op->cond.else_value);
   *res_op = new_op;
   return gimple_resimplify3 (seq, res_op, valueize);
 }
diff --git a/gcc/testsuite/gcc.dg/vect/pr118140.c 
b/gcc/testsuite/gcc.dg/vect/pr118140.c
new file mode 100644
index ..2dab98bfc913
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/vect/pr118140.c
@@ -0,0 +1,27 @@
+/* { dg-do run { target { aarch64*-*-* || riscv*-*-* } } } */
+/* { dg-additional-options "-std=gnu99" } */
+
+long long a;
+_Bool d;
+char e;
+_Bool f[17];
+_Bool f_3;
+
+int main() {
+  for (char g = 3; g < 16; g++) {
+  d |= ({
+int h = f[g - 1] ? 2 : 0;
+_Bool t;
+if (f[g - 1])
+  t = f_3;
+else
+  t = 0;
+int i = t;
+h > i;
+  });
+e += f[g + 1];
+  }
+
+  if (d != 0)
+__builtin_abort ();
+}
diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/pr118140.c 
b/gcc/testsuite/gcc.target/riscv/rvv/autovec/pr118140.c
new file mode 100644
index ..31134de7b3a3
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/pr118140.c
@@ -0,0 +1,29 @@
+/* { dg-do run } */
+/* { dg-require-effective-target riscv_v_ok } */
+/* { dg-add-options riscv_v } */
+/* { dg-additional-options "-std=gnu99 -Wno-pedantic" } */
+
+long long a;
+_Bool d;
+char e;
+_Bool f[17];
+_Bool f_3;
+
+int main() {
+  for (char g = 3; g < 16; g++) {
+  d |= ({
+int h = f[g - 1] ? 2 : 0;
+_Bool t;
+if (f[g - 1])
+  t = f_3;
+else
+  t = 0;
+int i = t;
+h > i;
+  });
+e +

[gcc r15-8084] RISC-V: Mask values before initializing bitmask vector [PR119114].

2025-03-17 Thread Robin Dapp via Gcc-cvs
https://gcc.gnu.org/g:b58a0af4e2e28b395a3cb4b7283f16f05c0cf3c8

commit r15-8084-gb58a0af4e2e28b395a3cb4b7283f16f05c0cf3c8
Author: Robin Dapp 
Date:   Tue Mar 11 14:30:48 2025 +0100

RISC-V: Mask values before initializing bitmask vector [PR119114].

In the somewhat convoluted vector code of PR119114 we extract a
 mask value from a vector mask.  After some middle-end
simplifications we end up with a value of -2.  Its lowest bit is
correctly unset representing "false".  When initializing a bitmask vector
from values we compare the full value/register against zero instead of
just the last bit.  This causes erroneous mask values.

This patch masks the values by & 0x1 before comparing against 0.

PR target/119114

gcc/ChangeLog:

* config/riscv/autovec.md: Apply & 0x1 mask when initializing
bitmask vector.

gcc/testsuite/ChangeLog:

* gcc.target/riscv/rvv/autovec/pr119114.c: New test.

Diff:
---
 gcc/config/riscv/autovec.md| 14 +++-
 .../gcc.target/riscv/rvv/autovec/pr119114.c| 37 ++
 2 files changed, 50 insertions(+), 1 deletion(-)

diff --git a/gcc/config/riscv/autovec.md b/gcc/config/riscv/autovec.md
index 92e6942b5233..c7f12f9e36f5 100644
--- a/gcc/config/riscv/autovec.md
+++ b/gcc/config/riscv/autovec.md
@@ -405,16 +405,28 @@
 
 ;; Provide a vec_init for mask registers by initializing
 ;; a QImode vector and comparing it against 0.
+;; As we need to ignore all but the lowest bit apply an AND mask
+;; before doing the comparison.
 (define_expand "vec_initqi"
   [(match_operand:VB 0 "register_operand")
(match_operand 1 "")]
   "TARGET_VECTOR"
   {
+/* Expand into a QImode vector.  */
 machine_mode qimode = riscv_vector::get_vector_mode
(QImode, GET_MODE_NUNITS (mode)).require ();
 rtx tmp = gen_reg_rtx (qimode);
 riscv_vector::expand_vec_init (tmp, operands[1]);
-riscv_vector::expand_vec_cmp (operands[0], NE, tmp, CONST0_RTX (qimode));
+
+/* & 0x1.  */
+insn_code icode = code_for_pred (AND, qimode);
+rtx tmp2 = gen_reg_rtx (qimode);
+rtx ones = gen_const_vec_duplicate (qimode, GEN_INT (1));
+rtx ops[] = {tmp2, tmp, ones};
+riscv_vector::emit_vlmax_insn (icode, riscv_vector::BINARY_OP, ops);
+
+/* Compare against zero.  */
+riscv_vector::expand_vec_cmp (operands[0], NE, tmp2, CONST0_RTX (qimode));
 DONE;
   }
 )
diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/pr119114.c 
b/gcc/testsuite/gcc.target/riscv/rvv/autovec/pr119114.c
new file mode 100644
index ..01025d62f1fd
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/pr119114.c
@@ -0,0 +1,37 @@
+/* { dg-do run } */
+/* { dg-require-effective-target riscv_v_ok } */
+/* { dg-require-effective-target rv64 } */
+/* { dg-add-options riscv_v } */
+/* { dg-options "-O3 -march=rv64gcv -mabi=lp64d -std=gnu99 -fwhole-program 
-mrvv-vector-bits=zvl" } */
+
+_Bool a;
+short b[18];
+long long al;
+_Bool e;
+char f = 010;
+short t[18];
+unsigned short w[8][18][18][18];
+
+void
+c (_Bool e, char f, short t[], unsigned short w[][18][18][18])
+{
+  for (int ae = 1; ae < f + 5; ae += 2)
+{
+  a -= (_Bool) (t[ae - 1] & t[ae + 3]);
+  for (short af = 0; af < 18; af += 2)
+   for (_Bool ah = 0; ah < (w[e][1][af][0] > 0); ah = 5)
+ b[af] |= 9;
+}
+}
+
+int
+main ()
+{
+  for (int ad = 0; ad < 18; ad++)
+t[ad] = 3;
+
+  c (e, f, t, w);
+  al = a;
+  if (al != 0)
+__builtin_abort ();
+}


[gcc r15-9329] expr: Use constant_lower_bound classifying constructor els [PR116595].

2025-04-09 Thread Robin Dapp via Gcc-cvs
https://gcc.gnu.org/g:f183ae0ae891a471764876eb1e69239904598bb4

commit r15-9329-gf183ae0ae891a471764876eb1e69239904598bb4
Author: Robin Dapp 
Date:   Thu Apr 3 16:46:05 2025 +0200

expr: Use constant_lower_bound classifying constructor els [PR116595].

In categorize_ctor_elements_1 we do
  VECTOR_CST_NELTS (value).to_constant ()
but VALUE's type can be a VLA vector (since r15-5780-g17b520a10cdaab).

This patch uses constant_lower_bound instead.

PR middle-end/116595

gcc/ChangeLog:

* expr.cc (categorize_ctor_elements_1): Use
constant_lower_bound.

gcc/testsuite/ChangeLog:

* g++.target/riscv/rvv/autovec/pr116595.C: New test.

Diff:
---
 gcc/expr.cc   |  6 +++---
 gcc/testsuite/g++.target/riscv/rvv/autovec/pr116595.C | 10 ++
 2 files changed, 13 insertions(+), 3 deletions(-)

diff --git a/gcc/expr.cc b/gcc/expr.cc
index 2147eedad7be..3815c565e2d8 100644
--- a/gcc/expr.cc
+++ b/gcc/expr.cc
@@ -7193,9 +7193,9 @@ categorize_ctor_elements_1 (const_tree ctor, 
HOST_WIDE_INT *p_nz_elts,
 
case VECTOR_CST:
  {
-   /* We can only construct constant-length vectors using
-  CONSTRUCTOR.  */
-   unsigned int nunits = VECTOR_CST_NELTS (value).to_constant ();
+   unsigned int nunits
+ = constant_lower_bound
+ (TYPE_VECTOR_SUBPARTS (TREE_TYPE (value)));
for (unsigned int i = 0; i < nunits; ++i)
  {
tree v = VECTOR_CST_ELT (value, i);
diff --git a/gcc/testsuite/g++.target/riscv/rvv/autovec/pr116595.C 
b/gcc/testsuite/g++.target/riscv/rvv/autovec/pr116595.C
new file mode 100644
index ..6d509d2cf74e
--- /dev/null
+++ b/gcc/testsuite/g++.target/riscv/rvv/autovec/pr116595.C
@@ -0,0 +1,10 @@
+/* { dg-do compile } */
+/* { dg-options "-march=rv64gcv" } */
+
+#include <riscv_vector.h>
+
+void
+vsseg (float *a, vfloat32mf2_t b, vfloat32mf2_t c, unsigned long vl)
+{
+  vfloat32mf2x2_t foo = vfloat32mf2x2_t ();
+}


[gcc r15-9330] testsuite: Add -mabi to pr116595.C

2025-04-09 Thread Robin Dapp via Gcc-cvs
https://gcc.gnu.org/g:ac1044da4b3db6cba7aa5d9faa1f0622b10ff823

commit r15-9330-gac1044da4b3db6cba7aa5d9faa1f0622b10ff823
Author: Robin Dapp 
Date:   Wed Apr 9 12:11:52 2025 +0200

testsuite: Add -mabi to pr116595.C

As usual, I forgot to add -mabi=lp64d to the test case.  This patch adds
it.  Going to push as obvious.

gcc/testsuite/ChangeLog:

* g++.target/riscv/rvv/autovec/pr116595.C: Add -mabi.

Diff:
---
 gcc/testsuite/g++.target/riscv/rvv/autovec/pr116595.C | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/gcc/testsuite/g++.target/riscv/rvv/autovec/pr116595.C 
b/gcc/testsuite/g++.target/riscv/rvv/autovec/pr116595.C
index 6d509d2cf74e..37475493a214 100644
--- a/gcc/testsuite/g++.target/riscv/rvv/autovec/pr116595.C
+++ b/gcc/testsuite/g++.target/riscv/rvv/autovec/pr116595.C
@@ -1,5 +1,5 @@
 /* { dg-do compile } */
-/* { dg-options "-march=rv64gcv" } */
+/* { dg-options "-march=rv64gcv -mabi=lp64d" } */
 
 #include <riscv_vector.h>


[gcc r15-9491] RISC-V: Do not lift up vsetvl into non-transparent blocks [PR119547].

2025-04-15 Thread Robin Dapp via Gcc-cvs
https://gcc.gnu.org/g:517f7e3f02b4c945d2b4bdabb490961cf986391e

commit r15-9491-g517f7e3f02b4c945d2b4bdabb490961cf986391e
Author: Robin Dapp 
Date:   Fri Apr 4 17:06:44 2025 +0200

RISC-V: Do not lift up vsetvl into non-transparent blocks [PR119547].

When lifting up a vsetvl into a block we currently don't consider the
block's transparency with respect to the vsetvl as in other parts of the
pass.  This patch does not perform the lift when transparency is not
guaranteed.

This condition is more restrictive than necessary as we can still
perform a vsetvl lift if the conflicting register is only ever used
in vsetvls and no regular insns but given how late we are in the GCC 15
cycle it seems better to defer this.  Therefore
gcc.target/riscv/rvv/vsetvl/avl_single-68.c is XFAILed for now.

This issue was found in OpenCV where it manifests as a runtime error.
Zhijin Zeng debugged PR119547 and provided an initial patch.

Reported-By: 曾治金 

PR target/119547

gcc/ChangeLog:

* config/riscv/riscv-vsetvl.cc 
(pre_vsetvl::earliest_fuse_vsetvl_info):
Do not perform lift if block is not transparent.

gcc/testsuite/ChangeLog:

* gcc.target/riscv/rvv/vsetvl/avl_single-68.c: xfail.
* g++.target/riscv/rvv/autovec/pr119547.C: New test.
* g++.target/riscv/rvv/autovec/pr119547-2.C: New test.
* gcc.target/riscv/rvv/vsetvl/vlmax_switch_vtype-10.c: Adjust.

Diff:
---
 gcc/config/riscv/riscv-vsetvl.cc   |  12 ++
 .../g++.target/riscv/rvv/autovec/pr119547-2.C  | 212 +
 .../g++.target/riscv/rvv/autovec/pr119547.C|  82 
 .../gcc.target/riscv/rvv/vsetvl/avl_single-68.c|   8 +-
 .../riscv/rvv/vsetvl/vlmax_switch_vtype-10.c   |   4 +-
 5 files changed, 315 insertions(+), 3 deletions(-)

diff --git a/gcc/config/riscv/riscv-vsetvl.cc b/gcc/config/riscv/riscv-vsetvl.cc
index 0ac2538f596f..c4046bcc3455 100644
--- a/gcc/config/riscv/riscv-vsetvl.cc
+++ b/gcc/config/riscv/riscv-vsetvl.cc
@@ -3022,6 +3022,18 @@ pre_vsetvl::earliest_fuse_vsetvl_info (int iter)
  continue;
}
 
+ /* We cannot lift a vsetvl into the source block if the block is
+not transparent WRT to it.
+This is too restrictive for blocks where a register's use only
+feeds into vsetvls and no regular insns.  One example is the
+test rvv/vsetvl/avl_single-68.c which is currently XFAILed for
+that reason.
+In order to support this case we'd need to check the vsetvl's
+AVL operand's uses in the source block and make sure they are
+only used in other vsetvls.  */
+ if (!bitmap_bit_p (m_transp[eg->src->index], expr_index))
+   continue;
+
  if (dump_file && (dump_flags & TDF_DETAILS))
{
  fprintf (dump_file,
diff --git a/gcc/testsuite/g++.target/riscv/rvv/autovec/pr119547-2.C 
b/gcc/testsuite/g++.target/riscv/rvv/autovec/pr119547-2.C
new file mode 100644
index ..1b98d3d0c32b
--- /dev/null
+++ b/gcc/testsuite/g++.target/riscv/rvv/autovec/pr119547-2.C
@@ -0,0 +1,212 @@
+/* { dg-do run { target rv64 } } */
+/* { dg-require-effective-target riscv_v_ok } */
+/* { dg-options "-O3 -march=rv64gcv -mabi=lp64d 
--param=logical-op-non-short-circuit=0" } */
+
+#include 
+
+using v_uint8 = vuint8m2_t;
+using v_int8 = vint8m2_t;
+using v_uint16 = vuint16m2_t;
+using v_int16 = vint16m2_t;
+using v_uint32 = vuint32m2_t;
+using v_int32 = vint32m2_t;
+using v_uint64 = vuint64m2_t;
+using v_int64 = vint64m2_t;
+using v_float32 = vfloat32m2_t;
+using v_float64 = vfloat64m2_t;
+
+using uchar = unsigned char;
+using schar = signed char;
+using ushort = unsigned short;
+using uint = unsigned int;
+using uint64 = unsigned long int;
+using int64 = long int;
+
+struct Size
+{
+  int width;
+  int height;
+};
+
+template  struct VTraits;
+
+template <> struct VTraits
+{
+  static inline int vlanes () { return __riscv_vsetvlmax_e32m1 (); }
+  using lane_type = int32_t;
+  static const int max_nlanes = 1024 / 32 * 2;
+};
+template <> struct VTraits
+{
+  static inline int vlanes () { return __riscv_vsetvlmax_e32m2 (); }
+  using lane_type = int32_t;
+  static const int max_nlanes = 1024 / 32 * 2;
+};
+template <> struct VTraits
+{
+  static inline int vlanes () { return __riscv_vsetvlmax_e32m4 (); }
+  using lane_type = int32_t;
+  static const int max_nlanes = 1024 / 32 * 2;
+};
+template <> struct VTraits
+{
+  static inline int vlanes () { return __riscv_vsetvlmax_e32m8 (); }
+  using lane_type = int32_t;
+  static const int max_nlanes = 1024 / 32 * 2;
+};
+
+template <> struct VTraits
+{
+  static inline int vlanes () { return __riscv_vsetvlmax_e64m1 (); }
+  using lane_type = double;
+  static const int max_nlanes = 102

[gcc r14-11747] vect: Use original LHS type for gather pattern [PR118950].

2025-05-07 Thread Robin Dapp via Gcc-cvs
https://gcc.gnu.org/g:bed0053a28e238cb9c079a3ec89341541390f933

commit r14-11747-gbed0053a28e238cb9c079a3ec89341541390f933
Author: Robin Dapp 
Date:   Fri Feb 21 07:19:40 2025 +0100

vect: Use original LHS type for gather pattern [PR118950].

In PR118950 we do not zero masked elements in a gather load.
While recognizing a gather/scatter pattern we do not use the original
type of the LHS.  This matters because the type can differ with bool
patterns (e.g. _Bool vs unsigned char) and we don't notice the need
for zeroing out the padding bytes.

This patch just uses the original LHS's type.

PR middle-end/118950

gcc/ChangeLog:

* tree-vect-patterns.cc (vect_recog_gather_scatter_pattern): Use
original LHS's type.

gcc/testsuite/ChangeLog:

* gcc.target/riscv/rvv/autovec/pr118950.c: New test.

(cherry picked from commit f3d4208e798afafcba5246334004e9646e390681)

Diff:
---
 .../gcc.target/riscv/rvv/autovec/pr118950.c| 29 ++
 gcc/tree-vect-patterns.cc  |  3 ++-
 2 files changed, 31 insertions(+), 1 deletion(-)

diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/pr118950.c 
b/gcc/testsuite/gcc.target/riscv/rvv/autovec/pr118950.c
new file mode 100644
index ..604d4264eac1
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/pr118950.c
@@ -0,0 +1,29 @@
+/* { dg-do run } */
+/* { dg-require-effective-target riscv_v_ok } */
+/* { dg-add-options riscv_v } */
+/* { dg-additional-options "-std=gnu99 -Wno-pedantic" } */
+
+unsigned char a;
+long long r;
+_Bool h = 1;
+short j[23];
+_Bool k[3][23];
+
+void b(_Bool h, short j[], _Bool k[][23]) {
+  for (int m = 0; m < 23; m += 3)
+for (short n = 0; n < 22; n += 4)
+  a = ({
+unsigned char o = a;
+unsigned char p = j[n] ? h : k[m][n];
+o > p ? o : p;
+  });
+}
+
+int main() {
+  for (int m = 0; m < 23; ++m)
+j[m] = 10;
+  b(h, j, k);
+  r = a;
+  if (r != 1)
+__builtin_abort ();
+}
diff --git a/gcc/tree-vect-patterns.cc b/gcc/tree-vect-patterns.cc
index 87c2acff386d..ed6cac77d2a1 100644
--- a/gcc/tree-vect-patterns.cc
+++ b/gcc/tree-vect-patterns.cc
@@ -6264,7 +6264,8 @@ vect_recog_gather_scatter_pattern (vec_info *vinfo,
   else
pattern_stmt = gimple_build_call_internal (gs_info.ifn, 4, base,
   offset, scale, zero);
-  tree load_lhs = vect_recog_temp_ssa_var (gs_info.element_type, NULL);
+  tree lhs = gimple_get_lhs (stmt_info->stmt);
+  tree load_lhs = vect_recog_temp_ssa_var (TREE_TYPE (lhs), NULL);
   gimple_call_set_lhs (pattern_stmt, load_lhs);
 }
   else


[gcc r15-7608] RISC-V: Fix ratio in vsetvl fuse rule [PR115703].

2025-02-18 Thread Robin Dapp via Gcc-cvs
https://gcc.gnu.org/g:44d4a1086d965fb5280daf65c7c4a253ad6cc8a1

commit r15-7608-g44d4a1086d965fb5280daf65c7c4a253ad6cc8a1
Author: Robin Dapp 
Date:   Thu Feb 6 14:43:17 2025 +0100

RISC-V: Fix ratio in vsetvl fuse rule [PR115703].

In PR115703 we fuse two vsetvls:

Fuse curr info since prev info compatible with it:
  prev_info: VALID (insn 438, bb 2)
Demand fields: demand_ge_sew demand_non_zero_avl
SEW=32, VLMUL=m1, RATIO=32, MAX_SEW=64
TAIL_POLICY=agnostic, MASK_POLICY=agnostic
AVL=(reg:DI 0 zero)
VL=(reg:DI 9 s1 [312])
  curr_info: VALID (insn 92, bb 20)
Demand fields: demand_ratio_and_ge_sew demand_avl
SEW=64, VLMUL=m1, RATIO=64, MAX_SEW=64
TAIL_POLICY=agnostic, MASK_POLICY=agnostic
AVL=(const_int 4 [0x4])
VL=(nil)
  prev_info after fused: VALID (insn 438, bb 2)
Demand fields: demand_ratio_and_ge_sew demand_avl
SEW=64, VLMUL=mf2, RATIO=64, MAX_SEW=64
TAIL_POLICY=agnostic, MASK_POLICY=agnostic
AVL=(const_int 4 [0x4])
VL=(nil).

The result is vsetvl zero, zero, e64, mf2, ta, ma.  The previous vsetvl
set vl = 4 but here we wrongly set it to vl = 2.  As all the following
vsetvls only ever change the ratio we never recover.

The issue is quite difficult to trigger because we can often
deduce the value of d at runtime.  Then every check for the value of
d will be optimized away.

The last known bad commit is r15-3458-g5326306e7d9d36.  With that commit
the output is wrong but -fno-schedule-insns makes it correct.  From the
next commit on the issue is latent.  I still added the PR's test as scan
and run check even if they don't trigger right now.  Not sure if the
run test will ever fail but well.  I verified that the
patch fixes the issue when applied on top of r15-3458-g5326306e7d9d36.

PR target/115703

gcc/ChangeLog:

* config/riscv/riscv-vsetvl.cc: Use max_sew for calculating the
new LMUL.

gcc/testsuite/ChangeLog:

* gcc.target/riscv/rvv/autovec/pr115703-run.c: New test.
* gcc.target/riscv/rvv/autovec/pr115703.c: New test.

Diff:
---
 gcc/config/riscv/riscv-vsetvl.cc   |  3 +-
 .../gcc.target/riscv/rvv/autovec/pr115703-run.c| 44 ++
 .../gcc.target/riscv/rvv/autovec/pr115703.c| 38 +++
 3 files changed, 84 insertions(+), 1 deletion(-)

diff --git a/gcc/config/riscv/riscv-vsetvl.cc b/gcc/config/riscv/riscv-vsetvl.cc
index 72c4c59514e5..82284624a242 100644
--- a/gcc/config/riscv/riscv-vsetvl.cc
+++ b/gcc/config/riscv/riscv-vsetvl.cc
@@ -1756,7 +1756,8 @@ private:
   inline void use_max_sew_and_lmul_with_next_ratio (vsetvl_info &prev,
const vsetvl_info &next)
   {
-prev.set_vlmul (calculate_vlmul (prev.get_sew (), next.get_ratio ()));
+int max_sew = MAX (prev.get_sew (), next.get_sew ());
+prev.set_vlmul (calculate_vlmul (max_sew, next.get_ratio ()));
 use_max_sew (prev, next);
 prev.set_ratio (next.get_ratio ());
   }
diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/pr115703-run.c 
b/gcc/testsuite/gcc.target/riscv/rvv/autovec/pr115703-run.c
new file mode 100644
index ..0c2c3d7d4fcc
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/pr115703-run.c
@@ -0,0 +1,44 @@
+/* { dg-do run } */
+/* { dg-require-effective-target rvv_zvl256b_ok } */
+/* { dg-options "-O3 -march=rv64gcv_zvl256b -mabi=lp64d -fwhole-program 
-fwrapv" } */
+
+int a, i;
+unsigned long b;
+unsigned c, f;
+long long d = 1;
+short e, m;
+long g, h;
+
+__attribute__ ((noipa))
+void check (unsigned long long x)
+{
+  if (x != 13667643351234938049ull)
+__builtin_abort ();
+}
+
+int main() {
+  for (int q = 0; q < 2; q += 1) {
+for (short r = 0; r < 2; r += 1)
+  for (char s = 0; s < 6; s++)
+for (short t = 0; t < 011; t += 12081 - 12080)
+  for (short u = 0; u < 11; u++) {
+a = ({ a > 1 ? a : 1; });
+b = ({ b > 5 ? b : 5; });
+for (short j = 0; j < 2; j = 2080)
+  c = ({ c > 030 ? c : 030; });
+for (short k = 0; k < 2; k += 2080)
+  d *= 7;
+e *= 10807;
+f = ({ f > 3 ? f : 3; });
+  }
+for (int l = 0; l < 21; l += 1)
+  for (int n = 0; n < 16; n++) {
+g = ({ m ? g : m; });
+for (char o = 0; o < 7; o += 1)
+  h *= 3;
+i = ({ i < 0 ? i : 0; });
+  }
+  }
+
+  check (d);
+}
diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/pr115703.c 
b/gcc/testsuite/gcc.target/riscv/rvv/autovec/pr115703.c
new file mode 100644
index ..207ff3c86ec3
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/pr115703.c
@@ -0,0 +1,38 @@
+/* { dg-do

[gcc r15-8021] RISC-V: Adjust LMUL when using maximum SEW [PR117955].

2025-03-13 Thread Robin Dapp via Gcc-cvs
https://gcc.gnu.org/g:f043ef2b6a59088b16a269b55f09023f76c92e32

commit r15-8021-gf043ef2b6a59088b16a269b55f09023f76c92e32
Author: Robin Dapp 
Date:   Tue Feb 25 12:55:08 2025 +0100

RISC-V: Adjust LMUL when using maximum SEW [PR117955].

When merging two vsetvls that both only demand "SEW >= ..." we
use their maximum SEW and keep the LMUL.  That may lead to invalid
vector configurations like
  e64, mf4.
As we make sure that the SEW requirements overlap we can use the SEW
and LMUL of the configuration with the larger SEW.

Ma Jin already touched this merge rule some weeks ago and fixed the
ratio calculation (r15-6873).  Calculating the ratio from an invalid
SEW/LMUL combination led to an overflow in the ratio variable, though.
I'd argue the proper fix is to update SEW and LMUL, keeping the ratio
as before.  This "breaks" bug-10.c but its check only checked for a
workaround anyway so I turned it into a run test.

Ma Jin helped minify the PR's test and provided a larger test case for
bug-10.

PR target/117955

gcc/ChangeLog:

* config/riscv/riscv-vsetvl.cc: Use LMUL/ratio from vsetvl with
larger SEW.

gcc/testsuite/ChangeLog:

* gcc.target/riscv/rvv/base/bug-10.c: Convert to run test.
* gcc.target/riscv/rvv/base/bug-10-2.c: New test.
* gcc.target/riscv/rvv/base/pr117955.c: New test.

Diff:
---
 gcc/config/riscv/riscv-vsetvl.cc   |  8 +-
 gcc/testsuite/gcc.target/riscv/rvv/base/bug-10-2.c | 93 ++
 gcc/testsuite/gcc.target/riscv/rvv/base/bug-10.c   | 33 +++-
 gcc/testsuite/gcc.target/riscv/rvv/base/pr117955.c | 26 ++
 4 files changed, 154 insertions(+), 6 deletions(-)

diff --git a/gcc/config/riscv/riscv-vsetvl.cc b/gcc/config/riscv/riscv-vsetvl.cc
index 82284624a242..f0165f7b8c8c 100644
--- a/gcc/config/riscv/riscv-vsetvl.cc
+++ b/gcc/config/riscv/riscv-vsetvl.cc
@@ -1729,9 +1729,11 @@ private:
   }
   inline void use_max_sew (vsetvl_info &prev, const vsetvl_info &next)
   {
-int max_sew = MAX (prev.get_sew (), next.get_sew ());
-prev.set_sew (max_sew);
-prev.set_ratio (calculate_ratio (prev.get_sew (), prev.get_vlmul ()));
+bool prev_sew_larger = prev.get_sew () >= next.get_sew ();
+const vsetvl_info from = prev_sew_larger ? prev : next;
+prev.set_sew (from.get_sew ());
+prev.set_vlmul (from.get_vlmul ());
+prev.set_ratio (from.get_ratio ());
 use_min_of_max_sew (prev, next);
   }
   inline void use_next_sew_lmul (vsetvl_info &prev, const vsetvl_info &next)
diff --git a/gcc/testsuite/gcc.target/riscv/rvv/base/bug-10-2.c 
b/gcc/testsuite/gcc.target/riscv/rvv/base/bug-10-2.c
new file mode 100644
index ..fe3a1efb8d86
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/rvv/base/bug-10-2.c
@@ -0,0 +1,93 @@
+/* { dg-do run { target { rv64 } } } */
+/* { dg-require-effective-target rv64 } */
+/* { dg-require-effective-target riscv_v_ok } */
+/* { dg-require-effective-target riscv_zvfh_ok } */
+/* { dg-options " -march=rv64gcv_zvfh -mabi=lp64d -O2" } */
+
+#include 
+
+int8_t a[1];
+uint16_t b[1];
+float c[1], n[1];
+uint16_t d[1];
+uint8_t e[1];
+uint16_t f[1];
+_Float16 g[1], k[1], m[1], p[1];
+uint16_t i[1];
+int8_t j[1];
+uint8_t o[1];
+uint32_t l[1];
+uint16_t q[1];
+uint32_t r[1];
+uint32_t s[1];
+int16_t t[1];
+int main()
+{
+  int u = 25;
+  int8_t *v = a;
+  uint32_t *w;
+  uint16_t *aa = b;
+  float *ab = c, *as = n;
+  uint32_t *ad;
+  uint16_t *ah = f;
+  _Float16 *ai = g, *aj = k, *an = m, *au = p;
+  int32_t *ak;
+  int16_t *al;
+  uint16_t *am = i;
+  int8_t *ao = j;
+  uint8_t *ap = o;
+  uint32_t *aq = l;
+  uint16_t *ar = q;
+  uint32_t *at = r;
+  uint32_t *av = s;
+  int32_t *ax;
+  int16_t *ay = t;
+  for (size_t az; u; u -= az)
+  {
+az = __riscv_vsetvl_e32m8(u);
+vint8m2_t ba = __riscv_vle8_v_i8m2(v, az);
+vbool4_t bb = __riscv_vmseq_vx_i8m2_b4(ba, 1, az);
+vuint16m4_t bc = __riscv_vsll_vx_u16m4(__riscv_vid_v_u16m4(az), 2, az);
+vuint32m8_t bd = __riscv_vsll_vx_u32m8(__riscv_vid_v_u32m8(az), 1, az);
+vuint32m8_t be = __riscv_vluxei16_v_u32m8_m(bb, w, bc, az);
+vuint16m4_t bf;
+__riscv_vsuxei16_v_u32m8_m(bb, aq, bf, be, az);
+vuint8m2_t bg = __riscv_vsll_vx_u8m2(__riscv_vid_v_u8m2(az), 1, az);
+vuint16m4_t bh = __riscv_vloxei8_v_u16m4(aa, bg, az);
+vfloat16m4_t bi;
+vuint16m4_t bj = __riscv_vsll_vx_u16m4(__riscv_vid_v_u16m4(az), 1, az);
+vint16m4_t bk = __riscv_vloxei32_v_i16m4_m(bb, al, bd, az);
+__riscv_vsse16_v_u16m4(ar, 2, bh, az);
+vuint16m4_t bl = __riscv_vloxei16_v_u16m4(d, bj, az);
+vfloat16m4_t bm = __riscv_vle16_v_f16m4(ai, az);
+vuint16m4_t bn = __riscv_vlse16_v_u16m4(ah, 2, az);
+vint32m8_t bo = __riscv_vle32_v_i32m8_m(bb, ak, az);
+vfloat16m1_t bp = __riscv_vle16_v_f16m1(aj, az);
+vuint16m4_t bq = __riscv_vrgatherei16_vv_u16m4(bl, bn, az);
+

[gcc r15-8022] RISC-V: Do not delete fused vsetvl if it has uses [PR119115].

2025-03-13 Thread Robin Dapp via Gcc-cvs
https://gcc.gnu.org/g:77ef91d7159613c0cfc2920ddd5a32952c61ff5b

commit r15-8022-g77ef91d7159613c0cfc2920ddd5a32952c61ff5b
Author: Robin Dapp 
Date:   Wed Mar 5 18:16:57 2025 +0100

RISC-V: Do not delete fused vsetvl if it has uses [PR119115].

In PR119115 we end up with an orphaned
vsetvli zero,t1,e16,m1,ta,ma.
t1 originally came from another vsetvl that was fused from
vsetvli a4,a3,e8,mf2,ta,ma
vsetvli t1,a3,e8,mf2,ta,ma   (1)
to
vsetvli zero,a3,e16,m1,ta,ma.

This patch checks if t1, the VL operand of (1), has AVL uses and does
not delete the vsetvl if so.  While doing so, it also wraps the search
for VL uses into two new functions reg_used and reg_single_use_in_avl.

PR target/119115

gcc/ChangeLog:

* config/riscv/riscv-vsetvl.cc (reg_used): New function.
(reg_single_use_in_avl): Ditto.
(pre_vsetvl::fuse_local_vsetvl_info): Use reg_single_use_in_avl
when checking if vsetvl can be deleted.

gcc/testsuite/ChangeLog:

* gcc.target/riscv/rvv/base/pr119115.c: New test.

Diff:
---
 gcc/config/riscv/riscv-vsetvl.cc   | 95 --
 gcc/testsuite/gcc.target/riscv/rvv/base/pr119115.c | 59 ++
 2 files changed, 131 insertions(+), 23 deletions(-)

diff --git a/gcc/config/riscv/riscv-vsetvl.cc b/gcc/config/riscv/riscv-vsetvl.cc
index f0165f7b8c8c..0ac2538f596f 100644
--- a/gcc/config/riscv/riscv-vsetvl.cc
+++ b/gcc/config/riscv/riscv-vsetvl.cc
@@ -780,6 +780,36 @@ enum class avl_demand_type : unsigned
   ignore_avl = demand_flags::DEMAND_EMPTY_P,
 };
 
+/* Go through all uses of INSN looking for a single use of register REG.
+   Return true if we find
+- Uses in a non-RVV insn
+- More than one use in an RVV insn
+- A single use in the VL operand of an RVV insn
+   and false otherwise.
+   A single use in the AVL operand does not count as use as we take care of
+   those separately in the pass.  */
+
+static bool
+reg_used (insn_info *insn, rtx reg)
+{
+  unsigned int regno = REGNO (reg);
+  const hash_set vl_uses = get_all_real_uses (insn, regno);
+  for (use_info *use : vl_uses)
+{
+  gcc_assert (use->insn ()->is_real ());
+  rtx_insn *rinsn = use->insn ()->rtl ();
+  if (!has_vl_op (rinsn)
+ || count_regno_occurrences (rinsn, regno) != 1)
+   return true;
+
+  rtx avl = ::get_avl (rinsn);
+  if (!avl || !REG_P (avl) || regno != REGNO (avl))
+   return true;
+}
+  return false;
+}
+
+
 class vsetvl_info
 {
 private:
@@ -1142,27 +1172,7 @@ public:
 
 /* Determine if dest operand(vl) has been used by non-RVV instructions.  */
 if (dest_vl)
-  {
-   const hash_set vl_uses
- = get_all_real_uses (get_insn (), REGNO (dest_vl));
-   for (use_info *use : vl_uses)
- {
-   gcc_assert (use->insn ()->is_real ());
-   rtx_insn *rinsn = use->insn ()->rtl ();
-   if (!has_vl_op (rinsn)
-   || count_regno_occurrences (rinsn, REGNO (dest_vl)) != 1)
- {
-   m_vl_used_by_non_rvv_insn = true;
-   break;
- }
-   rtx avl = ::get_avl (rinsn);
-   if (!avl || !REG_P (avl) || REGNO (dest_vl) != REGNO (avl))
- {
-   m_vl_used_by_non_rvv_insn = true;
-   break;
- }
- }
-  }
+  m_vl_used_by_non_rvv_insn = reg_used (get_insn (), dest_vl);
 
 /* Collect the read vl insn for the fault-only-first rvv loads.  */
 if (fault_first_load_p (insn->rtl ()))
@@ -1369,6 +1379,35 @@ public:
   void set_empty_info () { global_info.set_empty (); }
 };
 
+/* Same as REG_USED () but looks for a single use in an RVV insn's AVL
+   operand.  */
+static bool
+reg_single_use_in_avl (insn_info *insn, rtx reg)
+{
+  if (!reg)
+return false;
+  unsigned int regno = REGNO (reg);
+  const hash_set vl_uses = get_all_real_uses (insn, regno);
+  for (use_info *use : vl_uses)
+{
+  gcc_assert (use->insn ()->is_real ());
+  rtx_insn *rinsn = use->insn ()->rtl ();
+  if (!has_vl_op (rinsn)
+ || count_regno_occurrences (rinsn, regno) != 1)
+   return false;
+
+  vsetvl_info info = vsetvl_info (use->insn ());
+
+  if (!info.has_nonvlmax_reg_avl ())
+   return false;
+
+  rtx avl = info.get_avl ();
+  if (avl && REG_P (avl) && regno == REGNO (avl))
+   return true;
+}
+  return false;
+}
+
 /* Demand system is the RVV-based VSETVL info analysis tools wrapper.
It defines compatible rules for SEW/LMUL, POLICY and AVL.
Also, it provides 3 interfaces available_p, compatible_p and
@@ -2797,8 +2836,18 @@ pre_vsetvl::fuse_local_vsetvl_info ()
 64 into 32.  */
  prev_info.set_max_sew (
MIN (prev_info.get_max_sew (), curr_info.get_max_sew ()));
- if (!curr_info.vl_use

[gcc r15-7687] vect: Use original LHS type for gather pattern [PR118950].

2025-02-24 Thread Robin Dapp via Gcc-cvs
https://gcc.gnu.org/g:f3d4208e798afafcba5246334004e9646e390681

commit r15-7687-gf3d4208e798afafcba5246334004e9646e390681
Author: Robin Dapp 
Date:   Fri Feb 21 07:19:40 2025 +0100

vect: Use original LHS type for gather pattern [PR118950].

In PR118950 we do not zero masked elements in a gather load.
While recognizing a gather/scatter pattern we do not use the original
type of the LHS.  This matters because the type can differ with bool
patterns (e.g. _Bool vs unsigned char) and we don't notice the need
for zeroing out the padding bytes.

This patch just uses the original LHS's type.

PR middle-end/118950

gcc/ChangeLog:

* tree-vect-patterns.cc (vect_recog_gather_scatter_pattern): Use
original LHS's type.

gcc/testsuite/ChangeLog:

* gcc.target/riscv/rvv/autovec/pr118950.c: New test.

Diff:
---
 .../gcc.target/riscv/rvv/autovec/pr118950.c| 29 ++
 gcc/tree-vect-patterns.cc  |  3 ++-
 2 files changed, 31 insertions(+), 1 deletion(-)

diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/pr118950.c 
b/gcc/testsuite/gcc.target/riscv/rvv/autovec/pr118950.c
new file mode 100644
index ..604d4264eac1
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/pr118950.c
@@ -0,0 +1,29 @@
+/* { dg-do run } */
+/* { dg-require-effective-target riscv_v_ok } */
+/* { dg-add-options riscv_v } */
+/* { dg-additional-options "-std=gnu99 -Wno-pedantic" } */
+
+unsigned char a;
+long long r;
+_Bool h = 1;
+short j[23];
+_Bool k[3][23];
+
+void b(_Bool h, short j[], _Bool k[][23]) {
+  for (int m = 0; m < 23; m += 3)
+for (short n = 0; n < 22; n += 4)
+  a = ({
+unsigned char o = a;
+unsigned char p = j[n] ? h : k[m][n];
+o > p ? o : p;
+  });
+}
+
+int main() {
+  for (int m = 0; m < 23; ++m)
+j[m] = 10;
+  b(h, j, k);
+  r = a;
+  if (r != 1)
+__builtin_abort ();
+}
diff --git a/gcc/tree-vect-patterns.cc b/gcc/tree-vect-patterns.cc
index 6fc97d1b6ef9..4f0a7ea162b9 100644
--- a/gcc/tree-vect-patterns.cc
+++ b/gcc/tree-vect-patterns.cc
@@ -6022,7 +6022,8 @@ vect_recog_gather_scatter_pattern (vec_info *vinfo,
   else
pattern_stmt = gimple_build_call_internal (gs_info.ifn, 4, base,
   offset, scale, zero);
-  tree load_lhs = vect_recog_temp_ssa_var (gs_info.element_type, NULL);
+  tree lhs = gimple_get_lhs (stmt_info->stmt);
+  tree load_lhs = vect_recog_temp_ssa_var (TREE_TYPE (lhs), NULL);
   gimple_call_set_lhs (pattern_stmt, load_lhs);
 }
   else


[gcc r15-7688] RISC-V: Include pattern stmts for dynamic LMUL computation [PR114516].

2025-02-24 Thread Robin Dapp via Gcc-cvs
https://gcc.gnu.org/g:6be1b9e94d9a2ead15e3625e833f1e34503ab803

commit r15-7688-g6be1b9e94d9a2ead15e3625e833f1e34503ab803
Author: Robin Dapp 
Date:   Fri Feb 21 17:08:16 2025 +0100

RISC-V: Include pattern stmts for dynamic LMUL computation [PR114516].

When scanning for program points, i.e. vector statements, we're missing
pattern statements.  In PR114516 this becomes obvious as we choose
LMUL=8 assuming there are only three statements but the divmod pattern
adds another three.  Those push us beyond four registers so we need to
switch to LMUL=4.

This patch adds pattern statements to the program points which helps
calculate a better register pressure estimate.

PR target/114516

gcc/ChangeLog:

* config/riscv/riscv-vector-costs.cc (compute_estimated_lmul):
Add pattern statements to program points.

gcc/testsuite/ChangeLog:

* gcc.dg/vect/costmodel/riscv/rvv/pr114516.c: New test.

Diff:
---
 gcc/config/riscv/riscv-vector-costs.cc | 29 ++
 .../gcc.dg/vect/costmodel/riscv/rvv/pr114516.c | 29 ++
 2 files changed, 58 insertions(+)

diff --git a/gcc/config/riscv/riscv-vector-costs.cc 
b/gcc/config/riscv/riscv-vector-costs.cc
index d4571b65e193..167375ca7516 100644
--- a/gcc/config/riscv/riscv-vector-costs.cc
+++ b/gcc/config/riscv/riscv-vector-costs.cc
@@ -217,6 +217,35 @@ compute_local_program_points (
 "program point %d: %G", info.point,
 gsi_stmt (si));
}
+
+ /* If the statement is part of a pattern, also add the other
+pattern statements.  */
+ gimple_seq pattern_def_seq;
+ if (STMT_VINFO_IN_PATTERN_P (stmt_info)
+ && (pattern_def_seq = STMT_VINFO_PATTERN_DEF_SEQ (stmt_info)))
+   {
+ gimple_stmt_iterator si2;
+
+ for (si2 = gsi_start (pattern_def_seq);
+  !gsi_end_p (si2);
+  gsi_next (&si2))
+   {
+ stmt_vec_info pattern_def_stmt_info
+   = vinfo->lookup_stmt (gsi_stmt (si2));
+ if (STMT_VINFO_RELEVANT_P (pattern_def_stmt_info)
+ || STMT_VINFO_LIVE_P (pattern_def_stmt_info))
+   {
+ stmt_point info = {point, gsi_stmt (si2),
+ pattern_def_stmt_info};
+ program_points.safe_push (info);
+ point++;
+ if (dump_enabled_p ())
+   dump_printf_loc (MSG_NOTE, vect_location,
+"program point %d: %G",
+info.point, gsi_stmt (si2));
+   }
+   }
+   }
}
  program_points_per_bb.put (bb, program_points);
}
diff --git a/gcc/testsuite/gcc.dg/vect/costmodel/riscv/rvv/pr114516.c 
b/gcc/testsuite/gcc.dg/vect/costmodel/riscv/rvv/pr114516.c
new file mode 100644
index ..55d036c3ad7b
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/vect/costmodel/riscv/rvv/pr114516.c
@@ -0,0 +1,29 @@
+/* { dg-do compile } */
+/* { dg-options "-march=rv64gcv_zba_zbb -mabi=lp64d -mrvv-max-lmul=dynamic -O3 
-fdump-tree-vect-details" } */
+
+typedef float real_t;
+__attribute__((aligned(64))) real_t a[32000];
+real_t s315()
+{
+for (int i = 0; i < 32000; i++)
+a[i] = (i * 7) % 32000;
+real_t x, chksum;
+int index;
+for (int nl = 0; nl < 256; nl++) {
+x = a[0];
+index = 0;
+for (int i = 0; i < 32000; ++i) {
+if (a[i] > x) {
+x = a[i];
+index = i;
+}
+}
+chksum = x + (real_t) index;
+}
+return index + x + 1;
+}
+
+/* { dg-final { scan-assembler {e32,m4} } } */
+/* { dg-final { scan-assembler-not {e32,m8} } } */
+/* { dg-final { scan-assembler-not {csrr} } } */
+/* { dg-final { scan-tree-dump-times "Preferring smaller LMUL loop because it 
has unexpected spills" 1 "vect" } } */


[gcc r15-9161] RISC-V: Fix vec_duplicate[bimode] expander [PR119572].

2025-04-02 Thread Robin Dapp via Gcc-cvs
https://gcc.gnu.org/g:716d39f0a248c1003033e6a312c736180790ef70

commit r15-9161-g716d39f0a248c1003033e6a312c736180790ef70
Author: Robin Dapp 
Date:   Tue Apr 1 21:17:54 2025 +0200

RISC-V: Fix vec_duplicate[bimode] expander [PR119572].

Since r15-9062-g70391e3958db79 we perform vector bitmask initialization
via the vec_duplicate expander directly.  This triggered a latent bug in
ours where we failed to mask out the single bit, which resulted in an
execution FAIL of pr119114.c.

The attached patch adds the 1-masking of the broadcast operand.

PR target/119572

gcc/ChangeLog:

* config/riscv/autovec.md: Mask broadcast value.

Diff:
---
 gcc/config/riscv/autovec.md | 10 +-
 1 file changed, 9 insertions(+), 1 deletion(-)

diff --git a/gcc/config/riscv/autovec.md b/gcc/config/riscv/autovec.md
index f53ed3a5e3fd..9e51e3ce6a30 100644
--- a/gcc/config/riscv/autovec.md
+++ b/gcc/config/riscv/autovec.md
@@ -330,7 +330,15 @@
   {
 poly_int64 nunits = GET_MODE_NUNITS (mode);
 machine_mode mode = riscv_vector::get_vector_mode (QImode, nunits).require 
();
-rtx dup = expand_vector_broadcast (mode, operands[1]);
+
+/* The 1-bit mask is in a QImode register, make sure we only use the last
+   bit.  See also PR119114 and the respective vec_init expander.  */
+rtx tmp = gen_reg_rtx (Xmode);
+emit_insn
+  (gen_rtx_SET (tmp, gen_rtx_AND (Xmode, gen_lowpart (Xmode, operands[1]),
+ CONST1_RTX (Xmode;
+
+rtx dup = expand_vector_broadcast (mode, gen_lowpart (QImode, tmp));
 riscv_vector::expand_vec_cmp (operands[0], NE, dup, CONST0_RTX (mode));
 DONE;
   }


[gcc r16-845] RISC-V: Add autovec mode param.

2025-05-23 Thread Robin Dapp via Gcc-cvs
https://gcc.gnu.org/g:b949d048e914a4cd11a63004a9a2d42e51bc3ac8

commit r16-845-gb949d048e914a4cd11a63004a9a2d42e51bc3ac8
Author: Robin Dapp 
Date:   Wed May 7 21:02:21 2025 +0200

RISC-V: Add autovec mode param.

This patch adds a --param=autovec-mode=<mode>.  When the param is
specified we make autovectorize_vector_modes return exactly this mode if
it is available.  This helps when testing different vectorizer settings.

gcc/ChangeLog:

* config/riscv/riscv-v.cc (autovectorize_vector_modes): Return
user-specified mode if available.
* config/riscv/riscv.opt: New param.

gcc/testsuite/ChangeLog:

* gcc.target/riscv/rvv/autovec/param-autovec-mode.c: New test.

Diff:
---
 gcc/config/riscv/riscv-v.cc| 22 ++
 gcc/config/riscv/riscv.opt |  4 
 .../riscv/rvv/autovec/param-autovec-mode.c | 16 
 3 files changed, 42 insertions(+)

diff --git a/gcc/config/riscv/riscv-v.cc b/gcc/config/riscv/riscv-v.cc
index e406e7a7f590..be6147b80a2c 100644
--- a/gcc/config/riscv/riscv-v.cc
+++ b/gcc/config/riscv/riscv-v.cc
@@ -2821,6 +2821,28 @@ autovectorize_vector_modes (vector_modes *modes, bool)
i++;
size = base_size / (1U << i);
  }
+
+  /* If the user specified the exact mode to use look if it is available and
+ remove all other ones before returning.  */
+  if (riscv_autovec_mode)
+{
+  auto_vector_modes ms;
+  ms.safe_splice (*modes);
+  modes->truncate (0);
+
+  for (machine_mode mode : ms)
+   {
+ if (!strcmp (GET_MODE_NAME (mode), riscv_autovec_mode))
+   {
+ modes->safe_push (mode);
+ return 0;
+   }
+   }
+
+  /* Nothing found, fall back to regular handling.  */
+  modes->safe_splice (ms);
+}
+
   /* Enable LOOP_VINFO comparison in COST model.  */
   return VECT_COMPARE_COSTS;
 }
diff --git a/gcc/config/riscv/riscv.opt b/gcc/config/riscv/riscv.opt
index 527e09549a8a..b2b9d3311f4e 100644
--- a/gcc/config/riscv/riscv.opt
+++ b/gcc/config/riscv/riscv.opt
@@ -286,6 +286,10 @@ Max number of bytes to compare as part of inlined 
strcmp/strncmp routines (defau
 Target RejectNegative Joined UInteger Var(gpr2vr_cost) 
Init(GPR2VR_COST_UNPROVIDED)
 Set the cost value of the rvv instruction when operate from GPR to VR.
 
+-param=riscv-autovec-mode=
+Target Undocumented RejectNegative Joined Var(riscv_autovec_mode) Save
+Set the only autovec mode to try.
+
 Enum
 Name(rvv_max_lmul) Type(enum rvv_max_lmul_enum)
 The RVV possible LMUL (-mrvv-max-lmul=):
diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/param-autovec-mode.c 
b/gcc/testsuite/gcc.target/riscv/rvv/autovec/param-autovec-mode.c
new file mode 100644
index ..b2ec8f9dc774
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/param-autovec-mode.c
@@ -0,0 +1,16 @@
+/* { dg-do compile } */
+/* { dg-options "-O3 -march=rv64gcv -mabi=lp64d --param=autovec-mode=V4QI 
-fdump-tree-vect-details" } */
+
+/* By default we will use RVVM1SI mode for vectorization because N is not
+   known.  Check that we use V4QI and create an epilogue when the autovec-mode
+   param is specified.  */
+
+void
+foo (int *a, int *b, int n)
+{
+  for (int i = 0; i < n; i++)
+a[i] = b[i] + 1;
+}
+
+/* { dg-final { scan-tree-dump "Choosing vector mode V4QI" "vect" } } */
+/* { dg-final { scan-tree-dump "Choosing epilogue vector mode RVVM1SI" "vect" 
} } */


[gcc r16-846] RISC-V: Support CPUs in -march.

2025-05-23 Thread Robin Dapp via Gcc-cvs
https://gcc.gnu.org/g:4a182418c89666e7594bcb0e5edc5194aa147910

commit r16-846-g4a182418c89666e7594bcb0e5edc5194aa147910
Author: Robin Dapp 
Date:   Thu May 8 09:51:45 2025 +0200

RISC-V: Support CPUs in -march.

This patch allows an -march string like

  -march=sifive-p670

in order to override a previous -march in a simple way.

Suppose we have a Makefile that specifies -march=rv64gc by default.
A user-specified -mcpu=sifive-p670 would be after the -march in the
options string and thus only set -mtune=sifive-p670 (as -mcpu does not
override a previously specified -march or -mtune).

So if we wanted to override we would need to specify the full, lengthy
-march=rv64gcv_... string instead of a simple -mcpu=...

Therefore this patch always first tries to interpret -march= as a CPU
string.  If it is a supported CPU we use its march properties and let it
override previously specified options.  Otherwise the behavior is as
before.  This enables the "last-specified option wins" behavior GCC
normally employs.

Note that -march does not imply -mtune like on x86 or other targets.
So an -march=CPU won't override a previously specified -mtune=other-CPU.

gcc/ChangeLog:

* common/config/riscv/riscv-common.cc 
(riscv_subset_list::parse_base_ext):
Adjust error message.
(riscv_handle_option): Parse as CPU string first.
(riscv_expand_arch): Ditto.
* doc/invoke.texi: Document.

gcc/testsuite/ChangeLog:

* gcc.target/riscv/arch-56.c: New test.

Diff:
---
 gcc/common/config/riscv/riscv-common.cc  | 19 ---
 gcc/doc/invoke.texi  |  2 +-
 gcc/testsuite/gcc.target/riscv/arch-56.c | 13 +
 3 files changed, 26 insertions(+), 8 deletions(-)

diff --git a/gcc/common/config/riscv/riscv-common.cc 
b/gcc/common/config/riscv/riscv-common.cc
index c843393998cb..a6d8763f032b 100644
--- a/gcc/common/config/riscv/riscv-common.cc
+++ b/gcc/common/config/riscv/riscv-common.cc
@@ -980,8 +980,9 @@ riscv_subset_list::parse_base_ext (const char *p)
 }
   else
 {
-  error_at (m_loc, "%<-march=%s%>: ISA string must begin with rv32, rv64 "
-   "or Profiles", m_arch);
+  error_at (m_loc, "%<-march=%s%>: ISA string must begin with rv32, rv64,"
+   " a supported RVA profile or refer to a supported CPU",
+   m_arch);
   return NULL;
 }
 
@@ -1708,7 +1709,8 @@ riscv_handle_option (struct gcc_options *opts,
   switch (decoded->opt_index)
 {
 case OPT_march_:
-  riscv_parse_arch_string (decoded->arg, opts, loc);
+  if (riscv_find_cpu (decoded->arg) == NULL)
+   riscv_parse_arch_string (decoded->arg, opts, loc);
   return true;
 
 case OPT_mcpu_:
@@ -1725,15 +1727,18 @@ riscv_handle_option (struct gcc_options *opts,
 /* Expand arch string with implied extensions.  */
 
 const char *
-riscv_expand_arch (int argc ATTRIBUTE_UNUSED,
+riscv_expand_arch (int argc,
   const char **argv)
 {
   gcc_assert (argc == 1);
   location_t loc = UNKNOWN_LOCATION;
-  riscv_parse_arch_string (argv[0], NULL, loc);
+  /* Try to interpret the arch as CPU first.  */
+  const char *arch_str = riscv_expand_arch_from_cpu (argc, argv);
+  if (!strlen (arch_str))
+riscv_parse_arch_string (argv[0], NULL, loc);
   const std::string arch = riscv_arch_str (false);
-  if (arch.length())
-return xasprintf ("-march=%s", arch.c_str());
+  if (arch.length ())
+return xasprintf ("-march=%s", arch.c_str ());
   else
 return "";
 }
diff --git a/gcc/doc/invoke.texi b/gcc/doc/invoke.texi
index 124db1232845..fe47ce564873 100644
--- a/gcc/doc/invoke.texi
+++ b/gcc/doc/invoke.texi
@@ -1268,7 +1268,7 @@ See RS/6000 and PowerPC Options.
 -mfence-tso  -mno-fence-tso
 -mdiv  -mno-div
 -misa-spec=@var{ISA-spec-string}
--march=@var{ISA-string|Profiles|Profiles_ISA-string}
+-march=@var{ISA-string|Profiles|Profiles_ISA-string|CPU/processor string}
 -mtune=@var{processor-string}
 -mpreferred-stack-boundary=@var{num}
 -msmall-data-limit=@var{N-bytes}
diff --git a/gcc/testsuite/gcc.target/riscv/arch-56.c 
b/gcc/testsuite/gcc.target/riscv/arch-56.c
new file mode 100644
index ..e075f9661eef
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/arch-56.c
@@ -0,0 +1,13 @@
+/* Check whether the second -march overrides the first.  */
+/* { dg-do compile { target rv64 } } */
+/* { dg-options "-O3 -march=rv64gc -march=sifive-p670" } */
+
+void
+foo (char *a, char *b, int n)
+{
+  for (int i = 0; i < n; i++)
+a[i] = b[i] + 1;
+}
+
+/* { dg-final { scan-assembler "vset" } } */
+/* { dg-final { scan-assembler "zvl128b" } } */


[gcc r16-843] RISC-V: Fix some dynamic LMUL costing.

2025-05-23 Thread Robin Dapp via Gcc-cvs
https://gcc.gnu.org/g:1eb30ea5e36a0cf01e356337f5fabcd3e05fd373

commit r16-843-g1eb30ea5e36a0cf01e356337f5fabcd3e05fd373
Author: Robin Dapp 
Date:   Fri Feb 7 15:42:28 2025 +0100

RISC-V: Fix some dynamic LMUL costing.

With all-SLP we annotate statements slightly differently.  This patch
uses STMT_VINFO_RELEVANT_P in order to walk through potential program
points.

Also it makes the LMUL estimate always use the same path.  This helps
fix a number of test cases that regressed since GCC 14.

There are still some failing ones but it appears to me that the chosen
LMUL is still correct and we just expect different log messages.

gcc/ChangeLog:

* config/riscv/riscv-vector-costs.cc (compute_estimated_lmul):
Always use vect_vf_for_cost and TARGET_MIN_VLEN.

gcc/testsuite/ChangeLog:

* gcc.dg/vect/costmodel/riscv/rvv/dynamic-lmul4-3.c: Adjust
expectations.
* gcc.dg/vect/costmodel/riscv/rvv/dynamic-lmul8-9.c: Ditto.

Diff:
---
 gcc/config/riscv/riscv-vector-costs.cc | 25 ++
 .../vect/costmodel/riscv/rvv/dynamic-lmul4-3.c |  2 +-
 .../vect/costmodel/riscv/rvv/dynamic-lmul8-9.c |  1 -
 3 files changed, 3 insertions(+), 25 deletions(-)

diff --git a/gcc/config/riscv/riscv-vector-costs.cc 
b/gcc/config/riscv/riscv-vector-costs.cc
index c28eecd1110e..a39b611e4cef 100644
--- a/gcc/config/riscv/riscv-vector-costs.cc
+++ b/gcc/config/riscv/riscv-vector-costs.cc
@@ -205,9 +205,7 @@ compute_local_program_points (
  if (!is_gimple_assign_or_call (gsi_stmt (si)))
continue;
  stmt_vec_info stmt_info = vinfo->lookup_stmt (gsi_stmt (si));
- enum stmt_vec_info_type type
-   = STMT_VINFO_TYPE (vect_stmt_to_vectorize (stmt_info));
- if (type != undef_vec_info_type)
+ if (STMT_VINFO_RELEVANT_P (stmt_info))
{
  stmt_point info = {point, gsi_stmt (si), stmt_info};
  program_points.safe_push (info);
@@ -626,7 +624,7 @@ compute_estimated_lmul (loop_vec_info loop_vinfo, 
machine_mode mode)
   int regno_alignment = riscv_get_v_regno_alignment (loop_vinfo->vector_mode);
   if (riscv_v_ext_vls_mode_p (loop_vinfo->vector_mode))
 return regno_alignment;
-  else if (known_eq (LOOP_VINFO_SLP_UNROLLING_FACTOR (loop_vinfo), 1U))
+  else
 {
   int estimated_vf = vect_vf_for_cost (loop_vinfo);
   int estimated_lmul = estimated_vf * GET_MODE_BITSIZE (mode).to_constant 
()
@@ -636,25 +634,6 @@ compute_estimated_lmul (loop_vec_info loop_vinfo, 
machine_mode mode)
   else
return estimated_lmul;
 }
-  else
-{
-  /* Estimate the VLA SLP LMUL.  */
-  if (regno_alignment > RVV_M1)
-   return regno_alignment;
-  else if (mode != QImode
-  || LOOP_VINFO_SLP_UNROLLING_FACTOR (loop_vinfo).is_constant ())
-   {
- int ratio;
- if (can_div_trunc_p (BYTES_PER_RISCV_VECTOR,
-  GET_MODE_SIZE (loop_vinfo->vector_mode), &ratio))
-   {
- if (ratio == 1)
-   return RVV_M4;
- else if (ratio == 2)
-   return RVV_M2;
-   }
-   }
-}
   return 0;
 }
 
diff --git a/gcc/testsuite/gcc.dg/vect/costmodel/riscv/rvv/dynamic-lmul4-3.c 
b/gcc/testsuite/gcc.dg/vect/costmodel/riscv/rvv/dynamic-lmul4-3.c
index 85e3021f1c2b..b5a7f180228a 100644
--- a/gcc/testsuite/gcc.dg/vect/costmodel/riscv/rvv/dynamic-lmul4-3.c
+++ b/gcc/testsuite/gcc.dg/vect/costmodel/riscv/rvv/dynamic-lmul4-3.c
@@ -45,4 +45,4 @@ void foo2 (int64_t *__restrict a,
 /* { dg-final { scan-tree-dump-not "Preferring smaller LMUL loop because it 
has unexpected spills" "vect" } } */
 /* { dg-final { scan-tree-dump-times "Maximum lmul = 8" 1 "vect" } } */
 /* { dg-final { scan-tree-dump-times "Maximum lmul = 4" 1 "vect" } } */
-/* { dg-final { scan-tree-dump-times "Maximum lmul = 2" 1 "vect" } } */
+/* { dg-final { scan-tree-dump-times "Maximum lmul = 2" 3 "vect" } } */
diff --git a/gcc/testsuite/gcc.dg/vect/costmodel/riscv/rvv/dynamic-lmul8-9.c 
b/gcc/testsuite/gcc.dg/vect/costmodel/riscv/rvv/dynamic-lmul8-9.c
index 793d16418bf1..56234580fa82 100644
--- a/gcc/testsuite/gcc.dg/vect/costmodel/riscv/rvv/dynamic-lmul8-9.c
+++ b/gcc/testsuite/gcc.dg/vect/costmodel/riscv/rvv/dynamic-lmul8-9.c
@@ -14,6 +14,5 @@ foo (int64_t *__restrict a, int64_t init, int n)
 /* { dg-final { scan-assembler {e64,m8} } } */
 /* { dg-final { scan-assembler-not {csrr} } } */
 /* { dg-final { scan-tree-dump-not "Preferring smaller LMUL loop because it 
has unexpected spills" "vect" } } */
-/* { dg-final { scan-tree-dump-times "Maximum lmul = 8" 1 "vect" } } */
 /* { dg-final { scan-tree-dump-times "Maximum lmul = 4" 1 "vect" } } */
 /* { dg-final { scan-tree-dump-times "Maximum lmul = 2" 1 "vect" } } */


[gcc r16-844] RISC-V: Default-initialize variable.

2025-05-23 Thread Robin Dapp via Gcc-cvs
https://gcc.gnu.org/g:e6fea0a8be95e2a1cbe733301a205b8c00db9202

commit r16-844-ge6fea0a8be95e2a1cbe733301a205b8c00db9202
Author: Robin Dapp 
Date:   Thu May 8 10:17:26 2025 +0200

RISC-V: Default-initialize variable.

This patch initializes saved_vxrm_mode to VXRM_MODE_NONE.  Leaving it
uninitialized causes a warning (but not an error) when building the
compiler, so it is better to fix it.

gcc/ChangeLog:

* config/riscv/riscv.cc (singleton_vxrm_need): Init
saved_vxrm_mode.

Diff:
---
 gcc/config/riscv/riscv.cc | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/gcc/config/riscv/riscv.cc b/gcc/config/riscv/riscv.cc
index 358d1ec5d32e..5c0c8beec3bd 100644
--- a/gcc/config/riscv/riscv.cc
+++ b/gcc/config/riscv/riscv.cc
@@ -12321,7 +12321,7 @@ singleton_vxrm_need (void)
   /* Walk the IL noting if VXRM is needed and if there's more than one
  mode needed.  */
   bool found = false;
-  int saved_vxrm_mode;
+  int saved_vxrm_mode = VXRM_MODE_NONE;
   for (rtx_insn *insn = get_insns (); insn; insn = NEXT_INSN (insn))
 {
   if (!INSN_P (insn) || DEBUG_INSN_P (insn))


[gcc r16-937] vect: Remove non-SLP paths in strided slp/elementwise.

2025-05-28 Thread Robin Dapp via Gcc-cvs
https://gcc.gnu.org/g:0f73ae763c02ad3b2917c33eecba9b47efdcc73b

commit r16-937-g0f73ae763c02ad3b2917c33eecba9b47efdcc73b
Author: Robin Dapp 
Date:   Tue May 20 11:23:34 2025 +0200

vect: Remove non-SLP paths in strided slp/elementwise.

This patch removes non-SLP paths in the
VMAT_STRIDED_SLP/VMAT_ELEMENTWISE part of vectorizable_load.

gcc/ChangeLog:

* tree-vect-stmts.cc (vectorizable_load): Remove non-SLP paths.

Diff:
---
 gcc/tree-vect-stmts.cc | 49 ++---
 1 file changed, 18 insertions(+), 31 deletions(-)

diff --git a/gcc/tree-vect-stmts.cc b/gcc/tree-vect-stmts.cc
index 66958543bf86..3710694ac75d 100644
--- a/gcc/tree-vect-stmts.cc
+++ b/gcc/tree-vect-stmts.cc
@@ -10689,8 +10689,7 @@ vectorizable_load (vec_info *vinfo,
  first_dr_info = dr_info;
}
 
-  if (slp && grouped_load
- && memory_access_type == VMAT_STRIDED_SLP)
+  if (grouped_load && memory_access_type == VMAT_STRIDED_SLP)
{
  group_size = DR_GROUP_SIZE (first_stmt_info);
  ref_type = get_group_alias_ptr_type (first_stmt_info);
@@ -10830,22 +10829,20 @@ vectorizable_load (vec_info *vinfo,
  ltype = build_aligned_type (ltype, align * BITS_PER_UNIT);
}
 
-  if (slp)
+  /* For SLP permutation support we need to load the whole group,
+not only the number of vector stmts the permutation result
+fits in.  */
+  if (slp_perm)
{
- /* For SLP permutation support we need to load the whole group,
-not only the number of vector stmts the permutation result
-fits in.  */
- if (slp_perm)
-   {
- /* We don't yet generate SLP_TREE_LOAD_PERMUTATIONs for
-variable VF.  */
- unsigned int const_vf = vf.to_constant ();
- ncopies = CEIL (group_size * const_vf, const_nunits);
- dr_chain.create (ncopies);
-   }
- else
-   ncopies = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
+ /* We don't yet generate SLP_TREE_LOAD_PERMUTATIONs for
+variable VF.  */
+ unsigned int const_vf = vf.to_constant ();
+ ncopies = CEIL (group_size * const_vf, const_nunits);
+ dr_chain.create (ncopies);
}
+  else
+   ncopies = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
+
   unsigned int group_el = 0;
   unsigned HOST_WIDE_INT
elsz = tree_to_uhwi (TYPE_SIZE_UNIT (TREE_TYPE (vectype)));
@@ -10883,14 +10880,13 @@ vectorizable_load (vec_info *vinfo,
CONSTRUCTOR_APPEND_ELT (v, NULL_TREE, new_temp);
 
  group_el += lnel;
- if (! slp
- || group_el == group_size)
+ if (group_el == group_size)
{
  n_groups++;
  /* When doing SLP make sure to not load elements from
 the next vector iteration, those will not be accessed
 so just use the last element again.  See PR107451.  */
- if (!slp || known_lt (n_groups, vf))
+ if (known_lt (n_groups, vf))
{
  tree newoff = copy_ssa_name (running_off);
  gimple *incr
@@ -10938,19 +10934,10 @@ vectorizable_load (vec_info *vinfo,
 
  if (!costing_p)
{
- if (slp)
-   {
- if (slp_perm)
-   dr_chain.quick_push (gimple_assign_lhs (new_stmt));
- else
-   slp_node->push_vec_def (new_stmt);
-   }
+ if (slp_perm)
+   dr_chain.quick_push (gimple_assign_lhs (new_stmt));
  else
-   {
- if (j == 0)
-   *vec_stmt = new_stmt;
- STMT_VINFO_VEC_STMTS (stmt_info).safe_push (new_stmt);
-   }
+   slp_node->push_vec_def (new_stmt);
}
}
   if (slp_perm)


[gcc r16-936] RISC-V: Avoid division by zero in check_builtin_call [PR120436].

2025-05-28 Thread Robin Dapp via Gcc-cvs
https://gcc.gnu.org/g:6ccf845d9fa157e7ebcf2c589a9fc5d8f298961f

commit r16-936-g6ccf845d9fa157e7ebcf2c589a9fc5d8f298961f
Author: Robin Dapp 
Date:   Mon May 26 16:16:36 2025 +0200

RISC-V: Avoid division by zero in check_builtin_call [PR120436].

In check_builtin_call we eventually perform a division by zero when no
vector modes are present.  This patch just avoids the division in that
case.

PR target/120436

gcc/ChangeLog:

* config/riscv/riscv-vector-builtins-shapes.cc (struct vset_def):
Avoid division by zero.
(struct vget_def): Ditto.
* config/riscv/riscv-vector-builtins.h (struct function_group_info):
Use required_extensions_specified instead of duplicating code.

gcc/testsuite/ChangeLog:

* gcc.target/riscv/rvv/base/pr120436.c: New test.

Diff:
---
 gcc/config/riscv/riscv-vector-builtins-shapes.cc   |  4 +++
 gcc/config/riscv/riscv-vector-builtins.h   | 40 +-
 gcc/testsuite/gcc.target/riscv/rvv/base/pr120436.c | 16 +
 3 files changed, 21 insertions(+), 39 deletions(-)

diff --git a/gcc/config/riscv/riscv-vector-builtins-shapes.cc 
b/gcc/config/riscv/riscv-vector-builtins-shapes.cc
index b855d4c5fa5a..9832eb9e3d1b 100644
--- a/gcc/config/riscv/riscv-vector-builtins-shapes.cc
+++ b/gcc/config/riscv/riscv-vector-builtins-shapes.cc
@@ -908,6 +908,8 @@ struct vset_def : public build_base
   {
 poly_int64 outer_size = GET_MODE_SIZE (c.arg_mode (0));
 poly_int64 inner_size = GET_MODE_SIZE (c.arg_mode (2));
+if (maybe_eq (inner_size, 0))
+  return false;
 unsigned int nvecs = exact_div (outer_size, inner_size).to_constant ();
 return c.require_immediate (1, 0, nvecs - 1);
   }
@@ -920,6 +922,8 @@ struct vget_def : public misc_def
   {
 poly_int64 outer_size = GET_MODE_SIZE (c.arg_mode (0));
 poly_int64 inner_size = GET_MODE_SIZE (c.ret_mode ());
+if (maybe_eq (inner_size, 0))
+  return false;
 unsigned int nvecs = exact_div (outer_size, inner_size).to_constant ();
 return c.require_immediate (1, 0, nvecs - 1);
   }
diff --git a/gcc/config/riscv/riscv-vector-builtins.h 
b/gcc/config/riscv/riscv-vector-builtins.h
index ffc289364b06..1f2587ab6afa 100644
--- a/gcc/config/riscv/riscv-vector-builtins.h
+++ b/gcc/config/riscv/riscv-vector-builtins.h
@@ -331,45 +331,7 @@ struct function_group_info
   /* Return true if required extension is enabled */
   bool match (required_ext ext_value) const
   {
-switch (ext_value)
-{
-  case VECTOR_EXT:
-return TARGET_VECTOR;
-  case ZVBB_EXT:
-return TARGET_ZVBB;
-  case ZVBB_OR_ZVKB_EXT:
-return (TARGET_ZVBB || TARGET_ZVKB);
-  case ZVBC_EXT:
-return TARGET_ZVBC;
-  case ZVKG_EXT:
-return TARGET_ZVKG;
-  case ZVKNED_EXT:
-return TARGET_ZVKNED;
-  case ZVKNHA_OR_ZVKNHB_EXT:
-return (TARGET_ZVKNHA || TARGET_ZVKNHB);
-  case ZVKNHB_EXT:
-return TARGET_ZVKNHB;
-  case ZVKSED_EXT:
-return TARGET_ZVKSED;
-  case ZVKSH_EXT:
-return TARGET_ZVKSH;
-  case XTHEADVECTOR_EXT:
-   return TARGET_XTHEADVECTOR;
-  case ZVFBFMIN_EXT:
-   return TARGET_ZVFBFMIN;
-  case ZVFBFWMA_EXT:
-   return TARGET_ZVFBFWMA;
-  case XSFVQMACCQOQ_EXT:
-   return TARGET_XSFVQMACCQOQ;
-  case XSFVQMACCDOD_EXT:
-   return TARGET_XSFVQMACCDOD;
-  case XSFVFNRCLIPXFQF_EXT:
-   return TARGET_XSFVFNRCLIPXFQF;
-  case XSFVCP_EXT:
-   return TARGET_XSFVCP;
-  default:
-gcc_unreachable ();
-}
+return required_extensions_specified (ext_value);
   }
   /* The base name, as a string.  */
   const char *base_name;
diff --git a/gcc/testsuite/gcc.target/riscv/rvv/base/pr120436.c 
b/gcc/testsuite/gcc.target/riscv/rvv/base/pr120436.c
new file mode 100644
index ..d22091e59490
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/rvv/base/pr120436.c
@@ -0,0 +1,16 @@
+/* { dg-do compile } */
+/* { dg-options "-march=rv64gc -mabi=lp64d -O0" } */
+
+/* Use -O0 as otherwise the unused intrinsics get
+   optimized away.  We used to ICE here instead of
+   emitting an error.  */
+
+#include "riscv_vector.h"
+
+void
+clean_subreg (int32_t *in, int32_t *out, size_t m) /* { dg-error {this 
operation requires the RVV ISA extension} } */
+{
+  vint16m8_t v24, v8, v16;
+  vint32m8_t result = __riscv_vle32_v_i32m8 (in, 32); /* { dg-error {built-in 
function '__riscv_vle32_v_i32m8\(in, 32\)' requires the 'v' ISA extension} } */
+  vint32m1_t v0 = __riscv_vget_v_i32m8_i32m1 (result, 0);
+}


[gcc r16-2102] RISC-V: Do not use vsetivli for THeadVector.

2025-07-08 Thread Robin Dapp via Gcc-cvs
https://gcc.gnu.org/g:dab5951af891c58aa4dd60755edb49df882b680a

commit r16-2102-gdab5951af891c58aa4dd60755edb49df882b680a
Author: Robin Dapp 
Date:   Tue Jul 8 11:35:12 2025 +0200

RISC-V: Do not use vsetivli for THeadVector.

In emit_vlmax_insn_lra we use a vsetivli for an immediate AVL.
XTHeadVector does not support this, so guard appropriately.

PR target/120461

gcc/ChangeLog:

* config/riscv/riscv-v.cc (emit_vlmax_insn_lra): Do not emit
vsetivli for XTHeadVector.

gcc/testsuite/ChangeLog:

* gcc.target/riscv/rvv/xtheadvector/pr120461.c: New test.

Diff:
---
 gcc/config/riscv/riscv-v.cc| 2 +-
 gcc/testsuite/gcc.target/riscv/rvv/xtheadvector/pr120461.c | 6 ++
 2 files changed, 7 insertions(+), 1 deletion(-)

diff --git a/gcc/config/riscv/riscv-v.cc b/gcc/config/riscv/riscv-v.cc
index a5ab8dd4e2fe..22d194909cfa 100644
--- a/gcc/config/riscv/riscv-v.cc
+++ b/gcc/config/riscv/riscv-v.cc
@@ -408,7 +408,7 @@ emit_vlmax_insn_lra (unsigned icode, unsigned insn_flags, 
rtx *ops, rtx vl)
   gcc_assert (!can_create_pseudo_p ());
   machine_mode mode = GET_MODE (ops[0]);
 
-  if (imm_avl_p (mode))
+  if (imm_avl_p (mode) && !TARGET_XTHEADVECTOR)
 {
   /* Even though VL is a real hardreg already allocated since
 it is post-RA now, we still gain benefits that we emit
diff --git a/gcc/testsuite/gcc.target/riscv/rvv/xtheadvector/pr120461.c 
b/gcc/testsuite/gcc.target/riscv/rvv/xtheadvector/pr120461.c
new file mode 100644
index ..69391570970f
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/rvv/xtheadvector/pr120461.c
@@ -0,0 +1,6 @@
+/* { dg-do compile } */
+/* { dg-options "-mcpu=xt-c920 -mrvv-vector-bits=zvl 
-fzero-call-used-regs=all" */
+
+void
+foo ()
+{}


[gcc r16-2101] RISC-V: Ignore non-types in builtin function hash.

2025-07-08 Thread Robin Dapp via Gcc-cvs
https://gcc.gnu.org/g:6968656d631c081889f30c2247bf255e0831

commit r16-2101-g6968656d631c081889f30c2247bf255e0831
Author: Robin Dapp 
Date:   Tue Jul 8 11:17:41 2025 +0200

RISC-V: Ignore non-types in builtin function hash.

If a user passes a string that doesn't represent a variable we still try
to compute a hash for its type.  Its tree does not represent a type,
though, but merely an exceptional tree node.  This patch just ignores it,
leaving the
error to the checking code later.

PR target/113829

gcc/ChangeLog:

* config/riscv/riscv-vector-builtins.cc 
(registered_function::overloaded_hash):
Skip non-type arguments.

gcc/testsuite/ChangeLog:

* gcc.target/riscv/rvv/base/pr113829.c: New test.

Diff:
---
 gcc/config/riscv/riscv-vector-builtins.cc  |  6 ++
 gcc/testsuite/gcc.target/riscv/rvv/base/pr113829.c | 10 ++
 2 files changed, 16 insertions(+)

diff --git a/gcc/config/riscv/riscv-vector-builtins.cc 
b/gcc/config/riscv/riscv-vector-builtins.cc
index f652a125dc35..8810af0d9ccb 100644
--- a/gcc/config/riscv/riscv-vector-builtins.cc
+++ b/gcc/config/riscv/riscv-vector-builtins.cc
@@ -4977,6 +4977,12 @@ registered_function::overloaded_hash () const
   for (unsigned int i = 0; i < argument_types.length (); i++)
 {
   type = argument_types[i];
+
+  /* If we're passed something entirely unreasonable, just ignore here.
+We'll warn later anyway.  */
+  if (TREE_CODE_CLASS (TREE_CODE (type)) != tcc_type)
+   continue;
+
   unsigned_p = POINTER_TYPE_P (type) ? TYPE_UNSIGNED (TREE_TYPE (type))
 : TYPE_UNSIGNED (type);
   mode_p = POINTER_TYPE_P (type) ? TYPE_MODE (TREE_TYPE (type))
diff --git a/gcc/testsuite/gcc.target/riscv/rvv/base/pr113829.c 
b/gcc/testsuite/gcc.target/riscv/rvv/base/pr113829.c
new file mode 100644
index ..48c291a92026
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/rvv/base/pr113829.c
@@ -0,0 +1,10 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -march=rv64gcv -mabi=lp64d" } */
+
+#pragma riscv intrinsic "vector"
+void
+foo (void)
+{
+  __riscv_vfredosum_tu (X); /* { dg-error "undeclared" } */
+  /* { dg-error "too many arguments" "" { target *-*-* } .-1 } */
+}


[gcc r16-2174] RISC-V: Make zero-stride load broadcast a tunable.

2025-07-10 Thread Robin Dapp via Gcc-cvs
https://gcc.gnu.org/g:dcba959fb30dc250eeb6fdd05aa878e5f1fc8c2d

commit r16-2174-gdcba959fb30dc250eeb6fdd05aa878e5f1fc8c2d
Author: Robin Dapp 
Date:   Thu Jul 10 09:41:48 2025 +0200

RISC-V: Make zero-stride load broadcast a tunable.

This patch makes the zero-stride load broadcast idiom dependent on a
uarch-tunable "use_zero_stride_load".  Right now we have quite a few
paths that reach a strided load and some of them are not exactly
straightforward.

While broadcast is relatively rare on rv64 targets it is more common on
rv32 targets that want to vectorize 64-bit elements.

While the patch is more involved than I would have liked it could have
even touched more places.  The whole broadcast-like insn path feels a
bit hackish due to the several optimizations we employ.  Some of the
complications stem from the fact that we lump together real broadcasts,
vector single-element sets, and strided broadcasts.  The strided-load
alternatives currently require a memory_constraint to work properly
which causes more complications when trying to disable just these.

In short, the whole pred_broadcast handling in combination with the
sew64_scalar_helper could use work in the future.  I was about to start
with it in this patch but soon realized that it would only distract from
the original intent.  What can help in the future is to split strided and
non-strided broadcast entirely, as well as the single-element sets.

It is still unclear whether we need to pay special attention to misaligned
strided loads (PR120782).

I regtested on rv32 and rv64 with strided_load_broadcast_p forced to
true and false.  With either I didn't observe any new execution failures
but obviously there are new scan failures with strided broadcast turned
off.

PR target/118734

gcc/ChangeLog:

* config/riscv/constraints.md (Wdm): Use tunable for Wdm
constraint.
* config/riscv/riscv-protos.h (emit_avltype_insn): Declare.
(can_be_broadcasted_p): Rename to...
(can_be_broadcast_p): ...this.
* config/riscv/predicates.md: Use renamed function.
(strided_load_broadcast_p): Declare.
* config/riscv/riscv-selftests.cc (run_broadcast_selftests):
Only run broadcast selftest if strided broadcasts are OK.
* config/riscv/riscv-v.cc (emit_avltype_insn): New function.
(sew64_scalar_helper): Only emit a pred_broadcast if the new
tunable says so.
(can_be_broadcasted_p): Rename to...
(can_be_broadcast_p): ...this and use new tunable.
* config/riscv/riscv.cc (struct riscv_tune_param): Add strided
broadcast tunable.
(strided_load_broadcast_p): Implement.
* config/riscv/vector.md: Use strided_load_broadcast_p () and
work around 64-bit broadcast on rv32 targets.

Diff:
---
 gcc/config/riscv/constraints.md |  7 ++--
 gcc/config/riscv/predicates.md  |  2 +-
 gcc/config/riscv/riscv-protos.h |  4 ++-
 gcc/config/riscv/riscv-selftests.cc | 10 --
 gcc/config/riscv/riscv-v.cc | 58 +++-
 gcc/config/riscv/riscv.cc   | 20 +++
 gcc/config/riscv/vector.md  | 66 +++--
 7 files changed, 133 insertions(+), 34 deletions(-)

diff --git a/gcc/config/riscv/constraints.md b/gcc/config/riscv/constraints.md
index ccab1a2e29df..5ecaa19eb014 100644
--- a/gcc/config/riscv/constraints.md
+++ b/gcc/config/riscv/constraints.md
@@ -237,10 +237,11 @@
  (and (match_code "const_vector")
   (match_test "rtx_equal_p (op, riscv_vector::gen_scalar_move_mask 
(GET_MODE (op)))")))
 
-(define_memory_constraint "Wdm"
+(define_constraint "Wdm"
   "Vector duplicate memory operand"
-  (and (match_code "mem")
-   (match_code "reg" "0")))
+  (and (match_test "strided_load_broadcast_p ()")
+   (and (match_code "mem")
+   (match_code "reg" "0"
 
 ;; Vendor ISA extension constraints.
 
diff --git a/gcc/config/riscv/predicates.md b/gcc/config/riscv/predicates.md
index 8baad2fae7a9..1f9a6b562e53 100644
--- a/gcc/config/riscv/predicates.md
+++ b/gcc/config/riscv/predicates.md
@@ -617,7 +617,7 @@
 
 ;; The scalar operand can be directly broadcast by RVV instructions.
 (define_predicate "direct_broadcast_operand"
-  (match_test "riscv_vector::can_be_broadcasted_p (op)"))
+  (match_test "riscv_vector::can_be_broadcast_p (op)"))
 
 ;; A CONST_INT operand that has exactly two bits cleared.
 (define_predicate "const_nottwobits_operand"
diff --git a/gcc/config/riscv/riscv-protos.h b/gcc/config/riscv/riscv-protos.h
index 38f63ea84248..a41c4c299fac 100644
--- a/gcc/config/riscv/riscv-protos.h
+++ b/gcc/config/riscv/riscv-protos.h
@@ -604,6 +604,7 @@ void emit_vlmax_vsetvl (machine_mode, rtx);
 void emit_hard_vlmax_vsetvl (machine_mode, rtx);

[gcc r16-2175] expand: ICE if asked to expand RDIV with non-float type.

2025-07-10 Thread Robin Dapp via Gcc-cvs
https://gcc.gnu.org/g:5aa21765236730c1772c19454cbb71365b84d583

commit r16-2175-g5aa21765236730c1772c19454cbb71365b84d583
Author: Robin Dapp 
Date:   Wed Jul 9 15:58:05 2025 +0200

expand: ICE if asked to expand RDIV with non-float type.

This patch adds asserts that ensure we only expand an RDIV_EXPR with
an actual float mode.  It also replaces the RDIV_EXPR in setting a
vectorized loop's length by EXACT_DIV_EXPR.  The code in question is
only used with length-control targets (riscv, powerpc, s390).

PR target/121014

gcc/ChangeLog:

* cfgexpand.cc (expand_debug_expr): Assert FLOAT_MODE_P.
* optabs-tree.cc (optab_for_tree_code): Assert FLOAT_TYPE_P.
* tree-vect-loop.cc (vect_get_loop_len): Use EXACT_DIV_EXPR.

Diff:
---
 gcc/cfgexpand.cc  | 2 ++
 gcc/optabs-tree.cc| 2 ++
 gcc/tree-vect-loop.cc | 2 +-
 3 files changed, 5 insertions(+), 1 deletion(-)

diff --git a/gcc/cfgexpand.cc b/gcc/cfgexpand.cc
index 33649d43f71c..a656ccebf176 100644
--- a/gcc/cfgexpand.cc
+++ b/gcc/cfgexpand.cc
@@ -5358,6 +5358,8 @@ expand_debug_expr (tree exp)
   return simplify_gen_binary (MULT, mode, op0, op1);
 
 case RDIV_EXPR:
+  gcc_assert (FLOAT_MODE_P (mode));
+  /* Fall through.  */
 case TRUNC_DIV_EXPR:
 case EXACT_DIV_EXPR:
   if (unsignedp)
diff --git a/gcc/optabs-tree.cc b/gcc/optabs-tree.cc
index 6dfe8ee4c4e4..9308a6dfd65c 100644
--- a/gcc/optabs-tree.cc
+++ b/gcc/optabs-tree.cc
@@ -82,6 +82,8 @@ optab_for_tree_code (enum tree_code code, const_tree type,
return unknown_optab;
   /* FALLTHRU */
 case RDIV_EXPR:
+  gcc_assert (FLOAT_TYPE_P (type));
+  /* FALLTHRU */
 case TRUNC_DIV_EXPR:
 case EXACT_DIV_EXPR:
   if (TYPE_SATURATING (type))
diff --git a/gcc/tree-vect-loop.cc b/gcc/tree-vect-loop.cc
index 8ea0f45d79fc..56f80db57bbc 100644
--- a/gcc/tree-vect-loop.cc
+++ b/gcc/tree-vect-loop.cc
@@ -11079,7 +11079,7 @@ vect_get_loop_len (loop_vec_info loop_vinfo, 
gimple_stmt_iterator *gsi,
  factor = exact_div (nunits1, nunits2).to_constant ();
  tree iv_type = LOOP_VINFO_RGROUP_IV_TYPE (loop_vinfo);
  gimple_seq seq = NULL;
- loop_len = gimple_build (&seq, RDIV_EXPR, iv_type, loop_len,
+ loop_len = gimple_build (&seq, EXACT_DIV_EXPR, iv_type, loop_len,
   build_int_cst (iv_type, factor));
  if (seq)
gsi_insert_seq_before (gsi, seq, GSI_SAME_STMT);


[gcc r16-2285] RISC-V: Fix vsetvl merge rule.

2025-07-16 Thread Robin Dapp via Gcc-cvs
https://gcc.gnu.org/g:9041f2bff8202d9b87d8c27f21e4ffa8d50b36a3

commit r16-2285-g9041f2bff8202d9b87d8c27f21e4ffa8d50b36a3
Author: Robin Dapp 
Date:   Mon Jul 14 13:53:12 2025 +0200

RISC-V: Fix vsetvl merge rule.

In PR120297 we fuse
  vsetvl e8,mf2,...
  vsetvl e64,m1,...
into
  vsetvl e64,m4,...

Individually, that's ok but we also change the new vsetvl's demand to
"SEW only" even though the first original one demanded SEW >= 8 and
ratio = 16.

As we forget the ratio after the merge we find that the vsetvl following
the merged one has ratio = 64 demand and we fuse into
  vsetvl e64,m1,..
which obviously doesn't have ratio = 16 any more.

Regtested on rv64gcv_zvl512b.

PR target/120297

gcc/ChangeLog:

* config/riscv/riscv-vsetvl.def: Do not forget ratio demand of
previous vsetvl.

gcc/testsuite/ChangeLog:

* gcc.target/riscv/rvv/pr120297.c: New test.

Diff:
---
 gcc/config/riscv/riscv-vsetvl.def |  6 ++--
 gcc/testsuite/gcc.target/riscv/rvv/pr120297.c | 50 +++
 2 files changed, 53 insertions(+), 3 deletions(-)

diff --git a/gcc/config/riscv/riscv-vsetvl.def 
b/gcc/config/riscv/riscv-vsetvl.def
index d7a5ada772d0..0f999d2276d4 100644
--- a/gcc/config/riscv/riscv-vsetvl.def
+++ b/gcc/config/riscv/riscv-vsetvl.def
@@ -79,7 +79,7 @@ DEF_SEW_LMUL_RULE (sew_only, sew_only, sew_only, sew_eq_p, 
sew_eq_p, nop)
 DEF_SEW_LMUL_RULE (sew_only, ge_sew, sew_only,
   sew_ge_and_prev_sew_le_next_max_sew_p, sew_ge_p, nop)
 DEF_SEW_LMUL_RULE (
-  sew_only, ratio_and_ge_sew, sew_lmul,
+  sew_only, ratio_and_ge_sew, ratio_and_ge_sew,
   sew_ge_and_prev_sew_le_next_max_sew_and_next_ratio_valid_for_prev_sew_p,
   always_false, modify_lmul_with_next_ratio)
 
@@ -104,9 +104,9 @@ DEF_SEW_LMUL_RULE (ratio_and_ge_sew, sew_lmul, sew_lmul,
 DEF_SEW_LMUL_RULE (ratio_and_ge_sew, ratio_only, ratio_and_ge_sew, ratio_eq_p,
   ratio_eq_p, use_max_sew_and_lmul_with_prev_ratio)
 DEF_SEW_LMUL_RULE (
-  ratio_and_ge_sew, sew_only, sew_only,
+  ratio_and_ge_sew, sew_only, ratio_and_ge_sew,
   sew_le_and_next_sew_le_prev_max_sew_and_prev_ratio_valid_for_next_sew_p,
-  always_false, use_next_sew_with_prev_ratio)
+  sew_eq_p, use_next_sew_with_prev_ratio)
 DEF_SEW_LMUL_RULE (ratio_and_ge_sew, ge_sew, ratio_and_ge_sew,
   max_sew_overlap_and_prev_ratio_valid_for_next_sew_p,
   sew_ge_p, use_max_sew_and_lmul_with_prev_ratio)
diff --git a/gcc/testsuite/gcc.target/riscv/rvv/pr120297.c 
b/gcc/testsuite/gcc.target/riscv/rvv/pr120297.c
new file mode 100644
index ..3d1845d0fe66
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/rvv/pr120297.c
@@ -0,0 +1,50 @@
+/* { dg-do run } */
+/* { dg-require-effective-target riscv_v_ok } */
+/* { dg-options "-march=rv64gcv -mabi=lp64d -O3 -fwhole-program" } */
+
+unsigned a;
+short c;
+char d;
+unsigned long e;
+_Bool f[10][10];
+unsigned g[10];
+long long ak;
+char i = 7;
+long long t[10];
+short x[10][10][10][10];
+short y[10][10][10][10];
+
+void
+h (char i, long long t[], short x[][10][10][10], short y[][10][10][10],
+   _Bool aa)
+{
+  for (int j = 2; j < 8; j += 2)
+{
+  for (short k = 0; k < 10; k++)
+   {
+ for (int l = 3; l < 8; l += 2)
+   a = x[1][j][k][l];
+ c = x[c][1][1][c];
+   }
+  for (int k = 0; k < 10; k++)
+   {
+ f[2][k] |= (_Bool) t[c];
+ g[c] = t[c + 1];
+ d += y[j][1][k][k];
+ e = e > i ? e : i;
+   }
+}
+}
+
+int
+main ()
+{
+  t[c] = 1;
+  h (i, t, x, y, a);
+  for (int j = 0; j < 10; ++j)
+for (int k = 0; k < 10; ++k)
+  ak ^= f[j][k] + 238516665 + (ak >> 2);
+  ak ^= g[c] + 238516665 + (ak >> 2);
+  if (ak != 234635118ull)
+__builtin_abort ();
+}


[gcc r16-2286] expand: Allow fixed-point arithmetic for RDIV_EXPR.

2025-07-16 Thread Robin Dapp via Gcc-cvs
https://gcc.gnu.org/g:4648fe556e26d54abfc863ebb2ed046f1a260fdb

commit r16-2286-g4648fe556e26d54abfc863ebb2ed046f1a260fdb
Author: Robin Dapp 
Date:   Tue Jul 15 10:55:36 2025 +0200

expand: Allow fixed-point arithmetic for RDIV_EXPR.

r16-2175-g5aa21765236730 introduced an assert for floating-point modes
when expanding an RDIV_EXPR but forgot fixed-point modes.  This patch
adds ALL_FIXED_POINT_MODE_P to the assert.

PR middle-end/121065

gcc/ChangeLog:

* cfgexpand.cc (expand_debug_expr): Allow fixed-point modes for
RDIV_EXPR.
* optabs-tree.cc (optab_for_tree_code): Ditto.

gcc/testsuite/ChangeLog:

* gcc.target/arm/pr121065.c: New test.

Diff:
---
 gcc/cfgexpand.cc|  3 ++-
 gcc/optabs-tree.cc  |  3 ++-
 gcc/testsuite/gcc.target/arm/pr121065.c | 11 +++
 3 files changed, 15 insertions(+), 2 deletions(-)

diff --git a/gcc/cfgexpand.cc b/gcc/cfgexpand.cc
index a656ccebf176..8a55f4f472a2 100644
--- a/gcc/cfgexpand.cc
+++ b/gcc/cfgexpand.cc
@@ -5358,7 +5358,8 @@ expand_debug_expr (tree exp)
   return simplify_gen_binary (MULT, mode, op0, op1);
 
 case RDIV_EXPR:
-  gcc_assert (FLOAT_MODE_P (mode));
+  gcc_assert (FLOAT_MODE_P (mode)
+ || ALL_FIXED_POINT_MODE_P (mode));
   /* Fall through.  */
 case TRUNC_DIV_EXPR:
 case EXACT_DIV_EXPR:
diff --git a/gcc/optabs-tree.cc b/gcc/optabs-tree.cc
index 9308a6dfd65c..0de74c7966af 100644
--- a/gcc/optabs-tree.cc
+++ b/gcc/optabs-tree.cc
@@ -82,7 +82,8 @@ optab_for_tree_code (enum tree_code code, const_tree type,
return unknown_optab;
   /* FALLTHRU */
 case RDIV_EXPR:
-  gcc_assert (FLOAT_TYPE_P (type));
+  gcc_assert (FLOAT_TYPE_P (type)
+ || ALL_FIXED_POINT_MODE_P (TYPE_MODE (type)));
   /* FALLTHRU */
 case TRUNC_DIV_EXPR:
 case EXACT_DIV_EXPR:
diff --git a/gcc/testsuite/gcc.target/arm/pr121065.c 
b/gcc/testsuite/gcc.target/arm/pr121065.c
new file mode 100644
index ..dfc6059a46d2
--- /dev/null
+++ b/gcc/testsuite/gcc.target/arm/pr121065.c
@@ -0,0 +1,11 @@
+/* { dg-do compile } */
+/* { dg-options "-mcpu=cortex-m55" } */
+
+_Accum sa;
+char c;
+
+void
+div_csa ()
+{
+  c /= sa;
+}


[gcc r16-2451] RISC-V: testsuite: Fix vx_vf_*run-1-f16.c run tests.

2025-07-23 Thread Robin Dapp via Gcc-cvs
https://gcc.gnu.org/g:4b62bd519a852af8f69b077ba7b0ed531f4a14c5

commit r16-2451-g4b62bd519a852af8f69b077ba7b0ed531f4a14c5
Author: Robin Dapp 
Date:   Mon Jul 21 15:32:09 2025 +0200

RISC-V: testsuite: Fix vx_vf_*run-1-f16.c run tests.

This patch fixes the vf_vfmacc-run-1-f16.c test failures on rv32
by adding zvfh requirements as well as options to the test and
the target harness.

gcc/testsuite/ChangeLog:

* gcc.target/riscv/rvv/autovec/vx_vf/vf_vfmacc-run-1-f16.c:
Add zvfh requirements and options.
* gcc.target/riscv/rvv/autovec/vx_vf/vf_vfmadd-run-1-f16.c:
Ditto.
* gcc.target/riscv/rvv/autovec/vx_vf/vf_vfmsac-run-1-f16.c:
Ditto.
* gcc.target/riscv/rvv/autovec/vx_vf/vf_vfmsub-run-1-f16.c:
Ditto.
* gcc.target/riscv/rvv/autovec/vx_vf/vf_vfnmacc-run-1-f16.c:
Ditto.
* gcc.target/riscv/rvv/autovec/vx_vf/vf_vfnmadd-run-1-f16.c:
Ditto.
* gcc.target/riscv/rvv/autovec/vx_vf/vf_vfnmsac-run-1-f16.c:
Ditto.
* gcc.target/riscv/rvv/autovec/vx_vf/vf_vfnmsub-run-1-f16.c:
Ditto.
* gcc.target/riscv/rvv/autovec/vx_vf/vf_vfwmacc-run-1-f16.c:
Ditto.
* gcc.target/riscv/rvv/autovec/vx_vf/vf_vfwmsac-run-1-f16.c:
Ditto.
* lib/target-supports.exp: Add zvfh options.

Diff:
---
 .../gcc.target/riscv/rvv/autovec/vx_vf/vf_vfmacc-run-1-f16.c  | 6 +-
 .../gcc.target/riscv/rvv/autovec/vx_vf/vf_vfmadd-run-1-f16.c  | 6 +-
 .../gcc.target/riscv/rvv/autovec/vx_vf/vf_vfmsac-run-1-f16.c  | 6 +-
 .../gcc.target/riscv/rvv/autovec/vx_vf/vf_vfmsub-run-1-f16.c  | 6 +-
 .../gcc.target/riscv/rvv/autovec/vx_vf/vf_vfnmacc-run-1-f16.c | 6 +-
 .../gcc.target/riscv/rvv/autovec/vx_vf/vf_vfnmadd-run-1-f16.c | 6 +-
 .../gcc.target/riscv/rvv/autovec/vx_vf/vf_vfnmsac-run-1-f16.c | 6 +-
 .../gcc.target/riscv/rvv/autovec/vx_vf/vf_vfnmsub-run-1-f16.c | 6 +-
 .../gcc.target/riscv/rvv/autovec/vx_vf/vf_vfwmacc-run-1-f16.c | 6 +-
 .../gcc.target/riscv/rvv/autovec/vx_vf/vf_vfwmsac-run-1-f16.c | 6 +-
 gcc/testsuite/lib/target-supports.exp | 8 
 11 files changed, 58 insertions(+), 10 deletions(-)

diff --git 
a/gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vf_vfmacc-run-1-f16.c 
b/gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vf_vfmacc-run-1-f16.c
index 982dd9736acc..fd8aa30be17a 100644
--- a/gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vf_vfmacc-run-1-f16.c
+++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vf_vfmacc-run-1-f16.c
@@ -1,5 +1,9 @@
 /* { dg-do run { target { riscv_v } } } */
-/* { dg-additional-options "-march=rv64gcv_zvfh --param=fpr2vr-cost=0" } */
+/* { dg-require-effective-target riscv_v_ok } */
+/* { dg-require-effective-target riscv_zvfh } */
+/* { dg-add-options "riscv_v" } */
+/* { dg-add-options "riscv_zvfh" } */
+/* { dg-additional-options "--param=fpr2vr-cost=0" } */
 
 #include "vf_mulop.h"
 #include "vf_mulop_data.h"
diff --git 
a/gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vf_vfmadd-run-1-f16.c 
b/gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vf_vfmadd-run-1-f16.c
index 400bbcd1d79f..8fd855288993 100644
--- a/gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vf_vfmadd-run-1-f16.c
+++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vf_vfmadd-run-1-f16.c
@@ -1,5 +1,9 @@
 /* { dg-do run { target { riscv_v } } } */
-/* { dg-additional-options "-march=rv64gcv_zvfh --param=fpr2vr-cost=0" } */
+/* { dg-require-effective-target riscv_v_ok } */
+/* { dg-require-effective-target riscv_zvfh } */
+/* { dg-add-options "riscv_v" } */
+/* { dg-add-options "riscv_zvfh" } */
+/* { dg-additional-options "--param=fpr2vr-cost=0" } */
 
 #include "vf_mulop.h"
 #include "vf_mulop_data.h"
diff --git 
a/gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vf_vfmsac-run-1-f16.c 
b/gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vf_vfmsac-run-1-f16.c
index 21c1860c0f1c..e91fd15a5b73 100644
--- a/gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vf_vfmsac-run-1-f16.c
+++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vf_vfmsac-run-1-f16.c
@@ -1,5 +1,9 @@
 /* { dg-do run { target { riscv_v } } } */
-/* { dg-additional-options "-march=rv64gcv_zvfh --param=fpr2vr-cost=0" } */
+/* { dg-require-effective-target riscv_v_ok } */
+/* { dg-require-effective-target riscv_zvfh } */
+/* { dg-add-options "riscv_v" } */
+/* { dg-add-options "riscv_zvfh" } */
+/* { dg-additional-options "--param=fpr2vr-cost=0" } */
 
 #include "vf_mulop.h"
 #include "vf_mulop_data.h"
diff --git 
a/gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vf_vfmsub-run-1-f16.c 
b/gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vf_vfmsub-run-1-f16.c
index 163b5bd21b42..ca7e0db17b5b 100644
--- a/gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vf_vfmsub-run-1-f16.c
+++ b/gcc/testsuite/gcc.targe

[gcc r16-2452] RISC-V: Rework broadcast handling [PR121073].

2025-07-23 Thread Robin Dapp via Gcc-cvs
https://gcc.gnu.org/g:f796f819c35cc08e0a6e7242aee92eb96f62a3b5

commit r16-2452-gf796f819c35cc08e0a6e7242aee92eb96f62a3b5
Author: Robin Dapp 
Date:   Thu Jul 17 11:09:43 2025 +0200

RISC-V: Rework broadcast handling [PR121073].

During the last weeks it became clear that our current broadcast
handling needs an overhaul in order to improve maintainability.
PR121073 showed that my intermediate fix wasn't enough and caused
regressions.

This patch now takes a first step towards untangling broadcast
(vmv.v.x), "set first" (vmv.s.x), and zero-strided load (vlse).
Also can_be_broadcast_p is rewritten and strided_broadcast_p is
introduced to make the distinction clear directly in the predicates.

Due to the pervasiveness of the patterns I needed to touch a lot
of places and tried to clear up some things while at it.  The patch
therefore also introduces new helpers expand_broadcast for vmv.v.x
that dispatches to regular as well as strided broadcast and
expand_set_first that does the same thing for vmv.s.x.

The non-strided fallbacks are now implemented as splitters of the
strided variants.  This makes it easier to see where and when things
happen.

The test cases I touched appeared wrong to me so this patch sets a new
baseline for some of the scalar_move tests.

There is still work to be done but IMHO that can be deferred: It would
be clearer if the three broadcast-like variants differed not just in
name but also in RTL pattern so matching is not as confusing.  Right now
vmv.v.x and vmv.s.x only differ in the mask and are interchangeable by
just changing it from "all ones" to a "single one".

As last time, I regtested on rv64 and rv32 with strided_broadcast turned
on and off.  Note there are regressions in cond_fma_fnma-[78].c.  Those
are due to the patch exposing more fwprop/late-combine opportunities.  For
fma/fnma we don't yet have proper costing for vv/vx in place but I
expect that to be addressed soon and figured we can live with those for
the time being.

PR target/121073

gcc/ChangeLog:

* config/riscv/autovec-opt.md: Use new helpers.
* config/riscv/autovec.md: Ditto.
* config/riscv/predicates.md (strided_broadcast_mask_operand):
New predicate.
(strided_broadcast_operand): Ditto.
(any_broadcast_operand): Ditto.
* config/riscv/riscv-protos.h (expand_broadcast): Declare.
(expand_set_first): Ditto.
(expand_set_first_tu): Ditto.
(strided_broadcast_p): Ditto.
* config/riscv/riscv-string.cc (expand_vec_setmem): Use new
helpers.
* config/riscv/riscv-v.cc (expand_broadcast): New function.
(expand_set_first): Ditto.
(expand_set_first_tu): Ditto.
(expand_const_vec_duplicate): Use new helpers.
(expand_const_vector_duplicate_repeating): Ditto.
(expand_const_vector_duplicate_default): Ditto.
(sew64_scalar_helper): Ditto.
(expand_vector_init_merge_repeating_sequence): Ditto.
(expand_reduction): Ditto.
(strided_broadcast_p): New function.
(whole_reg_to_reg_move_p): Use new helpers.
* config/riscv/riscv-vector-builtins-bases.cc: Use either
broadcast or strided broadcast.
* config/riscv/riscv-vector-builtins.cc 
(function_expander::use_ternop_insn):
Ditto.
(function_expander::use_widen_ternop_insn): Ditto.
(function_expander::use_scalar_broadcast_insn): Ditto.
* config/riscv/riscv-vector-builtins.h: Declare scalar
broadcast.
* config/riscv/vector.md (*pred_broadcast): Split into
regular and strided broadcast.
(*pred_broadcast_zvfh): Split.
(pred_broadcast_zvfh): Ditto.
(*pred_broadcast_zvfhmin): Ditto.
(@pred_strided_broadcast): Ditto.
(*pred_strided_broadcast): Ditto.
(*pred_strided_broadcast_zvfhmin): Ditto.

gcc/testsuite/ChangeLog:

* gcc.target/riscv/rvv/autovec/vls-vlmax/repeat-6.c: Adjust test
expectation.
* gcc.target/riscv/rvv/base/scalar_move-5.c: Ditto.
* gcc.target/riscv/rvv/base/scalar_move-6.c: Ditto.
* gcc.target/riscv/rvv/base/scalar_move-7.c: Ditto.
* gcc.target/riscv/rvv/base/scalar_move-8.c: Ditto.
* gcc.target/riscv/rvv/base/scalar_move-9.c: Ditto.
* gcc.target/riscv/rvv/pr121073.c: New test.

Diff:
---
 gcc/config/riscv/autovec-opt.md|   3 +-
 gcc/config/riscv/autovec.md|   7 +-
 gcc/config/riscv/predicates.md |  13 +
 gcc/config/riscv/riscv-protos.h|   4 +
 gcc/config/riscv/riscv-st

[gcc r16-2482] vect: Misalign checks for gather/scatter.

2025-07-24 Thread Robin Dapp via Gcc-cvs
https://gcc.gnu.org/g:83afbe552e6b0baff37210dd1f344cfee4b26a2f

commit r16-2482-g83afbe552e6b0baff37210dd1f344cfee4b26a2f
Author: Robin Dapp 
Date:   Thu Jul 3 11:04:29 2025 +0200

vect: Misalign checks for gather/scatter.

This patch adds simple misalignment checks for gather/scatter
operations.  Previously, we assumed that those perform element accesses
internally so alignment does not matter.  The riscv vector spec however
explicitly states that vector operations are allowed to fault on
element-misaligned accesses.  Reasonable uarchs won't, but...

For gather/scatter we have two paths in the vectorizer:

 (1) Regular analysis based on datarefs.  Here we can also create
 strided loads.
 (2) Non-affine access where each gather index is relative to the
 initial address.

The assumption this patch works on is that once the alignment for the
first scalar is correct, all others will fall in line, as the index is
always a multiple of the first element's size.

For (1) we have a dataref and can check it for alignment as in other
cases.  For (2) this patch checks the object alignment of BASE and
compares it against the natural alignment of the current vectype's unit.

The patch also adds a pointer argument to the gather/scatter IFNs that
contains the necessary alignment.  Most of the patch is thus mechanical
in that it merely adjusts indices.

I tested the riscv version with a custom qemu version that faults on
element-misaligned vector accesses.  With this patch applied, there is
just a single fault left, which is due to PR120782 and which will be
addressed separately.

Bootstrapped and regtested on x86 and aarch64.  Regtested on
rv64gcv_zvl512b with and without unaligned vector support.

gcc/ChangeLog:

* internal-fn.cc (internal_fn_len_index): Adjust indices for new
alias_ptr param.
(internal_fn_else_index): Ditto.
(internal_fn_mask_index): Ditto.
(internal_fn_stored_value_index): Ditto.
(internal_fn_alias_ptr_index): Ditto.
(internal_fn_offset_index): Ditto.
(internal_fn_scale_index): Ditto.
(internal_gather_scatter_fn_supported_p): Ditto.
* internal-fn.h (internal_fn_alias_ptr_index): Ditto.
* optabs-query.cc (supports_vec_gather_load_p): Ditto.
* tree-vect-data-refs.cc (vect_check_gather_scatter): Add alias
pointer.
* tree-vect-patterns.cc (vect_recog_gather_scatter_pattern): Add
alias pointer.
* tree-vect-slp.cc (vect_get_operand_map): Adjust for alias
pointer.
* tree-vect-stmts.cc (vect_truncate_gather_scatter_offset): Add
alias pointer and misalignment handling.
(get_load_store_type): Move from here...
(get_group_load_store_type): ...To here.
(vectorizable_store): Add alias pointer.
(vectorizable_load): Ditto.
* tree-vectorizer.h (struct gather_scatter_info): Ditto.

Diff:
---
 gcc/internal-fn.cc |  43 ---
 gcc/internal-fn.h  |   1 +
 gcc/optabs-query.cc|   6 +-
 gcc/tree-vect-data-refs.cc |  61 +++
 gcc/tree-vect-patterns.cc  |  17 +++--
 gcc/tree-vect-slp.cc   |  16 ++--
 gcc/tree-vect-stmts.cc | 179 +
 gcc/tree-vectorizer.h  |   7 +-
 8 files changed, 206 insertions(+), 124 deletions(-)

diff --git a/gcc/internal-fn.cc b/gcc/internal-fn.cc
index 1411f4497892..bf2fac818070 100644
--- a/gcc/internal-fn.cc
+++ b/gcc/internal-fn.cc
@@ -4967,11 +4967,13 @@ internal_fn_len_index (internal_fn fn)
   return 2;
 
 case IFN_MASK_LEN_SCATTER_STORE:
+  return 6;
+
 case IFN_MASK_LEN_STRIDED_LOAD:
   return 5;
 
 case IFN_MASK_LEN_GATHER_LOAD:
-  return 6;
+  return 7;
 
 case IFN_COND_LEN_FMA:
 case IFN_COND_LEN_FMS:
@@ -5075,7 +5077,7 @@ internal_fn_else_index (internal_fn fn)
 
 case IFN_MASK_GATHER_LOAD:
 case IFN_MASK_LEN_GATHER_LOAD:
-  return 5;
+  return 6;
 
 default:
   return -1;
@@ -5110,7 +5112,7 @@ internal_fn_mask_index (internal_fn fn)
 case IFN_MASK_SCATTER_STORE:
 case IFN_MASK_LEN_GATHER_LOAD:
 case IFN_MASK_LEN_SCATTER_STORE:
-  return 4;
+  return 5;
 
 case IFN_VCOND_MASK:
 case IFN_VCOND_MASK_LEN:
@@ -5135,10 +5137,11 @@ internal_fn_stored_value_index (internal_fn fn)
 
 case IFN_MASK_STORE:
 case IFN_MASK_STORE_LANES:
+  return 3;
 case IFN_SCATTER_STORE:
 case IFN_MASK_SCATTER_STORE:
 case IFN_MASK_LEN_SCATTER_STORE:
-  return 3;
+  return 4;
 
 case IFN_LEN_STORE:
   return 4;
@@ -5152,6 +5155,28 @@ internal_fn_stored_value_index (internal_fn fn)
 }
 }
 
+/* If FN has an alias pointer return its index, otherwise return -

[gcc r16-2480] vect: Add helper macros for gather/scatter.

2025-07-24 Thread Robin Dapp via Gcc-cvs
https://gcc.gnu.org/g:d581d84f919cd269b1fd153d9af51eb9efd10d82

commit r16-2480-gd581d84f919cd269b1fd153d9af51eb9efd10d82
Author: Robin Dapp 
Date:   Thu Jul 3 11:04:25 2025 +0200

vect: Add helper macros for gather/scatter.

This encapsulates the IFN and the builtin-function way of handling
gather/scatter via three defines:

  GATHER_SCATTER_IFN_P
  GATHER_SCATTER_LEGACY_P
  GATHER_SCATTER_EMULATED_P

and introduces a helper define for SLP operand handling as well.

gcc/ChangeLog:

* tree-vect-slp.cc (GATHER_SCATTER_OFFSET): New define.
(vect_get_and_check_slp_defs): Use.
* tree-vectorizer.h (GATHER_SCATTER_LEGACY_P): New define.
(GATHER_SCATTER_IFN_P): Ditto.
(GATHER_SCATTER_EMULATED_P): Ditto.
* tree-vect-stmts.cc (vectorizable_store): Use.
(vectorizable_load): Use.

Diff:
---
 gcc/tree-vect-slp.cc   | 12 +++-
 gcc/tree-vect-stmts.cc | 19 +--
 gcc/tree-vectorizer.h  |  8 
 3 files changed, 24 insertions(+), 15 deletions(-)

diff --git a/gcc/tree-vect-slp.cc b/gcc/tree-vect-slp.cc
index 7776b2f1d8e6..cfa841b3ce23 100644
--- a/gcc/tree-vect-slp.cc
+++ b/gcc/tree-vect-slp.cc
@@ -507,6 +507,8 @@ vect_def_types_match (enum vect_def_type dta, enum 
vect_def_type dtb)
  && (dtb == vect_external_def || dtb == vect_constant_def)));
 }
 
+#define GATHER_SCATTER_OFFSET (-3)
+
 static const int no_arg_map[] = { 0 };
 static const int arg0_map[] = { 1, 0 };
 static const int arg1_map[] = { 1, 1 };
@@ -516,10 +518,10 @@ static const int arg1_arg4_arg5_map[] = { 3, 1, 4, 5 };
 static const int arg1_arg3_arg4_map[] = { 3, 1, 3, 4 };
 static const int arg3_arg2_map[] = { 2, 3, 2 };
 static const int op1_op0_map[] = { 2, 1, 0 };
-static const int off_map[] = { 1, -3 };
-static const int off_op0_map[] = { 2, -3, 0 };
-static const int off_arg2_arg3_map[] = { 3, -3, 2, 3 };
-static const int off_arg3_arg2_map[] = { 3, -3, 3, 2 };
+static const int off_map[] = { 1, GATHER_SCATTER_OFFSET };
+static const int off_op0_map[] = { 2, GATHER_SCATTER_OFFSET, 0 };
+static const int off_arg2_arg3_map[] = { 3, GATHER_SCATTER_OFFSET, 2, 3 };
+static const int off_arg3_arg2_map[] = { 3, GATHER_SCATTER_OFFSET, 3, 2 };
 static const int mask_call_maps[6][7] = {
   { 1, 1, },
   { 2, 1, 2, },
@@ -691,7 +693,7 @@ vect_get_and_check_slp_defs (vec_info *vinfo, unsigned char 
swap,
 {
   oprnd_info = (*oprnds_info)[i];
   int opno = map ? map[i] : int (i);
-  if (opno == -3)
+  if (opno == GATHER_SCATTER_OFFSET)
{
  gcc_assert (STMT_VINFO_GATHER_SCATTER_P (stmt_info));
  if (!is_a  (vinfo)
diff --git a/gcc/tree-vect-stmts.cc b/gcc/tree-vect-stmts.cc
index 7eb072b66dd2..5b8168fdea14 100644
--- a/gcc/tree-vect-stmts.cc
+++ b/gcc/tree-vect-stmts.cc
@@ -2455,7 +2455,7 @@ get_load_store_type (vec_info  *vinfo, stmt_vec_info 
stmt_info,
 If that failed for some reason (e.g. because another pattern
 took priority), just handle cases in which the offset already
 has the right type.  */
-  else if (gs_info->ifn != IFN_LAST
+  else if (GATHER_SCATTER_IFN_P (*gs_info)
   && !is_gimple_call (stmt_info->stmt)
   && !tree_nop_conversion_p (TREE_TYPE (gs_info->offset),
  TREE_TYPE (gs_info->offset_vectype)))
@@ -8104,7 +8104,8 @@ vectorizable_store (vec_info *vinfo,
}
   else if (memory_access_type != VMAT_LOAD_STORE_LANES
   && (memory_access_type != VMAT_GATHER_SCATTER
-  || (gs_info.decl && !VECTOR_BOOLEAN_TYPE_P (mask_vectype
+  || (GATHER_SCATTER_LEGACY_P (gs_info)
+  && !VECTOR_BOOLEAN_TYPE_P (mask_vectype
{
  if (dump_enabled_p ())
dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
@@ -8112,8 +8113,7 @@ vectorizable_store (vec_info *vinfo,
  return false;
}
   else if (memory_access_type == VMAT_GATHER_SCATTER
-  && gs_info.ifn == IFN_LAST
-  && !gs_info.decl)
+  && GATHER_SCATTER_EMULATED_P (gs_info))
{
  if (dump_enabled_p ())
dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
@@ -8838,7 +8838,7 @@ vectorizable_store (vec_info *vinfo,
   final_mask, vec_mask, gsi);
}
 
- if (gs_info.ifn != IFN_LAST)
+ if (GATHER_SCATTER_IFN_P (gs_info))
{
  if (costing_p)
{
@@ -8901,7 +8901,7 @@ vectorizable_store (vec_info *vinfo,
  vect_finish_stmt_generation (vinfo, stmt_info, call, gsi);
  new_stmt = call;
}
-  else if (gs_info.decl)
+ else if (GATHER_SCATTER_LEGACY_P (gs_info))
{
  /* The builtin decls path for scatter is legacy, x86 only.  */
  g

[gcc r16-2481] vect: Add is_gather_scatter argument to misalignment hook.

2025-07-24 Thread Robin Dapp via Gcc-cvs
https://gcc.gnu.org/g:de75875d6779d7dc144aea2c5d26fb9aa2373d04

commit r16-2481-gde75875d6779d7dc144aea2c5d26fb9aa2373d04
Author: Robin Dapp 
Date:   Wed Jul 2 10:02:16 2025 +0200

vect: Add is_gather_scatter argument to misalignment hook.

This patch adds an is_gather_scatter argument to the
support_vector_misalignment hook.  No target other than riscv cares
about alignment for gather/scatter, so all other targets return true
when is_gather_scatter is set.

gcc/ChangeLog:

* config/aarch64/aarch64.cc 
(aarch64_builtin_support_vector_misalignment):
Return true for gather/scatter.
* config/arm/arm.cc (arm_builtin_support_vector_misalignment):
Ditto.
* config/epiphany/epiphany.cc 
(epiphany_support_vector_misalignment):
Ditto.
* config/gcn/gcn.cc (gcn_vectorize_support_vector_misalignment):
Ditto.
* config/loongarch/loongarch.cc 
(loongarch_builtin_support_vector_misalignment):
Ditto.
* config/riscv/riscv.cc (riscv_support_vector_misalignment):
Add gather/scatter argument.
* config/rs6000/rs6000.cc 
(rs6000_builtin_support_vector_misalignment):
Return true for gather/scatter.
* config/s390/s390.cc (s390_support_vector_misalignment):
Ditto.
* doc/tm.texi: Add argument.
* target.def: Ditto.
* targhooks.cc (default_builtin_support_vector_misalignment):
Ditto.
* targhooks.h (default_builtin_support_vector_misalignment):
Ditto.
* tree-vect-data-refs.cc (vect_supportable_dr_alignment):
Ditto.

Diff:
---
 gcc/config/aarch64/aarch64.cc | 12 +---
 gcc/config/arm/arm.cc | 12 +---
 gcc/config/epiphany/epiphany.cc   |  8 ++--
 gcc/config/gcn/gcn.cc |  6 +-
 gcc/config/loongarch/loongarch.cc |  8 ++--
 gcc/config/riscv/riscv.cc | 29 +++--
 gcc/config/rs6000/rs6000.cc   | 11 ++-
 gcc/config/s390/s390.cc   |  6 --
 gcc/doc/tm.texi   |  8 +---
 gcc/target.def| 14 +-
 gcc/targhooks.cc  |  2 ++
 gcc/targhooks.h   |  2 +-
 gcc/tree-vect-data-refs.cc|  2 +-
 13 files changed, 90 insertions(+), 30 deletions(-)

diff --git a/gcc/config/aarch64/aarch64.cc b/gcc/config/aarch64/aarch64.cc
index 9e4a37bcaff0..2871b5f6e002 100644
--- a/gcc/config/aarch64/aarch64.cc
+++ b/gcc/config/aarch64/aarch64.cc
@@ -356,7 +356,8 @@ static int aarch64_address_cost (rtx, machine_mode, 
addr_space_t, bool);
 static bool aarch64_builtin_support_vector_misalignment (machine_mode mode,
 const_tree type,
 int misalignment,
-bool is_packed);
+bool is_packed,
+bool 
is_gather_scatter);
 static machine_mode aarch64_simd_container_mode (scalar_mode, poly_int64);
 static bool aarch64_print_address_internal (FILE*, machine_mode, rtx,
aarch64_addr_query_type);
@@ -24414,10 +24415,14 @@ aarch64_simd_vector_alignment_reachable (const_tree 
type, bool is_packed)
 static bool
 aarch64_builtin_support_vector_misalignment (machine_mode mode,
 const_tree type, int misalignment,
-bool is_packed)
+bool is_packed,
+bool is_gather_scatter)
 {
   if (TARGET_SIMD && STRICT_ALIGNMENT)
 {
+  if (is_gather_scatter)
+   return true;
+
   /* Return if movmisalign pattern is not supported for this mode.  */
   if (optab_handler (movmisalign_optab, mode) == CODE_FOR_nothing)
 return false;
@@ -24427,7 +24432,8 @@ aarch64_builtin_support_vector_misalignment 
(machine_mode mode,
return false;
 }
   return default_builtin_support_vector_misalignment (mode, type, misalignment,
- is_packed);
+ is_packed,
+ is_gather_scatter);
 }
 
 /* If VALS is a vector constant that can be loaded into a register
diff --git a/gcc/config/arm/arm.cc b/gcc/config/arm/arm.cc
index bde06f3fa866..29b45ae96bda 100644
--- a/gcc/config/arm/arm.cc
+++ b/gcc/config/arm/arm.cc
@@ -289,7 +289,8 @@ static bool arm_vector_alignment_reachable (const_tree 
type, bool is_packed);
 static bool arm_builtin_support_vector_misalignment (machine_mode mode,
 const_tree type,
   

[gcc r16-2479] ifn: Add helper functions for gather/scatter.

2025-07-24 Thread Robin Dapp via Gcc-cvs
https://gcc.gnu.org/g:a901860c34ff8966b33f5729d7872a53b3bfa722

commit r16-2479-ga901860c34ff8966b33f5729d7872a53b3bfa722
Author: Robin Dapp 
Date:   Wed Jul 2 10:04:58 2025 +0200

ifn: Add helper functions for gather/scatter.

This patch adds access helpers for the gather/scatter offset and scale
parameters.

gcc/ChangeLog:

* internal-fn.cc (expand_scatter_store_optab_fn): Use new
function.
(expand_gather_load_optab_fn): Ditto.
(internal_fn_offset_index): Ditto.
(internal_fn_scale_index): Ditto.
* internal-fn.h (internal_fn_offset_index): New function.
(internal_fn_scale_index): Ditto.
* tree-vect-data-refs.cc (vect_describe_gather_scatter_call):
Use new function.

Diff:
---
 gcc/internal-fn.cc | 57 ++
 gcc/internal-fn.h  |  2 ++
 gcc/tree-vect-data-refs.cc |  6 +++--
 3 files changed, 58 insertions(+), 7 deletions(-)

diff --git a/gcc/internal-fn.cc b/gcc/internal-fn.cc
index 114f5a9da18d..1411f4497892 100644
--- a/gcc/internal-fn.cc
+++ b/gcc/internal-fn.cc
@@ -3652,8 +3652,8 @@ expand_scatter_store_optab_fn (internal_fn, gcall *stmt, 
direct_optab optab)
   internal_fn ifn = gimple_call_internal_fn (stmt);
   int rhs_index = internal_fn_stored_value_index (ifn);
   tree base = gimple_call_arg (stmt, 0);
-  tree offset = gimple_call_arg (stmt, 1);
-  tree scale = gimple_call_arg (stmt, 2);
+  tree offset = gimple_call_arg (stmt, internal_fn_offset_index (ifn));
+  tree scale = gimple_call_arg (stmt, internal_fn_scale_index (ifn));
   tree rhs = gimple_call_arg (stmt, rhs_index);
 
   rtx base_rtx = expand_normal (base);
@@ -3678,12 +3678,12 @@ expand_scatter_store_optab_fn (internal_fn, gcall 
*stmt, direct_optab optab)
 /* Expand {MASK_,}GATHER_LOAD call CALL using optab OPTAB.  */
 
 static void
-expand_gather_load_optab_fn (internal_fn, gcall *stmt, direct_optab optab)
+expand_gather_load_optab_fn (internal_fn ifn, gcall *stmt, direct_optab optab)
 {
   tree lhs = gimple_call_lhs (stmt);
   tree base = gimple_call_arg (stmt, 0);
-  tree offset = gimple_call_arg (stmt, 1);
-  tree scale = gimple_call_arg (stmt, 2);
+  tree offset = gimple_call_arg (stmt, internal_fn_offset_index (ifn));
+  tree scale = gimple_call_arg (stmt, internal_fn_scale_index (ifn));
 
   rtx lhs_rtx = expand_expr (lhs, NULL_RTX, VOIDmode, EXPAND_WRITE);
   rtx base_rtx = expand_normal (base);
@@ -5152,6 +5152,53 @@ internal_fn_stored_value_index (internal_fn fn)
 }
 }
 
+/* If FN is a gather/scatter return the index of its offset argument,
+   otherwise return -1.  */
+
+int
+internal_fn_offset_index (internal_fn fn)
+{
+  if (!internal_gather_scatter_fn_p (fn))
+return -1;
+
+  switch (fn)
+{
+case IFN_GATHER_LOAD:
+case IFN_MASK_GATHER_LOAD:
+case IFN_MASK_LEN_GATHER_LOAD:
+case IFN_SCATTER_STORE:
+case IFN_MASK_SCATTER_STORE:
+case IFN_MASK_LEN_SCATTER_STORE:
+  return 1;
+
+default:
+  return -1;
+}
+}
+
+/* If FN is a gather/scatter return the index of its scale argument,
+   otherwise return -1.  */
+
+int
+internal_fn_scale_index (internal_fn fn)
+{
+  if (!internal_gather_scatter_fn_p (fn))
+return -1;
+
+  switch (fn)
+{
+case IFN_GATHER_LOAD:
+case IFN_MASK_GATHER_LOAD:
+case IFN_MASK_LEN_GATHER_LOAD:
+case IFN_SCATTER_STORE:
+case IFN_MASK_SCATTER_STORE:
+case IFN_MASK_LEN_SCATTER_STORE:
+  return 2;
+
+default:
+  return -1;
+}
+}
 
 /* Store all supported else values for the optab referred to by ICODE
in ELSE_VALS.  The index of the else operand must be specified in
diff --git a/gcc/internal-fn.h b/gcc/internal-fn.h
index 02731ea03aeb..825381660bb1 100644
--- a/gcc/internal-fn.h
+++ b/gcc/internal-fn.h
@@ -240,6 +240,8 @@ extern int internal_fn_mask_index (internal_fn);
 extern int internal_fn_len_index (internal_fn);
 extern int internal_fn_else_index (internal_fn);
 extern int internal_fn_stored_value_index (internal_fn);
+extern int internal_fn_offset_index (internal_fn fn);
+extern int internal_fn_scale_index (internal_fn fn);
 extern bool internal_gather_scatter_fn_supported_p (internal_fn, tree,
tree, tree, int,
vec * = nullptr);
diff --git a/gcc/tree-vect-data-refs.cc b/gcc/tree-vect-data-refs.cc
index 5f672132a8ac..32f3f0e1fa9e 100644
--- a/gcc/tree-vect-data-refs.cc
+++ b/gcc/tree-vect-data-refs.cc
@@ -4539,10 +4539,12 @@ vect_describe_gather_scatter_call (stmt_vec_info 
stmt_info,
   info->ifn = gimple_call_internal_fn (call);
   info->decl = NULL_TREE;
   info->base = gimple_call_arg (call, 0);
-  info->offset = gimple_call_arg (call, 1);
+  info->offset = gimple_call_arg
+ (call, internal_fn_offset_index (info->ifn));
   info->offset_dt = vect_unknown_def_type;
   info->offset_vectype = NUL

[gcc r16-2483] riscv: testsuite: Fix misalignment check.

2025-07-24 Thread Robin Dapp via Gcc-cvs
https://gcc.gnu.org/g:45665da440d07e5f559a7638e40e40099acbe78a

commit r16-2483-g45665da440d07e5f559a7638e40e40099acbe78a
Author: Robin Dapp 
Date:   Wed Jul 2 10:28:57 2025 +0200

riscv: testsuite: Fix misalignment check.

This fixes a thinko in the misalignment check.  If we want to check for
vector misalignment support we need to load 16-byte elements, not
8-byte elements that will never be misaligned.

gcc/testsuite/ChangeLog:

* lib/target-supports.exp: Fix misalignment check.

Diff:
---
 gcc/testsuite/lib/target-supports.exp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/gcc/testsuite/lib/target-supports.exp 
b/gcc/testsuite/lib/target-supports.exp
index 018a6d3a764d..e375b1ec02b8 100644
--- a/gcc/testsuite/lib/target-supports.exp
+++ b/gcc/testsuite/lib/target-supports.exp
@@ -2440,7 +2440,7 @@ proc check_effective_target_riscv_v_misalign_ok { } {
= {1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15};
  asm ("vsetivli zero,7,e8,m1,ta,ma");
  asm ("addi a7,%0,1" : : "r" (a) : "a7" );
- asm ("vle8.v v8,0(a7)" : : : "v8");
+ asm ("vle16.v v8,0(a7)" : : : "v8");
  return 0; } } "-march=${gcc_march}"] } {
return 1
 }


[gcc r16-2515] RISC-V: Prepare dynamic LMUL heuristic for SLP.

2025-07-25 Thread Robin Dapp via Gcc-cvs
https://gcc.gnu.org/g:529ae1484a992aba184509eebb71d6595768c887

commit r16-2515-g529ae1484a992aba184509eebb71d6595768c887
Author: Robin Dapp 
Date:   Mon Jul 21 16:00:51 2025 +0200

RISC-V: Prepare dynamic LMUL heuristic for SLP.

This patch prepares the dynamic LMUL vector costing to use the coming
SLP_TREE_TYPE instead of the (to-be-removed) STMT_VINFO_TYPE.

Even though the whole approach should be reviewed and adjusted at some
point, the patch chooses the path of least resistance and uses a hash
map for the stmt_info -> slp node relationship.  A node is mapped to the
accompanying stmt_info during add_stmt_cost.  In finish_cost we go
through all statements as before, and obtain the corresponding slp nodes
as well as their types.

This allows us to operate largely as before.  We don't yet do the switch
over from STMT_VINFO_TYPE to SLP_TREE_TYPE, though; this patch only takes
care of the necessary refactoring upfront.

Regtested on rv64gcv_zvl512b with -mrvv-max-lmul=dynamic.  There are a
few regressions but nothing worse than what we already have.  I'd rather
accept these now and take it as an incentive to work on the heuristic
later than block the SLP work until it is fixed.

gcc/ChangeLog:

* config/riscv/riscv-vector-costs.cc (get_live_range):
Move compute_local_program_points to cost class.
(variable_vectorized_p): Add slp node parameter.
(need_additional_vector_vars_p): Move from here...
(costs::need_additional_vector_vars_p): ... to here and add slp
parameter.
(compute_estimated_lmul): Move update_local_live_ranges to cost
class.
(has_unexpected_spills_p): Move from here...
(costs::has_unexpected_spills_p): ... to here.
(costs::record_lmul_spills): New function.
(costs::add_stmt_cost): Add stmt_info, slp mapping.
(costs::finish_cost): Analyze loop.
* config/riscv/riscv-vector-costs.h: Move declarations to class.

Diff:
---
 gcc/config/riscv/riscv-vector-costs.cc | 71 ++
 gcc/config/riscv/riscv-vector-costs.h  | 16 
 2 files changed, 62 insertions(+), 25 deletions(-)

diff --git a/gcc/config/riscv/riscv-vector-costs.cc 
b/gcc/config/riscv/riscv-vector-costs.cc
index 4d8170de9b2c..df924fafd8e5 100644
--- a/gcc/config/riscv/riscv-vector-costs.cc
+++ b/gcc/config/riscv/riscv-vector-costs.cc
@@ -178,8 +178,8 @@ get_live_range (hash_map *live_ranges, tree arg)
STMT 5 (be vectorized)  -- point 2
...
 */
-static void
-compute_local_program_points (
+void
+costs::compute_local_program_points (
   vec_info *vinfo,
   hash_map> &program_points_per_bb)
 {
@@ -274,14 +274,14 @@ loop_invariant_op_p (class loop *loop,
 
 /* Return true if the variable should be counted into liveness.  */
 static bool
-variable_vectorized_p (class loop *loop, stmt_vec_info stmt_info, tree var,
-  bool lhs_p)
+variable_vectorized_p (class loop *loop, stmt_vec_info stmt_info,
+  slp_tree node ATTRIBUTE_UNUSED, tree var, bool lhs_p)
 {
   if (!var)
 return false;
   gimple *stmt = STMT_VINFO_STMT (stmt_info);
-  enum stmt_vec_info_type type
-= STMT_VINFO_TYPE (vect_stmt_to_vectorize (stmt_info));
+  stmt_info = vect_stmt_to_vectorize (stmt_info);
+  enum stmt_vec_info_type type = STMT_VINFO_TYPE (stmt_info);
   if (is_gimple_call (stmt) && gimple_call_internal_p (stmt))
 {
   if (gimple_call_internal_fn (stmt) == IFN_MASK_STORE
@@ -357,8 +357,8 @@ variable_vectorized_p (class loop *loop, stmt_vec_info 
stmt_info, tree var,
 
The live range of SSA 1 is [1, 3] in bb 2.
The live range of SSA 2 is [0, 4] in bb 3.  */
-static machine_mode
-compute_local_live_ranges (
+machine_mode
+costs::compute_local_live_ranges (
   loop_vec_info loop_vinfo,
   const hash_map> &program_points_per_bb,
   hash_map> &live_ranges_per_bb)
@@ -388,8 +388,11 @@ compute_local_live_ranges (
  unsigned int point = program_point.point;
  gimple *stmt = program_point.stmt;
  tree lhs = gimple_get_lhs (stmt);
- if (variable_vectorized_p (loop, program_point.stmt_info, lhs,
-true))
+ slp_tree *node = vinfo_slp_map.get (program_point.stmt_info);
+ if (!node)
+   continue;
+ if (variable_vectorized_p (loop, program_point.stmt_info,
+*node, lhs, true))
{
  biggest_mode = get_biggest_mode (biggest_mode,
   TYPE_MODE (TREE_TYPE (lhs)));
@@ -406,8 +409,8 @@ compute_local_live_ranges (
  for (i = 0; i < gimple_num_args (stmt); i++)
{
  tree var = gimple_arg (stmt, i);
- if (variable_vectorized_p