Hi,

After testing on the BPI (4.2% improvement for x264 input 1, 4.4% for input 2)
and the discussion in PR117173, I figured it's best to disable two-source
permutes by default for now.  We briefly talked about this on the patchwork
call last week; the conclusion was to just post the patch and discuss it here.

The patch adds a parameter "riscv-two-source-permutes" which restores
the old behavior.  It does not add a uarch knob to override the default.

It's still possible to get two-source permutes in a register via
the expander, but the obvious constant-selector cases are covered.

Regtested on rv64gcv_zvl512b.

Regards
 Robin

        PR target/117173

gcc/ChangeLog:

        * config/riscv/riscv-v.cc (shuffle_generic_patterns): Only
        support single-source permutes by default.
        * config/riscv/riscv.opt: New param "riscv-two-source-permutes".

gcc/testsuite/ChangeLog:

        * gcc.target/riscv/rvv/rvv.exp: Run with two-source permutes.
        * lib/target-supports.exp: Ditto.
        * gcc.dg/fold-perm-2.c: Ditto.
        * gcc.dg/pr54346.c: Ditto.
        * gcc.dg/vect/costmodel/riscv/rvv/dynamic-lmul4-10.c: Ditto.
        * gcc.dg/vect/costmodel/riscv/rvv/dynamic-lmul4-12.c: Ditto.
        * gcc.dg/vect/costmodel/riscv/rvv/dynamic-lmul4-6.c: Ditto.
        * gcc.dg/vect/costmodel/riscv/rvv/dynamic-lmul4-8.c: Ditto.
        * gcc.dg/vect/costmodel/riscv/rvv/dynamic-lmul8-12.c: Ditto.
        * gcc.dg/vect/costmodel/riscv/rvv/pr111848.c: Ditto.
        * gcc.dg/vect/costmodel/riscv/rvv/rvv-costmodel-vect.exp: Ditto.
---
 gcc/config/riscv/riscv-v.cc                               | 8 +++++++-
 gcc/config/riscv/riscv.opt                                | 4 ++++
 gcc/testsuite/gcc.dg/fold-perm-2.c                        | 1 +
 gcc/testsuite/gcc.dg/pr54346.c                            | 1 +
 .../gcc.dg/vect/costmodel/riscv/rvv/dynamic-lmul4-10.c    | 1 +
 .../gcc.dg/vect/costmodel/riscv/rvv/dynamic-lmul4-12.c    | 1 +
 .../gcc.dg/vect/costmodel/riscv/rvv/dynamic-lmul4-6.c     | 2 +-
 .../gcc.dg/vect/costmodel/riscv/rvv/dynamic-lmul4-8.c     | 2 +-
 .../gcc.dg/vect/costmodel/riscv/rvv/dynamic-lmul8-12.c    | 1 +
 gcc/testsuite/gcc.dg/vect/costmodel/riscv/rvv/pr111848.c  | 1 +
 .../vect/costmodel/riscv/rvv/rvv-costmodel-vect.exp       | 2 +-
 gcc/testsuite/gcc.target/riscv/rvv/rvv.exp                | 4 ++--
 gcc/testsuite/lib/target-supports.exp                     | 2 ++
 13 files changed, 24 insertions(+), 6 deletions(-)

diff --git a/gcc/config/riscv/riscv-v.cc b/gcc/config/riscv/riscv-v.cc
index e1172e9c7d2..2d6f6da94ce 100644
--- a/gcc/config/riscv/riscv-v.cc
+++ b/gcc/config/riscv/riscv-v.cc
@@ -3947,11 +3947,17 @@ shuffle_generic_patterns (struct expand_vec_perm_d *d)
   if (!get_gather_index_mode (d).exists (&sel_mode))
     return false;
 
+  rtx sel = vec_perm_indices_to_rtx (sel_mode, d->perm);
+  poly_uint64 nunits = GET_MODE_NUNITS (sel_mode);
+  if ((!nunits.is_constant ()
+       || !const_vec_all_in_range_p (sel, 0, nunits - 1))
+      && !riscv_two_source_permutes)
+    return false;
+
   /* Success! */
   if (d->testing_p)
     return true;
 
-  rtx sel = vec_perm_indices_to_rtx (sel_mode, d->perm);
   /* Some FIXED-VLMAX/VLS vector permutation situations call targethook
      instead of expand vec_perm<mode>, we handle it directly.  */
   expand_vec_perm (d->target, d->op0, d->op1, sel);
diff --git a/gcc/config/riscv/riscv.opt b/gcc/config/riscv/riscv.opt
index f51f8fd1cdf..ed0695e20d3 100644
--- a/gcc/config/riscv/riscv.opt
+++ b/gcc/config/riscv/riscv.opt
@@ -622,6 +622,10 @@ Enum(vsetvl_strategy) String(optim-no-fusion) 
Value(VSETVL_OPT_NO_FUSION)
 Target Undocumented RejectNegative Joined Enum(vsetvl_strategy) 
Var(vsetvl_strategy) Init(VSETVL_OPT)
 -param=vsetvl-strategy=<string>        Set the optimization level of VSETVL 
insert pass.
 
+-param=riscv-two-source-permutes
+Target Undocumented Uinteger Var(riscv_two_source_permutes) Init(0)
+-param=riscv-two-source-permutes Enable permutes/gathers with two sources 
vectors.
+
 Enum
 Name(stringop_strategy) Type(enum stringop_strategy_enum)
 Valid arguments to -mstringop-strategy=:
diff --git a/gcc/testsuite/gcc.dg/fold-perm-2.c 
b/gcc/testsuite/gcc.dg/fold-perm-2.c
index 1a4ab4065de..9fd809ee296 100644
--- a/gcc/testsuite/gcc.dg/fold-perm-2.c
+++ b/gcc/testsuite/gcc.dg/fold-perm-2.c
@@ -1,5 +1,6 @@
 /* { dg-do compile } */
 /* { dg-options "-O -fdump-tree-fre1" } */
+/* { dg-additional-options "--param=riscv-two-source-permutes" { target 
riscv*-*-* } } */
 
 typedef int veci __attribute__ ((vector_size (4 * sizeof (int))));
 typedef unsigned int vecu __attribute__ ((vector_size (4 * sizeof (unsigned 
int))));
diff --git a/gcc/testsuite/gcc.dg/pr54346.c b/gcc/testsuite/gcc.dg/pr54346.c
index 5ec0609f1e5..b78e0533ac2 100644
--- a/gcc/testsuite/gcc.dg/pr54346.c
+++ b/gcc/testsuite/gcc.dg/pr54346.c
@@ -1,5 +1,6 @@
 /* { dg-do compile } */
 /* { dg-options "-O -fdump-tree-dse1 -Wno-psabi" } */
+/* { dg-additional-options "--param=riscv-two-source-permutes" { target 
riscv*-*-* } } */
 
 typedef int veci __attribute__ ((vector_size (4 * sizeof (int))));
 
diff --git a/gcc/testsuite/gcc.dg/vect/costmodel/riscv/rvv/dynamic-lmul4-10.c 
b/gcc/testsuite/gcc.dg/vect/costmodel/riscv/rvv/dynamic-lmul4-10.c
index 4a372edea5a..8b434cac167 100644
--- a/gcc/testsuite/gcc.dg/vect/costmodel/riscv/rvv/dynamic-lmul4-10.c
+++ b/gcc/testsuite/gcc.dg/vect/costmodel/riscv/rvv/dynamic-lmul4-10.c
@@ -1,5 +1,6 @@
 /* { dg-do compile } */
 /* { dg-options "-march=rv64gcv -mabi=lp64d -O3 -ftree-vectorize 
-mrvv-max-lmul=dynamic -fdump-tree-vect-details" } */
+/* { dg-additional-options "--param=riscv-two-source-permutes" } */
 
 int
 bar (int *x, int a, int b, int n)
diff --git a/gcc/testsuite/gcc.dg/vect/costmodel/riscv/rvv/dynamic-lmul4-12.c 
b/gcc/testsuite/gcc.dg/vect/costmodel/riscv/rvv/dynamic-lmul4-12.c
index 29e6dfc223e..2058f0823c8 100644
--- a/gcc/testsuite/gcc.dg/vect/costmodel/riscv/rvv/dynamic-lmul4-12.c
+++ b/gcc/testsuite/gcc.dg/vect/costmodel/riscv/rvv/dynamic-lmul4-12.c
@@ -1,5 +1,6 @@
 /* { dg-do compile } */
 /* { dg-options "-march=rv64gcv -mabi=lp64d -O3 -ftree-vectorize 
-mrvv-max-lmul=dynamic -fdump-tree-vect-details" } */
+/* { dg-additional-options "--param=riscv-two-source-permutes" } */
 
 void
 f (int *restrict a, int *restrict b, int *restrict c, int *restrict d,
diff --git a/gcc/testsuite/gcc.dg/vect/costmodel/riscv/rvv/dynamic-lmul4-6.c 
b/gcc/testsuite/gcc.dg/vect/costmodel/riscv/rvv/dynamic-lmul4-6.c
index 3734b5c18a7..503d99fc254 100644
--- a/gcc/testsuite/gcc.dg/vect/costmodel/riscv/rvv/dynamic-lmul4-6.c
+++ b/gcc/testsuite/gcc.dg/vect/costmodel/riscv/rvv/dynamic-lmul4-6.c
@@ -1,6 +1,6 @@
 /* { dg-do compile } */
 /* { dg-options "-march=rv32gcv -mabi=ilp32 -O3 -ftree-vectorize 
-mrvv-max-lmul=dynamic -mrvv-vector-bits=scalable -fselective-scheduling 
-fdump-tree-vect-details" } */
-/* { dg-additional-options "-fno-schedule-insns -fno-schedule-insns2" } */
+/* { dg-additional-options "-fno-schedule-insns -fno-schedule-insns2 
--param=riscv-two-source-permutes" } */
 
 #include <stdint-gcc.h>
 
diff --git a/gcc/testsuite/gcc.dg/vect/costmodel/riscv/rvv/dynamic-lmul4-8.c 
b/gcc/testsuite/gcc.dg/vect/costmodel/riscv/rvv/dynamic-lmul4-8.c
index 756176d4212..460d9f8c470 100644
--- a/gcc/testsuite/gcc.dg/vect/costmodel/riscv/rvv/dynamic-lmul4-8.c
+++ b/gcc/testsuite/gcc.dg/vect/costmodel/riscv/rvv/dynamic-lmul4-8.c
@@ -1,6 +1,6 @@
 /* { dg-do compile } */
 /* { dg-options "-march=rv32gcv -mabi=ilp32 -O3 -ftree-vectorize 
-mrvv-max-lmul=dynamic -mrvv-vector-bits=scalable -fselective-scheduling 
-fdump-tree-vect-details" } */
-/* { dg-additional-options "-fno-schedule-insns -fno-schedule-insns2" } */
+/* { dg-additional-options "-fno-schedule-insns -fno-schedule-insns2 
--param=riscv-two-source-permutes" } */
 
 #include <stdint-gcc.h>
 
diff --git a/gcc/testsuite/gcc.dg/vect/costmodel/riscv/rvv/dynamic-lmul8-12.c 
b/gcc/testsuite/gcc.dg/vect/costmodel/riscv/rvv/dynamic-lmul8-12.c
index 9fa6b69a7c9..8434c215070 100644
--- a/gcc/testsuite/gcc.dg/vect/costmodel/riscv/rvv/dynamic-lmul8-12.c
+++ b/gcc/testsuite/gcc.dg/vect/costmodel/riscv/rvv/dynamic-lmul8-12.c
@@ -1,5 +1,6 @@
 /* { dg-do compile } */
 /* { dg-options "-march=rv32gcv -mabi=ilp32 -O3 -ftree-vectorize 
-mrvv-max-lmul=dynamic -mrvv-vector-bits=scalable -fselective-scheduling 
-fdump-tree-vect-details" } */
+/* { dg-additional-options "--param=riscv-two-source-permutes" } */
 
 void
 foo (int *restrict a, int *restrict b, int n)
diff --git a/gcc/testsuite/gcc.dg/vect/costmodel/riscv/rvv/pr111848.c 
b/gcc/testsuite/gcc.dg/vect/costmodel/riscv/rvv/pr111848.c
index 339c80f8805..3373ca90e08 100644
--- a/gcc/testsuite/gcc.dg/vect/costmodel/riscv/rvv/pr111848.c
+++ b/gcc/testsuite/gcc.dg/vect/costmodel/riscv/rvv/pr111848.c
@@ -1,5 +1,6 @@
 /* { dg-do compile } */
 /* { dg-options "-march=rv64gcv -mabi=lp64d -O3 -ftree-vectorize 
-mrvv-max-lmul=dynamic -fdump-tree-vect-details" } */
+/* { dg-additional-options "--param=riscv-two-source-permutes" } */
 
 #include <stdint-gcc.h>
 void
diff --git 
a/gcc/testsuite/gcc.dg/vect/costmodel/riscv/rvv/rvv-costmodel-vect.exp 
b/gcc/testsuite/gcc.dg/vect/costmodel/riscv/rvv/rvv-costmodel-vect.exp
index 2e9384ee45c..9ff364c2f7d 100644
--- a/gcc/testsuite/gcc.dg/vect/costmodel/riscv/rvv/rvv-costmodel-vect.exp
+++ b/gcc/testsuite/gcc.dg/vect/costmodel/riscv/rvv/rvv-costmodel-vect.exp
@@ -44,7 +44,7 @@ if [istarget riscv32-*-*] then {
 dg-init
 
 # Main loop.
-set CFLAGS "$DEFAULT_CFLAGS -march=$gcc_march -mabi=$gcc_mabi -O3"
+set CFLAGS "$DEFAULT_CFLAGS -march=$gcc_march -mabi=$gcc_mabi -O3 
--param=riscv-two-source-permutes"
 dg-runtest [lsort [glob -nocomplain $srcdir/$subdir/*.\[cS\]]] \
        "" $CFLAGS
 
diff --git a/gcc/testsuite/gcc.target/riscv/rvv/rvv.exp 
b/gcc/testsuite/gcc.target/riscv/rvv/rvv.exp
index 3824997c908..b1bfd2bccbe 100644
--- a/gcc/testsuite/gcc.target/riscv/rvv/rvv.exp
+++ b/gcc/testsuite/gcc.target/riscv/rvv/rvv.exp
@@ -65,7 +65,7 @@ set AUTOVEC_TEST_OPTS [list \
   {-ftree-vectorize -O2 -mrvv-max-lmul=dynamic} ]
 foreach op $AUTOVEC_TEST_OPTS {
   dg-runtest [lsort [glob -nocomplain 
$srcdir/$subdir/autovec/partial/*.\[cS\]]] \
-    "$op" ""
+    "$op --param=riscv-two-source-permutes" ""
   dg-runtest [lsort [glob -nocomplain $srcdir/$subdir/autovec/binop/*.\[cS\]]] 
\
     "$op" ""
   dg-runtest [lsort [glob -nocomplain $srcdir/$subdir/autovec/cmp/*.\[cS\]]] \
@@ -77,7 +77,7 @@ foreach op $AUTOVEC_TEST_OPTS {
   dg-runtest [lsort [glob -nocomplain 
$srcdir/$subdir/autovec/ternop/*.\[cS\]]] \
     "$op" ""
   dg-runtest [lsort [glob -nocomplain $srcdir/$subdir/autovec/reduc/*.\[cS\]]] 
\
-    "$op" ""
+    "$op --param=riscv-two-source-permutes" ""
   dg-runtest [lsort [glob -nocomplain $srcdir/$subdir/autovec/cond/*.\[cS\]]] \
     "$op" ""
   dg-runtest [lsort [glob -nocomplain 
$srcdir/$subdir/autovec/builtin/*.\[cS\]]] \
diff --git a/gcc/testsuite/lib/target-supports.exp 
b/gcc/testsuite/lib/target-supports.exp
index baafa1d8fcf..1bbfff28bc1 100644
--- a/gcc/testsuite/lib/target-supports.exp
+++ b/gcc/testsuite/lib/target-supports.exp
@@ -12233,6 +12233,8 @@ proc check_vect_support_and_set_flags { } {
            # dependent D extension is missing).
            return 0
         }
+       # Enable permutes with two source vectors.
+       lappend DEFAULT_VECTCFLAGS "--param=riscv-two-source-permutes"
     } elseif [istarget loongarch*-*-*] {
       # Set the default vectorization option to "-mlsx" due to the problem
       # of non-aligned memory access when using 256-bit vectorization.
-- 
2.47.1

Reply via email to