Hi, after testing on the BPI (4.2% improvement for x264 input 1, 4.4% for input 2) and following the discussion in PR117173, I figured it's best to disable two-source permutes by default for now. We briefly talked about this on the patchwork call last week; the conclusion was to just post the patch and discuss it here.
The patch adds a parameter "riscv-two-source-permutes" which, when enabled, restores the old behavior. It does not add a uarch knob to override the default. It's still possible to get two-source permutes in a register via the expander, but the obvious constant cases are covered. Regtested on rv64gcv_zvl512b. Regards Robin PR target/117173 gcc/ChangeLog: * config/riscv/riscv-v.cc (shuffle_generic_patterns): Only support single-source permutes by default. * config/riscv/riscv.opt: New param "riscv-two-source-permutes". gcc/testsuite/ChangeLog: * gcc.target/riscv/rvv/rvv.exp: Run with two-source permutes. * lib/target-supports.exp: Ditto. * gcc.dg/fold-perm-2.c: Ditto. * gcc.dg/pr54346.c: Ditto. * gcc.dg/vect/costmodel/riscv/rvv/dynamic-lmul4-10.c: Ditto. * gcc.dg/vect/costmodel/riscv/rvv/dynamic-lmul4-12.c: Ditto. * gcc.dg/vect/costmodel/riscv/rvv/dynamic-lmul4-6.c: Ditto. * gcc.dg/vect/costmodel/riscv/rvv/dynamic-lmul4-8.c: Ditto. * gcc.dg/vect/costmodel/riscv/rvv/dynamic-lmul8-12.c: Ditto. * gcc.dg/vect/costmodel/riscv/rvv/pr111848.c: Ditto. * gcc.dg/vect/costmodel/riscv/rvv/rvv-costmodel-vect.exp: Ditto. 
--- gcc/config/riscv/riscv-v.cc | 8 +++++++- gcc/config/riscv/riscv.opt | 4 ++++ gcc/testsuite/gcc.dg/fold-perm-2.c | 1 + gcc/testsuite/gcc.dg/pr54346.c | 1 + .../gcc.dg/vect/costmodel/riscv/rvv/dynamic-lmul4-10.c | 1 + .../gcc.dg/vect/costmodel/riscv/rvv/dynamic-lmul4-12.c | 1 + .../gcc.dg/vect/costmodel/riscv/rvv/dynamic-lmul4-6.c | 2 +- .../gcc.dg/vect/costmodel/riscv/rvv/dynamic-lmul4-8.c | 2 +- .../gcc.dg/vect/costmodel/riscv/rvv/dynamic-lmul8-12.c | 1 + gcc/testsuite/gcc.dg/vect/costmodel/riscv/rvv/pr111848.c | 1 + .../vect/costmodel/riscv/rvv/rvv-costmodel-vect.exp | 2 +- gcc/testsuite/gcc.target/riscv/rvv/rvv.exp | 4 ++-- gcc/testsuite/lib/target-supports.exp | 2 ++ 13 files changed, 24 insertions(+), 6 deletions(-) diff --git a/gcc/config/riscv/riscv-v.cc b/gcc/config/riscv/riscv-v.cc index e1172e9c7d2..2d6f6da94ce 100644 --- a/gcc/config/riscv/riscv-v.cc +++ b/gcc/config/riscv/riscv-v.cc @@ -3947,11 +3947,17 @@ shuffle_generic_patterns (struct expand_vec_perm_d *d) if (!get_gather_index_mode (d).exists (&sel_mode)) return false; + rtx sel = vec_perm_indices_to_rtx (sel_mode, d->perm); + poly_uint64 nunits = GET_MODE_NUNITS (sel_mode); + if ((!nunits.is_constant () + || !const_vec_all_in_range_p (sel, 0, nunits - 1)) + && !riscv_two_source_permutes) + return false; + /* Success! */ if (d->testing_p) return true; - rtx sel = vec_perm_indices_to_rtx (sel_mode, d->perm); /* Some FIXED-VLMAX/VLS vector permutation situations call targethook instead of expand vec_perm<mode>, we handle it directly. 
*/ expand_vec_perm (d->target, d->op0, d->op1, sel); diff --git a/gcc/config/riscv/riscv.opt b/gcc/config/riscv/riscv.opt index f51f8fd1cdf..ed0695e20d3 100644 --- a/gcc/config/riscv/riscv.opt +++ b/gcc/config/riscv/riscv.opt @@ -622,6 +622,10 @@ Enum(vsetvl_strategy) String(optim-no-fusion) Value(VSETVL_OPT_NO_FUSION) Target Undocumented RejectNegative Joined Enum(vsetvl_strategy) Var(vsetvl_strategy) Init(VSETVL_OPT) -param=vsetvl-strategy=<string> Set the optimization level of VSETVL insert pass. +-param=riscv-two-source-permutes +Target Undocumented Uinteger Var(riscv_two_source_permutes) Init(0) +-param=riscv-two-source-permutes Enable permutes/gathers with two sources vectors. + Enum Name(stringop_strategy) Type(enum stringop_strategy_enum) Valid arguments to -mstringop-strategy=: diff --git a/gcc/testsuite/gcc.dg/fold-perm-2.c b/gcc/testsuite/gcc.dg/fold-perm-2.c index 1a4ab4065de..9fd809ee296 100644 --- a/gcc/testsuite/gcc.dg/fold-perm-2.c +++ b/gcc/testsuite/gcc.dg/fold-perm-2.c @@ -1,5 +1,6 @@ /* { dg-do compile } */ /* { dg-options "-O -fdump-tree-fre1" } */ +/* { dg-additional-options "--param=riscv-two-source-permutes" { target riscv*-*-* } } */ typedef int veci __attribute__ ((vector_size (4 * sizeof (int)))); typedef unsigned int vecu __attribute__ ((vector_size (4 * sizeof (unsigned int)))); diff --git a/gcc/testsuite/gcc.dg/pr54346.c b/gcc/testsuite/gcc.dg/pr54346.c index 5ec0609f1e5..b78e0533ac2 100644 --- a/gcc/testsuite/gcc.dg/pr54346.c +++ b/gcc/testsuite/gcc.dg/pr54346.c @@ -1,5 +1,6 @@ /* { dg-do compile } */ /* { dg-options "-O -fdump-tree-dse1 -Wno-psabi" } */ +/* { dg-additional-options "--param=riscv-two-source-permutes" { target riscv*-*-* } } */ typedef int veci __attribute__ ((vector_size (4 * sizeof (int)))); diff --git a/gcc/testsuite/gcc.dg/vect/costmodel/riscv/rvv/dynamic-lmul4-10.c b/gcc/testsuite/gcc.dg/vect/costmodel/riscv/rvv/dynamic-lmul4-10.c index 4a372edea5a..8b434cac167 100644 --- 
a/gcc/testsuite/gcc.dg/vect/costmodel/riscv/rvv/dynamic-lmul4-10.c +++ b/gcc/testsuite/gcc.dg/vect/costmodel/riscv/rvv/dynamic-lmul4-10.c @@ -1,5 +1,6 @@ /* { dg-do compile } */ /* { dg-options "-march=rv64gcv -mabi=lp64d -O3 -ftree-vectorize -mrvv-max-lmul=dynamic -fdump-tree-vect-details" } */ +/* { dg-additional-options "--param=riscv-two-source-permutes" } */ int bar (int *x, int a, int b, int n) diff --git a/gcc/testsuite/gcc.dg/vect/costmodel/riscv/rvv/dynamic-lmul4-12.c b/gcc/testsuite/gcc.dg/vect/costmodel/riscv/rvv/dynamic-lmul4-12.c index 29e6dfc223e..2058f0823c8 100644 --- a/gcc/testsuite/gcc.dg/vect/costmodel/riscv/rvv/dynamic-lmul4-12.c +++ b/gcc/testsuite/gcc.dg/vect/costmodel/riscv/rvv/dynamic-lmul4-12.c @@ -1,5 +1,6 @@ /* { dg-do compile } */ /* { dg-options "-march=rv64gcv -mabi=lp64d -O3 -ftree-vectorize -mrvv-max-lmul=dynamic -fdump-tree-vect-details" } */ +/* { dg-additional-options "--param=riscv-two-source-permutes" } */ void f (int *restrict a, int *restrict b, int *restrict c, int *restrict d, diff --git a/gcc/testsuite/gcc.dg/vect/costmodel/riscv/rvv/dynamic-lmul4-6.c b/gcc/testsuite/gcc.dg/vect/costmodel/riscv/rvv/dynamic-lmul4-6.c index 3734b5c18a7..503d99fc254 100644 --- a/gcc/testsuite/gcc.dg/vect/costmodel/riscv/rvv/dynamic-lmul4-6.c +++ b/gcc/testsuite/gcc.dg/vect/costmodel/riscv/rvv/dynamic-lmul4-6.c @@ -1,6 +1,6 @@ /* { dg-do compile } */ /* { dg-options "-march=rv32gcv -mabi=ilp32 -O3 -ftree-vectorize -mrvv-max-lmul=dynamic -mrvv-vector-bits=scalable -fselective-scheduling -fdump-tree-vect-details" } */ -/* { dg-additional-options "-fno-schedule-insns -fno-schedule-insns2" } */ +/* { dg-additional-options "-fno-schedule-insns -fno-schedule-insns2 --param=riscv-two-source-permutes" } */ #include <stdint-gcc.h> diff --git a/gcc/testsuite/gcc.dg/vect/costmodel/riscv/rvv/dynamic-lmul4-8.c b/gcc/testsuite/gcc.dg/vect/costmodel/riscv/rvv/dynamic-lmul4-8.c index 756176d4212..460d9f8c470 100644 --- 
a/gcc/testsuite/gcc.dg/vect/costmodel/riscv/rvv/dynamic-lmul4-8.c +++ b/gcc/testsuite/gcc.dg/vect/costmodel/riscv/rvv/dynamic-lmul4-8.c @@ -1,6 +1,6 @@ /* { dg-do compile } */ /* { dg-options "-march=rv32gcv -mabi=ilp32 -O3 -ftree-vectorize -mrvv-max-lmul=dynamic -mrvv-vector-bits=scalable -fselective-scheduling -fdump-tree-vect-details" } */ -/* { dg-additional-options "-fno-schedule-insns -fno-schedule-insns2" } */ +/* { dg-additional-options "-fno-schedule-insns -fno-schedule-insns2 --param=riscv-two-source-permutes" } */ #include <stdint-gcc.h> diff --git a/gcc/testsuite/gcc.dg/vect/costmodel/riscv/rvv/dynamic-lmul8-12.c b/gcc/testsuite/gcc.dg/vect/costmodel/riscv/rvv/dynamic-lmul8-12.c index 9fa6b69a7c9..8434c215070 100644 --- a/gcc/testsuite/gcc.dg/vect/costmodel/riscv/rvv/dynamic-lmul8-12.c +++ b/gcc/testsuite/gcc.dg/vect/costmodel/riscv/rvv/dynamic-lmul8-12.c @@ -1,5 +1,6 @@ /* { dg-do compile } */ /* { dg-options "-march=rv32gcv -mabi=ilp32 -O3 -ftree-vectorize -mrvv-max-lmul=dynamic -mrvv-vector-bits=scalable -fselective-scheduling -fdump-tree-vect-details" } */ +/* { dg-additional-options "--param=riscv-two-source-permutes" } */ void foo (int *restrict a, int *restrict b, int n) diff --git a/gcc/testsuite/gcc.dg/vect/costmodel/riscv/rvv/pr111848.c b/gcc/testsuite/gcc.dg/vect/costmodel/riscv/rvv/pr111848.c index 339c80f8805..3373ca90e08 100644 --- a/gcc/testsuite/gcc.dg/vect/costmodel/riscv/rvv/pr111848.c +++ b/gcc/testsuite/gcc.dg/vect/costmodel/riscv/rvv/pr111848.c @@ -1,5 +1,6 @@ /* { dg-do compile } */ /* { dg-options "-march=rv64gcv -mabi=lp64d -O3 -ftree-vectorize -mrvv-max-lmul=dynamic -fdump-tree-vect-details" } */ +/* { dg-additional-options "--param=riscv-two-source-permutes" } */ #include <stdint-gcc.h> void diff --git a/gcc/testsuite/gcc.dg/vect/costmodel/riscv/rvv/rvv-costmodel-vect.exp b/gcc/testsuite/gcc.dg/vect/costmodel/riscv/rvv/rvv-costmodel-vect.exp index 2e9384ee45c..9ff364c2f7d 100644 --- 
a/gcc/testsuite/gcc.dg/vect/costmodel/riscv/rvv/rvv-costmodel-vect.exp +++ b/gcc/testsuite/gcc.dg/vect/costmodel/riscv/rvv/rvv-costmodel-vect.exp @@ -44,7 +44,7 @@ if [istarget riscv32-*-*] then { dg-init # Main loop. -set CFLAGS "$DEFAULT_CFLAGS -march=$gcc_march -mabi=$gcc_mabi -O3" +set CFLAGS "$DEFAULT_CFLAGS -march=$gcc_march -mabi=$gcc_mabi -O3 --param=riscv-two-source-permutes" dg-runtest [lsort [glob -nocomplain $srcdir/$subdir/*.\[cS\]]] \ "" $CFLAGS diff --git a/gcc/testsuite/gcc.target/riscv/rvv/rvv.exp b/gcc/testsuite/gcc.target/riscv/rvv/rvv.exp index 3824997c908..b1bfd2bccbe 100644 --- a/gcc/testsuite/gcc.target/riscv/rvv/rvv.exp +++ b/gcc/testsuite/gcc.target/riscv/rvv/rvv.exp @@ -65,7 +65,7 @@ set AUTOVEC_TEST_OPTS [list \ {-ftree-vectorize -O2 -mrvv-max-lmul=dynamic} ] foreach op $AUTOVEC_TEST_OPTS { dg-runtest [lsort [glob -nocomplain $srcdir/$subdir/autovec/partial/*.\[cS\]]] \ - "$op" "" + "$op --param=riscv-two-source-permutes" "" dg-runtest [lsort [glob -nocomplain $srcdir/$subdir/autovec/binop/*.\[cS\]]] \ "$op" "" dg-runtest [lsort [glob -nocomplain $srcdir/$subdir/autovec/cmp/*.\[cS\]]] \ @@ -77,7 +77,7 @@ foreach op $AUTOVEC_TEST_OPTS { dg-runtest [lsort [glob -nocomplain $srcdir/$subdir/autovec/ternop/*.\[cS\]]] \ "$op" "" dg-runtest [lsort [glob -nocomplain $srcdir/$subdir/autovec/reduc/*.\[cS\]]] \ - "$op" "" + "$op --param=riscv-two-source-permutes" "" dg-runtest [lsort [glob -nocomplain $srcdir/$subdir/autovec/cond/*.\[cS\]]] \ "$op" "" dg-runtest [lsort [glob -nocomplain $srcdir/$subdir/autovec/builtin/*.\[cS\]]] \ diff --git a/gcc/testsuite/lib/target-supports.exp b/gcc/testsuite/lib/target-supports.exp index baafa1d8fcf..1bbfff28bc1 100644 --- a/gcc/testsuite/lib/target-supports.exp +++ b/gcc/testsuite/lib/target-supports.exp @@ -12233,6 +12233,8 @@ proc check_vect_support_and_set_flags { } { # dependent D extension is missing). return 0 } + # Enable permutes with two source vectors. 
+ lappend DEFAULT_VECTCFLAGS "--param=riscv-two-source-permutes" } elseif [istarget loongarch*-*-*] { # Set the default vectorization option to "-mlsx" due to the problem # of non-aligned memory access when using 256-bit vectorization. -- 2.47.1