Committed, thanks Kito. Pan
-----Original Message----- From: Gcc-patches <gcc-patches-bounces+pan2.li=intel....@gcc.gnu.org> On Behalf Of Kito Cheng via Gcc-patches Sent: Monday, July 31, 2023 10:42 AM To: Juzhe-Zhong <juzhe.zh...@rivai.ai> Cc: gcc-patches@gcc.gnu.org; kito.ch...@sifive.com; jeffreya...@gmail.com; rdapp....@gmail.com Subject: Re: [PATCH V2] RISC-V: Enable basic VLS auto-vectorization LGTM, thanks :) On Mon, Jul 31, 2023 at 10:14 AM Juzhe-Zhong <juzhe.zh...@rivai.ai> wrote: > > Consider the following case: > void > foo (int8_t *in, int8_t *out, int8_t x) > { > for (int i = 0; i < 16; i++) > in[i] = x; > } > > Compile options: --param=riscv-autovec-preference=scalable -fno-builtin > > Before this patch: > > foo: > li a5,16 > csrr a4,vlenb > vsetvli a3,zero,e8,m1,ta,ma > vmv.v.x v1,a2 > bleu a5,a4,.L2 > mv a5,a4 > .L2: > vsetvli zero,a5,e8,m1,ta,ma > vse8.v v1,0(a0) > ret > > After this patch: > > foo: > vsetivli zero,16,e8,mf8,ta,ma > vmv.v.x v1,a2 > vse8.v v1,0(a0) > ret > > gcc/ChangeLog: > > * config/riscv/autovec-vls.md (@vec_duplicate<mode>): New pattern. > * config/riscv/riscv-v.cc (autovectorize_vector_modes): Add VLS > autovec support. > > gcc/testsuite/ChangeLog: > > * gcc.target/riscv/rvv/autovec/v-1.c: Adapt test. > * gcc.target/riscv/rvv/autovec/zve32f_zvl128b-1.c: Ditto. > * gcc.target/riscv/rvv/autovec/zve64d_zvl128b-1.c: Ditto. > * gcc.target/riscv/rvv/autovec/zve64f_zvl128b-1.c: Ditto. > * gcc.target/riscv/rvv/autovec/vls/dup-1.c: New test. > * gcc.target/riscv/rvv/autovec/vls/dup-2.c: New test. > * gcc.target/riscv/rvv/autovec/vls/dup-3.c: New test. > * gcc.target/riscv/rvv/autovec/vls/dup-4.c: New test. > * gcc.target/riscv/rvv/autovec/vls/dup-5.c: New test. > * gcc.target/riscv/rvv/autovec/vls/dup-6.c: New test. > * gcc.target/riscv/rvv/autovec/vls/dup-7.c: New test. 
> > --- > gcc/config/riscv/autovec-vls.md | 19 ++ > gcc/config/riscv/riscv-v.cc | 21 ++- > .../gcc.target/riscv/rvv/autovec/v-1.c | 2 +- > .../gcc.target/riscv/rvv/autovec/vls/dup-1.c | 168 ++++++++++++++++++ > .../gcc.target/riscv/rvv/autovec/vls/dup-2.c | 153 ++++++++++++++++ > .../gcc.target/riscv/rvv/autovec/vls/dup-3.c | 153 ++++++++++++++++ > .../gcc.target/riscv/rvv/autovec/vls/dup-4.c | 137 ++++++++++++++ > .../gcc.target/riscv/rvv/autovec/vls/dup-5.c | 137 ++++++++++++++ > .../gcc.target/riscv/rvv/autovec/vls/dup-6.c | 122 +++++++++++++ > .../gcc.target/riscv/rvv/autovec/vls/dup-7.c | 122 +++++++++++++ > .../riscv/rvv/autovec/zve32f_zvl128b-1.c | 2 +- > .../riscv/rvv/autovec/zve64d_zvl128b-1.c | 2 +- > .../riscv/rvv/autovec/zve64f_zvl128b-1.c | 2 +- > 13 files changed, 1034 insertions(+), 6 deletions(-) > create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/autovec/vls/dup-1.c > create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/autovec/vls/dup-2.c > create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/autovec/vls/dup-3.c > create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/autovec/vls/dup-4.c > create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/autovec/vls/dup-5.c > create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/autovec/vls/dup-6.c > create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/autovec/vls/dup-7.c > > diff --git a/gcc/config/riscv/autovec-vls.md b/gcc/config/riscv/autovec-vls.md > index 9ece317ca4e..1a64dfdd91e 100644 > --- a/gcc/config/riscv/autovec-vls.md > +++ b/gcc/config/riscv/autovec-vls.md > @@ -139,3 +139,22 @@ > "vmv%m1r.v\t%0,%1" > [(set_attr "type" "vmov") > (set_attr "mode" "<MODE>")]) > + > +;; ----------------------------------------------------------------- > +;; ---- Duplicate Operations > +;; ----------------------------------------------------------------- > + > +(define_insn_and_split "@vec_duplicate<mode>" > + [(set (match_operand:VLS 0 "register_operand") > + (vec_duplicate:VLS > + (match_operand:<VEL> 1 
"reg_or_int_operand")))] > + "TARGET_VECTOR && can_create_pseudo_p ()" > + "#" > + "&& 1" > + [(const_int 0)] > + { > + riscv_vector::emit_vlmax_insn (code_for_pred_broadcast (<MODE>mode), > + riscv_vector::RVV_UNOP, operands); > + DONE; > + } > +) > diff --git a/gcc/config/riscv/riscv-v.cc b/gcc/config/riscv/riscv-v.cc > index 9e89f970a4c..c10e51b362e 100644 > --- a/gcc/config/riscv/riscv-v.cc > +++ b/gcc/config/riscv/riscv-v.cc > @@ -2533,7 +2533,6 @@ autovectorize_vector_modes (vector_modes *modes, bool) > { > if (autovec_use_vlmax_p ()) > { > - /* TODO: We will support RVV VLS auto-vectorization mode in the > future. */ > poly_uint64 full_size > = BYTES_PER_RISCV_VECTOR * ((int) riscv_autovec_lmul); > > @@ -2561,7 +2560,25 @@ autovectorize_vector_modes (vector_modes *modes, bool) > modes->safe_push (mode); > } > } > - return 0; > + unsigned int flag = 0; > + if (TARGET_VECTOR_VLS) > + { > + /* Enable VECT_COMPARE_COSTS between VLA modes VLS modes for scalable > + auto-vectorization. */ > + flag |= VECT_COMPARE_COSTS; > + /* Push all VLSmodes according to TARGET_MIN_VLEN. 
*/ > + unsigned int i = 0; > + unsigned int base_size = TARGET_MIN_VLEN * riscv_autovec_lmul / 8; > + unsigned int size = base_size; > + machine_mode mode; > + while (size > 0 && get_vector_mode (QImode, size).exists (&mode)) > + { > + modes->safe_push (mode); > + i++; > + size = base_size / (1U << i); > + } > + } > + return flag; > } > > /* If the given VECTOR_MODE is an RVV mode, first get the largest number > diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/v-1.c > b/gcc/testsuite/gcc.target/riscv/rvv/autovec/v-1.c > index e68d05f5f48..ebbe5e210c5 100644 > --- a/gcc/testsuite/gcc.target/riscv/rvv/autovec/v-1.c > +++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/v-1.c > @@ -3,4 +3,4 @@ > > #include "template-1.h" > > -/* { dg-final { scan-tree-dump-times "vectorized 1 loops in function" 5 > "vect" } } */ > +/* { dg-final { scan-tree-dump-times "vectorized 1 loops in function" 6 > "vect" } } */ > diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/vls/dup-1.c > b/gcc/testsuite/gcc.target/riscv/rvv/autovec/vls/dup-1.c > new file mode 100644 > index 00000000000..1f520f2b0a7 > --- /dev/null > +++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/vls/dup-1.c > @@ -0,0 +1,168 @@ > +/* { dg-do compile } */ > +/* { dg-options "-march=rv64gcv_zvfh_zvl4096b -mabi=lp64d -O3 -fno-builtin > -fno-schedule-insns -fno-schedule-insns2 --param riscv-autovec-lmul=m8" } */ > +/* { dg-final { check-function-bodies "**" "" } } */ > + > +#include "def.h" > + > +/* > +** foo1: > +** vsetivli\s+zero,\s*4,\s*e8,\s*mf8,\s*t[au],\s*m[au] > +** vmv\.v\.x\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),\s*[a-x0-9]+ > +** vse8\.v\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),0\s*\([a-x0-9]+\) > +** ret > +*/ > +void > +foo1 (int8_t *in, int8_t *out, int8_t x) > +{ > + for (int i = 0; i < 4; i++) > + in[i] = x; > +} > + > +/* > +** foo2: > +** vsetivli\s+zero,\s*8,\s*e8,\s*mf8,\s*t[au],\s*m[au] > +** vmv\.v\.x\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),\s*[a-x0-9]+ > +** 
vse8\.v\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),0\s*\([a-x0-9]+\) > +** ret > +*/ > +void > +foo2 (int8_t *in, int8_t *out, int8_t x) > +{ > + for (int i = 0; i < 8; i++) > + in[i] = x; > +} > + > +/* > +** foo3: > +** vsetivli\s+zero,\s*16,\s*e8,\s*mf8,\s*t[au],\s*m[au] > +** vmv\.v\.x\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),\s*[a-x0-9]+ > +** vse8\.v\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),0\s*\([a-x0-9]+\) > +** ret > +*/ > +void > +foo3 (int8_t *in, int8_t *out, int8_t x) > +{ > + for (int i = 0; i < 16; i++) > + in[i] = x; > +} > + > +/* > +** foo4: > +** li\s+[a-x0-9]+,32 > +** vsetvli\s+zero,\s*[a-x0-9]+,\s*e8,\s*mf8,\s*t[au],\s*m[au] > +** vmv\.v\.x\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),\s*[a-x0-9]+ > +** vse8\.v\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),0\s*\([a-x0-9]+\) > +** ret > +*/ > +void > +foo4 (int8_t *in, int8_t *out, int8_t x) > +{ > + for (int i = 0; i < 32; i++) > + in[i] = x; > +} > + > +/* > +** foo5: > +** li\s+[a-x0-9]+,64 > +** vsetvli\s+zero,\s*[a-x0-9]+,\s*e8,\s*mf8,\s*t[au],\s*m[au] > +** vmv\.v\.x\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),\s*[a-x0-9]+ > +** vse8\.v\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),0\s*\([a-x0-9]+\) > +** ret > +*/ > +void > +foo5 (int8_t *in, int8_t *out, int8_t x) > +{ > + for (int i = 0; i < 64; i++) > + in[i] = x; > +} > + > +/* > +** foo6: > +** li\s+[a-x0-9]+,128 > +** vsetvli\s+zero,\s*[a-x0-9]+,\s*e8,\s*mf4,\s*t[au],\s*m[au] > +** vmv\.v\.x\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),\s*[a-x0-9]+ > +** vse8\.v\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),0\s*\([a-x0-9]+\) > +** ret > +*/ > +void > +foo6 (int8_t *in, int8_t *out, int8_t x) > +{ > + for (int i = 0; i < 128; i++) > + in[i] = x; > +} > + > +/* > +** foo7: > +** li\s+[a-x0-9]+,256 > +** vsetvli\s+zero,\s*[a-x0-9]+,\s*e8,\s*mf2,\s*t[au],\s*m[au] > +** vmv\.v\.x\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),\s*[a-x0-9]+ > +** vse8\.v\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),0\s*\([a-x0-9]+\) > +** ret > +*/ > +void > +foo7 (int8_t *in, int8_t *out, int8_t x) > +{ > + for (int i = 0; i < 256; i++) > + in[i] = x; > +} > + > +/* > +** foo8: 
> +** li\s+[a-x0-9]+,512 > +** vsetvli\s+zero,\s*[a-x0-9]+,\s*e8,\s*m1,\s*t[au],\s*m[au] > +** vmv\.v\.x\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),\s*[a-x0-9]+ > +** vse8\.v\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),0\s*\([a-x0-9]+\) > +** ret > +*/ > +void > +foo8 (int8_t *in, int8_t *out, int8_t x) > +{ > + for (int i = 0; i < 512; i++) > + in[i] = x; > +} > + > +/* > +** foo9: > +** li\s+[a-x0-9]+,1024 > +** vsetvli\s+zero,\s*[a-x0-9]+,\s*e8,\s*m2,\s*t[au],\s*m[au] > +** vmv\.v\.x\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),\s*[a-x0-9]+ > +** vse8\.v\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),0\s*\([a-x0-9]+\) > +** ret > +*/ > +void > +foo9 (int8_t *in, int8_t *out, int8_t x) > +{ > + for (int i = 0; i < 1024; i++) > + in[i] = x; > +} > + > +/* > +** foo10: > +** li\s+[a-x0-9]+,4096 > +** addi\s+[a-x0-9]+,[a-x0-9]+,-2048 > +** vsetvli\s+zero,\s*[a-x0-9]+,\s*e8,\s*m4,\s*t[au],\s*m[au] > +** vmv\.v\.x\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),\s*[a-x0-9]+ > +** vse8\.v\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),0\s*\([a-x0-9]+\) > +** ret > +*/ > +void > +foo10 (int8_t *in, int8_t *out, int8_t x) > +{ > + for (int i = 0; i < 2048; i++) > + in[i] = x; > +} > + > +/* > +** foo11: > +** li\s+[a-x0-9]+,4096 > +** vsetvli\s+zero,\s*[a-x0-9]+,\s*e8,\s*m8,\s*t[au],\s*m[au] > +** vmv\.v\.x\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),\s*[a-x0-9]+ > +** vse8\.v\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),0\s*\([a-x0-9]+\) > +** ret > +*/ > +void > +foo11 (int8_t *in, int8_t *out, int8_t x) > +{ > + for (int i = 0; i < 4096; i++) > + in[i] = x; > +} > diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/vls/dup-2.c > b/gcc/testsuite/gcc.target/riscv/rvv/autovec/vls/dup-2.c > new file mode 100644 > index 00000000000..1a930d059c8 > --- /dev/null > +++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/vls/dup-2.c > @@ -0,0 +1,153 @@ > +/* { dg-do compile } */ > +/* { dg-options "-march=rv64gcv_zvfh_zvl4096b -mabi=lp64d -O3 -fno-builtin > -fno-schedule-insns -fno-schedule-insns2 --param riscv-autovec-lmul=m8" } */ > +/* { dg-final { check-function-bodies 
"**" "" } } */ > + > +#include "def.h" > + > +/* > +** foo1: > +** vsetivli\s+zero,\s*4,\s*e16,\s*mf4,\s*t[au],\s*m[au] > +** vmv\.v\.x\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),\s*[a-x0-9]+ > +** vse16\.v\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),0\s*\([a-x0-9]+\) > +** ret > +*/ > +void > +foo1 (int16_t *in, int16_t *out, int16_t x) > +{ > + for (int i = 0; i < 4; i++) > + in[i] = x; > +} > + > +/* > +** foo2: > +** vsetivli\s+zero,\s*8,\s*e16,\s*mf4,\s*t[au],\s*m[au] > +** vmv\.v\.x\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),\s*[a-x0-9]+ > +** vse16\.v\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),0\s*\([a-x0-9]+\) > +** ret > +*/ > +void > +foo2 (int16_t *in, int16_t *out, int16_t x) > +{ > + for (int i = 0; i < 8; i++) > + in[i] = x; > +} > + > +/* > +** foo3: > +** vsetivli\s+zero,\s*16,\s*e16,\s*mf4,\s*t[au],\s*m[au] > +** vmv\.v\.x\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),\s*[a-x0-9]+ > +** vse16\.v\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),0\s*\([a-x0-9]+\) > +** ret > +*/ > +void > +foo3 (int16_t *in, int16_t *out, int16_t x) > +{ > + for (int i = 0; i < 16; i++) > + in[i] = x; > +} > + > +/* > +** foo4: > +** li\s+[a-x0-9]+,32 > +** vsetvli\s+zero,\s*[a-x0-9]+,\s*e16,\s*mf4,\s*t[au],\s*m[au] > +** vmv\.v\.x\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),\s*[a-x0-9]+ > +** vse16\.v\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),0\s*\([a-x0-9]+\) > +** ret > +*/ > +void > +foo4 (int16_t *in, int16_t *out, int16_t x) > +{ > + for (int i = 0; i < 32; i++) > + in[i] = x; > +} > + > +/* > +** foo5: > +** li\s+[a-x0-9]+,64 > +** vsetvli\s+zero,\s*[a-x0-9]+,\s*e16,\s*mf4,\s*t[au],\s*m[au] > +** vmv\.v\.x\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),\s*[a-x0-9]+ > +** vse16\.v\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),0\s*\([a-x0-9]+\) > +** ret > +*/ > +void > +foo5 (int16_t *in, int16_t *out, int16_t x) > +{ > + for (int i = 0; i < 64; i++) > + in[i] = x; > +} > + > +/* > +** foo6: > +** li\s+[a-x0-9]+,128 > +** vsetvli\s+zero,\s*[a-x0-9]+,\s*e16,\s*mf2,\s*t[au],\s*m[au] > +** vmv\.v\.x\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),\s*[a-x0-9]+ > +** 
vse16\.v\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),0\s*\([a-x0-9]+\) > +** ret > +*/ > +void > +foo6 (int16_t *in, int16_t *out, int16_t x) > +{ > + for (int i = 0; i < 128; i++) > + in[i] = x; > +} > + > +/* > +** foo7: > +** li\s+[a-x0-9]+,256 > +** vsetvli\s+zero,\s*[a-x0-9]+,\s*e16,\s*m1,\s*t[au],\s*m[au] > +** vmv\.v\.x\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),\s*[a-x0-9]+ > +** vse16\.v\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),0\s*\([a-x0-9]+\) > +** ret > +*/ > +void > +foo7 (int16_t *in, int16_t *out, int16_t x) > +{ > + for (int i = 0; i < 256; i++) > + in[i] = x; > +} > + > +/* > +** foo8: > +** li\s+[a-x0-9]+,512 > +** vsetvli\s+zero,\s*[a-x0-9]+,\s*e16,\s*m2,\s*t[au],\s*m[au] > +** vmv\.v\.x\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),\s*[a-x0-9]+ > +** vse16\.v\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),0\s*\([a-x0-9]+\) > +** ret > +*/ > +void > +foo8 (int16_t *in, int16_t *out, int16_t x) > +{ > + for (int i = 0; i < 512; i++) > + in[i] = x; > +} > + > +/* > +** foo9: > +** li\s+[a-x0-9]+,1024 > +** vsetvli\s+zero,\s*[a-x0-9]+,\s*e16,\s*m4,\s*t[au],\s*m[au] > +** vmv\.v\.x\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),\s*[a-x0-9]+ > +** vse16\.v\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),0\s*\([a-x0-9]+\) > +** ret > +*/ > +void > +foo9 (int16_t *in, int16_t *out, int16_t x) > +{ > + for (int i = 0; i < 1024; i++) > + in[i] = x; > +} > + > +/* > +** foo10: > +** li\s+[a-x0-9]+,4096 > +** addi\s+[a-x0-9]+,[a-x0-9]+,-2048 > +** vsetvli\s+zero,\s*[a-x0-9]+,\s*e16,\s*m8,\s*t[au],\s*m[au] > +** vmv\.v\.x\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),\s*[a-x0-9]+ > +** vse16\.v\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),0\s*\([a-x0-9]+\) > +** ret > +*/ > +void > +foo10 (int16_t *in, int16_t *out, int16_t x) > +{ > + for (int i = 0; i < 2048; i++) > + in[i] = x; > +} > diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/vls/dup-3.c > b/gcc/testsuite/gcc.target/riscv/rvv/autovec/vls/dup-3.c > new file mode 100644 > index 00000000000..46fb5a525a5 > --- /dev/null > +++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/vls/dup-3.c > @@ -0,0 +1,153 @@ 
> +/* { dg-do compile } */ > +/* { dg-options "-march=rv64gcv_zvfh_zvl4096b -mabi=lp64d -O3 -fno-builtin > -fno-schedule-insns -fno-schedule-insns2 --param riscv-autovec-lmul=m8" } */ > +/* { dg-final { check-function-bodies "**" "" } } */ > + > +#include "def.h" > + > +/* > +** foo1: > +** vsetivli\s+zero,\s*4,\s*e16,\s*mf4,\s*t[au],\s*m[au] > +** vfmv\.v\.f\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),\s*[a-x0-9]+ > +** vse16\.v\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),0\s*\([a-x0-9]+\) > +** ret > +*/ > +void > +foo1 (_Float16 *in, _Float16 *out, _Float16 x) > +{ > + for (int i = 0; i < 4; i++) > + in[i] = x; > +} > + > +/* > +** foo2: > +** vsetivli\s+zero,\s*8,\s*e16,\s*mf4,\s*t[au],\s*m[au] > +** vfmv\.v\.f\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),\s*[a-x0-9]+ > +** vse16\.v\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),0\s*\([a-x0-9]+\) > +** ret > +*/ > +void > +foo2 (_Float16 *in, _Float16 *out, _Float16 x) > +{ > + for (int i = 0; i < 8; i++) > + in[i] = x; > +} > + > +/* > +** foo3: > +** vsetivli\s+zero,\s*16,\s*e16,\s*mf4,\s*t[au],\s*m[au] > +** vfmv\.v\.f\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),\s*[a-x0-9]+ > +** vse16\.v\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),0\s*\([a-x0-9]+\) > +** ret > +*/ > +void > +foo3 (_Float16 *in, _Float16 *out, _Float16 x) > +{ > + for (int i = 0; i < 16; i++) > + in[i] = x; > +} > + > +/* > +** foo4: > +** li\s+[a-x0-9]+,32 > +** vsetvli\s+zero,\s*[a-x0-9]+,\s*e16,\s*mf4,\s*t[au],\s*m[au] > +** vfmv\.v\.f\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),\s*[a-x0-9]+ > +** vse16\.v\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),0\s*\([a-x0-9]+\) > +** ret > +*/ > +void > +foo4 (_Float16 *in, _Float16 *out, _Float16 x) > +{ > + for (int i = 0; i < 32; i++) > + in[i] = x; > +} > + > +/* > +** foo5: > +** li\s+[a-x0-9]+,64 > +** vsetvli\s+zero,\s*[a-x0-9]+,\s*e16,\s*mf4,\s*t[au],\s*m[au] > +** vfmv\.v\.f\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),\s*[a-x0-9]+ > +** vse16\.v\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),0\s*\([a-x0-9]+\) > +** ret > +*/ > +void > +foo5 (_Float16 *in, _Float16 *out, _Float16 x) > +{ > + for (int i = 
0; i < 64; i++) > + in[i] = x; > +} > + > +/* > +** foo6: > +** li\s+[a-x0-9]+,128 > +** vsetvli\s+zero,\s*[a-x0-9]+,\s*e16,\s*mf2,\s*t[au],\s*m[au] > +** vfmv\.v\.f\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),\s*[a-x0-9]+ > +** vse16\.v\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),0\s*\([a-x0-9]+\) > +** ret > +*/ > +void > +foo6 (_Float16 *in, _Float16 *out, _Float16 x) > +{ > + for (int i = 0; i < 128; i++) > + in[i] = x; > +} > + > +/* > +** foo7: > +** li\s+[a-x0-9]+,256 > +** vsetvli\s+zero,\s*[a-x0-9]+,\s*e16,\s*m1,\s*t[au],\s*m[au] > +** vfmv\.v\.f\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),\s*[a-x0-9]+ > +** vse16\.v\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),0\s*\([a-x0-9]+\) > +** ret > +*/ > +void > +foo7 (_Float16 *in, _Float16 *out, _Float16 x) > +{ > + for (int i = 0; i < 256; i++) > + in[i] = x; > +} > + > +/* > +** foo8: > +** li\s+[a-x0-9]+,512 > +** vsetvli\s+zero,\s*[a-x0-9]+,\s*e16,\s*m2,\s*t[au],\s*m[au] > +** vfmv\.v\.f\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),\s*[a-x0-9]+ > +** vse16\.v\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),0\s*\([a-x0-9]+\) > +** ret > +*/ > +void > +foo8 (_Float16 *in, _Float16 *out, _Float16 x) > +{ > + for (int i = 0; i < 512; i++) > + in[i] = x; > +} > + > +/* > +** foo9: > +** li\s+[a-x0-9]+,1024 > +** vsetvli\s+zero,\s*[a-x0-9]+,\s*e16,\s*m4,\s*t[au],\s*m[au] > +** vfmv\.v\.f\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),\s*[a-x0-9]+ > +** vse16\.v\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),0\s*\([a-x0-9]+\) > +** ret > +*/ > +void > +foo9 (_Float16 *in, _Float16 *out, _Float16 x) > +{ > + for (int i = 0; i < 1024; i++) > + in[i] = x; > +} > + > +/* > +** foo10: > +** li\s+[a-x0-9]+,4096 > +** addi\s+[a-x0-9]+,[a-x0-9]+,-2048 > +** vsetvli\s+zero,\s*[a-x0-9]+,\s*e16,\s*m8,\s*t[au],\s*m[au] > +** vfmv\.v\.f\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),\s*[a-x0-9]+ > +** vse16\.v\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),0\s*\([a-x0-9]+\) > +** ret > +*/ > +void > +foo10 (_Float16 *in, _Float16 *out, _Float16 x) > +{ > + for (int i = 0; i < 2048; i++) > + in[i] = x; > +} > diff --git 
a/gcc/testsuite/gcc.target/riscv/rvv/autovec/vls/dup-4.c > b/gcc/testsuite/gcc.target/riscv/rvv/autovec/vls/dup-4.c > new file mode 100644 > index 00000000000..7e46dc42526 > --- /dev/null > +++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/vls/dup-4.c > @@ -0,0 +1,137 @@ > +/* { dg-do compile } */ > +/* { dg-options "-march=rv64gcv_zvfh_zvl4096b -mabi=lp64d -O3 -fno-builtin > -fno-schedule-insns -fno-schedule-insns2 --param riscv-autovec-lmul=m8" } */ > +/* { dg-final { check-function-bodies "**" "" } } */ > + > +#include "def.h" > + > +/* > +** foo1: > +** vsetivli\s+zero,\s*4,\s*e32,\s*mf2,\s*t[au],\s*m[au] > +** vmv\.v\.x\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),\s*[a-x0-9]+ > +** vse32\.v\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),0\s*\([a-x0-9]+\) > +** ret > +*/ > +void > +foo1 (int32_t *in, int32_t *out, int32_t x) > +{ > + for (int i = 0; i < 4; i++) > + in[i] = x; > +} > + > +/* > +** foo2: > +** vsetivli\s+zero,\s*8,\s*e32,\s*mf2,\s*t[au],\s*m[au] > +** vmv\.v\.x\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),\s*[a-x0-9]+ > +** vse32\.v\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),0\s*\([a-x0-9]+\) > +** ret > +*/ > +void > +foo2 (int32_t *in, int32_t *out, int32_t x) > +{ > + for (int i = 0; i < 8; i++) > + in[i] = x; > +} > + > +/* > +** foo3: > +** vsetivli\s+zero,\s*16,\s*e32,\s*mf2,\s*t[au],\s*m[au] > +** vmv\.v\.x\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),\s*[a-x0-9]+ > +** vse32\.v\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),0\s*\([a-x0-9]+\) > +** ret > +*/ > +void > +foo3 (int32_t *in, int32_t *out, int32_t x) > +{ > + for (int i = 0; i < 16; i++) > + in[i] = x; > +} > + > +/* > +** foo4: > +** li\s+[a-x0-9]+,32 > +** vsetvli\s+zero,\s*[a-x0-9]+,\s*e32,\s*mf2,\s*t[au],\s*m[au] > +** vmv\.v\.x\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),\s*[a-x0-9]+ > +** vse32\.v\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),0\s*\([a-x0-9]+\) > +** ret > +*/ > +void > +foo4 (int32_t *in, int32_t *out, int32_t x) > +{ > + for (int i = 0; i < 32; i++) > + in[i] = x; > +} > + > +/* > +** foo5: > +** li\s+[a-x0-9]+,64 > +** 
vsetvli\s+zero,\s*[a-x0-9]+,\s*e32,\s*mf2,\s*t[au],\s*m[au] > +** vmv\.v\.x\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),\s*[a-x0-9]+ > +** vse32\.v\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),0\s*\([a-x0-9]+\) > +** ret > +*/ > +void > +foo5 (int32_t *in, int32_t *out, int32_t x) > +{ > + for (int i = 0; i < 64; i++) > + in[i] = x; > +} > + > +/* > +** foo6: > +** li\s+[a-x0-9]+,128 > +** vsetvli\s+zero,\s*[a-x0-9]+,\s*e32,\s*m1,\s*t[au],\s*m[au] > +** vmv\.v\.x\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),\s*[a-x0-9]+ > +** vse32\.v\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),0\s*\([a-x0-9]+\) > +** ret > +*/ > +void > +foo6 (int32_t *in, int32_t *out, int32_t x) > +{ > + for (int i = 0; i < 128; i++) > + in[i] = x; > +} > + > +/* > +** foo7: > +** li\s+[a-x0-9]+,256 > +** vsetvli\s+zero,\s*[a-x0-9]+,\s*e32,\s*m2,\s*t[au],\s*m[au] > +** vmv\.v\.x\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),\s*[a-x0-9]+ > +** vse32\.v\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),0\s*\([a-x0-9]+\) > +** ret > +*/ > +void > +foo7 (int32_t *in, int32_t *out, int32_t x) > +{ > + for (int i = 0; i < 256; i++) > + in[i] = x; > +} > + > +/* > +** foo8: > +** li\s+[a-x0-9]+,512 > +** vsetvli\s+zero,\s*[a-x0-9]+,\s*e32,\s*m4,\s*t[au],\s*m[au] > +** vmv\.v\.x\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),\s*[a-x0-9]+ > +** vse32\.v\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),0\s*\([a-x0-9]+\) > +** ret > +*/ > +void > +foo8 (int32_t *in, int32_t *out, int32_t x) > +{ > + for (int i = 0; i < 512; i++) > + in[i] = x; > +} > + > +/* > +** foo9: > +** li\s+[a-x0-9]+,1024 > +** vsetvli\s+zero,\s*[a-x0-9]+,\s*e32,\s*m8,\s*t[au],\s*m[au] > +** vmv\.v\.x\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),\s*[a-x0-9]+ > +** vse32\.v\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),0\s*\([a-x0-9]+\) > +** ret > +*/ > +void > +foo9 (int32_t *in, int32_t *out, int32_t x) > +{ > + for (int i = 0; i < 1024; i++) > + in[i] = x; > +} > diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/vls/dup-5.c > b/gcc/testsuite/gcc.target/riscv/rvv/autovec/vls/dup-5.c > new file mode 100644 > index 00000000000..9b9327bdd4d > --- /dev/null 
> +++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/vls/dup-5.c > @@ -0,0 +1,137 @@ > +/* { dg-do compile } */ > +/* { dg-options "-march=rv64gcv_zvfh_zvl4096b -mabi=lp64d -O3 -fno-builtin > -fno-schedule-insns -fno-schedule-insns2 --param riscv-autovec-lmul=m8" } */ > +/* { dg-final { check-function-bodies "**" "" } } */ > + > +#include "def.h" > + > +/* > +** foo1: > +** vsetivli\s+zero,\s*4,\s*e32,\s*mf2,\s*t[au],\s*m[au] > +** vfmv\.v\.f\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),\s*[a-x0-9]+ > +** vse32\.v\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),0\s*\([a-x0-9]+\) > +** ret > +*/ > +void > +foo1 (float *in, float *out, float x) > +{ > + for (int i = 0; i < 4; i++) > + in[i] = x; > +} > + > +/* > +** foo2: > +** vsetivli\s+zero,\s*8,\s*e32,\s*mf2,\s*t[au],\s*m[au] > +** vfmv\.v\.f\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),\s*[a-x0-9]+ > +** vse32\.v\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),0\s*\([a-x0-9]+\) > +** ret > +*/ > +void > +foo2 (float *in, float *out, float x) > +{ > + for (int i = 0; i < 8; i++) > + in[i] = x; > +} > + > +/* > +** foo3: > +** vsetivli\s+zero,\s*16,\s*e32,\s*mf2,\s*t[au],\s*m[au] > +** vfmv\.v\.f\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),\s*[a-x0-9]+ > +** vse32\.v\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),0\s*\([a-x0-9]+\) > +** ret > +*/ > +void > +foo3 (float *in, float *out, float x) > +{ > + for (int i = 0; i < 16; i++) > + in[i] = x; > +} > + > +/* > +** foo4: > +** li\s+[a-x0-9]+,32 > +** vsetvli\s+zero,\s*[a-x0-9]+,\s*e32,\s*mf2,\s*t[au],\s*m[au] > +** vfmv\.v\.f\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),\s*[a-x0-9]+ > +** vse32\.v\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),0\s*\([a-x0-9]+\) > +** ret > +*/ > +void > +foo4 (float *in, float *out, float x) > +{ > + for (int i = 0; i < 32; i++) > + in[i] = x; > +} > + > +/* > +** foo5: > +** li\s+[a-x0-9]+,64 > +** vsetvli\s+zero,\s*[a-x0-9]+,\s*e32,\s*mf2,\s*t[au],\s*m[au] > +** vfmv\.v\.f\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),\s*[a-x0-9]+ > +** vse32\.v\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),0\s*\([a-x0-9]+\) > +** ret > +*/ > +void > +foo5 (float *in, float 
*out, float x) > +{ > + for (int i = 0; i < 64; i++) > + in[i] = x; > +} > + > +/* > +** foo6: > +** li\s+[a-x0-9]+,128 > +** vsetvli\s+zero,\s*[a-x0-9]+,\s*e32,\s*m1,\s*t[au],\s*m[au] > +** vfmv\.v\.f\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),\s*[a-x0-9]+ > +** vse32\.v\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),0\s*\([a-x0-9]+\) > +** ret > +*/ > +void > +foo6 (float *in, float *out, float x) > +{ > + for (int i = 0; i < 128; i++) > + in[i] = x; > +} > + > +/* > +** foo7: > +** li\s+[a-x0-9]+,256 > +** vsetvli\s+zero,\s*[a-x0-9]+,\s*e32,\s*m2,\s*t[au],\s*m[au] > +** vfmv\.v\.f\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),\s*[a-x0-9]+ > +** vse32\.v\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),0\s*\([a-x0-9]+\) > +** ret > +*/ > +void > +foo7 (float *in, float *out, float x) > +{ > + for (int i = 0; i < 256; i++) > + in[i] = x; > +} > + > +/* > +** foo8: > +** li\s+[a-x0-9]+,512 > +** vsetvli\s+zero,\s*[a-x0-9]+,\s*e32,\s*m4,\s*t[au],\s*m[au] > +** vfmv\.v\.f\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),\s*[a-x0-9]+ > +** vse32\.v\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),0\s*\([a-x0-9]+\) > +** ret > +*/ > +void > +foo8 (float *in, float *out, float x) > +{ > + for (int i = 0; i < 512; i++) > + in[i] = x; > +} > + > +/* > +** foo9: > +** li\s+[a-x0-9]+,1024 > +** vsetvli\s+zero,\s*[a-x0-9]+,\s*e32,\s*m8,\s*t[au],\s*m[au] > +** vfmv\.v\.f\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),\s*[a-x0-9]+ > +** vse32\.v\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),0\s*\([a-x0-9]+\) > +** ret > +*/ > +void > +foo9 (float *in, float *out, float x) > +{ > + for (int i = 0; i < 1024; i++) > + in[i] = x; > +} > diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/vls/dup-6.c > b/gcc/testsuite/gcc.target/riscv/rvv/autovec/vls/dup-6.c > new file mode 100644 > index 00000000000..52d5a65b44e > --- /dev/null > +++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/vls/dup-6.c > @@ -0,0 +1,122 @@ > +/* { dg-do compile } */ > +/* { dg-options "-march=rv64gcv_zvfh_zvl4096b -mabi=lp64d -O3 -fno-builtin > -fno-schedule-insns -fno-schedule-insns2 --param riscv-autovec-lmul=m8" } 
*/ > +/* { dg-final { check-function-bodies "**" "" } } */ > + > +#include "def.h" > + > +/* > +** foo1: > +** vsetivli\s+zero,\s*4,\s*e64,\s*m1,\s*t[au],\s*m[au] > +** vmv\.v\.x\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),\s*[a-x0-9]+ > +** vse64\.v\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),0\s*\([a-x0-9]+\) > +** ret > +*/ > +void > +foo1 (int64_t *in, int64_t *out, int64_t x) > +{ > + for (int i = 0; i < 4; i++) > + in[i] = x; > +} > + > +/* > +** foo2: > +** vsetivli\s+zero,\s*8,\s*e64,\s*m1,\s*t[au],\s*m[au] > +** vmv\.v\.x\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),\s*[a-x0-9]+ > +** vse64\.v\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),0\s*\([a-x0-9]+\) > +** ret > +*/ > +void > +foo2 (int64_t *in, int64_t *out, int64_t x) > +{ > + for (int i = 0; i < 8; i++) > + in[i] = x; > +} > + > +/* > +** foo3: > +** vsetivli\s+zero,\s*16,\s*e64,\s*m1,\s*t[au],\s*m[au] > +** vmv\.v\.x\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),\s*[a-x0-9]+ > +** vse64\.v\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),0\s*\([a-x0-9]+\) > +** ret > +*/ > +void > +foo3 (int64_t *in, int64_t *out, int64_t x) > +{ > + for (int i = 0; i < 16; i++) > + in[i] = x; > +} > + > +/* > +** foo4: > +** li\s+[a-x0-9]+,32 > +** vsetvli\s+zero,\s*[a-x0-9]+,\s*e64,\s*m1,\s*t[au],\s*m[au] > +** vmv\.v\.x\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),\s*[a-x0-9]+ > +** vse64\.v\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),0\s*\([a-x0-9]+\) > +** ret > +*/ > +void > +foo4 (int64_t *in, int64_t *out, int64_t x) > +{ > + for (int i = 0; i < 32; i++) > + in[i] = x; > +} > + > +/* > +** foo5: > +** li\s+[a-x0-9]+,64 > +** vsetvli\s+zero,\s*[a-x0-9]+,\s*e64,\s*m1,\s*t[au],\s*m[au] > +** vmv\.v\.x\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),\s*[a-x0-9]+ > +** vse64\.v\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),0\s*\([a-x0-9]+\) > +** ret > +*/ > +void > +foo5 (int64_t *in, int64_t *out, int64_t x) > +{ > + for (int i = 0; i < 64; i++) > + in[i] = x; > +} > + > +/* > +** foo6: > +** li\s+[a-x0-9]+,128 > +** vsetvli\s+zero,\s*[a-x0-9]+,\s*e64,\s*m2,\s*t[au],\s*m[au] > +** 
vmv\.v\.x\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),\s*[a-x0-9]+ > +** vse64\.v\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),0\s*\([a-x0-9]+\) > +** ret > +*/ > +void > +foo6 (int64_t *in, int64_t *out, int64_t x) > +{ > + for (int i = 0; i < 128; i++) > + in[i] = x; > +} > + > +/* > +** foo7: > +** li\s+[a-x0-9]+,256 > +** vsetvli\s+zero,\s*[a-x0-9]+,\s*e64,\s*m4,\s*t[au],\s*m[au] > +** vmv\.v\.x\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),\s*[a-x0-9]+ > +** vse64\.v\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),0\s*\([a-x0-9]+\) > +** ret > +*/ > +void > +foo7 (int64_t *in, int64_t *out, int64_t x) > +{ > + for (int i = 0; i < 256; i++) > + in[i] = x; > +} > + > +/* > +** foo8: > +** li\s+[a-x0-9]+,512 > +** vsetvli\s+zero,\s*[a-x0-9]+,\s*e64,\s*m8,\s*t[au],\s*m[au] > +** vmv\.v\.x\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),\s*[a-x0-9]+ > +** vse64\.v\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),0\s*\([a-x0-9]+\) > +** ret > +*/ > +void > +foo8 (int64_t *in, int64_t *out, int64_t x) > +{ > + for (int i = 0; i < 512; i++) > + in[i] = x; > +} > diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/vls/dup-7.c > b/gcc/testsuite/gcc.target/riscv/rvv/autovec/vls/dup-7.c > new file mode 100644 > index 00000000000..39f27ece2e7 > --- /dev/null > +++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/vls/dup-7.c > @@ -0,0 +1,122 @@ > +/* { dg-do compile } */ > +/* { dg-options "-march=rv64gcv_zvfh_zvl4096b -mabi=lp64d -O3 -fno-builtin > -fno-schedule-insns -fno-schedule-insns2 --param riscv-autovec-lmul=m8" } */ > +/* { dg-final { check-function-bodies "**" "" } } */ > + > +#include "def.h" > + > +/* > +** foo1: > +** vsetivli\s+zero,\s*4,\s*e64,\s*m1,\s*t[au],\s*m[au] > +** vfmv\.v\.f\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),\s*[a-x0-9]+ > +** vse64\.v\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),0\s*\([a-x0-9]+\) > +** ret > +*/ > +void > +foo1 (double *in, double *out, double x) > +{ > + for (int i = 0; i < 4; i++) > + in[i] = x; > +} > + > +/* > +** foo2: > +** vsetivli\s+zero,\s*8,\s*e64,\s*m1,\s*t[au],\s*m[au] > +** 
vfmv\.v\.f\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),\s*[a-x0-9]+ > +** vse64\.v\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),0\s*\([a-x0-9]+\) > +** ret > +*/ > +void > +foo2 (double *in, double *out, double x) > +{ > + for (int i = 0; i < 8; i++) > + in[i] = x; > +} > + > +/* > +** foo3: > +** vsetivli\s+zero,\s*16,\s*e64,\s*m1,\s*t[au],\s*m[au] > +** vfmv\.v\.f\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),\s*[a-x0-9]+ > +** vse64\.v\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),0\s*\([a-x0-9]+\) > +** ret > +*/ > +void > +foo3 (double *in, double *out, double x) > +{ > + for (int i = 0; i < 16; i++) > + in[i] = x; > +} > + > +/* > +** foo4: > +** li\s+[a-x0-9]+,32 > +** vsetvli\s+zero,\s*[a-x0-9]+,\s*e64,\s*m1,\s*t[au],\s*m[au] > +** vfmv\.v\.f\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),\s*[a-x0-9]+ > +** vse64\.v\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),0\s*\([a-x0-9]+\) > +** ret > +*/ > +void > +foo4 (double *in, double *out, double x) > +{ > + for (int i = 0; i < 32; i++) > + in[i] = x; > +} > + > +/* > +** foo5: > +** li\s+[a-x0-9]+,64 > +** vsetvli\s+zero,\s*[a-x0-9]+,\s*e64,\s*m1,\s*t[au],\s*m[au] > +** vfmv\.v\.f\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),\s*[a-x0-9]+ > +** vse64\.v\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),0\s*\([a-x0-9]+\) > +** ret > +*/ > +void > +foo5 (double *in, double *out, double x) > +{ > + for (int i = 0; i < 64; i++) > + in[i] = x; > +} > + > +/* > +** foo6: > +** li\s+[a-x0-9]+,128 > +** vsetvli\s+zero,\s*[a-x0-9]+,\s*e64,\s*m2,\s*t[au],\s*m[au] > +** vfmv\.v\.f\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),\s*[a-x0-9]+ > +** vse64\.v\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),0\s*\([a-x0-9]+\) > +** ret > +*/ > +void > +foo6 (double *in, double *out, double x) > +{ > + for (int i = 0; i < 128; i++) > + in[i] = x; > +} > + > +/* > +** foo7: > +** li\s+[a-x0-9]+,256 > +** vsetvli\s+zero,\s*[a-x0-9]+,\s*e64,\s*m4,\s*t[au],\s*m[au] > +** vfmv\.v\.f\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),\s*[a-x0-9]+ > +** vse64\.v\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),0\s*\([a-x0-9]+\) > +** ret > +*/ > +void > +foo7 (double *in, double *out, double x) > +{ > 
+ for (int i = 0; i < 256; i++) > + in[i] = x; > +} > + > +/* > +** foo8: > +** li\s+[a-x0-9]+,512 > +** vsetvli\s+zero,\s*[a-x0-9]+,\s*e64,\s*m8,\s*t[au],\s*m[au] > +** vfmv\.v\.f\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),\s*[a-x0-9]+ > +** vse64\.v\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),0\s*\([a-x0-9]+\) > +** ret > +*/ > +void > +foo8 (double *in, double *out, double x) > +{ > + for (int i = 0; i < 512; i++) > + in[i] = x; > +} > diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/zve32f_zvl128b-1.c > b/gcc/testsuite/gcc.target/riscv/rvv/autovec/zve32f_zvl128b-1.c > index ecfda79e19a..345e2f963d5 100644 > --- a/gcc/testsuite/gcc.target/riscv/rvv/autovec/zve32f_zvl128b-1.c > +++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/zve32f_zvl128b-1.c > @@ -3,4 +3,4 @@ > > #include "template-1.h" > > -/* { dg-final { scan-tree-dump-times "vectorized 1 loops in function" 3 > "vect" } } */ > +/* { dg-final { scan-tree-dump-times "vectorized 1 loops in function" 4 > "vect" } } */ > diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/zve64d_zvl128b-1.c > b/gcc/testsuite/gcc.target/riscv/rvv/autovec/zve64d_zvl128b-1.c > index 6b320ca6f38..e13c27dcdb0 100644 > --- a/gcc/testsuite/gcc.target/riscv/rvv/autovec/zve64d_zvl128b-1.c > +++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/zve64d_zvl128b-1.c > @@ -3,4 +3,4 @@ > > #include "template-1.h" > > -/* { dg-final { scan-tree-dump-times "vectorized 1 loops in function" 5 > "vect" } } */ > +/* { dg-final { scan-tree-dump-times "vectorized 1 loops in function" 6 > "vect" } } */ > diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/zve64f_zvl128b-1.c > b/gcc/testsuite/gcc.target/riscv/rvv/autovec/zve64f_zvl128b-1.c > index ae3f066477c..e767629ae54 100644 > --- a/gcc/testsuite/gcc.target/riscv/rvv/autovec/zve64f_zvl128b-1.c > +++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/zve64f_zvl128b-1.c > @@ -3,4 +3,4 @@ > > #include "template-1.h" > > -/* { dg-final { scan-tree-dump-times "vectorized 1 loops in function" 4 > "vect" } } */ > +/* { 
dg-final { scan-tree-dump-times "vectorized 1 loops in function" 5 > "vect" } } */ > -- > 2.36.3 >