RE: [PATCH V3] RISC-V: Add basic vec_init for VLS RVV auto-vectorization

Li, Pan2 via Gcc-patches Thu, 11 May 2023 20:16:39 -0700

Committed to trunk.

Pan


-----Original Message-----
From: Gcc-patches <gcc-patches-bounces+pan2.li=intel....@gcc.gnu.org> On Behalf 
Of Kito Cheng via Gcc-patches
Sent: Friday, May 12, 2023 11:00 AM
To: 钟居哲 <juzhe.zh...@rivai.ai>
Cc: GCC Patches <gcc-patches@gcc.gnu.org>; Palmer Dabbelt <pal...@dabbelt.com>; 
Jeff Law <jeffreya...@gmail.com>; Robin Dapp <rdapp....@gmail.com>
Subject: Re: [PATCH V3] RISC-V: Add basic vec_init for VLS RVV 
auto-vectorization

Ok

<juzhe.zh...@rivai.ai> 於 2023年5月12日 週五 10:57 寫道：

> From: Juzhe-Zhong <juzhe.zh...@rivai.ai>
>
> #include <stdint.h>
>
> typedef int8_t vnx16qi __attribute__((vector_size (16)));
>
> #include <stdint.h>
>
> typedef int8_t vnx16qi __attribute__ ((vector_size (16)));
> typedef int8_t vnx32qi __attribute__ ((vector_size (32)));
> typedef int8_t vnx64qi __attribute__ ((vector_size (64)));
> typedef int8_t vnx128qi __attribute__ ((vector_size (128)));
>
> __attribute__ ((noipa)) void
> f_vnx128qi (int8_t a, int8_t b, int8_t c, int8_t d, int8_t e, int8_t f,
> int8_t g, int8_t h, int8_t *out)
> {
>   vnx128qi v
>     = {a, b, c, d, e, f, g, h, a, b, c, d, e, f, g, h,
>        a, b, c, d, e, f, g, h, a, b, c, d, e, f, g, h,
>        a, b, c, d, e, f, g, h, a, b, c, d, e, f, g, h,
>        a, b, c, d, e, f, g, h, a, b, c, d, e, f, g, h,
>        a, b, c, d, e, f, g, h, a, b, c, d, e, f, g, h,
>        a, b, c, d, e, f, g, h, a, b, c, d, e, f, g, h,
>        a, b, c, d, e, f, g, h, a, b, c, d, e, f, g, h,
>        a, b, c, d, e, f, g, h, a, b, c, d, e, f, g, h};
>   *(vnx128qi *) out = v;
> }
>
> Codegen with this patch:
> f_vnx128qi:
>         andi    a1,a1,0xff
>         andi    a0,a0,0xff
>         slli    a1,a1,8
>         andi    a2,a2,0xff
>         or      a1,a1,a0
>         slli    a2,a2,16
>         andi    a3,a3,0xff
>         or      a2,a2,a1
>         slli    a3,a3,24
>         andi    a4,a4,0xff
>         or      a3,a3,a2
>         slli    a4,a4,32
>         andi    a5,a5,0xff
>         or      a4,a4,a3
>         slli    a5,a5,40
>         andi    a6,a6,0xff
>         or      a5,a5,a4
>         slli    a6,a6,48
>         or      a6,a6,a5
>         vsetvli a5,zero,e64,m8,ta,ma
>         ld      a5,0(sp)
>         slli    a7,a7,56
>         or      a7,a7,a6
>         vmv.v.x v8,a7
>         vs8r.v  v8,0(a5)
>         ret
>
> We will support more optimization cases in the future, but they are not
> included in this patch.
>
> gcc/ChangeLog:
>
>         * config/riscv/autovec.md (vec_init<mode><vel>): New pattern.
>         * config/riscv/riscv-protos.h (expand_vec_init): New function.
>         * config/riscv/riscv-v.cc (class rvv_builder): New class.
>         (rvv_builder::can_duplicate_repeating_sequence_p): New function.
>         (rvv_builder::get_merged_repeating_sequence): Ditto.
>         (expand_vector_init_insert_elems): Ditto.
>         (expand_vec_init): Ditto.
>         * config/riscv/vector-iterators.md: New attribute.
>
> gcc/testsuite/ChangeLog:
>
>         * gcc.target/riscv/rvv/rvv.exp:
>         * gcc.target/riscv/rvv/autovec/vls-vlmax/insert-1.c: New test.
>         * gcc.target/riscv/rvv/autovec/vls-vlmax/insert-2.c: New test.
>         * gcc.target/riscv/rvv/autovec/vls-vlmax/insert-3.c: New test.
>         * gcc.target/riscv/rvv/autovec/vls-vlmax/insert_run-1.c: New test.
>         * gcc.target/riscv/rvv/autovec/vls-vlmax/insert_run-2.c: New test.
>         * gcc.target/riscv/rvv/autovec/vls-vlmax/repeat-1.c: New test.
>         * gcc.target/riscv/rvv/autovec/vls-vlmax/repeat-2.c: New test.
>         * gcc.target/riscv/rvv/autovec/vls-vlmax/repeat-3.c: New test.
>         * gcc.target/riscv/rvv/autovec/vls-vlmax/repeat-4.c: New test.
>         * gcc.target/riscv/rvv/autovec/vls-vlmax/repeat-5.c: New test.
>         * gcc.target/riscv/rvv/autovec/vls-vlmax/repeat-6.c: New test.
>         * gcc.target/riscv/rvv/autovec/vls-vlmax/repeat_run-1.c: New test.
>         * gcc.target/riscv/rvv/autovec/vls-vlmax/repeat_run-2.c: New test.
>         * gcc.target/riscv/rvv/autovec/vls-vlmax/repeat_run-3.c: New test.
>         * gcc.target/riscv/rvv/autovec/vls-vlmax/repeat_run-4.c: New test.
>         * gcc.target/riscv/rvv/autovec/vls-vlmax/repeat_run-5.c: New test.
>         * gcc.target/riscv/rvv/autovec/vls-vlmax/repeat_run-6.c: New test.
>
> ---
>  gcc/config/riscv/autovec.md                   |  16 ++
>  gcc/config/riscv/riscv-protos.h               |   1 +
>  gcc/config/riscv/riscv-v.cc                   | 127 +++++++++++
>  gcc/config/riscv/vector-iterators.md          |   9 +
>  .../riscv/rvv/autovec/vls-vlmax/insert-1.c    |  41 ++++
>  .../riscv/rvv/autovec/vls-vlmax/insert-2.c    |  41 ++++
>  .../riscv/rvv/autovec/vls-vlmax/insert-3.c    |  41 ++++
>  .../rvv/autovec/vls-vlmax/insert_run-1.c      |  46 ++++
>  .../rvv/autovec/vls-vlmax/insert_run-2.c      |  46 ++++
>  .../riscv/rvv/autovec/vls-vlmax/repeat-1.c    |  75 +++++++
>  .../riscv/rvv/autovec/vls-vlmax/repeat-2.c    |  61 ++++++
>  .../riscv/rvv/autovec/vls-vlmax/repeat-3.c    |  53 +++++
>  .../riscv/rvv/autovec/vls-vlmax/repeat-4.c    |  39 ++++
>  .../riscv/rvv/autovec/vls-vlmax/repeat-5.c    |  74 +++++++
>  .../riscv/rvv/autovec/vls-vlmax/repeat-6.c    |  78 +++++++
>  .../rvv/autovec/vls-vlmax/repeat_run-1.c      | 125 +++++++++++
>  .../rvv/autovec/vls-vlmax/repeat_run-2.c      | 145 +++++++++++++
>  .../rvv/autovec/vls-vlmax/repeat_run-3.c      | 202 ++++++++++++++++++
>  .../rvv/autovec/vls-vlmax/repeat_run-4.c      |  77 +++++++
>  .../rvv/autovec/vls-vlmax/repeat_run-5.c      | 124 +++++++++++
>  .../rvv/autovec/vls-vlmax/repeat_run-6.c      | 122 +++++++++++
>  gcc/testsuite/gcc.target/riscv/rvv/rvv.exp    |   4 +
>  22 files changed, 1547 insertions(+)
>  create mode 100644
> gcc/testsuite/gcc.target/riscv/rvv/autovec/vls-vlmax/insert-1.c
>  create mode 100644
> gcc/testsuite/gcc.target/riscv/rvv/autovec/vls-vlmax/insert-2.c
>  create mode 100644
> gcc/testsuite/gcc.target/riscv/rvv/autovec/vls-vlmax/insert-3.c
>  create mode 100644
> gcc/testsuite/gcc.target/riscv/rvv/autovec/vls-vlmax/insert_run-1.c
>  create mode 100644
> gcc/testsuite/gcc.target/riscv/rvv/autovec/vls-vlmax/insert_run-2.c
>  create mode 100644
> gcc/testsuite/gcc.target/riscv/rvv/autovec/vls-vlmax/repeat-1.c
>  create mode 100644
> gcc/testsuite/gcc.target/riscv/rvv/autovec/vls-vlmax/repeat-2.c
>  create mode 100644
> gcc/testsuite/gcc.target/riscv/rvv/autovec/vls-vlmax/repeat-3.c
>  create mode 100644
> gcc/testsuite/gcc.target/riscv/rvv/autovec/vls-vlmax/repeat-4.c
>  create mode 100644
> gcc/testsuite/gcc.target/riscv/rvv/autovec/vls-vlmax/repeat-5.c
>  create mode 100644
> gcc/testsuite/gcc.target/riscv/rvv/autovec/vls-vlmax/repeat-6.c
>  create mode 100644
> gcc/testsuite/gcc.target/riscv/rvv/autovec/vls-vlmax/repeat_run-1.c
>  create mode 100644
> gcc/testsuite/gcc.target/riscv/rvv/autovec/vls-vlmax/repeat_run-2.c
>  create mode 100644
> gcc/testsuite/gcc.target/riscv/rvv/autovec/vls-vlmax/repeat_run-3.c
>  create mode 100644
> gcc/testsuite/gcc.target/riscv/rvv/autovec/vls-vlmax/repeat_run-4.c
>  create mode 100644
> gcc/testsuite/gcc.target/riscv/rvv/autovec/vls-vlmax/repeat_run-5.c
>  create mode 100644
> gcc/testsuite/gcc.target/riscv/rvv/autovec/vls-vlmax/repeat_run-6.c
>
> diff --git a/gcc/config/riscv/autovec.md b/gcc/config/riscv/autovec.md
> index ac0c939d277..ce0b46537ad 100644
> --- a/gcc/config/riscv/autovec.md
> +++ b/gcc/config/riscv/autovec.md
> @@ -83,6 +83,22 @@
>    }
>  )
>
> +;;
> -------------------------------------------------------------------------
> +;; ---- [INT,FP] Initialize from individual elements
> +;;
> -------------------------------------------------------------------------
> +;; This is the pattern to initialize the vector
> +;;
> -------------------------------------------------------------------------
> +
> +(define_expand "vec_init<mode><vel>"
> +  [(match_operand:V 0 "register_operand")
> +   (match_operand 1 "")]
> +  "TARGET_VECTOR"
> +  {
> +    riscv_vector::expand_vec_init (operands[0], operands[1]);
> +    DONE;
> +  }
> +)
> +
>  ;;
> ========================================================================
>  ;; == Vector operations
>  ;;
> =========================================================================
> diff --git a/gcc/config/riscv/riscv-protos.h
> b/gcc/config/riscv/riscv-protos.h
> index 55a43d6270e..bc71f9cbbba 100644
> --- a/gcc/config/riscv/riscv-protos.h
> +++ b/gcc/config/riscv/riscv-protos.h
> @@ -222,6 +222,7 @@ void expand_tuple_move (machine_mode, rtx *);
>  machine_mode preferred_simd_mode (scalar_mode);
>  opt_machine_mode get_mask_mode (machine_mode);
>  void expand_vec_series (rtx, rtx, rtx);
> +void expand_vec_init (rtx, rtx);
>  }
>
>  /* We classify builtin types into two classes:
> diff --git a/gcc/config/riscv/riscv-v.cc b/gcc/config/riscv/riscv-v.cc
> index 7cf5ec9f3ba..b8dc333f54e 100644
> --- a/gcc/config/riscv/riscv-v.cc
> +++ b/gcc/config/riscv/riscv-v.cc
> @@ -1131,4 +1131,131 @@ preferred_simd_mode (scalar_mode mode)
>    return word_mode;
>  }
>
> +class rvv_builder : public rtx_vector_builder
> +{
> +public:
> +  rvv_builder () : rtx_vector_builder () {}
> +  rvv_builder (machine_mode mode, unsigned int npatterns,
> +              unsigned int nelts_per_pattern)
> +    : rtx_vector_builder (mode, npatterns, nelts_per_pattern)
> +  {
> +    m_inner_mode = GET_MODE_INNER (mode);
> +    m_inner_size = GET_MODE_BITSIZE (m_inner_mode).to_constant ();
> +  }
> +
> +  bool can_duplicate_repeating_sequence_p ();
> +  rtx get_merged_repeating_sequence ();
> +
> +  machine_mode new_mode () const { return m_new_mode; }
> +
> +private:
> +  machine_mode m_inner_mode;
> +  machine_mode m_new_mode;
> +  scalar_int_mode m_new_inner_mode;
> +  unsigned int m_inner_size;
> +};
> +
> +/* Return true if the vector can be formed by duplicating a super element
> +   which is the fusion of consecutive elements.
> +
> +     v = { a, b, a, b } super element = ab, v = { ab, ab }  */
> +bool
> +rvv_builder::can_duplicate_repeating_sequence_p ()
> +{
> +  poly_uint64 new_size = exact_div (full_nelts (), npatterns ());
> +  unsigned int new_inner_size = m_inner_size * npatterns ();
> +  if (!int_mode_for_size (new_inner_size, 0).exists (&m_new_inner_mode)
> +      || GET_MODE_SIZE (m_new_inner_mode) > UNITS_PER_WORD
> +      || !get_vector_mode (m_new_inner_mode, new_size).exists
> (&m_new_mode))
> +    return false;
> +  return repeating_sequence_p (0, full_nelts ().to_constant (), npatterns
> ());
> +}
> +
> +/* Merge the repeating sequence into a single element and return the
> RTX.  */
> +rtx
> +rvv_builder::get_merged_repeating_sequence ()
> +{
> +  scalar_int_mode mode = Pmode;
> +  rtx target = gen_reg_rtx (mode);
> +  emit_move_insn (target, const0_rtx);
> +  rtx imm = gen_int_mode ((1ULL << m_inner_size) - 1, mode);
> +  /* { a, b, a, b }: Generate duplicate element = b << bits | a.  */
> +  for (unsigned int i = 0; i < npatterns (); i++)
> +    {
> +      unsigned int loc = m_inner_size * i;
> +      rtx shift = gen_int_mode (loc, mode);
> +      rtx ele = gen_lowpart (mode, elt (i));
> +      rtx tmp = expand_simple_binop (mode, AND, ele, imm, NULL_RTX, false,
> +                                    OPTAB_DIRECT);
> +      rtx tmp2 = expand_simple_binop (mode, ASHIFT, tmp, shift, NULL_RTX,
> false,
> +                                     OPTAB_DIRECT);
> +      rtx tmp3 = expand_simple_binop (mode, IOR, tmp2, target, NULL_RTX,
> false,
> +                                     OPTAB_DIRECT);
> +      emit_move_insn (target, tmp3);
> +    }
> +  if (GET_MODE_SIZE (m_new_inner_mode) < UNITS_PER_WORD)
> +    return gen_lowpart (m_new_inner_mode, target);
> +  return target;
> +}
> +
> +/* Subroutine of expand_vec_init.
> +   Works as follows:
> +   (a) Initialize TARGET by broadcasting element 0 of BUILDER.
> +   (b) Skip leading elements from BUILDER, which are the same as
> +       element 0.
> +   (c) Insert the remaining elements in order in TARGET using
> +       vslide1down.  */
> +
> +static void
> +expand_vector_init_insert_elems (rtx target, const rvv_builder &builder,
> +                                int nelts_reqd)
> +{
> +  machine_mode mode = GET_MODE (target);
> +  scalar_mode elem_mode = GET_MODE_INNER (mode);
> +  machine_mode mask_mode;
> +  gcc_assert (get_mask_mode (mode).exists (&mask_mode));
> +  rtx dup = expand_vector_broadcast (mode, builder.elt (0));
> +  emit_move_insn (target, dup);
> +  int ndups = builder.count_dups (0, nelts_reqd - 1, 1);
> +  for (int i = ndups; i < nelts_reqd; i++)
> +    {
> +      unsigned int unspec
> +       = FLOAT_MODE_P (mode) ? UNSPEC_VFSLIDE1DOWN : UNSPEC_VSLIDE1DOWN;
> +      insn_code icode = code_for_pred_slide (unspec, mode);
> +      emit_len_binop (icode, target, target, builder.elt (i), NULL,
> mask_mode,
> +                     elem_mode);
> +    }
> +}
> +
> +/* Initialize register TARGET from the elements in PARALLEL rtx VALS.  */
> +
> +void
> +expand_vec_init (rtx target, rtx vals)
> +{
> +  machine_mode mode = GET_MODE (target);
> +  int nelts = XVECLEN (vals, 0);
> +
> +  rvv_builder v (mode, nelts, 1);
> +  for (int i = 0; i < nelts; i++)
> +    v.quick_push (XVECEXP (vals, 0, i));
> +  v.finalize ();
> +
> +  if (nelts > 3)
> +    {
> +      /* Case 1: Convert v = { a, b, a, b } into v = { ab, ab }.  */
> +      if (v.can_duplicate_repeating_sequence_p ())
> +       {
> +         rtx ele = v.get_merged_repeating_sequence ();
> +         rtx dup = expand_vector_broadcast (v.new_mode (), ele);
> +         emit_move_insn (target, gen_lowpart (mode, dup));
> +         return;
> +       }
> +      /* TODO: We will support more Initialization of vector in the
> future.  */
> +    }
> +
> +  /* Handle common situation by vslide1down. This function can handle any
> +     situation of vec_init<mode>. Only the cases that are not optimized
> above
> +     will fall through here.  */
> +  expand_vector_init_insert_elems (target, v, nelts);
> +}
> +
>  } // namespace riscv_vector
> diff --git a/gcc/config/riscv/vector-iterators.md
> b/gcc/config/riscv/vector-iterators.md
> index 5cf958ba845..a282861335d 100644
> --- a/gcc/config/riscv/vector-iterators.md
> +++ b/gcc/config/riscv/vector-iterators.md
> @@ -996,6 +996,15 @@
>    (VNx1DF "DF") (VNx2DF "DF") (VNx4DF "DF") (VNx8DF "DF") (VNx16DF "DF")
>  ])
>
> +(define_mode_attr vel [
> +  (VNx1QI "qi") (VNx2QI "qi") (VNx4QI "qi") (VNx8QI "qi") (VNx16QI "qi")
> (VNx32QI "qi") (VNx64QI "qi") (VNx128QI "qi")
> +  (VNx1HI "hi") (VNx2HI "hi") (VNx4HI "hi") (VNx8HI "hi") (VNx16HI "hi")
> (VNx32HI "hi") (VNx64HI "hi")
> +  (VNx1SI "si") (VNx2SI "si") (VNx4SI "si") (VNx8SI "si") (VNx16SI "si")
> (VNx32SI "si")
> +  (VNx1DI "di") (VNx2DI "di") (VNx4DI "di") (VNx8DI "di") (VNx16DI "di")
> +  (VNx1SF "sf") (VNx2SF "sf") (VNx4SF "sf") (VNx8SF "sf") (VNx16SF "sf")
> (VNx32SF "sf")
> +  (VNx1DF "df") (VNx2DF "df") (VNx4DF "df") (VNx8DF "df") (VNx16DF "df")
> +])
> +
>  (define_mode_attr VSUBEL [
>    (VNx1HI "QI") (VNx2HI "QI") (VNx4HI "QI") (VNx8HI "QI") (VNx16HI "QI")
> (VNx32HI "QI") (VNx64HI "QI")
>    (VNx1SI "HI") (VNx2SI "HI") (VNx4SI "HI") (VNx8SI "HI") (VNx16SI "HI")
> (VNx32SI "HI")
> diff --git
> a/gcc/testsuite/gcc.target/riscv/rvv/autovec/vls-vlmax/insert-1.c
> b/gcc/testsuite/gcc.target/riscv/rvv/autovec/vls-vlmax/insert-1.c
> new file mode 100644
> index 00000000000..ed25fbf2bd7
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/vls-vlmax/insert-1.c
> @@ -0,0 +1,41 @@
> +/* { dg-do compile } */
> +/* { dg-additional-options "-march=rv64gcv -mabi=lp64d" } */
> +
> +#include <stdint-gcc.h>
> +
> +typedef int64_t vnx2di __attribute__ ((vector_size (16)));
> +typedef int64_t vnx4di __attribute__ ((vector_size (32)));
> +typedef int64_t vnx8di __attribute__ ((vector_size (64)));
> +typedef int64_t vnx16di __attribute__ ((vector_size (128)));
> +
> +__attribute__ ((noipa)) void
> +f_vnx2di (int64_t a, int64_t b, int64_t *out)
> +{
> +  vnx2di v = {a, b};
> +  *(vnx2di *) out = v;
> +}
> +
> +__attribute__ ((noipa)) void
> +f_vnx4di (int64_t a, int64_t b, int64_t c, int64_t d, int64_t *out)
> +{
> +  vnx4di v = {a, b, c, d};
> +  *(vnx4di *) out = v;
> +}
> +
> +__attribute__ ((noipa)) void
> +f_vnx8di (int64_t a, int64_t b, int64_t c, int64_t d, int64_t e, int64_t
> f, int64_t g, int64_t h, int64_t *out)
> +{
> +  vnx8di v = {a, b, c, d, e, f, g, h};
> +  *(vnx8di *) out = v;
> +}
> +
> +__attribute__ ((noipa)) void
> +f_vnx16di (int64_t a, int64_t b, int64_t c, int64_t d, int64_t e, int64_t
> f,
> +          int64_t g, int64_t h, int64_t a2, int64_t b2, int64_t c2,
> int64_t d2,
> +          int64_t e2, int64_t f2, int64_t g2, int64_t h2, int64_t *out)
> +{
> +  vnx16di v = {a, b, c, d, e, f, g, h, a2, b2, c2, d2, e2, f2, g2, h2};
> +  *(vnx16di *) out = v;
> +}
> +
> +/* { dg-final { scan-assembler-times
> {vslide1down\.vx\tv[0-9]+,\s*v[0-9]+,\s*[a-x0-9]+} 26 } } */
> diff --git
> a/gcc/testsuite/gcc.target/riscv/rvv/autovec/vls-vlmax/insert-2.c
> b/gcc/testsuite/gcc.target/riscv/rvv/autovec/vls-vlmax/insert-2.c
> new file mode 100644
> index 00000000000..333f0a20fd6
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/vls-vlmax/insert-2.c
> @@ -0,0 +1,41 @@
> +/* { dg-do compile } */
> +/* { dg-additional-options "-march=rv32gcv -mabi=ilp32d" } */
> +
> +#include <stdint-gcc.h>
> +
> +typedef int64_t vnx2di __attribute__ ((vector_size (16)));
> +typedef int64_t vnx4di __attribute__ ((vector_size (32)));
> +typedef int64_t vnx8di __attribute__ ((vector_size (64)));
> +typedef int64_t vnx16di __attribute__ ((vector_size (128)));
> +
> +__attribute__ ((noipa)) void
> +f_vnx2di (int64_t a, int64_t b, int64_t *out)
> +{
> +  vnx2di v = {a, b};
> +  *(vnx2di *) out = v;
> +}
> +
> +__attribute__ ((noipa)) void
> +f_vnx4di (int64_t a, int64_t b, int64_t c, int64_t d, int64_t *out)
> +{
> +  vnx4di v = {a, b, c, d};
> +  *(vnx4di *) out = v;
> +}
> +
> +__attribute__ ((noipa)) void
> +f_vnx8di (int64_t a, int64_t b, int64_t c, int64_t d, int64_t e, int64_t
> f, int64_t g, int64_t h, int64_t *out)
> +{
> +  vnx8di v = {a, b, c, d, e, f, g, h};
> +  *(vnx8di *) out = v;
> +}
> +
> +__attribute__ ((noipa)) void
> +f_vnx16di (int64_t a, int64_t b, int64_t c, int64_t d, int64_t e, int64_t
> f,
> +          int64_t g, int64_t h, int64_t a2, int64_t b2, int64_t c2,
> int64_t d2,
> +          int64_t e2, int64_t f2, int64_t g2, int64_t h2, int64_t *out)
> +{
> +  vnx16di v = {a, b, c, d, e, f, g, h, a2, b2, c2, d2, e2, f2, g2, h2};
> +  *(vnx16di *) out = v;
> +}
> +
> +/* { dg-final { scan-assembler-times
> {vslide1down\.vx\tv[0-9]+,\s*v[0-9]+,\s*[a-x0-9]+} 52 } } */
> diff --git
> a/gcc/testsuite/gcc.target/riscv/rvv/autovec/vls-vlmax/insert-3.c
> b/gcc/testsuite/gcc.target/riscv/rvv/autovec/vls-vlmax/insert-3.c
> new file mode 100644
> index 00000000000..e03441ae05e
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/vls-vlmax/insert-3.c
> @@ -0,0 +1,41 @@
> +/* { dg-do compile } */
> +/* { dg-additional-options "-march=rv64gcv -mabi=lp64d" } */
> +
> +#include <stdint-gcc.h>
> +
> +typedef double vnx2df __attribute__ ((vector_size (16)));
> +typedef double vnx4df __attribute__ ((vector_size (32)));
> +typedef double vnx8df __attribute__ ((vector_size (64)));
> +typedef double vnx16df __attribute__ ((vector_size (128)));
> +
> +__attribute__ ((noipa)) void
> +f_vnx2df (double a, double b, double *out)
> +{
> +  vnx2df v = {a, b};
> +  *(vnx2df *) out = v;
> +}
> +
> +__attribute__ ((noipa)) void
> +f_vnx4df (double a, double b, double c, double d, double *out)
> +{
> +  vnx4df v = {a, b, c, d};
> +  *(vnx4df *) out = v;
> +}
> +
> +__attribute__ ((noipa)) void
> +f_vnx8df (double a, double b, double c, double d, double e, double f,
> double g, double h, double *out)
> +{
> +  vnx8df v = {a, b, c, d, e, f, g, h};
> +  *(vnx8df *) out = v;
> +}
> +
> +__attribute__ ((noipa)) void
> +f_vnx16df (double a, double b, double c, double d, double e, double f,
> +          double g, double h, double a2, double b2, double c2, double d2,
> +          double e2, double f2, double g2, double h2, double *out)
> +{
> +  vnx16df v = {a, b, c, d, e, f, g, h, a2, b2, c2, d2, e2, f2, g2, h2};
> +  *(vnx16df *) out = v;
> +}
> +
> +/* { dg-final { scan-assembler-times
> {vfslide1down\.vf\tv[0-9]+,\s*v[0-9]+,\s*[a-x0-9]+} 26 } } */
> diff --git
> a/gcc/testsuite/gcc.target/riscv/rvv/autovec/vls-vlmax/insert_run-1.c
> b/gcc/testsuite/gcc.target/riscv/rvv/autovec/vls-vlmax/insert_run-1.c
> new file mode 100644
> index 00000000000..7eb129cde68
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/vls-vlmax/insert_run-1.c
> @@ -0,0 +1,46 @@
> +/* { dg-do run { target { riscv_vector } } } */
> +/* { dg-options "--param riscv-autovec-preference=fixed-vlmax -O3" } */
> +
> +#include "insert-1.c"
> +
> +int
> +main ()
> +{
> +  int64_t in[16] = {-458615, -1551,  -552, -458615, -45815, -551,  -2,
>  -4585,
> +                   -134,    -99294, -789, -51284,  -7324,  -3333, -6666,
> -11};
> +
> +  int64_t v_vnx2di[sizeof (vnx2di) / sizeof (int64_t)];
> +  f_vnx2di (in[0], in[1], v_vnx2di);
> +  for (int i = 0; i < sizeof (vnx2di) / sizeof (int64_t); i++)
> +    {
> +      if (v_vnx2di[i] != in[i])
> +       __builtin_abort ();
> +    }
> +
> +  int64_t v_vnx4di[sizeof (vnx4di) / sizeof (int64_t)];
> +  f_vnx4di (in[0], in[1], in[2], in[3], v_vnx4di);
> +  for (int i = 0; i < sizeof (vnx2di) / sizeof (int64_t); i++)
> +    {
> +      if (v_vnx2di[i] != in[i])
> +       __builtin_abort ();
> +    }
> +
> +  int64_t v_vnx8di[sizeof (vnx8di) / sizeof (int64_t)];
> +  f_vnx8di (in[0], in[1], in[2], in[3], in[4], in[5], in[6], in[7],
> v_vnx8di);
> +  for (int i = 0; i < sizeof (vnx2di) / sizeof (int64_t); i++)
> +    {
> +      if (v_vnx2di[i] != in[i])
> +       __builtin_abort ();
> +    }
> +
> +  int64_t v_vnx16di[sizeof (vnx16di) / sizeof (int64_t)];
> +  f_vnx16di (in[0], in[1], in[2], in[3], in[4], in[5], in[6], in[7],
> in[8],
> +            in[9], in[10], in[11], in[12], in[13], in[14], in[15],
> v_vnx16di);
> +  for (int i = 0; i < sizeof (vnx2di) / sizeof (int64_t); i++)
> +    {
> +      if (v_vnx2di[i] != in[i])
> +       __builtin_abort ();
> +    }
> +
> +  return 0;
> +}
> diff --git
> a/gcc/testsuite/gcc.target/riscv/rvv/autovec/vls-vlmax/insert_run-2.c
> b/gcc/testsuite/gcc.target/riscv/rvv/autovec/vls-vlmax/insert_run-2.c
> new file mode 100644
> index 00000000000..e3b97be385b
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/vls-vlmax/insert_run-2.c
> @@ -0,0 +1,46 @@
> +/* { dg-do run { target { riscv_vector } } } */
> +/* { dg-options "--param riscv-autovec-preference=fixed-vlmax -O3" } */
> +
> +#include "insert-3.c"
> +
> +int
> +main ()
> +{
> +  double in[16] = {-458.615, -1.551,  -55.2, -4586.15, -4581.5, -55.1,
> -2,      -4.585,
> +                   -13.4,    -9929.4, -7.89, -512.84,  -73.24,  -33.33,
> -666.6, -1.1};
> +
> +  double v_vnx2df[sizeof (vnx2df) / sizeof (double)];
> +  f_vnx2df (in[0], in[1], v_vnx2df);
> +  for (int i = 0; i < sizeof (vnx2df) / sizeof (double); i++)
> +    {
> +      if (v_vnx2df[i] != in[i])
> +       __builtin_abort ();
> +    }
> +
> +  double v_vnx4df[sizeof (vnx4df) / sizeof (double)];
> +  f_vnx4df (in[0], in[1], in[2], in[3], v_vnx4df);
> +  for (int i = 0; i < sizeof (vnx2df) / sizeof (double); i++)
> +    {
> +      if (v_vnx2df[i] != in[i])
> +       __builtin_abort ();
> +    }
> +
> +  double v_vnx8df[sizeof (vnx8df) / sizeof (double)];
> +  f_vnx8df (in[0], in[1], in[2], in[3], in[4], in[5], in[6], in[7],
> v_vnx8df);
> +  for (int i = 0; i < sizeof (vnx2df) / sizeof (double); i++)
> +    {
> +      if (v_vnx2df[i] != in[i])
> +       __builtin_abort ();
> +    }
> +
> +  double v_vnx16df[sizeof (vnx16df) / sizeof (double)];
> +  f_vnx16df (in[0], in[1], in[2], in[3], in[4], in[5], in[6], in[7],
> in[8],
> +            in[9], in[10], in[11], in[12], in[13], in[14], in[15],
> v_vnx16df);
> +  for (int i = 0; i < sizeof (vnx2df) / sizeof (double); i++)
> +    {
> +      if (v_vnx2df[i] != in[i])
> +       __builtin_abort ();
> +    }
> +
> +  return 0;
> +}
> diff --git
> a/gcc/testsuite/gcc.target/riscv/rvv/autovec/vls-vlmax/repeat-1.c
> b/gcc/testsuite/gcc.target/riscv/rvv/autovec/vls-vlmax/repeat-1.c
> new file mode 100644
> index 00000000000..5d6d0e63724
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/vls-vlmax/repeat-1.c
> @@ -0,0 +1,75 @@
> +/* { dg-do compile } */
> +/* { dg-additional-options "-march=rv32gcv -mabi=ilp32d" } */
> +
> +#include <stdint-gcc.h>
> +
> +typedef int8_t vnx2qi __attribute__ ((vector_size (2)));
> +typedef int8_t vnx4qi __attribute__ ((vector_size (4)));
> +typedef int8_t vnx8qi __attribute__ ((vector_size (8)));
> +typedef int8_t vnx16qi __attribute__ ((vector_size (16)));
> +typedef int8_t vnx32qi __attribute__ ((vector_size (32)));
> +typedef int8_t vnx64qi __attribute__ ((vector_size (64)));
> +typedef int8_t vnx128qi __attribute__ ((vector_size (128)));
> +
> +__attribute__ ((noipa)) void
> +f_vnx2qi (int8_t a, int8_t b, int8_t *out)
> +{
> +  vnx2qi v = {a, b};
> +  *(vnx2qi *) out = v;
> +}
> +
> +__attribute__ ((noipa)) void
> +f_vnx4qi (int8_t a, int8_t b, int8_t *out)
> +{
> +  vnx4qi v = {a, b, a, b};
> +  *(vnx4qi *) out = v;
> +}
> +
> +__attribute__ ((noipa)) void
> +f_vnx8qi (int8_t a, int8_t b, int8_t *out)
> +{
> +  vnx8qi v = {a, b, a, b, a, b, a, b};
> +  *(vnx8qi *) out = v;
> +}
> +
> +__attribute__ ((noipa)) void
> +f_vnx16qi (int8_t a, int8_t b, int8_t *out)
> +{
> +  vnx16qi v = {a, b, a, b, a, b, a, b, a, b, a, b, a, b, a, b};
> +  *(vnx16qi *) out = v;
> +}
> +
> +__attribute__ ((noipa)) void
> +f_vnx32qi (int8_t a, int8_t b, int8_t *out)
> +{
> +  vnx32qi v = {a, b, a, b, a, b, a, b, a, b, a, b, a, b, a, b,
> +              a, b, a, b, a, b, a, b, a, b, a, b, a, b, a, b};
> +  *(vnx32qi *) out = v;
> +}
> +
> +__attribute__ ((noipa)) void
> +f_vnx64qi (int8_t a, int8_t b, int8_t *out)
> +{
> +  vnx64qi v = {a, b, a, b, a, b, a, b, a, b, a, b, a, b, a, b, a, b, a,
> b, a, b,
> +              a, b, a, b, a, b, a, b, a, b, a, b, a, b, a, b, a, b, a, b,
> a, b,
> +              a, b, a, b, a, b, a, b, a, b, a, b, a, b, a, b, a, b, a, b};
> +  *(vnx64qi *) out = v;
> +}
> +
> +__attribute__ ((noipa)) void
> +f_vnx128qi (int8_t a, int8_t b, int8_t *out)
> +{
> +  vnx128qi v
> +    = {a, b, a, b, a, b, a, b, a, b, a, b, a, b, a, b, a, b, a, b, a, b,
> +       a, b, a, b, a, b, a, b, a, b, a, b, a, b, a, b, a, b, a, b, a, b,
> +       a, b, a, b, a, b, a, b, a, b, a, b, a, b, a, b, a, b, a, b, a, b,
> +       a, b, a, b, a, b, a, b, a, b, a, b, a, b, a, b, a, b, a, b, a, b,
> +       a, b, a, b, a, b, a, b, a, b, a, b, a, b, a, b, a, b, a, b, a, b,
> +       a, b, a, b, a, b, a, b, a, b, a, b, a, b, a, b, a, b};
> +  *(vnx128qi *) out = v;
> +}
> +
> +/* { dg-final { scan-assembler-times {vmv.v.x\tv[0-9]+,\s*[a-x0-9]+} 7 }
> } */
> +/* { dg-final { scan-assembler-times {slli\t[a-x0-9]+,\s*[a-x0-9]+,\s*8}
> 6 } } */
> +/* { dg-final { scan-assembler-times
> {or\t[a-x0-9]+,\s*[a-x0-9]+,\s*[a-x0-9]+} 6 } } */
> +/* { dg-final { scan-assembler-times
> {vslide1down\.vx\tv[0-9]+,\s*v[0-9]+,\s*[a-x0-9]+} 1 } } */
> diff --git
> a/gcc/testsuite/gcc.target/riscv/rvv/autovec/vls-vlmax/repeat-2.c
> b/gcc/testsuite/gcc.target/riscv/rvv/autovec/vls-vlmax/repeat-2.c
> new file mode 100644
> index 00000000000..4cc356ef409
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/vls-vlmax/repeat-2.c
> @@ -0,0 +1,61 @@
> +/* { dg-do compile } */
> +/* { dg-additional-options "-march=rv32gcv -mabi=ilp32d" } */
> +
> +#include <stdint-gcc.h>
> +
> +typedef int8_t vnx8qi __attribute__ ((vector_size (8)));
> +typedef int8_t vnx16qi __attribute__ ((vector_size (16)));
> +typedef int8_t vnx32qi __attribute__ ((vector_size (32)));
> +typedef int8_t vnx64qi __attribute__ ((vector_size (64)));
> +typedef int8_t vnx128qi __attribute__ ((vector_size (128)));
> +
> +__attribute__ ((noipa)) void
> +f_vnx8qi (int8_t a, int8_t b, int8_t c, int8_t d, int8_t *out)
> +{
> +  vnx8qi v = {a, b, c, d, a, b, c, d};
> +  *(vnx8qi *) out = v;
> +}
> +
> +__attribute__ ((noipa)) void
> +f_vnx16qi (int8_t a, int8_t b, int8_t c, int8_t d, int8_t *out)
> +{
> +  vnx16qi v = {a, b, c, d, a, b, c, d, a, b, c, d, a, b, c, d};
> +  *(vnx16qi *) out = v;
> +}
> +
> +__attribute__ ((noipa)) void
> +f_vnx32qi (int8_t a, int8_t b, int8_t c, int8_t d, int8_t *out)
> +{
> +  vnx32qi v = {a, b, c, d, a, b, c, d, a, b, c, d, a, b, c, d,
> +              a, b, c, d, a, b, c, d, a, b, c, d, a, b, c, d};
> +  *(vnx32qi *) out = v;
> +}
> +
> +__attribute__ ((noipa)) void
> +f_vnx64qi (int8_t a, int8_t b, int8_t c, int8_t d, int8_t *out)
> +{
> +  vnx64qi v = {a, b, c, d, a, b, c, d, a, b, c, d, a, b, c, d,
> +              a, b, c, d, a, b, c, d, a, b, c, d, a, b, c, d,
> +              a, b, c, d, a, b, c, d, a, b, c, d, a, b, c, d,
> +              a, b, c, d, a, b, c, d, a, b, c, d, a, b, c, d};
> +  *(vnx64qi *) out = v;
> +}
> +
> +__attribute__ ((noipa)) void
> +f_vnx128qi (int8_t a, int8_t b, int8_t c, int8_t d, int8_t *out)
> +{
> +  vnx128qi v
> +    = {a, b, c, d, a, b, c, d, a, b, c, d, a, b, c, d,
> +       a, b, c, d, a, b, c, d, a, b, c, d, a, b, c, d,
> +       a, b, c, d, a, b, c, d, a, b, c, d, a, b, c, d,
> +       a, b, c, d, a, b, c, d, a, b, c, d, a, b, c, d,
> +       a, b, c, d, a, b, c, d, a, b, c, d, a, b, c, d,
> +       a, b, c, d, a, b, c, d, a, b, c, d, a, b, c, d,
> +       a, b, c, d, a, b, c, d, a, b, c, d, a, b, c, d,
> +       a, b, c, d, a, b, c, d, a, b, c, d, a, b, c, d};
> +  *(vnx128qi *) out = v;
> +}
> +
> +/* { dg-final { scan-assembler-times {vmv.v.x\tv[0-9]+,\s*[a-x0-9]+} 5 }
> } */
> +/* { dg-final { scan-assembler-times {slli} 15 } } */
> +/* { dg-final { scan-assembler-times {or} 15 } } */
> diff --git
> a/gcc/testsuite/gcc.target/riscv/rvv/autovec/vls-vlmax/repeat-3.c
> b/gcc/testsuite/gcc.target/riscv/rvv/autovec/vls-vlmax/repeat-3.c
> new file mode 100644
> index 00000000000..eeef356aded
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/vls-vlmax/repeat-3.c
> @@ -0,0 +1,53 @@
> +/* { dg-do compile } */
> +/* { dg-additional-options "-march=rv64gcv -mabi=lp64d" } */
> +
> +#include <stdint-gcc.h>
> +
> +typedef int8_t vnx16qi __attribute__ ((vector_size (16)));
> +typedef int8_t vnx32qi __attribute__ ((vector_size (32)));
> +typedef int8_t vnx64qi __attribute__ ((vector_size (64)));
> +typedef int8_t vnx128qi __attribute__ ((vector_size (128)));
> +
> +__attribute__ ((noipa)) void
> +f_vnx16qi (int8_t a, int8_t b, int8_t c, int8_t d, int8_t e, int8_t f,
> int8_t g, int8_t h, int8_t *out)
> +{
> +  vnx16qi v = {a, b, c, d, e, f, g, h, a, b, c, d, e, f, g, h};
> +  *(vnx16qi *) out = v;
> +}
> +
> +__attribute__ ((noipa)) void
> +f_vnx32qi (int8_t a, int8_t b, int8_t c, int8_t d, int8_t e, int8_t f,
> int8_t g, int8_t h, int8_t *out)
> +{
> +  vnx32qi v = {a, b, c, d, e, f, g, h, a, b, c, d, e, f, g, h,
> +              a, b, c, d, e, f, g, h, a, b, c, d, e, f, g, h};
> +  *(vnx32qi *) out = v;
> +}
> +
> +__attribute__ ((noipa)) void
> +f_vnx64qi (int8_t a, int8_t b, int8_t c, int8_t d, int8_t e, int8_t f,
> int8_t g, int8_t h, int8_t *out)
> +{
> +  vnx64qi v = {a, b, c, d, e, f, g, h, a, b, c, d, e, f, g, h,
> +              a, b, c, d, e, f, g, h, a, b, c, d, e, f, g, h,
> +              a, b, c, d, e, f, g, h, a, b, c, d, e, f, g, h,
> +              a, b, c, d, e, f, g, h, a, b, c, d, e, f, g, h};
> +  *(vnx64qi *) out = v;
> +}
> +
> +__attribute__ ((noipa)) void
> +f_vnx128qi (int8_t a, int8_t b, int8_t c, int8_t d, int8_t e, int8_t f,
> int8_t g, int8_t h, int8_t *out)
> +{
> +  vnx128qi v
> +    = {a, b, c, d, e, f, g, h, a, b, c, d, e, f, g, h,
> +       a, b, c, d, e, f, g, h, a, b, c, d, e, f, g, h,
> +       a, b, c, d, e, f, g, h, a, b, c, d, e, f, g, h,
> +       a, b, c, d, e, f, g, h, a, b, c, d, e, f, g, h,
> +       a, b, c, d, e, f, g, h, a, b, c, d, e, f, g, h,
> +       a, b, c, d, e, f, g, h, a, b, c, d, e, f, g, h,
> +       a, b, c, d, e, f, g, h, a, b, c, d, e, f, g, h,
> +       a, b, c, d, e, f, g, h, a, b, c, d, e, f, g, h};
> +  *(vnx128qi *) out = v;
> +}
> +
> +/* { dg-final { scan-assembler-times {vmv\.v\.x\tv[0-9]+,\s*[a-x0-9]+} 4 } } */
> +/* { dg-final { scan-assembler-times {slli} 28 } } */
> +/* { dg-final { scan-assembler-times {or} 28 } } */
> diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/vls-vlmax/repeat-4.c b/gcc/testsuite/gcc.target/riscv/rvv/autovec/vls-vlmax/repeat-4.c
> new file mode 100644
> index 00000000000..8e1faed2615
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/vls-vlmax/repeat-4.c
> @@ -0,0 +1,39 @@
> +/* { dg-do compile } */
> +/* { dg-additional-options "-march=rv64gcv -mabi=lp64d" } */
> +
> +#include <stdint-gcc.h>
> +
> +typedef float vnx4sf __attribute__ ((vector_size (16)));
> +typedef float vnx8sf __attribute__ ((vector_size (32)));
> +typedef float vnx16sf __attribute__ ((vector_size (64)));
> +typedef float vnx32sf __attribute__ ((vector_size (128)));
> +
> +__attribute__ ((noipa)) void
> +f_vnx4sf (float a, float b, float *out)
> +{
> +  vnx4sf v = {a, b, a, b};
> +  *(vnx4sf *) out = v;
> +}
> +
> +__attribute__ ((noipa)) void
> +f_vnx8sf (float a, float b, float *out)
> +{
> +  vnx8sf v = {a, b, a, b, a, b, a, b};
> +  *(vnx8sf *) out = v;
> +}
> +
> +__attribute__ ((noipa)) void
> +f_vnx16sf (float a, float b, float *out)
> +{
> +  vnx16sf v = {a, b, a, b, a, b, a, b, a, b, a, b, a, b, a, b};
> +  *(vnx16sf *) out = v;
> +}
> +
> +__attribute__ ((noipa)) void
> +f_vnx32sf (float a, float b, float *out)
> +{
> +  vnx32sf v = {a, b, a, b, a, b, a, b, a, b, a, b, a, b, a, b, a, b, a, b, a, b, a, b, a, b, a, b, a, b, a, b};
> +  *(vnx32sf *) out = v;
> +}
> +
> +/* { dg-final { scan-assembler-times {vmv\.v\.x\tv[0-9]+,\s*[a-x0-9]+} 4 } } */
> diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/vls-vlmax/repeat-5.c b/gcc/testsuite/gcc.target/riscv/rvv/autovec/vls-vlmax/repeat-5.c
> new file mode 100644
> index 00000000000..48ac7993a56
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/vls-vlmax/repeat-5.c
> @@ -0,0 +1,74 @@
> +/* { dg-do compile } */
> +/* { dg-additional-options "-march=rv32gcv -mabi=ilp32d" } */
> +
> +#include <stdint-gcc.h>
> +
> +typedef int8_t vnx2qi __attribute__ ((vector_size (2)));
> +typedef int8_t vnx4qi __attribute__ ((vector_size (4)));
> +typedef int8_t vnx8qi __attribute__ ((vector_size (8)));
> +typedef int8_t vnx16qi __attribute__ ((vector_size (16)));
> +typedef int8_t vnx32qi __attribute__ ((vector_size (32)));
> +typedef int8_t vnx64qi __attribute__ ((vector_size (64)));
> +typedef int8_t vnx128qi __attribute__ ((vector_size (128)));
> +
> +__attribute__ ((noipa)) void
> +f_vnx2qi (int8_t a, int8_t *out)
> +{
> +  vnx2qi v = {a, -88};
> +  *(vnx2qi *) out = v;
> +}
> +
> +__attribute__ ((noipa)) void
> +f_vnx4qi (int8_t a, int8_t *out)
> +{
> +  vnx4qi v = {a, -88, a, -88};
> +  *(vnx4qi *) out = v;
> +}
> +
> +__attribute__ ((noipa)) void
> +f_vnx8qi (int8_t a, int8_t *out)
> +{
> +  vnx8qi v = {a, -88, a, -88, a, -88, a, -88};
> +  *(vnx8qi *) out = v;
> +}
> +
> +__attribute__ ((noipa)) void
> +f_vnx16qi (int8_t a, int8_t *out)
> +{
> +  vnx16qi v = {a, -88, a, -88, a, -88, a, -88, a, -88, a, -88, a, -88, a, -88};
> +  *(vnx16qi *) out = v;
> +}
> +
> +__attribute__ ((noipa)) void
> +f_vnx32qi (int8_t a, int8_t *out)
> +{
> +  vnx32qi v = {a, -88, a, -88, a, -88, a, -88, a, -88, a, -88, a, -88, a, -88,
> +              a, -88, a, -88, a, -88, a, -88, a, -88, a, -88, a, -88, a, -88};
> +  *(vnx32qi *) out = v;
> +}
> +
> +__attribute__ ((noipa)) void
> +f_vnx64qi (int8_t a, int8_t *out)
> +{
> +  vnx64qi v = {a, -88, a, -88, a, -88, a, -88, a, -88, a, -88, a, -88, a, -88, a, -88, a, -88, a, -88,
> +              a, -88, a, -88, a, -88, a, -88, a, -88, a, -88, a, -88, a, -88, a, -88, a, -88, a, -88,
> +              a, -88, a, -88, a, -88, a, -88, a, -88, a, -88, a, -88, a, -88, a, -88, a, -88};
> +  *(vnx64qi *) out = v;
> +}
> +
> +__attribute__ ((noipa)) void
> +f_vnx128qi (int8_t a, int8_t *out)
> +{
> +  vnx128qi v
> +    = {a, -88, a, -88, a, -88, a, -88, a, -88, a, -88, a, -88, a, -88, a, -88, a, -88, a, -88,
> +       a, -88, a, -88, a, -88, a, -88, a, -88, a, -88, a, -88, a, -88, a, -88, a, -88, a, -88,
> +       a, -88, a, -88, a, -88, a, -88, a, -88, a, -88, a, -88, a, -88, a, -88, a, -88, a, -88,
> +       a, -88, a, -88, a, -88, a, -88, a, -88, a, -88, a, -88, a, -88, a, -88, a, -88, a, -88,
> +       a, -88, a, -88, a, -88, a, -88, a, -88, a, -88, a, -88, a, -88, a, -88, a, -88, a, -88,
> +       a, -88, a, -88, a, -88, a, -88, a, -88, a, -88, a, -88, a, -88, a, -88};
> +  *(vnx128qi *) out = v;
> +}
> +
> +/* { dg-final { scan-assembler-times {vmv.v.x\tv[0-9]+,\s*[a-x0-9]+} 7 } } */
> +/* { dg-final { scan-assembler-times {or\t[a-x0-9]+,\s*[a-x0-9]+,\s*[a-x0-9]+} 6 } } */
> +/* { dg-final { scan-assembler-times {vslide1down\.vx\tv[0-9]+,\s*v[0-9]+,\s*[a-x0-9]+} 1 } } */
> diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/vls-vlmax/repeat-6.c b/gcc/testsuite/gcc.target/riscv/rvv/autovec/vls-vlmax/repeat-6.c
> new file mode 100644
> index 00000000000..4dc5703d894
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/vls-vlmax/repeat-6.c
> @@ -0,0 +1,78 @@
> +/* { dg-do compile } */
> +/* { dg-additional-options "-march=rv32gcv -mabi=ilp32d" } */
> +
> +#include <stdint-gcc.h>
> +
> +typedef int8_t vnx2qi __attribute__ ((vector_size (2)));
> +typedef int8_t vnx4qi __attribute__ ((vector_size (4)));
> +typedef int8_t vnx8qi __attribute__ ((vector_size (8)));
> +typedef int8_t vnx16qi __attribute__ ((vector_size (16)));
> +typedef int8_t vnx32qi __attribute__ ((vector_size (32)));
> +typedef int8_t vnx64qi __attribute__ ((vector_size (64)));
> +typedef int8_t vnx128qi __attribute__ ((vector_size (128)));
> +
> +int8_t a = -33;
> +int8_t b = -123;
> +
> +__attribute__ ((noipa)) void
> +f_vnx2qi (int8_t *out)
> +{
> +  vnx2qi v = {a, b};
> +  *(vnx2qi *) out = v;
> +}
> +
> +__attribute__ ((noipa)) void
> +f_vnx4qi (int8_t *out)
> +{
> +  vnx4qi v = {a, b, a, b};
> +  *(vnx4qi *) out = v;
> +}
> +
> +__attribute__ ((noipa)) void
> +f_vnx8qi (int8_t *out)
> +{
> +  vnx8qi v = {a, b, a, b, a, b, a, b};
> +  *(vnx8qi *) out = v;
> +}
> +
> +__attribute__ ((noipa)) void
> +f_vnx16qi (int8_t *out)
> +{
> +  vnx16qi v = {a, b, a, b, a, b, a, b, a, b, a, b, a, b, a, b};
> +  *(vnx16qi *) out = v;
> +}
> +
> +__attribute__ ((noipa)) void
> +f_vnx32qi (int8_t *out)
> +{
> +  vnx32qi v = {a, b, a, b, a, b, a, b, a, b, a, b, a, b, a, b,
> +              a, b, a, b, a, b, a, b, a, b, a, b, a, b, a, b};
> +  *(vnx32qi *) out = v;
> +}
> +
> +__attribute__ ((noipa)) void
> +f_vnx64qi (int8_t *out)
> +{
> +  vnx64qi v = {a, b, a, b, a, b, a, b, a, b, a, b, a, b, a, b, a, b, a, b, a, b,
> +              a, b, a, b, a, b, a, b, a, b, a, b, a, b, a, b, a, b, a, b, a, b,
> +              a, b, a, b, a, b, a, b, a, b, a, b, a, b, a, b, a, b, a, b};
> +  *(vnx64qi *) out = v;
> +}
> +
> +__attribute__ ((noipa)) void
> +f_vnx128qi (int8_t *out)
> +{
> +  vnx128qi v
> +    = {a, b, a, b, a, b, a, b, a, b, a, b, a, b, a, b, a, b, a, b, a, b,
> +       a, b, a, b, a, b, a, b, a, b, a, b, a, b, a, b, a, b, a, b, a, b,
> +       a, b, a, b, a, b, a, b, a, b, a, b, a, b, a, b, a, b, a, b, a, b,
> +       a, b, a, b, a, b, a, b, a, b, a, b, a, b, a, b, a, b, a, b, a, b,
> +       a, b, a, b, a, b, a, b, a, b, a, b, a, b, a, b, a, b, a, b, a, b,
> +       a, b, a, b, a, b, a, b, a, b, a, b, a, b, a, b, a, b};
> +  *(vnx128qi *) out = v;
> +}
> +
> +/* { dg-final { scan-assembler-times {vmv.v.x\tv[0-9]+,\s*[a-x0-9]+} 6 } } */
> +/* { dg-final { scan-assembler-times {slli\t[a-x0-9]+,\s*[a-x0-9]+,\s*8} 6 } } */
> +/* { dg-final { scan-assembler-times {or\t[a-x0-9]+,\s*[a-x0-9]+,\s*[a-x0-9]+} 6 } } */
> +/* { dg-final { scan-assembler-times {vslide1down\.vx\tv[0-9]+,\s*v[0-9]+,\s*[a-x0-9]+} 1 } } */
> diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/vls-vlmax/repeat_run-1.c b/gcc/testsuite/gcc.target/riscv/rvv/autovec/vls-vlmax/repeat_run-1.c
> new file mode 100644
> index 00000000000..85ec963c47b
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/vls-vlmax/repeat_run-1.c
> @@ -0,0 +1,125 @@
> +/* { dg-do run { target { riscv_vector } } } */
> +/* { dg-options "--param riscv-autovec-preference=fixed-vlmax -O3" } */
> +
> +#include "repeat-1.c"
> +
> +int
> +main ()
> +{
> +  int8_t a = -17;
> +  int8_t b = 89;
> +
> +  int8_t v_vnx2qi[sizeof (vnx2qi) / sizeof (int8_t)];
> +  f_vnx2qi (a, b, v_vnx2qi);
> +  for (int i = 0; i < sizeof (vnx2qi) / sizeof (int8_t); i++)
> +    {
> +      if (i % 2 == 0)
> +       {
> +         if (v_vnx2qi[i] != a)
> +           __builtin_abort ();
> +       }
> +      else
> +       {
> +         if (v_vnx2qi[i] != b)
> +           __builtin_abort ();
> +       }
> +    }
> +
> +  int8_t v_vnx4qi[sizeof (vnx4qi) / sizeof (int8_t)];
> +  f_vnx4qi (a, b, v_vnx4qi);
> +  for (int i = 0; i < sizeof (vnx4qi) / sizeof (int8_t); i++)
> +    {
> +      if (i % 2 == 0)
> +       {
> +         if (v_vnx4qi[i] != a)
> +           __builtin_abort ();
> +       }
> +      else
> +       {
> +         if (v_vnx4qi[i] != b)
> +           __builtin_abort ();
> +       }
> +    }
> +
> +  int8_t v_vnx8qi[sizeof (vnx8qi) / sizeof (int8_t)];
> +  f_vnx8qi (a, b, v_vnx8qi);
> +  for (int i = 0; i < sizeof (vnx8qi) / sizeof (int8_t); i++)
> +    {
> +      if (i % 2 == 0)
> +       {
> +         if (v_vnx8qi[i] != a)
> +           __builtin_abort ();
> +       }
> +      else
> +       {
> +         if (v_vnx8qi[i] != b)
> +           __builtin_abort ();
> +       }
> +    }
> +
> +  int8_t v_vnx16qi[sizeof (vnx16qi) / sizeof (int8_t)];
> +  f_vnx16qi (a, b, v_vnx16qi);
> +  for (int i = 0; i < sizeof (vnx16qi) / sizeof (int8_t); i++)
> +    {
> +      if (i % 2 == 0)
> +       {
> +         if (v_vnx16qi[i] != a)
> +           __builtin_abort ();
> +       }
> +      else
> +       {
> +         if (v_vnx16qi[i] != b)
> +           __builtin_abort ();
> +       }
> +    }
> +
> +  int8_t v_vnx32qi[sizeof (vnx32qi) / sizeof (int8_t)];
> +  f_vnx32qi (a, b, v_vnx32qi);
> +  for (int i = 0; i < sizeof (vnx32qi) / sizeof (int8_t); i++)
> +    {
> +      if (i % 2 == 0)
> +       {
> +         if (v_vnx32qi[i] != a)
> +           __builtin_abort ();
> +       }
> +      else
> +       {
> +         if (v_vnx32qi[i] != b)
> +           __builtin_abort ();
> +       }
> +    }
> +
> +  int8_t v_vnx64qi[sizeof (vnx64qi) / sizeof (int8_t)];
> +  f_vnx64qi (a, b, v_vnx64qi);
> +  for (int i = 0; i < sizeof (vnx64qi) / sizeof (int8_t); i++)
> +    {
> +      if (i % 2 == 0)
> +       {
> +         if (v_vnx64qi[i] != a)
> +           __builtin_abort ();
> +       }
> +      else
> +       {
> +         if (v_vnx64qi[i] != b)
> +           __builtin_abort ();
> +       }
> +    }
> +
> +  int8_t v_vnx128qi[sizeof (vnx128qi) / sizeof (int8_t)];
> +  f_vnx128qi (a, b, v_vnx128qi);
> +  for (int i = 0; i < sizeof (vnx128qi) / sizeof (int8_t); i++)
> +    {
> +      if (i % 2 == 0)
> +       {
> +         if (v_vnx128qi[i] != a)
> +           __builtin_abort ();
> +       }
> +      else
> +       {
> +         if (v_vnx128qi[i] != b)
> +           __builtin_abort ();
> +       }
> +    }
> +
> +  return 0;
> +}
> diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/vls-vlmax/repeat_run-2.c b/gcc/testsuite/gcc.target/riscv/rvv/autovec/vls-vlmax/repeat_run-2.c
> new file mode 100644
> index 00000000000..cb054b6c43c
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/vls-vlmax/repeat_run-2.c
> @@ -0,0 +1,145 @@
> +/* { dg-do run { target { riscv_vector } } } */
> +/* { dg-options "--param riscv-autovec-preference=fixed-vlmax -O3" } */
> +
> +#include "repeat-2.c"
> +
> +int
> +main ()
> +{
> +  int8_t a = -17;
> +  int8_t b = -120;
> +  int8_t c = 111;
> +  int8_t d = -11;
> +
> +  int8_t v_vnx8qi[sizeof (vnx8qi) / sizeof (int8_t)];
> +  f_vnx8qi (a, b, c, d, v_vnx8qi);
> +  for (int i = 0; i < sizeof (vnx8qi) / sizeof (int8_t); i++)
> +    {
> +      if (i % 4 == 0)
> +       {
> +         if (v_vnx8qi[i] != a)
> +           __builtin_abort ();
> +       }
> +      else if (i % 4 == 1)
> +       {
> +         if (v_vnx8qi[i] != b)
> +           __builtin_abort ();
> +       }
> +      else if (i % 4 == 2)
> +       {
> +         if (v_vnx8qi[i] != c)
> +           __builtin_abort ();
> +       }
> +      else
> +       {
> +         if (v_vnx8qi[i] != d)
> +           __builtin_abort ();
> +       }
> +    }
> +
> +  int8_t v_vnx16qi[sizeof (vnx16qi) / sizeof (int8_t)];
> +  f_vnx16qi (a, b, c, d, v_vnx16qi);
> +  for (int i = 0; i < sizeof (vnx16qi) / sizeof (int8_t); i++)
> +    {
> +      if (i % 4 == 0)
> +       {
> +         if (v_vnx16qi[i] != a)
> +           __builtin_abort ();
> +       }
> +      else if (i % 4 == 1)
> +       {
> +         if (v_vnx16qi[i] != b)
> +           __builtin_abort ();
> +       }
> +      else if (i % 4 == 2)
> +       {
> +         if (v_vnx16qi[i] != c)
> +           __builtin_abort ();
> +       }
> +      else
> +       {
> +         if (v_vnx16qi[i] != d)
> +           __builtin_abort ();
> +       }
> +    }
> +
> +  int8_t v_vnx32qi[sizeof (vnx32qi) / sizeof (int8_t)];
> +  f_vnx32qi (a, b, c, d, v_vnx32qi);
> +  for (int i = 0; i < sizeof (vnx32qi) / sizeof (int8_t); i++)
> +    {
> +      if (i % 4 == 0)
> +       {
> +         if (v_vnx32qi[i] != a)
> +           __builtin_abort ();
> +       }
> +      else if (i % 4 == 1)
> +       {
> +         if (v_vnx32qi[i] != b)
> +           __builtin_abort ();
> +       }
> +      else if (i % 4 == 2)
> +       {
> +         if (v_vnx32qi[i] != c)
> +           __builtin_abort ();
> +       }
> +      else
> +       {
> +         if (v_vnx32qi[i] != d)
> +           __builtin_abort ();
> +       }
> +    }
> +
> +  int8_t v_vnx64qi[sizeof (vnx64qi) / sizeof (int8_t)];
> +  f_vnx64qi (a, b, c, d, v_vnx64qi);
> +  for (int i = 0; i < sizeof (vnx64qi) / sizeof (int8_t); i++)
> +    {
> +      if (i % 4 == 0)
> +       {
> +         if (v_vnx64qi[i] != a)
> +           __builtin_abort ();
> +       }
> +      else if (i % 4 == 1)
> +       {
> +         if (v_vnx64qi[i] != b)
> +           __builtin_abort ();
> +       }
> +      else if (i % 4 == 2)
> +       {
> +         if (v_vnx64qi[i] != c)
> +           __builtin_abort ();
> +       }
> +      else
> +       {
> +         if (v_vnx64qi[i] != d)
> +           __builtin_abort ();
> +       }
> +    }
> +
> +  int8_t v_vnx128qi[sizeof (vnx128qi) / sizeof (int8_t)];
> +  f_vnx128qi (a, b, c, d, v_vnx128qi);
> +  for (int i = 0; i < sizeof (vnx128qi) / sizeof (int8_t); i++)
> +    {
> +      if (i % 4 == 0)
> +       {
> +         if (v_vnx128qi[i] != a)
> +           __builtin_abort ();
> +       }
> +      else if (i % 4 == 1)
> +       {
> +         if (v_vnx128qi[i] != b)
> +           __builtin_abort ();
> +       }
> +      else if (i % 4 == 2)
> +       {
> +         if (v_vnx128qi[i] != c)
> +           __builtin_abort ();
> +       }
> +      else
> +       {
> +         if (v_vnx128qi[i] != d)
> +           __builtin_abort ();
> +       }
> +    }
> +
> +  return 0;
> +}
> diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/vls-vlmax/repeat_run-3.c b/gcc/testsuite/gcc.target/riscv/rvv/autovec/vls-vlmax/repeat_run-3.c
> new file mode 100644
> index 00000000000..2cbe1c2bf95
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/vls-vlmax/repeat_run-3.c
> @@ -0,0 +1,202 @@
> +/* { dg-do run { target { riscv_vector } } } */
> +/* { dg-options "--param riscv-autovec-preference=fixed-vlmax -O3" } */
> +#include "repeat-3.c"
> +
> +int
> +main ()
> +{
> +  int8_t a = -17;
> +  int8_t b = -120;
> +  int8_t c = 111;
> +  int8_t d = -11;
> +  int8_t e = -34;
> +  int8_t f = -101;
> +  int8_t g = 33;
> +  int8_t h = 9;
> +
> +  int8_t v_vnx16qi[sizeof (vnx16qi) / sizeof (int8_t)];
> +  f_vnx16qi (a, b, c, d, e, f, g, h, v_vnx16qi);
> +  for (int i = 0; i < sizeof (vnx16qi) / sizeof (int8_t); i++)
> +    {
> +      if (i % 8 == 0)
> +       {
> +         if (v_vnx16qi[i] != a)
> +           __builtin_abort ();
> +       }
> +      else if (i % 8 == 1)
> +       {
> +         if (v_vnx16qi[i] != b)
> +           __builtin_abort ();
> +       }
> +      else if (i % 8 == 2)
> +       {
> +         if (v_vnx16qi[i] != c)
> +           __builtin_abort ();
> +       }
> +      else if (i % 8 == 3)
> +       {
> +         if (v_vnx16qi[i] != d)
> +           __builtin_abort ();
> +       }
> +      else if (i % 8 == 4)
> +       {
> +         if (v_vnx16qi[i] != e)
> +           __builtin_abort ();
> +       }
> +      else if (i % 8 == 5)
> +       {
> +         if (v_vnx16qi[i] != f)
> +           __builtin_abort ();
> +       }
> +      else if (i % 8 == 6)
> +       {
> +         if (v_vnx16qi[i] != g)
> +           __builtin_abort ();
> +       }
> +      else if (i % 8 == 7)
> +       {
> +         if (v_vnx16qi[i] != h)
> +           __builtin_abort ();
> +       }
> +    }
> +
> +  int8_t v_vnx32qi[sizeof (vnx32qi) / sizeof (int8_t)];
> +  f_vnx32qi (a, b, c, d, e, f, g, h, v_vnx32qi);
> +  for (int i = 0; i < sizeof (vnx32qi) / sizeof (int8_t); i++)
> +    {
> +      if (i % 8 == 0)
> +       {
> +         if (v_vnx32qi[i] != a)
> +           __builtin_abort ();
> +       }
> +      else if (i % 8 == 1)
> +       {
> +         if (v_vnx32qi[i] != b)
> +           __builtin_abort ();
> +       }
> +      else if (i % 8 == 2)
> +       {
> +         if (v_vnx32qi[i] != c)
> +           __builtin_abort ();
> +       }
> +      else if (i % 8 == 3)
> +       {
> +         if (v_vnx32qi[i] != d)
> +           __builtin_abort ();
> +       }
> +      else if (i % 8 == 4)
> +       {
> +         if (v_vnx32qi[i] != e)
> +           __builtin_abort ();
> +       }
> +      else if (i % 8 == 5)
> +       {
> +         if (v_vnx32qi[i] != f)
> +           __builtin_abort ();
> +       }
> +      else if (i % 8 == 6)
> +       {
> +         if (v_vnx32qi[i] != g)
> +           __builtin_abort ();
> +       }
> +      else if (i % 8 == 7)
> +       {
> +         if (v_vnx32qi[i] != h)
> +           __builtin_abort ();
> +       }
> +    }
> +
> +  int8_t v_vnx64qi[sizeof (vnx64qi) / sizeof (int8_t)];
> +  f_vnx64qi (a, b, c, d, e, f, g, h, v_vnx64qi);
> +  for (int i = 0; i < sizeof (vnx64qi) / sizeof (int8_t); i++)
> +    {
> +      if (i % 8 == 0)
> +       {
> +         if (v_vnx64qi[i] != a)
> +           __builtin_abort ();
> +       }
> +      else if (i % 8 == 1)
> +       {
> +         if (v_vnx64qi[i] != b)
> +           __builtin_abort ();
> +       }
> +      else if (i % 8 == 2)
> +       {
> +         if (v_vnx64qi[i] != c)
> +           __builtin_abort ();
> +       }
> +      else if (i % 8 == 3)
> +       {
> +         if (v_vnx64qi[i] != d)
> +           __builtin_abort ();
> +       }
> +      else if (i % 8 == 4)
> +       {
> +         if (v_vnx64qi[i] != e)
> +           __builtin_abort ();
> +       }
> +      else if (i % 8 == 5)
> +       {
> +         if (v_vnx64qi[i] != f)
> +           __builtin_abort ();
> +       }
> +      else if (i % 8 == 6)
> +       {
> +         if (v_vnx64qi[i] != g)
> +           __builtin_abort ();
> +       }
> +      else if (i % 8 == 7)
> +       {
> +         if (v_vnx64qi[i] != h)
> +           __builtin_abort ();
> +       }
> +    }
> +
> +  int8_t v_vnx128qi[sizeof (vnx128qi) / sizeof (int8_t)];
> +  f_vnx128qi (a, b, c, d, e, f, g, h, v_vnx128qi);
> +  for (int i = 0; i < sizeof (vnx128qi) / sizeof (int8_t); i++)
> +    {
> +      if (i % 8 == 0)
> +       {
> +         if (v_vnx128qi[i] != a)
> +           __builtin_abort ();
> +       }
> +      else if (i % 8 == 1)
> +       {
> +         if (v_vnx128qi[i] != b)
> +           __builtin_abort ();
> +       }
> +      else if (i % 8 == 2)
> +       {
> +         if (v_vnx128qi[i] != c)
> +           __builtin_abort ();
> +       }
> +      else if (i % 8 == 3)
> +       {
> +         if (v_vnx128qi[i] != d)
> +           __builtin_abort ();
> +       }
> +      else if (i % 8 == 4)
> +       {
> +         if (v_vnx128qi[i] != e)
> +           __builtin_abort ();
> +       }
> +      else if (i % 8 == 5)
> +       {
> +         if (v_vnx128qi[i] != f)
> +           __builtin_abort ();
> +       }
> +      else if (i % 8 == 6)
> +       {
> +         if (v_vnx128qi[i] != g)
> +           __builtin_abort ();
> +       }
> +      else if (i % 8 == 7)
> +       {
> +         if (v_vnx128qi[i] != h)
> +           __builtin_abort ();
> +       }
> +    }
> +
> +  return 0;
> +}
> diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/vls-vlmax/repeat_run-4.c b/gcc/testsuite/gcc.target/riscv/rvv/autovec/vls-vlmax/repeat_run-4.c
> new file mode 100644
> index 00000000000..9efb6b2bc39
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/vls-vlmax/repeat_run-4.c
> @@ -0,0 +1,77 @@
> +/* { dg-do run { target { riscv_vector } } } */
> +/* { dg-options "--param riscv-autovec-preference=fixed-vlmax -O3" } */
> +
> +#include "repeat-4.c"
> +
> +int
> +main ()
> +{
> +  float a = -9523.33;
> +  float b = 8156.55;
> +
> +  float v_vnx4sf[sizeof (vnx4sf) / sizeof (float)];
> +  f_vnx4sf (a, b, v_vnx4sf);
> +  for (int i = 0; i < sizeof (vnx4sf) / sizeof (float); i++)
> +    {
> +      if (i % 2 == 0)
> +       {
> +         if (v_vnx4sf[i] != a)
> +           __builtin_abort ();
> +       }
> +      else
> +       {
> +         if (v_vnx4sf[i] != b)
> +           __builtin_abort ();
> +       }
> +    }
> +
> +  float v_vnx8sf[sizeof (vnx8sf) / sizeof (float)];
> +  f_vnx8sf (a, b, v_vnx8sf);
> +  for (int i = 0; i < sizeof (vnx8sf) / sizeof (float); i++)
> +    {
> +      if (i % 2 == 0)
> +       {
> +         if (v_vnx8sf[i] != a)
> +           __builtin_abort ();
> +       }
> +      else
> +       {
> +         if (v_vnx8sf[i] != b)
> +           __builtin_abort ();
> +       }
> +    }
> +
> +  float v_vnx16sf[sizeof (vnx16sf) / sizeof (float)];
> +  f_vnx16sf (a, b, v_vnx16sf);
> +  for (int i = 0; i < sizeof (vnx16sf) / sizeof (float); i++)
> +    {
> +      if (i % 2 == 0)
> +       {
> +         if (v_vnx16sf[i] != a)
> +           __builtin_abort ();
> +       }
> +      else
> +       {
> +         if (v_vnx16sf[i] != b)
> +           __builtin_abort ();
> +       }
> +    }
> +
> +  float v_vnx32sf[sizeof (vnx32sf) / sizeof (float)];
> +  f_vnx32sf (a, b, v_vnx32sf);
> +  for (int i = 0; i < sizeof (vnx32sf) / sizeof (float); i++)
> +    {
> +      if (i % 2 == 0)
> +       {
> +         if (v_vnx32sf[i] != a)
> +           __builtin_abort ();
> +       }
> +      else
> +       {
> +         if (v_vnx32sf[i] != b)
> +           __builtin_abort ();
> +       }
> +    }
> +
> +  return 0;
> +}
> diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/vls-vlmax/repeat_run-5.c b/gcc/testsuite/gcc.target/riscv/rvv/autovec/vls-vlmax/repeat_run-5.c
> new file mode 100644
> index 00000000000..efd7d293c0b
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/vls-vlmax/repeat_run-5.c
> @@ -0,0 +1,124 @@
> +/* { dg-do run { target { riscv_vector } } } */
> +/* { dg-options "--param riscv-autovec-preference=fixed-vlmax -O3" } */
> +
> +#include "repeat-5.c"
> +
> +int
> +main ()
> +{
> +  int8_t a = -122;
> +
> +  int8_t v_vnx2qi[sizeof (vnx2qi) / sizeof (int8_t)];
> +  f_vnx2qi (a, v_vnx2qi);
> +  for (int i = 0; i < sizeof (vnx2qi) / sizeof (int8_t); i++)
> +    {
> +      if (i % 2 == 0)
> +       {
> +         if (v_vnx2qi[i] != a)
> +           __builtin_abort ();
> +       }
> +      else
> +       {
> +         if (v_vnx2qi[i] != -88)
> +           __builtin_abort ();
> +       }
> +    }
> +
> +  int8_t v_vnx4qi[sizeof (vnx4qi) / sizeof (int8_t)];
> +  f_vnx4qi (a, v_vnx4qi);
> +  for (int i = 0; i < sizeof (vnx4qi) / sizeof (int8_t); i++)
> +    {
> +      if (i % 2 == 0)
> +       {
> +         if (v_vnx4qi[i] != a)
> +           __builtin_abort ();
> +       }
> +      else
> +       {
> +         if (v_vnx4qi[i] != -88)
> +           __builtin_abort ();
> +       }
> +    }
> +
> +  int8_t v_vnx8qi[sizeof (vnx8qi) / sizeof (int8_t)];
> +  f_vnx8qi (a, v_vnx8qi);
> +  for (int i = 0; i < sizeof (vnx8qi) / sizeof (int8_t); i++)
> +    {
> +      if (i % 2 == 0)
> +       {
> +         if (v_vnx8qi[i] != a)
> +           __builtin_abort ();
> +       }
> +      else
> +       {
> +         if (v_vnx8qi[i] != -88)
> +           __builtin_abort ();
> +       }
> +    }
> +
> +  int8_t v_vnx16qi[sizeof (vnx16qi) / sizeof (int8_t)];
> +  f_vnx16qi (a, v_vnx16qi);
> +  for (int i = 0; i < sizeof (vnx16qi) / sizeof (int8_t); i++)
> +    {
> +      if (i % 2 == 0)
> +       {
> +         if (v_vnx16qi[i] != a)
> +           __builtin_abort ();
> +       }
> +      else
> +       {
> +         if (v_vnx16qi[i] != -88)
> +           __builtin_abort ();
> +       }
> +    }
> +
> +  int8_t v_vnx32qi[sizeof (vnx32qi) / sizeof (int8_t)];
> +  f_vnx32qi (a, v_vnx32qi);
> +  for (int i = 0; i < sizeof (vnx32qi) / sizeof (int8_t); i++)
> +    {
> +      if (i % 2 == 0)
> +       {
> +         if (v_vnx32qi[i] != a)
> +           __builtin_abort ();
> +       }
> +      else
> +       {
> +         if (v_vnx32qi[i] != -88)
> +           __builtin_abort ();
> +       }
> +    }
> +
> +  int8_t v_vnx64qi[sizeof (vnx64qi) / sizeof (int8_t)];
> +  f_vnx64qi (a, v_vnx64qi);
> +  for (int i = 0; i < sizeof (vnx64qi) / sizeof (int8_t); i++)
> +    {
> +      if (i % 2 == 0)
> +       {
> +         if (v_vnx64qi[i] != a)
> +           __builtin_abort ();
> +       }
> +      else
> +       {
> +         if (v_vnx64qi[i] != -88)
> +           __builtin_abort ();
> +       }
> +    }
> +
> +  int8_t v_vnx128qi[sizeof (vnx128qi) / sizeof (int8_t)];
> +  f_vnx128qi (a, v_vnx128qi);
> +  for (int i = 0; i < sizeof (vnx128qi) / sizeof (int8_t); i++)
> +    {
> +      if (i % 2 == 0)
> +       {
> +         if (v_vnx128qi[i] != a)
> +           __builtin_abort ();
> +       }
> +      else
> +       {
> +         if (v_vnx128qi[i] != -88)
> +           __builtin_abort ();
> +       }
> +    }
> +
> +  return 0;
> +}
> diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/vls-vlmax/repeat_run-6.c b/gcc/testsuite/gcc.target/riscv/rvv/autovec/vls-vlmax/repeat_run-6.c
> new file mode 100644
> index 00000000000..53836956c3b
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/vls-vlmax/repeat_run-6.c
> @@ -0,0 +1,122 @@
> +/* { dg-do run { target { riscv_vector } } } */
> +/* { dg-options "--param riscv-autovec-preference=fixed-vlmax -O3" } */
> +
> +#include "repeat-6.c"
> +
> +int
> +main ()
> +{
> +  int8_t v_vnx2qi[sizeof (vnx2qi) / sizeof (int8_t)];
> +  f_vnx2qi (v_vnx2qi);
> +  for (int i = 0; i < sizeof (vnx2qi) / sizeof (int8_t); i++)
> +    {
> +      if (i % 2 == 0)
> +       {
> +         if (v_vnx2qi[i] != -33)
> +           __builtin_abort ();
> +       }
> +      else
> +       {
> +         if (v_vnx2qi[i] != -123)
> +           __builtin_abort ();
> +       }
> +    }
> +
> +  int8_t v_vnx4qi[sizeof (vnx4qi) / sizeof (int8_t)];
> +  f_vnx4qi (v_vnx4qi);
> +  for (int i = 0; i < sizeof (vnx4qi) / sizeof (int8_t); i++)
> +    {
> +      if (i % 2 == 0)
> +       {
> +         if (v_vnx4qi[i] != -33)
> +           __builtin_abort ();
> +       }
> +      else
> +       {
> +         if (v_vnx4qi[i] != -123)
> +           __builtin_abort ();
> +       }
> +    }
> +
> +  int8_t v_vnx8qi[sizeof (vnx8qi) / sizeof (int8_t)];
> +  f_vnx8qi (v_vnx8qi);
> +  for (int i = 0; i < sizeof (vnx8qi) / sizeof (int8_t); i++)
> +    {
> +      if (i % 2 == 0)
> +       {
> +         if (v_vnx8qi[i] != -33)
> +           __builtin_abort ();
> +       }
> +      else
> +       {
> +         if (v_vnx8qi[i] != -123)
> +           __builtin_abort ();
> +       }
> +    }
> +
> +  int8_t v_vnx16qi[sizeof (vnx16qi) / sizeof (int8_t)];
> +  f_vnx16qi (v_vnx16qi);
> +  for (int i = 0; i < sizeof (vnx16qi) / sizeof (int8_t); i++)
> +    {
> +      if (i % 2 == 0)
> +       {
> +         if (v_vnx16qi[i] != -33)
> +           __builtin_abort ();
> +       }
> +      else
> +       {
> +         if (v_vnx16qi[i] != -123)
> +           __builtin_abort ();
> +       }
> +    }
> +
> +  int8_t v_vnx32qi[sizeof (vnx32qi) / sizeof (int8_t)];
> +  f_vnx32qi (v_vnx32qi);
> +  for (int i = 0; i < sizeof (vnx32qi) / sizeof (int8_t); i++)
> +    {
> +      if (i % 2 == 0)
> +       {
> +         if (v_vnx32qi[i] != -33)
> +           __builtin_abort ();
> +       }
> +      else
> +       {
> +         if (v_vnx32qi[i] != -123)
> +           __builtin_abort ();
> +       }
> +    }
> +
> +  int8_t v_vnx64qi[sizeof (vnx64qi) / sizeof (int8_t)];
> +  f_vnx64qi (v_vnx64qi);
> +  for (int i = 0; i < sizeof (vnx64qi) / sizeof (int8_t); i++)
> +    {
> +      if (i % 2 == 0)
> +       {
> +         if (v_vnx64qi[i] != -33)
> +           __builtin_abort ();
> +       }
> +      else
> +       {
> +         if (v_vnx64qi[i] != -123)
> +           __builtin_abort ();
> +       }
> +    }
> +
> +  int8_t v_vnx128qi[sizeof (vnx128qi) / sizeof (int8_t)];
> +  f_vnx128qi (v_vnx128qi);
> +  for (int i = 0; i < sizeof (vnx128qi) / sizeof (int8_t); i++)
> +    {
> +      if (i % 2 == 0)
> +       {
> +         if (v_vnx128qi[i] != -33)
> +           __builtin_abort ();
> +       }
> +      else
> +       {
> +         if (v_vnx128qi[i] != -123)
> +           __builtin_abort ();
> +       }
> +    }
> +
> +  return 0;
> +}
> diff --git a/gcc/testsuite/gcc.target/riscv/rvv/rvv.exp b/gcc/testsuite/gcc.target/riscv/rvv/rvv.exp
> index be2457fbd99..bc99cc0c3cf 100644
> --- a/gcc/testsuite/gcc.target/riscv/rvv/rvv.exp
> +++ b/gcc/testsuite/gcc.target/riscv/rvv/rvv.exp
> @@ -65,5 +65,9 @@ foreach op $AUTOVEC_TEST_OPTS {
>      "" "$op"
>  }
>
> +# VLS-VLMAX tests
> +dg-runtest [lsort [glob -nocomplain $srcdir/$subdir/autovec/vls-vlmax/*.\[cS\]]] \
> +       "-std=c99 -O3 -ftree-vectorize --param riscv-autovec-preference=fixed-vlmax" $CFLAGS
> +
>  # All done.
>  dg-finish
> --
> 2.36.1
>
>

RE: [PATCH V3] RISC-V: Add basic vec_init for VLS RVV auto-vectorization

Reply via email to