Consider the following case: void foo (int8_t *in, int8_t *out, int8_t x) { for (int i = 0; i < 16; i++) in[i] = x; }
Compile option: --param=riscv-autovec-preference=scalable -fno-builtin Before this patch: foo: li a5,16 csrr a4,vlenb vsetvli a3,zero,e8,m1,ta,ma vmv.v.x v1,a2 bleu a5,a4,.L2 mv a5,a4 .L2: vsetvli zero,a5,e8,m1,ta,ma vse8.v v1,0(a0) ret After this patch: foo: vsetivli zero,16,e8,mf8,ta,ma vmv.v.x v1,a2 vse8.v v1,0(a0) ret gcc/ChangeLog: * config/riscv/autovec-vls.md (@vec_duplicate<mode>): New pattern. * config/riscv/riscv-v.cc (autovectorize_vector_modes): Enable VLS auto-vectorization. * config/riscv/riscv.cc (riscv_estimated_poly_value): Fix incorrect poly estimation. gcc/testsuite/ChangeLog: * gcc.target/riscv/rvv/autovec/v-1.c: Adapt test. * gcc.target/riscv/rvv/autovec/zve32f_zvl128b-1.c: Ditto. * gcc.target/riscv/rvv/autovec/zve64d_zvl128b-1.c: Ditto. * gcc.target/riscv/rvv/autovec/zve64f_zvl128b-1.c: Ditto. * gcc.target/riscv/rvv/autovec/vls/dup-1.c: New test. * gcc.target/riscv/rvv/autovec/vls/dup-2.c: New test. * gcc.target/riscv/rvv/autovec/vls/dup-3.c: New test. * gcc.target/riscv/rvv/autovec/vls/dup-4.c: New test. * gcc.target/riscv/rvv/autovec/vls/dup-5.c: New test. * gcc.target/riscv/rvv/autovec/vls/dup-6.c: New test. * gcc.target/riscv/rvv/autovec/vls/dup-7.c: New test. 
--- gcc/config/riscv/autovec-vls.md | 19 ++ gcc/config/riscv/riscv-v.cc | 21 ++- gcc/config/riscv/riscv.cc | 16 +- .../gcc.target/riscv/rvv/autovec/v-1.c | 2 +- .../gcc.target/riscv/rvv/autovec/vls/dup-1.c | 168 ++++++++++++++++++ .../gcc.target/riscv/rvv/autovec/vls/dup-2.c | 153 ++++++++++++++++ .../gcc.target/riscv/rvv/autovec/vls/dup-3.c | 153 ++++++++++++++++ .../gcc.target/riscv/rvv/autovec/vls/dup-4.c | 137 ++++++++++++++ .../gcc.target/riscv/rvv/autovec/vls/dup-5.c | 137 ++++++++++++++ .../gcc.target/riscv/rvv/autovec/vls/dup-6.c | 122 +++++++++++++ .../gcc.target/riscv/rvv/autovec/vls/dup-7.c | 122 +++++++++++++ .../riscv/rvv/autovec/zve32f_zvl128b-1.c | 2 +- .../riscv/rvv/autovec/zve64d_zvl128b-1.c | 2 +- .../riscv/rvv/autovec/zve64f_zvl128b-1.c | 2 +- 14 files changed, 1043 insertions(+), 13 deletions(-) create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/autovec/vls/dup-1.c create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/autovec/vls/dup-2.c create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/autovec/vls/dup-3.c create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/autovec/vls/dup-4.c create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/autovec/vls/dup-5.c create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/autovec/vls/dup-6.c create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/autovec/vls/dup-7.c diff --git a/gcc/config/riscv/autovec-vls.md b/gcc/config/riscv/autovec-vls.md index 9ece317ca4e..1a64dfdd91e 100644 --- a/gcc/config/riscv/autovec-vls.md +++ b/gcc/config/riscv/autovec-vls.md @@ -139,3 +139,22 @@ "vmv%m1r.v\t%0,%1" [(set_attr "type" "vmov") (set_attr "mode" "<MODE>")]) + +;; ----------------------------------------------------------------- +;; ---- Duplicate Operations +;; ----------------------------------------------------------------- + +(define_insn_and_split "@vec_duplicate<mode>" + [(set (match_operand:VLS 0 "register_operand") + (vec_duplicate:VLS + (match_operand:<VEL> 1 "reg_or_int_operand")))] + "TARGET_VECTOR && 
can_create_pseudo_p ()" + "#" + "&& 1" + [(const_int 0)] + { + riscv_vector::emit_vlmax_insn (code_for_pred_broadcast (<MODE>mode), + riscv_vector::RVV_UNOP, operands); + DONE; + } +) diff --git a/gcc/config/riscv/riscv-v.cc b/gcc/config/riscv/riscv-v.cc index 9e89f970a4c..c10e51b362e 100644 --- a/gcc/config/riscv/riscv-v.cc +++ b/gcc/config/riscv/riscv-v.cc @@ -2533,7 +2533,6 @@ autovectorize_vector_modes (vector_modes *modes, bool) { if (autovec_use_vlmax_p ()) { - /* TODO: We will support RVV VLS auto-vectorization mode in the future. */ poly_uint64 full_size = BYTES_PER_RISCV_VECTOR * ((int) riscv_autovec_lmul); @@ -2561,7 +2560,25 @@ autovectorize_vector_modes (vector_modes *modes, bool) modes->safe_push (mode); } } - return 0; + unsigned int flag = 0; + if (TARGET_VECTOR_VLS) + { + /* Enable VECT_COMPARE_COSTS between VLA modes and VLS modes for scalable + auto-vectorization. */ + flag |= VECT_COMPARE_COSTS; + /* Push all VLS modes according to TARGET_MIN_VLEN. */ + unsigned int i = 0; + unsigned int base_size = TARGET_MIN_VLEN * riscv_autovec_lmul / 8; + unsigned int size = base_size; + machine_mode mode; + while (size > 0 && get_vector_mode (QImode, size).exists (&mode)) + { + modes->safe_push (mode); + i++; + size = base_size / (1U << i); + } + } + return flag; } /* If the given VECTOR_MODE is an RVV mode, first get the largest number diff --git a/gcc/config/riscv/riscv.cc b/gcc/config/riscv/riscv.cc index dc78f4cf977..a3e9141e099 100644 --- a/gcc/config/riscv/riscv.cc +++ b/gcc/config/riscv/riscv.cc @@ -7481,15 +7481,16 @@ riscv_dwarf_poly_indeterminate_value (unsigned int i, unsigned int *factor, conservative behavior of auto-vectorizing with RVV when it is a win even for 128-bit RVV. When RVV width information is available VAL.coeffs[1] is multiplied by - the number of VQ chunks over the initial Advanced SIMD 128 bits. */ + the number of rvv chunks over the VLS modes TARGET_MIN_VLEN. 
*/ static HOST_WIDE_INT riscv_estimated_poly_value (poly_int64 val, poly_value_estimate_kind kind = POLY_VALUE_LIKELY) { - unsigned int width_source = BITS_PER_RISCV_VECTOR.is_constant () - ? (unsigned int) BITS_PER_RISCV_VECTOR.to_constant () - : (unsigned int) RVV_SCALABLE; + unsigned int width_source + = BITS_PER_RISCV_VECTOR.is_constant () + ? (unsigned int) BITS_PER_RISCV_VECTOR.to_constant () + : (unsigned int) RVV_SCALABLE; /* If there is no core-specific information then the minimum and likely values are based on 128-bit vectors and the maximum is based on @@ -7502,7 +7503,8 @@ riscv_estimated_poly_value (poly_int64 val, return val.coeffs[0]; case POLY_VALUE_MAX: - return val.coeffs[0] + val.coeffs[1] * 15; + return val.coeffs[0] + + val.coeffs[1] * (BYTES_PER_RISCV_VECTOR.coeffs[0] - 1); } /* Allow BITS_PER_RISCV_VECTOR to be a bitmask of different VL, treating the @@ -7514,8 +7516,8 @@ riscv_estimated_poly_value (poly_int64 val, width_source = least_bit_hwi (width_source); /* If the core provides width information, use that. 
*/ - HOST_WIDE_INT over_128 = width_source - 128; - return val.coeffs[0] + val.coeffs[1] * over_128 / 128; + HOST_WIDE_INT over_min_vlen = width_source - TARGET_MIN_VLEN; + return val.coeffs[0] + val.coeffs[1] * over_min_vlen / TARGET_MIN_VLEN; } /* Return true if the vector misalignment factor is supported by the diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/v-1.c b/gcc/testsuite/gcc.target/riscv/rvv/autovec/v-1.c index e68d05f5f48..ebbe5e210c5 100644 --- a/gcc/testsuite/gcc.target/riscv/rvv/autovec/v-1.c +++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/v-1.c @@ -3,4 +3,4 @@ #include "template-1.h" -/* { dg-final { scan-tree-dump-times "vectorized 1 loops in function" 5 "vect" } } */ +/* { dg-final { scan-tree-dump-times "vectorized 1 loops in function" 6 "vect" } } */ diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/vls/dup-1.c b/gcc/testsuite/gcc.target/riscv/rvv/autovec/vls/dup-1.c new file mode 100644 index 00000000000..1f520f2b0a7 --- /dev/null +++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/vls/dup-1.c @@ -0,0 +1,168 @@ +/* { dg-do compile } */ +/* { dg-options "-march=rv64gcv_zvfh_zvl4096b -mabi=lp64d -O3 -fno-builtin -fno-schedule-insns -fno-schedule-insns2 --param riscv-autovec-lmul=m8" } */ +/* { dg-final { check-function-bodies "**" "" } } */ + +#include "def.h" + +/* +** foo1: +** vsetivli\s+zero,\s*4,\s*e8,\s*mf8,\s*t[au],\s*m[au] +** vmv\.v\.x\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),\s*[a-x0-9]+ +** vse8\.v\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),0\s*\([a-x0-9]+\) +** ret +*/ +void +foo1 (int8_t *in, int8_t *out, int8_t x) +{ + for (int i = 0; i < 4; i++) + in[i] = x; +} + +/* +** foo2: +** vsetivli\s+zero,\s*8,\s*e8,\s*mf8,\s*t[au],\s*m[au] +** vmv\.v\.x\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),\s*[a-x0-9]+ +** vse8\.v\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),0\s*\([a-x0-9]+\) +** ret +*/ +void +foo2 (int8_t *in, int8_t *out, int8_t x) +{ + for (int i = 0; i < 8; i++) + in[i] = x; +} + +/* +** foo3: +** vsetivli\s+zero,\s*16,\s*e8,\s*mf8,\s*t[au],\s*m[au] +** 
vmv\.v\.x\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),\s*[a-x0-9]+ +** vse8\.v\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),0\s*\([a-x0-9]+\) +** ret +*/ +void +foo3 (int8_t *in, int8_t *out, int8_t x) +{ + for (int i = 0; i < 16; i++) + in[i] = x; +} + +/* +** foo4: +** li\s+[a-x0-9]+,32 +** vsetvli\s+zero,\s*[a-x0-9]+,\s*e8,\s*mf8,\s*t[au],\s*m[au] +** vmv\.v\.x\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),\s*[a-x0-9]+ +** vse8\.v\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),0\s*\([a-x0-9]+\) +** ret +*/ +void +foo4 (int8_t *in, int8_t *out, int8_t x) +{ + for (int i = 0; i < 32; i++) + in[i] = x; +} + +/* +** foo5: +** li\s+[a-x0-9]+,64 +** vsetvli\s+zero,\s*[a-x0-9]+,\s*e8,\s*mf8,\s*t[au],\s*m[au] +** vmv\.v\.x\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),\s*[a-x0-9]+ +** vse8\.v\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),0\s*\([a-x0-9]+\) +** ret +*/ +void +foo5 (int8_t *in, int8_t *out, int8_t x) +{ + for (int i = 0; i < 64; i++) + in[i] = x; +} + +/* +** foo6: +** li\s+[a-x0-9]+,128 +** vsetvli\s+zero,\s*[a-x0-9]+,\s*e8,\s*mf4,\s*t[au],\s*m[au] +** vmv\.v\.x\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),\s*[a-x0-9]+ +** vse8\.v\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),0\s*\([a-x0-9]+\) +** ret +*/ +void +foo6 (int8_t *in, int8_t *out, int8_t x) +{ + for (int i = 0; i < 128; i++) + in[i] = x; +} + +/* +** foo7: +** li\s+[a-x0-9]+,256 +** vsetvli\s+zero,\s*[a-x0-9]+,\s*e8,\s*mf2,\s*t[au],\s*m[au] +** vmv\.v\.x\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),\s*[a-x0-9]+ +** vse8\.v\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),0\s*\([a-x0-9]+\) +** ret +*/ +void +foo7 (int8_t *in, int8_t *out, int8_t x) +{ + for (int i = 0; i < 256; i++) + in[i] = x; +} + +/* +** foo8: +** li\s+[a-x0-9]+,512 +** vsetvli\s+zero,\s*[a-x0-9]+,\s*e8,\s*m1,\s*t[au],\s*m[au] +** vmv\.v\.x\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),\s*[a-x0-9]+ +** vse8\.v\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),0\s*\([a-x0-9]+\) +** ret +*/ +void +foo8 (int8_t *in, int8_t *out, int8_t x) +{ + for (int i = 0; i < 512; i++) + in[i] = x; +} + +/* +** foo9: +** li\s+[a-x0-9]+,1024 +** 
vsetvli\s+zero,\s*[a-x0-9]+,\s*e8,\s*m2,\s*t[au],\s*m[au] +** vmv\.v\.x\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),\s*[a-x0-9]+ +** vse8\.v\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),0\s*\([a-x0-9]+\) +** ret +*/ +void +foo9 (int8_t *in, int8_t *out, int8_t x) +{ + for (int i = 0; i < 1024; i++) + in[i] = x; +} + +/* +** foo10: +** li\s+[a-x0-9]+,4096 +** addi\s+[a-x0-9]+,[a-x0-9]+,-2048 +** vsetvli\s+zero,\s*[a-x0-9]+,\s*e8,\s*m4,\s*t[au],\s*m[au] +** vmv\.v\.x\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),\s*[a-x0-9]+ +** vse8\.v\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),0\s*\([a-x0-9]+\) +** ret +*/ +void +foo10 (int8_t *in, int8_t *out, int8_t x) +{ + for (int i = 0; i < 2048; i++) + in[i] = x; +} + +/* +** foo11: +** li\s+[a-x0-9]+,4096 +** vsetvli\s+zero,\s*[a-x0-9]+,\s*e8,\s*m8,\s*t[au],\s*m[au] +** vmv\.v\.x\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),\s*[a-x0-9]+ +** vse8\.v\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),0\s*\([a-x0-9]+\) +** ret +*/ +void +foo11 (int8_t *in, int8_t *out, int8_t x) +{ + for (int i = 0; i < 4096; i++) + in[i] = x; +} diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/vls/dup-2.c b/gcc/testsuite/gcc.target/riscv/rvv/autovec/vls/dup-2.c new file mode 100644 index 00000000000..1a930d059c8 --- /dev/null +++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/vls/dup-2.c @@ -0,0 +1,153 @@ +/* { dg-do compile } */ +/* { dg-options "-march=rv64gcv_zvfh_zvl4096b -mabi=lp64d -O3 -fno-builtin -fno-schedule-insns -fno-schedule-insns2 --param riscv-autovec-lmul=m8" } */ +/* { dg-final { check-function-bodies "**" "" } } */ + +#include "def.h" + +/* +** foo1: +** vsetivli\s+zero,\s*4,\s*e16,\s*mf4,\s*t[au],\s*m[au] +** vmv\.v\.x\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),\s*[a-x0-9]+ +** vse16\.v\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),0\s*\([a-x0-9]+\) +** ret +*/ +void +foo1 (int16_t *in, int16_t *out, int16_t x) +{ + for (int i = 0; i < 4; i++) + in[i] = x; +} + +/* +** foo2: +** vsetivli\s+zero,\s*8,\s*e16,\s*mf4,\s*t[au],\s*m[au] +** vmv\.v\.x\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),\s*[a-x0-9]+ +** 
vse16\.v\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),0\s*\([a-x0-9]+\) +** ret +*/ +void +foo2 (int16_t *in, int16_t *out, int16_t x) +{ + for (int i = 0; i < 8; i++) + in[i] = x; +} + +/* +** foo3: +** vsetivli\s+zero,\s*16,\s*e16,\s*mf4,\s*t[au],\s*m[au] +** vmv\.v\.x\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),\s*[a-x0-9]+ +** vse16\.v\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),0\s*\([a-x0-9]+\) +** ret +*/ +void +foo3 (int16_t *in, int16_t *out, int16_t x) +{ + for (int i = 0; i < 16; i++) + in[i] = x; +} + +/* +** foo4: +** li\s+[a-x0-9]+,32 +** vsetvli\s+zero,\s*[a-x0-9]+,\s*e16,\s*mf4,\s*t[au],\s*m[au] +** vmv\.v\.x\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),\s*[a-x0-9]+ +** vse16\.v\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),0\s*\([a-x0-9]+\) +** ret +*/ +void +foo4 (int16_t *in, int16_t *out, int16_t x) +{ + for (int i = 0; i < 32; i++) + in[i] = x; +} + +/* +** foo5: +** li\s+[a-x0-9]+,64 +** vsetvli\s+zero,\s*[a-x0-9]+,\s*e16,\s*mf4,\s*t[au],\s*m[au] +** vmv\.v\.x\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),\s*[a-x0-9]+ +** vse16\.v\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),0\s*\([a-x0-9]+\) +** ret +*/ +void +foo5 (int16_t *in, int16_t *out, int16_t x) +{ + for (int i = 0; i < 64; i++) + in[i] = x; +} + +/* +** foo6: +** li\s+[a-x0-9]+,128 +** vsetvli\s+zero,\s*[a-x0-9]+,\s*e16,\s*mf2,\s*t[au],\s*m[au] +** vmv\.v\.x\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),\s*[a-x0-9]+ +** vse16\.v\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),0\s*\([a-x0-9]+\) +** ret +*/ +void +foo6 (int16_t *in, int16_t *out, int16_t x) +{ + for (int i = 0; i < 128; i++) + in[i] = x; +} + +/* +** foo7: +** li\s+[a-x0-9]+,256 +** vsetvli\s+zero,\s*[a-x0-9]+,\s*e16,\s*m1,\s*t[au],\s*m[au] +** vmv\.v\.x\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),\s*[a-x0-9]+ +** vse16\.v\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),0\s*\([a-x0-9]+\) +** ret +*/ +void +foo7 (int16_t *in, int16_t *out, int16_t x) +{ + for (int i = 0; i < 256; i++) + in[i] = x; +} + +/* +** foo8: +** li\s+[a-x0-9]+,512 +** vsetvli\s+zero,\s*[a-x0-9]+,\s*e16,\s*m2,\s*t[au],\s*m[au] +** 
vmv\.v\.x\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),\s*[a-x0-9]+ +** vse16\.v\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),0\s*\([a-x0-9]+\) +** ret +*/ +void +foo8 (int16_t *in, int16_t *out, int16_t x) +{ + for (int i = 0; i < 512; i++) + in[i] = x; +} + +/* +** foo9: +** li\s+[a-x0-9]+,1024 +** vsetvli\s+zero,\s*[a-x0-9]+,\s*e16,\s*m4,\s*t[au],\s*m[au] +** vmv\.v\.x\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),\s*[a-x0-9]+ +** vse16\.v\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),0\s*\([a-x0-9]+\) +** ret +*/ +void +foo9 (int16_t *in, int16_t *out, int16_t x) +{ + for (int i = 0; i < 1024; i++) + in[i] = x; +} + +/* +** foo10: +** li\s+[a-x0-9]+,4096 +** addi\s+[a-x0-9]+,[a-x0-9]+,-2048 +** vsetvli\s+zero,\s*[a-x0-9]+,\s*e16,\s*m8,\s*t[au],\s*m[au] +** vmv\.v\.x\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),\s*[a-x0-9]+ +** vse16\.v\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),0\s*\([a-x0-9]+\) +** ret +*/ +void +foo10 (int16_t *in, int16_t *out, int16_t x) +{ + for (int i = 0; i < 2048; i++) + in[i] = x; +} diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/vls/dup-3.c b/gcc/testsuite/gcc.target/riscv/rvv/autovec/vls/dup-3.c new file mode 100644 index 00000000000..46fb5a525a5 --- /dev/null +++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/vls/dup-3.c @@ -0,0 +1,153 @@ +/* { dg-do compile } */ +/* { dg-options "-march=rv64gcv_zvfh_zvl4096b -mabi=lp64d -O3 -fno-builtin -fno-schedule-insns -fno-schedule-insns2 --param riscv-autovec-lmul=m8" } */ +/* { dg-final { check-function-bodies "**" "" } } */ + +#include "def.h" + +/* +** foo1: +** vsetivli\s+zero,\s*4,\s*e16,\s*mf4,\s*t[au],\s*m[au] +** vfmv\.v\.f\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),\s*[a-x0-9]+ +** vse16\.v\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),0\s*\([a-x0-9]+\) +** ret +*/ +void +foo1 (_Float16 *in, _Float16 *out, _Float16 x) +{ + for (int i = 0; i < 4; i++) + in[i] = x; +} + +/* +** foo2: +** vsetivli\s+zero,\s*8,\s*e16,\s*mf4,\s*t[au],\s*m[au] +** vfmv\.v\.f\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),\s*[a-x0-9]+ +** vse16\.v\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),0\s*\([a-x0-9]+\) +** ret 
+*/ +void +foo2 (_Float16 *in, _Float16 *out, _Float16 x) +{ + for (int i = 0; i < 8; i++) + in[i] = x; +} + +/* +** foo3: +** vsetivli\s+zero,\s*16,\s*e16,\s*mf4,\s*t[au],\s*m[au] +** vfmv\.v\.f\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),\s*[a-x0-9]+ +** vse16\.v\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),0\s*\([a-x0-9]+\) +** ret +*/ +void +foo3 (_Float16 *in, _Float16 *out, _Float16 x) +{ + for (int i = 0; i < 16; i++) + in[i] = x; +} + +/* +** foo4: +** li\s+[a-x0-9]+,32 +** vsetvli\s+zero,\s*[a-x0-9]+,\s*e16,\s*mf4,\s*t[au],\s*m[au] +** vfmv\.v\.f\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),\s*[a-x0-9]+ +** vse16\.v\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),0\s*\([a-x0-9]+\) +** ret +*/ +void +foo4 (_Float16 *in, _Float16 *out, _Float16 x) +{ + for (int i = 0; i < 32; i++) + in[i] = x; +} + +/* +** foo5: +** li\s+[a-x0-9]+,64 +** vsetvli\s+zero,\s*[a-x0-9]+,\s*e16,\s*mf4,\s*t[au],\s*m[au] +** vfmv\.v\.f\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),\s*[a-x0-9]+ +** vse16\.v\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),0\s*\([a-x0-9]+\) +** ret +*/ +void +foo5 (_Float16 *in, _Float16 *out, _Float16 x) +{ + for (int i = 0; i < 64; i++) + in[i] = x; +} + +/* +** foo6: +** li\s+[a-x0-9]+,128 +** vsetvli\s+zero,\s*[a-x0-9]+,\s*e16,\s*mf2,\s*t[au],\s*m[au] +** vfmv\.v\.f\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),\s*[a-x0-9]+ +** vse16\.v\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),0\s*\([a-x0-9]+\) +** ret +*/ +void +foo6 (_Float16 *in, _Float16 *out, _Float16 x) +{ + for (int i = 0; i < 128; i++) + in[i] = x; +} + +/* +** foo7: +** li\s+[a-x0-9]+,256 +** vsetvli\s+zero,\s*[a-x0-9]+,\s*e16,\s*m1,\s*t[au],\s*m[au] +** vfmv\.v\.f\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),\s*[a-x0-9]+ +** vse16\.v\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),0\s*\([a-x0-9]+\) +** ret +*/ +void +foo7 (_Float16 *in, _Float16 *out, _Float16 x) +{ + for (int i = 0; i < 256; i++) + in[i] = x; +} + +/* +** foo8: +** li\s+[a-x0-9]+,512 +** vsetvli\s+zero,\s*[a-x0-9]+,\s*e16,\s*m2,\s*t[au],\s*m[au] +** vfmv\.v\.f\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),\s*[a-x0-9]+ +** 
vse16\.v\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),0\s*\([a-x0-9]+\) +** ret +*/ +void +foo8 (_Float16 *in, _Float16 *out, _Float16 x) +{ + for (int i = 0; i < 512; i++) + in[i] = x; +} + +/* +** foo9: +** li\s+[a-x0-9]+,1024 +** vsetvli\s+zero,\s*[a-x0-9]+,\s*e16,\s*m4,\s*t[au],\s*m[au] +** vfmv\.v\.f\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),\s*[a-x0-9]+ +** vse16\.v\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),0\s*\([a-x0-9]+\) +** ret +*/ +void +foo9 (_Float16 *in, _Float16 *out, _Float16 x) +{ + for (int i = 0; i < 1024; i++) + in[i] = x; +} + +/* +** foo10: +** li\s+[a-x0-9]+,4096 +** addi\s+[a-x0-9]+,[a-x0-9]+,-2048 +** vsetvli\s+zero,\s*[a-x0-9]+,\s*e16,\s*m8,\s*t[au],\s*m[au] +** vfmv\.v\.f\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),\s*[a-x0-9]+ +** vse16\.v\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),0\s*\([a-x0-9]+\) +** ret +*/ +void +foo10 (_Float16 *in, _Float16 *out, _Float16 x) +{ + for (int i = 0; i < 2048; i++) + in[i] = x; +} diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/vls/dup-4.c b/gcc/testsuite/gcc.target/riscv/rvv/autovec/vls/dup-4.c new file mode 100644 index 00000000000..7e46dc42526 --- /dev/null +++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/vls/dup-4.c @@ -0,0 +1,137 @@ +/* { dg-do compile } */ +/* { dg-options "-march=rv64gcv_zvfh_zvl4096b -mabi=lp64d -O3 -fno-builtin -fno-schedule-insns -fno-schedule-insns2 --param riscv-autovec-lmul=m8" } */ +/* { dg-final { check-function-bodies "**" "" } } */ + +#include "def.h" + +/* +** foo1: +** vsetivli\s+zero,\s*4,\s*e32,\s*mf2,\s*t[au],\s*m[au] +** vmv\.v\.x\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),\s*[a-x0-9]+ +** vse32\.v\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),0\s*\([a-x0-9]+\) +** ret +*/ +void +foo1 (int32_t *in, int32_t *out, int32_t x) +{ + for (int i = 0; i < 4; i++) + in[i] = x; +} + +/* +** foo2: +** vsetivli\s+zero,\s*8,\s*e32,\s*mf2,\s*t[au],\s*m[au] +** vmv\.v\.x\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),\s*[a-x0-9]+ +** vse32\.v\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),0\s*\([a-x0-9]+\) +** ret +*/ +void +foo2 (int32_t *in, int32_t *out, int32_t x) 
+{ + for (int i = 0; i < 8; i++) + in[i] = x; +} + +/* +** foo3: +** vsetivli\s+zero,\s*16,\s*e32,\s*mf2,\s*t[au],\s*m[au] +** vmv\.v\.x\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),\s*[a-x0-9]+ +** vse32\.v\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),0\s*\([a-x0-9]+\) +** ret +*/ +void +foo3 (int32_t *in, int32_t *out, int32_t x) +{ + for (int i = 0; i < 16; i++) + in[i] = x; +} + +/* +** foo4: +** li\s+[a-x0-9]+,32 +** vsetvli\s+zero,\s*[a-x0-9]+,\s*e32,\s*mf2,\s*t[au],\s*m[au] +** vmv\.v\.x\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),\s*[a-x0-9]+ +** vse32\.v\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),0\s*\([a-x0-9]+\) +** ret +*/ +void +foo4 (int32_t *in, int32_t *out, int32_t x) +{ + for (int i = 0; i < 32; i++) + in[i] = x; +} + +/* +** foo5: +** li\s+[a-x0-9]+,64 +** vsetvli\s+zero,\s*[a-x0-9]+,\s*e32,\s*mf2,\s*t[au],\s*m[au] +** vmv\.v\.x\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),\s*[a-x0-9]+ +** vse32\.v\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),0\s*\([a-x0-9]+\) +** ret +*/ +void +foo5 (int32_t *in, int32_t *out, int32_t x) +{ + for (int i = 0; i < 64; i++) + in[i] = x; +} + +/* +** foo6: +** li\s+[a-x0-9]+,128 +** vsetvli\s+zero,\s*[a-x0-9]+,\s*e32,\s*m1,\s*t[au],\s*m[au] +** vmv\.v\.x\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),\s*[a-x0-9]+ +** vse32\.v\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),0\s*\([a-x0-9]+\) +** ret +*/ +void +foo6 (int32_t *in, int32_t *out, int32_t x) +{ + for (int i = 0; i < 128; i++) + in[i] = x; +} + +/* +** foo7: +** li\s+[a-x0-9]+,256 +** vsetvli\s+zero,\s*[a-x0-9]+,\s*e32,\s*m2,\s*t[au],\s*m[au] +** vmv\.v\.x\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),\s*[a-x0-9]+ +** vse32\.v\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),0\s*\([a-x0-9]+\) +** ret +*/ +void +foo7 (int32_t *in, int32_t *out, int32_t x) +{ + for (int i = 0; i < 256; i++) + in[i] = x; +} + +/* +** foo8: +** li\s+[a-x0-9]+,512 +** vsetvli\s+zero,\s*[a-x0-9]+,\s*e32,\s*m4,\s*t[au],\s*m[au] +** vmv\.v\.x\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),\s*[a-x0-9]+ +** vse32\.v\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),0\s*\([a-x0-9]+\) +** ret +*/ +void +foo8 (int32_t *in, int32_t *out, 
int32_t x) +{ + for (int i = 0; i < 512; i++) + in[i] = x; +} + +/* +** foo9: +** li\s+[a-x0-9]+,1024 +** vsetvli\s+zero,\s*[a-x0-9]+,\s*e32,\s*m8,\s*t[au],\s*m[au] +** vmv\.v\.x\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),\s*[a-x0-9]+ +** vse32\.v\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),0\s*\([a-x0-9]+\) +** ret +*/ +void +foo9 (int32_t *in, int32_t *out, int32_t x) +{ + for (int i = 0; i < 1024; i++) + in[i] = x; +} diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/vls/dup-5.c b/gcc/testsuite/gcc.target/riscv/rvv/autovec/vls/dup-5.c new file mode 100644 index 00000000000..9b9327bdd4d --- /dev/null +++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/vls/dup-5.c @@ -0,0 +1,137 @@ +/* { dg-do compile } */ +/* { dg-options "-march=rv64gcv_zvfh_zvl4096b -mabi=lp64d -O3 -fno-builtin -fno-schedule-insns -fno-schedule-insns2 --param riscv-autovec-lmul=m8" } */ +/* { dg-final { check-function-bodies "**" "" } } */ + +#include "def.h" + +/* +** foo1: +** vsetivli\s+zero,\s*4,\s*e32,\s*mf2,\s*t[au],\s*m[au] +** vfmv\.v\.f\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),\s*[a-x0-9]+ +** vse32\.v\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),0\s*\([a-x0-9]+\) +** ret +*/ +void +foo1 (float *in, float *out, float x) +{ + for (int i = 0; i < 4; i++) + in[i] = x; +} + +/* +** foo2: +** vsetivli\s+zero,\s*8,\s*e32,\s*mf2,\s*t[au],\s*m[au] +** vfmv\.v\.f\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),\s*[a-x0-9]+ +** vse32\.v\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),0\s*\([a-x0-9]+\) +** ret +*/ +void +foo2 (float *in, float *out, float x) +{ + for (int i = 0; i < 8; i++) + in[i] = x; +} + +/* +** foo3: +** vsetivli\s+zero,\s*16,\s*e32,\s*mf2,\s*t[au],\s*m[au] +** vfmv\.v\.f\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),\s*[a-x0-9]+ +** vse32\.v\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),0\s*\([a-x0-9]+\) +** ret +*/ +void +foo3 (float *in, float *out, float x) +{ + for (int i = 0; i < 16; i++) + in[i] = x; +} + +/* +** foo4: +** li\s+[a-x0-9]+,32 +** vsetvli\s+zero,\s*[a-x0-9]+,\s*e32,\s*mf2,\s*t[au],\s*m[au] +** 
vfmv\.v\.f\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),\s*[a-x0-9]+ +** vse32\.v\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),0\s*\([a-x0-9]+\) +** ret +*/ +void +foo4 (float *in, float *out, float x) +{ + for (int i = 0; i < 32; i++) + in[i] = x; +} + +/* +** foo5: +** li\s+[a-x0-9]+,64 +** vsetvli\s+zero,\s*[a-x0-9]+,\s*e32,\s*mf2,\s*t[au],\s*m[au] +** vfmv\.v\.f\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),\s*[a-x0-9]+ +** vse32\.v\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),0\s*\([a-x0-9]+\) +** ret +*/ +void +foo5 (float *in, float *out, float x) +{ + for (int i = 0; i < 64; i++) + in[i] = x; +} + +/* +** foo6: +** li\s+[a-x0-9]+,128 +** vsetvli\s+zero,\s*[a-x0-9]+,\s*e32,\s*m1,\s*t[au],\s*m[au] +** vfmv\.v\.f\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),\s*[a-x0-9]+ +** vse32\.v\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),0\s*\([a-x0-9]+\) +** ret +*/ +void +foo6 (float *in, float *out, float x) +{ + for (int i = 0; i < 128; i++) + in[i] = x; +} + +/* +** foo7: +** li\s+[a-x0-9]+,256 +** vsetvli\s+zero,\s*[a-x0-9]+,\s*e32,\s*m2,\s*t[au],\s*m[au] +** vfmv\.v\.f\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),\s*[a-x0-9]+ +** vse32\.v\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),0\s*\([a-x0-9]+\) +** ret +*/ +void +foo7 (float *in, float *out, float x) +{ + for (int i = 0; i < 256; i++) + in[i] = x; +} + +/* +** foo8: +** li\s+[a-x0-9]+,512 +** vsetvli\s+zero,\s*[a-x0-9]+,\s*e32,\s*m4,\s*t[au],\s*m[au] +** vfmv\.v\.f\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),\s*[a-x0-9]+ +** vse32\.v\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),0\s*\([a-x0-9]+\) +** ret +*/ +void +foo8 (float *in, float *out, float x) +{ + for (int i = 0; i < 512; i++) + in[i] = x; +} + +/* +** foo9: +** li\s+[a-x0-9]+,1024 +** vsetvli\s+zero,\s*[a-x0-9]+,\s*e32,\s*m8,\s*t[au],\s*m[au] +** vfmv\.v\.f\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),\s*[a-x0-9]+ +** vse32\.v\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),0\s*\([a-x0-9]+\) +** ret +*/ +void +foo9 (float *in, float *out, float x) +{ + for (int i = 0; i < 1024; i++) + in[i] = x; +} diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/vls/dup-6.c 
b/gcc/testsuite/gcc.target/riscv/rvv/autovec/vls/dup-6.c new file mode 100644 index 00000000000..52d5a65b44e --- /dev/null +++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/vls/dup-6.c @@ -0,0 +1,122 @@ +/* { dg-do compile } */ +/* { dg-options "-march=rv64gcv_zvfh_zvl4096b -mabi=lp64d -O3 -fno-builtin -fno-schedule-insns -fno-schedule-insns2 --param riscv-autovec-lmul=m8" } */ +/* { dg-final { check-function-bodies "**" "" } } */ + +#include "def.h" + +/* +** foo1: +** vsetivli\s+zero,\s*4,\s*e64,\s*m1,\s*t[au],\s*m[au] +** vmv\.v\.x\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),\s*[a-x0-9]+ +** vse64\.v\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),0\s*\([a-x0-9]+\) +** ret +*/ +void +foo1 (int64_t *in, int64_t *out, int64_t x) +{ + for (int i = 0; i < 4; i++) + in[i] = x; +} + +/* +** foo2: +** vsetivli\s+zero,\s*8,\s*e64,\s*m1,\s*t[au],\s*m[au] +** vmv\.v\.x\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),\s*[a-x0-9]+ +** vse64\.v\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),0\s*\([a-x0-9]+\) +** ret +*/ +void +foo2 (int64_t *in, int64_t *out, int64_t x) +{ + for (int i = 0; i < 8; i++) + in[i] = x; +} + +/* +** foo3: +** vsetivli\s+zero,\s*16,\s*e64,\s*m1,\s*t[au],\s*m[au] +** vmv\.v\.x\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),\s*[a-x0-9]+ +** vse64\.v\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),0\s*\([a-x0-9]+\) +** ret +*/ +void +foo3 (int64_t *in, int64_t *out, int64_t x) +{ + for (int i = 0; i < 16; i++) + in[i] = x; +} + +/* +** foo4: +** li\s+[a-x0-9]+,32 +** vsetvli\s+zero,\s*[a-x0-9]+,\s*e64,\s*m1,\s*t[au],\s*m[au] +** vmv\.v\.x\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),\s*[a-x0-9]+ +** vse64\.v\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),0\s*\([a-x0-9]+\) +** ret +*/ +void +foo4 (int64_t *in, int64_t *out, int64_t x) +{ + for (int i = 0; i < 32; i++) + in[i] = x; +} + +/* +** foo5: +** li\s+[a-x0-9]+,64 +** vsetvli\s+zero,\s*[a-x0-9]+,\s*e64,\s*m1,\s*t[au],\s*m[au] +** vmv\.v\.x\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),\s*[a-x0-9]+ +** vse64\.v\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),0\s*\([a-x0-9]+\) +** ret +*/ +void +foo5 (int64_t *in, int64_t *out, 
int64_t x) +{ + for (int i = 0; i < 64; i++) + in[i] = x; +} + +/* +** foo6: +** li\s+[a-x0-9]+,128 +** vsetvli\s+zero,\s*[a-x0-9]+,\s*e64,\s*m2,\s*t[au],\s*m[au] +** vmv\.v\.x\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),\s*[a-x0-9]+ +** vse64\.v\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),0\s*\([a-x0-9]+\) +** ret +*/ +void +foo6 (int64_t *in, int64_t *out, int64_t x) +{ + for (int i = 0; i < 128; i++) + in[i] = x; +} + +/* +** foo7: +** li\s+[a-x0-9]+,256 +** vsetvli\s+zero,\s*[a-x0-9]+,\s*e64,\s*m4,\s*t[au],\s*m[au] +** vmv\.v\.x\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),\s*[a-x0-9]+ +** vse64\.v\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),0\s*\([a-x0-9]+\) +** ret +*/ +void +foo7 (int64_t *in, int64_t *out, int64_t x) +{ + for (int i = 0; i < 256; i++) + in[i] = x; +} + +/* +** foo8: +** li\s+[a-x0-9]+,512 +** vsetvli\s+zero,\s*[a-x0-9]+,\s*e64,\s*m8,\s*t[au],\s*m[au] +** vmv\.v\.x\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),\s*[a-x0-9]+ +** vse64\.v\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),0\s*\([a-x0-9]+\) +** ret +*/ +void +foo8 (int64_t *in, int64_t *out, int64_t x) +{ + for (int i = 0; i < 512; i++) + in[i] = x; +} diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/vls/dup-7.c b/gcc/testsuite/gcc.target/riscv/rvv/autovec/vls/dup-7.c new file mode 100644 index 00000000000..39f27ece2e7 --- /dev/null +++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/vls/dup-7.c @@ -0,0 +1,122 @@ +/* { dg-do compile } */ +/* { dg-options "-march=rv64gcv_zvfh_zvl4096b -mabi=lp64d -O3 -fno-builtin -fno-schedule-insns -fno-schedule-insns2 --param riscv-autovec-lmul=m8" } */ +/* { dg-final { check-function-bodies "**" "" } } */ + +#include "def.h" + +/* +** foo1: +** vsetivli\s+zero,\s*4,\s*e64,\s*m1,\s*t[au],\s*m[au] +** vfmv\.v\.f\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),\s*[a-x0-9]+ +** vse64\.v\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),0\s*\([a-x0-9]+\) +** ret +*/ +void +foo1 (double *in, double *out, double x) +{ + for (int i = 0; i < 4; i++) + in[i] = x; +} + +/* +** foo2: +** vsetivli\s+zero,\s*8,\s*e64,\s*m1,\s*t[au],\s*m[au] +** 
vfmv\.v\.f\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),\s*[a-x0-9]+ +** vse64\.v\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),0\s*\([a-x0-9]+\) +** ret +*/ +void +foo2 (double *in, double *out, double x) +{ + for (int i = 0; i < 8; i++) + in[i] = x; +} + +/* +** foo3: +** vsetivli\s+zero,\s*16,\s*e64,\s*m1,\s*t[au],\s*m[au] +** vfmv\.v\.f\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),\s*[a-x0-9]+ +** vse64\.v\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),0\s*\([a-x0-9]+\) +** ret +*/ +void +foo3 (double *in, double *out, double x) +{ + for (int i = 0; i < 16; i++) + in[i] = x; +} + +/* +** foo4: +** li\s+[a-x0-9]+,32 +** vsetvli\s+zero,\s*[a-x0-9]+,\s*e64,\s*m1,\s*t[au],\s*m[au] +** vfmv\.v\.f\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),\s*[a-x0-9]+ +** vse64\.v\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),0\s*\([a-x0-9]+\) +** ret +*/ +void +foo4 (double *in, double *out, double x) +{ + for (int i = 0; i < 32; i++) + in[i] = x; +} + +/* +** foo5: +** li\s+[a-x0-9]+,64 +** vsetvli\s+zero,\s*[a-x0-9]+,\s*e64,\s*m1,\s*t[au],\s*m[au] +** vfmv\.v\.f\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),\s*[a-x0-9]+ +** vse64\.v\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),0\s*\([a-x0-9]+\) +** ret +*/ +void +foo5 (double *in, double *out, double x) +{ + for (int i = 0; i < 64; i++) + in[i] = x; +} + +/* +** foo6: +** li\s+[a-x0-9]+,128 +** vsetvli\s+zero,\s*[a-x0-9]+,\s*e64,\s*m2,\s*t[au],\s*m[au] +** vfmv\.v\.f\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),\s*[a-x0-9]+ +** vse64\.v\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),0\s*\([a-x0-9]+\) +** ret +*/ +void +foo6 (double *in, double *out, double x) +{ + for (int i = 0; i < 128; i++) + in[i] = x; +} + +/* +** foo7: +** li\s+[a-x0-9]+,256 +** vsetvli\s+zero,\s*[a-x0-9]+,\s*e64,\s*m4,\s*t[au],\s*m[au] +** vfmv\.v\.f\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),\s*[a-x0-9]+ +** vse64\.v\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),0\s*\([a-x0-9]+\) +** ret +*/ +void +foo7 (double *in, double *out, double x) +{ + for (int i = 0; i < 256; i++) + in[i] = x; +} + +/* +** foo8: +** li\s+[a-x0-9]+,512 +** vsetvli\s+zero,\s*[a-x0-9]+,\s*e64,\s*m8,\s*t[au],\s*m[au] +** 
vfmv\.v\.f\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),\s*[a-x0-9]+ +** vse64\.v\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),0\s*\([a-x0-9]+\) +** ret +*/ +void +foo8 (double *in, double *out, double x) +{ + for (int i = 0; i < 512; i++) + in[i] = x; +} diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/zve32f_zvl128b-1.c b/gcc/testsuite/gcc.target/riscv/rvv/autovec/zve32f_zvl128b-1.c index ecfda79e19a..345e2f963d5 100644 --- a/gcc/testsuite/gcc.target/riscv/rvv/autovec/zve32f_zvl128b-1.c +++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/zve32f_zvl128b-1.c @@ -3,4 +3,4 @@ #include "template-1.h" -/* { dg-final { scan-tree-dump-times "vectorized 1 loops in function" 3 "vect" } } */ +/* { dg-final { scan-tree-dump-times "vectorized 1 loops in function" 4 "vect" } } */ diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/zve64d_zvl128b-1.c b/gcc/testsuite/gcc.target/riscv/rvv/autovec/zve64d_zvl128b-1.c index 6b320ca6f38..e13c27dcdb0 100644 --- a/gcc/testsuite/gcc.target/riscv/rvv/autovec/zve64d_zvl128b-1.c +++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/zve64d_zvl128b-1.c @@ -3,4 +3,4 @@ #include "template-1.h" -/* { dg-final { scan-tree-dump-times "vectorized 1 loops in function" 5 "vect" } } */ +/* { dg-final { scan-tree-dump-times "vectorized 1 loops in function" 6 "vect" } } */ diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/zve64f_zvl128b-1.c b/gcc/testsuite/gcc.target/riscv/rvv/autovec/zve64f_zvl128b-1.c index ae3f066477c..e767629ae54 100644 --- a/gcc/testsuite/gcc.target/riscv/rvv/autovec/zve64f_zvl128b-1.c +++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/zve64f_zvl128b-1.c @@ -3,4 +3,4 @@ #include "template-1.h" -/* { dg-final { scan-tree-dump-times "vectorized 1 loops in function" 4 "vect" } } */ +/* { dg-final { scan-tree-dump-times "vectorized 1 loops in function" 5 "vect" } } */ -- 2.36.3