> +(define_mode_iterator V_HAS_HALF [ > + V2QI V4QI V8QI V16QI V32QI V64QI V128QI V256QI V512QI V1024QI V2048QI > V4096QI > + V2HI V4HI V8HI V16HI V32HI V64HI V128HI V256HI V512HI V1024HI V2048HI > + V2SI V4SI V8SI V16SI V32SI V64SI V128SI V256SI V512SI V1024SI > + V2DI V4DI V8DI V16DI V32DI V64DI V128DI V256DI V512DI > + V2SF V4SF V8SF V16SF V32SF V64SF V128SF V256SF V512SF V1024SF > + V2DF V4DF V8DF V16DF V32DF V64DF V128DF V256DF V512DF > +]) > > Seems you missed predicate here ? > Like: > (V4096QI "riscv_vector::vls_mode_valid_p (V4096QImode) && TARGET_MIN_VLEN >= > 4096")(V32DF "riscv_vector::vls_mode_valid_p (V32DFmode) && > TARGET_VECTOR_ELEN_FP_64 && TARGET_MIN_VLEN >= 256")
Yes I did while copying things over, thanks. Attached is V2 with that changed. [PATCH v2] RISC-V: Fix subreg of VLS modes larger than a vector [PR116086]. When the source mode is potentially larger than one vector (e.g. an LMUL2 mode for VLEN=128) we don't know which vector the subreg actually refers to. For zvl128b and LMUL=2 the subreg in (subreg:V2DI (reg:V4DI)) could actually be the a full (high) vector register of a two-register group (at VLEN=128) or the higher part of a single register (at VLEN>128). As the subreg is statically ambiguous we prevent such situations in can_change_mode_class. The culprit in PR116086 is _12 = BIT_FIELD_REF <vect_cst__42, 128, 128>; which can be expanded with a vector-vector extract (from V4DI to V2DI). This patch adds a VLS-mode vector-vector extract that handles "halving" cases like this one by sliding down the source vector, thus making sure the correct part is used. PR target/116086 gcc/ChangeLog: * config/riscv/autovec.md (vec_extract<mode><v_half>): Add vector-vector extract for VLS modes. * config/riscv/riscv.cc (riscv_can_change_mode_class): Forbid VLS modes larger than one vector. * config/riscv/vector-iterators.md: Add vector-vector extract iterators. gcc/testsuite/ChangeLog: * lib/target-supports.exp: Add effective target checks for zvl256b and zvl512b. * gcc.target/riscv/rvv/autovec/pr116086-2-run.c: New test. * gcc.target/riscv/rvv/autovec/pr116086-2.c: New test. * gcc.target/riscv/rvv/autovec/pr116086.c: New test. --- gcc/config/riscv/autovec.md | 35 +++ gcc/config/riscv/riscv.cc | 11 + gcc/config/riscv/vector-iterators.md | 202 ++++++++++++++++++ .../riscv/rvv/autovec/pr116086-2-run.c | 6 + .../gcc.target/riscv/rvv/autovec/pr116086-2.c | 18 ++ .../gcc.target/riscv/rvv/autovec/pr116086.c | 76 +++++++ gcc/testsuite/lib/target-supports.exp | 37 ++++ 7 files changed, 385 insertions(+) create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/autovec/pr116086-2-run.c create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/autovec/pr116086-2.c create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/autovec/pr116086.c diff --git a/gcc/config/riscv/autovec.md b/gcc/config/riscv/autovec.md index f422ec0dc1e..4703b079fcb 100644 --- a/gcc/config/riscv/autovec.md +++ b/gcc/config/riscv/autovec.md @@ -1462,6 +1462,41 @@ (define_expand "vec_extract<mode>bi" DONE; }) +;; ------------------------------------------------------------------------- +;; ---- [INT,FP] Extract a vector from a vector. +;; ------------------------------------------------------------------------- +;; TODO: This can be extended to allow basically any extract mode. +;; For now this helps optimize VLS subregs like (subreg:V2DI (reg:V4DI) 16) +;; that would otherwise need to go via memory. + +(define_expand "vec_extract<mode><vls_half>" + [(set (match_operand:<VLS_HALF> 0 "nonimmediate_operand") + (vec_select:<VLS_HALF> + (match_operand:VLS_HAS_HALF 1 "register_operand") + (parallel + [(match_operand 2 "immediate_operand")])))] + "TARGET_VECTOR" +{ + int sz = GET_MODE_NUNITS (<VLS_HALF>mode).to_constant (); + int part = INTVAL (operands[2]); + + rtx start = GEN_INT (part * sz); + rtx tmp = operands[1]; + + if (part != 0) + { + tmp = gen_reg_rtx (<MODE>mode); + + rtx ops[] = {tmp, operands[1], start}; + riscv_vector::emit_vlmax_insn + (code_for_pred_slide (UNSPEC_VSLIDEDOWN, <MODE>mode), + riscv_vector::BINARY_OP, ops); + } + + emit_move_insn (operands[0], gen_lowpart (<VLS_HALF>mode, tmp)); + DONE; +}) + ;; ------------------------------------------------------------------------- ;; ---- [FP] Binary operations ;; ------------------------------------------------------------------------- diff --git a/gcc/config/riscv/riscv.cc b/gcc/config/riscv/riscv.cc index 8538d405f50..4b9f3081ac5 100644 --- a/gcc/config/riscv/riscv.cc +++ b/gcc/config/riscv/riscv.cc @@ -10630,6 +10630,17 @@ riscv_can_change_mode_class (machine_mode from, machine_mode to, if (reg_classes_intersect_p (V_REGS, rclass) && !ordered_p (GET_MODE_PRECISION (from), GET_MODE_PRECISION (to))) return false; + + /* Subregs of modes larger than one vector are ambiguous. + A V4DImode with rv64gcv_zvl128b could, for example, span two registers/one + register group of two at VLEN = 128 or one register at VLEN >= 256 and + we cannot, statically, determine which part of it to extract. + Therefore prevent that. */ + if (reg_classes_intersect_p (V_REGS, rclass) + && riscv_v_ext_vls_mode_p (from) + && !ordered_p (BITS_PER_RISCV_VECTOR, GET_MODE_PRECISION (from))) + return false; + return !reg_classes_intersect_p (FP_REGS, rclass); } diff --git a/gcc/config/riscv/vector-iterators.md b/gcc/config/riscv/vector-iterators.md index cbbd248c9bb..a00b5c3fedd 100644 --- a/gcc/config/riscv/vector-iterators.md +++ b/gcc/config/riscv/vector-iterators.md @@ -4126,3 +4126,205 @@ (define_mode_attr VSIX8 [ (define_mode_attr VSIX16 [ (RVVMF2SI "RVVM8SI") ]) + +(define_mode_iterator VLS_HAS_HALF [ + (V2QI "riscv_vector::vls_mode_valid_p (V2QImode)") + (V4QI "riscv_vector::vls_mode_valid_p (V4QImode)") + (V8QI "riscv_vector::vls_mode_valid_p (V8QImode)") + (V16QI "riscv_vector::vls_mode_valid_p (V16QImode)") + (V2HI "riscv_vector::vls_mode_valid_p (V2HImode)") + (V4HI "riscv_vector::vls_mode_valid_p (V4HImode)") + (V8HI "riscv_vector::vls_mode_valid_p (V8HImode)") + (V16HI "riscv_vector::vls_mode_valid_p (V16HImode)") + (V2SI "riscv_vector::vls_mode_valid_p (V2SImode)") + (V4SI "riscv_vector::vls_mode_valid_p (V4SImode)") + (V8SI "riscv_vector::vls_mode_valid_p (V8SImode)") + (V16SI "riscv_vector::vls_mode_valid_p (V16SImode) && TARGET_MIN_VLEN >= 64") + (V2DI "riscv_vector::vls_mode_valid_p (V2DImode) && TARGET_VECTOR_ELEN_64") + (V4DI "riscv_vector::vls_mode_valid_p (V4DImode) && TARGET_VECTOR_ELEN_64") + (V8DI "riscv_vector::vls_mode_valid_p (V8DImode) && TARGET_VECTOR_ELEN_64 && TARGET_MIN_VLEN >= 64") + (V16DI "riscv_vector::vls_mode_valid_p (V16DImode) && TARGET_VECTOR_ELEN_64 && TARGET_MIN_VLEN >= 128") + (V2SF "riscv_vector::vls_mode_valid_p (V2SFmode) && TARGET_VECTOR_ELEN_FP_32") + (V4SF "riscv_vector::vls_mode_valid_p (V4SFmode) && TARGET_VECTOR_ELEN_FP_32") + (V8SF "riscv_vector::vls_mode_valid_p (V8SFmode) && TARGET_VECTOR_ELEN_FP_32") + (V16SF "riscv_vector::vls_mode_valid_p (V16SFmode) && TARGET_VECTOR_ELEN_FP_32 && TARGET_MIN_VLEN >= 64") + (V2DF "riscv_vector::vls_mode_valid_p (V2DFmode) && TARGET_VECTOR_ELEN_FP_64") + (V4DF "riscv_vector::vls_mode_valid_p (V4DFmode) && TARGET_VECTOR_ELEN_FP_64") + (V8DF "riscv_vector::vls_mode_valid_p (V8DFmode) && TARGET_VECTOR_ELEN_FP_64 && TARGET_MIN_VLEN >= 64") + (V16DF "riscv_vector::vls_mode_valid_p (V16DFmode) && TARGET_VECTOR_ELEN_FP_64 && TARGET_MIN_VLEN >= 128") + (V32QI "riscv_vector::vls_mode_valid_p (V32QImode)") + (V64QI "riscv_vector::vls_mode_valid_p (V64QImode) && TARGET_MIN_VLEN >= 64") + (V128QI "riscv_vector::vls_mode_valid_p (V128QImode) && TARGET_MIN_VLEN >= 128") + (V256QI "riscv_vector::vls_mode_valid_p (V256QImode) && TARGET_MIN_VLEN >= 256") + (V512QI "riscv_vector::vls_mode_valid_p (V512QImode) && TARGET_MIN_VLEN >= 512") + (V1024QI "riscv_vector::vls_mode_valid_p (V1024QImode) && TARGET_MIN_VLEN >= 1024") + (V2048QI "riscv_vector::vls_mode_valid_p (V2048QImode) && TARGET_MIN_VLEN >= 2048") + (V4096QI "riscv_vector::vls_mode_valid_p (V4096QImode) && TARGET_MIN_VLEN >= 4096") + (V32HI "riscv_vector::vls_mode_valid_p (V32HImode) && TARGET_MIN_VLEN >= 64") + (V64HI "riscv_vector::vls_mode_valid_p (V64HImode) && TARGET_MIN_VLEN >= 128") + (V128HI "riscv_vector::vls_mode_valid_p (V128HImode) && TARGET_MIN_VLEN >= 256") + (V256HI "riscv_vector::vls_mode_valid_p (V256HImode) && TARGET_MIN_VLEN >= 512") + (V512HI "riscv_vector::vls_mode_valid_p (V512HImode) && TARGET_MIN_VLEN >= 1024") + (V1024HI "riscv_vector::vls_mode_valid_p (V1024HImode) && TARGET_MIN_VLEN >= 2048") + (V2048HI "riscv_vector::vls_mode_valid_p (V2048HImode) && TARGET_MIN_VLEN >= 4096") + (V32SI "riscv_vector::vls_mode_valid_p (V32SImode) && TARGET_MIN_VLEN >= 128") + (V64SI "riscv_vector::vls_mode_valid_p (V64SImode) && TARGET_MIN_VLEN >= 256") + (V128SI "riscv_vector::vls_mode_valid_p (V128SImode) && TARGET_MIN_VLEN >= 512") + (V256SI "riscv_vector::vls_mode_valid_p (V256SImode) && TARGET_MIN_VLEN >= 1024") + (V512SI "riscv_vector::vls_mode_valid_p (V512SImode) && TARGET_MIN_VLEN >= 2048") + (V1024SI "riscv_vector::vls_mode_valid_p (V1024SImode) && TARGET_MIN_VLEN >= 4096") + (V32DI "riscv_vector::vls_mode_valid_p (V32DImode) && TARGET_VECTOR_ELEN_64 && TARGET_MIN_VLEN >= 256") + (V64DI "riscv_vector::vls_mode_valid_p (V64DImode) && TARGET_VECTOR_ELEN_64 && TARGET_MIN_VLEN >= 512") + (V128DI "riscv_vector::vls_mode_valid_p (V128DImode) && TARGET_VECTOR_ELEN_64 && TARGET_MIN_VLEN >= 1024") + (V256DI "riscv_vector::vls_mode_valid_p (V256DImode) && TARGET_VECTOR_ELEN_64 && TARGET_MIN_VLEN >= 2048") + (V512DI "riscv_vector::vls_mode_valid_p (V512DImode) && TARGET_VECTOR_ELEN_64 && TARGET_MIN_VLEN >= 4096") + (V32SF "riscv_vector::vls_mode_valid_p (V32SFmode) && TARGET_VECTOR_ELEN_FP_32 && TARGET_MIN_VLEN >= 128") + (V64SF "riscv_vector::vls_mode_valid_p (V64SFmode) && TARGET_VECTOR_ELEN_FP_32 && TARGET_MIN_VLEN >= 256") + (V128SF "riscv_vector::vls_mode_valid_p (V128SFmode) && TARGET_VECTOR_ELEN_FP_32 && TARGET_MIN_VLEN >= 512") + (V256SF "riscv_vector::vls_mode_valid_p (V256SFmode) && TARGET_VECTOR_ELEN_FP_32 && TARGET_MIN_VLEN >= 1024") + (V512SF "riscv_vector::vls_mode_valid_p (V512SFmode) && TARGET_VECTOR_ELEN_FP_32 && TARGET_MIN_VLEN >= 2048") + (V1024SF "riscv_vector::vls_mode_valid_p (V1024SFmode) && TARGET_VECTOR_ELEN_FP_32 && TARGET_MIN_VLEN >= 4096") + (V32DF "riscv_vector::vls_mode_valid_p (V32DFmode) && TARGET_VECTOR_ELEN_FP_64 && TARGET_MIN_VLEN >= 256") + (V64DF "riscv_vector::vls_mode_valid_p (V64DFmode) && TARGET_VECTOR_ELEN_FP_64 && TARGET_MIN_VLEN >= 512") + (V128DF "riscv_vector::vls_mode_valid_p (V128DFmode) && TARGET_VECTOR_ELEN_FP_64 && TARGET_MIN_VLEN >= 1024") + (V256DF "riscv_vector::vls_mode_valid_p (V256DFmode) && TARGET_VECTOR_ELEN_FP_64 && TARGET_MIN_VLEN >= 2048") + (V512DF "riscv_vector::vls_mode_valid_p (V512DFmode) && TARGET_VECTOR_ELEN_FP_64 && TARGET_MIN_VLEN >= 4096") +]) + +(define_mode_attr VLS_HALF [ + (V2QI "V1QI") + (V4QI "V2QI") + (V8QI "V4QI") + (V16QI "V8QI") + (V32QI "V16QI") + (V64QI "V32QI") + (V128QI "V64QI") + (V256QI "V128QI") + (V512QI "V256QI") + (V1024QI "V512QI") + (V2048QI "V1024QI") + (V4096QI "V2048QI") + + (V2HI "V1HI") + (V4HI "V2HI") + (V8HI "V4HI") + (V16HI "V8HI") + (V32HI "V16HI") + (V64HI "V32HI") + (V128HI "V64HI") + (V256HI "V128HI") + (V512HI "V256HI") + (V1024HI "V512HI") + (V2048HI "V1024HI") + + (V2SI "V1SI") + (V4SI "V2SI") + (V8SI "V4SI") + (V16SI "V8SI") + (V32SI "V16SI") + (V64SI "V32SI") + (V128SI "V64SI") + (V256SI "V128SI") + (V512SI "V256SI") + (V1024SI "V512SI") + + (V2DI "V1DI") + (V4DI "V2DI") + (V8DI "V4DI") + (V16DI "V8DI") + (V32DI "V16DI") + (V64DI "V32DI") + (V128DI "V64DI") + (V256DI "V128DI") + (V512DI "V256DI") + + (V2SF "V1SF") + (V4SF "V2SF") + (V8SF "V4SF") + (V16SF "V8SF") + (V32SF "V16SF") + (V64SF "V32SF") + (V128SF "V64SF") + (V256SF "V128SF") + (V512SF "V256SF") + (V1024SF "V512SF") + + (V2DF "V1DF") + (V4DF "V2DF") + (V8DF "V4DF") + (V16DF "V8DF") + (V32DF "V16DF") + (V64DF "V32DF") + (V128DF "V64DF") + (V256DF "V128DF") + (V512DF "V256DF") +]) + +(define_mode_attr vls_half [ + (V2QI "v1qi") + (V4QI "v2qi") + (V8QI "v4qi") + (V16QI "v8qi") + (V32QI "v16qi") + (V64QI "v32qi") + (V128QI "v64qi") + (V256QI "v128qi") + (V512QI "v256qi") + (V1024QI "v512qi") + (V2048QI "v1024qi") + (V4096QI "v2048qi") + + (V2HI "v1hi") + (V4HI "v2hi") + (V8HI "v4hi") + (V16HI "v8hi") + (V32HI "v16hi") + (V64HI "v32hi") + (V128HI "v64hi") + (V256HI "v128hi") + (V512HI "v256hi") + (V1024HI "v512hi") + (V2048HI "v1024hi") + + (V2SI "v1si") + (V4SI "v2si") + (V8SI "v4si") + (V16SI "v8si") + (V32SI "v16si") + (V64SI "v32si") + (V128SI "v64si") + (V256SI "v128si") + (V512SI "v256si") + (V1024SI "v512si") + + (V2DI "v1di") + (V4DI "v2di") + (V8DI "v4di") + (V16DI "v8di") + (V32DI "v16di") + (V64DI "v32di") + (V128DI "v64di") + (V256DI "v128di") + (V512DI "v256di") + + (V2SF "v1sf") + (V4SF "v2sf") + (V8SF "v4sf") + (V16SF "v8sf") + (V32SF "v16sf") + (V64SF "v32sf") + (V128SF "v64sf") + (V256SF "v128sf") + (V512SF "v256sf") + (V1024SF "v512sf") + + (V2DF "v1df") + (V4DF "v2df") + (V8DF "v4df") + (V16DF "v8df") + (V32DF "v16df") + (V64DF "v32df") + (V128DF "v64df") + (V256DF "v128df") + (V512DF "v256df") +]) diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/pr116086-2-run.c b/gcc/testsuite/gcc.target/riscv/rvv/autovec/pr116086-2-run.c new file mode 100644 index 00000000000..87cd3834067 --- /dev/null +++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/pr116086-2-run.c @@ -0,0 +1,6 @@ +/* { dg-do run } */ +/* { dg-require-effective-target riscv_v } */ +/* { dg-require-effective-target rvv_zvl256b_ok } */ +/* { dg-options "-march=rv64gcv -mabi=lp64d -mrvv-max-lmul=m2" } */ + +#include "pr116086-2.c" diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/pr116086-2.c b/gcc/testsuite/gcc.target/riscv/rvv/autovec/pr116086-2.c new file mode 100644 index 00000000000..8b5ea6be955 --- /dev/null +++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/pr116086-2.c @@ -0,0 +1,18 @@ +/* { dg-do compile } */ +/* { dg-options "-march=rv64gcv -mabi=lp64d -mrvv-max-lmul=m2" } */ + +long a; +long b; +long c[80]; +int main() { + for (int d = 0; d < 16; d++) + c[d] = a; + for (int d = 16; d < 80; d++) + c[d] = c[d - 2]; + for (int d = 0; d < 80; d += 8) + b += c[d]; + if (b != 0) + __builtin_abort (); +} + +/* { dg-final { scan-assembler-times "vmv1r" 0 } } */ diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/pr116086.c b/gcc/testsuite/gcc.target/riscv/rvv/autovec/pr116086.c new file mode 100644 index 00000000000..0d711f69437 --- /dev/null +++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/pr116086.c @@ -0,0 +1,76 @@ +/* { dg-do run } */ +/* { dg-require-effective-target riscv_v } */ +/* { dg-require-effective-target rvv_zvl256b_ok } */ +/* { dg-options "-march=rv64gcv -mabi=lp64d -mrvv-max-lmul=m2" } */ + +typedef unsigned int uint32_t; +typedef unsigned long long uint64_t; + +typedef struct +{ + uint64_t length; + uint64_t state[8]; + uint32_t curlen; + unsigned char buf[128]; +} sha512_state; + +static uint64_t load64(const unsigned char* y) +{ + uint64_t res = 0; + for(int i = 0; i != 8; ++i) + res |= (uint64_t)(y[i]) << ((7-i) * 8); + return res; +} + +static const uint64_t K[80] = +{ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 }; + +__attribute__ ((noipa)) +static void sha_compress(sha512_state *md, const unsigned char *buf) +{ + uint64_t S[8], W[80]; + + for(int i = 0; i < 8; i++) + S[i] = 0; + + // Copy the state into 1024-bits into W[0..15] + for(int i = 0; i < 16; i++) + W[i] = load64(buf + (8*i)); + + // Fill W[16..79] + for(int i = 16; i < 80; i++) + W[i] = W[i - 2] + W[i - 7] + W[i - 15] + W[i - 16]; + + S[7] = W[72]; + + // Feedback + for(int i = 0; i < 8; i++) + md->state[i] = md->state[i] + S[i]; +} + +int main () +{ + sha512_state md; + md.curlen = 0; + md.length = 0; + md.state[0] = 0; + md.state[1] = 0; + md.state[2] = 0; + md.state[3] = 0; + md.state[4] = 0; + md.state[5] = 0; + md.state[6] = 0; + md.state[7] = 0; + + for (int i = 0; i < 128; i++) + md.buf[i] = 0; + + md.buf[md.curlen++] = (unsigned char)0x80; + + sha_compress (&md, md.buf); + + if (md.state[7] != 0x8000000000000000ULL) + __builtin_abort (); + + return 0; +} diff --git a/gcc/testsuite/lib/target-supports.exp b/gcc/testsuite/lib/target-supports.exp index 3501ce44b76..1f5247bcb6f 100644 --- a/gcc/testsuite/lib/target-supports.exp +++ b/gcc/testsuite/lib/target-supports.exp @@ -1955,6 +1955,43 @@ proc check_effective_target_riscv_v { } { }] } +# Return 1 if the target runtime supports 256-bit vectors, 0 otherwise. +# Cache the result. + +proc check_effective_target_rvv_zvl256b_ok { } { + # Check if the target has a VLENB of 32. + set gcc_march [regsub {[[:alnum:]]*} [riscv_get_arch] &v] + return [check_runtime ${gcc_march}_exec { + int main() + { + int vlenb = 0; + asm ("csrr %0,vlenb" : "=r" (vlenb) : : ); + if (vlenb == 32) + return 1; + return 0; + } + } "-march=${gcc_march}"] +} + +# Return 1 if the target runtime supports 512-bit vectors, 0 otherwise. +# Cache the result. + +proc check_effective_target_rvv_zvl512b_ok { } { + # Check if the target has a VLENB of 64. + set gcc_march [regsub {[[:alnum:]]*} [riscv_get_arch] &v] + return [check_runtime ${gcc_march}_exec { + int main() + { + int vlenb = 0; + asm ("csrr %0,vlenb" : "=r" (vlenb) : : ); + if (vlenb == 64) + return 1; + return 0; + } + } "-march=${gcc_march}"] +} + + # Return 1 if the target arch supports the Zvfh extension, 0 otherwise. # Cache the result. -- 2.46.0