Hi,

this adds a V16SI -> V4SI (and related, i.e. "quartering") vector-vector
extract expander for VLS modes.  It helps avoid unnecessary spills in
x264.
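For illustration (my example, not part of the patch): when V16SI is an
enabled VLS mode (e.g. with -mrvv-vector-bits=zvl and a large enough
VLEN), a subvector extract like the one below can be recognized as a
V16SI -> V4SI vec_extract.  The function name is just a placeholder;
this is a minimal sketch of the kind of code that benefits:

  typedef int v16si __attribute__ ((vector_size (64)));
  typedef int v4si __attribute__ ((vector_size (16)));

  /* Extract the third quarter (elements 8..11) of A.  With the new
     expander this becomes a vslidedown by 8 plus a lowpart move of
     the result instead of a spill and reload via the stack.  */
  v4si
  extract_q2 (v16si a)
  {
    return (v4si) {a[8], a[9], a[10], a[11]};
  }

The part == 0 case needs no slide at all and is just a lowpart move of
the source register.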
Regtested on rv64gcv_zvfh_zvbb.

Regards
 Robin

gcc/ChangeLog:

	* config/riscv/autovec.md (vec_extract<mode><vls_quarter>): Add
	quarter vec-vec extract.
	* config/riscv/vector-iterators.md: New iterators.
---
 gcc/config/riscv/autovec.md          |  28 ++++
 gcc/config/riscv/vector-iterators.md | 184 +++++++++++++++++++++++++++
 2 files changed, 212 insertions(+)

diff --git a/gcc/config/riscv/autovec.md b/gcc/config/riscv/autovec.md
index a07aa0c26fd..905dcfe2dbc 100644
--- a/gcc/config/riscv/autovec.md
+++ b/gcc/config/riscv/autovec.md
@@ -1488,6 +1488,34 @@ (define_expand "vec_extract<mode><vls_half>"
   DONE;
 })
 
+(define_expand "vec_extract<mode><vls_quarter>"
+  [(set (match_operand:<VLS_QUARTER> 0 "nonimmediate_operand")
+        (vec_select:<VLS_QUARTER>
+          (match_operand:VLS_HAS_QUARTER 1 "register_operand")
+          (parallel
+            [(match_operand 2 "immediate_operand")])))]
+  "TARGET_VECTOR"
+{
+  int sz = GET_MODE_NUNITS (<VLS_QUARTER>mode).to_constant ();
+  int part = INTVAL (operands[2]);
+
+  rtx start = GEN_INT (part * sz);
+  rtx tmp = operands[1];
+
+  if (part != 0)
+    {
+      tmp = gen_reg_rtx (<MODE>mode);
+
+      rtx ops[] = {tmp, operands[1], start};
+      riscv_vector::emit_vlmax_insn
+        (code_for_pred_slide (UNSPEC_VSLIDEDOWN, <MODE>mode),
+         riscv_vector::BINARY_OP, ops);
+    }
+
+  emit_move_insn (operands[0], gen_lowpart (<VLS_QUARTER>mode, tmp));
+  DONE;
+})
+
 ;; -------------------------------------------------------------------------
 ;; ---- [FP] Binary operations
 ;; -------------------------------------------------------------------------
diff --git a/gcc/config/riscv/vector-iterators.md b/gcc/config/riscv/vector-iterators.md
index f27b89e841b..62195f65170 100644
--- a/gcc/config/riscv/vector-iterators.md
+++ b/gcc/config/riscv/vector-iterators.md
@@ -4552,3 +4552,187 @@ (define_mode_attr vls_half [
   (V256DF "v128df")
   (V512DF "v256df")
 ])
+
+(define_mode_iterator VLS_HAS_QUARTER [
+  (V4QI "riscv_vector::vls_mode_valid_p (V4QImode)")
+  (V8QI "riscv_vector::vls_mode_valid_p (V8QImode)")
+  (V16QI "riscv_vector::vls_mode_valid_p (V16QImode)")
+  (V4HI "riscv_vector::vls_mode_valid_p (V4HImode)")
+  (V8HI "riscv_vector::vls_mode_valid_p (V8HImode)")
+  (V16HI "riscv_vector::vls_mode_valid_p (V16HImode)")
+  (V4SI "riscv_vector::vls_mode_valid_p (V4SImode)")
+  (V8SI "riscv_vector::vls_mode_valid_p (V8SImode)")
+  (V16SI "riscv_vector::vls_mode_valid_p (V16SImode) && TARGET_MIN_VLEN >= 64")
+  (V4DI "riscv_vector::vls_mode_valid_p (V4DImode) && TARGET_VECTOR_ELEN_64")
+  (V8DI "riscv_vector::vls_mode_valid_p (V8DImode) && TARGET_VECTOR_ELEN_64 && TARGET_MIN_VLEN >= 64")
+  (V16DI "riscv_vector::vls_mode_valid_p (V16DImode) && TARGET_VECTOR_ELEN_64 && TARGET_MIN_VLEN >= 128")
+  (V4SF "riscv_vector::vls_mode_valid_p (V4SFmode) && TARGET_VECTOR_ELEN_FP_32")
+  (V8SF "riscv_vector::vls_mode_valid_p (V8SFmode) && TARGET_VECTOR_ELEN_FP_32")
+  (V16SF "riscv_vector::vls_mode_valid_p (V16SFmode) && TARGET_VECTOR_ELEN_FP_32 && TARGET_MIN_VLEN >= 64")
+  (V4DF "riscv_vector::vls_mode_valid_p (V4DFmode) && TARGET_VECTOR_ELEN_FP_64")
+  (V8DF "riscv_vector::vls_mode_valid_p (V8DFmode) && TARGET_VECTOR_ELEN_FP_64 && TARGET_MIN_VLEN >= 64")
+  (V16DF "riscv_vector::vls_mode_valid_p (V16DFmode) && TARGET_VECTOR_ELEN_FP_64 && TARGET_MIN_VLEN >= 128")
+  (V32QI "riscv_vector::vls_mode_valid_p (V32QImode)")
+  (V64QI "riscv_vector::vls_mode_valid_p (V64QImode) && TARGET_MIN_VLEN >= 64")
+  (V128QI "riscv_vector::vls_mode_valid_p (V128QImode) && TARGET_MIN_VLEN >= 128")
+  (V256QI "riscv_vector::vls_mode_valid_p (V256QImode) && TARGET_MIN_VLEN >= 256")
+  (V512QI "riscv_vector::vls_mode_valid_p (V512QImode) && TARGET_MIN_VLEN >= 512")
+  (V1024QI "riscv_vector::vls_mode_valid_p (V1024QImode) && TARGET_MIN_VLEN >= 1024")
+  (V2048QI "riscv_vector::vls_mode_valid_p (V2048QImode) && TARGET_MIN_VLEN >= 2048")
+  (V4096QI "riscv_vector::vls_mode_valid_p (V4096QImode) && TARGET_MIN_VLEN >= 4096")
+  (V32HI "riscv_vector::vls_mode_valid_p (V32HImode) && TARGET_MIN_VLEN >= 64")
+  (V64HI "riscv_vector::vls_mode_valid_p (V64HImode) && TARGET_MIN_VLEN >= 128")
+  (V128HI "riscv_vector::vls_mode_valid_p (V128HImode) && TARGET_MIN_VLEN >= 256")
+  (V256HI "riscv_vector::vls_mode_valid_p (V256HImode) && TARGET_MIN_VLEN >= 512")
+  (V512HI "riscv_vector::vls_mode_valid_p (V512HImode) && TARGET_MIN_VLEN >= 1024")
+  (V1024HI "riscv_vector::vls_mode_valid_p (V1024HImode) && TARGET_MIN_VLEN >= 2048")
+  (V2048HI "riscv_vector::vls_mode_valid_p (V2048HImode) && TARGET_MIN_VLEN >= 4096")
+  (V32SI "riscv_vector::vls_mode_valid_p (V32SImode) && TARGET_MIN_VLEN >= 128")
+  (V64SI "riscv_vector::vls_mode_valid_p (V64SImode) && TARGET_MIN_VLEN >= 256")
+  (V128SI "riscv_vector::vls_mode_valid_p (V128SImode) && TARGET_MIN_VLEN >= 512")
+  (V256SI "riscv_vector::vls_mode_valid_p (V256SImode) && TARGET_MIN_VLEN >= 1024")
+  (V512SI "riscv_vector::vls_mode_valid_p (V512SImode) && TARGET_MIN_VLEN >= 2048")
+  (V1024SI "riscv_vector::vls_mode_valid_p (V1024SImode) && TARGET_MIN_VLEN >= 4096")
+  (V32DI "riscv_vector::vls_mode_valid_p (V32DImode) && TARGET_VECTOR_ELEN_64 && TARGET_MIN_VLEN >= 256")
+  (V64DI "riscv_vector::vls_mode_valid_p (V64DImode) && TARGET_VECTOR_ELEN_64 && TARGET_MIN_VLEN >= 512")
+  (V128DI "riscv_vector::vls_mode_valid_p (V128DImode) && TARGET_VECTOR_ELEN_64 && TARGET_MIN_VLEN >= 1024")
+  (V256DI "riscv_vector::vls_mode_valid_p (V256DImode) && TARGET_VECTOR_ELEN_64 && TARGET_MIN_VLEN >= 2048")
+  (V512DI "riscv_vector::vls_mode_valid_p (V512DImode) && TARGET_VECTOR_ELEN_64 && TARGET_MIN_VLEN >= 4096")
+  (V32SF "riscv_vector::vls_mode_valid_p (V32SFmode) && TARGET_VECTOR_ELEN_FP_32 && TARGET_MIN_VLEN >= 128")
+  (V64SF "riscv_vector::vls_mode_valid_p (V64SFmode) && TARGET_VECTOR_ELEN_FP_32 && TARGET_MIN_VLEN >= 256")
+  (V128SF "riscv_vector::vls_mode_valid_p (V128SFmode) && TARGET_VECTOR_ELEN_FP_32 && TARGET_MIN_VLEN >= 512")
+  (V256SF "riscv_vector::vls_mode_valid_p (V256SFmode) && TARGET_VECTOR_ELEN_FP_32 && TARGET_MIN_VLEN >= 1024")
+  (V512SF "riscv_vector::vls_mode_valid_p (V512SFmode) && TARGET_VECTOR_ELEN_FP_32 && TARGET_MIN_VLEN >= 2048")
+  (V1024SF "riscv_vector::vls_mode_valid_p (V1024SFmode) && TARGET_VECTOR_ELEN_FP_32 && TARGET_MIN_VLEN >= 4096")
+  (V32DF "riscv_vector::vls_mode_valid_p (V32DFmode) && TARGET_VECTOR_ELEN_FP_64 && TARGET_MIN_VLEN >= 256")
+  (V64DF "riscv_vector::vls_mode_valid_p (V64DFmode) && TARGET_VECTOR_ELEN_FP_64 && TARGET_MIN_VLEN >= 512")
+  (V128DF "riscv_vector::vls_mode_valid_p (V128DFmode) && TARGET_VECTOR_ELEN_FP_64 && TARGET_MIN_VLEN >= 1024")
+  (V256DF "riscv_vector::vls_mode_valid_p (V256DFmode) && TARGET_VECTOR_ELEN_FP_64 && TARGET_MIN_VLEN >= 2048")
+  (V512DF "riscv_vector::vls_mode_valid_p (V512DFmode) && TARGET_VECTOR_ELEN_FP_64 && TARGET_MIN_VLEN >= 4096")
+])
+
+(define_mode_attr VLS_QUARTER [
+  (V4QI "V1QI")
+  (V8QI "V2QI")
+  (V16QI "V4QI")
+  (V32QI "V8QI")
+  (V64QI "V16QI")
+  (V128QI "V32QI")
+  (V256QI "V64QI")
+  (V512QI "V128QI")
+  (V1024QI "V256QI")
+  (V2048QI "V512QI")
+  (V4096QI "V1024QI")
+
+  (V4HI "V1HI")
+  (V8HI "V2HI")
+  (V16HI "V4HI")
+  (V32HI "V8HI")
+  (V64HI "V16HI")
+  (V128HI "V32HI")
+  (V256HI "V64HI")
+  (V512HI "V128HI")
+  (V1024HI "V256HI")
+  (V2048HI "V512HI")
+
+  (V4SI "V1SI")
+  (V8SI "V2SI")
+  (V16SI "V4SI")
+  (V32SI "V8SI")
+  (V64SI "V16SI")
+  (V128SI "V32SI")
+  (V256SI "V64SI")
+  (V512SI "V128SI")
+  (V1024SI "V256SI")
+
+  (V4DI "V1DI")
+  (V8DI "V2DI")
+  (V16DI "V4DI")
+  (V32DI "V8DI")
+  (V64DI "V16DI")
+  (V128DI "V32DI")
+  (V256DI "V64DI")
+  (V512DI "V128DI")
+
+  (V4SF "V1SF")
+  (V8SF "V2SF")
+  (V16SF "V4SF")
+  (V32SF "V8SF")
+  (V64SF "V16SF")
+  (V128SF "V32SF")
+  (V256SF "V64SF")
+  (V512SF "V128SF")
+  (V1024SF "V256SF")
+
+  (V4DF "V1DF")
+  (V8DF "V2DF")
+  (V16DF "V4DF")
+  (V32DF "V8DF")
+  (V64DF "V16DF")
+  (V128DF "V32DF")
+  (V256DF "V64DF")
+  (V512DF "V128DF")
+])
+
+(define_mode_attr vls_quarter [
+  (V4QI "v1qi")
+  (V8QI "v2qi")
+  (V16QI "v4qi")
+  (V32QI "v8qi")
+  (V64QI "v16qi")
+  (V128QI "v32qi")
+  (V256QI "v64qi")
+  (V512QI "v128qi")
+  (V1024QI "v256qi")
+  (V2048QI "v512qi")
+  (V4096QI "v1024qi")
+
+  (V4HI "v1hi")
+  (V8HI "v2hi")
+  (V16HI "v4hi")
+  (V32HI "v8hi")
+  (V64HI "v16hi")
+  (V128HI "v32hi")
+  (V256HI "v64hi")
+  (V512HI "v128hi")
+  (V1024HI "v256hi")
+  (V2048HI "v512hi")
+
+  (V4SI "v1si")
+  (V8SI "v2si")
+  (V16SI "v4si")
+  (V32SI "v8si")
+  (V64SI "v16si")
+  (V128SI "v32si")
+  (V256SI "v64si")
+  (V512SI "v128si")
+  (V1024SI "v256si")
+
+  (V4DI "v1di")
+  (V8DI "v2di")
+  (V16DI "v4di")
+  (V32DI "v8di")
+  (V64DI "v16di")
+  (V128DI "v32di")
+  (V256DI "v64di")
+  (V512DI "v128di")
+
+  (V4SF "v1sf")
+  (V8SF "v2sf")
+  (V16SF "v4sf")
+  (V32SF "v8sf")
+  (V64SF "v16sf")
+  (V128SF "v32sf")
+  (V256SF "v64sf")
+  (V512SF "v128sf")
+  (V1024SF "v256sf")
+
+  (V4DF "v1df")
+  (V8DF "v2df")
+  (V16DF "v4df")
+  (V32DF "v8df")
+  (V64DF "v16df")
+  (V128DF "v32df")
+  (V256DF "v64df")
+  (V512DF "v128df")
+])
-- 
2.46.0