From: Yunze Zhu <yunze...@linux.alibaba.com> This commit adds support for the xtheadvector-specific indexed segment load/store intrinsics with b/h/w suffixes. It also defines the enum values used in thead-vector.md. https://github.com/XUANTIE-RV/thead-extension-spec/pull/66
V2: Change to reuse existed thead function base th_loadstore_width. gcc/ChangeLog: * config/riscv/riscv-vector-builtins-bases.cc (BASE): New base_name. * config/riscv/riscv-vector-builtins-bases.h: New function_base. * config/riscv/riscv-vector-builtins-shapes.cc (build): Define new builtin shapes. * config/riscv/riscv-vector-builtins.cc: New rvv_op_info. * config/riscv/thead-vector-builtins-functions.def (vlxsegb): New intrinsics def. (vlxsegh): Ditto. (vlxsegw): Ditto. (vlxsegbu): Ditto. (vlxseghu): Ditto. (vlxsegwu): Ditto. (vsxsegb): Ditto. (vsxsegh): Ditto. (vsxsegw): Ditto. * config/riscv/thead-vector.md (@pred_th_indexed_load<vlmem_op_attr><VT:mode><VI:mode>): New RTL mode. (@pred_th_indexed_store<vlmem_op_attr><VT:mode><VI:mode>): Ditto. gcc/testsuite/ChangeLog: * gcc.target/riscv/rvv/xtheadvector/vlxseg-vsxseg.c: New test. * gcc.target/riscv/rvv/xtheadvector/vlxsegu-vsxseg.c: New test. --- .../riscv/riscv-vector-builtins-bases.cc | 31 ++++- .../riscv/riscv-vector-builtins-bases.h | 9 ++ .../riscv/riscv-vector-builtins-shapes.cc | 4 + gcc/config/riscv/riscv-vector-builtins.cc | 24 ++++ .../riscv/thead-vector-builtins-functions.def | 9 ++ gcc/config/riscv/thead-vector.md | 60 +++++++++ .../riscv/rvv/xtheadvector/vlxseg-vsxseg.c | 125 ++++++++++++++++++ .../riscv/rvv/xtheadvector/vlxsegu-vsxseg.c | 125 ++++++++++++++++++ 8 files changed, 386 insertions(+), 1 deletion(-) create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/xtheadvector/vlxseg-vsxseg.c create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/xtheadvector/vlxsegu-vsxseg.c diff --git a/gcc/config/riscv/riscv-vector-builtins-bases.cc b/gcc/config/riscv/riscv-vector-builtins-bases.cc index a52ae921639..380c695ccb1 100644 --- a/gcc/config/riscv/riscv-vector-builtins-bases.cc +++ b/gcc/config/riscv/riscv-vector-builtins-bases.cc @@ -2194,7 +2194,18 @@ public: } else { - if (LST_TYPE == LST_STRIDED) + if (LST_TYPE == LST_INDEXED) + { + if (STORE_P) + return e.use_exact_insn ( + 
code_for_pred_th_indexed_store (UNSPEC, e.vector_mode (), + e.index_mode ())); + else + return e.use_exact_insn ( + code_for_pred_th_indexed_load (UNSPEC, e.vector_mode (), + e.index_mode ())); + } + else if (LST_TYPE == LST_STRIDED) { if (STORE_P) return e.use_exact_insn ( @@ -2798,6 +2809,15 @@ static CONSTEXPR const th_loadstore_width<false, LST_STRIDED, true, UNSPEC_TH_VL static CONSTEXPR const th_loadstore_width<true, LST_STRIDED, true, UNSPEC_TH_VLSSEGB> vsssegb_obj; static CONSTEXPR const th_loadstore_width<true, LST_STRIDED, true, UNSPEC_TH_VLSSEGH> vsssegh_obj; static CONSTEXPR const th_loadstore_width<true, LST_STRIDED, true, UNSPEC_TH_VLSSEGW> vsssegw_obj; +static CONSTEXPR const th_loadstore_width<false, LST_INDEXED, true, UNSPEC_TH_VLXSEGB> vlxsegb_obj; +static CONSTEXPR const th_loadstore_width<false, LST_INDEXED, true, UNSPEC_TH_VLXSEGH> vlxsegh_obj; +static CONSTEXPR const th_loadstore_width<false, LST_INDEXED, true, UNSPEC_TH_VLXSEGW> vlxsegw_obj; +static CONSTEXPR const th_loadstore_width<false, LST_INDEXED, true, UNSPEC_TH_VLXSEGBU> vlxsegbu_obj; +static CONSTEXPR const th_loadstore_width<false, LST_INDEXED, true, UNSPEC_TH_VLXSEGHU> vlxseghu_obj; +static CONSTEXPR const th_loadstore_width<false, LST_INDEXED, true, UNSPEC_TH_VLXSEGWU> vlxsegwu_obj; +static CONSTEXPR const th_loadstore_width<true, LST_INDEXED, true, UNSPEC_TH_VLXSEGB> vsxsegb_obj; +static CONSTEXPR const th_loadstore_width<true, LST_INDEXED, true, UNSPEC_TH_VLXSEGH> vsxsegh_obj; +static CONSTEXPR const th_loadstore_width<true, LST_INDEXED, true, UNSPEC_TH_VLXSEGW> vsxsegw_obj; /* Crypto Vector */ static CONSTEXPR const vandn vandn_obj; @@ -3146,6 +3166,15 @@ BASE (vlssegwu) BASE (vsssegb) BASE (vsssegh) BASE (vsssegw) +BASE (vlxsegb) +BASE (vlxsegh) +BASE (vlxsegw) +BASE (vlxsegbu) +BASE (vlxseghu) +BASE (vlxsegwu) +BASE (vsxsegb) +BASE (vsxsegh) +BASE (vsxsegw) /* Crypto vector */ BASE (vandn) BASE (vbrev) diff --git a/gcc/config/riscv/riscv-vector-builtins-bases.h 
b/gcc/config/riscv/riscv-vector-builtins-bases.h index 5406b0271a9..9a8d378019e 100644 --- a/gcc/config/riscv/riscv-vector-builtins-bases.h +++ b/gcc/config/riscv/riscv-vector-builtins-bases.h @@ -335,6 +335,15 @@ extern const function_base *const vlssegwu; extern const function_base *const vsssegb; extern const function_base *const vsssegh; extern const function_base *const vsssegw; +extern const function_base *const vlxsegb; +extern const function_base *const vlxsegh; +extern const function_base *const vlxsegw; +extern const function_base *const vlxsegbu; +extern const function_base *const vlxseghu; +extern const function_base *const vlxsegwu; +extern const function_base *const vsxsegb; +extern const function_base *const vsxsegh; +extern const function_base *const vsxsegw; /* Below function_base are Vectro Crypto*/ extern const function_base *const vandn; extern const function_base *const vbrev; diff --git a/gcc/config/riscv/riscv-vector-builtins-shapes.cc b/gcc/config/riscv/riscv-vector-builtins-shapes.cc index 1483a13653c..10c096b2e44 100644 --- a/gcc/config/riscv/riscv-vector-builtins-shapes.cc +++ b/gcc/config/riscv/riscv-vector-builtins-shapes.cc @@ -1420,10 +1420,14 @@ void build (function_builder &b, b.append_name ("__riscv_th_vlseg"); else if (strstr (instance.base_name, "vlsseg")) b.append_name ("__riscv_th_vlsseg"); + else if (strstr (instance.base_name, "vlxseg")) + b.append_name ("__riscv_th_vlxseg"); else if (strstr (instance.base_name, "vsseg")) b.append_name ("__riscv_th_vsseg"); else if (strstr (instance.base_name, "vssseg")) b.append_name ("__riscv_th_vssseg"); + else if (strstr (instance.base_name, "vsxseg")) + b.append_name ("__riscv_th_vsxseg"); else gcc_unreachable (); diff --git a/gcc/config/riscv/riscv-vector-builtins.cc b/gcc/config/riscv/riscv-vector-builtins.cc index 673c2cb8e89..ed0482970ff 100644 --- a/gcc/config/riscv/riscv-vector-builtins.cc +++ b/gcc/config/riscv/riscv-vector-builtins.cc @@ -3411,6 +3411,30 @@ static CONSTEXPR const 
rvv_op_info th_tuple_v_int_scalar_ptr_ptrdiff_ops rvv_arg_type_info (RVV_BASE_void), /* Return type */ scalar_ptr_ptrdiff_args /* Args */}; +/* A static operand information for vector_type func (const scalar_type *, + * index_type) function registration. */ +static CONSTEXPR const rvv_op_info th_tuple_v_sint_scalar_const_ptr_index_ops + = {th_tuple_sint_ops, /* Types */ + OP_TYPE_v, /* Suffix */ + rvv_arg_type_info (RVV_BASE_vector), /* Return type */ + scalar_const_ptr_index_args /* Args */}; + +/* A static operand information for vector_type func (const scalar_type *, + * index_type) function registration. */ +static CONSTEXPR const rvv_op_info th_tuple_v_uint_scalar_const_ptr_index_ops + = {th_tuple_uint_ops, /* Types */ + OP_TYPE_v, /* Suffix */ + rvv_arg_type_info (RVV_BASE_vector), /* Return type */ + scalar_const_ptr_index_args /* Args */}; + +/* A static operand information for void func (scalar_type *, index_type, + * vector_type) function registration. */ +static CONSTEXPR const rvv_op_info th_tuple_v_int_scalar_ptr_index_ops + = {th_tuple_int_ops, /* Types */ + OP_TYPE_v, /* Suffix */ + rvv_arg_type_info (RVV_BASE_void), /* Return type */ + scalar_ptr_index_args /* Args */}; + /* A list of all RVV base function types. 
*/ static CONSTEXPR const function_type_info function_types[] = { #define DEF_RVV_TYPE_INDEX( \ diff --git a/gcc/config/riscv/thead-vector-builtins-functions.def b/gcc/config/riscv/thead-vector-builtins-functions.def index 7d6ec519616..2d28b4eb33e 100644 --- a/gcc/config/riscv/thead-vector-builtins-functions.def +++ b/gcc/config/riscv/thead-vector-builtins-functions.def @@ -52,6 +52,15 @@ DEF_RVV_FUNCTION (vlssegwu, th_seg_loadstore, full_preds, th_tuple_v_uint_scalar DEF_RVV_FUNCTION (vsssegb, th_seg_loadstore, none_m_preds, th_tuple_v_int_scalar_ptr_ptrdiff_ops) DEF_RVV_FUNCTION (vsssegh, th_seg_loadstore, none_m_preds, th_tuple_v_int_scalar_ptr_ptrdiff_ops) DEF_RVV_FUNCTION (vsssegw, th_seg_loadstore, none_m_preds, th_tuple_v_int_scalar_ptr_ptrdiff_ops) +DEF_RVV_FUNCTION (vlxsegb, th_seg_loadstore, full_preds, th_tuple_v_sint_scalar_const_ptr_index_ops) +DEF_RVV_FUNCTION (vlxsegh, th_seg_loadstore, full_preds, th_tuple_v_sint_scalar_const_ptr_index_ops) +DEF_RVV_FUNCTION (vlxsegw, th_seg_loadstore, full_preds, th_tuple_v_sint_scalar_const_ptr_index_ops) +DEF_RVV_FUNCTION (vlxsegbu, th_seg_loadstore, full_preds, th_tuple_v_uint_scalar_const_ptr_index_ops) +DEF_RVV_FUNCTION (vlxseghu, th_seg_loadstore, full_preds, th_tuple_v_uint_scalar_const_ptr_index_ops) +DEF_RVV_FUNCTION (vlxsegwu, th_seg_loadstore, full_preds, th_tuple_v_uint_scalar_const_ptr_index_ops) +DEF_RVV_FUNCTION (vsxsegb, th_seg_loadstore, none_m_preds, th_tuple_v_int_scalar_ptr_index_ops) +DEF_RVV_FUNCTION (vsxsegh, th_seg_loadstore, none_m_preds, th_tuple_v_int_scalar_ptr_index_ops) +DEF_RVV_FUNCTION (vsxsegw, th_seg_loadstore, none_m_preds, th_tuple_v_int_scalar_ptr_index_ops) #undef REQUIRED_EXTENSIONS #undef DEF_RVV_FUNCTION diff --git a/gcc/config/riscv/thead-vector.md b/gcc/config/riscv/thead-vector.md index 7ba06c6ab5b..c9cedcfc08d 100644 --- a/gcc/config/riscv/thead-vector.md +++ b/gcc/config/riscv/thead-vector.md @@ -39,6 +39,13 @@ (define_c_enum "unspec" [ UNSPEC_TH_VLSSEGHU 
UNSPEC_TH_VLSSEGW UNSPEC_TH_VLSSEGWU + + UNSPEC_TH_VLXSEGB + UNSPEC_TH_VLXSEGBU + UNSPEC_TH_VLXSEGH + UNSPEC_TH_VLXSEGHU + UNSPEC_TH_VLXSEGW + UNSPEC_TH_VLXSEGWU ]) (define_int_iterator UNSPEC_TH_VLMEM_OP [ @@ -71,6 +78,12 @@ (define_int_iterator UNSPEC_TH_VLSSEGMEM_OP[ UNSPEC_TH_VLSSEGW UNSPEC_TH_VLSSEGWU ]) +(define_int_iterator UNSPEC_TH_VLXSEGMEM_OP[ + UNSPEC_TH_VLXSEGB UNSPEC_TH_VLXSEGBU + UNSPEC_TH_VLXSEGH UNSPEC_TH_VLXSEGHU + UNSPEC_TH_VLXSEGW UNSPEC_TH_VLXSEGWU +]) + (define_int_attr vlmem_op_attr [ (UNSPEC_TH_VLB "b") (UNSPEC_TH_VLBU "bu") (UNSPEC_TH_VLH "h") (UNSPEC_TH_VLHU "hu") @@ -90,6 +103,9 @@ (define_int_attr vlmem_op_attr [ (UNSPEC_TH_VLSSEGB "b") (UNSPEC_TH_VLSSEGBU "bu") (UNSPEC_TH_VLSSEGH "h") (UNSPEC_TH_VLSSEGHU "hu") (UNSPEC_TH_VLSSEGW "w") (UNSPEC_TH_VLSSEGWU "wu") + (UNSPEC_TH_VLXSEGB "b") (UNSPEC_TH_VLXSEGBU "bu") + (UNSPEC_TH_VLXSEGH "h") (UNSPEC_TH_VLXSEGHU "hu") + (UNSPEC_TH_VLXSEGW "w") (UNSPEC_TH_VLXSEGWU "wu") ]) (define_int_attr vlmem_order_attr [ @@ -134,6 +150,12 @@ (define_int_iterator UNSPEC_TH_VSSSEGMEM_OP[ UNSPEC_TH_VLSSEGW ]) +(define_int_iterator UNSPEC_TH_VSXSEGMEM_OP[ + UNSPEC_TH_VLXSEGB + UNSPEC_TH_VLXSEGH + UNSPEC_TH_VLXSEGW +]) + (define_mode_iterator V_VLS_VT [V VLS VT]) (define_mode_iterator V_VB_VLS_VT [V VB VLS VT]) @@ -544,3 +566,41 @@ (define_insn "@pred_th_strided_store<vlmem_op_attr><mode>" "vssseg<nf><vlmem_op_attr>.v\t%3,(%z1),%z2%p0" [(set_attr "type" "vssegts") (set_attr "mode" "<MODE>")]) + +(define_insn "@pred_th_indexed_load<vlmem_op_attr><VT:mode><VI:mode>" + [(set (match_operand:VT 0 "register_operand" "=&vr, &vr") + (if_then_else:VT + (unspec:<VT:VM> + [(match_operand:<VT:VM> 1 "vector_mask_operand" "vmWc1,vmWc1") + (match_operand 5 "vector_length_operand" " rK, rK") + (match_operand 6 "const_int_operand" " i, i") + (match_operand 7 "const_int_operand" " i, i") + (match_operand 8 "const_int_operand" " i, i") + (reg:SI VL_REGNUM) + (reg:SI VTYPE_REGNUM)] UNSPEC_TH_VLXSEGMEM_OP) + (unspec:VT + 
[(match_operand 3 "pmode_reg_or_0_operand" " rJ, rJ") + (mem:BLK (scratch)) + (match_operand:VI 4 "register_operand" " vr, vr")] UNSPEC_TH_VLXSEGMEM_OP) + (match_operand:VT 2 "vector_merge_operand" " vu, 0")))] + "TARGET_XTHEADVECTOR" + "vlxseg<nf><vlmem_op_attr>.v\t%0,(%z3),%4%p1" + [(set_attr "type" "vlsegdux") + (set_attr "mode" "<VT:MODE>")]) + +(define_insn "@pred_th_indexed_store<vlmem_op_attr><VT:mode><VI:mode>" + [(set (mem:BLK (scratch)) + (unspec:BLK + [(unspec:<VT:VM> + [(match_operand:<VT:VM> 0 "vector_mask_operand" "vmWc1") + (match_operand 4 "vector_length_operand" " rK") + (match_operand 5 "const_int_operand" " i") + (reg:SI VL_REGNUM) + (reg:SI VTYPE_REGNUM)] UNSPEC_TH_VSXSEGMEM_OP) + (match_operand 1 "pmode_reg_or_0_operand" " rJ") + (match_operand:VI 2 "register_operand" " vr") + (match_operand:VT 3 "register_operand" " vr")] UNSPEC_TH_VSXSEGMEM_OP))] + "TARGET_XTHEADVECTOR" + "vsxseg<nf><vlmem_op_attr>.v\t%3,(%z1),%2%p0" + [(set_attr "type" "vssegtux") + (set_attr "mode" "<VT:MODE>")]) diff --git a/gcc/testsuite/gcc.target/riscv/rvv/xtheadvector/vlxseg-vsxseg.c b/gcc/testsuite/gcc.target/riscv/rvv/xtheadvector/vlxseg-vsxseg.c new file mode 100644 index 00000000000..ab05b0ff699 --- /dev/null +++ b/gcc/testsuite/gcc.target/riscv/rvv/xtheadvector/vlxseg-vsxseg.c @@ -0,0 +1,125 @@ +/* { dg-do compile } */ +/* { dg-options "-march=rv32gcxtheadvector -mabi=ilp32d -O3" } */ +/* { dg-final { check-function-bodies "**" "" } } */ +#include "riscv_th_vector.h" + +/* +** f1: +** li\s+[a-x0-9]+,4 +** th\.vsetvli\s+zero,[a-x0-9]+,e16,m1 +** th\.vlxseg2h\.v\s+v[0-9]+,\([a-x0-9]+\),v[0-9]+ +** th\.vlxseg2h\.v\s+v[0-9]+,\([a-x0-9]+\),v[0-9]+ +** th\.vsetvli\s+zero,zero,e16,m1 +** th\.vmv\.v\.v\s+v[0-9]+,v[0-9]+ +** th\.vsetvli\s+zero,[a-x0-9]+,e16,m1 +** th\.vadd\.vv\s+v[0-9]+,v[0-9]+,v[0-9]+ +** th\.vadd\.vv\s+v[0-9]+,v[0-9]+,v[0-9]+ +** th\.vadd\.vv\s+v[0-9]+,v[0-9]+,v[0-9]+ +** th\.vadd\.vv\s+v[0-9]+,v[0-9]+,v[0-9]+ +** th\.vsetvli\s+zero,zero,e16,m1 +** 
th\.vmv\.v\.v\s+v[0-9]+,v[0-9]+ +** th\.vmv\.v\.v\s+v[0-9]+,v[0-9]+ +** th\.vsetvli\s+zero,[a-x0-9]+,e16,m1 +** th\.vsxseg2h\.v\s+v[0-9]+,\([a-x0-9]+\),v[0-9]+ +** ret +*/ +void f1 (void * in, void *out, vuint16m1_t index) +{ + vint16m1x2_t v = __riscv_th_vlxseg2h_v_i16m1x2 (in, index, 4); + vint16m1x2_t v2 = __riscv_th_vlxseg2h_v_i16m1x2_tu (v, in, index, 4); + vint16m1_t v2_0 = __riscv_vget_i16m1 (v2, 0); + vint16m1_t v2_1 = __riscv_vget_i16m1 (v2, 1); + vint16m1_t v3_0 = __riscv_vadd_vv_i16m1 (v2_0, v2_0, 4); + vint16m1_t v3_1 = __riscv_vadd_vv_i16m1 (v2_1, v2_1, 4); + vint16m1_t v4_0 = __riscv_vadd_vv_i16m1 (v3_0, v2_0, 4); + vint16m1_t v4_1 = __riscv_vadd_vv_i16m1 (v3_1, v2_1, 4); + vint16m1x2_t v4 = __riscv_vset (v4, 0, v4_0); + v4 = __riscv_vset (v4, 1, v4_1); + __riscv_th_vsxseg2h_v_i16m1x2 (out, index, v4, 4); +} + +/* +** f2: +** th\.vsetvli\s+zero,zero,e8,m1 +** th\.vle\.v\s+v[0-9]+,0\([a-x0-9]+\) +** li\s+[a-x0-9]+,4 +** th\.vsetvli\s+zero,[a-x0-9]+,e16,m1 +** th\.vlxseg2h\.v\s+v[0-9]+,\([a-x0-9]+\),v[0-9]+,v[0-9]+\.t +** th\.vlxseg2h\.v\s+v[0-9]+,\([a-x0-9]+\),v[0-9]+ +** th\.vsetvli\s+zero,zero,e16,m1 +** th\.vmv\.v\.v\s+v[0-9]+,v[0-9]+ +** th\.vsetvli\s+zero,[a-x0-9]+,e16,m1 +** th\.vadd\.vv\s+v[0-9]+,v[0-9]+,v[0-9]+,v[0-9]+\.t +** th\.vadd\.vv\s+v[0-9]+,v[0-9]+,v[0-9]+,v[0-9]+\.t +** th\.vadd\.vv\s+v[0-9]+,v[0-9]+,v[0-9]+,v[0-9]+\.t +** th\.vadd\.vv\s+v[0-9]+,v[0-9]+,v[0-9]+,v[0-9]+\.t +** th\.vsetvli\s+zero,zero,e16,m1 +** th\.vmv\.v\.v\s+v[0-9]+,v[0-9]+ +** th\.vmv\.v\.v\s+v[0-9]+,v[0-9]+ +** th\.vsetvli\s+zero,[a-x0-9]+,e16,m1 +** th\.vsxseg2h\.v\s+v[0-9]+,\([a-x0-9]+\),v[0-9]+ +** ret +*/ +void f2 (void * in, void *out, vuint16m1_t index) +{ + vbool16_t mask = *(vbool16_t*)in; + asm volatile ("":::"memory"); + vint16m1x2_t v = __riscv_th_vlxseg2h_v_i16m1x2 (in, index, 4); + vint16m1x2_t v2 = __riscv_th_vlxseg2h_v_i16m1x2_m (mask, in, index, 4); + vint16m1_t v_0 = __riscv_vget_i16m1 (v, 0); + vint16m1_t v_1 = __riscv_vget_i16m1 (v, 1); + 
vint16m1_t v2_0 = __riscv_vget_i16m1 (v2, 0); + vint16m1_t v2_1 = __riscv_vget_i16m1 (v2, 1); + vint16m1_t v3_0 = __riscv_vadd_vv_i16m1_m (mask, v2_0, v_0, 4); + vint16m1_t v3_1 = __riscv_vadd_vv_i16m1_m (mask, v2_1, v_1, 4); + vint16m1_t v4_0 = __riscv_vadd_vv_i16m1_m (mask, v3_0, v2_0, 4); + vint16m1_t v4_1 = __riscv_vadd_vv_i16m1_m (mask, v3_1, v2_1, 4); + vint16m1x2_t v4 = __riscv_vset (v4, 0, v4_0); + v4 = __riscv_vset (v4, 1, v4_1); + __riscv_th_vsxseg2h_v_i16m1x2 (out, index, v4, 4); +} + +/* +** f3: +** th\.vsetvli\s+zero,zero,e8,m1 +** th\.vle\.v\s+v[0-9]+,0\([a-x0-9]+\) +** li\s+[a-x0-9]+,4 +** th\.vsetvli\s+zero,[a-x0-9]+,e16,m1 +** th\.vlxseg2h\.v\s+v[0-9]+,\([a-x0-9]+\),v[0-9]+ +** th\.vsetvli\s+[a-x0-9]+,zero,e16,m1 +** th\.vmv\.v\.i\s+v[0-9]+,0 +** th\.vmv\.v\.i\s+v[0-9]+,0 +** th\.vmv\.v\.i\s+v[0-9]+,0 +** th\.vsetvli\s+zero,[a-x0-9]+,e16,m1 +** th\.vlxseg2h\.v\s+v[0-9]+,\([a-x0-9]+\),v[0-9]+,v[0-9]+\.t +** th\.vsetvli\s+[a-x0-9]+,zero,e16,m1 +** th\.vmv\.v\.i\s+v[0-9]+,0 +** th\.vmv\.v\.v\s+v[0-9]+,v[0-9]+ +** th\.vsetvli\s+zero,[a-x0-9]+,e16,m1 +** th\.vadd\.vv\s+v[0-9]+,v[0-9]+,v[0-9]+,v[0-9]+\.t +** th\.vadd\.vv\s+v[0-9]+,v[0-9]+,v[0-9]+,v[0-9]+\.t +** th\.vadd\.vv\s+v[0-9]+,v[0-9]+,v[0-9]+,v[0-9]+\.t +** th\.vadd\.vv\s+v[0-9]+,v[0-9]+,v[0-9]+,v[0-9]+\.t +** th\.vsetvli\s+zero,zero,e16,m1 +** th\.vmv\.v\.v\s+v[0-9]+,v[0-9]+ +** th\.vmv\.v\.v\s+v[0-9]+,v[0-9]+ +** th\.vsetvli\s+zero,[a-x0-9]+,e16,m1 +** th\.vsxseg2h\.v\s+v[0-9]+,\([a-x0-9]+\),v[0-9]+ +** ret +*/ +void f3 (void * in, void *out, vuint16m1_t index) +{ + vbool16_t mask = *(vbool16_t*)in; + asm volatile ("":::"memory"); + vint16m1x2_t v = __riscv_th_vlxseg2h_v_i16m1x2 (in, index, 4); + vint16m1x2_t v2 = __riscv_th_vlxseg2h_v_i16m1x2_tumu (mask, v, in, index, 4); + vint16m1_t v2_0 = __riscv_vget_i16m1 (v2, 0); + vint16m1_t v2_1 = __riscv_vget_i16m1 (v2, 1); + vint16m1_t v3_0 = __riscv_vadd_vv_i16m1_tumu (mask, v3_0, v2_0, v2_0, 4); + vint16m1_t v3_1 = __riscv_vadd_vv_i16m1_tumu (mask, 
v3_1, v2_1, v2_1, 4); + vint16m1_t v4_0 = __riscv_vadd_vv_i16m1_tumu (mask, v4_0, v3_0, v2_0, 4); + vint16m1_t v4_1 = __riscv_vadd_vv_i16m1_tumu (mask, v4_1, v3_1, v2_1, 4); + vint16m1x2_t v4 = __riscv_vset (v4, 0, v4_0); + v4 = __riscv_vset (v4, 1, v4_1); + __riscv_th_vsxseg2h_v_i16m1x2 (out, index, v4, 4); +} \ No newline at end of file diff --git a/gcc/testsuite/gcc.target/riscv/rvv/xtheadvector/vlxsegu-vsxseg.c b/gcc/testsuite/gcc.target/riscv/rvv/xtheadvector/vlxsegu-vsxseg.c new file mode 100644 index 00000000000..b085de74061 --- /dev/null +++ b/gcc/testsuite/gcc.target/riscv/rvv/xtheadvector/vlxsegu-vsxseg.c @@ -0,0 +1,125 @@ +/* { dg-do compile } */ +/* { dg-options "-march=rv32gcxtheadvector -mabi=ilp32d -O3" } */ +/* { dg-final { check-function-bodies "**" "" } } */ +#include "riscv_th_vector.h" + +/* +** f1: +** li\s+[a-x0-9]+,4 +** th\.vsetvli\s+zero,[a-x0-9]+,e16,m1 +** th\.vlxseg2hu\.v\s+v[0-9]+,\([a-x0-9]+\),v[0-9]+ +** th\.vlxseg2hu\.v\s+v[0-9]+,\([a-x0-9]+\),v[0-9]+ +** th\.vsetvli\s+zero,zero,e16,m1 +** th\.vmv\.v\.v\s+v[0-9]+,v[0-9]+ +** th\.vsetvli\s+zero,[a-x0-9]+,e16,m1 +** th\.vadd\.vv\s+v[0-9]+,v[0-9]+,v[0-9]+ +** th\.vadd\.vv\s+v[0-9]+,v[0-9]+,v[0-9]+ +** th\.vadd\.vv\s+v[0-9]+,v[0-9]+,v[0-9]+ +** th\.vadd\.vv\s+v[0-9]+,v[0-9]+,v[0-9]+ +** th\.vsetvli\s+zero,zero,e16,m1 +** th\.vmv\.v\.v\s+v[0-9]+,v[0-9]+ +** th\.vmv\.v\.v\s+v[0-9]+,v[0-9]+ +** th\.vsetvli\s+zero,[a-x0-9]+,e16,m1 +** th\.vsxseg2h\.v\s+v[0-9]+,\([a-x0-9]+\),v[0-9]+ +** ret +*/ +void f1 (void * in, void *out, vuint16m1_t index) +{ + vuint16m1x2_t v = __riscv_th_vlxseg2hu_v_u16m1x2 (in, index, 4); + vuint16m1x2_t v2 = __riscv_th_vlxseg2hu_v_u16m1x2_tu (v, in, index, 4); + vuint16m1_t v2_0 = __riscv_vget_u16m1 (v2, 0); + vuint16m1_t v2_1 = __riscv_vget_u16m1 (v2, 1); + vuint16m1_t v3_0 = __riscv_vadd_vv_u16m1 (v2_0, v2_0, 4); + vuint16m1_t v3_1 = __riscv_vadd_vv_u16m1 (v2_1, v2_1, 4); + vuint16m1_t v4_0 = __riscv_vadd_vv_u16m1 (v3_0, v2_0, 4); + vuint16m1_t v4_1 = 
__riscv_vadd_vv_u16m1 (v3_1, v2_1, 4); + vuint16m1x2_t v4 = __riscv_vset (v4, 0, v4_0); + v4 = __riscv_vset (v4, 1, v4_1); + __riscv_th_vsxseg2h_v_u16m1x2 (out, index, v4, 4); +} + +/* +** f2: +** th\.vsetvli\s+zero,zero,e8,m1 +** th\.vle\.v\s+v[0-9]+,0\([a-x0-9]+\) +** li\s+[a-x0-9]+,4 +** th\.vsetvli\s+zero,[a-x0-9]+,e16,m1 +** th\.vlxseg2hu\.v\s+v[0-9]+,\([a-x0-9]+\),v[0-9]+,v[0-9]+\.t +** th\.vlxseg2hu\.v\s+v[0-9]+,\([a-x0-9]+\),v[0-9]+ +** th\.vsetvli\s+zero,zero,e16,m1 +** th\.vmv\.v\.v\s+v[0-9]+,v[0-9]+ +** th\.vsetvli\s+zero,[a-x0-9]+,e16,m1 +** th\.vadd\.vv\s+v[0-9]+,v[0-9]+,v[0-9]+,v[0-9]+\.t +** th\.vadd\.vv\s+v[0-9]+,v[0-9]+,v[0-9]+,v[0-9]+\.t +** th\.vadd\.vv\s+v[0-9]+,v[0-9]+,v[0-9]+,v[0-9]+\.t +** th\.vadd\.vv\s+v[0-9]+,v[0-9]+,v[0-9]+,v[0-9]+\.t +** th\.vsetvli\s+zero,zero,e16,m1 +** th\.vmv\.v\.v\s+v[0-9]+,v[0-9]+ +** th\.vmv\.v\.v\s+v[0-9]+,v[0-9]+ +** th\.vsetvli\s+zero,[a-x0-9]+,e16,m1 +** th\.vsxseg2h\.v\s+v[0-9]+,\([a-x0-9]+\),v[0-9]+ +** ret +*/ +void f2 (void * in, void *out, vuint16m1_t index) +{ + vbool16_t mask = *(vbool16_t*)in; + asm volatile ("":::"memory"); + vuint16m1x2_t v = __riscv_th_vlxseg2hu_v_u16m1x2 (in, index, 4); + vuint16m1x2_t v2 = __riscv_th_vlxseg2hu_v_u16m1x2_m (mask, in, index, 4); + vuint16m1_t v_0 = __riscv_vget_u16m1 (v, 0); + vuint16m1_t v_1 = __riscv_vget_u16m1 (v, 1); + vuint16m1_t v2_0 = __riscv_vget_u16m1 (v2, 0); + vuint16m1_t v2_1 = __riscv_vget_u16m1 (v2, 1); + vuint16m1_t v3_0 = __riscv_vadd_vv_u16m1_m (mask, v2_0, v_0, 4); + vuint16m1_t v3_1 = __riscv_vadd_vv_u16m1_m (mask, v2_1, v_1, 4); + vuint16m1_t v4_0 = __riscv_vadd_vv_u16m1_m (mask, v3_0, v2_0, 4); + vuint16m1_t v4_1 = __riscv_vadd_vv_u16m1_m (mask, v3_1, v2_1, 4); + vuint16m1x2_t v4 = __riscv_vset (v4, 0, v4_0); + v4 = __riscv_vset (v4, 1, v4_1); + __riscv_th_vsxseg2h_v_u16m1x2 (out, index, v4, 4); +} + +/* +** f3: +** th\.vsetvli\s+zero,zero,e8,m1 +** th\.vle\.v\s+v[0-9]+,0\([a-x0-9]+\) +** li\s+[a-x0-9]+,4 +** th\.vsetvli\s+zero,[a-x0-9]+,e16,m1 
+** th\.vlxseg2hu\.v\s+v[0-9]+,\([a-x0-9]+\),v[0-9]+ +** th\.vsetvli\s+[a-x0-9]+,zero,e16,m1 +** th\.vmv\.v\.i\s+v[0-9]+,0 +** th\.vmv\.v\.i\s+v[0-9]+,0 +** th\.vmv\.v\.i\s+v[0-9]+,0 +** th\.vsetvli\s+zero,[a-x0-9]+,e16,m1 +** th\.vlxseg2hu\.v\s+v[0-9]+,\([a-x0-9]+\),v[0-9]+,v[0-9]+\.t +** th\.vsetvli\s+[a-x0-9]+,zero,e16,m1 +** th\.vmv\.v\.i\s+v[0-9]+,0 +** th\.vmv\.v\.v\s+v[0-9]+,v[0-9]+ +** th\.vsetvli\s+zero,[a-x0-9]+,e16,m1 +** th\.vadd\.vv\s+v[0-9]+,v[0-9]+,v[0-9]+,v[0-9]+\.t +** th\.vadd\.vv\s+v[0-9]+,v[0-9]+,v[0-9]+,v[0-9]+\.t +** th\.vadd\.vv\s+v[0-9]+,v[0-9]+,v[0-9]+,v[0-9]+\.t +** th\.vadd\.vv\s+v[0-9]+,v[0-9]+,v[0-9]+,v[0-9]+\.t +** th\.vsetvli\s+zero,zero,e16,m1 +** th\.vmv\.v\.v\s+v[0-9]+,v[0-9]+ +** th\.vmv\.v\.v\s+v[0-9]+,v[0-9]+ +** th\.vsetvli\s+zero,[a-x0-9]+,e16,m1 +** th\.vsxseg2h\.v\s+v[0-9]+,\([a-x0-9]+\),v[0-9]+ +** ret +*/ +void f3 (void * in, void *out, vuint16m1_t index) +{ + vbool16_t mask = *(vbool16_t*)in; + asm volatile ("":::"memory"); + vuint16m1x2_t v = __riscv_th_vlxseg2hu_v_u16m1x2 (in, index, 4); + vuint16m1x2_t v2 = __riscv_th_vlxseg2hu_v_u16m1x2_tumu (mask, v, in, index, 4); + vuint16m1_t v2_0 = __riscv_vget_u16m1 (v2, 0); + vuint16m1_t v2_1 = __riscv_vget_u16m1 (v2, 1); + vuint16m1_t v3_0 = __riscv_vadd_vv_u16m1_tumu (mask, v3_0, v2_0, v2_0, 4); + vuint16m1_t v3_1 = __riscv_vadd_vv_u16m1_tumu (mask, v3_1, v2_1, v2_1, 4); + vuint16m1_t v4_0 = __riscv_vadd_vv_u16m1_tumu (mask, v4_0, v3_0, v2_0, 4); + vuint16m1_t v4_1 = __riscv_vadd_vv_u16m1_tumu (mask, v4_1, v3_1, v2_1, 4); + vuint16m1x2_t v4 = __riscv_vset (v4, 0, v4_0); + v4 = __riscv_vset (v4, 1, v4_1); + __riscv_th_vsxseg2h_v_u16m1x2 (out, index, v4, 4); +} \ No newline at end of file -- 2.47.1