Hi Richard(s), I'm just looking to see if I'm going about this the right way, based on the discussion we had on IRC. I've managed to hack something together and have attached a (very) WIP patch which gives the correct codegen for the testcase in question (https://gcc.gnu.org/bugzilla/show_bug.cgi?id=98772). It would obviously need to support other widening patterns and differentiate between big- and little-endian, among other things.
I added a backend pattern because I wasn't quite clear which changes to make in order to allow the existing backend patterns to be used with a V8QI, or how to represent a V16QI where we don't care about the top/bottom 8 elements. I made an attempt at this in optabs.c, which is left commented out in the patch, but I'm not sure if I'm going about this the right way. Joel
diff --git a/gcc/config/aarch64/aarch64-simd.md b/gcc/config/aarch64/aarch64-simd.md index be2a5a865172bdd7848be4082abb0fdfb0b35937..c66b8a367623c8daf4423677d292e292feee3606 100644 --- a/gcc/config/aarch64/aarch64-simd.md +++ b/gcc/config/aarch64/aarch64-simd.md @@ -3498,6 +3498,14 @@ DONE; }) +(define_insn "vec_widen_usubl_half_v8qi" + [(match_operand:V8HI 0 "register_operand") + (match_operand:V8QI 1 "register_operand") + (match_operand:V8QI 2 "register_operand")] + "TARGET_SIMD" + "usubl\t%0.<Vwtype>, %1.<Vtype>, %2.<Vtype>" +) + (define_expand "vec_widen_<su>subl_hi_<mode>" [(match_operand:<VWIDE> 0 "register_operand") (ANY_EXTEND:<VWIDE> (match_operand:VQW 1 "register_operand")) diff --git a/gcc/expr.c b/gcc/expr.c index 04ef5ad114d0662948c896cdbf58e67737b39c7e..0939a156deef63f1cf2fa7e29c2c94925820f2ba 100644 --- a/gcc/expr.c +++ b/gcc/expr.c @@ -9785,6 +9785,7 @@ expand_expr_real_2 (sepops ops, rtx target, machine_mode tmode, case VEC_WIDEN_PLUS_HI_EXPR: case VEC_WIDEN_PLUS_LO_EXPR: + case VEC_WIDEN_MINUS_HALF_EXPR: case VEC_WIDEN_MINUS_HI_EXPR: case VEC_WIDEN_MINUS_LO_EXPR: case VEC_WIDEN_MULT_HI_EXPR: diff --git a/gcc/optabs-query.h b/gcc/optabs-query.h index 876a3a6f348de122e5a52e6dd70d7946bc810162..10aa21d07595325fd8ef3057444853fc946385de 100644 --- a/gcc/optabs-query.h +++ b/gcc/optabs-query.h @@ -186,6 +186,9 @@ bool can_vec_perm_const_p (machine_mode, const vec_perm_indices &, enum insn_code find_widening_optab_handler_and_mode (optab, machine_mode, machine_mode, machine_mode *); +enum insn_code find_half_mode_optab_and_mode (optab, machine_mode, + machine_mode, + machine_mode *); int can_mult_highpart_p (machine_mode, bool); bool can_vec_mask_load_store_p (machine_mode, machine_mode, bool); opt_machine_mode get_len_load_store_mode (machine_mode, bool); diff --git a/gcc/optabs-query.c b/gcc/optabs-query.c index 3248ce2c06e65c9c0366757907ab057407f7c594..7abfc04aa18b7ee5b734a1b1f4378b4615ee31fd 100644 --- a/gcc/optabs-query.c +++ b/gcc/optabs-query.c @@ 
-462,6 +462,17 @@ can_vec_perm_const_p (machine_mode mode, const vec_perm_indices &sel, return false; } +enum insn_code +find_half_mode_optab_and_mode (optab op, machine_mode to_mode, + machine_mode from_mode, + machine_mode *found_mode) +{ + insn_code icode = CODE_FOR_nothing; + if (GET_MODE_2XWIDER_MODE(from_mode).exists(found_mode)) + icode = optab_handler (op, *found_mode); + return icode; +} + /* Find a widening optab even if it doesn't widen as much as we want. E.g. if from_mode is HImode, and to_mode is DImode, and there is no direct HI->SI insn, then return SI->DI, if that exists. */ diff --git a/gcc/optabs-tree.c b/gcc/optabs-tree.c index c94073e3ed98f8c4cab65891f65dedebdb1ec274..eb52dc15f8094594c4aa22d5fc1c442886e4ebf6 100644 --- a/gcc/optabs-tree.c +++ b/gcc/optabs-tree.c @@ -185,6 +185,9 @@ optab_for_tree_code (enum tree_code code, const_tree type, case VEC_WIDEN_MINUS_HI_EXPR: return (TYPE_UNSIGNED (type) ? vec_widen_usubl_hi_optab : vec_widen_ssubl_hi_optab); + + case VEC_WIDEN_MINUS_HALF_EXPR: + return vec_widen_usubl_half_optab; case VEC_UNPACK_HI_EXPR: return (TYPE_UNSIGNED (type) @@ -308,6 +311,16 @@ supportable_convert_operation (enum tree_code code, if (!VECTOR_MODE_P (m1) || !VECTOR_MODE_P (m2)) return false; + /* The case where vectype_in is half the vector width, as opposed to the + normal case for widening patterns of vector width input, with output in + multiple registers. */ + if (code == WIDEN_MINUS_EXPR && + known_eq(TYPE_VECTOR_SUBPARTS(vectype_in),TYPE_VECTOR_SUBPARTS(vectype_out)) ) + { + *code1 = VEC_WIDEN_MINUS_HALF_EXPR; + return true; + } + /* First check if we can done conversion directly. 
*/ if ((code == FIX_TRUNC_EXPR && can_fix_p (m1,m2,TYPE_UNSIGNED (vectype_out), &truncp) diff --git a/gcc/optabs.c b/gcc/optabs.c index f4614a394587787293dc8b680a38901f7906f61c..1252097be9893d7d65ea844fc0eda9bad70b9256 100644 --- a/gcc/optabs.c +++ b/gcc/optabs.c @@ -293,6 +293,13 @@ expand_widen_pattern_expr (sepops ops, rtx op0, rtx op1, rtx wide_op, icode = find_widening_optab_handler (widen_pattern_optab, TYPE_MODE (TREE_TYPE (ops->op2)), tmode0); + // Perhaps something like this can eliminate the need for an additional backend pattern? + //else if (ops->code == VEC_WIDEN_MINUS_HI_EXPR) + //{ + // icode = find_half_mode_optab_and_mode (widen_pattern_optab, tmode0, + // tmode0, + // &tmode1); + //} else icode = optab_handler (widen_pattern_optab, tmode0); gcc_assert (icode != CODE_FOR_nothing); diff --git a/gcc/optabs.def b/gcc/optabs.def index b192a9d070b8aa72e5676b2eaa020b5bdd7ffcc8..43fccfa29127d99ce0131a21c2dc58fcb247bd25 100644 --- a/gcc/optabs.def +++ b/gcc/optabs.def @@ -403,6 +403,7 @@ OPTAB_D (vec_widen_umult_lo_optab, "vec_widen_umult_lo_$a") OPTAB_D (vec_widen_umult_odd_optab, "vec_widen_umult_odd_$a") OPTAB_D (vec_widen_ushiftl_hi_optab, "vec_widen_ushiftl_hi_$a") OPTAB_D (vec_widen_ushiftl_lo_optab, "vec_widen_ushiftl_lo_$a") +OPTAB_D (vec_widen_usubl_half_optab, "vec_widen_usubl_half_$a") OPTAB_D (vec_widen_usubl_hi_optab, "vec_widen_usubl_hi_$a") OPTAB_D (vec_widen_usubl_lo_optab, "vec_widen_usubl_lo_$a") OPTAB_D (vec_widen_uaddl_hi_optab, "vec_widen_uaddl_hi_$a") diff --git a/gcc/tree-cfg.c b/gcc/tree-cfg.c index 75d814bd121f40c6a430f33f4c7d6395642f6c33..0e2313009d39c17d998c2285b9a9938e616dc35c 100644 --- a/gcc/tree-cfg.c +++ b/gcc/tree-cfg.c @@ -4007,6 +4007,7 @@ verify_gimple_assign_binary (gassign *stmt) return false; } + case VEC_WIDEN_MINUS_HALF_EXPR: case VEC_WIDEN_MINUS_HI_EXPR: case VEC_WIDEN_MINUS_LO_EXPR: case VEC_WIDEN_PLUS_HI_EXPR: diff --git a/gcc/tree-inline.c b/gcc/tree-inline.c index 
a710fa590279234e5e8062a87bac68eb324df3cb..2c415abaf6091693c31d636644e54e18a90650b1 100644 --- a/gcc/tree-inline.c +++ b/gcc/tree-inline.c @@ -4242,6 +4242,7 @@ estimate_operator_cost (enum tree_code code, eni_weights *weights, case VEC_WIDEN_PLUS_HI_EXPR: case VEC_WIDEN_PLUS_LO_EXPR: + case VEC_WIDEN_MINUS_HALF_EXPR: case VEC_WIDEN_MINUS_HI_EXPR: case VEC_WIDEN_MINUS_LO_EXPR: case VEC_WIDEN_MULT_HI_EXPR: diff --git a/gcc/tree-vect-generic.c b/gcc/tree-vect-generic.c index c8d8493e6eaefca589ff73bcae4dc014140a1c5c..1911d2b0d637e058affadb21dac93e6880376eae 100644 --- a/gcc/tree-vect-generic.c +++ b/gcc/tree-vect-generic.c @@ -2121,6 +2121,7 @@ expand_vector_operations_1 (gimple_stmt_iterator *gsi, || code == VEC_WIDEN_PLUS_HI_EXPR || code == VEC_WIDEN_PLUS_LO_EXPR || code == VEC_WIDEN_MINUS_HI_EXPR + || code == VEC_WIDEN_MINUS_HALF_EXPR || code == VEC_WIDEN_MINUS_LO_EXPR || code == VEC_WIDEN_MULT_HI_EXPR || code == VEC_WIDEN_MULT_LO_EXPR diff --git a/gcc/tree-vect-stmts.c b/gcc/tree-vect-stmts.c index f180ced312443ba1e698932d5e8362208690b3fc..0a31c7b004eaa6fba7bbbaca0ef39a265774093f 100644 --- a/gcc/tree-vect-stmts.c +++ b/gcc/tree-vect-stmts.c @@ -4545,6 +4545,51 @@ vect_create_vectorized_promotion_stmts (vec_info *vinfo, *vec_oprnds0 = vec_tmp; } +/* Create vectorized promotion stmts for widening stmts using only half the + potential vector size for input */ +static void +vect_create_vectorized_promotion_stmts (vec_info *vinfo, + vec<tree> *vec_oprnds0, + vec<tree> *vec_oprnds1, + stmt_vec_info stmt_info, tree vec_dest, + gimple_stmt_iterator *gsi, + enum tree_code code1, + int op_type) +{ + int i; + tree vop0, vop1, new_tmp; + gimple *new_stmt; + vec<tree> vec_tmp = vNULL; + + vec_tmp.create (vec_oprnds0->length () * 2); + FOR_EACH_VEC_ELT (*vec_oprnds0, i, vop0) + { + if (op_type == binary_op) + vop1 = (*vec_oprnds1)[i]; + else + vop1 = NULL_TREE; + + /* Generate the two halves of promotion operation. 
*/ + new_stmt = vect_gen_widened_results_half (vinfo, code1, vop0, vop1, + op_type, vec_dest, gsi, + stmt_info); + if (is_gimple_call (new_stmt)) + { + new_tmp = gimple_call_lhs (new_stmt); + } + else + { + new_tmp = gimple_assign_lhs (new_stmt); + } + + /* Store the results for the next step. */ + vec_tmp.quick_push (new_tmp); + } + + vec_oprnds0->release (); + *vec_oprnds0 = vec_tmp; +} + /* Check if STMT_INFO performs a conversion operation that can be vectorized. If VEC_STMT is also passed, vectorize STMT_INFO: create a vectorized @@ -4731,7 +4776,8 @@ vectorizable_conversion (vec_info *vinfo, case NONE: if (code != FIX_TRUNC_EXPR && code != FLOAT_EXPR - && !CONVERT_EXPR_CODE_P (code)) + && !CONVERT_EXPR_CODE_P (code) + && code != WIDEN_MINUS_EXPR) return false; if (supportable_convert_operation (code, vectype_out, vectype_in, &code1)) break; @@ -4937,22 +4983,55 @@ vectorizable_conversion (vec_info *vinfo, switch (modifier) { case NONE: - vect_get_vec_defs (vinfo, stmt_info, slp_node, ncopies, - op0, &vec_oprnds0); - FOR_EACH_VEC_ELT (vec_oprnds0, i, vop0) - { - /* Arguments are ready, create the new vector stmt. 
*/ - gcc_assert (TREE_CODE_LENGTH (code1) == unary_op); - gassign *new_stmt = gimple_build_assign (vec_dest, code1, vop0); - new_temp = make_ssa_name (vec_dest, new_stmt); - gimple_assign_set_lhs (new_stmt, new_temp); - vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi); + if (code == WIDEN_MINUS_EXPR) + { + vect_get_vec_defs (vinfo, stmt_info, slp_node, ncopies * ninputs, + op0, &vec_oprnds0, + op1, + &vec_oprnds1); + vect_create_vectorized_promotion_stmts (vinfo, &vec_oprnds0, + &vec_oprnds1, stmt_info, + vec_dsts[0], gsi, + code1, op_type); - if (slp_node) - SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt); - else - STMT_VINFO_VEC_STMTS (stmt_info).safe_push (new_stmt); - } + FOR_EACH_VEC_ELT (vec_oprnds0, i, vop0) + { + gimple *new_stmt; + if (cvt_type) + { + gcc_assert (TREE_CODE_LENGTH (codecvt1) == unary_op); + new_temp = make_ssa_name (vec_dest); + new_stmt = gimple_build_assign (new_temp, codecvt1, vop0); + vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi); + } + else + new_stmt = SSA_NAME_DEF_STMT (vop0); + + if (slp_node) + SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt); + else + STMT_VINFO_VEC_STMTS (stmt_info).safe_push (new_stmt); + } + } + else + { + vect_get_vec_defs (vinfo, stmt_info, slp_node, ncopies, + op0, &vec_oprnds0); + FOR_EACH_VEC_ELT (vec_oprnds0, i, vop0) + { + /* Arguments are ready, create the new vector stmt. 
*/ + gcc_assert (TREE_CODE_LENGTH (code1) == unary_op); + gassign *new_stmt = gimple_build_assign (vec_dest, code1, vop0); + new_temp = make_ssa_name (vec_dest, new_stmt); + gimple_assign_set_lhs (new_stmt, new_temp); + vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi); + + if (slp_node) + SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt); + else + STMT_VINFO_VEC_STMTS (stmt_info).safe_push (new_stmt); + } + } break; case WIDEN: diff --git a/gcc/tree.def b/gcc/tree.def index eda050bdc55c68fa11ac5526e3a3f618aad0df4b..5b2c4e74a85be18738eb6fc36bbaedd036acf89a 100644 --- a/gcc/tree.def +++ b/gcc/tree.def @@ -1433,6 +1433,7 @@ DEFTREECODE (VEC_WIDEN_LSHIFT_HI_EXPR, "widen_lshift_hi_expr", tcc_binary, 2) DEFTREECODE (VEC_WIDEN_LSHIFT_LO_EXPR, "widen_lshift_lo_expr", tcc_binary, 2) DEFTREECODE (VEC_WIDEN_PLUS_HI_EXPR, "widen_plus_hi_expr", tcc_binary, 2) DEFTREECODE (VEC_WIDEN_PLUS_LO_EXPR, "widen_plus_lo_expr", tcc_binary, 2) +DEFTREECODE (VEC_WIDEN_MINUS_HALF_EXPR, "widen_minus_half_expr", tcc_binary, 2) DEFTREECODE (VEC_WIDEN_MINUS_HI_EXPR, "widen_minus_hi_expr", tcc_binary, 2) DEFTREECODE (VEC_WIDEN_MINUS_LO_EXPR, "widen_minus_lo_expr", tcc_binary, 2)