Thanks, Kewen. But I see that two more files are now included:
+#include "memmodel.h" +#include "optabs.h" I'm not sure whether Richard and Richi are OK with that change? Thanks. juzhe.zh...@rivai.ai From: Kewen.Lin Date: 2023-08-14 20:45 To: juzhe.zh...@rivai.ai CC: Robin Dapp; richard.sandiford; rguenther; GCC Patches Subject: Re: [PATCH V4] VECT: Support loop len control on EXTRACT_LAST vectorization Hi Juzhe, on 2023/8/14 20:08, juzhe.zh...@rivai.ai wrote: > Hi, Kewen. > > Could you test 'can_vec_extract_var_idx_p' and send the V5 patch once it passes > the testing? The diff below was bootstrapped and regress-tested on Power10 LE. Compared to the previous v4, the only changes should be the proposed can_vec_extract_var_idx_p and the new includes it requires, as below: +#include "memmodel.h" +#include "optabs.h" Could you double-check? Since I only tested it on Power10 and you have full ownership of the patch, I'll leave the v5 posting to you. Thanks! BR, Kewen ----- diff --git a/gcc/tree-vect-loop.cc b/gcc/tree-vect-loop.cc index bc3063c3615..5ae9f69c7eb 100644 --- a/gcc/tree-vect-loop.cc +++ b/gcc/tree-vect-loop.cc @@ -32,6 +32,8 @@ along with GCC; see the file COPYING3. If not see #include "tree-pass.h" #include "ssa.h" #include "optabs-tree.h" +#include "memmodel.h" +#include "optabs.h" #include "diagnostic-core.h" #include "fold-const.h" #include "stor-layout.h" @@ -10300,17 +10302,7 @@ vectorizable_live_operation (vec_info *vinfo, stmt_vec_info stmt_info, /* No transformation required. 
*/ if (loop_vinfo && LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo)) { - if (!direct_internal_fn_supported_p (IFN_EXTRACT_LAST, vectype, - OPTIMIZE_FOR_SPEED)) - { - if (dump_enabled_p ()) - dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, - "can't operate on partial vectors " - "because the target doesn't support extract " - "last reduction.\n"); - LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo) = false; - } - else if (slp_node) + if (slp_node) { if (dump_enabled_p ()) dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, @@ -10330,9 +10322,26 @@ vectorizable_live_operation (vec_info *vinfo, stmt_vec_info stmt_info, else { gcc_assert (ncopies == 1 && !slp_node); - vect_record_loop_mask (loop_vinfo, - &LOOP_VINFO_MASKS (loop_vinfo), - 1, vectype, NULL); + if (direct_internal_fn_supported_p (IFN_EXTRACT_LAST, vectype, + OPTIMIZE_FOR_SPEED)) + vect_record_loop_mask (loop_vinfo, + &LOOP_VINFO_MASKS (loop_vinfo), + 1, vectype, NULL); + else if (can_vec_extract_var_idx_p ( + TYPE_MODE (vectype), TYPE_MODE (TREE_TYPE (vectype)))) + vect_record_loop_len (loop_vinfo, + &LOOP_VINFO_LENS (loop_vinfo), + 1, vectype, 1); + else + { + if (dump_enabled_p ()) + dump_printf_loc ( + MSG_MISSED_OPTIMIZATION, vect_location, + "can't operate on partial vectors " + "because the target doesn't support extract " + "last reduction.\n"); + LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo) = false; + } } } /* ??? Enable for loop costing as well. */ @@ -10358,7 +10367,9 @@ vectorizable_live_operation (vec_info *vinfo, stmt_vec_info stmt_info, gimple *vec_stmt; if (slp_node) { - gcc_assert (!loop_vinfo || !LOOP_VINFO_FULLY_MASKED_P (loop_vinfo)); + gcc_assert (!loop_vinfo + || (!LOOP_VINFO_FULLY_MASKED_P (loop_vinfo) + && !LOOP_VINFO_FULLY_WITH_LENGTH_P (loop_vinfo))); /* Get the correct slp vectorized stmt. 
*/ vec_lhs = SLP_TREE_VEC_DEFS (slp_node)[vec_entry]; @@ -10402,7 +10413,42 @@ vectorizable_live_operation (vec_info *vinfo, stmt_vec_info stmt_info, gimple_seq stmts = NULL; tree new_tree; - if (LOOP_VINFO_FULLY_MASKED_P (loop_vinfo)) + if (LOOP_VINFO_FULLY_WITH_LENGTH_P (loop_vinfo)) + { + /* Emit: + + SCALAR_RES = VEC_EXTRACT <VEC_LHS, LEN + BIAS - 1> + + where VEC_LHS is the vectorized live-out result and MASK is + the loop mask for the final iteration. */ + gcc_assert (ncopies == 1 && !slp_node); + gimple_seq tem = NULL; + gimple_stmt_iterator gsi = gsi_last (tem); + tree len + = vect_get_loop_len (loop_vinfo, &gsi, + &LOOP_VINFO_LENS (loop_vinfo), + 1, vectype, 0, 0); + + /* BIAS - 1. */ + signed char biasval = LOOP_VINFO_PARTIAL_LOAD_STORE_BIAS (loop_vinfo); + tree bias_minus_one + = int_const_binop (MINUS_EXPR, + build_int_cst (TREE_TYPE (len), biasval), + build_one_cst (TREE_TYPE (len))); + + /* LAST_INDEX = LEN + (BIAS - 1). */ + tree last_index = gimple_build (&stmts, PLUS_EXPR, TREE_TYPE (len), + len, bias_minus_one); + + /* SCALAR_RES = VEC_EXTRACT <VEC_LHS, LEN + BIAS - 1>. */ + tree scalar_res + = gimple_build (&stmts, CFN_VEC_EXTRACT, TREE_TYPE (vectype), + vec_lhs_phi, last_index); + + /* Convert the extracted vector element to the scalar type. */ + new_tree = gimple_convert (&stmts, lhs_type, scalar_res); + } + else if (LOOP_VINFO_FULLY_MASKED_P (loop_vinfo)) { /* Emit: