Hi. This patch applies LEN_FOLD_EXTRACT_LAST in the loop vectorizer.
Consider the following case: #include <stdint.h> #define N 32 /* Simple condition reduction. */ int __attribute__ ((noinline, noclone)) condition_reduction (int *a, int min_v) { int last = 66; /* High start value. */ for (int i = 0; i < N; i++) if (a[i] < min_v) last = i; return last; } With this patch, we can generate the following IR: _44 = .SELECT_VL (ivtmp_42, POLY_INT_CST [4, 4]); _34 = vect_vec_iv_.5_33 + { POLY_INT_CST [4, 4], ... }; ivtmp_36 = _44 * 4; vect__4.8_39 = .MASK_LEN_LOAD (vectp_a.6_37, 32B, { -1, ... }, _44, 0); mask__11.9_41 = vect__4.8_39 < vect_cst__40; last_5 = .LEN_FOLD_EXTRACT_LAST (last_14, mask__11.9_41, vect_vec_iv_.5_33, _44, 0); ... gcc/ChangeLog: * tree-vect-loop.cc (vectorizable_reduction): Apply LEN_FOLD_EXTRACT_LAST. * tree-vect-stmts.cc (vectorizable_condition): Ditto. --- gcc/tree-vect-loop.cc | 7 ++++-- gcc/tree-vect-stmts.cc | 52 ++++++++++++++++++++++++++++++++++++------ 2 files changed, 50 insertions(+), 9 deletions(-) diff --git a/gcc/tree-vect-loop.cc b/gcc/tree-vect-loop.cc index 1cd6c291377..ebee8037e02 100644 --- a/gcc/tree-vect-loop.cc +++ b/gcc/tree-vect-loop.cc @@ -7494,8 +7494,11 @@ vectorizable_reduction (loop_vec_info loop_vinfo, } if (reduc_chain_length == 1 - && direct_internal_fn_supported_p (IFN_FOLD_EXTRACT_LAST, - vectype_in, OPTIMIZE_FOR_SPEED)) + && (direct_internal_fn_supported_p (IFN_FOLD_EXTRACT_LAST, vectype_in, + OPTIMIZE_FOR_SPEED) + || direct_internal_fn_supported_p (IFN_LEN_FOLD_EXTRACT_LAST, + vectype_in, + OPTIMIZE_FOR_SPEED))) { if (dump_enabled_p ()) dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, diff --git a/gcc/tree-vect-stmts.cc b/gcc/tree-vect-stmts.cc index 413a88750d6..be9f3a280bd 100644 --- a/gcc/tree-vect-stmts.cc +++ b/gcc/tree-vect-stmts.cc @@ -11740,8 +11740,17 @@ vectorizable_condition (vec_info *vinfo, && LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo)) { if (reduction_type == EXTRACT_LAST_REDUCTION) - vect_record_loop_mask (loop_vinfo, &LOOP_VINFO_MASKS (loop_vinfo), 
- ncopies * vec_num, vectype, NULL); + { + if (direct_internal_fn_supported_p (IFN_LEN_FOLD_EXTRACT_LAST, + vectype, OPTIMIZE_FOR_SPEED)) + vect_record_loop_len (loop_vinfo, + &LOOP_VINFO_LENS (loop_vinfo), + ncopies * vec_num, vectype, 1); + else + vect_record_loop_mask (loop_vinfo, + &LOOP_VINFO_MASKS (loop_vinfo), + ncopies * vec_num, vectype, NULL); + } /* Extra inactive lanes should be safe for vect_nested_cycle. */ else if (STMT_VINFO_DEF_TYPE (reduc_info) != vect_nested_cycle) { @@ -11772,7 +11781,13 @@ vectorizable_condition (vec_info *vinfo, mask to the condition, or to its inverse. */ vec_loop_masks *masks = NULL; - if (loop_vinfo && LOOP_VINFO_FULLY_MASKED_P (loop_vinfo)) + vec_loop_lens *lens = NULL; + if (loop_vinfo && LOOP_VINFO_FULLY_WITH_LENGTH_P (loop_vinfo)) + { + if (reduction_type == EXTRACT_LAST_REDUCTION) + lens = &LOOP_VINFO_LENS (loop_vinfo); + } + else if (loop_vinfo && LOOP_VINFO_FULLY_MASKED_P (loop_vinfo)) { if (reduction_type == EXTRACT_LAST_REDUCTION) masks = &LOOP_VINFO_MASKS (loop_vinfo); @@ -11910,7 +11925,8 @@ vectorizable_condition (vec_info *vinfo, /* Force vec_compare to be an SSA_NAME rather than a comparison, in cases where that's necessary. 
*/ - if (masks || reduction_type == EXTRACT_LAST_REDUCTION) + tree len = NULL_TREE, bias = NULL_TREE; + if (masks || lens || reduction_type == EXTRACT_LAST_REDUCTION) { if (!is_gimple_val (vec_compare)) { @@ -11931,6 +11947,23 @@ vectorizable_condition (vec_info *vinfo, vec_compare = vec_compare_name; } + if (direct_internal_fn_supported_p (IFN_LEN_FOLD_EXTRACT_LAST, + vectype, OPTIMIZE_FOR_SPEED)) + { + if (lens) + { + len = vect_get_loop_len (loop_vinfo, gsi, lens, + vec_num * ncopies, vectype, i, 1); + signed char biasval + = LOOP_VINFO_PARTIAL_LOAD_STORE_BIAS (loop_vinfo); + bias = build_int_cst (intQI_type_node, biasval); + } + else + { + len = size_int (TYPE_VECTOR_SUBPARTS (vectype)); + bias = build_int_cst (intQI_type_node, 0); + } + } if (masks) { tree loop_mask @@ -11950,9 +11983,14 @@ vectorizable_condition (vec_info *vinfo, { gimple *old_stmt = vect_orig_stmt (stmt_info)->stmt; tree lhs = gimple_get_lhs (old_stmt); - new_stmt = gimple_build_call_internal - (IFN_FOLD_EXTRACT_LAST, 3, else_clause, vec_compare, - vec_then_clause); + if (len) + new_stmt = gimple_build_call_internal + (IFN_LEN_FOLD_EXTRACT_LAST, 5, else_clause, vec_compare, + vec_then_clause, len, bias); + else + new_stmt = gimple_build_call_internal + (IFN_FOLD_EXTRACT_LAST, 3, else_clause, vec_compare, + vec_then_clause); gimple_call_set_lhs (new_stmt, lhs); SSA_NAME_DEF_STMT (lhs) = new_stmt; if (old_stmt == gsi_stmt (*gsi)) -- 2.36.3