Hi Richard,

For the AArch64 target, I was trying to vectorize the expression "arr[i] = arr[i] * 4;" via vector shift instructions, since AArch64 has no vector multiply for 64-bit elements.
unsigned long int __attribute__ ((aligned (64))) arr[100];
int i;

#if 1
void test_vector_shifts ()
{
  for (i = 0; i <= 99; i++)
    arr[i] = arr[i] << 2;
}
#endif

void test_vectorshift_via_mul ()
{
  for (i = 0; i <= 99; i++)
    arr[i] = arr[i] * 4;
}

I found a similar PR with your comments: https://gcc.gnu.org/bugzilla/show_bug.cgi?id=65952#c6. Based on that, and on the IRC discussion I had with you, I added a vector recog pattern that transforms such multiplications into shifts. The vectorizer is now able to generate vector shifts for the above test case, and the PR's test case gets vectorized as well: https://gcc.gnu.org/bugzilla/show_bug.cgi?id=65952#c10.

This is just an initial patch and only optimizes multiplications by power-of-2 integer constants; I wanted to get feedback on it. Bootstrapped and regression tested on aarch64-none-linux-gnu.

Regards,
Venkat.
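P.S. For reference, here is a small standalone check (my own sketch, not part of the patch) of the identity the pattern relies on: for unsigned types and a power-of-2 constant C, x * C is exactly x << log2 (C), because unsigned overflow wraps identically for both forms.

#include <assert.h>

int
main (void)
{
  /* (x * 4) and (x << 2) must agree for all unsigned values,
     including those that wrap around on multiplication.  */
  for (unsigned long x = 0; x < 1000000; x++)
    assert ((x * 4) == (x << 2));
  assert (((unsigned long) -1 * 4) == (((unsigned long) -1) << 2));
  return 0;
}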
diff --git a/gcc/tree-vect-patterns.c b/gcc/tree-vect-patterns.c
index f034635..948203d 100644
--- a/gcc/tree-vect-patterns.c
+++ b/gcc/tree-vect-patterns.c
@@ -76,6 +76,10 @@ static gimple vect_recog_vector_vector_shift_pattern (vec<gimple> *,
                                                       tree *, tree *);
 static gimple vect_recog_divmod_pattern (vec<gimple> *,
                                          tree *, tree *);
+
+static gimple vect_recog_multconst_pattern (vec<gimple> *,
+                                            tree *, tree *);
+
 static gimple vect_recog_mixed_size_cond_pattern (vec<gimple> *,
                                                   tree *, tree *);
 static gimple vect_recog_bool_pattern (vec<gimple> *, tree *, tree *);
@@ -90,6 +94,7 @@ static vect_recog_func_ptr vect_vect_recog_func_ptrs[NUM_PATTERNS] = {
 	vect_recog_rotate_pattern,
 	vect_recog_vector_vector_shift_pattern,
 	vect_recog_divmod_pattern,
+	vect_recog_multconst_pattern,
 	vect_recog_mixed_size_cond_pattern,
 	vect_recog_bool_pattern};
 
@@ -2147,6 +2152,93 @@ vect_recog_vector_vector_shift_pattern (vec<gimple> *stmts,
   return pattern_stmt;
 }
 
+/* Detect multiplication by an integer constant that is a power of two,
+   when the target has no vector multiply for the type but does support
+   vector shifts, and rewrite it as a left shift:
+
+     a_T = b_T * POW2_CST;  -->  a_T = b_T << log2 (POW2_CST);  */
+
+static gimple
+vect_recog_multconst_pattern (vec<gimple> *stmts,
+                              tree *type_in, tree *type_out)
+{
+  gimple last_stmt = stmts->pop ();
+  tree oprnd0, oprnd1, vectype, itype;
+  gimple pattern_stmt;
+  enum tree_code rhs_code;
+  optab optab;
+  stmt_vec_info stmt_vinfo = vinfo_for_stmt (last_stmt);
+
+  if (!is_gimple_assign (last_stmt))
+    return NULL;
+
+  rhs_code = gimple_assign_rhs_code (last_stmt);
+  switch (rhs_code)
+    {
+    case MULT_EXPR:
+      break;
+    default:
+      return NULL;
+    }
+
+  if (STMT_VINFO_IN_PATTERN_P (stmt_vinfo))
+    return NULL;
+
+  oprnd0 = gimple_assign_rhs1 (last_stmt);
+  oprnd1 = gimple_assign_rhs2 (last_stmt);
+  itype = TREE_TYPE (oprnd0);
+
+  if (TREE_CODE (oprnd0) != SSA_NAME
+      || TREE_CODE (oprnd1) != INTEGER_CST
+      || TREE_CODE (itype) != INTEGER_TYPE
+      || TYPE_PRECISION (itype) != GET_MODE_PRECISION (TYPE_MODE (itype)))
+    return NULL;
+
+  vectype = get_vectype_for_scalar_type (itype);
+  if (vectype == NULL_TREE)
+    return NULL;
+
+  /* If the target can handle vectorized multiplication natively,
+     don't attempt to optimize this.  */
+  optab = optab_for_tree_code (rhs_code, vectype, optab_default);
+  if (optab != unknown_optab)
+    {
+      machine_mode vec_mode = TYPE_MODE (vectype);
+      int icode = (int) optab_handler (optab, vec_mode);
+      if (icode != CODE_FOR_nothing)
+	return NULL;
+    }
+
+  /* If the target cannot handle a vector left shift either, we
+     cannot optimize, so bail out.  */
+  optab = optab_for_tree_code (LSHIFT_EXPR, vectype, optab_vector);
+  if (!optab
+      || optab_handler (optab, TYPE_MODE (vectype)) == CODE_FOR_nothing)
+    return NULL;
+
+  if (integer_pow2p (oprnd1))
+    {
+      /* Pattern detected.  */
+      if (dump_enabled_p ())
+	dump_printf_loc (MSG_NOTE, vect_location,
+			 "vect_recog_multconst_pattern: detected:\n");
+
+      tree shift = build_int_cst (itype, tree_log2 (oprnd1));
+      pattern_stmt
+	= gimple_build_assign (vect_recog_temp_ssa_var (itype, NULL),
+			       LSHIFT_EXPR, oprnd0, shift);
+      if (dump_enabled_p ())
+	dump_gimple_stmt_loc (MSG_NOTE, vect_location, TDF_SLIM,
+			      pattern_stmt, 0);
+      stmts->safe_push (last_stmt);
+      *type_in = vectype;
+      *type_out = vectype;
+      return pattern_stmt;
+    }
+
+  return NULL;
+}
+
 /* Detect a signed division by a constant that wouldn't be
    otherwise vectorized:
diff --git a/gcc/tree-vectorizer.h b/gcc/tree-vectorizer.h
index 48c1f8d..833fe4b 100644
--- a/gcc/tree-vectorizer.h
+++ b/gcc/tree-vectorizer.h
@@ -1131,7 +1131,7 @@ extern void vect_slp_transform_bb (basic_block);
    Additional pattern recognition functions can (and will) be added
    in the future.  */
 typedef gimple (* vect_recog_func_ptr) (vec<gimple> *, tree *, tree *);
-#define NUM_PATTERNS 12
+#define NUM_PATTERNS 13
 void vect_pattern_recog (loop_vec_info, bb_vec_info);
 
 /* In tree-vectorizer.c.  */
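If it helps review, a testcase along these lines could go under gcc.dg/vect (just a sketch; the effective-target guards and the expected dump count are my assumptions and would need checking against the vect testsuite conventions):

/* { dg-require-effective-target vect_shift } */
/* { dg-require-effective-target vect_long } */

unsigned long int __attribute__ ((aligned (64))) arr[100];

void
test_vectorshift_via_mul (void)
{
  int i;
  for (i = 0; i <= 99; i++)
    arr[i] = arr[i] * 4;
}

/* { dg-final { scan-tree-dump-times "vect_recog_multconst_pattern: detected" 1 "vect" } } */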