On Wed, May 15, 2013 at 03:24:37PM +0200, Richard Biener wrote: > Ok with ...
> > + /* Pattern detected. */ > > + if (dump_enabled_p ()) > > + dump_printf_loc (MSG_OPTIMIZED_LOCATIONS, vect_location, > > + "vect_recog_rotate_pattern: detected: "); > > Please use MSG_NOTE here. Ok, here is what I've committed (the above change plus added 6 i386.exp testcases). 2013-05-16 Jakub Jelinek <ja...@redhat.com> * tree-vectorizer.h (NUM_PATTERNS): Increment. * tree-vect-patterns.c (vect_vect_recog_func_ptrs): Add vect_recog_rotate_pattern. (vect_recog_rotate_pattern): New function. * gcc.target/i386/rotate-3.c: New test. * gcc.target/i386/rotate-3a.c: New test. * gcc.target/i386/rotate-4.c: New test. * gcc.target/i386/rotate-4a.c: New test. * gcc.target/i386/rotate-5.c: New test. * gcc.target/i386/rotate-5a.c: New test. --- gcc/tree-vectorizer.h.jj 2013-05-13 09:44:45.822529016 +0200 +++ gcc/tree-vectorizer.h 2013-05-16 13:56:08.093576302 +0200 @@ -1005,7 +1005,7 @@ extern void vect_slp_transform_bb (basic Additional pattern recognition functions can (and will) be added in the future. */ typedef gimple (* vect_recog_func_ptr) (vec<gimple> *, tree *, tree *); -#define NUM_PATTERNS 10 +#define NUM_PATTERNS 11 void vect_pattern_recog (loop_vec_info, bb_vec_info); /* In tree-vectorizer.c. */ --- gcc/tree-vect-patterns.c.jj 2013-05-16 12:36:29.496419141 +0200 +++ gcc/tree-vect-patterns.c 2013-05-16 13:56:08.095576326 +0200 @@ -50,6 +50,7 @@ static gimple vect_recog_over_widening_p tree *); static gimple vect_recog_widen_shift_pattern (vec<gimple> *, tree *, tree *); +static gimple vect_recog_rotate_pattern (vec<gimple> *, tree *, tree *); static gimple vect_recog_vector_vector_shift_pattern (vec<gimple> *, tree *, tree *); static gimple vect_recog_divmod_pattern (vec<gimple> *, @@ -64,6 +65,7 @@ static vect_recog_func_ptr vect_vect_rec vect_recog_pow_pattern, vect_recog_widen_shift_pattern, vect_recog_over_widening_pattern, + vect_recog_rotate_pattern, vect_recog_vector_vector_shift_pattern, vect_recog_divmod_pattern, vect_recog_mixed_size_cond_pattern, @@ -1446,6 +1448,218 @@ vect_recog_widen_shift_pattern (vec<gimp if (dump_enabled_p ()) dump_gimple_stmt_loc (MSG_NOTE, vect_location, TDF_SLIM, pattern_stmt, 0); + + stmts->safe_push (last_stmt); + return pattern_stmt; +} + +/* Detect a rotate pattern wouldn't be otherwise vectorized: + + type a_t, b_t, c_t; + + S0 a_t = b_t r<< c_t; + + Input/Output: + + * STMTS: Contains a stmt from which the pattern search begins, + i.e. the shift/rotate stmt. The original stmt (S0) is replaced + with a sequence: + + S1 d_t = -c_t; + S2 e_t = d_t & (B - 1); + S3 f_t = b_t << c_t; + S4 g_t = b_t >> e_t; + S0 a_t = f_t | g_t; + + where B is element bitsize of type. + + Output: + + * TYPE_IN: The type of the input arguments to the pattern. + + * TYPE_OUT: The type of the output of this pattern. + + * Return value: A new stmt that will be used to replace the rotate + S0 stmt. */ + +static gimple +vect_recog_rotate_pattern (vec<gimple> *stmts, tree *type_in, tree *type_out) +{ + gimple last_stmt = stmts->pop (); + tree oprnd0, oprnd1, lhs, var, var1, var2, vectype, type, stype, def, def2; + gimple pattern_stmt, def_stmt; + enum tree_code rhs_code; + stmt_vec_info stmt_vinfo = vinfo_for_stmt (last_stmt); + loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo); + bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_vinfo); + enum vect_def_type dt; + optab optab1, optab2; + + if (!is_gimple_assign (last_stmt)) + return NULL; + + rhs_code = gimple_assign_rhs_code (last_stmt); + switch (rhs_code) + { + case LROTATE_EXPR: + case RROTATE_EXPR: + break; + default: + return NULL; + } + + if (STMT_VINFO_IN_PATTERN_P (stmt_vinfo)) + return NULL; + + lhs = gimple_assign_lhs (last_stmt); + oprnd0 = gimple_assign_rhs1 (last_stmt); + type = TREE_TYPE (oprnd0); + oprnd1 = gimple_assign_rhs2 (last_stmt); + if (TREE_CODE (oprnd0) != SSA_NAME + || TYPE_PRECISION (TREE_TYPE (lhs)) != TYPE_PRECISION (type) + || !INTEGRAL_TYPE_P (type) + || !TYPE_UNSIGNED (type)) + return NULL; + + if (!vect_is_simple_use (oprnd1, last_stmt, loop_vinfo, bb_vinfo, &def_stmt, + &def, &dt)) + return NULL; + + if (dt != vect_internal_def + && dt != vect_constant_def + && dt != vect_external_def) + return NULL; + + vectype = get_vectype_for_scalar_type (type); + if (vectype == NULL_TREE) + return NULL; + + /* If vector/vector or vector/scalar rotate is supported by the target, + don't do anything here. */ + optab1 = optab_for_tree_code (rhs_code, vectype, optab_vector); + if (optab1 + && optab_handler (optab1, TYPE_MODE (vectype)) != CODE_FOR_nothing) + return NULL; + + if (bb_vinfo != NULL || dt != vect_internal_def) + { + optab2 = optab_for_tree_code (rhs_code, vectype, optab_scalar); + if (optab2 + && optab_handler (optab2, TYPE_MODE (vectype)) != CODE_FOR_nothing) + return NULL; + } + + /* If vector/vector or vector/scalar shifts aren't supported by the target, + don't do anything here either. */ + optab1 = optab_for_tree_code (LSHIFT_EXPR, vectype, optab_vector); + optab2 = optab_for_tree_code (RSHIFT_EXPR, vectype, optab_vector); + if (!optab1 + || optab_handler (optab1, TYPE_MODE (vectype)) == CODE_FOR_nothing + || !optab2 + || optab_handler (optab2, TYPE_MODE (vectype)) == CODE_FOR_nothing) + { + if (bb_vinfo == NULL && dt == vect_internal_def) + return NULL; + optab1 = optab_for_tree_code (LSHIFT_EXPR, vectype, optab_scalar); + optab2 = optab_for_tree_code (RSHIFT_EXPR, vectype, optab_scalar); + if (!optab1 + || optab_handler (optab1, TYPE_MODE (vectype)) == CODE_FOR_nothing + || !optab2 + || optab_handler (optab2, TYPE_MODE (vectype)) == CODE_FOR_nothing) + return NULL; + } + + *type_in = vectype; + *type_out = vectype; + if (*type_in == NULL_TREE) + return NULL; + + def = NULL_TREE; + if (TREE_CODE (oprnd1) == INTEGER_CST + || TYPE_MODE (TREE_TYPE (oprnd1)) == TYPE_MODE (type)) + def = oprnd1; + else if (def_stmt && gimple_assign_cast_p (def_stmt)) + { + tree rhs1 = gimple_assign_rhs1 (def_stmt); + if (TYPE_MODE (TREE_TYPE (rhs1)) == TYPE_MODE (type) + && TYPE_PRECISION (TREE_TYPE (rhs1)) + == TYPE_PRECISION (type)) + def = rhs1; + } + + STMT_VINFO_PATTERN_DEF_SEQ (stmt_vinfo) = NULL; + if (def == NULL_TREE) + { + def = vect_recog_temp_ssa_var (type, NULL); + def_stmt = gimple_build_assign_with_ops (NOP_EXPR, def, oprnd1, + NULL_TREE); + append_pattern_def_seq (stmt_vinfo, def_stmt); + } + stype = TREE_TYPE (def); + + if (TREE_CODE (def) == INTEGER_CST) + { + if (!host_integerp (def, 1) + || (unsigned HOST_WIDE_INT) tree_low_cst (def, 1) + >= GET_MODE_PRECISION (TYPE_MODE (type)) + || integer_zerop (def)) + return NULL; + def2 = build_int_cst (stype, + GET_MODE_PRECISION (TYPE_MODE (type)) + - tree_low_cst (def, 1)); + } + else + { + tree vecstype = get_vectype_for_scalar_type (stype); + stmt_vec_info def_stmt_vinfo; + + if (vecstype == NULL_TREE) + return NULL; + def2 = vect_recog_temp_ssa_var (stype, NULL); + def_stmt = gimple_build_assign_with_ops (NEGATE_EXPR, def2, def, + NULL_TREE); + def_stmt_vinfo + = new_stmt_vec_info (def_stmt, loop_vinfo, bb_vinfo); + set_vinfo_for_stmt (def_stmt, def_stmt_vinfo); + STMT_VINFO_VECTYPE (def_stmt_vinfo) = vecstype; + append_pattern_def_seq (stmt_vinfo, def_stmt); + + def2 = vect_recog_temp_ssa_var (stype, NULL); + tree mask + = build_int_cst (stype, GET_MODE_PRECISION (TYPE_MODE (stype)) - 1); + def_stmt = gimple_build_assign_with_ops (BIT_AND_EXPR, def2, + gimple_assign_lhs (def_stmt), + mask); + def_stmt_vinfo + = new_stmt_vec_info (def_stmt, loop_vinfo, bb_vinfo); + set_vinfo_for_stmt (def_stmt, def_stmt_vinfo); + STMT_VINFO_VECTYPE (def_stmt_vinfo) = vecstype; + append_pattern_def_seq (stmt_vinfo, def_stmt); + } + + var1 = vect_recog_temp_ssa_var (type, NULL); + def_stmt = gimple_build_assign_with_ops (rhs_code == LROTATE_EXPR + ? LSHIFT_EXPR : RSHIFT_EXPR, + var1, oprnd0, def); + append_pattern_def_seq (stmt_vinfo, def_stmt); + + var2 = vect_recog_temp_ssa_var (type, NULL); + def_stmt = gimple_build_assign_with_ops (rhs_code == LROTATE_EXPR + ? RSHIFT_EXPR : LSHIFT_EXPR, + var2, oprnd0, def2); + append_pattern_def_seq (stmt_vinfo, def_stmt); + + /* Pattern detected. */ + if (dump_enabled_p ()) + dump_printf_loc (MSG_NOTE, vect_location, + "vect_recog_rotate_pattern: detected: "); + + /* Pattern supported. Create a stmt to be used to replace the pattern. */ + var = vect_recog_temp_ssa_var (type, NULL); + pattern_stmt = gimple_build_assign_with_ops (BIT_IOR_EXPR, var, var1, var2); + + if (dump_enabled_p ()) + dump_gimple_stmt_loc (MSG_NOTE, vect_location, TDF_SLIM, pattern_stmt, 0); stmts->safe_push (last_stmt); return pattern_stmt; --- gcc/testsuite/gcc.target/i386/rotate-3.c.jj 2013-05-16 13:08:00.117087861 +0200 +++ gcc/testsuite/gcc.target/i386/rotate-3.c 2013-05-16 13:59:41.332373698 +0200 @@ -0,0 +1,18 @@ +/* { dg-do compile } */ +/* { dg-require-effective-target avx2 } */ +/* { dg-options "-O3 -mavx2 -fdump-tree-vect-details" } */ +/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" } } */ +/* { dg-final { cleanup-tree-dump "vect" } } */ + +unsigned int a[1024] __attribute__((aligned (32))); + +__attribute__((noinline, noclone)) void +foo (void) +{ + int i; + for (i = 0; i < 1024; i++) + { + int j = i & 31; + a[i] = (a[i] << j) | (a[i] >> ((-j) & 31)); + } +} --- gcc/testsuite/gcc.target/i386/rotate-3a.c.jj 2013-05-16 13:51:04.584307601 +0200 +++ gcc/testsuite/gcc.target/i386/rotate-3a.c 2013-05-16 14:00:19.158164463 +0200 @@ -0,0 +1,24 @@ +/* { dg-do run } */ +/* { dg-require-effective-target avx2 } */ +/* { dg-options "-O3 -mavx2" } */ + +#include "avx2-check.h" + +#include "rotate-3.c" + +static void +__attribute__((noinline)) +avx2_test (void) +{ + int i; + for (i = 0; i < 1024; i++) + a[i] = i * 1073741789U; + foo (); + for (i = 0; i < 1024; i++) + { + int j = i & 31; + unsigned int x = i * 1073741789U; + if (a[i] != ((x << j) | (x >> ((-j) & 31)))) + abort (); + } +} --- gcc/testsuite/gcc.target/i386/rotate-4.c.jj 2013-05-16 13:08:11.044032865 +0200 +++ gcc/testsuite/gcc.target/i386/rotate-4.c 2013-05-16 13:50:14.386590821 +0200 @@ -0,0 +1,15 @@ +/* { dg-do compile } */ +/* { dg-require-effective-target avx2 } */ +/* { dg-options "-O3 -mavx2 -fdump-tree-vect-details" } */ +/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" } } */ +/* { dg-final { cleanup-tree-dump "vect" } } */ + +unsigned int a[1024] __attribute__((aligned (32))); + +__attribute__((noinline, noclone)) void +foo (int j) +{ + int i; + for (i = 0; i < 1024; i++) + a[i] = (a[i] << j) | (a[i] >> ((-j) & 31)); +} --- gcc/testsuite/gcc.target/i386/rotate-4a.c.jj 2013-05-16 13:52:11.437923226 +0200 +++ gcc/testsuite/gcc.target/i386/rotate-4a.c 2013-05-16 14:00:33.907079446 +0200 @@ -0,0 +1,34 @@ +/* { dg-do run } */ +/* { dg-require-effective-target avx2 } */ +/* { dg-options "-O3 -mavx2" } */ + +#include "avx2-check.h" + +#include "rotate-4.c" + +static void +__attribute__((noinline)) +avx2_test (void) +{ + int i; + for (i = 0; i < 1024; i++) + a[i] = i * 1073741789U; + foo (3); + for (i = 0; i < 1024; i++) + { + unsigned int x = i * 1073741789U; + if (a[i] != ((x << 3) | (x >> ((-3) & 31)))) + abort (); + } + foo (0); + for (i = 0; i < 1024; i++) + { + unsigned int x = i * 1073741789U; + if (a[i] != ((x << 3) | (x >> ((-3) & 31)))) + abort (); + } + foo (29); + for (i = 0; i < 1024; i++) + if (a[i] != i * 1073741789U) + abort (); +} --- gcc/testsuite/gcc.target/i386/rotate-5.c.jj 2013-05-16 13:08:13.849019300 +0200 +++ gcc/testsuite/gcc.target/i386/rotate-5.c 2013-05-16 13:50:22.331547612 +0200 @@ -0,0 +1,15 @@ +/* { dg-do compile } */ +/* { dg-require-effective-target avx } */ +/* { dg-options "-O3 -mavx -fdump-tree-vect-details" } */ +/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" } } */ +/* { dg-final { cleanup-tree-dump "vect" } } */ + +unsigned int a[1024] __attribute__((aligned (32))); + +__attribute__((noinline, noclone)) void +foo (void) +{ + int i, j = 3; + for (i = 0; i < 1024; i++) + a[i] = (a[i] << j) | (a[i] >> ((-j) & 31)); +} --- gcc/testsuite/gcc.target/i386/rotate-5a.c.jj 2013-05-16 13:51:14.104252744 +0200 +++ gcc/testsuite/gcc.target/i386/rotate-5a.c 2013-05-16 13:58:15.402858405 +0200 @@ -0,0 +1,23 @@ +/* { dg-do run } */ +/* { dg-require-effective-target avx } */ +/* { dg-options "-O3 -mavx" } */ + +#include "avx-check.h" + +#include "rotate-5.c" + +static void +__attribute__((noinline)) +avx_test (void) +{ + int i; + for (i = 0; i < 1024; i++) + a[i] = i * 1073741789U; + foo (); + for (i = 0; i < 1024; i++) + { + unsigned int x = i * 1073741789U; + if (a[i] != ((x << 3) | (x >> ((-3) & 31)))) + abort (); + } +} Jakub