Hi all, In match.pd and expmed.c, we have some code that transforms an lrotate into an rrotate when the rotation count is constant. However, it doesn't consider whether the target supports rrotate. This leads to suboptimal generated code, since some optimizations that query the target optab can't get the expected result. One typical case is that we miss some rotation vectorization opportunities on Power.
This patch teaches them to check target optab availability first. Regression testing has just been launched; is it OK for trunk if it bootstraps and passes regression testing? Thanks, Kewen ----------------------------------------------- gcc/ChangeLog 2019-07-15 Kewen Lin <li...@gcc.gnu.org> * expmed.c (expand_shift_1): Only transform to opposite rotate when it's supported on the target. * match.pd (lrot N -> rrot N'): Check target support or not. gcc/testsuite/ChangeLog 2019-07-15 Kewen Lin <li...@gcc.gnu.org> * gcc.target/powerpc/vec_rotate.c: New test. diff --git a/gcc/expmed.c b/gcc/expmed.c index d7f8e9a5d76..6cd3341a0e4 100644 --- a/gcc/expmed.c +++ b/gcc/expmed.c @@ -2491,7 +2491,9 @@ expand_shift_1 (enum tree_code code, machine_mode mode, rtx shifted, if (rotate && CONST_INT_P (op1) && IN_RANGE (INTVAL (op1), GET_MODE_BITSIZE (scalar_mode) / 2 + left, - GET_MODE_BITSIZE (scalar_mode) - 1)) + GET_MODE_BITSIZE (scalar_mode) - 1) + && ((left && optab_handler (rrotate_optab, mode) != CODE_FOR_nothing) || + (!left && optab_handler (lrotate_optab, mode) != CODE_FOR_nothing))) { op1 = gen_int_shift_amount (mode, (GET_MODE_BITSIZE (scalar_mode) - INTVAL (op1))); diff --git a/gcc/match.pd b/gcc/match.pd index 88dae4231d8..a63cd15e129 100644 --- a/gcc/match.pd +++ b/gcc/match.pd @@ -2418,11 +2418,14 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT) /* Rewrite an LROTATE_EXPR by a constant into an RROTATE_EXPR by a new constant. */ + (simplify (lrotate @0 INTEGER_CST@1) + (if ((VECTOR_TYPE_P (type) && target_supports_op_p (type, RROTATE_EXPR, optab_vector)) + || (!VECTOR_TYPE_P (type) && target_supports_op_p (type, RROTATE_EXPR, optab_scalar))) (rrotate @0 { const_binop (MINUS_EXPR, TREE_TYPE (@1), build_int_cst (TREE_TYPE (@1), - element_precision (type)), @1); })) + element_precision (type)), @1); }))) /* Turn (a OP c1) OP c2 into a OP (c1+c2). 
*/ (for op (lrotate rrotate rshift lshift) diff --git a/gcc/testsuite/gcc.target/powerpc/vec_rotate.c b/gcc/testsuite/gcc.target/powerpc/vec_rotate.c new file mode 100644 index 00000000000..16d1f297d2d --- /dev/null +++ b/gcc/testsuite/gcc.target/powerpc/vec_rotate.c @@ -0,0 +1,47 @@ +/* { dg-options "-O3" } */ +/* { dg-require-effective-target powerpc_vsx_ok } */ + +/* Check LROTATE to RROTATE transformation on const rotation count is disabled + on Power by checking optab, it helps vectorizer to exploit vector rotation + instructions. */ + +#define N 256 +unsigned long long sud[N], rud[N]; +unsigned int suw[N], ruw[N]; +unsigned short suh[N], ruh[N]; +unsigned char sub[N], rub[N]; + +void +testULL () +{ + for (int i = 0; i < 256; ++i) + rud[i] = (sud[i] >> 8) | (sud[i] << (sizeof (sud[0]) * 8 - 8)); +} + +void +testUW () +{ + for (int i = 0; i < 256; ++i) + ruw[i] = (suw[i] >> 8) | (suw[i] << (sizeof (suw[0]) * 8 - 8)); +} + +void +testUH () +{ + for (int i = 0; i < 256; ++i) + ruh[i] = (unsigned short) (suh[i] >> 9) + | (unsigned short) (suh[i] << (sizeof (suh[0]) * 8 - 9)); +} + +void +testUB () +{ + for (int i = 0; i < 256; ++i) + rub[i] = (unsigned char) (sub[i] >> 5) + | (unsigned char) (sub[i] << (sizeof (sub[0]) * 8 - 5)); +} + +/* { dg-final { scan-assembler {\mvrld\M} } } */ +/* { dg-final { scan-assembler {\mvrlw\M} } } */ +/* { dg-final { scan-assembler {\mvrlh\M} } } */ +/* { dg-final { scan-assembler {\mvrlb\M} } } */