On Thu, Oct 17, 2024 at 11:06 AM Kyrylo Tkachov <ktkac...@nvidia.com> wrote: > > Hi Eikansh > > > On 16 Oct 2024, at 18:23, Eikansh Gupta <quic_eikag...@quicinc.com> wrote: > > > > The pattern `a rrotate (32-b)` should be optimized to `a lrotate b`. > > The same is also true for `a lrotate (32-b)`. It can be optimized to > > `a rrotate b`. > > > > This patch adds following patterns: > > a rrotate (32-b) -> a lrotate b > > a lrotate (32-b) -> a rrotate b > > > > Bootstrapped and tested on x86_64-linux-gnu with no regressions. > > > > PR tree-optimization/109906 > > > > gcc/ChangeLog: > > > > * match.pd (a rrotate (32-b) -> a lrotate b): New pattern > > (a lrotate (32-b) -> a rrotate b): New pattern > > > > gcc/testsuite/ChangeLog: > > > > * gcc.dg/tree-ssa/pr109906.c: New test. > > > > Signed-off-by: Eikansh Gupta <quic_eikag...@quicinc.com> > > --- > > gcc/match.pd | 9 ++++++ > > gcc/testsuite/gcc.dg/tree-ssa/pr109906.c | 41 ++++++++++++++++++++++++ > > 2 files changed, 50 insertions(+) > > create mode 100644 gcc/testsuite/gcc.dg/tree-ssa/pr109906.c > > > > diff --git a/gcc/match.pd b/gcc/match.pd > > index 5ec31ef6269..078ef050351 100644 > > --- a/gcc/match.pd > > +++ b/gcc/match.pd > > @@ -4861,6 +4861,15 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT) > > build_int_cst (TREE_TYPE (@1), > > element_precision (type)), @1); })) > > > > +/* a rrotate (32-b) -> a lrotate b */ > > +/* a lrotate (32-b) -> a rrotate b */ > > +(for rotate (lrotate rrotate) > > + orotate (rrotate lrotate) > > + (simplify > > + (rotate @0 (minus INTEGER_CST@1 @2)) > > + (if (TYPE_PRECISION (TREE_TYPE (@0)) == wi::to_wide (@1))
Use element_precision (@0) - rotates can be used on vector types as well and using TYPE_PRECISION would ICE on them. > > + (orotate @0 @2)))) > > There is already a transformation for lrotate (x, C) into rotate (x, SIZE - > C) around line 4937 in match.pd > Isn’t there a risk that we enter an infinite recursion situation with this? I don't think so. The patch is OK with the adjustment. Thanks, Richard. > Thanks, > Kyrill > > > > + > > /* Turn (a OP c1) OP c2 into a OP (c1+c2). */ > > (for op (lrotate rrotate rshift lshift) > > (simplify > > diff --git a/gcc/testsuite/gcc.dg/tree-ssa/pr109906.c > > b/gcc/testsuite/gcc.dg/tree-ssa/pr109906.c > > new file mode 100644 > > index 00000000000..9aa015d8c65 > > --- /dev/null > > +++ b/gcc/testsuite/gcc.dg/tree-ssa/pr109906.c > > @@ -0,0 +1,41 @@ > > +/* PR tree-optimization/109906 */ > > +/* { dg-do compile } */ > > +/* { dg-options "-O1 -fdump-tree-optimized-raw" } */ > > +/* { dg-require-effective-target int32 } */ > > + > > +/* Implementation of rotate right operation */ > > +static inline > > +unsigned rrotate(unsigned x, int t) > > +{ > > + if (t >= 32) __builtin_unreachable(); > > + unsigned tl = x >> (t); > > + unsigned th = x << (32 - t); > > + return tl | th; > > +} > > + > > +/* Here rotate left is achieved by doing rotate right by (32 - x) */ > > +unsigned rotateleft(unsigned t, int x) > > +{ > > + return rrotate (t, 32 - x); > > +} > > + > > +/* Implementation of rotate left operation */ > > +static inline > > +unsigned lrotate(unsigned x, int t) > > +{ > > + if (t >= 32) __builtin_unreachable(); > > + unsigned tl = x << (t); > > + unsigned th = x >> (32 - t); > > + return tl | th; > > +} > > + > > +/* Here rotate right is achieved by doing rotate left by (32 - x) */ > > +unsigned rotateright(unsigned t, int x) > > +{ > > + return lrotate (t, 32 - x); > > +} > > + > > +/* Shouldn't have instruction for (32 - x). */ > > +/* { dg-final { scan-tree-dump-not "minus_expr" "optimized" } } */ > > +/* { dg-final { scan-tree-dump "rrotate_expr" "optimized" } } */ > > +/* { dg-final { scan-tree-dump "lrotate_expr" "optimized" } } */ > > -- > > 2.17.1 > > >