On Fri, 24 May 2024, Manolis Tsamis wrote:

> On Fri, May 24, 2024 at 9:31 AM Richard Biener <rguent...@suse.de> wrote:
> >
> > On Wed, 22 May 2024, Manolis Tsamis wrote:
> >
> > > > The match.pd patterns to merge two vector permutes into one fail when a
> > > > potentially no-op view convert expression is between the two permutes.
> > > > This change lifts this restriction.
> > >
> > > gcc/ChangeLog:
> > >
> > >       * match.pd: Allow no-op view_convert between permutes.
> > >
> > > gcc/testsuite/ChangeLog:
> > >
> > >       * gcc.dg/fold-perm-2.c: New test.
> > >
> > > Signed-off-by: Manolis Tsamis <manolis.tsa...@vrull.eu>
> > > ---
> > >
> > >  gcc/match.pd                       | 14 ++++++++------
> > >  gcc/testsuite/gcc.dg/fold-perm-2.c | 16 ++++++++++++++++
> > >  2 files changed, 24 insertions(+), 6 deletions(-)
> > >  create mode 100644 gcc/testsuite/gcc.dg/fold-perm-2.c
> > >
> > > diff --git a/gcc/match.pd b/gcc/match.pd
> > > index 07e743ae464..cbb3c5d86e0 100644
> > > --- a/gcc/match.pd
> > > +++ b/gcc/match.pd
> > > @@ -10039,19 +10039,21 @@ and,
> > >       d = VEC_PERM_EXPR <a, b, NEW_VCST>;  */
> > >
> > >  (simplify
> > > - (vec_perm (vec_perm@0 @1 @2 VECTOR_CST@3) @0 VECTOR_CST@4)
> > > > + (vec_perm (view_convert?@0 (vec_perm@1 @2 @3 VECTOR_CST@4)) @0 VECTOR_CST@5)
> > >   (if (TYPE_VECTOR_SUBPARTS (type).is_constant ())
> > >    (with
> > >     {
> > >       machine_mode result_mode = TYPE_MODE (type);
> > > -     machine_mode op_mode = TYPE_MODE (TREE_TYPE (@1));
> > > +     machine_mode op_mode = TYPE_MODE (TREE_TYPE (@2));
> > >       int nelts = TYPE_VECTOR_SUBPARTS (type).to_constant ();
> > >       vec_perm_builder builder0;
> > >       vec_perm_builder builder1;
> > >       vec_perm_builder builder2 (nelts, nelts, 1);
> > >     }
> > > -   (if (tree_to_vec_perm_builder (&builder0, @3)
> > > -     && tree_to_vec_perm_builder (&builder1, @4))
> > > +   (if (tree_to_vec_perm_builder (&builder0, @4)
> > > +     && tree_to_vec_perm_builder (&builder1, @5)
> > > +     && element_precision (TREE_TYPE (@0))
> > > +        == element_precision (TREE_TYPE (@1)))
> >
> > I think you want to check TYPE_SIZE (TREE_TYPE (@0/@1)) for equality
> > instead.
> >
> 
> I think TYPE_SIZE is not enough as we need the vector elements to have
> the same size, not just the vector as a whole.

Err, yes - you want to check the element sizes of course.

> For example, when using the TYPE_SIZE check instead, the following
> testcase is miscompiled
> 
> typedef int veci __attribute__ ((vector_size (4 * sizeof (int))));
> typedef double vecd __attribute__ ((vector_size (2 * sizeof (double))));
> 
> void fun (veci *a, veci *b, veci *c)
> {
>   char data[16];
>   veci r1 = __builtin_shufflevector (*a, *b, 0, 5, 2, 7);
>   vecd r2;
>   __builtin_memcpy(data, &r1, sizeof(veci));
>   __builtin_memcpy(&r2, data, sizeof(vecd));
>   vecd r3 = __builtin_shufflevector (r2, r2, 1, 0);
>   __builtin_memcpy(data, &r3, sizeof(vecd));
>   __builtin_memcpy(c, data, sizeof(veci));
> }
> 
> To:
> 
> ldr     q31, [x0]
> rev64   v31.4s, v31.4s
> str     q31, [x2]
> ret
> 
> > Otherwise OK.
> >
> > Thanks,
> > Richard.
> >
> > >      (with
> > >       {
> > >         vec_perm_indices sel0 (builder0, 2, nelts);
> > > @@ -10073,10 +10075,10 @@ and,
> > >              ? (!can_vec_perm_const_p (result_mode, op_mode, sel0, false)
> > >                 || !can_vec_perm_const_p (result_mode, op_mode, sel1, 
> > > false))
> > >              : !can_vec_perm_const_p (result_mode, op_mode, sel1, false)))
> > > -      op0 = vec_perm_indices_to_tree (TREE_TYPE (@4), sel2);
> > > +      op0 = vec_perm_indices_to_tree (TREE_TYPE (@5), sel2);
> > >       }
> > >       (if (op0)
> > > -      (vec_perm @1 @2 { op0; })))))))
> > > +      (view_convert (vec_perm @2 @3 { op0; }))))))))
> > >
> > >  /* Merge
> > >       c = VEC_PERM_EXPR <a, b, VCST0>;
> > > > diff --git a/gcc/testsuite/gcc.dg/fold-perm-2.c b/gcc/testsuite/gcc.dg/fold-perm-2.c
> > > new file mode 100644
> > > index 00000000000..1a4ab4065de
> > > --- /dev/null
> > > +++ b/gcc/testsuite/gcc.dg/fold-perm-2.c
> > > @@ -0,0 +1,16 @@
> > > +/* { dg-do compile } */
> > > +/* { dg-options "-O -fdump-tree-fre1" } */
> > > +
> > > +typedef int veci __attribute__ ((vector_size (4 * sizeof (int))));
> > > > +typedef unsigned int vecu __attribute__ ((vector_size (4 * sizeof (unsigned int))));
> > > +
> > > +void fun (veci *a, veci *b, veci *c)
> > > +{
> > > +  veci r1 = __builtin_shufflevector (*a, *b, 0, 5, 2, 7);
> > > +  vecu r2 = __builtin_convertvector (r1, vecu);
> > > +  vecu r3 = __builtin_shufflevector (r2, r2, 2, 3, 1, 0);
> > > +  *c = __builtin_convertvector (r3, veci);
> > > +}
> > > +
> > > > +/* { dg-final { scan-tree-dump "VEC_PERM_EXPR.*{ 2, 7, 5, 0 }" "fre1" } } */
> > > +/* { dg-final { scan-tree-dump-times "VEC_PERM_EXPR" 1 "fre1" } } */
> > >
> >
> > --
> > Richard Biener <rguent...@suse.de>
> > SUSE Software Solutions Germany GmbH,
> > Frankenstrasse 146, 90461 Nuernberg, Germany;
> > GF: Ivo Totev, Andrew McDonald, Werner Knoblich; (HRB 36809, AG Nuernberg)
> 

-- 
Richard Biener <rguent...@suse.de>
SUSE Software Solutions Germany GmbH,
Frankenstrasse 146, 90461 Nuernberg, Germany;
GF: Ivo Totev, Andrew McDonald, Werner Knoblich; (HRB 36809, AG Nuernberg)

Reply via email to