On Tue, 31 Oct 2023, Thomas Schwinge wrote:

> Hi!
> 
> On 2023-10-19T11:47:14+0000, Richard Biener <rguent...@suse.de> wrote:
> > The following implements SLP vectorization support for gathers
> > without relying on IFNs being pattern detected (and supported by
> > the target).  That includes support for emulated gathers but also
> > the legacy x86 builtin path.
> >
> > Bootstrapped and tested on x86_64-unknown-linux-gnu, will push.
> 
> For GCN (tested '-march=gfx90a'), I see:
> 
>      PASS: gcc.dg/vect/vect-gather-2.c (test for excess errors)
>     +FAIL: gcc.dg/vect/vect-gather-2.c scan-tree-dump vect "different gather 
> base"
>     +FAIL: gcc.dg/vect/vect-gather-2.c scan-tree-dump vect "different gather 
> scale"
>     +PASS: gcc.dg/vect/vect-gather-2.c scan-tree-dump-not vect "Loop contains 
> only SLP stmts"

Ah, for gather IFNs pattern matched it will instead have

Build SLP failed: different calls in patt_55 = .GATHER_LOAD ((sizetype) 
x2_29(D), _15, 4, 0);

but then I have put in

/* { dg-final { scan-tree-dump "different gather base" vect { target { ! 
vect_gather_load_ifn } } } } */
/* { dg-final { scan-tree-dump "different gather scale" vect { target { ! 
vect_gather_load_ifn } } } } */

and expected gcn to have vect_gather_load_ifn ... but that is

proc check_effective_target_vect_gather_load_ifn { } {
    return [expr { [check_effective_target_aarch64_sve]
                   || [check_effective_target_riscv_v] }]
} 

probably add

                   || [istarget amdgcn*-*-*]

there?  Can you do that (after checking it doesn't break other
tests)?

Richard.

> 
> Gr??e
>  Thomas
> 
> 
> >       PR tree-optimization/111131
> >       * tree-vect-loop.cc (update_epilogue_loop_vinfo): Make
> >       sure to update all gather/scatter stmt DRs, not only those
> >       that eventually got VMAT_GATHER_SCATTER set.
> >       * tree-vect-slp.cc (_slp_oprnd_info::first_gs_info): Add.
> >       (vect_get_and_check_slp_defs): Handle gathers/scatters,
> >       adding the offset as SLP operand and comparing base and scale.
> >       (vect_build_slp_tree_1): Handle gathers.
> >       (vect_build_slp_tree_2): Likewise.
> >
> >       * gcc.dg/vect/vect-gather-1.c: Now expected to vectorize
> >       everywhere.
> >       * gcc.dg/vect/vect-gather-2.c: Expected to not SLP anywhere.
> >       Massage the scale case to more reliably produce a different
> >       one.  Scan for the specific messages.
> >       * gcc.dg/vect/vect-gather-3.c: Masked gather is also supported
> >       for AVX2, but not emulated.
> >       * gcc.dg/vect/vect-gather-4.c: Expected to not SLP anywhere.
> >       Massage to more properly ensure this.
> >       * gcc.dg/vect/tsvc/vect-tsvc-s353.c: Expect to vectorize
> >       everywhere.
> > ---
> >  .../gcc.dg/vect/tsvc/vect-tsvc-s353.c         |  2 +-
> >  gcc/testsuite/gcc.dg/vect/vect-gather-1.c     |  2 +-
> >  gcc/testsuite/gcc.dg/vect/vect-gather-2.c     | 13 ++++--
> >  gcc/testsuite/gcc.dg/vect/vect-gather-3.c     |  2 +-
> >  gcc/testsuite/gcc.dg/vect/vect-gather-4.c     |  6 +--
> >  gcc/tree-vect-loop.cc                         |  6 ++-
> >  gcc/tree-vect-slp.cc                          | 45 +++++++++++++++++--
> >  7 files changed, 61 insertions(+), 15 deletions(-)
> >
> > diff --git a/gcc/testsuite/gcc.dg/vect/tsvc/vect-tsvc-s353.c 
> > b/gcc/testsuite/gcc.dg/vect/tsvc/vect-tsvc-s353.c
> > index 98ba7522471..2c4fa3f5991 100644
> > --- a/gcc/testsuite/gcc.dg/vect/tsvc/vect-tsvc-s353.c
> > +++ b/gcc/testsuite/gcc.dg/vect/tsvc/vect-tsvc-s353.c
> > @@ -44,4 +44,4 @@ int main (int argc, char **argv)
> >    return 0;
> >  }
> >
> > -/* { dg-final { scan-tree-dump "vectorized 1 loops" "vect" { xfail { ! 
> > riscv_v } } } } */
> > +/* { dg-final { scan-tree-dump "vectorized 1 loops" "vect" } } */
> > diff --git a/gcc/testsuite/gcc.dg/vect/vect-gather-1.c 
> > b/gcc/testsuite/gcc.dg/vect/vect-gather-1.c
> > index e3bbf5c0bf8..5f6640d9ab6 100644
> > --- a/gcc/testsuite/gcc.dg/vect/vect-gather-1.c
> > +++ b/gcc/testsuite/gcc.dg/vect/vect-gather-1.c
> > @@ -58,4 +58,4 @@ main (void)
> >    return 0;
> >  }
> >
> > -/* { dg-final { scan-tree-dump "Loop contains only SLP stmts" vect { 
> > target vect_gather_load_ifn } } } */
> > +/* { dg-final { scan-tree-dump "Loop contains only SLP stmts" vect } } */
> > diff --git a/gcc/testsuite/gcc.dg/vect/vect-gather-2.c 
> > b/gcc/testsuite/gcc.dg/vect/vect-gather-2.c
> > index a1f6ba458a9..4c23b808333 100644
> > --- a/gcc/testsuite/gcc.dg/vect/vect-gather-2.c
> > +++ b/gcc/testsuite/gcc.dg/vect/vect-gather-2.c
> > @@ -8,6 +8,7 @@ f1 (int *restrict y, int *restrict x1, int *restrict x2,
> >  {
> >    for (int i = 0; i < N; ++i)
> >      {
> > +      /* Different base.  */
> >        y[i * 2] = x1[indices[i * 2]] + 1;
> >        y[i * 2 + 1] = x2[indices[i * 2 + 1]] + 2;
> >      }
> > @@ -18,8 +19,9 @@ f2 (int *restrict y, int *restrict x, int *restrict 
> > indices)
> >  {
> >    for (int i = 0; i < N; ++i)
> >      {
> > -      y[i * 2] = x[indices[i * 2]] + 1;
> > -      y[i * 2 + 1] = x[indices[i * 2 + 1] * 2] + 2;
> > +      /* Different scale.  */
> > +      y[i * 2] = *(int *)((char *)x + (__UINTPTR_TYPE__)indices[i * 2] * 
> > 4) + 1;
> > +      y[i * 2 + 1] = *(int *)((char *)x + (__UINTPTR_TYPE__)indices[i * 2 
> > + 1] * 2) + 2;
> >      }
> >  }
> >
> > @@ -28,9 +30,12 @@ f3 (int *restrict y, int *restrict x, int *restrict 
> > indices)
> >  {
> >    for (int i = 0; i < N; ++i)
> >      {
> > +      /* Different type.  */
> >        y[i * 2] = x[indices[i * 2]] + 1;
> > -      y[i * 2 + 1] = x[(unsigned int) indices[i * 2 + 1]] + 2;
> > +      y[i * 2 + 1] = x[((unsigned int *) indices)[i * 2 + 1]] + 2;
> >      }
> >  }
> >
> > -/* { dg-final { scan-tree-dump-not "Loop contains only SLP stmts" vect { 
> > target vect_gather_load_ifn } } } */
> > +/* { dg-final { scan-tree-dump-not "Loop contains only SLP stmts" vect } } 
> > */
> > +/* { dg-final { scan-tree-dump "different gather base" vect { target { ! 
> > vect_gather_load_ifn } } } } */
> > +/* { dg-final { scan-tree-dump "different gather scale" vect { target { ! 
> > vect_gather_load_ifn } } } } */
> > diff --git a/gcc/testsuite/gcc.dg/vect/vect-gather-3.c 
> > b/gcc/testsuite/gcc.dg/vect/vect-gather-3.c
> > index adfef3bf407..30ba6789e03 100644
> > --- a/gcc/testsuite/gcc.dg/vect/vect-gather-3.c
> > +++ b/gcc/testsuite/gcc.dg/vect/vect-gather-3.c
> > @@ -62,4 +62,4 @@ main (void)
> >    return 0;
> >  }
> >
> > -/* { dg-final { scan-tree-dump "Loop contains only SLP stmts" vect { 
> > target { vect_gather_load_ifn && vect_masked_load } } } } */
> > +/* { dg-final { scan-tree-dump "Loop contains only SLP stmts" vect { 
> > target { { vect_gather_load_ifn || avx2 } && vect_masked_load } } } } */
> > diff --git a/gcc/testsuite/gcc.dg/vect/vect-gather-4.c 
> > b/gcc/testsuite/gcc.dg/vect/vect-gather-4.c
> > index ee2e4e4999a..1ce63e69199 100644
> > --- a/gcc/testsuite/gcc.dg/vect/vect-gather-4.c
> > +++ b/gcc/testsuite/gcc.dg/vect/vect-gather-4.c
> > @@ -39,10 +39,10 @@ f3 (int *restrict y, int *restrict x, int *restrict 
> > indices)
> >        y[i * 2] = (indices[i * 2] < N * 2
> >                 ? x[indices[i * 2]] + 1
> >                 : 1);
> > -      y[i * 2 + 1] = (indices[i * 2 + 1] < N * 2
> > -                   ? x[(unsigned int) indices[i * 2 + 1]] + 2
> > +      y[i * 2 + 1] = (((unsigned int *)indices)[i * 2 + 1] < N * 2
> > +                   ? x[((unsigned int *) indices)[i * 2 + 1]] + 2
> >                     : 2);
> >      }
> >  }
> >
> > -/* { dg-final { scan-tree-dump-not "Loop contains only SLP stmts" vect { 
> > target vect_gather_load_ifn } } } */
> > +/* { dg-final { scan-tree-dump-not "Loop contains only SLP stmts" vect } } 
> > */
> > diff --git a/gcc/tree-vect-loop.cc b/gcc/tree-vect-loop.cc
> > index ebab1953b9c..8877ebde246 100644
> > --- a/gcc/tree-vect-loop.cc
> > +++ b/gcc/tree-vect-loop.cc
> > @@ -11362,8 +11362,7 @@ update_epilogue_loop_vinfo (class loop *epilogue, 
> > tree advance)
> >        updated offset we set using ADVANCE.  Instead we have to make sure 
> > the
> >        reference in the data references point to the corresponding copy of
> >        the original in the epilogue.  */
> > -      if (STMT_VINFO_MEMORY_ACCESS_TYPE (vect_stmt_to_vectorize 
> > (stmt_vinfo))
> > -       == VMAT_GATHER_SCATTER)
> > +      if (STMT_VINFO_GATHER_SCATTER_P (vect_stmt_to_vectorize 
> > (stmt_vinfo)))
> >       {
> >         DR_REF (dr)
> >           = simplify_replace_tree (DR_REF (dr), NULL_TREE, NULL_TREE,
> > @@ -11372,6 +11371,9 @@ update_epilogue_loop_vinfo (class loop *epilogue, 
> > tree advance)
> >           = simplify_replace_tree (DR_BASE_ADDRESS (dr), NULL_TREE, 
> > NULL_TREE,
> >                                    &find_in_mapping, &mapping);
> >       }
> > +      else
> > +     gcc_assert (STMT_VINFO_MEMORY_ACCESS_TYPE (vect_stmt_to_vectorize 
> > (stmt_vinfo))
> > +                 != VMAT_GATHER_SCATTER);
> >        DR_STMT (dr) = STMT_VINFO_STMT (stmt_vinfo);
> >        stmt_vinfo->dr_aux.stmt = stmt_vinfo;
> >        /* The vector size of the epilogue is smaller than that of the main 
> > loop
> > diff --git a/gcc/tree-vect-slp.cc b/gcc/tree-vect-slp.cc
> > index d081999a763..8efff2e912d 100644
> > --- a/gcc/tree-vect-slp.cc
> > +++ b/gcc/tree-vect-slp.cc
> > @@ -283,10 +283,11 @@ typedef struct _slp_oprnd_info
> >    vec<tree> ops;
> >    /* Information about the first statement, its vector def-type, type, the
> >       operand itself in case it's constant, and an indication if it's a 
> > pattern
> > -     stmt.  */
> > +     stmt and gather/scatter info.  */
> >    tree first_op_type;
> >    enum vect_def_type first_dt;
> >    bool any_pattern;
> > +  gather_scatter_info first_gs_info;
> >  } *slp_oprnd_info;
> >
> >
> > @@ -609,6 +610,7 @@ vect_get_and_check_slp_defs (vec_info *vinfo, unsigned 
> > char swap,
> >    unsigned int i, number_of_oprnds;
> >    enum vect_def_type dt = vect_uninitialized_def;
> >    slp_oprnd_info oprnd_info;
> > +  gather_scatter_info gs_info;
> >    unsigned int commutative_op = -1U;
> >    bool first = stmt_num == 0;
> >
> > @@ -660,6 +662,19 @@ vect_get_and_check_slp_defs (vec_info *vinfo, unsigned 
> > char swap,
> >
> >        oprnd_info = (*oprnds_info)[i];
> >
> > +      if (STMT_VINFO_GATHER_SCATTER_P (stmt_info))
> > +     {
> > +       gcc_assert (number_of_oprnds == 1);
> > +       if (!is_a <loop_vec_info> (vinfo)
> > +           || !vect_check_gather_scatter (stmt_info,
> > +                                          as_a <loop_vec_info> (vinfo),
> > +                                          first ? 
> > &oprnd_info->first_gs_info
> > +                                          : &gs_info))
> > +         return -1;
> > +
> > +       oprnd = first ? oprnd_info->first_gs_info.offset : gs_info.offset;
> > +     }
> > +
> >        stmt_vec_info def_stmt_info;
> >        if (!vect_is_simple_use (oprnd, vinfo, &dts[i], &def_stmt_info))
> >       {
> > @@ -792,6 +807,25 @@ vect_get_and_check_slp_defs (vec_info *vinfo, unsigned 
> > char swap,
> >         return 1;
> >       }
> >
> > +      if (STMT_VINFO_GATHER_SCATTER_P (stmt_info))
> > +     {
> > +       if (!operand_equal_p (oprnd_info->first_gs_info.base,
> > +                             gs_info.base))
> > +         {
> > +           if (dump_enabled_p ())
> > +             dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
> > +                              "Build SLP failed: different gather base\n");
> > +           return 1;
> > +         }
> > +       if (oprnd_info->first_gs_info.scale != gs_info.scale)
> > +         {
> > +           if (dump_enabled_p ())
> > +             dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
> > +                              "Build SLP failed: different gather 
> > scale\n");
> > +           return 1;
> > +         }
> > +     }
> > +
> >        /* Not first stmt of the group, check that the def-stmt/s match
> >        the def-stmt/s of the first stmt.  Allow different definition
> >        types for reduction chains: the first stmt must be a
> > @@ -1235,6 +1269,9 @@ vect_build_slp_tree_1 (vec_info *vinfo, unsigned char 
> > *swap,
> >                       || rhs_code == INDIRECT_REF
> >                       || rhs_code == COMPONENT_REF
> >                       || rhs_code == MEM_REF)))
> > +           || (ldst_p
> > +               && (STMT_VINFO_GATHER_SCATTER_P (stmt_info)
> > +                   != STMT_VINFO_GATHER_SCATTER_P (first_stmt_info)))
> >             || first_stmt_ldst_p != ldst_p
> >             || first_stmt_phi_p != phi_p)
> >           {
> > @@ -1357,12 +1394,12 @@ vect_build_slp_tree_1 (vec_info *vinfo, unsigned 
> > char *swap,
> >         if (DR_IS_READ (STMT_VINFO_DATA_REF (stmt_info))
> >             && rhs_code != CFN_GATHER_LOAD
> >             && rhs_code != CFN_MASK_GATHER_LOAD
> > +           && !STMT_VINFO_GATHER_SCATTER_P (stmt_info)
> >             /* Not grouped loads are handled as externals for BB
> >                vectorization.  For loop vectorization we can handle
> >                splats the same we handle single element interleaving.  */
> >             && (is_a <bb_vec_info> (vinfo)
> > -               || stmt_info != first_stmt_info
> > -               || STMT_VINFO_GATHER_SCATTER_P (stmt_info)))
> > +               || stmt_info != first_stmt_info))
> >           {
> >             /* Not grouped load.  */
> >             if (dump_enabled_p ())
> > @@ -1858,6 +1895,8 @@ vect_build_slp_tree_2 (vec_info *vinfo, slp_tree node,
> >       gcc_assert (gimple_call_internal_p (stmt, IFN_MASK_LOAD)
> >                   || gimple_call_internal_p (stmt, IFN_GATHER_LOAD)
> >                   || gimple_call_internal_p (stmt, IFN_MASK_GATHER_LOAD));
> > +      else if (STMT_VINFO_GATHER_SCATTER_P (stmt_info))
> > +     gcc_assert (DR_IS_READ (STMT_VINFO_DATA_REF (stmt_info)));
> >        else
> >       {
> >         *max_nunits = this_max_nunits;
> > --
> > 2.35.3
> -----------------
> Siemens Electronic Design Automation GmbH; Anschrift: Arnulfstra?e 201, 80634 
> M?nchen; Gesellschaft mit beschr?nkter Haftung; Gesch?ftsf?hrer: Thomas 
> Heurung, Frank Th?rauf; Sitz der Gesellschaft: M?nchen; Registergericht 
> M?nchen, HRB 106955
> 

-- 
Richard Biener <rguent...@suse.de>
SUSE Software Solutions Germany GmbH,
Frankenstrasse 146, 90461 Nuernberg, Germany;
GF: Ivo Totev, Andrew McDonald, Werner Knoblich; (HRB 36809, AG Nuernberg)

Reply via email to