On Wed, 5 Feb 2025, Tamar Christina wrote:

[...]

> > 6eda40267bd1382938a77826d11f20dcc959a166..d0148d4938cafc3c59edfa6a
> > > > 60002933f384f65b 100644
> > > > > --- a/gcc/tree-vect-data-refs.cc
> > > > > +++ b/gcc/tree-vect-data-refs.cc
> > > > > @@ -731,7 +731,8 @@ vect_analyze_early_break_dependences
> > (loop_vec_info
> > > > loop_vinfo)
> > > > >         if (is_gimple_debug (stmt))
> > > > >           continue;
> > > > >
> > > > > -       stmt_vec_info stmt_vinfo = loop_vinfo->lookup_stmt (stmt);
> > > > > +       stmt_vec_info stmt_vinfo
> > > > > +         = vect_stmt_to_vectorize (loop_vinfo->lookup_stmt (stmt));
> > > > >         auto dr_ref = STMT_VINFO_DATA_REF (stmt_vinfo);
> > > > >         if (!dr_ref)
> > > > >           continue;
> > > > > @@ -748,26 +749,15 @@ vect_analyze_early_break_dependences
> > > > (loop_vec_info loop_vinfo)
> > > > >            bounded by VF so accesses are within range.  We only need 
> > > > > to check
> > > > >            the reads since writes are moved to a safe place where if 
> > > > > we get
> > > > >            there we know they are safe to perform.  */
> > > > > -       if (DR_IS_READ (dr_ref)
> > > > > -           && !ref_within_array_bound (stmt, DR_REF (dr_ref)))
> > > > > +       if (DR_IS_READ (dr_ref))
> > > > >           {
> > > > > -           if (STMT_VINFO_GATHER_SCATTER_P (stmt_vinfo)
> > > > > -               || STMT_VINFO_STRIDED_P (stmt_vinfo))
> > > > > -             {
> > > > > -               const char *msg
> > > > > -                 = "early break not supported: cannot peel "
> > > > > -                   "for alignment, vectorization would read out of "
> > > > > -                   "bounds at %G";
> > > > > -               return opt_result::failure_at (stmt, msg, stmt);
> > > > > -             }
> > > > > -
> > > > >             dr_vec_info *dr_info = STMT_VINFO_DR_INFO (stmt_vinfo);
> > > > >             dr_info->need_peeling_for_alignment = true;
> > > >
> > > > You're setting the flag on any DR of a DR group here ...
> > > >
> > > > >             if (dump_enabled_p ())
> > > > >               dump_printf_loc (MSG_NOTE, vect_location,
> > > > > -                              "marking DR (read) as needing peeling 
> > > > > for "
> > > > > -                              "alignment at %G", stmt);
> > > > > +                              "marking DR (read) as possibly needing 
> > > > > peeling "
> > > > > +                              "for alignment at %G", stmt);
> > > > >           }
> > > > >
> > > > >         if (DR_IS_READ (dr_ref))
> > > > > @@ -1326,9 +1316,6 @@ vect_record_base_alignments (vec_info *vinfo)
> > > > >     Compute the misalignment of the data reference DR_INFO when 
> > > > > vectorizing
> > > > >     with VECTYPE.
> > > > >
> > > > > -   RESULT is non-NULL iff VINFO is a loop_vec_info.  In that case, 
> > > > > *RESULT will
> > > > > -   be set appropriately on failure (but is otherwise left unchanged).
> > > > > -
> > > > >     Output:
> > > > >     1. initialized misalignment info for DR_INFO
> > > > >
> > > > > @@ -1337,7 +1324,7 @@ vect_record_base_alignments (vec_info *vinfo)
> > > > >
> > > > >  static void
> > > > >  vect_compute_data_ref_alignment (vec_info *vinfo, dr_vec_info 
> > > > > *dr_info,
> > > > > -                              tree vectype, opt_result *result = 
> > > > > nullptr)
> > > > > +                              tree vectype)
> > > > >  {
> > > > >    stmt_vec_info stmt_info = dr_info->stmt;
> > > > >    vec_base_alignments *base_alignments = &vinfo->base_alignments;
> > > > > @@ -1365,66 +1352,6 @@ vect_compute_data_ref_alignment (vec_info
> > *vinfo,
> > > > dr_vec_info *dr_info,
> > > > >      = exact_div (targetm.vectorize.preferred_vector_alignment 
> > > > > (vectype),
> > > > >                BITS_PER_UNIT);
> > > > >
> > > > > -  /* If this DR needs peeling for alignment for correctness, we must
> > > > > -     ensure the target alignment is a constant power-of-two multiple 
> > > > > of the
> > > > > -     amount read per vector iteration (overriding the above hook 
> > > > > where
> > > > > -     necessary).  */
> > > > > -  if (dr_info->need_peeling_for_alignment)
> > > > > -    {
> > > > > -      /* Vector size in bytes.  */
> > > > > -      poly_uint64 safe_align = tree_to_poly_uint64 (TYPE_SIZE_UNIT
> > (vectype));
> > > > > -
> > > > > -      /* We can only peel for loops, of course.  */
> > > > > -      gcc_checking_assert (loop_vinfo);
> > > > > -
> > > > > -      /* Calculate the number of vectors read per vector iteration.  
> > > > > If
> > > > > -      it is a power of two, multiply through to get the required
> > > > > -      alignment in bytes.  Otherwise, fail analysis since alignment
> > > > > -      peeling wouldn't work in such a case.  */
> > > > > -      poly_uint64 num_scalars = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
> > > > > -      if (STMT_VINFO_GROUPED_ACCESS (stmt_info))
> > > > > -     num_scalars *= DR_GROUP_SIZE (stmt_info);
> > > > > -
> > > > > -      auto num_vectors = vect_get_num_vectors (num_scalars, vectype);
> > > > > -      if (!pow2p_hwi (num_vectors))
> > > > > -     {
> > > > > -       *result = opt_result::failure_at (vect_location,
> > > > > -                                         "non-power-of-two num 
> > > > > vectors %u "
> > > > > -                                         "for DR needing peeling for 
> > > > > "
> > > > > -                                         "alignment at %G",
> > > > > -                                         num_vectors, 
> > > > > stmt_info->stmt);
> > > > > -       return;
> > > > > -     }
> > > > > -
> > > > > -      safe_align *= num_vectors;
> > > > > -      if (maybe_gt (safe_align, 4096U))
> > > > > -     {
> > > > > -       pretty_printer pp;
> > > > > -       pp_wide_integer (&pp, safe_align);
> > > > > -       *result = opt_result::failure_at (vect_location,
> > > > > -                                         "alignment required for 
> > > > > correctness"
> > > > > -                                         " (%s) may exceed page 
> > > > > size",
> > > > > -                                         pp_formatted_text (&pp));
> > > > > -       return;
> > > > > -     }
> > > > > -
> > > > > -      unsigned HOST_WIDE_INT multiple;
> > > > > -      if (!constant_multiple_p (vector_alignment, safe_align, 
> > > > > &multiple)
> > > > > -       || !pow2p_hwi (multiple))
> > > > > -     {
> > > > > -       if (dump_enabled_p ())
> > > > > -         {
> > > > > -           dump_printf_loc (MSG_NOTE, vect_location,
> > > > > -                            "forcing alignment for DR from preferred 
> > > > > (");
> > > > > -           dump_dec (MSG_NOTE, vector_alignment);
> > > > > -           dump_printf (MSG_NOTE, ") to safe align (");
> > > > > -           dump_dec (MSG_NOTE, safe_align);
> > > > > -           dump_printf (MSG_NOTE, ") for stmt: %G", stmt_info->stmt);
> > > > > -         }
> > > > > -       vector_alignment = safe_align;
> > > > > -     }
> > > > > -    }
> > > > > -
> > > > >    SET_DR_TARGET_ALIGNMENT (dr_info, vector_alignment);
> > > > >
> > > > >    /* If the main loop has peeled for alignment we have no way of 
> > > > > knowing
> > > > > @@ -2479,7 +2406,7 @@ vect_enhance_data_refs_alignment
> > (loop_vec_info
> > > > loop_vinfo)
> > > > >        || !slpeel_can_duplicate_loop_p (loop, LOOP_VINFO_IV_EXIT
> > (loop_vinfo),
> > > > >                                      loop_preheader_edge (loop))
> > > > >        || loop->inner
> > > > > -      || LOOP_VINFO_EARLY_BREAKS_VECT_PEELED (loop_vinfo))
> > > > > +      || LOOP_VINFO_EARLY_BREAKS_VECT_PEELED (loop_vinfo)) // <<-- ??
> > > >
> > > > Spurious change(?)
> > >
> > > I was actually wondering why this is here. I'm curious why we're saying 
> > > we can't
> > > peel for alignment on an inverted loop.
> > 
> > No idea either.
> > 
> > > >
> > > > >      do_peeling = false;
> > > > >
> > > > >    struct _vect_peel_extended_info peel_for_known_alignment;
> > > > > @@ -2942,12 +2869,9 @@ vect_analyze_data_refs_alignment
> > (loop_vec_info
> > > > loop_vinfo)
> > > > >         if (STMT_VINFO_GROUPED_ACCESS (dr_info->stmt)
> > > > >             && DR_GROUP_FIRST_ELEMENT (dr_info->stmt) != 
> > > > > dr_info->stmt)
> > > > >           continue;
> > > > > -       opt_result res = opt_result::success ();
> > > > > +
> > > > >         vect_compute_data_ref_alignment (loop_vinfo, dr_info,
> > > > > -                                        STMT_VINFO_VECTYPE 
> > > > > (dr_info->stmt),
> > > > > -                                        &res);
> > > > > -       if (!res)
> > > > > -         return res;
> > > > > +                                        STMT_VINFO_VECTYPE 
> > > > > (dr_info->stmt));
> > > > >       }
> > > > >      }
> > > > >
> > > > > @@ -7211,7 +7135,7 @@ vect_supportable_dr_alignment (vec_info *vinfo,
> > > > dr_vec_info *dr_info,
> > > > >
> > > > >    if (misalignment == 0)
> > > > >      return dr_aligned;
> > > > > -  else if (dr_info->need_peeling_for_alignment)
> > > > > +  else if (dr_peeling_alignment (stmt_info))
> > > > >      return dr_unaligned_unsupported;
> > > > >
> > > > >    /* For now assume all conditional loads/stores support unaligned
> > > > > diff --git a/gcc/tree-vect-stmts.cc b/gcc/tree-vect-stmts.cc
> > > > > index
> > > >
> > 1b639ae3b174779bb838d0f7ce4886d84ecafcfe..c213ec46c4bdb456f99a43a3a
> > > > ff7c1af80e8769a 100644
> > > > > --- a/gcc/tree-vect-stmts.cc
> > > > > +++ b/gcc/tree-vect-stmts.cc
> > > > > @@ -2597,6 +2597,69 @@ get_load_store_type (vec_info  *vinfo,
> > > > stmt_vec_info stmt_info,
> > > > >        return false;
> > > > >      }
> > > > >
> > > > > +  auto unit_size = GET_MODE_UNIT_SIZE (TYPE_MODE (vectype));
> > > > > +  /* Check if a misalignment with an unsupported peeling for early 
> > > > > break is
> > > > > +     still OK.  First we need to distinguish between when we've 
> > > > > reached here
> > do
> > > >
> > > > due to
> > > >
> > > > > +     to dependency analysis or when the user has requested 
> > > > > -mstrict-align or
> > > > > +     similar.  In those cases we must not override it.  */
> > > > > +  if (dr_peeling_alignment (stmt_info)
> > > > > +      && *alignment_support_scheme == dr_unaligned_unsupported
> > > > > +      /* We can only attempt to override if the misalignment is a 
> > > > > multiple of
> > > > > +      the element being loaded, otherwise peeling or versioning 
> > > > > would have
> > > > > +      really been required.  */
> > > > > +      && multiple_p (*misalignment, unit_size))
> > > >
> > > > Hmm, but wouldn't that mean dr_info->target_alignment is bigger
> > > > than the vector size?  Does that ever happen?  I'll note that
> > > > *alignment_support_scheme == dr_aligned means alignment according
> > > > to dr_info->target_alignment which might be actually less than
> > > > the vector size (we've noticed this recently in other PRs), so
> > > > we might want to make sure that dr_info->target_alignment is
> > > > at least vector size when ->need_peeling_for_alignment I think.
> > > >
> > >
> > > One reason I block LOAD_LANES from the non-inbound case is that in
> > > those cases dr_info->target_alignment would need to be GROUP_SIZE *
> > > vector size to ensure that the entire access doesn't cross a page.
> > > Because this puts an excessive alignment request in place I currently
> > > just reject the loop.
> > 
> > But that's true for all grouped accesses and one reason we wanted to move
> > this code here - we know group_size and the vectorization factor.
> > 
> 
> Yes, I mentioned LOAD_LANES since that's what AArch64 turns group accesses
> into.  But yes, you're right, this is about grouped accesses in general.
> Maybe my understanding is wrong here, but I thought the only VMAT that can
> result in a grouped access is LOAD_LANES?

No, even VMAT_CONTIGUOUS can be a group access.
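For instance (a made-up sketch, not taken from the patch or its testcases):

  if (a[2*i] + a[2*i+1])
    early break;

both loads form a DR group of size two, yet without load-lanes support they
can end up as plain contiguous vector loads plus a permute.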

> But yes I agree I should drop the *memory_access_type == 
> VMAT_LOAD_STORE_LANES bit
> and only look at the group access flag.
> 
> > > But in principle it can happen; however, the above checks for element
> > > size, not vector size.  This fails when the user has intentionally
> > > misaligned the array and we don't support peeling for the access type to
> > > correct it.
> > >
> > > So something like vect-early-break_133_pfa3.c with a grouped access for
> > > instance.
> > >
> > > > So - which of the testcases gets you here?  I think we
> > > > set *misalignment to be modulo target_alignment, never larger than that.
> > > >
> > >
> > > The condition passes for most cases, yes, unless we can't peel; e.g.
> > > vect-early-break_22.c and vect-early-break_121-pr114081.c are two that
> > > get rejected inside the block.
> > >
> > > > > +    {
> > > > > +      bool inbounds
> > > > > +     = ref_within_array_bound (STMT_VINFO_STMT (stmt_info),
> > > > > +                               DR_REF (STMT_VINFO_DATA_REF 
> > > > > (stmt_info)));
> > > > > +      /* If we have a known misalignment, and are doing a group load 
> > > > > for a DR
> > > > > +      that requires aligned access, check if the misalignment is a 
> > > > > multiple
> > > > > +      of the unit size.  In which case the group load will be issued 
> > > > > aligned
> > > > > +      as long as the first load in the group is aligned.
> > > > > +
> > > > > +      For the non-inbound case we'd need goup_size * vectype 
> > > > > alignment.  But
> > > > > +      this is quite huge and unlikely to ever happen so if we can't 
> > > > > peel for
> > > > > +      it, just reject it.  */
> > 
> > I don't think the in-bound case is any different from the non-in-bound
> > case unless the size of the object is a multiple of the whole vector
> > access size as well.
> > 
> 
> The idea was that we know the accesses are all within the boundary of the
> object; what we want to know is whether the accesses done are aligned.
> Since we don't support group loads with gaps here, we know the elements
> must be sequential, so the relaxation was: if we know all this, then all we
> really need to know is that it is safe to read a multiple of GROUP_SIZE *
> element size, since we would still be in range of the buffer.  That is the
> number of scalar loads that would have been done per iteration, so we can
> relax the alignment requirement based on the information known in the
> inbounds case.

Huh, I guess I fail to parse this.  ref_within_array_bound computes
whether we know all _scalar_ accesses are within bounds.

At this point we know *alignment_support_scheme == 
dr_unaligned_unsupported which means the access is not aligned
but the misalignment is a multiple of the vector type unit size.

How can the access not be aligned then?  Only when target_alignment >
type unit size?  But I think this implies a grouped access, thus
ncopies > 1, which is why I think you need to consider the VF?
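
For concreteness (numbers picked purely for illustration): with V4SI the
unit size is 4 bytes and the vector size 16; a group of 2 ints at VF 4
reads 32 bytes per vector iteration, i.e. two V4SI loads, so the target
alignment needed to keep the whole per-iteration access on one page is 32.
A DR misaligned by 16 is then a multiple of the unit size (and even of the
vector size) but still not aligned for the grouped access.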

> > > > > +      if (*memory_access_type == VMAT_LOAD_STORE_LANES
> > > > > +       && (STMT_VINFO_GROUPED_ACCESS (stmt_info) || slp_node))
> > > > > +     {
> > > > > +       /* ?? This needs updating whenever we support slp group > 1.  
> > > > > */
> > 
> > ?
> > 
> > > > > +       auto group_size = DR_GROUP_SIZE (stmt_info);
> > > > > +       /* For the inbound case it's enough to check for an alignment 
> > > > > of
> > > > > +          GROUP_SIZE * element size.  */
> > > > > +       if (inbounds
> > > > > +           && (*misalignment % (group_size * unit_size)) == 0
> > > > > +           && group_size % 2 == 0)
> > 
> > It looks fishy that you do not need to consider the VF here.
> 
> This and the ? above from you are related.  I don't think we have to consider 
> VF here
> since early break vectorization does not support slp group size > 1.  So the 
> VF can at
> this time never be larger than a single vector.  The note is there to update 
> this when we do.

You don't need a SLP group-size > 1 to have more than one vector read.
Consider

  if (long_long[i])
    early break;
  char[i] = char2[i];

where you with V2DI and V16QI end up with a VF of 16 and 4 V2DI loads
to compute the early break condition?  That said, implying, without
double-checking, some constraints on early break vectorization here
looks a bit fragile - if there are unwritten constraints we'd better
check them here.  That also makes it easier to understand.
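
Something like the following sketch (reusing the num_vectors computation the
patch removes from vect_compute_data_ref_alignment; where exactly it belongs
is debatable) would at least make the assumption explicit:

  /* Reject instead of silently assuming the early-break condition only
     ever needs a single vector load per iteration.  */
  poly_uint64 nscalars = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
  if (STMT_VINFO_GROUPED_ACCESS (stmt_info))
    nscalars *= DR_GROUP_SIZE (stmt_info);
  if (vect_get_num_vectors (nscalars, vectype) > 1)
    return false;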

> > 
> > > > > +         {
> > > > > +           if (dump_enabled_p ())
> > > > > +             dump_printf_loc (MSG_NOTE, vect_location,
> > > > > +                      "Assuming grouped access is aligned due to 
> > > > > load "
> > > > > +                      "lanes, overriding alignment scheme\n");
> > > > > +
> > > > > +           *alignment_support_scheme = dr_unaligned_supported;
> > > > > +         }
> > > > > +     }
> > > > > +      /* If we have a linear access and know the misalignment and 
> > > > > know we
> > won't
> > > > > +      read out of bounds then it's also ok if the misalignment is a 
> > > > > multiple
> > > > > +      of the element size.  We get this when the loop has known 
> > > > > misalignments
> > > > > +      but the misalignments of the DRs can't be peeled to reach 
> > > > > mutual
> > > > > +      alignment.  Because the misalignments are known however we 
> > > > > also know
> > > > > +      that versioning won't work.  If the target does support 
> > > > > unaligned
> > > > > +      accesses and we know we are free to read the entire buffer 
> > > > > then we
> > > > > +      can allow the unaligned access if it's on elements for an 
> > > > > early break
> > > > > +      condition.  */
> > 
> > See above - one of the PRs was exactly that we ovverread a decl even
> > if the original scalar accesses are all in-bounds.  So we can't allow
> > this.
> > 
> 
> But that PR was only because it was misaligned to an unnatural alignment
> of the type, which resulted in one element possibly being split across a
> page boundary.  That is still rejected here.

By the multiple_p (*misalignment, unit_size) condition?  Btw, you should
check known_alignment_for_access_p before using *misalignment or
check *misalignment != DR_MISALIGNMENT_UNKNOWN.
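
I.e. (only a sketch, mirroring the patch's condition with the extra check):

  if (dr_peeling_alignment (stmt_info)
      && *alignment_support_scheme == dr_unaligned_unsupported
      && *misalignment != DR_MISALIGNMENT_UNKNOWN
      && multiple_p (*misalignment, unit_size))
    ...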

> This check is saying that if you have mutual misalignment, but each DR is
> in itself still aligned to the type's natural alignment, and we know that
> all accesses are in bounds, then we should be safe to vectorize as we won't
> accidentally access an invalid part of memory.

See above - when we do two vector loads we only know the first one had
a corresponding scalar access, the second vector load can still be
out of bounds.  But when we can align to N * unit_size then it's fine
to vectorize.  And only then (target_alignment > unit_size) can we have
*misalignment > unit_size?

Richard.

> > > > > +      else if (*memory_access_type != VMAT_GATHER_SCATTER
> > > > > +            && inbounds)
> > > > > +     {
> > > > > +       if (dump_enabled_p ())
> > > > > +         dump_printf_loc (MSG_NOTE, vect_location,
> > > > > +                      "Access will not read beyond buffer to due 
> > > > > known size "
> > > > > +                      "buffer, overriding alignment scheme\n");
> > > > > +
> > > > > +       *alignment_support_scheme = dr_unaligned_supported;
> > > > > +     }
> > > > > +    }
> > > > > +
> > > > >    if (*alignment_support_scheme == dr_unaligned_unsupported)
> > > > >      {
> > > > >        if (dump_enabled_p ())
> > > > > @@ -10520,6 +10583,68 @@ vectorizable_load (vec_info *vinfo,
> > > > >    /* Transform.  */
> > > > >
> > > > >    dr_vec_info *dr_info = STMT_VINFO_DR_INFO (stmt_info), 
> > > > > *first_dr_info =
> > > > NULL;
> > > > > +
> > > > > +  /* Check if we support the operation if early breaks are needed.  
> > > > > */
> > > > > +  if (loop_vinfo
> > > > > +      && LOOP_VINFO_EARLY_BREAKS (loop_vinfo)
> > > > > +      && (memory_access_type == VMAT_GATHER_SCATTER
> > > > > +       || memory_access_type == VMAT_STRIDED_SLP))
> > > > > +    {
> > > > > +      if (dump_enabled_p ())
> > > > > +       dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
> > > > > +                        "early break not supported: cannot peel for "
> > > > > +                        "alignment. With non-contiguous memory 
> > > > > vectorization"
> > > > > +                        " could read out of bounds at %G ",
> > > > > +                        STMT_VINFO_STMT (stmt_info));
> > > >
> > > > Hmm, this is now more restrictive than the original check in
> > > > vect_analyze_early_break_dependences because it covers all accesses.
> > > > The simplest fix would be to leave it there.
> > > >
> > >
> > > It covers all loads, which you're right is more restrictive.  I think
> > > it just needs to be moved inside the
> > > if (costing_p && dr_info->need_peeling_for_alignment) block below it
> > > though.
> > >
> > > Delaying this to here instead of earlier has allowed us to vectorize
> > > gcc.dg/vect/bb-slp-pr65935.c, which now vectorizes after the inner loops
> > > are unrolled.
> > >
> > > Are you happy with just moving it down?
> > 
> > OK.
> > 
> > > > > +      return false;
> > > > > +    }
> > > > > +
> > > > > +  /* If this DR needs peeling for alignment for correctness, we must
> > > > > +     ensure the target alignment is a constant power-of-two multiple 
> > > > > of the
> > > > > +     amount read per vector iteration (overriding the above hook 
> > > > > where
> > > > > +     necessary).  We don't support group loads, which would have 
> > > > > been filterd
> > > > > +     out in the check above.  For now it means we don't have to look 
> > > > > at the
> > > > > +     group info and just check that the load is continguous and can 
> > > > > just use
> > > > > +     dr_info.  For known size buffers we still need to check if the 
> > > > > vector
> > > > > +     is misaligned and if so we need to peel.  */
> > > > > +  if (costing_p && dr_info->need_peeling_for_alignment)
> > > >
> > > > dr_peeling_alignment ()
> > > >
> > > > I think this belongs in get_load_store_type, specifically I think
> > > > we want to only allow VMAT_CONTIGUOUS for need_peeling_for_alignment
> > refs
> > > > and by construction dr_aligned should ensure type size alignment
> > > > (by altering target_alignment for the respective refs).  Given that
> > > > both VF and vector type are fixed at vect_analyze_data_refs_alignment
> > > > time we should be able to compute the appropriate target alignment
> > > > there (I'm not sure we support peeling of more than VF-1 iterations
> > > > though).
> > > >
> > > > > +    {
> > > > > +      /* Vector size in bytes.  */
> > > > > +      poly_uint64 safe_align = tree_to_poly_uint64 (TYPE_SIZE_UNIT
> > (vectype));
> > > > > +
> > > > > +      /* We can only peel for loops, of course.  */
> > > > > +      gcc_checking_assert (loop_vinfo);
> > > > > +
> > > > > +      auto num_vectors = ncopies;
> > > > > +      if (!pow2p_hwi (num_vectors))
> > > > > +     {
> > > > > +       if (dump_enabled_p ())
> > > > > +         dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
> > > > > +                          "non-power-of-two num vectors %u "
> > > > > +                          "for DR needing peeling for "
> > > > > +                          "alignment at %G",
> > > > > +                          num_vectors, STMT_VINFO_STMT (stmt_info));
> > > > > +       return false;
> > > > > +     }
> > > > > +
> > > > > +      safe_align *= num_vectors;
> > > > > +      if (known_gt (safe_align, (unsigned)param_min_pagesize)
> > > > > +       /* For VLA we don't support PFA when any unrolling needs to 
> > > > > be done.
> > > > > +          We could though but too much work for GCC 15.  For now we 
> > > > > assume a
> > > > > +          vector is not larger than a page size so allow single 
> > > > > loads.  */
> > > > > +       && (num_vectors > 1 && !vf.is_constant ()))
> > > > > +     {
> > > > > +       if (dump_enabled_p ())
> > > > > +         {
> > > > > +           dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
> > > > > +                            "alignment required for correctness (");
> > > > > +           dump_dec (MSG_MISSED_OPTIMIZATION, safe_align);
> > > > > +           dump_printf (MSG_NOTE, ") may exceed page size\n");
> > > > > +         }
> > > > > +       return false;
> > > > > +     }
> > > > > +    }
> > > > > +
> > > > >    ensure_base_align (dr_info);
> > > > >
> > > > >    if (memory_access_type == VMAT_INVARIANT)
> > > > > diff --git a/gcc/tree-vectorizer.h b/gcc/tree-vectorizer.h
> > > > > index
> > > >
> > 44d3a1d46c409597f1e67a275211a1da414fc7c7..6ef97ee84336ac9a0e192214
> > > > 5f3d418436d709f4 100644
> > > > > --- a/gcc/tree-vectorizer.h
> > > > > +++ b/gcc/tree-vectorizer.h
> > > > > @@ -1998,6 +1998,19 @@ dr_target_alignment (dr_vec_info *dr_info)
> > > > >  }
> > > > >  #define DR_TARGET_ALIGNMENT(DR) dr_target_alignment (DR)
> > > > >
> > > > > +/* Return if the stmt_vec_info requires peeling for alignment.  */
> > > > > +inline bool
> > > > > +dr_peeling_alignment (stmt_vec_info stmt_info)
> > > > > +{
> > > > > +  dr_vec_info *dr_info;
> > > > > +  if (STMT_VINFO_GROUPED_ACCESS (stmt_info))
> > > > > +    dr_info = STMT_VINFO_DR_INFO (DR_GROUP_FIRST_ELEMENT
> > (stmt_info));
> > > >
> > > > ... but checking it on the first only.
> > >
> > > I did it that way because I was under the assumption that group loads
> > > could be relaxed to e.g. element-wise or some other form.  If it's the
> > > case that the group cannot grow or be changed I could instead set it
> > > only on the first access and then not need to check it elsewhere, if you
> > > prefer.
> > 
> > I've merely noted the discrepancy - consider
> > 
> >   if (a[2*i+1])
> >     early break;
> >   ... = a[2*i];
> > 
> > then you'd set ->needs_peeling on the 2nd group member but
> > dr_peeling_alignment would always check the first.  So yes, I think
> > we always want to set the flag on the first element of a grouped
> > access.  We're no longer splitting groups when loop vectorizing.
> > 
> 
> Ack,
> 
> Will update.
> 
> Thanks,
> Tamar
> 
> > Richard.
> > 
> > >
> > > Thanks,
> > > Tamar
> > > >
> > > > > +  else
> > > > > +    dr_info = STMT_VINFO_DR_INFO (stmt_info);
> > > > > +
> > > > > +  return dr_info->need_peeling_for_alignment;
> > > > > +}
> > > > > +
> > > > >  inline void
> > > > >  set_dr_target_alignment (dr_vec_info *dr_info, poly_uint64 val)
> > > > >  {
> > > > >
> > > >
> > > > --
> > > > Richard Biener <rguent...@suse.de>
> > > > SUSE Software Solutions Germany GmbH,
> > > > Frankenstrasse 146, 90461 Nuernberg, Germany;
> > > > GF: Ivo Totev, Andrew McDonald, Werner Knoblich; (HRB 36809, AG
> > Nuernberg)
> > >
> > 
> > --
> > Richard Biener <rguent...@suse.de>
> > SUSE Software Solutions Germany GmbH,
> > Frankenstrasse 146, 90461 Nuernberg, Germany;
> > GF: Ivo Totev, Andrew McDonald, Werner Knoblich; (HRB 36809, AG Nuernberg)
> 

-- 
Richard Biener <rguent...@suse.de>
SUSE Software Solutions Germany GmbH,
Frankenstrasse 146, 90461 Nuernberg, Germany;
GF: Ivo Totev, Andrew McDonald, Werner Knoblich; (HRB 36809, AG Nuernberg)
