> > > >
> > > > -  /* Loops vectorized with a variable factor won't benefit from
> > > > +  /* Loops vectorized would have already taken into account unrolling
> specified
> > > > +     by the user as the suggested unroll factor, as such we need to 
> > > > prevent the
> > > > +     RTL unroller from unrolling twice.  The only exception is static 
> > > > known
> > > > +     iterations where we would have expected the loop to be fully 
> > > > unrolled.
> > > > +     Loops vectorized with a variable factor won't benefit from
> > > >       unrolling/peeling.  */
> > > > -  if (!vf.is_constant ())
> > > > +  if (LOOP_VINFO_USER_UNROLL (loop_vinfo)
> > >
> > > ... this is the transform phase - is LOOP_VINFO_USER_UNROLL copied
> > > from the earlier attempt?
> >
> > Ah, I see I forgot to copy it when the loop_vinfo is copied..  Will fix.
> >

I've been looking more into the behavior and I think it's correct not to copy 
it from an earlier attempt.
The flag would be re-set every time during vect_estimate_min_profitable_iters 
as we have to recalculate
the unroll based on the assumed_vf.

When vect_analyze_loop_2 initializes the costing structure, we just set it 
again during vect_analyze_loop_costing
as loop->unroll is not cleared until vectorization succeeds.

For the epilogue it would be false, which I think makes sense as the epilogues 
should determine their VF solely
based on that of the previous attempt? Because I think it makes sense that the 
epilogues should be able to tell
the vectorizer that it wants to re-use the same mode for the next attempt, just 
without the unrolling.

> In the end whatever we do it's going to be a matter of documenting
> the interaction between vectorization and #pragma GCC unroll.
> 

Docs added

> The way you handle it is reasonable, the question is whether to
> set loop->unroll to 1 in the end (disable any further unrolling)
> or to 0 (only auto-unroll based on heuristics).  I'd argue 0
> makes more sense - iff we chose to apply the extra unrolling
> during vectorization.

0 does make more sense to me as well.  I think where we got crossed earlier was 
that I was mentioning that
Having unroll > 1 after this wasn't a good idea, so was a miscommunication. But 
0 makes much sense.

Bootstrapped Regtested on aarch64-none-linux-gnu,
arm-none-linux-gnueabihf, x86_64-pc-linux-gnu
-m32, -m64 and no issues.

Ok for master?

Thanks,
Tamar

gcc/ChangeLog:

        * tree-vectorizer.h (vector_costs::set_suggested_unroll_factor,
        LOOP_VINFO_USER_UNROLL): New.
        (class _loop_vec_info): Add user_unroll.
        * tree-vect-loop.cc (vect_estimate_min_profitable_iters): Set
        suggested_unroll_factor before calling backend costing.
        (_loop_vec_info::_loop_vec_info): Initialize user_unroll.
        (vect_transform_loop): Clear the loop->unroll value if the pragma was
        used.
        doc/extend.texi (pragma unroll): Document vectorizer interaction.

-- inline copy of patch --

diff --git a/gcc/doc/extend.texi b/gcc/doc/extend.texi
index 
e87a3c271f8420d8fd175823b5bb655f76c89afe..f8261d13903afc90d3341c09ab3fdbd0ab96ea49
 100644
--- a/gcc/doc/extend.texi
+++ b/gcc/doc/extend.texi
@@ -10398,6 +10398,11 @@ unrolled @var{n} times regardless of any commandline 
arguments.
 When the option is @var{preferred} then the user is allowed to override the
 unroll amount through commandline options.
 
+If the loop was vectorized the unroll factor specified will be used to seed the
+vectorizer unroll factor.  Whether the loop is unrolled or not will be
+determined by target costing.  The resulting vectorized loop may still be
+unrolled more in later passes depending on the target costing.
+
 @end table
 
 @node Thread-Local
diff --git a/gcc/tree-vect-loop.cc b/gcc/tree-vect-loop.cc
index 
fe6f3cf188e40396b299ff9e814cc402bc2d4e2d..1fbf92b5f4b176ada7379930b73ab503fb423e99
 100644
--- a/gcc/tree-vect-loop.cc
+++ b/gcc/tree-vect-loop.cc
@@ -1073,6 +1073,7 @@ _loop_vec_info::_loop_vec_info (class loop *loop_in, 
vec_info_shared *shared)
     peeling_for_gaps (false),
     peeling_for_niter (false),
     early_breaks (false),
+    user_unroll (false),
     no_data_dependencies (false),
     has_mask_store (false),
     scalar_loop_scaling (profile_probability::uninitialized ()),
@@ -4983,6 +4984,26 @@ vect_estimate_min_profitable_iters (loop_vec_info 
loop_vinfo,
        }
     }
 
+  /* Seed the target cost model with what the user requested if the unroll
+     factor is larger than 1 vector VF.  */
+  auto user_unroll = LOOP_VINFO_LOOP (loop_vinfo)->unroll;
+  if (user_unroll > 1)
+    {
+      LOOP_VINFO_USER_UNROLL (loop_vinfo) = true;
+      int unroll_fact = user_unroll / assumed_vf;
+      unroll_fact = 1 << ceil_log2 (unroll_fact);
+      if (unroll_fact > 1)
+       {
+         if (dump_enabled_p ())
+           dump_printf_loc (MSG_NOTE, vect_location,
+                        "setting unroll factor to %d based on user requested "
+                        "unroll factor %d and suggested vectorization "
+                        "factor: %d\n",
+                        unroll_fact, user_unroll, assumed_vf);
+         loop_vinfo->vector_costs->set_suggested_unroll_factor (unroll_fact);
+       }
+    }
+
   /* Complete the target-specific cost calculations.  */
   loop_vinfo->vector_costs->finish_cost (loop_vinfo->scalar_costs);
   vec_prologue_cost = loop_vinfo->vector_costs->prologue_cost ();
@@ -12373,6 +12394,13 @@ vect_transform_loop (loop_vec_info loop_vinfo, gimple 
*loop_vectorized_call)
        dump_printf_loc (MSG_NOTE, vect_location, "Disabling unrolling due to"
                         " variable-length vectorization factor\n");
     }
+
+  /* When we have unrolled the loop due to a user requested value we should
+     leave it up to the RTL unroll heuristics to determine if it's still worth
+     while to unroll more.  */
+  if (LOOP_VINFO_USER_UNROLL (loop_vinfo))
+    loop->unroll = 0;
+
   /* Free SLP instances here because otherwise stmt reference counting
      won't work.  */
   slp_instance instance;
diff --git a/gcc/tree-vectorizer.h b/gcc/tree-vectorizer.h
index 
a2f33a5ecd60288fe7f28ee639ff8b6a77667796..5675309ab16dc7f7f0b98e229e4798075e6cdd7e
 100644
--- a/gcc/tree-vectorizer.h
+++ b/gcc/tree-vectorizer.h
@@ -970,6 +970,10 @@ public:
   /* Main loop IV cond.  */
   gcond* loop_iv_cond;
 
+  /* True if we have an unroll factor requested by the user through pragma GCC
+     unroll.  */
+  bool user_unroll;
+
   /* True if there are no loop carried data dependencies in the loop.
      If loop->safelen <= 1, then this is always true, either the loop
      didn't have any loop carried data dependencies, or the loop is being
@@ -1094,6 +1098,7 @@ public:
 #define LOOP_VINFO_CHECK_UNEQUAL_ADDRS(L)  (L)->check_unequal_addrs
 #define LOOP_VINFO_CHECK_NONZERO(L)        (L)->check_nonzero
 #define LOOP_VINFO_LOWER_BOUNDS(L)         (L)->lower_bounds
+#define LOOP_VINFO_USER_UNROLL(L)          (L)->user_unroll
 #define LOOP_VINFO_GROUPED_STORES(L)       (L)->grouped_stores
 #define LOOP_VINFO_SLP_INSTANCES(L)        (L)->slp_instances
 #define LOOP_VINFO_SLP_UNROLLING_FACTOR(L) (L)->slp_unrolling_factor
@@ -1693,6 +1698,7 @@ public:
   unsigned int outside_cost () const;
   unsigned int total_cost () const;
   unsigned int suggested_unroll_factor () const;
+  void set_suggested_unroll_factor (unsigned int);
   machine_mode suggested_epilogue_mode () const;
 
 protected:
@@ -1791,6 +1797,15 @@ vector_costs::suggested_unroll_factor () const
   return m_suggested_unroll_factor;
 }
 
+/* Set the suggested unroll factor.  */
+
+inline void
+vector_costs::set_suggested_unroll_factor (unsigned unroll_fact)
+{
+  gcc_checking_assert (!m_finished);
+  m_suggested_unroll_factor = unroll_fact;
+}
+
 /* Return the suggested epilogue mode.  */
 
 inline machine_mode

Attachment: rb19435.patch
Description: rb19435.patch

Reply via email to