On Wed, Nov 6, 2024 at 7:28 AM Andrew Stubbs <a...@baylibre.com> wrote:
>
> If requested, return the vectorization factor appropriate for the offload
> device, if any.
>
> This change gives a significant speedup in the BabelStream "dot" benchmark on
> amdgcn.
>
> The omp_adjust_chunk_size use case is set to "false", for now, but I intend to
> change that in a follow-up patch.
>
> Note that NVPTX SIMT offload does not use this code-path.
>
> gcc/ChangeLog:
>
>         * gimple-loop-versioning.cc (loop_versioning::loop_versioning): Set
>         omp_max_vf to offload == false.
>         * omp-expand.cc (omp_adjust_chunk_size): Likewise.
>         * omp-general.cc (omp_max_vf): Add "offload" parameter, and detect
>         amdgcn offload devices.
>         * omp-general.h (omp_max_vf): Likewise.
>         * omp-low.cc (lower_rec_simd_input_clauses): Pass offload state to
>         omp_max_vf.
> ---
>  gcc/gimple-loop-versioning.cc |  2 +-
>  gcc/omp-expand.cc             |  2 +-
>  gcc/omp-general.cc            | 17 +++++++++++++++--
>  gcc/omp-general.h             |  2 +-
>  gcc/omp-low.cc                |  3 ++-
>  5 files changed, 20 insertions(+), 6 deletions(-)
>
> diff --git a/gcc/gimple-loop-versioning.cc b/gcc/gimple-loop-versioning.cc
> index 107b0020024..2968c929d04 100644
> --- a/gcc/gimple-loop-versioning.cc
> +++ b/gcc/gimple-loop-versioning.cc
> @@ -554,7 +554,7 @@ loop_versioning::loop_versioning (function *fn)
>       handled efficiently by scalar code.  omp_max_vf calculates the
>       maximum number of bytes in a vector, when such a value is relevant
>       to loop optimization.  */
> -  m_maximum_scale = estimated_poly_value (omp_max_vf ());
> +  m_maximum_scale = estimated_poly_value (omp_max_vf (false));
>    m_maximum_scale = MAX (m_maximum_scale, MAX_FIXED_MODE_SIZE);
>  }
>
> diff --git a/gcc/omp-expand.cc b/gcc/omp-expand.cc
> index b0b4ddf5dbc..907fd46a5b2 100644
> --- a/gcc/omp-expand.cc
> +++ b/gcc/omp-expand.cc
> @@ -212,7 +212,7 @@ omp_adjust_chunk_size (tree chunk_size, bool simd_schedule)
>    if (!simd_schedule || integer_zerop (chunk_size))
>      return chunk_size;
>
> -  poly_uint64 vf = omp_max_vf ();
> +  poly_uint64 vf = omp_max_vf (false);
>    if (known_eq (vf, 1U))
>      return chunk_size;
>
> diff --git a/gcc/omp-general.cc b/gcc/omp-general.cc
> index f74b9bf5e96..1ae575ee181 100644
> --- a/gcc/omp-general.cc
> +++ b/gcc/omp-general.cc
> @@ -987,10 +987,11 @@ find_combined_omp_for (tree *tp, int *walk_subtrees, void *data)
>    return NULL_TREE;
>  }
>
> -/* Return maximum possible vectorization factor for the target.  */
> +/* Return maximum possible vectorization factor for the target, or for
> +   the OpenMP offload target if one exists.  */
>
>  poly_uint64
> -omp_max_vf (void)
> +omp_max_vf (bool offload)
>  {
>    if (!optimize
>        || optimize_debug
> @@ -999,6 +1000,18 @@ omp_max_vf (void)
>           && OPTION_SET_P (flag_tree_loop_vectorize)))
>      return 1;
>
> +  if (ENABLE_OFFLOADING && offload)
> +    {
> +      for (const char *c = getenv ("OFFLOAD_TARGET_NAMES"); c;)
> +       {
> +         if (startswith (c, "amdgcn"))
> +           return ordered_max (64, omp_max_vf (false));

This causes a bootstrap failure for me (and others) on x86_64-linux-gnu:

In file included from ../../src-master/gcc/coretypes.h:497,
                 from ../../src-master/gcc/omp-general.cc:25:
../../src-master/gcc/poly-int.h: In instantiation of ‘typename
if_nonpoly<Cb, bool>::type maybe_lt(const poly_int<N, C>&, const Cb&)
[with unsigned int N = 1; Ca = long unsigned int; Cb = int; typename
if_nonpoly<Cb, bool>::type = bool]’:
../../src-master/gcc/poly-int.h:1440:7:   required from ‘poly_int<N,
typename poly_result<typename if_nonpoly<Ca>::type, Cb>::type>
ordered_max(const Ca&, const poly_int<N, Cb>&) [with unsigned int N =
1; Ca = int; Cb = long unsigned int; typename poly_result<typename
if_nonpoly<Ca>::type, Cb>::type = long unsigned int; typename
if_nonpoly<Ca>::type = int]’
 1342 | #define maybe_gt(A, B) maybe_lt (B, A)
      |                        ~~~~~~~~~^~~~~~
../../src-master/gcc/omp-general.cc:1008:25:   required from here
 1008 |             return ordered_max (64, omp_max_vf (false));
      |                    ~~~~~~~~~~~~^~~~~~~~~~~~~~~~~~~~~~~~
../../src-master/gcc/poly-int.h:1317:22: error: comparison of integer
expressions of different signedness: ‘const long unsigned int’ and
‘const int’ [-Werror=sign-compare]
 1317 |   return a.coeffs[0] < b;
      |          ~~~~~~~~~~~~^~~
../../src-master/gcc/poly-int.h: In instantiation of ‘typename
if_nonpoly<Ca, bool>::type maybe_lt(const Ca&, const poly_int<N, Cb>&)
[with unsigned int N = 1; Ca = int; Cb = long unsigned int; typename
if_nonpoly<Ca, bool>::type = bool]’:
../../src-master/gcc/poly-int.h:1445:2:   required from ‘poly_int<N,
typename poly_result<typename if_nonpoly<Ca>::type, Cb>::type>
ordered_max(const Ca&, const poly_int<N, Cb>&) [with unsigned int N =
1; Ca = int; Cb = long unsigned int; typename poly_result<typename
if_nonpoly<Ca>::type, Cb>::type = long unsigned int; typename
if_nonpoly<Ca>::type = int]’
 1342 | #define maybe_gt(A, B) maybe_lt (B, A)
      |                        ~~~~~~~~~^~~~~~
../../src-master/gcc/omp-general.cc:1008:25:   required from here
 1008 |             return ordered_max (64, omp_max_vf (false));
      |                    ~~~~~~~~~~~~^~~~~~~~~~~~~~~~~~~~~~~~
../../src-master/gcc/poly-int.h:1328:12: error: comparison of integer
expressions of different signedness: ‘const int’ and ‘const long
unsigned int’ [-Werror=sign-compare]
 1328 |   return a < b.coeffs[0];
      |          ~~^~~~~~~~~~~

Thanks,
Andrew Pinski


> +         else if ((c = strchr (c, ':')))
> +           c++;
> +       }
> +      /* Otherwise, fall through to host VF.  */
> +    }
> +
>    auto_vector_modes modes;
>    targetm.vectorize.autovectorize_vector_modes (&modes, true);
>    if (!modes.is_empty ())
> diff --git a/gcc/omp-general.h b/gcc/omp-general.h
> index f3778131626..70f78d2055b 100644
> --- a/gcc/omp-general.h
> +++ b/gcc/omp-general.h
> @@ -162,7 +162,7 @@ extern void omp_extract_for_data (gomp_for *for_stmt, struct omp_for_data *fd,
>                                   struct omp_for_data_loop *loops);
>  extern gimple *omp_build_barrier (tree lhs);
>  extern tree find_combined_omp_for (tree *, int *, void *);
> -extern poly_uint64 omp_max_vf (void);
> +extern poly_uint64 omp_max_vf (bool);
>  extern int omp_max_simt_vf (void);
>  extern const char *omp_context_name_list_prop (tree);
>  extern void omp_construct_traits_to_codes (tree, int, enum tree_code *);
> diff --git a/gcc/omp-low.cc b/gcc/omp-low.cc
> index 44c4310075b..70a2c108fbc 100644
> --- a/gcc/omp-low.cc
> +++ b/gcc/omp-low.cc
> @@ -4589,7 +4589,8 @@ lower_rec_simd_input_clauses (tree new_var, omp_context *ctx,
>  {
>    if (known_eq (sctx->max_vf, 0U))
>      {
> -      sctx->max_vf = sctx->is_simt ? omp_max_simt_vf () : omp_max_vf ();
> +      sctx->max_vf = (sctx->is_simt ? omp_max_simt_vf ()
> +                     : omp_max_vf (omp_maybe_offloaded_ctx (ctx)));
>        if (maybe_gt (sctx->max_vf, 1U))
>         {
>           tree c = omp_find_clause (gimple_omp_for_clauses (ctx->stmt),
> --
> 2.46.0
>

Reply via email to