On Fri, Jul 25, 2025 at 4:45 PM Andrew Stubbs <a...@baylibre.com> wrote:
>
> Hi all,
>
> The optimization options are deliberately passed through to the LTO compiler,
> but when the same mechanism is reused for offloading it ends up forcing the
> host compiler settings onto the device compiler.  Maybe this should be removed
> completely, but this patch just fixes a few of the options.  In particular,
> param_vect_partial_vector_usage is disabled by x86 and this really hurts
> amdgcn.
>
> I also fixed an ambiguous else warning in the generated file by adding braces.
>
> OK for mainline?

OK.

Thanks,
Richard.

> Andrew
>
> gcc/ChangeLog:
>
>         * config/gcn/gcn.cc (gcn_option_override): Set default for
>         param_vect_partial_vector_usage to "1".
>         * optc-save-gen.awk: Don't pass through options marked "NoOffload".
>         * params.opt (-param=vect-epilogues-nomask): Add NoOffload.
>         (-param=vect-partial-vector-usage): Likewise.
>         (-param=vect-inner-loop-cost-factor): Likewise.
> ---
>  gcc/config/gcn/gcn.cc |  4 ++++
>  gcc/optc-save-gen.awk | 19 +++++++++++++++++--
>  gcc/params.opt        |  6 +++---
>  3 files changed, 24 insertions(+), 5 deletions(-)
>
> diff --git a/gcc/config/gcn/gcn.cc b/gcc/config/gcn/gcn.cc
> index d451bf43355..f228c022157 100644
> --- a/gcc/config/gcn/gcn.cc
> +++ b/gcc/config/gcn/gcn.cc
> @@ -54,6 +54,7 @@
>  #include "gimple.h"
>  #include "cgraph.h"
>  #include "case-cfn-macros.h"
> +#include "opts.h"
>
>  /* This file should be included last.  */
>  #include "target-def.h"
> @@ -183,6 +184,9 @@ gcn_option_override (void)
>
>    if (flag_sram_ecc == HSACO_ATTR_DEFAULT)
>      flag_sram_ecc = gcn_devices[gcn_arch].sramecc_default;
> +
> +  SET_OPTION_IF_UNSET (&global_options, &global_options_set,
> +                      param_vect_partial_vector_usage, 1);
>  }
>
>  /* }}}  */
> diff --git a/gcc/optc-save-gen.awk b/gcc/optc-save-gen.awk
> index a3d7e5a478e..31756ec380d 100644
> --- a/gcc/optc-save-gen.awk
> +++ b/gcc/optc-save-gen.awk
> @@ -1313,6 +1313,12 @@ for (i = 0; i < n_opts; i++) {
>                 # offloading is enabled.
>                 if (flag_set_p("Target", flags[i]))
>                         var_target_opt[n_opt_val] = 1;
> +
> +               # These options should not be passed from host to target, but
> +               # are not actually target specific.
> +               if (flag_set_p("NoOffload", flags[i]))
> +                       var_target_opt[n_opt_val] = 2;
> +
>                 n_opt_val++;
>         }
>  }
> @@ -1393,7 +1399,7 @@ for (i = 0; i < n_opt_val; i++) {
>                 # Do not stream out target-specific opts if offloading is
>                 # enabled.
>                 if (var_target_opt[i])
> -                       print "  if (!lto_stream_offload_p)"
> +                       print "  if (!lto_stream_offload_p) {"
>                 # If applicable, encode the streamed value.
>                 if (var_opt_optimize_init[i]) {
>                         print "  if (" var_opt_optimize_init[i] " > (" var_opt_val_type[i] ") 10)";
> @@ -1403,6 +1409,8 @@ for (i = 0; i < n_opt_val; i++) {
>                 } else {
>                         print "  bp_pack_var_len_" sgn " (bp, ptr->" name");";
>                 }
> +               if (var_target_opt[i])
> +                       print "}"
>         }
>  }
>  print "  for (size_t i = 0; i < ARRAY_SIZE (ptr->explicit_mask); i++)";
> @@ -1418,10 +1426,14 @@ print "                           struct cl_optimization *ptr ATTRIBUTE_UNUSED)"
>  print "{";
>  for (i = 0; i < n_opt_val; i++) {
>         name = var_opt_val[i]
> -        if (var_target_opt[i]) {
> +        if (var_target_opt[i] == 1) {
>                 print "#ifdef ACCEL_COMPILER"
>                 print "#error accel compiler cannot define Optimization attribute for target-specific option " name;
>                 print "#else"
> +       } else if (var_target_opt[i] == 2) {
> +               print "#ifdef ACCEL_COMPILER"
> +               print "  ptr->" name " = global_options." name ";"
> +               print "#else"
>         }
>         otype = var_opt_val_type[i];
>         if (otype ~ "^const char \\**$") {
> @@ -1489,6 +1501,9 @@ for (i = 0; i < n_opts; i++) {
>         if (flag_set_p("Warning", flags[i]))
>                 continue;
>
> +       if (flag_set_p("NoOffload", flags[i]))
> +               continue;
> +
>         if (name in checked_options)
>                 continue;
>         checked_options[name]++
> diff --git a/gcc/params.opt b/gcc/params.opt
> index c7d5fd4d13b..ac1b2c7eb26 100644
> --- a/gcc/params.opt
> +++ b/gcc/params.opt
> @@ -1226,7 +1226,7 @@ Common Joined UInteger Var(param_use_canonical_types) Init(1) IntegerRange(0, 1)
>  Whether to use canonical types.
>
>  -param=vect-epilogues-nomask=
> -Common Joined UInteger Var(param_vect_epilogues_nomask) Init(1) IntegerRange(0, 1) Param Optimization
> +Common Joined UInteger Var(param_vect_epilogues_nomask) Init(1) IntegerRange(0, 1) Param Optimization NoOffload
>  Enable loop epilogue vectorization using smaller vector size.
>
>  -param=vect-max-layout-candidates=
> @@ -1246,11 +1246,11 @@ Common Joined UInteger Var(param_vect_max_version_for_alignment_checks) Init(6)
>  Bound on number of runtime checks inserted by the vectorizer's loop versioning for alignment check.
>
>  -param=vect-partial-vector-usage=
> -Common Joined UInteger Var(param_vect_partial_vector_usage) Init(2) IntegerRange(0, 2) Param Optimization
> +Common Joined UInteger Var(param_vect_partial_vector_usage) Init(2) IntegerRange(0, 2) Param Optimization NoOffload
>  Controls how loop vectorizer uses partial vectors.  0 means never, 1 means only for loops whose need to iterate can be removed, 2 means for all loops.  The default value is 2.
>
>  -param=vect-inner-loop-cost-factor=
> -Common Joined UInteger Var(param_vect_inner_loop_cost_factor) Init(50) IntegerRange(1, 10000) Param Optimization
> +Common Joined UInteger Var(param_vect_inner_loop_cost_factor) Init(50) IntegerRange(1, 10000) Param Optimization NoOffload
>  The maximum factor which the loop vectorizer applies to the cost of statements in an inner loop relative to the loop being vectorized.
>
>  -param=vect-induction-float=
> --
> 2.50.0
>

Reply via email to