Alfie Richards <alfie.richa...@arm.com> writes:
> Adds an optimisation in FMV to redirect to a specific target if possible.
>
> A call is redirected to a specific target if both:
> - the caller can always call the callee version
> - and, it is possible to rule out all higher priority versions of the callee
>   fmv set. That is estabilished either by the callee being the highest 
> priority
>   version, or each higher priority version of the callee implying that, were 
> it
>   resolved, a higher priority version of the caller would have been selected.
>
> For this logic, introduces the new TARGET_OPTION_VERSION_A_IMPLIES_VERSION_B
> hook. Adds a full implementation for Aarch64, and a weaker default version
> for other targets.
>
> This allows the target to replace the previous optimisation as the new one is
> able to cover the same case where two function sets implement the same 
> versions.
>
> gcc/ChangeLog:
>
>       * config/aarch64/aarch64.cc (aarch64_version_a_implies_version_b): New
>       function.
>       (TARGET_OPTION_VERSION_A_IMPLIES_VERSION_B): New define.
>       * doc/tm.texi: Regenerate.
>       * doc/tm.texi.in: Add documentation for version_a_implies_version_b.
>       * multiple_target.cc (redirect_to_specific_clone): Add new optimisation
>       logic.
>       (ipa_target_clone): Add
>       * target.def: Remove TARGET_HAS_FMV_TARGET_ATTRIBUTE check.
>       * attribs.cc: (version_a_implies_version_b) New function.
>       * attribs.h: (version_a_implies_version_b) New function.
>
> gcc/testsuite/ChangeLog:
>
>       * g++.target/aarch64/fmv-selection1.C: New test.
>       * g++.target/aarch64/fmv-selection2.C: New test.
>       * g++.target/aarch64/fmv-selection3.C: New test.
>       * g++.target/aarch64/fmv-selection4.C: New test.
>       * g++.target/aarch64/fmv-selection5.C: New test.
>       * g++.target/aarch64/fmv-selection6.C: New test.
> ---
>  gcc/attribs.cc                                | 16 ++++
>  gcc/attribs.h                                 |  1 +
>  gcc/config/aarch64/aarch64.cc                 | 26 +++++
>  gcc/doc/tm.texi                               |  4 +
>  gcc/doc/tm.texi.in                            |  2 +
>  gcc/multiple_target.cc                        | 96 ++++++++++++-------
>  gcc/target.def                                |  9 ++
>  .../g++.target/aarch64/fmv-selection1.C       | 40 ++++++++
>  .../g++.target/aarch64/fmv-selection2.C       | 40 ++++++++
>  .../g++.target/aarch64/fmv-selection3.C       | 25 +++++
>  .../g++.target/aarch64/fmv-selection4.C       | 30 ++++++
>  .../g++.target/aarch64/fmv-selection5.C       | 28 ++++++
>  .../g++.target/aarch64/fmv-selection6.C       | 27 ++++++
>  13 files changed, 311 insertions(+), 33 deletions(-)
>  create mode 100644 gcc/testsuite/g++.target/aarch64/fmv-selection1.C
>  create mode 100644 gcc/testsuite/g++.target/aarch64/fmv-selection2.C
>  create mode 100644 gcc/testsuite/g++.target/aarch64/fmv-selection3.C
>  create mode 100644 gcc/testsuite/g++.target/aarch64/fmv-selection4.C
>  create mode 100644 gcc/testsuite/g++.target/aarch64/fmv-selection5.C
>  create mode 100644 gcc/testsuite/g++.target/aarch64/fmv-selection6.C
>
> diff --git a/gcc/attribs.cc b/gcc/attribs.cc
> index 2ca82674f7c..66c77904404 100644
> --- a/gcc/attribs.cc
> +++ b/gcc/attribs.cc
> @@ -1095,6 +1095,22 @@ common_function_versions (string_slice fn1 
> ATTRIBUTE_UNUSED,
>    gcc_unreachable ();
>  }
>  
> +bool
> +version_a_implies_version_b (tree fn1, tree fn2)
> +{
> +  const char *attr_name = TARGET_HAS_FMV_TARGET_ATTRIBUTE
> +                       ? "target"
> +                       : "target_version";
> +
> +  tree attr1 = lookup_attribute (attr_name, DECL_ATTRIBUTES (fn1));
> +  tree attr2 = lookup_attribute (attr_name, DECL_ATTRIBUTES (fn2));
> +
> +  if (!attr1 || !attr2)
> +    return false;
> +
> +  return attribute_value_equal (attr1, attr2);
> +}
> +
>  /* Comparator function to be used in qsort routine to sort attribute
>     specification strings to "target".  */
>  
> diff --git a/gcc/attribs.h b/gcc/attribs.h
> index fc343c0eab5..b846ce0d3a2 100644
> --- a/gcc/attribs.h
> +++ b/gcc/attribs.h
> @@ -58,6 +58,7 @@ extern bool common_function_versions (string_slice, 
> string_slice);
>  extern bool reject_target_clone_version (string_slice, location_t);
>  extern tree make_dispatcher_decl (const tree);
>  extern bool is_function_default_version (const tree);
> +extern bool version_a_implies_version_b (tree, tree);
>  extern void handle_ignored_attributes_option (vec<char *> *);
>  
>  /* Return a type like TTYPE except that its TYPE_ATTRIBUTES
> diff --git a/gcc/config/aarch64/aarch64.cc b/gcc/config/aarch64/aarch64.cc
> index 4369a0c82e6..bfae22bede2 100644
> --- a/gcc/config/aarch64/aarch64.cc
> +++ b/gcc/config/aarch64/aarch64.cc
> @@ -20449,6 +20449,29 @@ aarch64_compare_version_priority (tree decl1, tree 
> decl2)
>    return compare_feature_masks (mask1, mask2);
>  }
>  
> +/* Check if version a implies version b.  */

It'd be good to reference the target hook macro that this implements.

/* Implement TARGET_OPTION_VERSION_A_IMPLIES_VERSION_B.  */

> +bool
> +aarch64_version_a_implies_version_b (tree decl_a, tree decl_b)
> +{
> +  auto a_isa = aarch64_get_isa_flags
> +              (TREE_TARGET_OPTION (aarch64_fndecl_options (decl_a)));
> +  auto b_isa = aarch64_get_isa_flags
> +              (TREE_TARGET_OPTION (aarch64_fndecl_options (decl_b)));
> +
> +  auto a_version = get_target_version (decl_a);
> +  auto b_version = get_target_version (decl_b);
> +  if (a_version.is_valid ())
> +    aarch64_parse_fmv_features (a_version, &a_isa, NULL, NULL);
> +  if (b_version.is_valid ())
> +    aarch64_parse_fmv_features (b_version, &b_isa, NULL, NULL);

The name made me think that we should be able to assert that is_valid,
but I suppose the usage in the patch means that this isn't true for "a"
at least.

But I think the hook documentation needs to be more precise about what
this means in terms of the baseline ISA and the target(_version) ISA.
It looks on the face of it that, if we had:

a: baseline armv8-a, target_version("default")
b: target("sve"), target_version("default")

then the default implementation would return true, on the basis that
the target_versions are the same, and that the aarch64 hook would
return false.  That makes it seem like the default implementation isn't
conservatively correct (or, alternatively, that the aarch64 code is
pessimising valid cases that the default implementation wouldn't).

It would be good to test some target+target_version cases.  It looks
like the tests in the patch are for pure target_version only.

> +
> +  // Are there any bits of b that arent in a

Nit: should use /* ... */ comments in files that already do.

> +  if (b_isa & (~a_isa))
> +    return false;
> +
> +  return true;
> +}
> +
>  /* Build the struct __ifunc_arg_t type:
>  
>     struct __ifunc_arg_t
> @@ -32561,6 +32584,9 @@ aarch64_libgcc_floating_mode_supported_p
>  #undef TARGET_COMPARE_VERSION_PRIORITY
>  #define TARGET_COMPARE_VERSION_PRIORITY aarch64_compare_version_priority
>  
> +#undef TARGET_OPTION_VERSION_A_IMPLIES_VERSION_B
> +#define TARGET_OPTION_VERSION_A_IMPLIES_VERSION_B 
> aarch64_version_a_implies_version_b
> +
>  #undef TARGET_GENERATE_VERSION_DISPATCHER_BODY
>  #define TARGET_GENERATE_VERSION_DISPATCHER_BODY \
>    aarch64_generate_version_dispatcher_body
> diff --git a/gcc/doc/tm.texi b/gcc/doc/tm.texi
> index ee5bd2871fc..b7e19aeeb65 100644
> --- a/gcc/doc/tm.texi
> +++ b/gcc/doc/tm.texi
> @@ -10963,6 +10963,10 @@ versions if and only if they imply different target 
> specific attributes,
>  that is, they are compiled for different target machines.
>  @end deftypefn
>  
> +@deftypefn {Target Hook} bool TARGET_OPTION_VERSION_A_IMPLIES_VERSION_B 
> (tree @var{v_a}, tree @var{v_b})
> +This target hook returns @code{true} if the target implied by @var{v_a} 
> (with the globally enabled extensions) is a super-set of the features 
> required for @var{v_b}.
> +@end deftypefn
> +
>  @deftypefn {Target Hook} bool TARGET_CAN_INLINE_P (tree @var{caller}, tree 
> @var{callee})
>  This target hook returns @code{false} if the @var{caller} function
>  cannot inline @var{callee}, based on target specific information.  By
> diff --git a/gcc/doc/tm.texi.in b/gcc/doc/tm.texi.in
> index 5db7917e214..4d0024d142b 100644
> --- a/gcc/doc/tm.texi.in
> +++ b/gcc/doc/tm.texi.in
> @@ -7130,6 +7130,8 @@ with the target specific attributes.  The default value 
> is @code{','}.
>  
>  @hook TARGET_OPTION_FUNCTION_VERSIONS
>  
> +@hook TARGET_OPTION_VERSION_A_IMPLIES_VERSION_B
> +
>  @hook TARGET_CAN_INLINE_P
>  
>  @hook TARGET_UPDATE_IPA_FN_TARGET_INFO
> diff --git a/gcc/multiple_target.cc b/gcc/multiple_target.cc
> index 7784478d8e2..093fbb7e004 100644
> --- a/gcc/multiple_target.cc
> +++ b/gcc/multiple_target.cc
> @@ -437,47 +437,78 @@ expand_target_clones (struct cgraph_node *node, bool 
> definition)
>  static void
>  redirect_to_specific_clone (cgraph_node *node)
>  {
> -  cgraph_function_version_info *fv = node->function_version ();
> -  if (fv == NULL)
> -    return;
> -
> -  gcc_assert (TARGET_HAS_FMV_TARGET_ATTRIBUTE);
> -  tree attr_target = lookup_attribute ("target", DECL_ATTRIBUTES 
> (node->decl));
> -  if (attr_target == NULL_TREE)
> +  if (!targetm.compare_version_priority
> +      || !targetm.target_option.version_a_implies_version_b
> +      || !optimize)
>      return;
>  
> -  /* We need to remember NEXT_CALLER as it could be modified in the loop.  */
> +  /* We need to remember NEXT_CALLER as it could be modified in the
> +     loop.  */
>    for (cgraph_edge *e = node->callees; e ; e = e->next_callee)
>      {
> -      cgraph_function_version_info *fv2 = e->callee->function_version ();
> -      if (!fv2)
> +      /* Only if this is a call to a dispatched symbol.  */
> +      if (!e->callee->dispatcher_function)
>       continue;
>  
> -      tree attr_target2 = lookup_attribute ("target",
> -                                         DECL_ATTRIBUTES (e->callee->decl));
> +      cgraph_function_version_info *callee_v
> +     = e->callee->function_version ();
> +      cgraph_function_version_info *caller_v
> +     = e->caller->function_version ();
>  
> -      /* Function is not calling proper target clone.  */
> -      if (attr_target2 == NULL_TREE
> -       || !attribute_value_equal (attr_target, attr_target2))
> -     {
> -       while (fv2->prev != NULL)
> -         fv2 = fv2->prev;
> +      /* If this is not the TU that contains the definition of the default
> +      version we are not guaranteed to have visibility of all versions
> +      so cannot reason about them.  */
> +      if (!TREE_STATIC (callee_v->next->this_node->decl))
> +     continue;

I think the important test here is instead e->binds_to_current_def_p ().
Preemption and weak symbols mean that a locally-defined function might
not be the one chosen at link or load time.

> -       /* Try to find a clone with equal target attribute.  */
> -       for (; fv2 != NULL; fv2 = fv2->next)
> -         {
> -           cgraph_node *callee = fv2->this_node;
> -           attr_target2 = lookup_attribute ("target",
> -                                            DECL_ATTRIBUTES (callee->decl));
> -           if (attr_target2 != NULL_TREE
> -               && attribute_value_equal (attr_target, attr_target2))
> +      cgraph_function_version_info *highest_callable_fn = NULL;
> +      for (cgraph_function_version_info *ver = callee_v->next;
> +        ver;
> +        ver = ver->next)
> +     if (targetm.target_option.version_a_implies_version_b
> +           (node->decl, ver->this_node->decl))
> +       highest_callable_fn = ver;
> +
> +      if (!highest_callable_fn)
> +     continue;
> +
> +      /* If there are higher priority versions of callee and caller has no
> +      more version information, then not callable.  */
> +      if (!caller_v && highest_callable_fn->next)
> +     continue;
> +
> +      bool inlinable = true;

Maybe "can_redirect" rather than "inlinable".

> +      /* If every higher priority version would imply a higher priority
> +      version of caller would have been selected, then this is
> +      callable.  */
> +      if (caller_v)
> +     for (cgraph_function_version_info *callee_ver
> +            = highest_callable_fn->next;
> +          callee_ver;
> +          callee_ver = callee_ver->next)
> +       {
> +         bool not_possible = false;
> +         for (cgraph_function_version_info *caller_ver = caller_v->next;
> +              caller_ver;
> +              caller_ver = caller_ver->next)
> +           if (targetm.target_option.version_a_implies_version_b
> +                 (callee_ver->this_node->decl,
> +                  caller_ver->this_node->decl))
>               {
> -               e->redirect_callee (callee);
> -               cgraph_edge::redirect_call_stmt_to_callee (e);
> +               not_possible = true;
>                 break;
>               }
> -         }
> -     }
> +         if (!not_possible)
> +           {
> +             inlinable = false;
> +             break;
> +           }
> +       }
> +      if (!inlinable)
> +     continue;

It would be good to avoid the double negative in !not_possible.

Thanks,
Richard

Reply via email to