Andrew Carlotti <andrew.carlo...@arm.com> writes:
> Compared to v2, this splits out the alignment switching into a new class and
> merges the rest of the switching functionality into aarch64_target_switcher,
> as agreed with Richard in the previous review discussion.
>
> Bootstrapped and regression tested on aarch64. Is this ok for master?
>
> ---
>
> Refactor the switcher classes into two separate classes:
>
> - sve_alignment_switcher takes the alignment switching functionality,
>   and is used only for ABI correctness when defining sve structure
>   types.
> - aarch64_target_switcher takes the rest of the functionality of
>   aarch64_simd_switcher and sve_switcher, and gates simd/sve specific
>   parts upon the specified feature flags.
>
> Additionally, aarch64_target_switcher now adds dependencies of the
> specified flags (which adds +fcma and +bf16 to some intrinsic
> declarations), and unsets current_target_pragma.
>
> This last change fixes an internal bug where we would sometimes add a
> user specified target pragma (stored in current_target_pragma) on top of
> an internally specified target architecture while initialising
> intrinsics with `#pragma GCC aarch64 "arm_*.h"`.  As far as I can tell, this
> has no visible impact at the moment.  However, the unintended target
> feature combinations lead to unwanted behaviour in an under-development
> patch.
>
> gcc/ChangeLog:
>
>       * common/config/aarch64/aarch64-common.cc
>       (struct aarch64_extension_info): Add field.
>       (aarch64_get_required_features): New.
>       * config/aarch64/aarch64-builtins.cc
>       (aarch64_simd_switcher::aarch64_simd_switcher): Rename to...
>       (aarch64_target_switcher::aarch64_target_switcher): ...this,
>       and extend to handle sve, nosimd and target pragmas.
>       (aarch64_simd_switcher::~aarch64_simd_switcher): Rename to...
>       (aarch64_target_switcher::~aarch64_target_switcher): ...this,
>       and extend to handle sve, nosimd and target pragmas.
>       (handle_arm_acle_h): Use aarch64_target_switcher.
>       (handle_arm_neon_h): Rename switcher and pass explicit flags.
>       (aarch64_general_init_builtins): Ditto.
>       * config/aarch64/aarch64-protos.h
>       (class aarch64_simd_switcher): Rename to...
>       (class aarch64_target_switcher): ...this, and add new members.
>       (aarch64_get_required_features): New prototype.
>       * config/aarch64/aarch64-sve-builtins.cc
>       (sve_switcher::sve_switcher): Delete
>       (sve_switcher::~sve_switcher): Delete
>       (sve_alignment_switcher::sve_alignment_switcher): New
>       (sve_alignment_switcher::~sve_alignment_switcher): New
>       (register_builtin_types): Use alignment switcher
>       (init_builtins): Rename switcher.
>       (handle_arm_sve_h): Ditto.
>       (handle_arm_neon_sve_bridge_h): Ditto.
>       (handle_arm_sme_h): Ditto.
>       * config/aarch64/aarch64-sve-builtins.h
>       (class sve_switcher): Delete.
>       (class sme_switcher): Delete.
>       (class sve_alignment_switcher): New.

OK, thanks.  Personally I think we should keep the sve_alignment_switcher
at function scope (in handle_arm_sve_h), for two reasons:

(a) Nothing in arm_sve.h should be affected by -fpack-struct, so it seems
    safer/more future-proof to apply it to the whole header.

(b) Even the reduced scope isn't precise, since it includes vectors as
    well as structures.

So I'd slightly prefer the patch in that form (pre-approved).  The patch
is still OK as posted though.

Richard

> diff --git a/gcc/common/config/aarch64/aarch64-common.cc 
> b/gcc/common/config/aarch64/aarch64-common.cc
> index 
> ef4458fb69308d2bb6785e97be5be85226cf0ebb..500bf784983d851c54ea4ec59cf3cad29e5e309e
>  100644
> --- a/gcc/common/config/aarch64/aarch64-common.cc
> +++ b/gcc/common/config/aarch64/aarch64-common.cc
> @@ -157,6 +157,8 @@ struct aarch64_extension_info
>    aarch64_feature_flags flags_on;
>    /* If this feature is turned off, these bits also need to be turned off.  
> */
>    aarch64_feature_flags flags_off;
> +  /* If this feature remains enabled, these bits must also remain enabled.  
> */
> +  aarch64_feature_flags flags_required;
>  };
>  
>  /* ISA extensions in AArch64.  */
> @@ -164,9 +166,10 @@ static constexpr aarch64_extension_info all_extensions[] 
> =
>  {
>  #define AARCH64_OPT_EXTENSION(NAME, IDENT, C, D, E, FEATURE_STRING) \
>    {NAME, AARCH64_FL_##IDENT, feature_deps::IDENT ().explicit_on, \
> -   feature_deps::get_flags_off (feature_deps::root_off_##IDENT)},
> +   feature_deps::get_flags_off (feature_deps::root_off_##IDENT), \
> +   feature_deps::IDENT ().enable},
>  #include "config/aarch64/aarch64-option-extensions.def"
> -  {NULL, 0, 0, 0}
> +  {NULL, 0, 0, 0, 0}
>  };
>  
>  struct aarch64_arch_info
> @@ -204,6 +207,18 @@ static constexpr aarch64_processor_info all_cores[] =
>    {NULL, aarch64_no_cpu, aarch64_no_arch, 0}
>  };
>  
> +/* Return the set of feature flags that are required to be enabled when the
> +   features in FLAGS are enabled.  */
> +
> +aarch64_feature_flags
> +aarch64_get_required_features (aarch64_feature_flags flags)
> +{
> +  const struct aarch64_extension_info *opt;
> +  for (opt = all_extensions; opt->name != NULL; opt++)
> +    if (flags & opt->flag_canonical)
> +      flags |= opt->flags_required;
> +  return flags;
> +}
>  
>  /* Print a list of CANDIDATES for an argument, and try to suggest a specific
>     close match.  */
> diff --git a/gcc/config/aarch64/aarch64-builtins.cc 
> b/gcc/config/aarch64/aarch64-builtins.cc
> index 
> 128cc365d3d585e01cb69668f285318ee56a36fc..93f939a9c834c664fa8f081e6a484779071503eb
>  100644
> --- a/gcc/config/aarch64/aarch64-builtins.cc
> +++ b/gcc/config/aarch64/aarch64-builtins.cc
> @@ -43,6 +43,7 @@
>  #include "langhooks.h"
>  #include "gimple-iterator.h"
>  #include "case-cfn-macros.h"
> +#include "regs.h"
>  #include "emit-rtl.h"
>  #include "stringpool.h"
>  #include "attribs.h"
> @@ -1877,23 +1878,42 @@ aarch64_scalar_builtin_type_p (aarch64_simd_type t)
>    return (t == Poly8_t || t == Poly16_t || t == Poly64_t || t == Poly128_t);
>  }
>  
> -/* Enable AARCH64_FL_* flags EXTRA_FLAGS on top of the base Advanced SIMD
> -   set.  */
> -aarch64_simd_switcher::aarch64_simd_switcher (aarch64_feature_flags 
> extra_flags)
> +/* Temporarily set FLAGS as the enabled target features.  */
> +aarch64_target_switcher::aarch64_target_switcher (aarch64_feature_flags 
> flags)
>    : m_old_asm_isa_flags (aarch64_asm_isa_flags),
> -    m_old_general_regs_only (TARGET_GENERAL_REGS_ONLY)
> +    m_old_general_regs_only (TARGET_GENERAL_REGS_ONLY),
> +    m_old_target_pragma (current_target_pragma)
>  {
> -  /* Changing the ISA flags should be enough here.  We shouldn't need to
> -     pay the compile-time cost of a full target switch.  */
> -  global_options.x_target_flags &= ~MASK_GENERAL_REGS_ONLY;
> -  aarch64_set_asm_isa_flags (AARCH64_FL_FP | AARCH64_FL_SIMD | extra_flags);
> +  /* Include all dependencies.  */
> +  flags = aarch64_get_required_features (flags);
> +
> +  /* Changing the ISA flags and have_regs_of_mode should be enough here.  We
> +     shouldn't need to pay the compile-time cost of a full target switch.  */
> +  if (flags & AARCH64_FL_FP)
> +    global_options.x_target_flags &= ~MASK_GENERAL_REGS_ONLY;
> +  aarch64_set_asm_isa_flags (flags);
> +
> +  /* Target pragmas are irrelevant when defining intrinsics artificially.  */
> +  current_target_pragma = NULL_TREE;
> +
> +  /* Ensure SVE regs are available if SVE or SME is enabled.  */
> +  memcpy (m_old_have_regs_of_mode, have_regs_of_mode, sizeof
> +       (have_regs_of_mode));
> +  if (flags & (AARCH64_FL_SVE | AARCH64_FL_SME))
> +    for (int i = 0; i < NUM_MACHINE_MODES; ++i)
> +      if (aarch64_sve_mode_p ((machine_mode) i))
> +     have_regs_of_mode[i] = true;
>  }
>  
> -aarch64_simd_switcher::~aarch64_simd_switcher ()
> +aarch64_target_switcher::~aarch64_target_switcher ()
>  {
>    if (m_old_general_regs_only)
>      global_options.x_target_flags |= MASK_GENERAL_REGS_ONLY;
>    aarch64_set_asm_isa_flags (m_old_asm_isa_flags);
> +  current_target_pragma = m_old_target_pragma;
> +
> +  memcpy (have_regs_of_mode, m_old_have_regs_of_mode,
> +       sizeof (have_regs_of_mode));
>  }
>  
>  /* Implement #pragma GCC aarch64 "arm_neon.h".
> @@ -1903,7 +1923,7 @@ aarch64_simd_switcher::~aarch64_simd_switcher ()
>  void
>  handle_arm_neon_h (void)
>  {
> -  aarch64_simd_switcher simd;
> +  aarch64_target_switcher switcher (AARCH64_FL_SIMD);
>  
>    /* Register the AdvSIMD vector tuple types.  */
>    for (unsigned int i = 0; i < ARM_NEON_H_TYPES_LAST; i++)
> @@ -2353,6 +2373,8 @@ aarch64_init_data_intrinsics (void)
>  void
>  handle_arm_acle_h (void)
>  {
> +  aarch64_target_switcher switcher;
> +
>    aarch64_init_ls64_builtins ();
>    aarch64_init_tme_builtins ();
>    aarch64_init_memtag_builtins ();
> @@ -2446,7 +2468,7 @@ aarch64_general_init_builtins (void)
>    aarch64_init_bf16_types ();
>  
>    {
> -    aarch64_simd_switcher simd;
> +    aarch64_target_switcher switcher (AARCH64_FL_SIMD);
>      aarch64_init_simd_builtins ();
>    }
>  
> diff --git a/gcc/config/aarch64/aarch64-protos.h 
> b/gcc/config/aarch64/aarch64-protos.h
> index 
> 4235f4a0ca51af49c2852a420f1056727b24f345..ecacbd307987c54575d546d6dd34c33dbf7e1c9b
>  100644
> --- a/gcc/config/aarch64/aarch64-protos.h
> +++ b/gcc/config/aarch64/aarch64-protos.h
> @@ -733,15 +733,17 @@ const unsigned int AARCH64_BUILTIN_CLASS = (1 << 
> AARCH64_BUILTIN_SHIFT) - 1;
>  
>  /* RAII class for enabling enough features to define built-in types
>     and implement the arm_neon.h pragma.  */
> -class aarch64_simd_switcher
> +class aarch64_target_switcher
>  {
>  public:
> -  aarch64_simd_switcher (aarch64_feature_flags extra_flags = 0);
> -  ~aarch64_simd_switcher ();
> +  aarch64_target_switcher (aarch64_feature_flags flags = 0);
> +  ~aarch64_target_switcher ();
>  
>  private:
>    aarch64_feature_flags m_old_asm_isa_flags;
>    bool m_old_general_regs_only;
> +  tree m_old_target_pragma;
> +  bool m_old_have_regs_of_mode[MAX_MACHINE_MODE];
>  };
>  
>  /* Represents the ISA requirements of an intrinsic function, or of some
> @@ -1190,6 +1192,7 @@ void aarch64_set_asm_isa_flags (aarch64_feature_flags);
>  void aarch64_set_asm_isa_flags (gcc_options *, aarch64_feature_flags);
>  bool aarch64_handle_option (struct gcc_options *, struct gcc_options *,
>                            const struct cl_decoded_option *, location_t);
> +aarch64_feature_flags aarch64_get_required_features (aarch64_feature_flags);
>  void aarch64_print_hint_for_extensions (const char *);
>  void aarch64_print_hint_for_arch (const char *);
>  void aarch64_print_hint_for_core (const char *);
> diff --git a/gcc/config/aarch64/aarch64-sve-builtins.cc 
> b/gcc/config/aarch64/aarch64-sve-builtins.cc
> index 
> 5d2062726d6bab31652bc9fa4bbd597704ef46e5..c62d0c2ea499f4c2eee3e70b41b6281ebd326a6d
>  100644
> --- a/gcc/config/aarch64/aarch64-sve-builtins.cc
> +++ b/gcc/config/aarch64/aarch64-sve-builtins.cc
> @@ -42,7 +42,6 @@
>  #include "emit-rtl.h"
>  #include "tree-vector-builder.h"
>  #include "stor-layout.h"
> -#include "regs.h"
>  #include "alias.h"
>  #include "gimple-fold.h"
>  #include "langhooks.h"
> @@ -1296,26 +1295,14 @@ registered_function_hasher::equal (value_type value, 
> const compare_type &key)
>    return value->instance == key;
>  }
>  
> -sve_switcher::sve_switcher (aarch64_feature_flags flags)
> -  : aarch64_simd_switcher (AARCH64_FL_F16 | AARCH64_FL_SVE | flags)
> +sve_alignment_switcher::sve_alignment_switcher ()
>  {
> -  /* Changing the ISA flags and have_regs_of_mode should be enough here.
> -     We shouldn't need to pay the compile-time cost of a full target
> -     switch.  */
>    m_old_maximum_field_alignment = maximum_field_alignment;
>    maximum_field_alignment = 0;
> -
> -  memcpy (m_old_have_regs_of_mode, have_regs_of_mode,
> -       sizeof (have_regs_of_mode));
> -  for (int i = 0; i < NUM_MACHINE_MODES; ++i)
> -    if (aarch64_sve_mode_p ((machine_mode) i))
> -      have_regs_of_mode[i] = true;
>  }
>  
> -sve_switcher::~sve_switcher ()
> +sve_alignment_switcher::~sve_alignment_switcher ()
>  {
> -  memcpy (have_regs_of_mode, m_old_have_regs_of_mode,
> -       sizeof (have_regs_of_mode));
>    maximum_field_alignment = m_old_maximum_field_alignment;
>  }
>  
> @@ -4652,6 +4639,8 @@ register_type_decl (tree type, const char *name)
>  static void
>  register_builtin_types ()
>  {
> +  sve_alignment_switcher switcher;
> +
>  #define DEF_SVE_TYPE(ACLE_NAME, NCHARS, ABI_NAME, SCALAR_TYPE) \
>    scalar_types[VECTOR_TYPE_ ## ACLE_NAME] = SCALAR_TYPE;
>  #include "aarch64-sve-builtins.def"
> @@ -4726,7 +4715,7 @@ register_builtin_types ()
>  void
>  init_builtins ()
>  {
> -  sve_switcher sve;
> +  aarch64_target_switcher switcher (AARCH64_FL_SVE);
>    register_builtin_types ();
>    if (in_lto_p)
>      {
> @@ -4842,18 +4831,21 @@ handle_arm_sve_h (bool function_nulls_p)
>        return;
>      }
>  
> -  sve_switcher sve;
> +  aarch64_target_switcher switcher (AARCH64_FL_SVE);
>  
> -  /* Define the vector and tuple types.  */
> -  for (unsigned int type_i = 0; type_i < NUM_VECTOR_TYPES; ++type_i)
> -    {
> -      vector_type_index type = vector_type_index (type_i);
> -      register_vector_type (type);
> -      if (type != VECTOR_TYPE_svcount_t)
> -     for (unsigned int count = 2; count <= MAX_TUPLE_SIZE; ++count)
> -       if (type != VECTOR_TYPE_svbool_t || count == 2 || count == 4)
> -         register_tuple_type (count, type);
> -    }
> +  {
> +    /* Define the vector and tuple types.  */
> +    sve_alignment_switcher alignment_switcher;
> +    for (unsigned int type_i = 0; type_i < NUM_VECTOR_TYPES; ++type_i)
> +      {
> +     vector_type_index type = vector_type_index (type_i);
> +     register_vector_type (type);
> +     if (type != VECTOR_TYPE_svcount_t)
> +       for (unsigned int count = 2; count <= MAX_TUPLE_SIZE; ++count)
> +         if (type != VECTOR_TYPE_svbool_t || count == 2 || count == 4)
> +           register_tuple_type (count, type);
> +      }
> +  }
>  
>    /* Define the enums.  */
>    register_svpattern ();
> @@ -4873,6 +4865,8 @@ handle_arm_neon_sve_bridge_h (bool function_nulls_p)
>    if (initial_indexes[arm_sme_handle] == 0)
>      handle_arm_sme_h (true);
>  
> +  aarch64_target_switcher switcher;
> +
>    /* Define the functions.  */
>    function_builder builder (arm_neon_sve_handle, function_nulls_p);
>    for (unsigned int i = 0; i < ARRAY_SIZE (neon_sve_function_groups); ++i)
> @@ -4900,7 +4894,7 @@ handle_arm_sme_h (bool function_nulls_p)
>        return;
>      }
>  
> -  sme_switcher sme;
> +  aarch64_target_switcher switcher (AARCH64_FL_SME);
>  
>    function_builder builder (arm_sme_handle, function_nulls_p);
>    for (unsigned int i = 0; i < ARRAY_SIZE (sme_function_groups); ++i)
> diff --git a/gcc/config/aarch64/aarch64-sve-builtins.h 
> b/gcc/config/aarch64/aarch64-sve-builtins.h
> index 
> 54d213dfe6e0e1cd95e932fc4a04e9cd360f15f5..c145b8065ae3c32bc860b3d8def0380743537aa6
>  100644
> --- a/gcc/config/aarch64/aarch64-sve-builtins.h
> +++ b/gcc/config/aarch64/aarch64-sve-builtins.h
> @@ -822,24 +822,17 @@ public:
>    virtual bool check (function_checker &) const { return true; }
>  };
>  
> -/* RAII class for enabling enough SVE features to define the built-in
> -   types and implement the arm_sve.h pragma.  */
> -class sve_switcher : public aarch64_simd_switcher
> +/* RAII class for temporarily disabling the effect of any -fpack-struct 
> option.
> +   This is used to ensure that sve vector tuple types are defined with the
> +   correct alignment.  */
> +class sve_alignment_switcher
>  {
>  public:
> -  sve_switcher (aarch64_feature_flags = 0);
> -  ~sve_switcher ();
> +  sve_alignment_switcher ();
> +  ~sve_alignment_switcher ();
>  
>  private:
>    unsigned int m_old_maximum_field_alignment;
> -  bool m_old_have_regs_of_mode[MAX_MACHINE_MODE];
> -};
> -
> -/* Extends sve_switch enough for defining arm_sme.h.  */
> -class sme_switcher : public sve_switcher
> -{
> -public:
> -  sme_switcher () : sve_switcher (AARCH64_FL_SME) {}
>  };
>  
>  extern const type_suffix_info type_suffixes[NUM_TYPE_SUFFIXES + 1];

Reply via email to