Wilco Dijkstra <wilco.dijks...@arm.com> writes:
> Feedback from the kernel team suggests that it's best to only use HWCAPs
> rather than also use low-level checks as done by has_lse128() and has_rcpc3().
> So change these to just use HWCAPs which simplifies the code and speeds up
> ifunc selection by avoiding expensive system register accesses.

Could you give details?  I thought it was always known that trapped
system register accesses were slow.  In the previous versions, the
checks seemed to be presented as an up-front price worth paying for
faster atomic operations, on the systems that would use those paths.
Now the checks are being presented as something that are good to remove
to make the code simpler and faster.

There have been a few changes to this code in the current release cycle,
and each time it seems like the new version is being presented as better
than the previous one with single-sentence justifications.

Could we instead have a comment in the code discussing the various
approaches that we could take, including the ones that previous versions
took, describes the trade-offs, and explains why we've chosen to do what
we've chosen to do?

Thanks,
Richard

>
> Passes regress, OK for commit?
>
> libatomic:
>         * config/linux/aarch64/host-config.h (has_lse2): Remove unused arg.
>         (has_lse128): Change to just use HWCAPs.
>         (has_rcpc3): Likewise.
>
> ---
>
> diff --git a/libatomic/config/linux/aarch64/host-config.h 
> b/libatomic/config/linux/aarch64/host-config.h
> index 
> d0d44bf18eaa64437f52c2894da6ece9e02618df..6a4f7014323a2ed196cabe408aaa6df0d2521518
>  100644
> --- a/libatomic/config/linux/aarch64/host-config.h
> +++ b/libatomic/config/linux/aarch64/host-config.h
> @@ -69,7 +69,7 @@ typedef struct __ifunc_arg_t {
>  #  elif defined (LSE2_LRCPC3_ATOP)
>  #   define IFUNC_NCOND(N)      2
>  #   define IFUNC_COND_1        (has_rcpc3 (hwcap, features))
> -#   define IFUNC_COND_2        (has_lse2 (hwcap, features))
> +#   define IFUNC_COND_2        (has_lse2 (hwcap))
>  #  elif defined (LSE128_ATOP)
>  #   define IFUNC_NCOND(N)      1
>  #   define IFUNC_COND_1        (has_lse128 (hwcap, features))
> @@ -86,7 +86,7 @@ typedef struct __ifunc_arg_t {
>  #define MIDR_PARTNUM(midr)     (((midr) >> 4) & 0xfff)
>
>  static inline bool
> -has_lse2 (unsigned long hwcap, const __ifunc_arg_t *features)
> +has_lse2 (unsigned long hwcap)
>  {
>    /* Check for LSE2.  */
>    if (hwcap & HWCAP_USCAT)
> @@ -105,50 +105,20 @@ has_lse2 (unsigned long hwcap, const __ifunc_arg_t 
> *features)
>    return false;
>  }
>
> -/* LSE128 atomic support encoded in ID_AA64ISAR0_EL1.Atomic, bits[23:20].
> -   The minimum value for LSE128 is 0b0011.  */
> -
> -#define AT_FEAT_FIELD(isar0)   (((isar0) >> 20) & 15)
> -
>  static inline bool
>  has_lse128 (unsigned long hwcap, const __ifunc_arg_t *features)
>  {
> -  if (hwcap & _IFUNC_ARG_HWCAP && features->_hwcap2 & HWCAP2_LSE128)
> -    return true;
> -
> -  /* If LSE2 and CPUID are supported, check for LSE128.  */
> -  if (hwcap & HWCAP_CPUID && hwcap & HWCAP_USCAT)
> -    {
> -      unsigned long isar0;
> -      asm volatile ("mrs %0, ID_AA64ISAR0_EL1" : "=r" (isar0));
> -      return AT_FEAT_FIELD (isar0) >= 3;
> -    }
> -
> -  return false;
> +  return hwcap & _IFUNC_ARG_HWCAP && features->_hwcap2 & HWCAP2_LSE128;
>  }
>
> -/* LRCPC atomic support encoded in ID_AA64ISAR1_EL1.Atomic, bits[23:20].
> -   The minimum value for LRCPC3 is 0b0011.  */
> -
>  static inline bool
>  has_rcpc3 (unsigned long hwcap, const __ifunc_arg_t *features)
>  {
>    /* LSE2 is a prerequisite for atomic LDIAPP/STILP - check HWCAP_USCAT since
>       has_lse2 is more expensive and Neoverse N1 does not have LRCPC3. */
> -  if (!(hwcap & HWCAP_USCAT))
> -    return false;
> -
> -  if (hwcap & _IFUNC_ARG_HWCAP && features->_hwcap2 & HWCAP2_LRCPC3)
> -    return true;
> -
> -  if (hwcap & HWCAP_CPUID)
> -    {
> -      unsigned long isar1;
> -      asm volatile ("mrs %0, ID_AA64ISAR1_EL1" : "=r" (isar1));
> -      return AT_FEAT_FIELD (isar1) >= 3;
> -    }
> -
> -  return false;
> +  return (hwcap & HWCAP_USCAT
> +         && hwcap & _IFUNC_ARG_HWCAP
> +         && features->_hwcap2 & HWCAP2_LRCPC3);
>  }
>
>  #endif /* HAVE_IFUNC */

Reply via email to