Hi Richard,

I've reworded the commit message a bit:

The CPU features initialization code uses CPUID registers (rather than
HWCAP).  The equality comparisons it uses are incorrect: for example FEAT_SVE
is not set if SVE2 is available.  Using HWCAPs for these is both simpler and
correct.  The initialization must also be done atomically to avoid multiple
threads causing corruption due to non-atomic RMW accesses to the global.

> What criteria did you use for choosing whether to keep or remove
> the system register checks?

Essentially anything covered by HWCAP doesn't need an explicit check. So I kept
the LS64 and PREDRES checks since they don't have a HWCAP allocated (I'm not
entirely convinced we need these, let alone having 3 individual bits for LS64, 
but
that's something for the ACLE spec to sort out). The goal here is to fix all 
obvious
bugs so one can use FMV as intended.

> Passes regress, OK for commit and backport?
>
> libgcc:
>         PR target/115342
>         * config/aarch64/cpuinfo.c (__init_cpu_features_constructor):
>         Use HWCAP where possible.  Use atomic write for initialization.

> It'd be good to mention the fix for the FEAT_PREDRES system register check
> as well.

Done, see below.

Cheers,
Wilco


v2: Update commit message and mention PREDRES.

The CPU features initialization code uses CPUID registers (rather than
HWCAP).  The equality comparisons it uses are incorrect: for example FEAT_SVE
is not set if SVE2 is available.  Using HWCAPs for these is both simpler and
correct.  The initialization must also be done atomically to avoid multiple
threads causing corruption due to non-atomic RMW accesses to the global.

Passes regress, OK for commit and backport?

libgcc:
        PR target/115342
        * config/aarch64/cpuinfo.c (__init_cpu_features_constructor):
        Use HWCAP where possible.  Use atomic write for initialization.
        Fix FEAT_PREDRES comparison.
        (__init_cpu_features_resolver): Use atomic load for correct
        initialization.
        (__init_cpu_features): Likewise.

---

diff --git a/libgcc/config/aarch64/cpuinfo.c b/libgcc/config/aarch64/cpuinfo.c
index 
4b94fca869507145ec690c825f637abbc82a3493..544c5516133ec3a554d1222de2ea9d5e6d4c27a9
 100644
--- a/libgcc/config/aarch64/cpuinfo.c
+++ b/libgcc/config/aarch64/cpuinfo.c
@@ -227,14 +227,22 @@ struct {
 #ifndef HWCAP2_SVE_EBF16
 #define HWCAP2_SVE_EBF16 (1UL << 33)
 #endif
+#ifndef HWCAP2_SME2
+#define HWCAP2_SME2 (1UL << 37)
+#endif
+#ifndef HWCAP2_LRCPC3
+#define HWCAP2_LRCPC3  (1UL << 46)
+#endif
 
 static void
-__init_cpu_features_constructor(unsigned long hwcap,
-                               const __ifunc_arg_t *arg) {
-#define setCPUFeature(F) __aarch64_cpu_features.features |= 1ULL << F
+__init_cpu_features_constructor (unsigned long hwcap,
+                                const __ifunc_arg_t *arg)
+{
+  unsigned long feat = 0;
+#define setCPUFeature(F) feat |= 1UL << F
 #define getCPUFeature(id, ftr) __asm__("mrs %0, " #id : "=r"(ftr))
 #define extractBits(val, start, number) \
-  (val & ((1ULL << number) - 1ULL) << start) >> start
+  (val & ((1UL << number) - 1UL) << start) >> start
   unsigned long hwcap2 = 0;
   if (hwcap & _IFUNC_ARG_HWCAP)
     hwcap2 = arg->_hwcap2;
@@ -244,26 +252,20 @@ __init_cpu_features_constructor(unsigned long hwcap,
     setCPUFeature(FEAT_PMULL);
   if (hwcap & HWCAP_FLAGM)
     setCPUFeature(FEAT_FLAGM);
-  if (hwcap2 & HWCAP2_FLAGM2) {
-    setCPUFeature(FEAT_FLAGM);
+  if (hwcap2 & HWCAP2_FLAGM2)
     setCPUFeature(FEAT_FLAGM2);
-  }
-  if (hwcap & HWCAP_SM3 && hwcap & HWCAP_SM4)
+  if (hwcap & HWCAP_SM4)
     setCPUFeature(FEAT_SM4);
   if (hwcap & HWCAP_ASIMDDP)
     setCPUFeature(FEAT_DOTPROD);
   if (hwcap & HWCAP_ASIMDFHM)
     setCPUFeature(FEAT_FP16FML);
-  if (hwcap & HWCAP_FPHP) {
+  if (hwcap & HWCAP_FPHP)
     setCPUFeature(FEAT_FP16);
-    setCPUFeature(FEAT_FP);
-  }
   if (hwcap & HWCAP_DIT)
     setCPUFeature(FEAT_DIT);
   if (hwcap & HWCAP_ASIMDRDM)
     setCPUFeature(FEAT_RDM);
-  if (hwcap & HWCAP_ILRCPC)
-    setCPUFeature(FEAT_RCPC2);
   if (hwcap & HWCAP_AES)
     setCPUFeature(FEAT_AES);
   if (hwcap & HWCAP_SHA1)
@@ -277,22 +279,21 @@ __init_cpu_features_constructor(unsigned long hwcap,
   if (hwcap & HWCAP_SB)
     setCPUFeature(FEAT_SB);
   if (hwcap & HWCAP_SSBS)
-    setCPUFeature(FEAT_SSBS2);
-  if (hwcap2 & HWCAP2_MTE) {
-    setCPUFeature(FEAT_MEMTAG);
-    setCPUFeature(FEAT_MEMTAG2);
-  }
-  if (hwcap2 & HWCAP2_MTE3) {
-    setCPUFeature(FEAT_MEMTAG);
-    setCPUFeature(FEAT_MEMTAG2);
+    {
+      setCPUFeature(FEAT_SSBS);
+      setCPUFeature(FEAT_SSBS2);
+    }
+  if (hwcap2 & HWCAP2_MTE)
+    {
+      setCPUFeature(FEAT_MEMTAG);
+      setCPUFeature(FEAT_MEMTAG2);
+    }
+  if (hwcap2 & HWCAP2_MTE3)
     setCPUFeature(FEAT_MEMTAG3);
-  }
   if (hwcap2 & HWCAP2_SVEAES)
     setCPUFeature(FEAT_SVE_AES);
-  if (hwcap2 & HWCAP2_SVEPMULL) {
-    setCPUFeature(FEAT_SVE_AES);
+  if (hwcap2 & HWCAP2_SVEPMULL)
     setCPUFeature(FEAT_SVE_PMULL128);
-  }
   if (hwcap2 & HWCAP2_SVEBITPERM)
     setCPUFeature(FEAT_SVE_BITPERM);
   if (hwcap2 & HWCAP2_SVESHA3)
@@ -329,108 +330,76 @@ __init_cpu_features_constructor(unsigned long hwcap,
     setCPUFeature(FEAT_WFXT);
   if (hwcap2 & HWCAP2_SME)
     setCPUFeature(FEAT_SME);
+  if (hwcap2 & HWCAP2_SME2)
+    setCPUFeature(FEAT_SME2);
   if (hwcap2 & HWCAP2_SME_I16I64)
     setCPUFeature(FEAT_SME_I64);
   if (hwcap2 & HWCAP2_SME_F64F64)
     setCPUFeature(FEAT_SME_F64);
-  if (hwcap & HWCAP_CPUID) {
-    unsigned long ftr;
-    getCPUFeature(ID_AA64PFR1_EL1, ftr);
-    /* ID_AA64PFR1_EL1.MTE >= 0b0001  */
-    if (extractBits(ftr, 8, 4) >= 0x1)
-      setCPUFeature(FEAT_MEMTAG);
-    /* ID_AA64PFR1_EL1.SSBS == 0b0001  */
-    if (extractBits(ftr, 4, 4) == 0x1)
-      setCPUFeature(FEAT_SSBS);
-    /* ID_AA64PFR1_EL1.SME == 0b0010  */
-    if (extractBits(ftr, 24, 4) == 0x2)
-      setCPUFeature(FEAT_SME2);
-    getCPUFeature(ID_AA64PFR0_EL1, ftr);
-    /* ID_AA64PFR0_EL1.FP != 0b1111  */
-    if (extractBits(ftr, 16, 4) != 0xF) {
-      setCPUFeature(FEAT_FP);
-      /* ID_AA64PFR0_EL1.AdvSIMD has the same value as ID_AA64PFR0_EL1.FP  */
-      setCPUFeature(FEAT_SIMD);
-    }
-    /* ID_AA64PFR0_EL1.SVE != 0b0000  */
-    if (extractBits(ftr, 32, 4) != 0x0) {
-      /* get ID_AA64ZFR0_EL1, that name supported if sve enabled only  */
-      getCPUFeature(S3_0_C0_C4_4, ftr);
-      /* ID_AA64ZFR0_EL1.SVEver == 0b0000  */
-      if (extractBits(ftr, 0, 4) == 0x0)
-       setCPUFeature(FEAT_SVE);
-      /* ID_AA64ZFR0_EL1.SVEver == 0b0001  */
-      if (extractBits(ftr, 0, 4) == 0x1)
-       setCPUFeature(FEAT_SVE2);
-      /* ID_AA64ZFR0_EL1.BF16 != 0b0000  */
-      if (extractBits(ftr, 20, 4) != 0x0)
-       setCPUFeature(FEAT_SVE_BF16);
+  if (hwcap & HWCAP_CPUID)
+    {
+      unsigned long ftr;
+
+      getCPUFeature(ID_AA64ISAR1_EL1, ftr);
+      /* ID_AA64ISAR1_EL1.SPECRES >= 0b0001  */
+      if (extractBits(ftr, 40, 4) >= 0x1)
+       setCPUFeature(FEAT_PREDRES);
+      /* ID_AA64ISAR1_EL1.LS64 >= 0b0001  */
+      if (extractBits(ftr, 60, 4) >= 0x1)
+       setCPUFeature(FEAT_LS64);
+      /* ID_AA64ISAR1_EL1.LS64 >= 0b0010  */
+      if (extractBits(ftr, 60, 4) >= 0x2)
+       setCPUFeature(FEAT_LS64_V);
+      /* ID_AA64ISAR1_EL1.LS64 >= 0b0011  */
+      if (extractBits(ftr, 60, 4) >= 0x3)
+       setCPUFeature(FEAT_LS64_ACCDATA);
     }
-    getCPUFeature(ID_AA64ISAR0_EL1, ftr);
-    /* ID_AA64ISAR0_EL1.SHA3 != 0b0000  */
-    if (extractBits(ftr, 32, 4) != 0x0)
-      setCPUFeature(FEAT_SHA3);
-    getCPUFeature(ID_AA64ISAR1_EL1, ftr);
-    /* ID_AA64ISAR1_EL1.DPB >= 0b0001  */
-    if (extractBits(ftr, 0, 4) >= 0x1)
-      setCPUFeature(FEAT_DPB);
-    /* ID_AA64ISAR1_EL1.LRCPC != 0b0000  */
-    if (extractBits(ftr, 20, 4) != 0x0)
-      setCPUFeature(FEAT_RCPC);
-    /* ID_AA64ISAR1_EL1.LRCPC == 0b0011  */
-    if (extractBits(ftr, 20, 4) == 0x3)
-      setCPUFeature(FEAT_RCPC3);
-    /* ID_AA64ISAR1_EL1.SPECRES == 0b0001  */
-    if (extractBits(ftr, 40, 4) == 0x2)
-      setCPUFeature(FEAT_PREDRES);
-    /* ID_AA64ISAR1_EL1.BF16 != 0b0000  */
-    if (extractBits(ftr, 44, 4) != 0x0)
-      setCPUFeature(FEAT_BF16);
-    /* ID_AA64ISAR1_EL1.LS64 >= 0b0001  */
-    if (extractBits(ftr, 60, 4) >= 0x1)
-      setCPUFeature(FEAT_LS64);
-    /* ID_AA64ISAR1_EL1.LS64 >= 0b0010  */
-    if (extractBits(ftr, 60, 4) >= 0x2)
-      setCPUFeature(FEAT_LS64_V);
-    /* ID_AA64ISAR1_EL1.LS64 >= 0b0011  */
-    if (extractBits(ftr, 60, 4) >= 0x3)
-      setCPUFeature(FEAT_LS64_ACCDATA);
-  } else {
-    /* Set some features in case of no CPUID support.  */
-    if (hwcap & (HWCAP_FP | HWCAP_FPHP)) {
+
+  if (hwcap & HWCAP_FP)
+    {
       setCPUFeature(FEAT_FP);
       /* FP and AdvSIMD fields have the same value.  */
       setCPUFeature(FEAT_SIMD);
     }
-    if (hwcap & HWCAP_DCPOP || hwcap2 & HWCAP2_DCPODP)
-      setCPUFeature(FEAT_DPB);
-    if (hwcap & HWCAP_LRCPC || hwcap & HWCAP_ILRCPC)
-      setCPUFeature(FEAT_RCPC);
-    if (hwcap2 & HWCAP2_BF16 || hwcap2 & HWCAP2_EBF16)
-      setCPUFeature(FEAT_BF16);
-    if (hwcap2 & HWCAP2_SVEBF16)
-      setCPUFeature(FEAT_SVE_BF16);
-    if (hwcap2 & HWCAP2_SVE2 && hwcap & HWCAP_SVE)
-      setCPUFeature(FEAT_SVE2);
-    if (hwcap & HWCAP_SHA3)
-      setCPUFeature(FEAT_SHA3);
-  }
+  if (hwcap & HWCAP_DCPOP)
+    setCPUFeature(FEAT_DPB);
+  if (hwcap & HWCAP_LRCPC)
+    setCPUFeature(FEAT_RCPC);
+  if (hwcap & HWCAP_ILRCPC)
+    setCPUFeature(FEAT_RCPC2);
+  if (hwcap2 & HWCAP2_LRCPC3)
+    setCPUFeature(FEAT_RCPC3);
+  if (hwcap2 & HWCAP2_BF16)
+    setCPUFeature(FEAT_BF16);
+  if (hwcap2 & HWCAP2_SVEBF16)
+    setCPUFeature(FEAT_SVE_BF16);
+  if (hwcap & HWCAP_SVE)
+    setCPUFeature(FEAT_SVE);
+  if (hwcap2 & HWCAP2_SVE2)
+    setCPUFeature(FEAT_SVE2);
+  if (hwcap & HWCAP_SHA3)
+    setCPUFeature(FEAT_SHA3);
   setCPUFeature(FEAT_INIT);
+
+  __atomic_store_n (&__aarch64_cpu_features.features, feat, __ATOMIC_RELAXED);
 }
 
 void
-__init_cpu_features_resolver(unsigned long hwcap, const __ifunc_arg_t *arg) {
-  if (__aarch64_cpu_features.features)
+__init_cpu_features_resolver(unsigned long hwcap, const __ifunc_arg_t *arg)
+{
+  if (__atomic_load_n (&__aarch64_cpu_features.features, __ATOMIC_RELAXED))
     return;
   __init_cpu_features_constructor(hwcap, arg);
 }
 
 void __attribute__ ((constructor))
-__init_cpu_features(void) {
+__init_cpu_features(void)
+{
   unsigned long hwcap;
   unsigned long hwcap2;
+
   /* CPU features already initialized.  */
-  if (__aarch64_cpu_features.features)
+  if (__atomic_load_n (&__aarch64_cpu_features.features, __ATOMIC_RELAXED))
     return;
   hwcap = getauxval(AT_HWCAP);
   hwcap2 = getauxval(AT_HWCAP2);


Reply via email to