Wilco Dijkstra <wilco.dijks...@arm.com> writes: > Fix CPU features initialization. Use HWCAP rather than explicit accesses > to CPUID registers. Perform the initialization atomically to avoid multi- > threading issues.
Please describe the problem that the patch is fixing. I think the PR description would make a better commit message: ----------------------------------------------------------------------- The CPU features initialization code uses CPUID registers. It uses incorrect comparisons so that for example SVE is not set if SVE2 is available. Using HWCAPs for these is both simpler and works correctly. The initialization must also be done atomically so to avoid multiple threads causing corruption due to non-atomic RMW of the global. ----------------------------------------------------------------------- What criteria did you use for choosing whether to keep or remove the system register checks? > Passes regress, OK for commit and backport? > > libgcc: > PR target/115342 > * config/aarch64/cpuinfo.c (__init_cpu_features_constructor): > Use HWCAP where possible. Use atomic write for initialization. It'd be good to mention the fix for the FEAT_PREDRES system register check as well. > (__init_cpu_features_resolver): Use atomic load for correct > initialization. > (__init_cpu_features): Likewise. Thanks, Richard > > --- > > diff --git a/libgcc/config/aarch64/cpuinfo.c b/libgcc/config/aarch64/cpuinfo.c > index > 4b94fca869507145ec690c825f637abbc82a3493..544c5516133ec3a554d1222de2ea9d5e6d4c27a9 > 100644 > --- a/libgcc/config/aarch64/cpuinfo.c > +++ b/libgcc/config/aarch64/cpuinfo.c > @@ -227,14 +227,22 @@ struct { > #ifndef HWCAP2_SVE_EBF16 > #define HWCAP2_SVE_EBF16 (1UL << 33) > #endif > +#ifndef HWCAP2_SME2 > +#define HWCAP2_SME2 (1UL << 37) > +#endif > +#ifndef HWCAP2_LRCPC3 > +#define HWCAP2_LRCPC3 (1UL << 46) > +#endif > > static void > -__init_cpu_features_constructor(unsigned long hwcap, > - const __ifunc_arg_t *arg) { > -#define setCPUFeature(F) __aarch64_cpu_features.features |= 1ULL << F > +__init_cpu_features_constructor (unsigned long hwcap, > + const __ifunc_arg_t *arg) > +{ > + unsigned long feat = 0; > +#define setCPUFeature(F) feat |= 1UL << F > #define getCPUFeature(id, ftr) __asm__("mrs %0, " #id : "=r"(ftr)) > #define extractBits(val, start, number) \ > - (val & ((1ULL << number) - 1ULL) << start) >> start > + (val & ((1UL << number) - 1UL) << start) >> start > unsigned long hwcap2 = 0; > if (hwcap & _IFUNC_ARG_HWCAP) > hwcap2 = arg->_hwcap2; > @@ -244,26 +252,20 @@ __init_cpu_features_constructor(unsigned long hwcap, > setCPUFeature(FEAT_PMULL); > if (hwcap & HWCAP_FLAGM) > setCPUFeature(FEAT_FLAGM); > - if (hwcap2 & HWCAP2_FLAGM2) { > - setCPUFeature(FEAT_FLAGM); > + if (hwcap2 & HWCAP2_FLAGM2) > setCPUFeature(FEAT_FLAGM2); > - } > - if (hwcap & HWCAP_SM3 && hwcap & HWCAP_SM4) > + if (hwcap & HWCAP_SM4) > setCPUFeature(FEAT_SM4); > if (hwcap & HWCAP_ASIMDDP) > setCPUFeature(FEAT_DOTPROD); > if (hwcap & HWCAP_ASIMDFHM) > setCPUFeature(FEAT_FP16FML); > - if (hwcap & HWCAP_FPHP) { > + if (hwcap & HWCAP_FPHP) > setCPUFeature(FEAT_FP16); > - setCPUFeature(FEAT_FP); > - } > if (hwcap & HWCAP_DIT) > setCPUFeature(FEAT_DIT); > if (hwcap & HWCAP_ASIMDRDM) > setCPUFeature(FEAT_RDM); > - if (hwcap & HWCAP_ILRCPC) > - setCPUFeature(FEAT_RCPC2); > if (hwcap & HWCAP_AES) > setCPUFeature(FEAT_AES); > if (hwcap & HWCAP_SHA1) > @@ -277,22 +279,21 @@ __init_cpu_features_constructor(unsigned long hwcap, > if (hwcap & HWCAP_SB) > setCPUFeature(FEAT_SB); > if (hwcap & HWCAP_SSBS) > - setCPUFeature(FEAT_SSBS2); > - if (hwcap2 & HWCAP2_MTE) { > - setCPUFeature(FEAT_MEMTAG); > - setCPUFeature(FEAT_MEMTAG2); > - } > - if (hwcap2 & HWCAP2_MTE3) { > - setCPUFeature(FEAT_MEMTAG); > - setCPUFeature(FEAT_MEMTAG2); > + { > + setCPUFeature(FEAT_SSBS); > + setCPUFeature(FEAT_SSBS2); > + } > + if (hwcap2 & HWCAP2_MTE) > + { > + setCPUFeature(FEAT_MEMTAG); > + setCPUFeature(FEAT_MEMTAG2); > + } > + if (hwcap2 & HWCAP2_MTE3) > setCPUFeature(FEAT_MEMTAG3); > - } > if (hwcap2 & HWCAP2_SVEAES) > setCPUFeature(FEAT_SVE_AES); > - if (hwcap2 & HWCAP2_SVEPMULL) { > - setCPUFeature(FEAT_SVE_AES); > + if (hwcap2 & HWCAP2_SVEPMULL) > setCPUFeature(FEAT_SVE_PMULL128); > - } > if (hwcap2 & HWCAP2_SVEBITPERM) > setCPUFeature(FEAT_SVE_BITPERM); > if (hwcap2 & HWCAP2_SVESHA3) > @@ -329,108 +330,76 @@ __init_cpu_features_constructor(unsigned long hwcap, > setCPUFeature(FEAT_WFXT); > if (hwcap2 & HWCAP2_SME) > setCPUFeature(FEAT_SME); > + if (hwcap2 & HWCAP2_SME2) > + setCPUFeature(FEAT_SME2); > if (hwcap2 & HWCAP2_SME_I16I64) > setCPUFeature(FEAT_SME_I64); > if (hwcap2 & HWCAP2_SME_F64F64) > setCPUFeature(FEAT_SME_F64); > - if (hwcap & HWCAP_CPUID) { > - unsigned long ftr; > - getCPUFeature(ID_AA64PFR1_EL1, ftr); > - /* ID_AA64PFR1_EL1.MTE >= 0b0001 */ > - if (extractBits(ftr, 8, 4) >= 0x1) > - setCPUFeature(FEAT_MEMTAG); > - /* ID_AA64PFR1_EL1.SSBS == 0b0001 */ > - if (extractBits(ftr, 4, 4) == 0x1) > - setCPUFeature(FEAT_SSBS); > - /* ID_AA64PFR1_EL1.SME == 0b0010 */ > - if (extractBits(ftr, 24, 4) == 0x2) > - setCPUFeature(FEAT_SME2); > - getCPUFeature(ID_AA64PFR0_EL1, ftr); > - /* ID_AA64PFR0_EL1.FP != 0b1111 */ > - if (extractBits(ftr, 16, 4) != 0xF) { > - setCPUFeature(FEAT_FP); > - /* ID_AA64PFR0_EL1.AdvSIMD has the same value as ID_AA64PFR0_EL1.FP */ > - setCPUFeature(FEAT_SIMD); > - } > - /* ID_AA64PFR0_EL1.SVE != 0b0000 */ > - if (extractBits(ftr, 32, 4) != 0x0) { > - /* get ID_AA64ZFR0_EL1, that name supported if sve enabled only */ > - getCPUFeature(S3_0_C0_C4_4, ftr); > - /* ID_AA64ZFR0_EL1.SVEver == 0b0000 */ > - if (extractBits(ftr, 0, 4) == 0x0) > - setCPUFeature(FEAT_SVE); > - /* ID_AA64ZFR0_EL1.SVEver == 0b0001 */ > - if (extractBits(ftr, 0, 4) == 0x1) > - setCPUFeature(FEAT_SVE2); > - /* ID_AA64ZFR0_EL1.BF16 != 0b0000 */ > - if (extractBits(ftr, 20, 4) != 0x0) > - setCPUFeature(FEAT_SVE_BF16); > + if (hwcap & HWCAP_CPUID) > + { > + unsigned long ftr; > + > + getCPUFeature(ID_AA64ISAR1_EL1, ftr); > + /* ID_AA64ISAR1_EL1.SPECRES >= 0b0001 */ > + if (extractBits(ftr, 40, 4) >= 0x1) > + setCPUFeature(FEAT_PREDRES); > + /* ID_AA64ISAR1_EL1.LS64 >= 0b0001 */ > + if (extractBits(ftr, 60, 4) >= 0x1) > + setCPUFeature(FEAT_LS64); > + /* ID_AA64ISAR1_EL1.LS64 >= 0b0010 */ > + if (extractBits(ftr, 60, 4) >= 0x2) > + setCPUFeature(FEAT_LS64_V); > + /* ID_AA64ISAR1_EL1.LS64 >= 0b0011 */ > + if (extractBits(ftr, 60, 4) >= 0x3) > + setCPUFeature(FEAT_LS64_ACCDATA); > } > - getCPUFeature(ID_AA64ISAR0_EL1, ftr); > - /* ID_AA64ISAR0_EL1.SHA3 != 0b0000 */ > - if (extractBits(ftr, 32, 4) != 0x0) > - setCPUFeature(FEAT_SHA3); > - getCPUFeature(ID_AA64ISAR1_EL1, ftr); > - /* ID_AA64ISAR1_EL1.DPB >= 0b0001 */ > - if (extractBits(ftr, 0, 4) >= 0x1) > - setCPUFeature(FEAT_DPB); > - /* ID_AA64ISAR1_EL1.LRCPC != 0b0000 */ > - if (extractBits(ftr, 20, 4) != 0x0) > - setCPUFeature(FEAT_RCPC); > - /* ID_AA64ISAR1_EL1.LRCPC == 0b0011 */ > - if (extractBits(ftr, 20, 4) == 0x3) > - setCPUFeature(FEAT_RCPC3); > - /* ID_AA64ISAR1_EL1.SPECRES == 0b0001 */ > - if (extractBits(ftr, 40, 4) == 0x2) > - setCPUFeature(FEAT_PREDRES); > - /* ID_AA64ISAR1_EL1.BF16 != 0b0000 */ > - if (extractBits(ftr, 44, 4) != 0x0) > - setCPUFeature(FEAT_BF16); > - /* ID_AA64ISAR1_EL1.LS64 >= 0b0001 */ > - if (extractBits(ftr, 60, 4) >= 0x1) > - setCPUFeature(FEAT_LS64); > - /* ID_AA64ISAR1_EL1.LS64 >= 0b0010 */ > - if (extractBits(ftr, 60, 4) >= 0x2) > - setCPUFeature(FEAT_LS64_V); > - /* ID_AA64ISAR1_EL1.LS64 >= 0b0011 */ > - if (extractBits(ftr, 60, 4) >= 0x3) > - setCPUFeature(FEAT_LS64_ACCDATA); > - } else { > - /* Set some features in case of no CPUID support. */ > - if (hwcap & (HWCAP_FP | HWCAP_FPHP)) { > + > + if (hwcap & HWCAP_FP) > + { > setCPUFeature(FEAT_FP); > /* FP and AdvSIMD fields have the same value. */ > setCPUFeature(FEAT_SIMD); > } > - if (hwcap & HWCAP_DCPOP || hwcap2 & HWCAP2_DCPODP) > - setCPUFeature(FEAT_DPB); > - if (hwcap & HWCAP_LRCPC || hwcap & HWCAP_ILRCPC) > - setCPUFeature(FEAT_RCPC); > - if (hwcap2 & HWCAP2_BF16 || hwcap2 & HWCAP2_EBF16) > - setCPUFeature(FEAT_BF16); > - if (hwcap2 & HWCAP2_SVEBF16) > - setCPUFeature(FEAT_SVE_BF16); > - if (hwcap2 & HWCAP2_SVE2 && hwcap & HWCAP_SVE) > - setCPUFeature(FEAT_SVE2); > - if (hwcap & HWCAP_SHA3) > - setCPUFeature(FEAT_SHA3); > - } > + if (hwcap & HWCAP_DCPOP) > + setCPUFeature(FEAT_DPB); > + if (hwcap & HWCAP_LRCPC) > + setCPUFeature(FEAT_RCPC); > + if (hwcap & HWCAP_ILRCPC) > + setCPUFeature(FEAT_RCPC2); > + if (hwcap2 & HWCAP2_LRCPC3) > + setCPUFeature(FEAT_RCPC3); > + if (hwcap2 & HWCAP2_BF16) > + setCPUFeature(FEAT_BF16); > + if (hwcap2 & HWCAP2_SVEBF16) > + setCPUFeature(FEAT_SVE_BF16); > + if (hwcap & HWCAP_SVE) > + setCPUFeature(FEAT_SVE); > + if (hwcap2 & HWCAP2_SVE2) > + setCPUFeature(FEAT_SVE2); > + if (hwcap & HWCAP_SHA3) > + setCPUFeature(FEAT_SHA3); > setCPUFeature(FEAT_INIT); > + > + __atomic_store_n (&__aarch64_cpu_features.features, feat, > __ATOMIC_RELAXED); > } > > void > -__init_cpu_features_resolver(unsigned long hwcap, const __ifunc_arg_t *arg) { > - if (__aarch64_cpu_features.features) > +__init_cpu_features_resolver(unsigned long hwcap, const __ifunc_arg_t *arg) > +{ > + if (__atomic_load_n (&__aarch64_cpu_features.features, __ATOMIC_RELAXED)) > return; > __init_cpu_features_constructor(hwcap, arg); > } > > void __attribute__ ((constructor)) > -__init_cpu_features(void) { > +__init_cpu_features(void) > +{ > unsigned long hwcap; > unsigned long hwcap2; > + > /* CPU features already initialized. */ > - if (__aarch64_cpu_features.features) > + if (__atomic_load_n (&__aarch64_cpu_features.features, __ATOMIC_RELAXED)) > return; > hwcap = getauxval(AT_HWCAP); > hwcap2 = getauxval(AT_HWCAP2);