On Thu, Jul 18, 2024 at 01:57:11PM +0200, Uros Bizjak wrote:
> Attached patch illustrates the proposed improvement with nested cpuid
> calls. Bootstrapped and tested with the libatomic testsuite.
> 
> Jakub, WDYT?

I'd probably keep the FEAT1_REGISTER = 0; before the if (__get_cpuid (1, ...)
to avoid the else, I think that could result in smaller code, but otherwise
LGTM, especially the use of just __cpuid there.  And note your patch doesn't
incorporate the Zhaoxin changes.

> diff --git a/libatomic/config/x86/init.c b/libatomic/config/x86/init.c
> index a75be3f175c..94d45683567 100644
> --- a/libatomic/config/x86/init.c
> +++ b/libatomic/config/x86/init.c
> @@ -32,22 +32,25 @@ unsigned int
>  __libat_feat1_init (void)
>  {
>    unsigned int eax, ebx, ecx, edx;
> -  FEAT1_REGISTER = 0;
> -  __get_cpuid (1, &eax, &ebx, &ecx, &edx);
> -#ifdef __x86_64__
> -  if ((FEAT1_REGISTER & (bit_AVX | bit_CMPXCHG16B))
> -      == (bit_AVX | bit_CMPXCHG16B))
> +  if (__get_cpuid (1, &eax, &ebx, &ecx, &edx))
>      {
> -      /* Intel SDM guarantees that 16-byte VMOVDQA on 16-byte aligned address
> -      is atomic, and AMD is going to do something similar soon.
> -      We don't have a guarantee from vendors of other CPUs with AVX,
> -      like Zhaoxin and VIA.  */
> -      unsigned int ecx2 = 0;
> -      __get_cpuid (0, &eax, &ebx, &ecx2, &edx);
> -      if (ecx2 != signature_INTEL_ecx && ecx2 != signature_AMD_ecx)
> -     FEAT1_REGISTER &= ~bit_AVX;
> -    }
> +#ifdef __x86_64__
> +      if ((FEAT1_REGISTER & (bit_AVX | bit_CMPXCHG16B))
> +       == (bit_AVX | bit_CMPXCHG16B))
> +     {
> +       /* Intel SDM guarantees that 16-byte VMOVDQA on 16-byte aligned
> +          address is atomic, and AMD is going to do something similar soon.
> +          We don't have a guarantee from vendors of other CPUs with AVX,
> +          like Zhaoxin and VIA.  */
> +       unsigned int ecx2;
> +       __cpuid (0, eax, ebx, ecx2, edx);
> +       if (ecx2 != signature_INTEL_ecx && ecx2 != signature_AMD_ecx)
> +         FEAT1_REGISTER &= ~bit_AVX;
> +     }
>  #endif
> +    }
> +  else
> +    FEAT1_REGISTER = 0;
>    /* See the load in load_feat1.  */
>    __atomic_store_n (&__libat_feat1, FEAT1_REGISTER, __ATOMIC_RELAXED);
>    return FEAT1_REGISTER;


        Jakub

Reply via email to