Ping - any feedback?
Thanks!

On Wed, Mar 10, 2021 at 11:02:33AM -0800, Alison Schofield wrote:
> Commit 1340ccfa9a9a ("x86,sched: Allow topologies where NUMA nodes
> share an LLC") added a vendor and model specific check to never
> call topology_sane() for Intel Skylake Server systems where NUMA
> nodes share an LLC.
> 
> Intel Ice Lake and Sapphire Rapids CPUs also enumerate an LLC that is
> shared by multiple NUMA nodes. The LLC on these CPUs is shared for
> off-package data access but private to the NUMA node for on-package
> access. Rather than managing a list of allowable SNC topologies, make
> this SNC topology the default, and treat Intel's Cluster-On-Die (COD)
> topology as the exception.
> 
> In SNC mode, Sky Lake, Ice Lake, and Sapphire Rapids servers do not
> emit this warning:
> 
> sched: CPU #3's llc-sibling CPU #0 is not on the same node! [node: 1 != 0]. 
> Ignoring dependency.
> 
> Acked-by: Dave Hansen <dave.han...@linux.intel.com>
> Suggested-by: Peter Zijlstra (Intel) <pet...@infradead.org>
> Signed-off-by: Alison Schofield <alison.schofi...@intel.com>
> Cc: sta...@vger.kernel.org
> Cc: Dave Hansen <dave.han...@linux.intel.com>
> Cc: Tony Luck <tony.l...@intel.com>
> Cc: Tim Chen <tim.c.c...@linux.intel.com>
> Cc: "H. Peter Anvin" <h...@linux.intel.com>
> Cc: Borislav Petkov <b...@alien8.de>
> Cc: Peter Zijlstra (Intel) <pet...@infradead.org>
> Cc: David Rientjes <rient...@google.com>
> Cc: Igor Mammedov <imamm...@redhat.com>
> Cc: Prarit Bhargava <pra...@redhat.com>
> Cc: brice.gog...@gmail.com
> ---
> 
> Changes v2->v3:
> - This is a v3 of this patch: 
> https://lore.kernel.org/lkml/20210216195804.24204-1-alison.schofi...@intel.com/
> - Implemented PeterZ suggestion, and his code, to check for COD instead
>   of SNC.
> - Updated commit message and log.
> - Added 'Cc stable.
> 
> Changes v1->v2:
> - Implemented the minimal required change of adding the new models to
>   the existing vendor and model specific check.
> 
> - Side effect of going minimalist: no longer labelled an X86_BUG (TonyL)
> 
> - Considered PeterZ suggestion of checking for COD CPUs, rather than
>   SNC CPUs. That meant this snc_cpu list would go away, and so it never
>   needs updating. That ups the stakes for this patch wrt LOC changed
>   and testing needed. It actually drove me back to this simplest soln.
> 
> - Considered DaveH suggestion to remove the check altogether and recognize
>   these topologies as sane. Not running with that further here. This patch
>   is what is needed now. The broader discussion of sane topologies can
>   carry on independent of this update.
> 
> - Updated commit message and log.
> 
> 
>  arch/x86/kernel/smpboot.c | 90 ++++++++++++++++++++-------------------
>  1 file changed, 46 insertions(+), 44 deletions(-)
> 
> diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c
> index 02813a7f3a7c..147b2f3a2a09 100644
> --- a/arch/x86/kernel/smpboot.c
> +++ b/arch/x86/kernel/smpboot.c
> @@ -458,29 +458,52 @@ static bool match_smt(struct cpuinfo_x86 *c, struct 
> cpuinfo_x86 *o)
>       return false;
>  }
>  
> +static bool match_die(struct cpuinfo_x86 *c, struct cpuinfo_x86 *o)
> +{
> +     if (c->phys_proc_id == o->phys_proc_id &&
> +         c->cpu_die_id == o->cpu_die_id)
> +             return true;
> +     return false;
> +}
> +
> +/*
> + * Unlike the other levels, we do not enforce keeping a
> + * multicore group inside a NUMA node.  If this happens, we will
> + * discard the MC level of the topology later.
> + */
> +static bool match_pkg(struct cpuinfo_x86 *c, struct cpuinfo_x86 *o)
> +{
> +     if (c->phys_proc_id == o->phys_proc_id)
> +             return true;
> +     return false;
> +}
> +
>  /*
> - * Define snc_cpu[] for SNC (Sub-NUMA Cluster) CPUs.
> + * Define intel_cod_cpu[] for Intel COD (Cluster-on-Die) CPUs.
>   *
> - * These are Intel CPUs that enumerate an LLC that is shared by
> - * multiple NUMA nodes. The LLC on these systems is shared for
> - * off-package data access but private to the NUMA node (half
> - * of the package) for on-package access.
> + * Any Intel CPU that has multiple nodes per package and does not
> + * match intel_cod_cpu[] has the SNC (Sub-NUMA Cluster) topology.
>   *
> - * CPUID (the source of the information about the LLC) can only
> - * enumerate the cache as being shared *or* unshared, but not
> - * this particular configuration. The CPU in this case enumerates
> - * the cache to be shared across the entire package (spanning both
> - * NUMA nodes).
> + * When in SNC mode, these CPUs enumerate an LLC that is shared
> + * by multiple NUMA nodes. The LLC is shared for off-package data
> + * access but private to the NUMA node (half of the package) for
> + * on-package access. CPUID (the source of the information about
> + * the LLC) can only enumerate the cache as shared or unshared,
> + * but not this particular configuration.
>   */
>  
> -static const struct x86_cpu_id snc_cpu[] = {
> -     X86_MATCH_INTEL_FAM6_MODEL(SKYLAKE_X, NULL),
> +static const struct x86_cpu_id intel_cod_cpu[] = {
> +     X86_MATCH_INTEL_FAM6_MODEL(HASWELL_X, 0),       /* COD */
> +     X86_MATCH_INTEL_FAM6_MODEL(BROADWELL_X, 0),     /* COD */
> +     X86_MATCH_INTEL_FAM6_MODEL(ANY, 1),             /* SNC */
>       {}
>  };
>  
>  static bool match_llc(struct cpuinfo_x86 *c, struct cpuinfo_x86 *o)
>  {
> +     const struct x86_cpu_id *id = x86_match_cpu(intel_cod_cpu);
>       int cpu1 = c->cpu_index, cpu2 = o->cpu_index;
> +     bool intel_snc = id && id->driver_data;
>  
>       /* Do not match if we do not have a valid APICID for cpu: */
>       if (per_cpu(cpu_llc_id, cpu1) == BAD_APICID)
> @@ -495,32 +518,12 @@ static bool match_llc(struct cpuinfo_x86 *c, struct 
> cpuinfo_x86 *o)
>        * means 'c' does not share the LLC of 'o'. This will be
>        * reflected to userspace.
>        */
> -     if (!topology_same_node(c, o) && x86_match_cpu(snc_cpu))
> +     if (match_pkg(c, o) && !topology_same_node(c, o) && intel_snc)
>               return false;
>  
>       return topology_sane(c, o, "llc");
>  }
>  
> -/*
> - * Unlike the other levels, we do not enforce keeping a
> - * multicore group inside a NUMA node.  If this happens, we will
> - * discard the MC level of the topology later.
> - */
> -static bool match_pkg(struct cpuinfo_x86 *c, struct cpuinfo_x86 *o)
> -{
> -     if (c->phys_proc_id == o->phys_proc_id)
> -             return true;
> -     return false;
> -}
> -
> -static bool match_die(struct cpuinfo_x86 *c, struct cpuinfo_x86 *o)
> -{
> -     if ((c->phys_proc_id == o->phys_proc_id) &&
> -             (c->cpu_die_id == o->cpu_die_id))
> -             return true;
> -     return false;
> -}
> -
>  
>  #if defined(CONFIG_SCHED_SMT) || defined(CONFIG_SCHED_MC)
>  static inline int x86_sched_itmt_flags(void)
> @@ -592,14 +595,23 @@ void set_cpu_sibling_map(int cpu)
>       for_each_cpu(i, cpu_sibling_setup_mask) {
>               o = &cpu_data(i);
>  
> +             if (match_pkg(c, o) && !topology_same_node(c, o))
> +                     x86_has_numa_in_package = true;
> +
>               if ((i == cpu) || (has_smt && match_smt(c, o)))
>                       link_mask(topology_sibling_cpumask, cpu, i);
>  
>               if ((i == cpu) || (has_mp && match_llc(c, o)))
>                       link_mask(cpu_llc_shared_mask, cpu, i);
>  
> +             if ((i == cpu) || (has_mp && match_die(c, o)))
> +                     link_mask(topology_die_cpumask, cpu, i);
>       }
>  
> +     threads = cpumask_weight(topology_sibling_cpumask(cpu));
> +     if (threads > __max_smt_threads)
> +             __max_smt_threads = threads;
> +
>       /*
>        * This needs a separate iteration over the cpus because we rely on all
>        * topology_sibling_cpumask links to be set-up.
> @@ -613,8 +625,7 @@ void set_cpu_sibling_map(int cpu)
>                       /*
>                        *  Does this new cpu bringup a new core?
>                        */
> -                     if (cpumask_weight(
> -                         topology_sibling_cpumask(cpu)) == 1) {
> +                     if (threads == 1) {
>                               /*
>                                * for each core in package, increment
>                                * the booted_cores for this new cpu
> @@ -631,16 +642,7 @@ void set_cpu_sibling_map(int cpu)
>                       } else if (i != cpu && !c->booted_cores)
>                               c->booted_cores = cpu_data(i).booted_cores;
>               }
> -             if (match_pkg(c, o) && !topology_same_node(c, o))
> -                     x86_has_numa_in_package = true;
> -
> -             if ((i == cpu) || (has_mp && match_die(c, o)))
> -                     link_mask(topology_die_cpumask, cpu, i);
>       }
> -
> -     threads = cpumask_weight(topology_sibling_cpumask(cpu));
> -     if (threads > __max_smt_threads)
> -             __max_smt_threads = threads;
>  }
>  
>  /* maps the cpu to the sched domain representing multi-core */
> -- 
> 2.20.1
> 

Reply via email to