On 09/06/2017 07:17 PM, Andi Kleen wrote: > From: Andi Kleen <a...@linux.intel.com> > > I was looking at large early boot allocations and noticed that > since (1f12e32f x86/topology: Create logical package id) > every 64bit system allocates a 128k array to convert logical > package ids. > > This happens because the array is sized for > MAX_LOCAL_APIC and that is always 32k on 64bit systems, > and it needs 4 bytes for each entry. > > This is fairly wasteful, especially for the common case > of having only one socket, where we need 128K just to store > a single 4 byte value. > > The max logical APIC value is not known at this point, > so it's hard to size it correctly. > > The previous patch converted the only performance critical > user to cache the value, and all others are fairly > slow path, so we can just convert the O(1) array > lookup to a linear search in cpu_data() > > This can also avoid the need for an extra bitmap structure > to know if the logical package ID is already allocated. > We can also save this information in cpu_data and look it > up during the search. > > This patch removes the explicit arrays and replaces > the lookups with explicit searches. > > Overall the new code is somewhat simpler, and needs a lot > less run time memory. > > arch/x86/include/asm/processor.h | 6 +++++- > arch/x86/kernel/smpboot.c | 47 > ++++++++++++++++------------------------------- > 2 files changed, 21 insertions(+), 32 deletions(-) > > The naming of the variables in cpu_data is still not > great (_proc sometimes means package and sometimes means > logical processor), but I followed the existing (messy) > conventions when possible. At some point would be probably good > to clean this up too. > > Tested on a 2S system, but it would be good > to test on more obscure systems which may have problems > with package IDs. I'm copying Prarit who had problematic systems > before. > > Signed-off-by: Andi Kleen <a...@linux.intel.com> > --- > arch/x86/include/asm/processor.h | 6 ++++- > arch/x86/kernel/smpboot.c | 47 > ++++++++++++++-------------------------- > 2 files changed, 21 insertions(+), 32 deletions(-) > > diff --git a/arch/x86/include/asm/processor.h > b/arch/x86/include/asm/processor.h > index 3fa26a61eabc..d369d2a82d8f 100644 > --- a/arch/x86/include/asm/processor.h > +++ b/arch/x86/include/asm/processor.h > @@ -124,13 +124,17 @@ struct cpuinfo_x86 { > u16 booted_cores; > /* Physical processor id: */ > u16 phys_proc_id; > - /* Logical processor id: */ > + /* Logical processor (package) id: */ > u16 logical_proc_id; > + /* Physical package ID */ > + u16 phys_pkg_id; > /* Core id: */ > u16 cpu_core_id; > /* Index into per_cpu list: */ > u16 cpu_index; > u32 microcode; > + /* Flags */ > + unsigned logical_proc_set : 1; > } __randomize_layout; > > struct cpuid_regs { > diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c > index 54b9e89d4d6b..e78460aeca0a 100644 > --- a/arch/x86/kernel/smpboot.c > +++ b/arch/x86/kernel/smpboot.c > @@ -100,9 +100,6 @@ DEFINE_PER_CPU_READ_MOSTLY(struct cpuinfo_x86, cpu_info); > EXPORT_PER_CPU_SYMBOL(cpu_info); > > /* Logical package management. We might want to allocate that dynamically */ > -static int *physical_to_logical_pkg __read_mostly; > -static unsigned long *physical_package_map __read_mostly;; > -static unsigned int max_physical_pkg_id __read_mostly; > unsigned int __max_logical_packages __read_mostly; > EXPORT_SYMBOL(__max_logical_packages); > static unsigned int logical_packages __read_mostly; > @@ -282,17 +279,12 @@ static void notrace start_secondary(void *unused) > */ > int topology_update_package_map(unsigned int pkg, unsigned int cpu) > { > - unsigned int new; > + int new; > > - /* Called from early boot ? */ > - if (!physical_package_map) > - return 0; > > - if (pkg >= max_physical_pkg_id) > - return -EINVAL; > - > - /* Set the logical package id */ > - if (test_and_set_bit(pkg, physical_package_map)) > + /* Already available somewhere? */ > + new = topology_phys_to_logical_pkg(pkg); > + if (new >= 0) > goto found; > > if (logical_packages >= __max_logical_packages) { > @@ -306,10 +298,11 @@ int topology_update_package_map(unsigned int pkg, > unsigned int cpu) > pr_info("CPU %u Converting physical %u to logical package %u\n", > cpu, pkg, new); > } > - physical_to_logical_pkg[pkg] = new; > > found: > - cpu_data(cpu).logical_proc_id = physical_to_logical_pkg[pkg]; > + cpu_data(cpu).phys_pkg_id = pkg; > + cpu_data(cpu).logical_proc_id = new; > + cpu_data(cpu).logical_proc_set = 1; > return 0; > } > > @@ -320,16 +313,21 @@ int topology_update_package_map(unsigned int pkg, > unsigned int cpu) > */ > int topology_phys_to_logical_pkg(unsigned int phys_pkg) > { > - if (phys_pkg >= max_physical_pkg_id) > - return -1; > - return physical_to_logical_pkg[phys_pkg]; > + int cpu; > + > + for_each_possible_cpu (cpu) { > + if (cpu_data(cpu).phys_pkg_id == phys_pkg && > + cpu_data(cpu).logical_proc_set) { > + return cpu_data(cpu).logical_proc_id; > + } > + } > + return -1; > } > EXPORT_SYMBOL(topology_phys_to_logical_pkg); > > static void __init smp_init_package_map(struct cpuinfo_x86 *c, unsigned int > cpu) > { > unsigned int ncpus; > - size_t size; > > /* > * Today neither Intel nor AMD support heterogenous systems. That > @@ -362,19 +360,6 @@ static void __init smp_init_package_map(struct > cpuinfo_x86 *c, unsigned int cpu) > __max_logical_packages = DIV_ROUND_UP(total_cpus, ncpus); > logical_packages = 0; > > - /* > - * Possibly larger than what we need as the number of apic ids per > - * package can be smaller than the actual used apic ids. > - */ > - max_physical_pkg_id = DIV_ROUND_UP(MAX_LOCAL_APIC, ncpus); > - size = max_physical_pkg_id * sizeof(unsigned int); > - physical_to_logical_pkg = kmalloc(size, GFP_KERNEL); > - memset(physical_to_logical_pkg, 0xff, size); > - size = BITS_TO_LONGS(max_physical_pkg_id) * sizeof(unsigned long); > - physical_package_map = kzalloc(size, GFP_KERNEL); > - > - pr_info("Max logical packages: %u\n", __max_logical_packages);
^^ This pr_info needs to stay IMO. ... testing now. P. > - > topology_update_package_map(c->phys_proc_id, cpu); > } > >