Module Name: src Committed By: martin Date: Wed Oct 23 19:14:19 UTC 2019
Modified Files: src/sys/arch/aarch64/aarch64 [netbsd-9]: cpu.c locore.S src/sys/arch/aarch64/include [netbsd-9]: cpu.h param.h src/sys/arch/arm/acpi [netbsd-9]: cpu_acpi.c src/sys/arch/arm/arm32 [netbsd-9]: cpu.c src/sys/arch/arm/fdt [netbsd-9]: cpu_fdt.c src/sys/arch/arm/include [netbsd-9]: cpu.h Log Message: Pull up following revision(s) (requested by jmcneill in ticket #359): sys/arch/aarch64/aarch64/locore.S: revision 1.42 sys/arch/aarch64/aarch64/locore.S: revision 1.43 sys/arch/aarch64/aarch64/locore.S: revision 1.44 sys/arch/arm/fdt/cpu_fdt.c: revision 1.28 sys/arch/aarch64/include/cpu.h: revision 1.14 sys/arch/aarch64/include/param.h: revision 1.12 sys/arch/arm/arm32/cpu.c: revision 1.133 sys/arch/arm/arm32/cpu.c: revision 1.134 sys/arch/arm/include/cpu.h: revision 1.101 sys/arch/arm/acpi/cpu_acpi.c: revision 1.7 sys/arch/aarch64/aarch64/cpu.c: revision 1.23 sys/arch/aarch64/aarch64/cpu.c: revision 1.24 sys/arch/aarch64/aarch64/cpu.c: revision 1.25 Increase aarch64 MAXCPUS to 256. - Invalidate dcache before polling AP hatched status - Avoid overlap between BP and last AP stack. AP stacks are now laid out in order of increasing address. Spotted by and idea from mlelstv. - Use separate cacheline aligned arrays for mbox and hatched as before. - cpu_hatched_p only for MULTIPROCESSOR To generate a diff of this commit: cvs rdiff -u -r1.20 -r1.20.2.1 src/sys/arch/aarch64/aarch64/cpu.c cvs rdiff -u -r1.39.2.1 -r1.39.2.2 src/sys/arch/aarch64/aarch64/locore.S cvs rdiff -u -r1.13 -r1.13.4.1 src/sys/arch/aarch64/include/cpu.h cvs rdiff -u -r1.11 -r1.11.4.1 src/sys/arch/aarch64/include/param.h cvs rdiff -u -r1.6 -r1.6.4.1 src/sys/arch/arm/acpi/cpu_acpi.c cvs rdiff -u -r1.129 -r1.129.4.1 src/sys/arch/arm/arm32/cpu.c cvs rdiff -u -r1.25 -r1.25.4.1 src/sys/arch/arm/fdt/cpu_fdt.c cvs rdiff -u -r1.100 -r1.100.4.1 src/sys/arch/arm/include/cpu.h Please note that diffs are not public domain; they are subject to the copyright notices on the relevant files.
Modified files: Index: src/sys/arch/aarch64/aarch64/cpu.c diff -u src/sys/arch/aarch64/aarch64/cpu.c:1.20 src/sys/arch/aarch64/aarch64/cpu.c:1.20.2.1 --- src/sys/arch/aarch64/aarch64/cpu.c:1.20 Tue Jul 16 20:29:53 2019 +++ src/sys/arch/aarch64/aarch64/cpu.c Wed Oct 23 19:14:19 2019 @@ -1,4 +1,4 @@ -/* $NetBSD: cpu.c,v 1.20 2019/07/16 20:29:53 jmcneill Exp $ */ +/* $NetBSD: cpu.c,v 1.20.2.1 2019/10/23 19:14:19 martin Exp $ */ /* * Copyright (c) 2017 Ryo Shimizu <r...@nerv.org> @@ -27,7 +27,7 @@ */ #include <sys/cdefs.h> -__KERNEL_RCSID(1, "$NetBSD: cpu.c,v 1.20 2019/07/16 20:29:53 jmcneill Exp $"); +__KERNEL_RCSID(1, "$NetBSD: cpu.c,v 1.20.2.1 2019/10/23 19:14:19 martin Exp $"); #include "locators.h" #include "opt_arm_debug.h" @@ -69,8 +69,8 @@ static void cpu_setup_sysctl(device_t, s #ifdef MULTIPROCESSOR uint64_t cpu_mpidr[MAXCPUS]; -volatile u_int arm_cpu_hatched __cacheline_aligned = 0; -volatile uint32_t arm_cpu_mbox __cacheline_aligned = 0; +volatile u_int aarch64_cpu_mbox[howmany(MAXCPUS, sizeof(u_int))] __cacheline_aligned = { 0 }; +volatile u_int aarch64_cpu_hatched[howmany(MAXCPUS, sizeof(u_int))] __cacheline_aligned = { 0 }; u_int arm_cpu_max = 1; static kmutex_t cpu_hatch_lock; @@ -126,7 +126,7 @@ cpu_attach(device_t dv, cpuid_t id) /* ci_id is stored by own cpus when hatching */ cpu_info[ncpu] = ci; - if ((arm_cpu_hatched & __BIT(unit)) == 0) { + if (cpu_hatched_p(unit) == 0) { ci->ci_dev = dv; dv->dv_private = ci; ci->ci_index = -1; @@ -499,27 +499,33 @@ cpu_setup_sysctl(device_t dv, struct cpu void cpu_boot_secondary_processors(void) { + u_int n, bit; + if ((boothowto & RB_MD1) != 0) return; mutex_init(&cpu_hatch_lock, MUTEX_DEFAULT, IPL_NONE); - VPRINTF("%s: writing mbox with %#x\n", __func__, arm_cpu_hatched); + VPRINTF("%s: starting secondary processors\n", __func__); /* send mbox to have secondary processors do cpu_hatch() */ - atomic_or_32(&arm_cpu_mbox, arm_cpu_hatched); + for (n = 0; n < __arraycount(aarch64_cpu_mbox); n++) + 
atomic_or_uint(&aarch64_cpu_mbox[n], aarch64_cpu_hatched[n]); __asm __volatile ("sev; sev; sev"); /* wait all cpus have done cpu_hatch() */ - while (membar_consumer(), arm_cpu_mbox & arm_cpu_hatched) { - __asm __volatile ("wfe"); + for (n = 0; n < __arraycount(aarch64_cpu_mbox); n++) { + while (membar_consumer(), aarch64_cpu_mbox[n] & aarch64_cpu_hatched[n]) { + __asm __volatile ("wfe"); + } + /* Add processors to kcpuset */ + for (bit = 0; bit < 32; bit++) { + if (aarch64_cpu_hatched[n] & __BIT(bit)) + kcpuset_set(kcpuset_attached, n * 32 + bit); + } } VPRINTF("%s: secondary processors hatched\n", __func__); - - /* add available processors to kcpuset */ - uint32_t mbox = arm_cpu_hatched; - kcpuset_export_u32(kcpuset_attached, &mbox, sizeof(mbox)); } void @@ -549,12 +555,23 @@ cpu_hatch(struct cpu_info *ci) #endif /* - * clear my bit of arm_cpu_mbox to tell cpu_boot_secondary_processors(). + * clear my bit of aarch64_cpu_mbox to tell cpu_boot_secondary_processors(). * there are cpu0,1,2,3, and if cpu2 is unresponsive, * ci_index are each cpu0=0, cpu1=1, cpu2=undef, cpu3=2. * therefore we have to use device_unit instead of ci_index for mbox. 
*/ - atomic_and_32(&arm_cpu_mbox, ~__BIT(device_unit(ci->ci_dev))); + const u_int off = device_unit(ci->ci_dev) / 32; + const u_int bit = device_unit(ci->ci_dev) % 32; + atomic_and_uint(&aarch64_cpu_mbox[off], ~__BIT(bit)); __asm __volatile ("sev; sev; sev"); } + +bool +cpu_hatched_p(u_int cpuindex) +{ + const u_int off = cpuindex / 32; + const u_int bit = cpuindex % 32; + membar_consumer(); + return (aarch64_cpu_hatched[off] & __BIT(bit)) != 0; +} #endif /* MULTIPROCESSOR */ Index: src/sys/arch/aarch64/aarch64/locore.S diff -u src/sys/arch/aarch64/aarch64/locore.S:1.39.2.1 src/sys/arch/aarch64/aarch64/locore.S:1.39.2.2 --- src/sys/arch/aarch64/aarch64/locore.S:1.39.2.1 Sun Sep 22 12:39:36 2019 +++ src/sys/arch/aarch64/aarch64/locore.S Wed Oct 23 19:14:19 2019 @@ -1,4 +1,4 @@ -/* $NetBSD: locore.S,v 1.39.2.1 2019/09/22 12:39:36 martin Exp $ */ +/* $NetBSD: locore.S,v 1.39.2.2 2019/10/23 19:14:19 martin Exp $ */ /* * Copyright (c) 2017 Ryo Shimizu <r...@nerv.org> @@ -38,7 +38,7 @@ #include <aarch64/hypervisor.h> #include "assym.h" -RCSID("$NetBSD: locore.S,v 1.39.2.1 2019/09/22 12:39:36 martin Exp $") +RCSID("$NetBSD: locore.S,v 1.39.2.2 2019/10/23 19:14:19 martin Exp $") /*#define DEBUG_LOCORE /* debug print */ @@ -361,28 +361,23 @@ ENTRY_NP(cpu_mpstart) mov x1, xzr 1: add x1, x1, #1 - cmp x1, MAXCPUS /* cpuindex >= MAXCPUS ? */ + cmp x1, #MAXCPUS /* cpuindex >= MAXCPUS ? */ bge toomanycpus ldr x2, [x0, x1, lsl #3] /* cpu_mpidr[cpuindex] */ cmp x2, x3 /* == mpidr_el1 & MPIDR_AFF ? 
*/ bne 1b mov x27, x1 /* x27 = cpuindex */ - mov x0, #1 - lsl x28, x0, x27 /* x28 = 1 << cpuindex */ - /* * x27 = cpuindex - * x28 = (1 << cpuindex) */ /* set stack pointer for boot */ mov x1, #BOOT_AP_STACKSIZE mul x1, x1, x27 - ADDR x0, bootstk_cpus - sub sp, x0, x1 /* sp = bootstk_cpus-(BOOT_AP_STACKSIZE*cpuindex) */ - + ADDR x0, bootstk + add sp, x0, x1 /* sp = bootstk + (BOOT_AP_STACKSIZE * cpuindex) */ bl 1f 1: CPU_DPRINTREG("PC = ", lr) @@ -447,28 +442,39 @@ mp_vstart: mrs x1, mpidr_el1 str x1, [x0, #CI_MPIDR] /* curcpu()->ci_mpidr = mpidr_el1 */ - CPU_DPRINTREG("arm_cpu_hatched |= ", x28) + mov x0, #32 + udiv x1, x27, x0 + ADDR x0, _C_LABEL(aarch64_cpu_hatched) + add x28, x0, x1, lsl #2 /* x28 = &aarch64_cpu_hatched[cpuindex/32] */ + mov x0, #1 + mov x2, #32 + msub x1, x1, x2, x27 + lsl x29, x0, x1 /* x29 = 1 << (cpuindex % 32) */ /* - * atomic_or_32(&arm_cpu_hatched, (1 << cpuindex)) + * atomic_or_uint(&aarch64_cpu_hatched[cpuindex/32], 1<<cpuindex%32) * to tell my activity to primary processor. */ - ADDR x0, _C_LABEL(arm_cpu_hatched) - mov x1, x28 - bl _C_LABEL(atomic_or_32) /* hatched! */ + mov x0, x28 + mov x1, x29 + bl _C_LABEL(atomic_or_uint) /* hatched! 
*/ + dsb sy sev - /* wait for my bit of arm_cpu_mbox become true */ - ADDR x0, _C_LABEL(arm_cpu_mbox) + mov x0, #32 + udiv x1, x27, x0 + ADDR x0, _C_LABEL(aarch64_cpu_mbox) + add x28, x0, x1, lsl #2 /* x28 = &aarch64_cpu_mbox[cpuindex/32] */ + + /* wait for the mailbox start bit to become true */ 1: dmb sy - ldr x20, [x0] - tst x20, x28 + ldr w20, [x28] + tst w20, w29 bne 9f wfe b 1b 9: -// CPU_DPRINTREG("got arm_cpu_mbox = ", x20) /* fill my cpu_info */ mrs x0, tpidr_el1 /* curcpu() */ @@ -1042,7 +1048,6 @@ bootstk: #ifdef MULTIPROCESSOR .space BOOT_AP_STACKSIZE * (MAXCPUS - 1) -bootstk_cpus: #endif .section ".init_pagetable", "aw", %nobits Index: src/sys/arch/aarch64/include/cpu.h diff -u src/sys/arch/aarch64/include/cpu.h:1.13 src/sys/arch/aarch64/include/cpu.h:1.13.4.1 --- src/sys/arch/aarch64/include/cpu.h:1.13 Fri Dec 21 08:01:01 2018 +++ src/sys/arch/aarch64/include/cpu.h Wed Oct 23 19:14:19 2019 @@ -1,4 +1,4 @@ -/* $NetBSD: cpu.h,v 1.13 2018/12/21 08:01:01 ryo Exp $ */ +/* $NetBSD: cpu.h,v 1.13.4.1 2019/10/23 19:14:19 martin Exp $ */ /*- * Copyright (c) 2014 The NetBSD Foundation, Inc. @@ -126,8 +126,8 @@ void cpu_mpstart(void); void cpu_hatch(struct cpu_info *); extern struct cpu_info *cpu_info[]; -extern volatile u_int arm_cpu_hatched; /* MULTIPROCESSOR */ extern uint64_t cpu_mpidr[]; /* MULTIPROCESSOR */ +bool cpu_hatched_p(u_int); /* MULTIPROCESSOR */ #define CPU_INFO_ITERATOR cpuid_t #ifdef MULTIPROCESSOR Index: src/sys/arch/aarch64/include/param.h diff -u src/sys/arch/aarch64/include/param.h:1.11 src/sys/arch/aarch64/include/param.h:1.11.4.1 --- src/sys/arch/aarch64/include/param.h:1.11 Sat Jan 19 09:11:55 2019 +++ src/sys/arch/aarch64/include/param.h Wed Oct 23 19:14:19 2019 @@ -1,4 +1,4 @@ -/* $NetBSD: param.h,v 1.11 2019/01/19 09:11:55 skrll Exp $ */ +/* $NetBSD: param.h,v 1.11.4.1 2019/10/23 19:14:19 martin Exp $ */ /*- * Copyright (c) 2014 The NetBSD Foundation, Inc. 
@@ -129,6 +129,8 @@ #define COHERENCY_UNIT 128 #define CACHE_LINE_SIZE 128 +#define MAXCPUS 256 + #ifdef _KERNEL #ifndef __HIDE_DELAY Index: src/sys/arch/arm/acpi/cpu_acpi.c diff -u src/sys/arch/arm/acpi/cpu_acpi.c:1.6 src/sys/arch/arm/acpi/cpu_acpi.c:1.6.4.1 --- src/sys/arch/arm/acpi/cpu_acpi.c:1.6 Thu May 23 15:54:28 2019 +++ src/sys/arch/arm/acpi/cpu_acpi.c Wed Oct 23 19:14:19 2019 @@ -1,4 +1,4 @@ -/* $NetBSD: cpu_acpi.c,v 1.6 2019/05/23 15:54:28 ryo Exp $ */ +/* $NetBSD: cpu_acpi.c,v 1.6.4.1 2019/10/23 19:14:19 martin Exp $ */ /*- * Copyright (c) 2018 The NetBSD Foundation, Inc. @@ -33,7 +33,7 @@ #include "opt_multiprocessor.h" #include <sys/cdefs.h> -__KERNEL_RCSID(0, "$NetBSD: cpu_acpi.c,v 1.6 2019/05/23 15:54:28 ryo Exp $"); +__KERNEL_RCSID(0, "$NetBSD: cpu_acpi.c,v 1.6.4.1 2019/10/23 19:14:19 martin Exp $"); #include <sys/param.h> #include <sys/bus.h> @@ -41,6 +41,7 @@ __KERNEL_RCSID(0, "$NetBSD: cpu_acpi.c,v #include <sys/device.h> #include <sys/interrupt.h> #include <sys/kcpuset.h> +#include <sys/reboot.h> #include <dev/acpi/acpireg.h> #include <dev/acpi/acpivar.h> @@ -99,7 +100,7 @@ cpu_acpi_attach(device_t parent, device_ struct cpu_info *ci = &cpu_info_store[unit]; #ifdef MULTIPROCESSOR - if (cpu_mpidr_aff_read() != mpidr) { + if (cpu_mpidr_aff_read() != mpidr && (boothowto & RB_MD1) == 0) { const u_int cpuindex = device_unit(self); int error; @@ -116,9 +117,8 @@ cpu_acpi_attach(device_t parent, device_ __asm __volatile("sev" ::: "memory"); for (u_int i = 0x10000000; i > 0; i--) { - membar_consumer(); - if (arm_cpu_hatched & __BIT(cpuindex)) - break; + if (cpu_hatched_p(cpuindex)) + break; } } #endif /* MULTIPROCESSOR */ Index: src/sys/arch/arm/arm32/cpu.c diff -u src/sys/arch/arm/arm32/cpu.c:1.129 src/sys/arch/arm/arm32/cpu.c:1.129.4.1 --- src/sys/arch/arm/arm32/cpu.c:1.129 Sun Mar 17 08:37:55 2019 +++ src/sys/arch/arm/arm32/cpu.c Wed Oct 23 19:14:19 2019 @@ -1,4 +1,4 @@ -/* $NetBSD: cpu.c,v 1.129 2019/03/17 08:37:55 skrll Exp $ */ +/* $NetBSD: 
cpu.c,v 1.129.4.1 2019/10/23 19:14:19 martin Exp $ */ /* * Copyright (c) 1995 Mark Brinicombe. @@ -46,7 +46,7 @@ #include "opt_multiprocessor.h" #include <sys/cdefs.h> -__KERNEL_RCSID(0, "$NetBSD: cpu.c,v 1.129 2019/03/17 08:37:55 skrll Exp $"); +__KERNEL_RCSID(0, "$NetBSD: cpu.c,v 1.129.4.1 2019/10/23 19:14:19 martin Exp $"); #include <sys/param.h> #include <sys/conf.h> @@ -130,7 +130,7 @@ cpu_attach(device_t dv, cpuid_t id) ci->ci_undefsave[2] = cpu_info_store.ci_undefsave[2]; cpu_info[unit] = ci; - if ((arm_cpu_hatched & __BIT(unit)) == 0) { + if (cpu_hatched_p(unit) == false) { ci->ci_dev = dv; dv->dv_private = ci; aprint_naive(": disabled\n"); @@ -239,6 +239,15 @@ cpu_attach(device_t dv, cpuid_t id) vfp_attach(ci); /* XXX SMP */ } +#ifdef MULTIPROCESSOR +bool +cpu_hatched_p(u_int cpuindex) +{ + membar_consumer(); + return (arm_cpu_hatched & __BIT(cpuindex)) != 0; +} +#endif + enum cpu_class { CPU_CLASS_NONE, CPU_CLASS_ARM2, Index: src/sys/arch/arm/fdt/cpu_fdt.c diff -u src/sys/arch/arm/fdt/cpu_fdt.c:1.25 src/sys/arch/arm/fdt/cpu_fdt.c:1.25.4.1 --- src/sys/arch/arm/fdt/cpu_fdt.c:1.25 Sat Apr 13 19:15:25 2019 +++ src/sys/arch/arm/fdt/cpu_fdt.c Wed Oct 23 19:14:19 2019 @@ -1,4 +1,4 @@ -/* $NetBSD: cpu_fdt.c,v 1.25 2019/04/13 19:15:25 jmcneill Exp $ */ +/* $NetBSD: cpu_fdt.c,v 1.25.4.1 2019/10/23 19:14:19 martin Exp $ */ /*- * Copyright (c) 2017 Jared McNeill <jmcne...@invisible.ca> @@ -30,7 +30,7 @@ #include "psci_fdt.h" #include <sys/cdefs.h> -__KERNEL_RCSID(0, "$NetBSD: cpu_fdt.c,v 1.25 2019/04/13 19:15:25 jmcneill Exp $"); +__KERNEL_RCSID(0, "$NetBSD: cpu_fdt.c,v 1.25.4.1 2019/10/23 19:14:19 martin Exp $"); #include <sys/param.h> #include <sys/atomic.h> @@ -308,8 +308,7 @@ arm_fdt_cpu_mpstart(void) /* Wait for AP to start */ for (i = 0x10000000; i > 0; i--) { - membar_consumer(); - if (arm_cpu_hatched & __BIT(cpuindex)) + if (cpu_hatched_p(cpuindex)) break; } Index: src/sys/arch/arm/include/cpu.h diff -u src/sys/arch/arm/include/cpu.h:1.100 
src/sys/arch/arm/include/cpu.h:1.100.4.1 --- src/sys/arch/arm/include/cpu.h:1.100 Thu Jan 3 10:26:41 2019 +++ src/sys/arch/arm/include/cpu.h Wed Oct 23 19:14:19 2019 @@ -1,4 +1,4 @@ -/* $NetBSD: cpu.h,v 1.100 2019/01/03 10:26:41 skrll Exp $ */ +/* $NetBSD: cpu.h,v 1.100.4.1 2019/10/23 19:14:19 martin Exp $ */ /* * Copyright (c) 1994-1996 Mark Brinicombe. @@ -253,8 +253,8 @@ extern struct cpu_info *cpu_info[]; #if defined(MULTIPROCESSOR) -extern volatile u_int arm_cpu_hatched; extern uint32_t cpu_mpidr[]; +bool cpu_hatched_p(u_int); void cpu_mpstart(void); void cpu_init_secondary_processor(int);