Author: markj Date: Tue Mar 24 18:43:23 2020 New Revision: 359280 URL: https://svnweb.freebsd.org/changeset/base/359280
Log: Remove the secondary_stacks array in arm64 and riscv kernels. Instead, dynamically allocate a page for the boot stack of each AP when starting them up, like we do on x86. This shrinks the bss by MAXCPU*KSTACK_PAGES pages, which corresponds to 4MB on arm64 and 256KB on riscv. Duplicate the logic used on x86 to free the bootstacks, by using a sysinit to wait for each AP to switch to a thread before freeing its stack. While here, mark some static MD variables as such. Reviewed by: kib MFC after: 1 month Sponsored by: Juniper Networks, Klara Inc. Differential Revision: https://reviews.freebsd.org/D24158 Modified: head/sys/arm64/arm64/locore.S head/sys/arm64/arm64/mp_machdep.c head/sys/riscv/riscv/locore.S head/sys/riscv/riscv/mp_machdep.c Modified: head/sys/arm64/arm64/locore.S ============================================================================== --- head/sys/arm64/arm64/locore.S Tue Mar 24 18:35:33 2020 (r359279) +++ head/sys/arm64/arm64/locore.S Tue Mar 24 18:43:23 2020 (r359280) @@ -214,11 +214,10 @@ ENTRY(mpentry) br x15 mp_virtdone: - ldr x4, =secondary_stacks - mov x5, #(PAGE_SIZE * KSTACK_PAGES) - mul x5, x0, x5 - add sp, x4, x5 - + /* Start using the AP boot stack */ + ldr x4, =bootstack + ldr x4, [x4] + mov sp, x4 b init_secondary END(mpentry) #endif Modified: head/sys/arm64/arm64/mp_machdep.c ============================================================================== --- head/sys/arm64/arm64/mp_machdep.c Tue Mar 24 18:35:33 2020 (r359279) +++ head/sys/arm64/arm64/mp_machdep.c Tue Mar 24 18:43:23 2020 (r359280) @@ -123,7 +123,6 @@ static void ipi_preempt(void *); static void ipi_rendezvous(void *); static void ipi_stop(void *); -struct mtx ap_boot_mtx; struct pcb stoppcbs[MAXCPU]; /* @@ -136,10 +135,18 @@ static int cpu0 = -1; void mpentry(unsigned long cpuid); void init_secondary(uint64_t); -uint8_t secondary_stacks[MAXCPU - 1][PAGE_SIZE * KSTACK_PAGES] __aligned(16); +/* Synchronize AP startup. 
*/ +static struct mtx ap_boot_mtx; +/* Stacks for AP initialization, discarded once idle threads are started. */ +void *bootstack; +static void *bootstacks[MAXCPU]; + +/* Count of started APs, used to synchronize access to bootstack. */ +static volatile int aps_started; + /* Set to 1 once we're ready to let the APs out of the pen. */ -volatile int aps_ready = 0; +static volatile int aps_ready; /* Temporary variables for init_secondary() */ void *dpcpu[MAXCPU - 1]; @@ -205,14 +212,14 @@ init_secondary(uint64_t cpu) "mov x18, %0 \n" "msr tpidr_el1, %0" :: "r"(pcpup)); - /* Spin until the BSP releases the APs */ - while (!aps_ready) + /* Signal the BSP and spin until it has released all APs. */ + atomic_add_int(&aps_started, 1); + while (!atomic_load_int(&aps_ready)) __asm __volatile("wfe"); /* Initialize curthread */ KASSERT(PCPU_GET(idlethread) != NULL, ("no idle thread")); pcpup->pc_curthread = pcpup->pc_idlethread; - pcpup->pc_curpcb = pcpup->pc_idlethread->td_pcb; /* Initialize curpmap to match TTBR0's current setting. */ pmap0 = vmspace_pmap(&vmspace0); @@ -250,6 +257,11 @@ init_secondary(uint64_t cpu) kcsan_cpu_init(cpu); + /* + * Assert that smp_after_idle_runnable condition is reasonable. + */ + MPASS(PCPU_GET(curpcb) == NULL); + /* Enter the scheduler */ sched_throw(NULL); @@ -257,6 +269,24 @@ init_secondary(uint64_t cpu) /* NOTREACHED */ } +static void +smp_after_idle_runnable(void *arg __unused) +{ + struct pcpu *pc; + int cpu; + + for (cpu = 1; cpu < mp_ncpus; cpu++) { + if (bootstacks[cpu] != NULL) { + pc = pcpu_find(cpu); + while (atomic_load_ptr(&pc->pc_curpcb) == NULL) + cpu_spinwait(); + kmem_free((vm_offset_t)bootstacks[cpu], PAGE_SIZE); + } + } +} +SYSINIT(smp_after_idle_runnable, SI_SUB_SMP, SI_ORDER_ANY, + smp_after_idle_runnable, NULL); + /* * Send IPI thru interrupt controller. 
*/ @@ -391,7 +421,7 @@ start_cpu(u_int id, uint64_t target_cpu) struct pcpu *pcpup; vm_paddr_t pa; u_int cpuid; - int err; + int err, naps; /* Check we are able to start this cpu */ if (id > mp_maxid) @@ -405,7 +435,7 @@ start_cpu(u_int id, uint64_t target_cpu) /* * Rotate the CPU IDs to put the boot CPU as CPU 0. We keep the other - * CPUs ordered as the are likely grouped into clusters so it can be + * CPUs ordered as they are likely grouped into clusters so it can be * useful to keep that property, e.g. for the GICv3 driver to send * an IPI to all CPUs in the cluster. */ @@ -420,29 +450,41 @@ start_cpu(u_int id, uint64_t target_cpu) dpcpu[cpuid - 1] = (void *)kmem_malloc(DPCPU_SIZE, M_WAITOK | M_ZERO); dpcpu_init(dpcpu[cpuid - 1], cpuid); + bootstacks[cpuid] = (void *)kmem_malloc(PAGE_SIZE, M_WAITOK | M_ZERO); + + naps = atomic_load_int(&aps_started); + bootstack = (char *)bootstacks[cpuid] + PAGE_SIZE; + printf("Starting CPU %u (%lx)\n", cpuid, target_cpu); pa = pmap_extract(kernel_pmap, (vm_offset_t)mpentry); - err = psci_cpu_on(target_cpu, pa, cpuid); if (err != PSCI_RETVAL_SUCCESS) { /* * Panic here if INVARIANTS are enabled and PSCI failed to - * start the requested CPU. If psci_cpu_on returns PSCI_MISSING + * start the requested CPU. psci_cpu_on() returns PSCI_MISSING * to indicate we are unable to use it to start the given CPU. 
*/ KASSERT(err == PSCI_MISSING || (mp_quirks & MP_QUIRK_CPULIST) == MP_QUIRK_CPULIST, - ("Failed to start CPU %u (%lx)\n", id, target_cpu)); + ("Failed to start CPU %u (%lx), error %d\n", + id, target_cpu, err)); pcpu_destroy(pcpup); kmem_free((vm_offset_t)dpcpu[cpuid - 1], DPCPU_SIZE); dpcpu[cpuid - 1] = NULL; + kmem_free((vm_offset_t)bootstacks[cpuid], PAGE_SIZE); + bootstacks[cpuid] = NULL; mp_ncpus--; /* Notify the user that the CPU failed to start */ - printf("Failed to start CPU %u (%lx)\n", id, target_cpu); - } else + printf("Failed to start CPU %u (%lx), error %d\n", + id, target_cpu, err); + } else { + /* Wait for the AP to switch to its boot stack. */ + while (atomic_load_int(&aps_started) < naps + 1) + cpu_spinwait(); CPU_SET(cpuid, &all_cpus); + } return (true); } Modified: head/sys/riscv/riscv/locore.S ============================================================================== --- head/sys/riscv/riscv/locore.S Tue Mar 24 18:35:33 2020 (r359279) +++ head/sys/riscv/riscv/locore.S Tue Mar 24 18:43:23 2020 (r359280) @@ -301,14 +301,8 @@ ENTRY(mpentry) beqz t1, 1b /* Setup stack pointer */ - lla t0, secondary_stacks - li t1, (PAGE_SIZE * KSTACK_PAGES) - mulw t2, t1, a0 - add t0, t0, t2 - add t0, t0, t1 - sub t0, t0, s9 - li t1, KERNBASE - add sp, t0, t1 + lla t0, bootstack + ld sp, 0(t0) /* Setup supervisor trap vector */ lla t0, mpva Modified: head/sys/riscv/riscv/mp_machdep.c ============================================================================== --- head/sys/riscv/riscv/mp_machdep.c Tue Mar 24 18:35:33 2020 (r359279) +++ head/sys/riscv/riscv/mp_machdep.c Tue Mar 24 18:43:23 2020 (r359280) @@ -87,7 +87,6 @@ static device_attach_t riscv64_cpu_attach; static int ipi_handler(void *); -struct mtx ap_boot_mtx; struct pcb stoppcbs[MAXCPU]; extern uint32_t boot_hart; @@ -98,13 +97,19 @@ static uint32_t cpu_reg[MAXCPU][2]; #endif static device_t cpu_list[MAXCPU]; -void mpentry(unsigned long cpuid); void init_secondary(uint64_t); -uint8_t 
secondary_stacks[MAXCPU][PAGE_SIZE * KSTACK_PAGES] __aligned(16); +static struct mtx ap_boot_mtx; +/* Stacks for AP initialization, discarded once idle threads are started. */ +void *bootstack; +static void *bootstacks[MAXCPU]; + +/* Count of started APs, used to synchronize access to bootstack. */ +static volatile int aps_started; + /* Set to 1 once we're ready to let the APs out of the pen. */ -volatile int aps_ready = 0; +static volatile int aps_ready; /* Temporary variables for init_secondary() */ void *dpcpu[MAXCPU - 1]; @@ -233,14 +238,14 @@ init_secondary(uint64_t hart) csr_set(sie, SIE_SSIE); csr_set(sip, SIE_SSIE); - /* Spin until the BSP releases the APs */ - while (!aps_ready) + /* Signal the BSP and spin until it has released all APs. */ + atomic_add_int(&aps_started, 1); + while (!atomic_load_int(&aps_ready)) __asm __volatile("wfi"); /* Initialize curthread */ KASSERT(PCPU_GET(idlethread) != NULL, ("no idle thread")); pcpup->pc_curthread = pcpup->pc_idlethread; - pcpup->pc_curpcb = pcpup->pc_idlethread->td_pcb; /* * Identify current CPU. This is necessary to setup @@ -274,6 +279,11 @@ init_secondary(uint64_t hart) mtx_unlock_spin(&ap_boot_mtx); + /* + * Assert that smp_after_idle_runnable condition is reasonable. 
+ */ + MPASS(PCPU_GET(curpcb) == NULL); + /* Enter the scheduler */ + sched_throw(NULL); @@ -281,6 +291,24 @@ init_secondary(uint64_t hart) /* NOTREACHED */ } +static void +smp_after_idle_runnable(void *arg __unused) +{ + struct pcpu *pc; + int cpu; + + for (cpu = 1; cpu < mp_ncpus; cpu++) { + if (bootstacks[cpu] != NULL) { + pc = pcpu_find(cpu); + while (atomic_load_ptr(&pc->pc_curpcb) == NULL) + cpu_spinwait(); + kmem_free((vm_offset_t)bootstacks[cpu], PAGE_SIZE); + } + } +} +SYSINIT(smp_after_idle_runnable, SI_SUB_SMP, SI_ORDER_ANY, + smp_after_idle_runnable, NULL); + static int ipi_handler(void *arg) { @@ -373,6 +401,7 @@ cpu_init_fdt(u_int id, phandle_t node, u_int addr_size struct pcpu *pcpup; uint64_t hart; u_int cpuid; + int naps; /* Check if this hart supports MMU. */ if (OF_getproplen(node, "mmu-type") < 0) @@ -419,8 +448,17 @@ cpu_init_fdt(u_int id, phandle_t node, u_int addr_size dpcpu[cpuid - 1] = (void *)kmem_malloc(DPCPU_SIZE, M_WAITOK | M_ZERO); dpcpu_init(dpcpu[cpuid - 1], cpuid); + bootstacks[cpuid] = (void *)kmem_malloc(PAGE_SIZE, M_WAITOK | M_ZERO); + + naps = atomic_load_int(&aps_started); + bootstack = (char *)bootstacks[cpuid] + PAGE_SIZE; + printf("Starting CPU %u (hart %lx)\n", cpuid, hart); - __riscv_boot_ap[hart] = 1; + atomic_store_32(&__riscv_boot_ap[hart], 1); + + /* Wait for the AP to switch to its boot stack. */ + while (atomic_load_int(&aps_started) < naps + 1) + cpu_spinwait(); CPU_SET(cpuid, &all_cpus); CPU_SET(hart, &all_harts); _______________________________________________ svn-src-head@freebsd.org mailing list https://lists.freebsd.org/mailman/listinfo/svn-src-head To unsubscribe, send any mail to "svn-src-head-unsubscribe@freebsd.org"