Module Name:    src
Committed By:   martin
Date:           Wed Oct 23 19:14:19 UTC 2019

Modified Files:
        src/sys/arch/aarch64/aarch64 [netbsd-9]: cpu.c locore.S
        src/sys/arch/aarch64/include [netbsd-9]: cpu.h param.h
        src/sys/arch/arm/acpi [netbsd-9]: cpu_acpi.c
        src/sys/arch/arm/arm32 [netbsd-9]: cpu.c
        src/sys/arch/arm/fdt [netbsd-9]: cpu_fdt.c
        src/sys/arch/arm/include [netbsd-9]: cpu.h

Log Message:
Pull up following revision(s) (requested by jmcneill in ticket #359):

        sys/arch/aarch64/aarch64/locore.S: revision 1.42
        sys/arch/aarch64/aarch64/locore.S: revision 1.43
        sys/arch/aarch64/aarch64/locore.S: revision 1.44
        sys/arch/arm/fdt/cpu_fdt.c: revision 1.28
        sys/arch/aarch64/include/cpu.h: revision 1.14
        sys/arch/aarch64/include/param.h: revision 1.12
        sys/arch/arm/arm32/cpu.c: revision 1.133
        sys/arch/arm/arm32/cpu.c: revision 1.134
        sys/arch/arm/include/cpu.h: revision 1.101
        sys/arch/arm/acpi/cpu_acpi.c: revision 1.7
        sys/arch/aarch64/aarch64/cpu.c: revision 1.23
        sys/arch/aarch64/aarch64/cpu.c: revision 1.24
        sys/arch/aarch64/aarch64/cpu.c: revision 1.25

Increase aarch64 MAXCPUS to 256.

 -

Invalidate dcache before polling AP hatched status

 -

Avoid overlap between BP and last AP stack. AP stacks are now laid out in
increasing address order.

Spotted by and idea from mlelstv.

 -

Use separate cacheline aligned arrays for mbox and hatched as before.

 -

Define cpu_hatched_p only for MULTIPROCESSOR


To generate a diff of this commit:
cvs rdiff -u -r1.20 -r1.20.2.1 src/sys/arch/aarch64/aarch64/cpu.c
cvs rdiff -u -r1.39.2.1 -r1.39.2.2 src/sys/arch/aarch64/aarch64/locore.S
cvs rdiff -u -r1.13 -r1.13.4.1 src/sys/arch/aarch64/include/cpu.h
cvs rdiff -u -r1.11 -r1.11.4.1 src/sys/arch/aarch64/include/param.h
cvs rdiff -u -r1.6 -r1.6.4.1 src/sys/arch/arm/acpi/cpu_acpi.c
cvs rdiff -u -r1.129 -r1.129.4.1 src/sys/arch/arm/arm32/cpu.c
cvs rdiff -u -r1.25 -r1.25.4.1 src/sys/arch/arm/fdt/cpu_fdt.c
cvs rdiff -u -r1.100 -r1.100.4.1 src/sys/arch/arm/include/cpu.h

Please note that diffs are not public domain; they are subject to the
copyright notices on the relevant files.

Modified files:

Index: src/sys/arch/aarch64/aarch64/cpu.c
diff -u src/sys/arch/aarch64/aarch64/cpu.c:1.20 src/sys/arch/aarch64/aarch64/cpu.c:1.20.2.1
--- src/sys/arch/aarch64/aarch64/cpu.c:1.20	Tue Jul 16 20:29:53 2019
+++ src/sys/arch/aarch64/aarch64/cpu.c	Wed Oct 23 19:14:19 2019
@@ -1,4 +1,4 @@
-/* $NetBSD: cpu.c,v 1.20 2019/07/16 20:29:53 jmcneill Exp $ */
+/* $NetBSD: cpu.c,v 1.20.2.1 2019/10/23 19:14:19 martin Exp $ */
 
 /*
  * Copyright (c) 2017 Ryo Shimizu <r...@nerv.org>
@@ -27,7 +27,7 @@
  */
 
 #include <sys/cdefs.h>
-__KERNEL_RCSID(1, "$NetBSD: cpu.c,v 1.20 2019/07/16 20:29:53 jmcneill Exp $");
+__KERNEL_RCSID(1, "$NetBSD: cpu.c,v 1.20.2.1 2019/10/23 19:14:19 martin Exp $");
 
 #include "locators.h"
 #include "opt_arm_debug.h"
@@ -69,8 +69,8 @@ static void cpu_setup_sysctl(device_t, s
 #ifdef MULTIPROCESSOR
 uint64_t cpu_mpidr[MAXCPUS];
 
-volatile u_int arm_cpu_hatched __cacheline_aligned = 0;
-volatile uint32_t arm_cpu_mbox __cacheline_aligned = 0;
+volatile u_int aarch64_cpu_mbox[howmany(MAXCPUS, sizeof(u_int))] __cacheline_aligned = { 0 };
+volatile u_int aarch64_cpu_hatched[howmany(MAXCPUS, sizeof(u_int))] __cacheline_aligned = { 0 };
 u_int arm_cpu_max = 1;
 
 static kmutex_t cpu_hatch_lock;
@@ -126,7 +126,7 @@ cpu_attach(device_t dv, cpuid_t id)
 		/* ci_id is stored by own cpus when hatching */
 
 		cpu_info[ncpu] = ci;
-		if ((arm_cpu_hatched & __BIT(unit)) == 0) {
+		if (cpu_hatched_p(unit) == 0) {
 			ci->ci_dev = dv;
 			dv->dv_private = ci;
 			ci->ci_index = -1;
@@ -499,27 +499,33 @@ cpu_setup_sysctl(device_t dv, struct cpu
 void
 cpu_boot_secondary_processors(void)
 {
+	u_int n, bit;
+
 	if ((boothowto & RB_MD1) != 0)
 		return;
 
 	mutex_init(&cpu_hatch_lock, MUTEX_DEFAULT, IPL_NONE);
 
-	VPRINTF("%s: writing mbox with %#x\n", __func__, arm_cpu_hatched);
+	VPRINTF("%s: starting secondary processors\n", __func__);
 
 	/* send mbox to have secondary processors do cpu_hatch() */
-	atomic_or_32(&arm_cpu_mbox, arm_cpu_hatched);
+	for (n = 0; n < __arraycount(aarch64_cpu_mbox); n++)
+		atomic_or_uint(&aarch64_cpu_mbox[n], aarch64_cpu_hatched[n]);
 	__asm __volatile ("sev; sev; sev");
 
 	/* wait all cpus have done cpu_hatch() */
-	while (membar_consumer(), arm_cpu_mbox & arm_cpu_hatched) {
-		__asm __volatile ("wfe");
+	for (n = 0; n < __arraycount(aarch64_cpu_mbox); n++) {
+		while (membar_consumer(), aarch64_cpu_mbox[n] & aarch64_cpu_hatched[n]) {
+			__asm __volatile ("wfe");
+		}
+		/* Add processors to kcpuset */
+		for (bit = 0; bit < 32; bit++) {
+			if (aarch64_cpu_hatched[n] & __BIT(bit))
+				kcpuset_set(kcpuset_attached, n * 32 + bit);
+		}
 	}
 
 	VPRINTF("%s: secondary processors hatched\n", __func__);
-
-	/* add available processors to kcpuset */
-	uint32_t mbox = arm_cpu_hatched;
-	kcpuset_export_u32(kcpuset_attached, &mbox, sizeof(mbox));
 }
 
 void
@@ -549,12 +555,23 @@ cpu_hatch(struct cpu_info *ci)
 #endif
 
 	/*
-	 * clear my bit of arm_cpu_mbox to tell cpu_boot_secondary_processors().
+	 * clear my bit of aarch64_cpu_mbox to tell cpu_boot_secondary_processors().
 	 * there are cpu0,1,2,3, and if cpu2 is unresponsive,
 	 * ci_index are each cpu0=0, cpu1=1, cpu2=undef, cpu3=2.
 	 * therefore we have to use device_unit instead of ci_index for mbox.
 	 */
-	atomic_and_32(&arm_cpu_mbox, ~__BIT(device_unit(ci->ci_dev)));
+	const u_int off = device_unit(ci->ci_dev) / 32;
+	const u_int bit = device_unit(ci->ci_dev) % 32;
+	atomic_and_uint(&aarch64_cpu_mbox[off], ~__BIT(bit));
 	__asm __volatile ("sev; sev; sev");
 }
+
+bool
+cpu_hatched_p(u_int cpuindex)
+{
+	const u_int off = cpuindex / 32;
+	const u_int bit = cpuindex % 32;
+	membar_consumer();
+	return (aarch64_cpu_hatched[off] & __BIT(bit)) != 0;
+}
 #endif /* MULTIPROCESSOR */

Index: src/sys/arch/aarch64/aarch64/locore.S
diff -u src/sys/arch/aarch64/aarch64/locore.S:1.39.2.1 src/sys/arch/aarch64/aarch64/locore.S:1.39.2.2
--- src/sys/arch/aarch64/aarch64/locore.S:1.39.2.1	Sun Sep 22 12:39:36 2019
+++ src/sys/arch/aarch64/aarch64/locore.S	Wed Oct 23 19:14:19 2019
@@ -1,4 +1,4 @@
-/*	$NetBSD: locore.S,v 1.39.2.1 2019/09/22 12:39:36 martin Exp $	*/
+/*	$NetBSD: locore.S,v 1.39.2.2 2019/10/23 19:14:19 martin Exp $	*/
 
 /*
  * Copyright (c) 2017 Ryo Shimizu <r...@nerv.org>
@@ -38,7 +38,7 @@
 #include <aarch64/hypervisor.h>
 #include "assym.h"
 
-RCSID("$NetBSD: locore.S,v 1.39.2.1 2019/09/22 12:39:36 martin Exp $")
+RCSID("$NetBSD: locore.S,v 1.39.2.2 2019/10/23 19:14:19 martin Exp $")
 
 
 /*#define DEBUG_LOCORE			/* debug print */
@@ -361,28 +361,23 @@ ENTRY_NP(cpu_mpstart)
 	mov	x1, xzr
 1:
 	add	x1, x1, #1
-	cmp	x1, MAXCPUS		/* cpuindex >= MAXCPUS ? */
+	cmp	x1, #MAXCPUS		/* cpuindex >= MAXCPUS ? */
 	bge	toomanycpus
 	ldr	x2, [x0, x1, lsl #3]	/* cpu_mpidr[cpuindex] */
 	cmp	x2, x3			/* == mpidr_el1 & MPIDR_AFF ? */
 	bne	1b
 
 	mov	x27, x1			/* x27 = cpuindex */
-	mov	x0, #1
-	lsl	x28, x0, x27		/* x28 = 1 << cpuindex */
-
 
 	/*
 	 * x27 = cpuindex
-	 * x28 = (1 << cpuindex)
 	 */
 
 	/* set stack pointer for boot */
 	mov	x1, #BOOT_AP_STACKSIZE
 	mul	x1, x1, x27
-	ADDR	x0, bootstk_cpus
-	sub	sp, x0, x1  /* sp = bootstk_cpus-(BOOT_AP_STACKSIZE*cpuindex) */
-
+	ADDR	x0, bootstk
+	add	sp, x0, x1  /* sp = bootstk + (BOOT_AP_STACKSIZE * cpuindex) */
 
 	bl	1f
 1:	CPU_DPRINTREG("PC               = ", lr)
@@ -447,28 +442,39 @@ mp_vstart:
 	mrs	x1, mpidr_el1
 	str	x1, [x0, #CI_MPIDR]	/* curcpu()->ci_mpidr = mpidr_el1 */
 
-	CPU_DPRINTREG("arm_cpu_hatched |= ", x28)
+	mov	x0, #32
+	udiv	x1, x27, x0
+	ADDR	x0, _C_LABEL(aarch64_cpu_hatched)
+	add	x28, x0, x1, lsl #2	/* x28 = &aarch64_cpu_hatched[cpuindex/32] */
+	mov	x0, #1
+	mov	x2, #32
+	msub	x1, x1, x2, x27
+	lsl	x29, x0, x1		/* x29 = 1 << (cpuindex % 32) */
 
 	/*
-	 * atomic_or_32(&arm_cpu_hatched, (1 << cpuindex))
+	 * atomic_or_uint(&aarch64_cpu_hatched[cpuindex/32], 1<<cpuindex%32)
 	 * to tell my activity to primary processor.
 	 */
-	ADDR	x0, _C_LABEL(arm_cpu_hatched)
-	mov	x1, x28
-	bl	_C_LABEL(atomic_or_32)	/* hatched! */
+	mov	x0, x28
+	mov	x1, x29
+	bl	_C_LABEL(atomic_or_uint)	/* hatched! */
+	dsb	sy
 	sev
 
-	/* wait for my bit of arm_cpu_mbox become true */
-	ADDR	x0, _C_LABEL(arm_cpu_mbox)
+	mov	x0, #32
+	udiv	x1, x27, x0
+	ADDR	x0, _C_LABEL(aarch64_cpu_mbox)
+	add	x28, x0, x1, lsl #2	/* x28 = &aarch64_cpu_mbox[cpuindex/32] */
+
+	/* wait for the mailbox start bit to become true */
 1:
 	dmb	sy
-	ldr	x20, [x0]
-	tst	x20, x28
+	ldr	w20, [x28]
+	tst	w20, w29
 	bne	9f
 	wfe
 	b	1b
 9:
-//	CPU_DPRINTREG("got arm_cpu_mbox = ", x20)
 
 	/* fill my cpu_info */
 	mrs	x0, tpidr_el1		/* curcpu() */
@@ -1042,7 +1048,6 @@ bootstk:
 
 #ifdef MULTIPROCESSOR
 	.space	BOOT_AP_STACKSIZE * (MAXCPUS - 1)
-bootstk_cpus:
 #endif
 
 	.section ".init_pagetable", "aw", %nobits

Index: src/sys/arch/aarch64/include/cpu.h
diff -u src/sys/arch/aarch64/include/cpu.h:1.13 src/sys/arch/aarch64/include/cpu.h:1.13.4.1
--- src/sys/arch/aarch64/include/cpu.h:1.13	Fri Dec 21 08:01:01 2018
+++ src/sys/arch/aarch64/include/cpu.h	Wed Oct 23 19:14:19 2019
@@ -1,4 +1,4 @@
-/* $NetBSD: cpu.h,v 1.13 2018/12/21 08:01:01 ryo Exp $ */
+/* $NetBSD: cpu.h,v 1.13.4.1 2019/10/23 19:14:19 martin Exp $ */
 
 /*-
  * Copyright (c) 2014 The NetBSD Foundation, Inc.
@@ -126,8 +126,8 @@ void cpu_mpstart(void);
 void cpu_hatch(struct cpu_info *);
 
 extern struct cpu_info *cpu_info[];
-extern volatile u_int arm_cpu_hatched;	/* MULTIPROCESSOR */
 extern uint64_t cpu_mpidr[];		/* MULTIPROCESSOR */
+bool cpu_hatched_p(u_int);		/* MULTIPROCESSOR */
 
 #define CPU_INFO_ITERATOR	cpuid_t
 #ifdef MULTIPROCESSOR

Index: src/sys/arch/aarch64/include/param.h
diff -u src/sys/arch/aarch64/include/param.h:1.11 src/sys/arch/aarch64/include/param.h:1.11.4.1
--- src/sys/arch/aarch64/include/param.h:1.11	Sat Jan 19 09:11:55 2019
+++ src/sys/arch/aarch64/include/param.h	Wed Oct 23 19:14:19 2019
@@ -1,4 +1,4 @@
-/* $NetBSD: param.h,v 1.11 2019/01/19 09:11:55 skrll Exp $ */
+/* $NetBSD: param.h,v 1.11.4.1 2019/10/23 19:14:19 martin Exp $ */
 
 /*-
  * Copyright (c) 2014 The NetBSD Foundation, Inc.
@@ -129,6 +129,8 @@
 #define COHERENCY_UNIT		128
 #define CACHE_LINE_SIZE		128
 
+#define MAXCPUS			256
+
 #ifdef _KERNEL
 
 #ifndef __HIDE_DELAY

Index: src/sys/arch/arm/acpi/cpu_acpi.c
diff -u src/sys/arch/arm/acpi/cpu_acpi.c:1.6 src/sys/arch/arm/acpi/cpu_acpi.c:1.6.4.1
--- src/sys/arch/arm/acpi/cpu_acpi.c:1.6	Thu May 23 15:54:28 2019
+++ src/sys/arch/arm/acpi/cpu_acpi.c	Wed Oct 23 19:14:19 2019
@@ -1,4 +1,4 @@
-/* $NetBSD: cpu_acpi.c,v 1.6 2019/05/23 15:54:28 ryo Exp $ */
+/* $NetBSD: cpu_acpi.c,v 1.6.4.1 2019/10/23 19:14:19 martin Exp $ */
 
 /*-
  * Copyright (c) 2018 The NetBSD Foundation, Inc.
@@ -33,7 +33,7 @@
 #include "opt_multiprocessor.h"
 
 #include <sys/cdefs.h>
-__KERNEL_RCSID(0, "$NetBSD: cpu_acpi.c,v 1.6 2019/05/23 15:54:28 ryo Exp $");
+__KERNEL_RCSID(0, "$NetBSD: cpu_acpi.c,v 1.6.4.1 2019/10/23 19:14:19 martin Exp $");
 
 #include <sys/param.h>
 #include <sys/bus.h>
@@ -41,6 +41,7 @@ __KERNEL_RCSID(0, "$NetBSD: cpu_acpi.c,v
 #include <sys/device.h>
 #include <sys/interrupt.h>
 #include <sys/kcpuset.h>
+#include <sys/reboot.h>
 
 #include <dev/acpi/acpireg.h>
 #include <dev/acpi/acpivar.h>
@@ -99,7 +100,7 @@ cpu_acpi_attach(device_t parent, device_
 	struct cpu_info *ci = &cpu_info_store[unit];
 
 #ifdef MULTIPROCESSOR
-	if (cpu_mpidr_aff_read() != mpidr) {
+	if (cpu_mpidr_aff_read() != mpidr && (boothowto & RB_MD1) == 0) {
 		const u_int cpuindex = device_unit(self);
 		int error;
 
@@ -116,9 +117,8 @@ cpu_acpi_attach(device_t parent, device_
 		__asm __volatile("sev" ::: "memory");
 
 		for (u_int i = 0x10000000; i > 0; i--) {
-			membar_consumer();
-			if (arm_cpu_hatched & __BIT(cpuindex))
-				break;
+			if (cpu_hatched_p(cpuindex))
+				 break;
 		}
 	}
 #endif /* MULTIPROCESSOR */

Index: src/sys/arch/arm/arm32/cpu.c
diff -u src/sys/arch/arm/arm32/cpu.c:1.129 src/sys/arch/arm/arm32/cpu.c:1.129.4.1
--- src/sys/arch/arm/arm32/cpu.c:1.129	Sun Mar 17 08:37:55 2019
+++ src/sys/arch/arm/arm32/cpu.c	Wed Oct 23 19:14:19 2019
@@ -1,4 +1,4 @@
-/*	$NetBSD: cpu.c,v 1.129 2019/03/17 08:37:55 skrll Exp $	*/
+/*	$NetBSD: cpu.c,v 1.129.4.1 2019/10/23 19:14:19 martin Exp $	*/
 
 /*
  * Copyright (c) 1995 Mark Brinicombe.
@@ -46,7 +46,7 @@
 #include "opt_multiprocessor.h"
 
 #include <sys/cdefs.h>
-__KERNEL_RCSID(0, "$NetBSD: cpu.c,v 1.129 2019/03/17 08:37:55 skrll Exp $");
+__KERNEL_RCSID(0, "$NetBSD: cpu.c,v 1.129.4.1 2019/10/23 19:14:19 martin Exp $");
 
 #include <sys/param.h>
 #include <sys/conf.h>
@@ -130,7 +130,7 @@ cpu_attach(device_t dv, cpuid_t id)
 		ci->ci_undefsave[2] = cpu_info_store.ci_undefsave[2];
 
 		cpu_info[unit] = ci;
-		if ((arm_cpu_hatched & __BIT(unit)) == 0) {
+		if (cpu_hatched_p(unit) == false) {
 			ci->ci_dev = dv;
 			dv->dv_private = ci;
 			aprint_naive(": disabled\n");
@@ -239,6 +239,15 @@ cpu_attach(device_t dv, cpuid_t id)
 	vfp_attach(ci);		/* XXX SMP */
 }
 
+#ifdef MULTIPROCESSOR
+bool
+cpu_hatched_p(u_int cpuindex)
+{
+	membar_consumer();
+	return (arm_cpu_hatched & __BIT(cpuindex)) != 0;
+}
+#endif
+
 enum cpu_class {
 	CPU_CLASS_NONE,
 	CPU_CLASS_ARM2,

Index: src/sys/arch/arm/fdt/cpu_fdt.c
diff -u src/sys/arch/arm/fdt/cpu_fdt.c:1.25 src/sys/arch/arm/fdt/cpu_fdt.c:1.25.4.1
--- src/sys/arch/arm/fdt/cpu_fdt.c:1.25	Sat Apr 13 19:15:25 2019
+++ src/sys/arch/arm/fdt/cpu_fdt.c	Wed Oct 23 19:14:19 2019
@@ -1,4 +1,4 @@
-/* $NetBSD: cpu_fdt.c,v 1.25 2019/04/13 19:15:25 jmcneill Exp $ */
+/* $NetBSD: cpu_fdt.c,v 1.25.4.1 2019/10/23 19:14:19 martin Exp $ */
 
 /*-
  * Copyright (c) 2017 Jared McNeill <jmcne...@invisible.ca>
@@ -30,7 +30,7 @@
 #include "psci_fdt.h"
 
 #include <sys/cdefs.h>
-__KERNEL_RCSID(0, "$NetBSD: cpu_fdt.c,v 1.25 2019/04/13 19:15:25 jmcneill Exp $");
+__KERNEL_RCSID(0, "$NetBSD: cpu_fdt.c,v 1.25.4.1 2019/10/23 19:14:19 martin Exp $");
 
 #include <sys/param.h>
 #include <sys/atomic.h>
@@ -308,8 +308,7 @@ arm_fdt_cpu_mpstart(void)
 
 		/* Wait for AP to start */
 		for (i = 0x10000000; i > 0; i--) {
-			membar_consumer();
-			if (arm_cpu_hatched & __BIT(cpuindex))
+			if (cpu_hatched_p(cpuindex))
 				break;
 		}
 

Index: src/sys/arch/arm/include/cpu.h
diff -u src/sys/arch/arm/include/cpu.h:1.100 src/sys/arch/arm/include/cpu.h:1.100.4.1
--- src/sys/arch/arm/include/cpu.h:1.100	Thu Jan  3 10:26:41 2019
+++ src/sys/arch/arm/include/cpu.h	Wed Oct 23 19:14:19 2019
@@ -1,4 +1,4 @@
-/*	$NetBSD: cpu.h,v 1.100 2019/01/03 10:26:41 skrll Exp $	*/
+/*	$NetBSD: cpu.h,v 1.100.4.1 2019/10/23 19:14:19 martin Exp $	*/
 
 /*
  * Copyright (c) 1994-1996 Mark Brinicombe.
@@ -253,8 +253,8 @@ extern struct cpu_info *cpu_info[];
 
 #if defined(MULTIPROCESSOR)
 
-extern volatile u_int arm_cpu_hatched;
 extern uint32_t cpu_mpidr[];
+bool cpu_hatched_p(u_int);
 
 void cpu_mpstart(void);
 void cpu_init_secondary_processor(int);

Reply via email to