Module Name:    src
Committed By:   skrll
Date:           Sun Oct 31 16:23:48 UTC 2021

Modified Files:
        src/sys/arch/aarch64/aarch64: aarch64_machdep.c cpu.c cpufunc.c
            db_machdep.c locore.S
        src/sys/arch/aarch64/include: cpu.h cpufunc.h db_machdep.h
        src/sys/arch/arm/apple: apple_intc.c
        src/sys/arch/arm/arm: cpu_subr.c undefined.c
        src/sys/arch/arm/arm32: arm32_boot.c arm32_machdep.c cpu.c
        src/sys/arch/arm/broadcom: bcm2835_intr.c
        src/sys/arch/arm/cortex: gicv3_its.c gicv3_its.h gtmr.c
        src/sys/arch/arm/include: cpu.h locore.h undefined.h
        src/sys/arch/arm/pic: pic.c
        src/sys/arch/arm/vfp: vfp_init.c

Log Message:
Rework Arm (32bit and 64bit) AP startup so that cpu_hatch doesn't sleep.

The AP initialisation code in cpu_init_secondary_processor will read and
initialise the required system registers and state for the BP to attach
and report.

Rework the interrupt handler code for this new sequence. Thankfully,
this removes a bunch of code for bcm2836mp.

The VFP detection handler on <= armv7 relies on the global undefined
handler being in place until the BP attaches vfp, that is, until after
the APs have been spun up.

gicv3_its.c has a serialisation issue, which gicv3_its_cpu_init (called
from cpu_hatch) protects against with a spin lock. The serialisation
issue needs addressing more completely.

Tested on RPI3, Apple M1, QEMU, and lx2k

Fixes PR port-arm/56264:
   diagnostic assertion "l->l_stat == LSONPROC" failed on RPI3


To generate a diff of this commit:
cvs rdiff -u -r1.62 -r1.63 src/sys/arch/aarch64/aarch64/aarch64_machdep.c
cvs rdiff -u -r1.66 -r1.67 src/sys/arch/aarch64/aarch64/cpu.c
cvs rdiff -u -r1.31 -r1.32 src/sys/arch/aarch64/aarch64/cpufunc.c
cvs rdiff -u -r1.41 -r1.42 src/sys/arch/aarch64/aarch64/db_machdep.c
cvs rdiff -u -r1.81 -r1.82 src/sys/arch/aarch64/aarch64/locore.S
cvs rdiff -u -r1.42 -r1.43 src/sys/arch/aarch64/include/cpu.h
cvs rdiff -u -r1.21 -r1.22 src/sys/arch/aarch64/include/cpufunc.h
cvs rdiff -u -r1.14 -r1.15 src/sys/arch/aarch64/include/db_machdep.h
cvs rdiff -u -r1.3 -r1.4 src/sys/arch/arm/apple/apple_intc.c
cvs rdiff -u -r1.3 -r1.4 src/sys/arch/arm/arm/cpu_subr.c
cvs rdiff -u -r1.71 -r1.72 src/sys/arch/arm/arm/undefined.c
cvs rdiff -u -r1.43 -r1.44 src/sys/arch/arm/arm32/arm32_boot.c
cvs rdiff -u -r1.140 -r1.141 src/sys/arch/arm/arm32/arm32_machdep.c
cvs rdiff -u -r1.151 -r1.152 src/sys/arch/arm/arm32/cpu.c
cvs rdiff -u -r1.41 -r1.42 src/sys/arch/arm/broadcom/bcm2835_intr.c
cvs rdiff -u -r1.32 -r1.33 src/sys/arch/arm/cortex/gicv3_its.c
cvs rdiff -u -r1.7 -r1.8 src/sys/arch/arm/cortex/gicv3_its.h
cvs rdiff -u -r1.45 -r1.46 src/sys/arch/arm/cortex/gtmr.c
cvs rdiff -u -r1.119 -r1.120 src/sys/arch/arm/include/cpu.h
cvs rdiff -u -r1.36 -r1.37 src/sys/arch/arm/include/locore.h
cvs rdiff -u -r1.14 -r1.15 src/sys/arch/arm/include/undefined.h
cvs rdiff -u -r1.72 -r1.73 src/sys/arch/arm/pic/pic.c
cvs rdiff -u -r1.75 -r1.76 src/sys/arch/arm/vfp/vfp_init.c

Please note that diffs are not public domain; they are subject to the
copyright notices on the relevant files.

Modified files:

Index: src/sys/arch/aarch64/aarch64/aarch64_machdep.c
diff -u src/sys/arch/aarch64/aarch64/aarch64_machdep.c:1.62 src/sys/arch/aarch64/aarch64/aarch64_machdep.c:1.63
--- src/sys/arch/aarch64/aarch64/aarch64_machdep.c:1.62	Fri Oct  8 21:41:29 2021
+++ src/sys/arch/aarch64/aarch64/aarch64_machdep.c	Sun Oct 31 16:23:47 2021
@@ -1,4 +1,4 @@
-/* $NetBSD: aarch64_machdep.c,v 1.62 2021/10/08 21:41:29 ryo Exp $ */
+/* $NetBSD: aarch64_machdep.c,v 1.63 2021/10/31 16:23:47 skrll Exp $ */
 
 /*-
  * Copyright (c) 2014 The NetBSD Foundation, Inc.
@@ -30,7 +30,7 @@
  */
 
 #include <sys/cdefs.h>
-__KERNEL_RCSID(1, "$NetBSD: aarch64_machdep.c,v 1.62 2021/10/08 21:41:29 ryo Exp $");
+__KERNEL_RCSID(1, "$NetBSD: aarch64_machdep.c,v 1.63 2021/10/31 16:23:47 skrll Exp $");
 
 #include "opt_arm_debug.h"
 #include "opt_cpuoptions.h"
@@ -359,7 +359,7 @@ initarm_common(vaddr_t kvm_base, vsize_t
 	    VM_MAX_KERNEL_ADDRESS);
 
 #ifdef DDB
-	db_machdep_init();
+	db_machdep_cpu_init();
 #endif
 
 	uvm_md_init();

Index: src/sys/arch/aarch64/aarch64/cpu.c
diff -u src/sys/arch/aarch64/aarch64/cpu.c:1.66 src/sys/arch/aarch64/aarch64/cpu.c:1.67
--- src/sys/arch/aarch64/aarch64/cpu.c:1.66	Sat Oct 30 10:47:03 2021
+++ src/sys/arch/aarch64/aarch64/cpu.c	Sun Oct 31 16:23:47 2021
@@ -1,4 +1,4 @@
-/* $NetBSD: cpu.c,v 1.66 2021/10/30 10:47:03 skrll Exp $ */
+/* $NetBSD: cpu.c,v 1.67 2021/10/31 16:23:47 skrll Exp $ */
 
 /*
  * Copyright (c) 2017 Ryo Shimizu <r...@nerv.org>
@@ -27,7 +27,7 @@
  */
 
 #include <sys/cdefs.h>
-__KERNEL_RCSID(1, "$NetBSD: cpu.c,v 1.66 2021/10/30 10:47:03 skrll Exp $");
+__KERNEL_RCSID(1, "$NetBSD: cpu.c,v 1.67 2021/10/31 16:23:47 skrll Exp $");
 
 #include "locators.h"
 #include "opt_arm_debug.h"
@@ -72,12 +72,13 @@ __KERNEL_RCSID(1, "$NetBSD: cpu.c,v 1.66
 #endif
 
 void cpu_attach(device_t, cpuid_t);
+void cpu_setup_id(struct cpu_info *);
+
 static void identify_aarch64_model(uint32_t, char *, size_t);
 static void cpu_identify(device_t self, struct cpu_info *);
 static void cpu_identify1(device_t self, struct cpu_info *);
 static void cpu_identify2(device_t self, struct cpu_info *);
 static void cpu_init_counter(struct cpu_info *);
-static void cpu_setup_id(struct cpu_info *);
 static void cpu_setup_sysctl(device_t, struct cpu_info *);
 static void cpu_setup_rng(device_t, struct cpu_info *);
 static void cpu_setup_aes(device_t, struct cpu_info *);
@@ -109,7 +110,6 @@ cpu_attach(device_t dv, cpuid_t id)
 	if (unit == 0) {
 		ci = curcpu();
 		ci->ci_cpuid = id;
-		cpu_setup_id(ci);
 	} else {
 #ifdef MULTIPROCESSOR
 		if ((boothowto & RB_MD1) != 0) {
@@ -150,26 +150,31 @@ cpu_attach(device_t dv, cpuid_t id)
 	arm_cpu_do_topology(ci);
 	cpu_identify(dv, ci);
 
+	cpu_setup_sysctl(dv, ci);
+
 #ifdef MULTIPROCESSOR
 	if (unit != 0) {
 		mi_cpu_attach(ci);
 		pmap_tlb_info_attach(&pmap_tlb0_info, ci);
-		return;
+		aarch64_parsecacheinfo(ci);
 	}
 #endif /* MULTIPROCESSOR */
 
-	set_cpufuncs();
 	fpu_attach(ci);
 
 	cpu_identify1(dv, ci);
-
-	/* aarch64_getcacheinfo(0) was called by locore.S */
-	aarch64_printcacheinfo(dv);
+	aarch64_printcacheinfo(dv, ci);
 	cpu_identify2(dv, ci);
 
+	if (unit != 0) {
+	    return;
+	}
+
+	db_machdep_init(ci);
+
 	cpu_init_counter(ci);
 
-	cpu_setup_sysctl(dv, ci);
+	/* These currently only check the BP. */
 	cpu_setup_rng(dv, ci);
 	cpu_setup_aes(dv, ci);
 	cpu_setup_chacha(dv, ci);
@@ -251,10 +256,9 @@ cpu_identify(device_t self, struct cpu_i
 static void
 cpu_identify1(device_t self, struct cpu_info *ci)
 {
-	uint64_t ctr, clidr, sctlr;	/* for cache */
+	struct aarch64_sysctl_cpu_id *id = &ci->ci_id;
+	uint64_t sctlr = ci->ci_sctlr_el1;
 
-	/* SCTLR - System Control Register */
-	sctlr = reg_sctlr_el1_read();
 	if (sctlr & SCTLR_I)
 		aprint_verbose_dev(self, "IC enabled");
 	else
@@ -288,8 +292,8 @@ cpu_identify1(device_t self, struct cpu_
 	/*
 	 * CTR - Cache Type Register
 	 */
-	ctr = reg_ctr_el0_read();
-	clidr = reg_clidr_el1_read();
+	const uint64_t ctr = id->ac_ctr;
+	const uint64_t clidr = id->ac_clidr;
 	aprint_verbose_dev(self, "Cache Writeback Granule %" PRIu64 "B,"
 	    " Exclusives Reservation Granule %" PRIu64 "B\n",
 	    __SHIFTOUT(ctr, CTR_EL0_CWG_LINE) * 4,
@@ -313,22 +317,14 @@ cpu_identify1(device_t self, struct cpu_
 static void
 cpu_identify2(device_t self, struct cpu_info *ci)
 {
-	struct aarch64_sysctl_cpu_id *id = &ci->ci_id;
-	uint64_t dfr0;
-
-	if (!CPU_IS_PRIMARY(ci)) {
-		cpu_setup_id(ci);
-		cpu_setup_sysctl(self, ci);
-	}
-
-	dfr0 = reg_id_aa64dfr0_el1_read();
+	struct aarch64_sysctl_cpu_id * const id = &ci->ci_id;
 
 	aprint_debug_dev(self, "midr=0x%" PRIx32 " mpidr=0x%" PRIx32 "\n",
-	    (uint32_t)ci->ci_id.ac_midr, (uint32_t)ci->ci_id.ac_mpidr);
+	    (uint32_t)id->ac_midr, (uint32_t)id->ac_mpidr);
 	aprint_verbose_dev(self, "revID=0x%" PRIx64, id->ac_revidr);
 
 	/* ID_AA64DFR0_EL1 */
-	switch (__SHIFTOUT(dfr0, ID_AA64DFR0_EL1_PMUVER)) {
+	switch (__SHIFTOUT(id->ac_aa64dfr0, ID_AA64DFR0_EL1_PMUVER)) {
 	case ID_AA64DFR0_EL1_PMUVER_V3:
 		aprint_verbose(", PMCv3");
 		break;
@@ -501,13 +497,16 @@ cpu_init_counter(struct cpu_info *ci)
 }
 
 /*
- * Fill in this CPUs id data.  Must be called from hatched cpus.
+ * Fill in this CPUs id data.  Must be called on all cpus.
  */
-static void
+void __noasan
 cpu_setup_id(struct cpu_info *ci)
 {
 	struct aarch64_sysctl_cpu_id *id = &ci->ci_id;
 
+	/* SCTLR - System Control Register */
+	ci->ci_sctlr_el1 = reg_sctlr_el1_read();
+
 	memset(id, 0, sizeof *id);
 
 	id->ac_midr      = reg_midr_el1_read();
@@ -611,10 +610,6 @@ cpu_setup_rng(device_t dv, struct cpu_in
 {
 	struct aarch64_sysctl_cpu_id *id = &ci->ci_id;
 
-	/* Probably shared between cores.  */
-	if (!CPU_IS_PRIMARY(ci))
-		return;
-
 	/* Verify that it is supported.  */
 	switch (__SHIFTOUT(id->ac_aa64isar0, ID_AA64ISAR0_EL1_RNDR)) {
 	case ID_AA64ISAR0_EL1_RNDR_RNDRRS:
@@ -676,25 +671,47 @@ cpu_setup_chacha(device_t dv, struct cpu
 }
 
 #ifdef MULTIPROCESSOR
+/*
+ * Initialise a secondary processor.
+ *
+ * printf isn't available as kmutex(9) relies on curcpu which isn't setup yet.
+ *
+ */
+void __noasan
+cpu_init_secondary_processor(int cpuindex)
+{
+	struct cpu_info * ci = &cpu_info_store[cpuindex];
+	struct aarch64_sysctl_cpu_id *id = &ci->ci_id;
+
+	aarch64_setcpufuncs(ci);
+
+	/* Sets ci->ci_{sctlr,midr,mpidr}, etc */
+	cpu_setup_id(ci);
+
+	arm_cpu_topology_set(ci, id->ac_mpidr);
+	aarch64_getcacheinfo(ci);
+
+	cpu_set_hatched(cpuindex);
+
+	/*
+	 * return to assembly to wait for cpu_boot_secondary_processors
+	 */
+}
+
+
+/*
+ * When we are called, the MMU and caches are on and we are running on the stack
+ * of the idlelwp for this cpu.
+ */
 void
 cpu_hatch(struct cpu_info *ci)
 {
 	KASSERT(curcpu() == ci);
 	KASSERT((reg_tcr_el1_read() & TCR_EPD0) != 0);
 
-	mutex_enter(&cpu_hatch_lock);
-
-	set_cpufuncs();
-	fpu_attach(ci);
-
-	cpu_identify1(ci->ci_dev, ci);
-	aarch64_getcacheinfo(device_unit(ci->ci_dev));
-	aarch64_printcacheinfo(ci->ci_dev);
-	cpu_identify2(ci->ci_dev, ci);
 #ifdef DDB
-	db_machdep_init();
+	db_machdep_cpu_init();
 #endif
-	mutex_exit(&cpu_hatch_lock);
 
 	cpu_init_counter(ci);
 

Index: src/sys/arch/aarch64/aarch64/cpufunc.c
diff -u src/sys/arch/aarch64/aarch64/cpufunc.c:1.31 src/sys/arch/aarch64/aarch64/cpufunc.c:1.32
--- src/sys/arch/aarch64/aarch64/cpufunc.c:1.31	Sun Oct 31 07:56:55 2021
+++ src/sys/arch/aarch64/aarch64/cpufunc.c	Sun Oct 31 16:23:47 2021
@@ -1,4 +1,4 @@
-/*	$NetBSD: cpufunc.c,v 1.31 2021/10/31 07:56:55 skrll Exp $	*/
+/*	$NetBSD: cpufunc.c,v 1.32 2021/10/31 16:23:47 skrll Exp $	*/
 
 /*
  * Copyright (c) 2017 Ryo Shimizu <r...@nerv.org>
@@ -30,7 +30,7 @@
 #include "opt_multiprocessor.h"
 
 #include <sys/cdefs.h>
-__KERNEL_RCSID(0, "$NetBSD: cpufunc.c,v 1.31 2021/10/31 07:56:55 skrll Exp $");
+__KERNEL_RCSID(0, "$NetBSD: cpufunc.c,v 1.32 2021/10/31 16:23:47 skrll Exp $");
 
 #include <sys/param.h>
 #include <sys/types.h>
@@ -53,13 +53,7 @@ u_int aarch64_cache_prefer_mask;
 int aarch64_pan_enabled __read_mostly;
 int aarch64_pac_enabled __read_mostly;
 
-/* cache info per cluster. the same cluster has the same cache configuration? */
-#define MAXCPUPACKAGES	MAXCPUS		/* maximum of ci->ci_package_id */
-static struct aarch64_cache_info *aarch64_cacheinfo[MAXCPUPACKAGES];
-static struct aarch64_cache_info aarch64_cacheinfo0[MAX_CACHE_LEVEL];
-
-
-static void
+static void __noasan
 extract_cacheunit(int level, bool insn, int cachetype,
     struct aarch64_cache_info *cacheinfo)
 {
@@ -101,35 +95,14 @@ extract_cacheunit(int level, bool insn, 
 	cunit->cache_size = cunit->cache_way_size * cunit->cache_ways;
 }
 
-void
-aarch64_getcacheinfo(int unit)
+
+/* Must be called on each processor */
+void __noasan
+aarch64_getcacheinfo(struct cpu_info *ci)
 {
-	struct cpu_info * const ci = curcpu();
+	struct aarch64_cache_info * const cinfo = ci->ci_cacheinfo;
 	uint32_t clidr, ctr;
-	u_int vindexsize;
 	int level, cachetype;
-	struct aarch64_cache_info *cinfo = NULL;
-
-	if (cputype == 0)
-		cputype = aarch64_cpuid();
-
-	/* already extract about this cluster? */
-	KASSERT(ci->ci_package_id < MAXCPUPACKAGES);
-	cinfo = aarch64_cacheinfo[ci->ci_package_id];
-	if (cinfo != NULL) {
-		ci->ci_cacheinfo = cinfo;
-		return;
-	}
-
-	/* Need static buffer for the boot CPU */
-	if (unit == 0)
-		cinfo = aarch64_cacheinfo0;
-	else
-		cinfo = kmem_zalloc(sizeof(struct aarch64_cache_info)
-		    * MAX_CACHE_LEVEL, KM_SLEEP);
-	aarch64_cacheinfo[ci->ci_package_id] = cinfo;
-	ci->ci_cacheinfo = cinfo;
-
 
 	/*
 	 * CTR - Cache Type Register
@@ -150,19 +123,6 @@ aarch64_getcacheinfo(int unit)
 		break;
 	}
 
-	/* remember maximum alignment */
-	if (arm_dcache_maxline < __SHIFTOUT(ctr, CTR_EL0_DMIN_LINE)) {
-		arm_dcache_maxline = __SHIFTOUT(ctr, CTR_EL0_DMIN_LINE);
-		arm_dcache_align = sizeof(int) << arm_dcache_maxline;
-		arm_dcache_align_mask = arm_dcache_align - 1;
-	}
-
-#ifdef MULTIPROCESSOR
-	if (coherency_unit < arm_dcache_align)
-		panic("coherency_unit %ld < %d; increase COHERENCY_UNIT",
-		    coherency_unit, arm_dcache_align);
-#endif
-
 	/*
 	 * CLIDR -  Cache Level ID Register
 	 * CSSELR - Cache Size Selection Register
@@ -213,6 +173,29 @@ aarch64_getcacheinfo(int unit)
 		 */
 		cachetype = CACHE_TYPE_PIPT;
 	}
+}
+
+
+void
+aarch64_parsecacheinfo(struct cpu_info *ci)
+{
+	struct aarch64_cache_info * const cinfo = ci->ci_cacheinfo;
+	struct aarch64_sysctl_cpu_id *id = &ci->ci_id;
+	const uint32_t ctr = id->ac_ctr;
+	u_int vindexsize;
+
+	/* remember maximum alignment */
+	if (arm_dcache_maxline < __SHIFTOUT(ctr, CTR_EL0_DMIN_LINE)) {
+		arm_dcache_maxline = __SHIFTOUT(ctr, CTR_EL0_DMIN_LINE);
+		arm_dcache_align = sizeof(int) << arm_dcache_maxline;
+		arm_dcache_align_mask = arm_dcache_align - 1;
+	}
+
+#ifdef MULTIPROCESSOR
+	if (coherency_unit < arm_dcache_align)
+		panic("coherency_unit %ld < %d; increase COHERENCY_UNIT",
+		    coherency_unit, arm_dcache_align);
+#endif
 
 	/* calculate L1 icache virtual index size */
 	if ((cinfo[0].icache.cache_type == CACHE_TYPE_VIVT ||
@@ -232,6 +215,7 @@ aarch64_getcacheinfo(int unit)
 	if (vindexsize > aarch64_cache_vindexsize) {
 		aarch64_cache_vindexsize = vindexsize;
 		aarch64_cache_prefer_mask = vindexsize - 1;
+
 		if (uvm.page_init_done)
 			uvm_page_recolor(vindexsize / PAGE_SIZE);
 	}
@@ -311,9 +295,8 @@ prt_cache(device_t self, struct aarch64_
 }
 
 void
-aarch64_printcacheinfo(device_t dev)
+aarch64_printcacheinfo(device_t dev, struct cpu_info *ci)
 {
-	struct cpu_info * const ci = curcpu();
 	struct aarch64_cache_info * const cinfo = ci->ci_cacheinfo;
 	int level;
 
@@ -432,7 +415,15 @@ aarch64_dcache_wb_all(void)
 int
 set_cpufuncs(void)
 {
-	struct cpu_info * const ci = curcpu();
+	// This is only called from the BP
+
+	return aarch64_setcpufuncs(&cpu_info_store[0]);
+}
+
+
+int
+aarch64_setcpufuncs(struct cpu_info *ci)
+{
 	const uint64_t ctr = reg_ctr_el0_read();
 	const uint64_t clidr = reg_clidr_el1_read();
 

Index: src/sys/arch/aarch64/aarch64/db_machdep.c
diff -u src/sys/arch/aarch64/aarch64/db_machdep.c:1.41 src/sys/arch/aarch64/aarch64/db_machdep.c:1.42
--- src/sys/arch/aarch64/aarch64/db_machdep.c:1.41	Sun Oct 17 22:44:34 2021
+++ src/sys/arch/aarch64/aarch64/db_machdep.c	Sun Oct 31 16:23:47 2021
@@ -1,4 +1,4 @@
-/* $NetBSD: db_machdep.c,v 1.41 2021/10/17 22:44:34 ryo Exp $ */
+/* $NetBSD: db_machdep.c,v 1.42 2021/10/31 16:23:47 skrll Exp $ */
 
 /*-
  * Copyright (c) 2014 The NetBSD Foundation, Inc.
@@ -30,7 +30,7 @@
  */
 
 #include <sys/cdefs.h>
-__KERNEL_RCSID(0, "$NetBSD: db_machdep.c,v 1.41 2021/10/17 22:44:34 ryo Exp $");
+__KERNEL_RCSID(0, "$NetBSD: db_machdep.c,v 1.42 2021/10/31 16:23:47 skrll Exp $");
 
 #ifdef _KERNEL_OPT
 #include "opt_compat_netbsd32.h"
@@ -819,7 +819,7 @@ db_md_breakwatchpoints_reload(void)
 }
 
 void
-db_machdep_init(void)
+db_machdep_cpu_init(void)
 {
 	uint64_t dfr, mdscr;
 	int i, cpu_max_breakpoint, cpu_max_watchpoint;
@@ -842,16 +842,27 @@ db_machdep_init(void)
 	mdscr |= MDSCR_MDE | MDSCR_KDE;
 	reg_mdscr_el1_write(mdscr);
 	reg_oslar_el1_write(0);
+}
 
-	/* num of {watch,break}point may be different depending on the core */
-	membar_consumer();
+void
+db_machdep_init(struct cpu_info * const ci)
+{
+	struct aarch64_sysctl_cpu_id * const id = &ci->ci_id;
+	const uint64_t dfr = id->ac_aa64dfr0;
+	const u_int cpu_max_breakpoint = __SHIFTOUT(dfr, ID_AA64DFR0_EL1_BRPS);
+	const u_int cpu_max_watchpoint = __SHIFTOUT(dfr, ID_AA64DFR0_EL1_WRPS);
+
+	/*
+	 * num of {watch,break}point may be different depending on the
+	 * core.
+	 */
 	if (max_breakpoint > cpu_max_breakpoint)
 		max_breakpoint = cpu_max_breakpoint;
 	if (max_watchpoint > cpu_max_watchpoint)
 		max_watchpoint = cpu_max_watchpoint;
-	membar_producer();
 }
 
+
 static void
 show_breakpoints(void)
 {

Index: src/sys/arch/aarch64/aarch64/locore.S
diff -u src/sys/arch/aarch64/aarch64/locore.S:1.81 src/sys/arch/aarch64/aarch64/locore.S:1.82
--- src/sys/arch/aarch64/aarch64/locore.S:1.81	Thu Oct 21 06:06:16 2021
+++ src/sys/arch/aarch64/aarch64/locore.S	Sun Oct 31 16:23:47 2021
@@ -1,4 +1,4 @@
-/*	$NetBSD: locore.S,v 1.81 2021/10/21 06:06:16 skrll Exp $	*/
+/*	$NetBSD: locore.S,v 1.82 2021/10/31 16:23:47 skrll Exp $	*/
 
 /*
  * Copyright (c) 2017 Ryo Shimizu <r...@nerv.org>
@@ -38,7 +38,7 @@
 #include <aarch64/hypervisor.h>
 #include "assym.h"
 
-RCSID("$NetBSD: locore.S,v 1.81 2021/10/21 06:06:16 skrll Exp $")
+RCSID("$NetBSD: locore.S,v 1.82 2021/10/31 16:23:47 skrll Exp $")
 
 #ifdef AARCH64_DEVICE_MEM_STRONGLY_ORDERED
 #define	MAIR_DEVICE_MEM		MAIR_DEVICE_nGnRnE
@@ -194,15 +194,22 @@ vstart:
 	msr	sctlr_el1, x0
 1:
 
+	adrl	x19, cpu_info_store	/* curcpu (&cpu_info_store[0] */
+
+	mov	x0, x19
+	bl	cpu_setup_id
+
 	/* set topology information */
-	adrl	x0, cpu_info_store	/* curcpu */
+	mov	x0, x19
 	mrs	x1, mpidr_el1
 	mov	x2, #0
 	bl	arm_cpu_topology_set
 
-	/* get cache configuration */
-	mov	x0, xzr
+	/* get and parse the cache configuration */
+	mov	x0, x19
 	bl	aarch64_getcacheinfo
+	mov	x0, x19
+	bl	aarch64_parsecacheinfo
 
 #ifdef KASAN
 	adrl	x0, lwp0uspace
@@ -453,7 +460,6 @@ ENTRY_NP(cpu_mpstart)
 	CPU_DPRINTREG("CurrentEL        = ", x20)
 #endif /* LOCORE_EL2 */
 
-
 	bl	mmu_disable
 	bl	init_sysregs
 
@@ -498,46 +504,14 @@ mp_vstart:
 	msr	tpidr_el0, xzr
 	msr	tpidrro_el0, xzr
 
-	mov	x0, #CPU_INFO_SIZE
-	mul	x0, x27, x0
-	adrl	x1, _C_LABEL(cpu_info_store)
-	add	x0, x0, x1		/* x0 = &cpu_info_store[cpuindex] */
-
-	/* temporarily set tpidr_el1 to curcpu until the idle lwp is setup */
-	msr	tpidr_el1, x0		/* tpidr_el1 = curcpu = x0 */
-
-	/* fill curcpu()->ci_{midr,mpidr} */
-	mrs	x1, midr_el1
-	str	x1, [x0, #CI_MIDR]	/* curcpu()->ci_cpuid = midr_el1 */
-	mrs	x1, mpidr_el1
-	str	x1, [x0, #CI_MPIDR]	/* curcpu()->ci_mpidr = mpidr_el1 */
-
-	/* set topology information */
-	mov	x2, #0
-	bl	arm_cpu_topology_set
-
-	/* x28 = &arm_cpu_hatched[cpuindex / (sizeof(u_long) * NBBY)] */
-	adrl	x0, _C_LABEL(arm_cpu_hatched)
-	// Appease clang - mov	x1, x27, lsr #6
-	orr	x1, xzr, x27, lsr #6
-	add	x28, x0, x1, lsl #3
+	mov	x0, x27
+	bl	cpu_init_secondary_processor
 
 	/* x29 = __BIT(cpuindex % (sizeof(u_long) * NBBY)) */
 	mov	x0, #1
 	and	x2, x27, #63
 	lsl	x29, x0, x2
 
-	/*
-	 * atomic_or_ulong(&arm_cpu_hatched[cpuindex / (sizeof(u_long)  * NBBY)],
-	 *   _BIT(cpuindex % ((sizeof(u_long) * NBBY)
-	 * to inform the boot processor.
-	 */
-	mov	x0, x28
-	mov	x1, x29
-	bl	_C_LABEL(atomic_or_ulong)	/* hatched! */
-	dsb	sy
-	sev
-
 	/* x28 = &arm_cpu_mbox[cpuindex / (sizeof(u_long) * NBBY)] */
 	adrl	x0, _C_LABEL(arm_cpu_mbox)
 	// Appease clang - mov	x1, x27, lsr #6
@@ -554,11 +528,15 @@ mp_vstart:
 	b	1b
 9:
 
+	mov	x0, #CPU_INFO_SIZE
+	mul	x0, x27, x0
+	adrl	x1, _C_LABEL(cpu_info_store)
+	add	x0, x0, x1		/* x0 = &cpu_info_store[cpuindex] */
+
 	/*
 	 * set curlwp (tpidr_el1 and curcpu()->ci_curlwp) now we know the
 	 * idle lwp from curcpu()->ci_idlelwp
 	 */
-	mrs	x0, tpidr_el1		/* curcpu (temporarily) */
 	ldr	x1, [x0, #CI_IDLELWP]	/* x0 = curcpu()->ci_idlelwp */
 	msr	tpidr_el1, x1		/* tpidr_el1 = curlwp = x1 */
 	str	x1, [x0, #CI_CURLWP]	/* curlwp is idlelwp */

Index: src/sys/arch/aarch64/include/cpu.h
diff -u src/sys/arch/aarch64/include/cpu.h:1.42 src/sys/arch/aarch64/include/cpu.h:1.43
--- src/sys/arch/aarch64/include/cpu.h:1.42	Sun Oct 31 08:21:24 2021
+++ src/sys/arch/aarch64/include/cpu.h	Sun Oct 31 16:23:47 2021
@@ -1,4 +1,4 @@
-/* $NetBSD: cpu.h,v 1.42 2021/10/31 08:21:24 skrll Exp $ */
+/* $NetBSD: cpu.h,v 1.43 2021/10/31 16:23:47 skrll Exp $ */
 
 /*-
  * Copyright (c) 2014, 2020 The NetBSD Foundation, Inc.
@@ -72,6 +72,32 @@ struct aarch64_cpufuncs {
 	void (*cf_icache_sync_range)(vaddr_t, vsize_t);
 };
 
+#define MAX_CACHE_LEVEL	8		/* ARMv8 has maximum 8 level cache */
+
+struct aarch64_cache_unit {
+	u_int cache_type;
+#define CACHE_TYPE_VPIPT	0	/* VMID-aware PIPT */
+#define CACHE_TYPE_VIVT		1	/* ASID-tagged VIVT */
+#define CACHE_TYPE_VIPT		2
+#define CACHE_TYPE_PIPT		3
+	u_int cache_line_size;
+	u_int cache_ways;
+	u_int cache_sets;
+	u_int cache_way_size;
+	u_int cache_size;
+};
+
+struct aarch64_cache_info {
+	u_int cacheable;
+#define CACHE_CACHEABLE_NONE	0
+#define CACHE_CACHEABLE_ICACHE	1	/* instruction cache only */
+#define CACHE_CACHEABLE_DCACHE	2	/* data cache only */
+#define CACHE_CACHEABLE_IDCACHE	3	/* instruction and data caches */
+#define CACHE_CACHEABLE_UNIFIED	4	/* unified cache */
+	struct aarch64_cache_unit icache;
+	struct aarch64_cache_unit dcache;
+};
+
 struct cpu_info {
 	struct cpu_data ci_data;
 	device_t ci_dev;
@@ -134,6 +160,10 @@ struct cpu_info {
 	/* ACPI */
 	uint32_t ci_acpiid;	/* ACPI Processor Unique ID */
 
+	/* cached system registers */
+	uint64_t ci_sctlr_el1;
+	uint64_t ci_sctlr_el2;
+
 	/* sysctl(9) exposed system registers */
 	struct aarch64_sysctl_cpu_id ci_id;
 

Index: src/sys/arch/aarch64/include/cpufunc.h
diff -u src/sys/arch/aarch64/include/cpufunc.h:1.21 src/sys/arch/aarch64/include/cpufunc.h:1.22
--- src/sys/arch/aarch64/include/cpufunc.h:1.21	Sat Oct 23 05:32:40 2021
+++ src/sys/arch/aarch64/include/cpufunc.h	Sun Oct 31 16:23:47 2021
@@ -1,4 +1,4 @@
-/*	$NetBSD: cpufunc.h,v 1.21 2021/10/23 05:32:40 skrll Exp $	*/
+/*	$NetBSD: cpufunc.h,v 1.22 2021/10/31 16:23:47 skrll Exp $	*/
 
 /*
  * Copyright (c) 2017 Ryo Shimizu <r...@nerv.org>
@@ -34,31 +34,7 @@
 #include <arm/armreg.h>
 #include <sys/device_if.h>
 
-struct aarch64_cache_unit {
-	u_int cache_type;
-#define CACHE_TYPE_VPIPT	0	/* VMID-aware PIPT */
-#define CACHE_TYPE_VIVT		1	/* ASID-tagged VIVT */
-#define CACHE_TYPE_VIPT		2
-#define CACHE_TYPE_PIPT		3
-	u_int cache_line_size;
-	u_int cache_ways;
-	u_int cache_sets;
-	u_int cache_way_size;
-	u_int cache_size;
-};
-
-struct aarch64_cache_info {
-	u_int cacheable;
-#define CACHE_CACHEABLE_NONE	0
-#define CACHE_CACHEABLE_ICACHE	1	/* instruction cache only */
-#define CACHE_CACHEABLE_DCACHE	2	/* data cache only */
-#define CACHE_CACHEABLE_IDCACHE	3	/* instruction and data caches */
-#define CACHE_CACHEABLE_UNIFIED	4	/* unified cache */
-	struct aarch64_cache_unit icache;
-	struct aarch64_cache_unit dcache;
-};
 
-#define MAX_CACHE_LEVEL	8		/* ARMv8 has maximum 8 level cache */
 extern u_int aarch64_cache_vindexsize;	/* cachesize/way (VIVT/VIPT) */
 extern u_int aarch64_cache_prefer_mask;
 extern u_int cputype;			/* compat arm */
@@ -71,8 +47,10 @@ void aarch64_pan_init(int);
 int aarch64_pac_init(int);
 
 int set_cpufuncs(void);
-void aarch64_getcacheinfo(int);
-void aarch64_printcacheinfo(device_t);
+int aarch64_setcpufuncs(struct cpu_info *);
+void aarch64_getcacheinfo(struct cpu_info *);
+void aarch64_parsecacheinfo(struct cpu_info *);
+void aarch64_printcacheinfo(device_t, struct cpu_info *);
 
 void aarch64_dcache_wbinv_all(void);
 void aarch64_dcache_inv_all(void);
@@ -103,7 +81,6 @@ void aarch64_tlbi_by_va_ll(vaddr_t);		/*
 void aarch64_tlbi_by_asid_va(int, vaddr_t);	/*  an ASID, a VA */
 void aarch64_tlbi_by_asid_va_ll(int, vaddr_t);	/*  an ASID, a VA, lastlevel */
 
-
 /* misc */
 #define cpu_idnum()			aarch64_cpuid()
 

Index: src/sys/arch/aarch64/include/db_machdep.h
diff -u src/sys/arch/aarch64/include/db_machdep.h:1.14 src/sys/arch/aarch64/include/db_machdep.h:1.15
--- src/sys/arch/aarch64/include/db_machdep.h:1.14	Fri Apr 30 20:07:23 2021
+++ src/sys/arch/aarch64/include/db_machdep.h	Sun Oct 31 16:23:47 2021
@@ -1,4 +1,4 @@
-/* $NetBSD: db_machdep.h,v 1.14 2021/04/30 20:07:23 skrll Exp $ */
+/* $NetBSD: db_machdep.h,v 1.15 2021/10/31 16:23:47 skrll Exp $ */
 
 /*-
  * Copyright (c) 2014 The NetBSD Foundation, Inc.
@@ -227,7 +227,8 @@ void dump_trapframe(struct trapframe *, 
 
 void dump_switchframe(struct trapframe *, void (*)(const char *, ...) __printflike(1, 2));
 const char *strdisasm(vaddr_t, uint64_t);
-void db_machdep_init(void);
+void db_machdep_cpu_init(void);
+void db_machdep_init(struct cpu_info * const);
 
 /* hardware breakpoint/watchpoint functions */
 void aarch64_breakpoint_set(int, vaddr_t);

Index: src/sys/arch/arm/apple/apple_intc.c
diff -u src/sys/arch/arm/apple/apple_intc.c:1.3 src/sys/arch/arm/apple/apple_intc.c:1.4
--- src/sys/arch/arm/apple/apple_intc.c:1.3	Sat Oct 16 06:37:43 2021
+++ src/sys/arch/arm/apple/apple_intc.c	Sun Oct 31 16:23:47 2021
@@ -1,4 +1,4 @@
-/* $NetBSD: apple_intc.c,v 1.3 2021/10/16 06:37:43 ryo Exp $ */
+/* $NetBSD: apple_intc.c,v 1.4 2021/10/31 16:23:47 skrll Exp $ */
 
 /*-
  * Copyright (c) 2021 Jared McNeill <jmcne...@invisible.ca>
@@ -32,7 +32,7 @@
 #define	_INTR_PRIVATE
 
 #include <sys/cdefs.h>
-__KERNEL_RCSID(0, "$NetBSD: apple_intc.c,v 1.3 2021/10/16 06:37:43 ryo Exp $");
+__KERNEL_RCSID(0, "$NetBSD: apple_intc.c,v 1.4 2021/10/31 16:23:47 skrll Exp $");
 
 #include <sys/param.h>
 #include <sys/bus.h>
@@ -229,7 +229,6 @@ apple_intc_fdt_establish(device_t dev, u
     int (*func)(void *), void *arg, const char *xname)
 {
 	struct apple_intc_softc * const sc = device_private(dev);
-	struct apple_intc_percpu * const pc = &sc->sc_pc[cpu_index(curcpu())];
 
 	/* 1st cell is the interrupt type (0=IRQ, 1=FIQ) */
 	const u_int type = be32toh(specifier[0]);
@@ -238,10 +237,27 @@ apple_intc_fdt_establish(device_t dev, u
 	/* 3rd cell is the interrupt flags */
 
 	const u_int mpsafe = (flags & FDT_INTR_MPSAFE) ? IST_MPSAFE : 0;
-	const int irq = type == 0 ?
-	    intno : pc->pc_pic.pic_irqbase + LOCALPIC_SOURCE_TIMER;
-	return intr_establish_xname(irq, ipl, IST_LEVEL | mpsafe, func, arg,
-	    xname);
+
+	if (type == 0)
+		return intr_establish_xname(intno, ipl, IST_LEVEL | mpsafe,
+		    func, arg, xname);
+
+	/* iterate over CPUs for LOCALPIC_SOURCE_TIMER */
+	CPU_INFO_ITERATOR cii;
+	struct cpu_info *ci;
+	void *ih = NULL;
+	for (CPU_INFO_FOREACH(cii, ci)) {
+		const cpuid_t cpuno = cpu_index(ci);
+		struct apple_intc_percpu * const pc = &sc->sc_pc[cpuno];
+		struct pic_softc * const pic = &pc->pc_pic;
+		const int irq = pic->pic_irqbase + LOCALPIC_SOURCE_TIMER;
+
+		void *ihn = intr_establish_xname(irq, ipl, IST_LEVEL | mpsafe,
+		    func, arg, xname);
+		if (cpuno == 0)
+			ih = ihn;
+	}
+	return ih;
 }
 
 static void
@@ -421,36 +437,6 @@ apple_intc_ipi_handler(void *priv)
 }
 #endif /* MULTIPROCESSOR */
 
-static void
-apple_intc_percpu_init(void *priv, struct cpu_info *ci)
-{
-	struct apple_intc_softc * const sc = priv;
-	const u_int cpuno = cpu_index(ci);
-	struct apple_intc_percpu * const pc = &sc->sc_pc[cpuno];
-	struct pic_softc * const pic = &pc->pc_pic;
-
-#ifdef MULTIPROCESSOR
-	pic->pic_cpus = ci->ci_kcpuset;
-#endif
-
-	pic_add(pic, PIC_IRQBASE_ALLOC);
-
-#ifdef MULTIPROCESSOR
-	if (cpuno != 0) {
-		struct intrsource * const is =
-		    sc->sc_pc[0].pc_pic.pic_sources[LOCALPIC_SOURCE_TIMER];
-		KASSERT(is != NULL);
-
-		intr_establish_xname(pic->pic_irqbase + LOCALPIC_SOURCE_TIMER,
-		    is->is_ipl, is->is_type | (is->is_mpsafe ? IST_MPSAFE : 0),
-		    is->is_func, is->is_arg, is->is_xname);
-	}
-
-	intr_establish_xname(pic->pic_irqbase + LOCALPIC_SOURCE_IPI, IPL_HIGH,
-	    IST_LEVEL | IST_MPSAFE, apple_intc_ipi_handler, pc, "ipi");
-#endif
-}
-
 static int
 apple_intc_match(device_t parent, cfdata_t cf, void *aux)
 {
@@ -467,7 +453,6 @@ apple_intc_attach(device_t parent, devic
 	const int phandle = faa->faa_phandle;
 	bus_addr_t addr;
 	bus_size_t size;
-	u_int cpuno;
 	int error;
 
 	if (fdtbus_get_reg(phandle, 0, &addr, &size) != 0) {
@@ -509,18 +494,30 @@ apple_intc_attach(device_t parent, devic
 	KASSERT(ncpu != 0);
 	sc->sc_cpuid = kmem_zalloc(sizeof(*sc->sc_cpuid) * ncpu, KM_SLEEP);
 	sc->sc_pc = kmem_zalloc(sizeof(*sc->sc_pc) * ncpu, KM_SLEEP);
-	for (cpuno = 0; cpuno < ncpu; cpuno++) {
-		sc->sc_pc[cpuno].pc_sc = sc;
-		sc->sc_pc[cpuno].pc_cpuid = cpuno;
-		sc->sc_pc[cpuno].pc_pic.pic_ops = &apple_intc_localpicops;
-		sc->sc_pc[cpuno].pc_pic.pic_maxsources = 2;
-		snprintf(sc->sc_pc[cpuno].pc_pic.pic_name,
-		    sizeof(sc->sc_pc[cpuno].pc_pic.pic_name), "AIC/%u", cpuno);
+
+	CPU_INFO_ITERATOR cii;
+	struct cpu_info *ci;
+	for (CPU_INFO_FOREACH(cii, ci)) {
+		const cpuid_t cpuno = cpu_index(ci);
+		struct apple_intc_percpu * const pc = &sc->sc_pc[cpuno];
+		struct pic_softc * const pic = &pc->pc_pic;
+
+		pc->pc_sc = sc;
+		pc->pc_cpuid = cpuno;
+
+		pic->pic_cpus = ci->ci_kcpuset;
+		pic->pic_ops = &apple_intc_localpicops;
+		pic->pic_maxsources = 2;
+		snprintf(pic->pic_name, sizeof(pic->pic_name), "AIC/%lu", cpuno);
+
+		pic_add(pic, PIC_IRQBASE_ALLOC);
+
+		intr_establish_xname(pic->pic_irqbase + LOCALPIC_SOURCE_IPI,
+		    IPL_HIGH, IST_LEVEL | IST_MPSAFE, apple_intc_ipi_handler,
+		    pc, "ipi");
 	}
 
 	apple_intc_cpu_init(&sc->sc_pic, curcpu());
-	apple_intc_percpu_init(sc, curcpu());
-	arm_fdt_cpu_hatch_register(sc, apple_intc_percpu_init);
 }
 
 CFATTACH_DECL_NEW(apple_intc, sizeof(struct apple_intc_softc),

Index: src/sys/arch/arm/arm/cpu_subr.c
diff -u src/sys/arch/arm/arm/cpu_subr.c:1.3 src/sys/arch/arm/arm/cpu_subr.c:1.4
--- src/sys/arch/arm/arm/cpu_subr.c:1.3	Thu Dec  3 07:45:52 2020
+++ src/sys/arch/arm/arm/cpu_subr.c	Sun Oct 31 16:23:47 2021
@@ -1,4 +1,4 @@
-/*	$NetBSD: cpu_subr.c,v 1.3 2020/12/03 07:45:52 skrll Exp $	*/
+/*	$NetBSD: cpu_subr.c,v 1.4 2021/10/31 16:23:47 skrll Exp $	*/
 
 /*-
  * Copyright (c) 2020 The NetBSD Foundation, Inc.
@@ -33,7 +33,7 @@
 #include "opt_multiprocessor.h"
 
 #include <sys/cdefs.h>
-__KERNEL_RCSID(0, "$NetBSD: cpu_subr.c,v 1.3 2020/12/03 07:45:52 skrll Exp $");
+__KERNEL_RCSID(0, "$NetBSD: cpu_subr.c,v 1.4 2021/10/31 16:23:47 skrll Exp $");
 
 #include <sys/param.h>
 #include <sys/atomic.h>
@@ -70,8 +70,6 @@ volatile u_long arm_cpu_hatched[howmany(
 volatile u_long arm_cpu_mbox[howmany(MAXCPUS, CPUINDEX_DIVISOR)] __cacheline_aligned = { 0 };
 u_int arm_cpu_max = 1;
 
-kmutex_t cpu_hatch_lock;
-
 void
 cpu_boot_secondary_processors(void)
 {
@@ -80,8 +78,6 @@ cpu_boot_secondary_processors(void)
 	if ((boothowto & RB_MD1) != 0)
 		return;
 
-	mutex_init(&cpu_hatch_lock, MUTEX_DEFAULT, IPL_NONE);
-
 	VPRINTF("%s: starting secondary processors\n", __func__);
 
 	/* send mbox to have secondary processors do cpu_hatch() */
@@ -127,6 +123,8 @@ cpu_set_hatched(int cpuindex)
 	const u_long bit = __BIT(cpuindex % CPUINDEX_DIVISOR);
 
 	atomic_or_ulong(&arm_cpu_hatched[off], bit);
+	dsb(ishst);
+	sev();
 }
 
 void
@@ -138,7 +136,6 @@ cpu_clr_mbox(int cpuindex)
 
 	/* Notify cpu_boot_secondary_processors that we're done */
 	atomic_and_ulong(&arm_cpu_mbox[off], ~bit);
-	membar_producer();
 	dsb(ishst);
 	sev();
 }

Index: src/sys/arch/arm/arm/undefined.c
diff -u src/sys/arch/arm/arm/undefined.c:1.71 src/sys/arch/arm/arm/undefined.c:1.72
--- src/sys/arch/arm/arm/undefined.c:1.71	Sat Oct 30 09:23:10 2021
+++ src/sys/arch/arm/arm/undefined.c	Sun Oct 31 16:23:47 2021
@@ -1,4 +1,4 @@
-/*	$NetBSD: undefined.c,v 1.71 2021/10/30 09:23:10 skrll Exp $	*/
+/*	$NetBSD: undefined.c,v 1.72 2021/10/31 16:23:47 skrll Exp $	*/
 
 /*
  * Copyright (c) 2001 Ben Harris.
@@ -44,12 +44,13 @@
  * Created      : 06/01/95
  */
 
+#include "opt_cputypes.h"
 #include "opt_ddb.h"
 #include "opt_dtrace.h"
 #include "opt_kgdb.h"
 
 #include <sys/cdefs.h>
-__KERNEL_RCSID(0, "$NetBSD: undefined.c,v 1.71 2021/10/30 09:23:10 skrll Exp $");
+__KERNEL_RCSID(0, "$NetBSD: undefined.c,v 1.72 2021/10/31 16:23:47 skrll Exp $");
 
 #include <sys/param.h>
 #include <sys/cpu.h>
@@ -97,6 +98,13 @@ install_coproc_handler(int coproc, undef
 }
 
 void
+replace_coproc_handler(int coproc, undef_handler_t handler)
+{
+	LIST_INIT(&undefined_handlers[coproc]);	/* forget current handlers */
+	install_coproc_handler(coproc, handler);
+}
+
+void
 install_coproc_handler_static(int coproc, struct undefined_handler *uh)
 {
 
@@ -199,6 +207,26 @@ gdb_trapper(u_int addr, u_int insn, stru
 	return 1;
 }
 
+#ifdef FPU_VFP
+/*
+ * Used to test for a VFP. The following function is installed as a coproc10
+ * handler on the undefined instruction vector and then we issue a VFP
+ * instruction. If ci_vfp_id is set to zero then the VFP did not handle
+ * the instruction so must be absent, or disabled.
+ */
+
+static int
+vfp_test(u_int address, u_int insn, trapframe_t *frame, int fault_code)
+{
+	struct cpu_info * const ci = curcpu();
+
+	frame->tf_pc += INSN_SIZE;	/* step past the trapping instruction */
+	ci->ci_vfp_id = 0;		/* record that the probe trapped */
+
+	return 0;
+}
+#endif
+
 static struct undefined_handler cp15_uh = {
 	.uh_handler = cp15_trapper,
 };
@@ -210,6 +238,11 @@ static struct undefined_handler gdb_uh_t
 	.uh_handler = gdb_trapper,
 };
 #endif
+#ifdef FPU_VFP
+struct undefined_handler vfptest_uh = {
+	.uh_handler = vfp_test,
+};
+#endif
 
 #ifdef KDTRACE_HOOKS
 #include <sys/dtrace_bsd.h>
@@ -259,6 +292,9 @@ undefined_init(void)
 #ifdef THUMB_CODE
 	install_coproc_handler_static(THUMB_UNKNOWN_HANDLER, &gdb_uh_thumb);
 #endif
+#ifdef FPU_VFP
+	install_coproc_handler_static(VFP_COPROC, &vfptest_uh);
+#endif
 }
 
 void

Index: src/sys/arch/arm/arm32/arm32_boot.c
diff -u src/sys/arch/arm/arm32/arm32_boot.c:1.43 src/sys/arch/arm/arm32/arm32_boot.c:1.44
--- src/sys/arch/arm/arm32/arm32_boot.c:1.43	Thu Jun  3 07:06:22 2021
+++ src/sys/arch/arm/arm32/arm32_boot.c	Sun Oct 31 16:23:47 2021
@@ -1,4 +1,4 @@
-/*	$NetBSD: arm32_boot.c,v 1.43 2021/06/03 07:06:22 skrll Exp $	*/
+/*	$NetBSD: arm32_boot.c,v 1.44 2021/10/31 16:23:47 skrll Exp $	*/
 
 /*
  * Copyright (c) 2002, 2003, 2005  Genetec Corporation.  All rights reserved.
@@ -122,7 +122,7 @@
  */
 
 #include <sys/cdefs.h>
-__KERNEL_RCSID(1, "$NetBSD: arm32_boot.c,v 1.43 2021/06/03 07:06:22 skrll Exp $");
+__KERNEL_RCSID(1, "$NetBSD: arm32_boot.c,v 1.44 2021/10/31 16:23:47 skrll Exp $");
 
 #include "opt_arm_debug.h"
 #include "opt_cputypes.h"
@@ -238,6 +238,12 @@ initarm_common(vaddr_t kvm_base, vsize_t
 	VPRINTF("undefined ");
 	undefined_init();
 
+#ifdef FPU_VFP
+	/* vfp_detect uses an undefined handler */
+	VPRINTF("vfp ");
+	vfp_detect(curcpu());
+#endif
+
 	/* Load memory into UVM. */
 	VPRINTF("page ");
 	uvm_md_init();
@@ -362,11 +368,7 @@ cpu_hatch(struct cpu_info *ci, u_int cpu
 	splhigh();
 
 	VPRINTF("%s(%s): ", __func__, cpu_name(ci));
-	/* mpidr/midr filled in by armv7_mpcontinuation */
-	ci->ci_ctrl = armreg_sctlr_read();
-	ci->ci_arm_cpuid = cpu_idnum();
-	ci->ci_arm_cputype = ci->ci_arm_cpuid & CPU_ID_CPU_MASK;
-	ci->ci_arm_cpurev = ci->ci_arm_cpuid & CPU_ID_REVISION_MASK;
+	/* mpidr/midr filled in by cpu_init_secondary_processor */
 
 	/*
 	 * Make sure we have the right vector page.
@@ -409,16 +411,6 @@ cpu_hatch(struct cpu_info *ci, u_int cpu
 	}
 #endif
 
-	mutex_enter(&cpu_hatch_lock);
-
-	aprint_naive("%s", device_xname(ci->ci_dev));
-	aprint_normal("%s", device_xname(ci->ci_dev));
-	identify_arm_cpu(ci->ci_dev, ci);
-	VPRINTF(" vfp");
-	vfp_attach(ci);
-
-	mutex_exit(&cpu_hatch_lock);
-
 	VPRINTF(" md(%p)", md_cpu_init);
 	if (md_cpu_init != NULL)
 		(*md_cpu_init)(ci);
@@ -430,7 +422,6 @@ cpu_hatch(struct cpu_info *ci, u_int cpu
 	intr_cpu_init(ci);
 
 	VPRINTF(" done!\n");
-
 	cpu_clr_mbox(cpuindex);
 }
 #endif /* MULTIPROCESSOR */

Index: src/sys/arch/arm/arm32/arm32_machdep.c
diff -u src/sys/arch/arm/arm32/arm32_machdep.c:1.140 src/sys/arch/arm/arm32/arm32_machdep.c:1.141
--- src/sys/arch/arm/arm32/arm32_machdep.c:1.140	Thu Oct 21 07:03:26 2021
+++ src/sys/arch/arm/arm32/arm32_machdep.c	Sun Oct 31 16:23:47 2021
@@ -1,4 +1,4 @@
-/*	$NetBSD: arm32_machdep.c,v 1.140 2021/10/21 07:03:26 skrll Exp $	*/
+/*	$NetBSD: arm32_machdep.c,v 1.141 2021/10/31 16:23:47 skrll Exp $	*/
 
 /*
  * Copyright (c) 1994-1998 Mark Brinicombe.
@@ -42,7 +42,7 @@
  */
 
 #include <sys/cdefs.h>
-__KERNEL_RCSID(0, "$NetBSD: arm32_machdep.c,v 1.140 2021/10/21 07:03:26 skrll Exp $");
+__KERNEL_RCSID(0, "$NetBSD: arm32_machdep.c,v 1.141 2021/10/31 16:23:47 skrll Exp $");
 
 #include "opt_arm_debug.h"
 #include "opt_arm_start.h"
@@ -783,12 +783,22 @@ cpu_init_secondary_processor(int cpuinde
 	VPRINTS(" ci = ");
 	VPRINTX((int)ci);
 
+	ci->ci_ctrl = armreg_sctlr_read();
+	ci->ci_arm_cpuid = cpu_idnum();
+	ci->ci_arm_cputype = ci->ci_arm_cpuid & CPU_ID_CPU_MASK;
+	ci->ci_arm_cpurev = ci->ci_arm_cpuid & CPU_ID_REVISION_MASK;
+
 	ci->ci_midr = armreg_midr_read();
+	ci->ci_actlr = armreg_auxctl_read();
+	ci->ci_revidr = armreg_revidr_read();
 	ci->ci_mpidr = armreg_mpidr_read();
 
 	arm_cpu_topology_set(ci, ci->ci_mpidr);
 
-	VPRINTS(" hatched|=");
+	VPRINTS(" vfp");
+	vfp_detect(ci);
+
+	VPRINTS(" hatched |=");
 	VPRINTX(__BIT(cpuindex));
 	VPRINTS("\n\r");
 

Index: src/sys/arch/arm/arm32/cpu.c
diff -u src/sys/arch/arm/arm32/cpu.c:1.151 src/sys/arch/arm/arm32/cpu.c:1.152
--- src/sys/arch/arm/arm32/cpu.c:1.151	Mon Oct 11 07:32:52 2021
+++ src/sys/arch/arm/arm32/cpu.c	Sun Oct 31 16:23:47 2021
@@ -1,4 +1,4 @@
-/*	$NetBSD: cpu.c,v 1.151 2021/10/11 07:32:52 rin Exp $	*/
+/*	$NetBSD: cpu.c,v 1.152 2021/10/31 16:23:47 skrll Exp $	*/
 
 /*
  * Copyright (c) 1995 Mark Brinicombe.
@@ -46,7 +46,7 @@
 #include "opt_multiprocessor.h"
 
 #include <sys/cdefs.h>
-__KERNEL_RCSID(0, "$NetBSD: cpu.c,v 1.151 2021/10/11 07:32:52 rin Exp $");
+__KERNEL_RCSID(0, "$NetBSD: cpu.c,v 1.152 2021/10/31 16:23:47 skrll Exp $");
 
 #include <sys/param.h>
 
@@ -75,8 +75,8 @@ uint32_t arm_cpu_marker[2] __cacheline_a
 
 /* Prototypes */
 void identify_arm_cpu(device_t, struct cpu_info *);
+void identify_features(device_t, struct cpu_info *);
 void identify_cortex_caches(device_t);
-void identify_features(device_t);
 
 /*
  * Identify the master (boot) CPU
@@ -96,11 +96,17 @@ cpu_attach(device_t dv, cpuid_t id)
 		ci->ci_ctrl = cpu_control(0, 0);
 
 		/* Get the CPU ID from coprocessor 15 */
-
 		ci->ci_cpuid = id;
 		ci->ci_arm_cpuid = cpu_idnum();
 		ci->ci_arm_cputype = ci->ci_arm_cpuid & CPU_ID_CPU_MASK;
 		ci->ci_arm_cpurev = ci->ci_arm_cpuid & CPU_ID_REVISION_MASK;
+
+		/*
+		 * Get other sysregs for BP. APs information is grabbed in
+		 * cpu_init_secondary_processor.
+		 */
+		ci->ci_actlr = armreg_auxctl_read();
+		ci->ci_revidr = armreg_revidr_read();
 	} else {
 #ifdef MULTIPROCESSOR
 		if ((boothowto & RB_MD1) != 0) {
@@ -182,17 +188,11 @@ cpu_attach(device_t dv, cpuid_t id)
 	ci->ci_kfpu_spl = -1;
 
 #ifdef MULTIPROCESSOR
-	/*
-	 * and we are done if this is a secondary processor.
-	 */
 	if (unit != 0) {
-		aprint_naive("\n");
-		aprint_normal("\n");
 		mi_cpu_attach(ci);
 #ifdef ARM_MMU_EXTENDED
 		pmap_tlb_info_attach(&pmap_tlb0_info, ci);
 #endif
-		return;
 	}
 #endif
 
@@ -230,7 +230,7 @@ cpu_attach(device_t dv, cpuid_t id)
  	}
 #endif
 
-	vfp_attach(ci);		/* XXX SMP */
+	vfp_attach(ci);
 }
 
 enum cpu_class {
@@ -750,10 +750,12 @@ identify_arm_cpu(device_t dv, struct cpu
 
 	aprint_normal("\n");
 
-	if (CPU_ID_CORTEX_P(arm_cpuid) || CPU_ID_ARM11_P(arm_cpuid) || CPU_ID_MV88SV58XX_P(arm_cpuid)) {
+	if (CPU_ID_CORTEX_P(arm_cpuid) ||
+	    CPU_ID_ARM11_P(arm_cpuid) ||
+	    CPU_ID_MV88SV58XX_P(arm_cpuid)) {
 		if ((arm_cpuid & CPU_ID_CPU_MASK) != CPU_ID_ARM1136JS &&
 		    (arm_cpuid & CPU_ID_CPU_MASK) != CPU_ID_ARM1176JZS) {
-			identify_features(dv);
+			identify_features(dv, ci);
 		}
 	}
 
@@ -832,8 +834,20 @@ extern int cpu_simd_present;
 extern int cpu_simdex_present;
 
 void
-identify_features(device_t dv)
+identify_features(device_t dv, struct cpu_info *ci)
 {
+	const int unit = device_unit(dv);
+
+	aprint_debug_dev(dv, "sctlr:  %#x\n", ci->ci_ctrl);
+	aprint_debug_dev(dv, "actlr:  %#x\n", ci->ci_actlr);
+	aprint_debug_dev(dv, "revidr: %#x\n", ci->ci_revidr);
+#ifdef MULTIPROCESSOR
+	aprint_debug_dev(dv, "mpidr:  %#x\n", ci->ci_mpidr);
+#endif
+
+	if (unit != 0)
+		return;
+
 	cpu_instruction_set_attributes[0] = armreg_isar0_read();
 	cpu_instruction_set_attributes[1] = armreg_isar1_read();
 	cpu_instruction_set_attributes[2] = armreg_isar2_read();
@@ -859,10 +873,10 @@ identify_features(device_t dv)
 #if 0
 	if (__SHIFTOUT(cpu_memory_model_features[3], __BITS(23,20))) {
 		/*
-		 * Updates to the translation tables do not require a clean
-		 * to the point of unification to ensure visibility by
-		 * subsequent translation table walks.
-		 */
+		* Updates to the translation tables do not require a clean
+		* to the point of unification to ensure visibility by
+		* subsequent translation table walks.
+		*/
 		pmap_needs_pte_sync = 0;
 	}
 #endif
@@ -870,12 +884,6 @@ identify_features(device_t dv)
 	cpu_processor_features[0] = armreg_pfr0_read();
 	cpu_processor_features[1] = armreg_pfr1_read();
 
-	aprint_debug_dev(dv, "sctlr:  %#x\n", armreg_sctlr_read());
-	aprint_debug_dev(dv, "actlr:  %#x\n", armreg_auxctl_read());
-	aprint_debug_dev(dv, "revidr: %#x\n", armreg_revidr_read());
-#ifdef MULTIPROCESSOR
-	aprint_debug_dev(dv, "mpidr:  %#x\n", armreg_mpidr_read());
-#endif
 	aprint_debug_dev(dv,
 	    "isar: [0]=%#x [1]=%#x [2]=%#x [3]=%#x, [4]=%#x, [5]=%#x\n",
 	    cpu_instruction_set_attributes[0],

Index: src/sys/arch/arm/broadcom/bcm2835_intr.c
diff -u src/sys/arch/arm/broadcom/bcm2835_intr.c:1.41 src/sys/arch/arm/broadcom/bcm2835_intr.c:1.42
--- src/sys/arch/arm/broadcom/bcm2835_intr.c:1.41	Sun Sep 12 03:58:52 2021
+++ src/sys/arch/arm/broadcom/bcm2835_intr.c	Sun Oct 31 16:23:47 2021
@@ -1,4 +1,4 @@
-/*	$NetBSD: bcm2835_intr.c,v 1.41 2021/09/12 03:58:52 nat Exp $	*/
+/*	$NetBSD: bcm2835_intr.c,v 1.42 2021/10/31 16:23:47 skrll Exp $	*/
 
 /*-
  * Copyright (c) 2012, 2015, 2019 The NetBSD Foundation, Inc.
@@ -30,7 +30,7 @@
  */
 
 #include <sys/cdefs.h>
-__KERNEL_RCSID(0, "$NetBSD: bcm2835_intr.c,v 1.41 2021/09/12 03:58:52 nat Exp $");
+__KERNEL_RCSID(0, "$NetBSD: bcm2835_intr.c,v 1.42 2021/10/31 16:23:47 skrll Exp $");
 
 #define _INTR_PRIVATE
 
@@ -60,7 +60,6 @@ __KERNEL_RCSID(0, "$NetBSD: bcm2835_intr
 #include <arm/fdt/arm_fdtvar.h>
 
 static void bcm2835_irq_handler(void *);
-static void bcm2836mp_intr_init(void *, struct cpu_info *);
 
 static void bcm2835_pic_unblock_irqs(struct pic_softc *, size_t, uint32_t);
 static void bcm2835_pic_block_irqs(struct pic_softc *, size_t, uint32_t);
@@ -77,6 +76,7 @@ static void bcm2836mp_pic_source_name(st
     size_t);
 #ifdef MULTIPROCESSOR
 int bcm2836mp_ipi_handler(void *);
+static void bcm2836mp_intr_init(struct cpu_info *);
 static void bcm2836mp_cpu_init(struct pic_softc *, struct cpu_info *);
 static void bcm2836mp_send_ipi(struct pic_softc *, const kcpuset_t *, u_long);
 #endif
@@ -359,10 +359,10 @@ bcm2835_icu_attach(device_t parent, devi
 
 		ifuncs = &bcm2836mpicu_fdt_funcs;
 
+#if defined(MULTIPROCESSOR)
 		/*
-		 * XXX
 		 * Register all PICs here in order to avoid pic_add() from
-		 * cpu_hatch(). See port-arm/56264.
+		 * cpu_hatch().  This is the only approved method.
 		 */
 		CPU_INFO_ITERATOR cii;
 		struct cpu_info *ci;
@@ -372,7 +372,6 @@ bcm2835_icu_attach(device_t parent, devi
 
 			KASSERT(cpuid < BCM2836_NCPUS);
 
-#if defined(MULTIPROCESSOR)
 			pic->pic_cpus = ci->ci_kcpuset;
 			/*
 			 * Append "#n" to avoid duplication of .pic_name[]
@@ -381,14 +380,12 @@ bcm2835_icu_attach(device_t parent, devi
 			char suffix[sizeof("#00000")];
 			snprintf(suffix, sizeof(suffix), "#%lu", cpuid);
 			strlcat(pic->pic_name, suffix, sizeof(pic->pic_name));
-#endif
 
 			bcm2836mp_int_base[cpuid] =
 			    pic_add(pic, PIC_IRQBASE_ALLOC);
+			bcm2836mp_intr_init(ci);
 		}
-
-		bcm2836mp_intr_init(self, curcpu());
-		arm_fdt_cpu_hatch_register(self, bcm2836mp_intr_init);
+#endif
 	} else {
 		if (bcml1icu_sc == NULL)
 			arm_fdt_irq_set_handler(bcm2835_irq_handler);
@@ -889,30 +886,18 @@ bcm2836mp_ipi_handler(void *priv)
 }
 #endif
 
+#if defined(MULTIPROCESSOR)
 static void
-bcm2836mp_intr_init(void *priv, struct cpu_info *ci)
+bcm2836mp_intr_init(struct cpu_info *ci)
 {
-#if defined(MULTIPROCESSOR)
 	const cpuid_t cpuid = ci->ci_core_id;
 
 	KASSERT(cpuid < BCM2836_NCPUS);
 
 	intr_establish(BCM2836_INT_MAILBOX0_CPUN(cpuid), IPL_HIGH,
 	    IST_LEVEL | IST_MPSAFE, bcm2836mp_ipi_handler, ci);
-
-	struct bcm2836mp_interrupt *bip;
-	TAILQ_FOREACH(bip, &bcm2836mp_interrupts, bi_next) {
-		if (bip->bi_done)
-			continue;
-
-		const int irq = BCM2836_INT_BASECPUN(cpuid) + bip->bi_irq;
-		void *ih = intr_establish(irq, bip->bi_ipl,
-		    IST_LEVEL | bip->bi_flags, bip->bi_func, bip->bi_arg);
-
-		bip->bi_ihs[cpuid] = ih;
-	}
-#endif
 }
+#endif
 
 static int
 bcm2836mp_icu_fdt_decode_irq(u_int *specifier)
@@ -928,96 +913,37 @@ bcm2836mp_icu_fdt_establish(device_t dev
     int (*func)(void *), void *arg, const char *xname)
 {
 	int iflags = (flags & FDT_INTR_MPSAFE) ? IST_MPSAFE : 0;
-	struct bcm2836mp_interrupt *bip;
-	void *ih;
 
 	int irq = bcm2836mp_icu_fdt_decode_irq(specifier);
 	if (irq == -1)
 		return NULL;
 
-	TAILQ_FOREACH(bip, &bcm2836mp_interrupts, bi_next) {
-		if (irq == bip->bi_irq)
-			return NULL;
-	}
-
-	bip = kmem_alloc(sizeof(*bip), KM_SLEEP);
-	if (bip == NULL)
-		return NULL;
-
-	bip->bi_done = false;
-	bip->bi_irq = irq;
-	bip->bi_ipl = ipl;
-	bip->bi_flags = IST_LEVEL | iflags;
-	bip->bi_func = func;
-	bip->bi_arg = arg;
-
-	/*
-	 * If we're not cold and the BPs have been started then we can
-	 * register the interrupt for all CPUs now, e.g. PMU
-	 */
-	if (!cold) {
-		for (cpuid_t cpuid = 0; cpuid < BCM2836_NCPUS; cpuid++) {
-			ih = intr_establish_xname(
-			    BCM2836_INT_BASECPUN(cpuid) + irq, ipl,
-			    IST_LEVEL | iflags, func, arg, xname);
-			if (!ih) {
-				kmem_free(bip, sizeof(*bip));
-				return NULL;
+	void *ihs[BCM2836_NCPUS];
+	for (cpuid_t cpuid = 0; cpuid < BCM2836_NCPUS; cpuid++) {
+		const int cpuirq = BCM2836_INT_BASECPUN(cpuid) + irq;
+		ihs[cpuid] = intr_establish_xname(cpuirq, ipl,
+		    IST_LEVEL | iflags, func, arg, xname);
+		if (!ihs[cpuid]) {
+			for (cpuid_t undo = 0; undo < cpuid; undo++) {
+				intr_disestablish(ihs[undo]);
 			}
-			bip->bi_ihs[cpuid] = ih;
-
+			return NULL;
 		}
-		bip->bi_done = true;
-		ih = bip->bi_ihs[0];
-		goto done;
-	}
 
-	/*
-	 * Otherwise we can only establish the interrupt for the BP and
-	 * delay until bcm2836mp_intr_init is called for each AP, e.g.
-	 * gtmr
-	 */
-	ih = intr_establish_xname(BCM2836_INT_BASECPUN(0) + irq, ipl,
-	    IST_LEVEL | iflags, func, arg, xname);
-	if (!ih) {
-		kmem_free(bip, sizeof(*bip));
-		return NULL;
 	}
 
-	bip->bi_ihs[0] = ih;
-	for (cpuid_t cpuid = 1; cpuid < BCM2836_NCPUS; cpuid++)
-		bip->bi_ihs[cpuid] = NULL;
-
-done:
-	TAILQ_INSERT_TAIL(&bcm2836mp_interrupts, bip, bi_next);
-
 	/*
 	 * Return the intr_establish handle for cpu 0 for API compatibility.
 	 * Any cpu would do here as these sources don't support set_affinity
 	 * when the handle is used in interrupt_distribute(9)
 	 */
-	return ih;
+	return ihs[0];
 }
 
 static void
 bcm2836mp_icu_fdt_disestablish(device_t dev, void *ih)
 {
-	struct bcm2836mp_interrupt *bip;
-
-	TAILQ_FOREACH(bip, &bcm2836mp_interrupts, bi_next) {
-		if (bip->bi_ihs[0] == ih)
-			break;
-	}
-
-	if (bip == NULL)
-		return;
-
-	for (cpuid_t cpuid = 0; cpuid < BCM2836_NCPUS; cpuid++)
-		intr_disestablish(bip->bi_ihs[cpuid]);
-
-	TAILQ_REMOVE(&bcm2836mp_interrupts, bip, bi_next);
-
-	kmem_free(bip, sizeof(*bip));
+	intr_disestablish(ih);
 }
 
 static bool

Index: src/sys/arch/arm/cortex/gicv3_its.c
diff -u src/sys/arch/arm/cortex/gicv3_its.c:1.32 src/sys/arch/arm/cortex/gicv3_its.c:1.33
--- src/sys/arch/arm/cortex/gicv3_its.c:1.32	Sat Jan 16 21:05:15 2021
+++ src/sys/arch/arm/cortex/gicv3_its.c	Sun Oct 31 16:23:47 2021
@@ -1,4 +1,4 @@
-/* $NetBSD: gicv3_its.c,v 1.32 2021/01/16 21:05:15 jmcneill Exp $ */
+/* $NetBSD: gicv3_its.c,v 1.33 2021/10/31 16:23:47 skrll Exp $ */
 
 /*-
  * Copyright (c) 2018 The NetBSD Foundation, Inc.
@@ -32,7 +32,7 @@
 #define _INTR_PRIVATE
 
 #include <sys/cdefs.h>
-__KERNEL_RCSID(0, "$NetBSD: gicv3_its.c,v 1.32 2021/01/16 21:05:15 jmcneill Exp $");
+__KERNEL_RCSID(0, "$NetBSD: gicv3_its.c,v 1.33 2021/10/31 16:23:47 skrll Exp $");
 
 #include <sys/param.h>
 #include <sys/kmem.h>
@@ -496,8 +496,9 @@ gicv3_its_msi_alloc(struct arm_pci_msi *
 			gicv3_its_msi_enable(its, lpi, *count);
 
 		/*
-		 * Record target PE
+		 * Record devid and target PE
 		 */
+		its->its_devid[lpi - its->its_pic->pic_irqbase] = devid;
 		its->its_targets[lpi - its->its_pic->pic_irqbase] = ci;
 
 		/*
@@ -565,8 +566,9 @@ gicv3_its_msix_alloc(struct arm_pci_msi 
 		gicv3_its_msix_enable(its, lpi, msix_vec, bst, bsh);
 
 		/*
-		 * Record target PE
+		 * Record devid and target PE
 		 */
+		its->its_devid[lpi - its->its_pic->pic_irqbase] = devid;
 		its->its_targets[lpi - its->its_pic->pic_irqbase] = ci;
 
 		/*
@@ -601,7 +603,7 @@ gicv3_its_msi_intr_establish(struct arm_
 	/* Invalidate LPI configuration tables */
 	pa = its->its_pa[lpi - its->its_pic->pic_irqbase];
 	KASSERT(pa != NULL);
-	const uint32_t devid = gicv3_its_devid(pa->pa_pc, pa->pa_tag);
+	const uint32_t devid = its->its_devid[lpi - its->its_pic->pic_irqbase];
 	gits_command_inv(its, devid, lpi - its->its_pic->pic_irqbase);
 
 	return intrh;
@@ -623,6 +625,7 @@ gicv3_its_msi_intr_release(struct arm_pc
 			gicv3_its_msi_disable(its, lpi);
 		gicv3_its_msi_free_lpi(its, lpi);
 		its->its_targets[lpi - its->its_pic->pic_irqbase] = NULL;
+		its->its_devid[lpi - its->its_pic->pic_irqbase] = 0;
 		struct intrsource * const is =
 		    its->its_pic->pic_sources[lpi - its->its_pic->pic_irqbase];
 		if (is != NULL)
@@ -784,6 +787,7 @@ gicv3_its_cpu_init(void *priv, struct cp
 	/*
 	 * Map collection ID of this CPU's index to this CPU's redistributor.
 	 */
+	mutex_enter(its->its_lock);
 	gits_command_mapc(its, cpu_index(ci), rdbase, true);
 	gits_command_invall(its, cpu_index(ci));
 	gits_wait(its);
@@ -797,10 +801,12 @@ gicv3_its_cpu_init(void *priv, struct cp
 		pa = its->its_pa[irq];
 		KASSERT(pa != NULL);
 
-		const uint32_t devid = gicv3_its_devid(pa->pa_pc, pa->pa_tag);
+		const uint32_t devid = its->its_devid[irq];
 		gits_command_movi(its, devid, irq, cpu_index(ci));
 		gits_command_sync(its, its->its_rdbase[cpu_index(ci)]);
 	}
+	gits_wait(its);
+	mutex_exit(its->its_lock);
 
 	its->its_cpuonline[cpu_index(ci)] = true;
 }
@@ -865,6 +871,7 @@ gicv3_its_init(struct gicv3_softc *sc, b
 	KASSERT(its->its_pic->pic_maxsources > 0);
 	its->its_pa = kmem_zalloc(sizeof(struct pci_attach_args *) * its->its_pic->pic_maxsources, KM_SLEEP);
 	its->its_targets = kmem_zalloc(sizeof(struct cpu_info *) * its->its_pic->pic_maxsources, KM_SLEEP);
+	its->its_devid = kmem_zalloc(sizeof(uint32_t) * its->its_pic->pic_maxsources, KM_SLEEP);
 	its->its_gic = sc;
 	its->its_rdbase = kmem_zalloc(sizeof(*its->its_rdbase) * ncpu, KM_SLEEP);
 	its->its_cpuonline = kmem_zalloc(sizeof(*its->its_cpuonline) * ncpu, KM_SLEEP);
@@ -874,6 +881,7 @@ gicv3_its_init(struct gicv3_softc *sc, b
 	its->its_cb.priv = its;
 	LIST_INIT(&its->its_devices);
 	LIST_INSERT_HEAD(&sc->sc_lpi_callbacks, &its->its_cb, list);
+	its->its_lock = mutex_obj_alloc(MUTEX_SPIN, IPL_NONE);
 
 	gicv3_its_command_init(sc, its);
 	gicv3_its_table_init(sc, its);

Index: src/sys/arch/arm/cortex/gicv3_its.h
diff -u src/sys/arch/arm/cortex/gicv3_its.h:1.7 src/sys/arch/arm/cortex/gicv3_its.h:1.8
--- src/sys/arch/arm/cortex/gicv3_its.h:1.7	Sat Jan 16 21:05:15 2021
+++ src/sys/arch/arm/cortex/gicv3_its.h	Sun Oct 31 16:23:47 2021
@@ -1,4 +1,4 @@
-/* $NetBSD: gicv3_its.h,v 1.7 2021/01/16 21:05:15 jmcneill Exp $ */
+/* $NetBSD: gicv3_its.h,v 1.8 2021/10/31 16:23:47 skrll Exp $ */
 
 /*-
  * Copyright (c) 2018 The NetBSD Foundation, Inc.
@@ -61,6 +61,7 @@ struct gicv3_its {
 	struct pic_softc	*its_pic;
 	struct pci_attach_args	**its_pa;
 	struct cpu_info		**its_targets;
+	uint32_t		*its_devid;
 
 	LIST_HEAD(, gicv3_its_device) its_devices;
 
@@ -68,6 +69,8 @@ struct gicv3_its {
 	struct gicv3_dma	its_tab[8];		/* ITS tables */
 
 	struct arm_pci_msi	its_msi;
+
+	kmutex_t		*its_lock;
 };
 
 int	gicv3_its_init(struct gicv3_softc *, bus_space_handle_t, uint64_t, uint32_t);

Index: src/sys/arch/arm/cortex/gtmr.c
diff -u src/sys/arch/arm/cortex/gtmr.c:1.45 src/sys/arch/arm/cortex/gtmr.c:1.46
--- src/sys/arch/arm/cortex/gtmr.c:1.45	Thu Sep  9 21:39:02 2021
+++ src/sys/arch/arm/cortex/gtmr.c	Sun Oct 31 16:23:47 2021
@@ -1,4 +1,4 @@
-/*	$NetBSD: gtmr.c,v 1.45 2021/09/09 21:39:02 jmcneill Exp $	*/
+/*	$NetBSD: gtmr.c,v 1.46 2021/10/31 16:23:47 skrll Exp $	*/
 
 /*-
  * Copyright (c) 2012 The NetBSD Foundation, Inc.
@@ -30,7 +30,7 @@
  */
 
 #include <sys/cdefs.h>
-__KERNEL_RCSID(0, "$NetBSD: gtmr.c,v 1.45 2021/09/09 21:39:02 jmcneill Exp $");
+__KERNEL_RCSID(0, "$NetBSD: gtmr.c,v 1.46 2021/10/31 16:23:47 skrll Exp $");
 
 #include <sys/param.h>
 #include <sys/bus.h>
@@ -247,6 +247,7 @@ gtmr_init_cpu_clock(struct cpu_info *ci)
 
 	KASSERT(ci == curcpu());
 
+	/* XXX hmm... called from cpu_hatch which hasn't lowered ipl yet */
 	int s = splsched();
 
 	/*

Index: src/sys/arch/arm/include/cpu.h
diff -u src/sys/arch/arm/include/cpu.h:1.119 src/sys/arch/arm/include/cpu.h:1.120
--- src/sys/arch/arm/include/cpu.h:1.119	Sat Aug 14 17:51:18 2021
+++ src/sys/arch/arm/include/cpu.h	Sun Oct 31 16:23:47 2021
@@ -1,4 +1,4 @@
-/*	$NetBSD: cpu.h,v 1.119 2021/08/14 17:51:18 ryo Exp $	*/
+/*	$NetBSD: cpu.h,v 1.120 2021/10/31 16:23:47 skrll Exp $	*/
 
 /*
  * Copyright (c) 1994-1996 Mark Brinicombe.
@@ -56,8 +56,8 @@ typedef unsigned long mpidr_t;
 #ifdef MULTIPROCESSOR
 extern u_int arm_cpu_max;
 extern mpidr_t cpu_mpidr[];
-extern kmutex_t cpu_hatch_lock;
 
+void cpu_init_secondary_processor(int);
 void cpu_boot_secondary_processors(void);
 void cpu_mpstart(void);
 bool cpu_hatched_p(u_int);
@@ -155,6 +155,8 @@ static inline void cpu_dosoftints(void);
 #include <sys/cpu_data.h>
 #include <sys/device_if.h>
 #include <sys/evcnt.h>
+
+#include <arm/cpufunc.h>
 #include <machine/param.h>
 
 struct cpu_info {
@@ -219,10 +221,14 @@ struct cpu_info {
 	struct evcnt	ci_vfp_evs[3];
 
 	uint32_t	ci_midr;
+	uint32_t	ci_actlr;
+	uint32_t	ci_revidr;
 	uint32_t	ci_mpidr;
+	uint32_t	ci_mvfr[2];
+
 	uint32_t	ci_capacity_dmips_mhz;
 
-	struct arm_cache_info *
+	struct arm_cache_info
 			ci_cacheinfo;
 
 #if defined(GPROF) && defined(MULTIPROCESSOR)
@@ -297,10 +303,6 @@ extern struct cpu_info *cpu_info[];
 	cii = 0, __USE(cii), ci = curcpu(); ci != NULL; ci = NULL
 #endif
 
-#if defined(MULTIPROCESSOR)
-void cpu_init_secondary_processor(int);
-#endif
-
 #define	LWP0_CPU_INFO	(&cpu_info_store[0])
 
 static inline int

Index: src/sys/arch/arm/include/locore.h
diff -u src/sys/arch/arm/include/locore.h:1.36 src/sys/arch/arm/include/locore.h:1.37
--- src/sys/arch/arm/include/locore.h:1.36	Mon Feb  1 19:31:34 2021
+++ src/sys/arch/arm/include/locore.h	Sun Oct 31 16:23:47 2021
@@ -1,4 +1,4 @@
-/*	$NetBSD: locore.h,v 1.36 2021/02/01 19:31:34 skrll Exp $	*/
+/*	$NetBSD: locore.h,v 1.37 2021/10/31 16:23:47 skrll Exp $	*/
 
 /*
  * Copyright (c) 1994-1996 Mark Brinicombe.
@@ -264,6 +264,7 @@ int	badaddr_read(void *, size_t, void *)
 void	swi_handler(trapframe_t *);
 
 /* vfp_init.c */
+void	vfp_detect(struct cpu_info *);
 void	vfp_attach(struct cpu_info *);
 void	vfp_discardcontext(lwp_t *, bool);
 void	vfp_savecontext(lwp_t *);

Index: src/sys/arch/arm/include/undefined.h
diff -u src/sys/arch/arm/include/undefined.h:1.14 src/sys/arch/arm/include/undefined.h:1.15
--- src/sys/arch/arm/include/undefined.h:1.14	Fri Aug 27 09:11:52 2021
+++ src/sys/arch/arm/include/undefined.h	Sun Oct 31 16:23:47 2021
@@ -1,4 +1,4 @@
-/*	$NetBSD: undefined.h,v 1.14 2021/08/27 09:11:52 skrll Exp $	*/
+/*	$NetBSD: undefined.h,v 1.15 2021/10/31 16:23:47 skrll Exp $	*/
 
 /*
  * Copyright (c) 1995-1996 Mark Brinicombe.
@@ -78,11 +78,12 @@ enum arm_coprocs {
 /* Prototypes for undefined.c */
 
 void *install_coproc_handler(int, undef_handler_t);
+void replace_coproc_handler(int, undef_handler_t);
 void remove_coproc_handler(void *);
 void undefined_init(void);
 
 /*
- * XXX Stuff below here is for use before malloc() is available.  Most code
+ * Stuff below here is for use before kmem(9) is available.  Most code
  * shouldn't use it.
  */
 
@@ -93,7 +94,7 @@ struct undefined_handler {
 
 /*
  * Handlers installed using install_coproc_handler_static shouldn't be
- * removed.
+ * removed.  We special case the 'test vfp existence' handler.
  */
 void install_coproc_handler_static(int, struct undefined_handler *);
 

Index: src/sys/arch/arm/pic/pic.c
diff -u src/sys/arch/arm/pic/pic.c:1.72 src/sys/arch/arm/pic/pic.c:1.73
--- src/sys/arch/arm/pic/pic.c:1.72	Sun Sep 26 13:38:49 2021
+++ src/sys/arch/arm/pic/pic.c	Sun Oct 31 16:23:48 2021
@@ -1,4 +1,4 @@
-/*	$NetBSD: pic.c,v 1.72 2021/09/26 13:38:49 jmcneill Exp $	*/
+/*	$NetBSD: pic.c,v 1.73 2021/10/31 16:23:48 skrll Exp $	*/
 /*-
  * Copyright (c) 2008 The NetBSD Foundation, Inc.
  * All rights reserved.
@@ -33,7 +33,7 @@
 #include "opt_multiprocessor.h"
 
 #include <sys/cdefs.h>
-__KERNEL_RCSID(0, "$NetBSD: pic.c,v 1.72 2021/09/26 13:38:49 jmcneill Exp $");
+__KERNEL_RCSID(0, "$NetBSD: pic.c,v 1.73 2021/10/31 16:23:48 skrll Exp $");
 
 #include <sys/param.h>
 #include <sys/atomic.h>
@@ -790,7 +790,7 @@ pic_establish_intr(struct pic_softc *pic
 	(*pic->pic_ops->pic_establish_irq)(pic, is);
 
 unblock:
-	if (cold || !is->is_mpsafe) {
+	if (!mp_online || !is->is_mpsafe) {
 		(*pic->pic_ops->pic_unblock_irqs)(pic, is->is_irq & ~0x1f,
 		    __BIT(is->is_irq & 0x1f));
 	} else {

Index: src/sys/arch/arm/vfp/vfp_init.c
diff -u src/sys/arch/arm/vfp/vfp_init.c:1.75 src/sys/arch/arm/vfp/vfp_init.c:1.76
--- src/sys/arch/arm/vfp/vfp_init.c:1.75	Sun Oct 17 08:47:21 2021
+++ src/sys/arch/arm/vfp/vfp_init.c	Sun Oct 31 16:23:48 2021
@@ -1,4 +1,4 @@
-/*      $NetBSD: vfp_init.c,v 1.75 2021/10/17 08:47:21 skrll Exp $ */
+/*      $NetBSD: vfp_init.c,v 1.76 2021/10/31 16:23:48 skrll Exp $ */
 
 /*
  * Copyright (c) 2008 ARM Ltd
@@ -32,7 +32,7 @@
 #include "opt_cputypes.h"
 
 #include <sys/cdefs.h>
-__KERNEL_RCSID(0, "$NetBSD: vfp_init.c,v 1.75 2021/10/17 08:47:21 skrll Exp $");
+__KERNEL_RCSID(0, "$NetBSD: vfp_init.c,v 1.76 2021/10/31 16:23:48 skrll Exp $");
 
 #include <sys/param.h>
 #include <sys/types.h>
@@ -169,24 +169,6 @@ uint32_t vfp_fpscr_changable = VFP_FPSCR
 /* default to run fast */
 uint32_t vfp_fpscr_default = (VFP_FPSCR_DN | VFP_FPSCR_FZ | VFP_FPSCR_RN);
 
-/*
- * Used to test for a VFP. The following function is installed as a coproc10
- * handler on the undefined instruction vector and then we issue a VFP
- * instruction. If undefined_test is non zero then the VFP did not handle
- * the instruction so must be absent, or disabled.
- */
-
-static int undefined_test;
-
-static int
-vfp_test(u_int address, u_int insn, trapframe_t *frame, int fault_code)
-{
-
-	frame->tf_pc += INSN_SIZE;
-	++undefined_test;
-	return 0;
-}
-
 #else
 /* determine what bits can be changed */
 uint32_t vfp_fpscr_changable = VFP_FPSCR_CSUM|VFP_FPSCR_ESUM|VFP_FPSCR_RMODE;
@@ -238,6 +220,12 @@ vfp_fpscr_handler(u_int address, u_int i
 }
 
 #ifndef FPU_VFP
+void
+vfp_detect(struct cpu_info *ci)
+{
+	ci->ci_vfp_id = 0;
+	return;
+}
 /*
  * If we don't want VFP support, we still need to handle emulating VFP FPSCR
  * instructions.
@@ -246,7 +234,7 @@ void
 vfp_attach(struct cpu_info *ci)
 {
 	if (CPU_IS_PRIMARY(ci)) {
-		install_coproc_handler(VFP_COPROC, vfp_fpscr_handler);
+		replace_coproc_handler(VFP_COPROC, vfp_fpscr_handler);
 	}
 	evcnt_attach_dynamic(&ci->ci_vfp_evs[0], EVCNT_TYPE_TRAP, NULL,
 	    ci->ci_cpuname, "vfp fpscr traps");
@@ -254,9 +242,8 @@ vfp_attach(struct cpu_info *ci)
 
 #else
 void
-vfp_attach(struct cpu_info *ci)
+vfp_detect(struct cpu_info *ci)
 {
-	const char *model = NULL;
 
 	if (CPU_ID_ARM11_P(ci->ci_arm_cpuid)
 	    || CPU_ID_MV88SV58XX_P(ci->ci_arm_cpuid)
@@ -265,14 +252,7 @@ vfp_attach(struct cpu_info *ci)
 		const uint32_t nsacr = armreg_nsacr_read();
 		const uint32_t nsacr_vfp = __BITS(VFP_COPROC,VFP_COPROC2);
 		if ((nsacr & nsacr_vfp) != nsacr_vfp) {
-			aprint_normal_dev(ci->ci_dev,
-			    "VFP access denied (NSACR=%#x)\n", nsacr);
-			if (CPU_IS_PRIMARY(ci))
-				install_coproc_handler(VFP_COPROC, vfp_fpscr_handler);
-			ci->ci_vfp_id = 0;
-			evcnt_attach_dynamic(&ci->ci_vfp_evs[0],
-			    EVCNT_TYPE_TRAP, NULL, ci->ci_cpuname,
-			    "vfp fpscr traps");
+			ci->ci_vfp_id = 0;
 			return;
 		}
 #endif
@@ -296,36 +276,32 @@ vfp_attach(struct cpu_info *ci)
 		bool vfp_p = __SHIFTOUT(cpacr, cpacr_vfp2) == CPACR_ALL
 		    && __SHIFTOUT(cpacr, cpacr_vfp) == CPACR_ALL;
 		if (!vfp_p) {
-			aprint_normal_dev(ci->ci_dev,
-			    "VFP access denied (CPACR=%#x)\n", cpacr);
-			if (CPU_IS_PRIMARY(ci))
-				install_coproc_handler(VFP_COPROC, vfp_fpscr_handler);
 			ci->ci_vfp_id = 0;
-			evcnt_attach_dynamic(&ci->ci_vfp_evs[0],
-			    EVCNT_TYPE_TRAP, NULL, ci->ci_cpuname,
-			    "vfp fpscr traps");
 			return;
 		}
 	}
 
-	void *uh = install_coproc_handler(VFP_COPROC, vfp_test);
-
-	undefined_test = 0;
+	/* borrow the ci_vfp_id field for VFP detection */
+	ci->ci_vfp_id = -1;
 
 	const uint32_t fpsid = armreg_fpsid_read();
-
-	remove_coproc_handler(uh);
-
-	if (undefined_test != 0) {
-		aprint_normal_dev(ci->ci_dev, "No VFP detected\n");
-		if (CPU_IS_PRIMARY(ci))
-			install_coproc_handler(VFP_COPROC, vfp_fpscr_handler);
-		ci->ci_vfp_id = 0;
+	if (ci->ci_vfp_id == 0) {
 		return;
 	}
 
 	ci->ci_vfp_id = fpsid;
-	switch (fpsid & ~ VFP_FPSID_REV_MSK) {
+
+	ci->ci_mvfr[0] = armreg_mvfr0_read();
+	ci->ci_mvfr[1] = armreg_mvfr1_read();
+
+}
+
+void
+vfp_attach(struct cpu_info *ci)
+{
+	const char *model = NULL;
+
+	switch (ci->ci_vfp_id & ~ VFP_FPSID_REV_MSK) {
 	case FPU_VFP10_ARM10E:
 		model = "VFP10 R1";
 		break;
@@ -354,9 +330,9 @@ vfp_attach(struct cpu_info *ci)
 		break;
 	default:
 		aprint_normal_dev(ci->ci_dev, "unrecognized VFP version %#x\n",
-		    fpsid);
+		    ci->ci_vfp_id);
 		if (CPU_IS_PRIMARY(ci))
-			install_coproc_handler(VFP_COPROC, vfp_fpscr_handler);
+			replace_coproc_handler(VFP_COPROC, vfp_fpscr_handler);
 		vfp_fpscr_changable = VFP_FPSCR_CSUM|VFP_FPSCR_ESUM
 		    |VFP_FPSCR_RMODE;
 		vfp_fpscr_default = 0;
@@ -364,48 +340,43 @@ vfp_attach(struct cpu_info *ci)
 	}
 
 	cpu_fpu_present = 1;
-	cpu_media_and_vfp_features[0] = armreg_mvfr0_read();
-	cpu_media_and_vfp_features[1] = armreg_mvfr1_read();
-	if (fpsid != 0) {
-		uint32_t f0 = armreg_mvfr0_read();
-		uint32_t f1 = armreg_mvfr1_read();
-		aprint_normal("vfp%d at %s: %s%s%s%s%s\n",
-		    device_unit(ci->ci_dev),
-		    device_xname(ci->ci_dev),
-		    model,
-		    ((f0 & ARM_MVFR0_ROUNDING_MASK) ? ", rounding" : ""),
-		    ((f0 & ARM_MVFR0_EXCEPT_MASK) ? ", exceptions" : ""),
-		    ((f1 & ARM_MVFR1_D_NAN_MASK) ? ", NaN propagation" : ""),
-		    ((f1 & ARM_MVFR1_FTZ_MASK) ? ", denormals" : ""));
-		aprint_debug("vfp%d: mvfr: [0]=%#x [1]=%#x\n",
-		    device_unit(ci->ci_dev), f0, f1);
-		if (CPU_IS_PRIMARY(ci)) {
-			if (f0 & ARM_MVFR0_ROUNDING_MASK) {
-				vfp_fpscr_changable |= VFP_FPSCR_RMODE;
-			}
-			if (f1 & ARM_MVFR0_EXCEPT_MASK) {
-				vfp_fpscr_changable |= VFP_FPSCR_ESUM;
-			}
-			// If hardware supports propagation of NaNs, select it.
-			if (f1 & ARM_MVFR1_D_NAN_MASK) {
-				vfp_fpscr_default &= ~VFP_FPSCR_DN;
-				vfp_fpscr_changable |= VFP_FPSCR_DN;
-			}
-			// If hardware supports denormalized numbers, use it.
-			if (cpu_media_and_vfp_features[1] & ARM_MVFR1_FTZ_MASK) {
-				vfp_fpscr_default &= ~VFP_FPSCR_FZ;
-				vfp_fpscr_changable |= VFP_FPSCR_FZ;
-			}
-		}
-	}
-	evcnt_attach_dynamic(&ci->ci_vfp_evs[0], EVCNT_TYPE_MISC, NULL,
-	    ci->ci_cpuname, "vfp coproc use");
-	evcnt_attach_dynamic(&ci->ci_vfp_evs[1], EVCNT_TYPE_MISC, NULL,
-	    ci->ci_cpuname, "vfp coproc re-use");
-	evcnt_attach_dynamic(&ci->ci_vfp_evs[2], EVCNT_TYPE_TRAP, NULL,
-	    ci->ci_cpuname, "vfp coproc fault");
+
+	const uint32_t f0 = ci->ci_mvfr[0];
+	const uint32_t f1 = ci->ci_mvfr[1];
+	aprint_normal("vfp%d at %s: %s%s%s%s%s\n",
+	    device_unit(ci->ci_dev),
+	    device_xname(ci->ci_dev),
+	    model,
+	    ((f0 & ARM_MVFR0_ROUNDING_MASK) ? ", rounding" : ""),
+	    ((f0 & ARM_MVFR0_EXCEPT_MASK) ? ", exceptions" : ""),
+	    ((f1 & ARM_MVFR1_D_NAN_MASK) ? ", NaN propagation" : ""),
+	    ((f1 & ARM_MVFR1_FTZ_MASK) ? ", denormals" : ""));
+
+	aprint_debug("vfp%d: mvfr: [0]=%#x [1]=%#x\n",
+	    device_unit(ci->ci_dev), f0, f1);
+
 	if (CPU_IS_PRIMARY(ci)) {
-		install_coproc_handler(VFP_COPROC, vfp_handler);
+		cpu_media_and_vfp_features[0] = f0;
+		cpu_media_and_vfp_features[1] = f1;
+
+		if (f0 & ARM_MVFR0_ROUNDING_MASK) {
+			vfp_fpscr_changable |= VFP_FPSCR_RMODE;
+		}
+		if (f1 & ARM_MVFR0_EXCEPT_MASK) {
+			vfp_fpscr_changable |= VFP_FPSCR_ESUM;
+		}
+		// If hardware supports propagation of NaNs, select it.
+		if (f1 & ARM_MVFR1_D_NAN_MASK) {
+			vfp_fpscr_default &= ~VFP_FPSCR_DN;
+			vfp_fpscr_changable |= VFP_FPSCR_DN;
+		}
+		// If hardware supports denormalized numbers, use it.
+		if (f1 & ARM_MVFR1_FTZ_MASK) {
+			vfp_fpscr_default &= ~VFP_FPSCR_FZ;
+			vfp_fpscr_changable |= VFP_FPSCR_FZ;
+		}
+
+		replace_coproc_handler(VFP_COPROC, vfp_handler);
 		install_coproc_handler(VFP_COPROC2, vfp_handler);
 #ifdef CPU_CORTEX
 		if (cpu_neon_present) {
@@ -416,6 +387,13 @@ vfp_attach(struct cpu_info *ci)
 		}
 #endif
 	}
+
+	evcnt_attach_dynamic(&ci->ci_vfp_evs[0], EVCNT_TYPE_MISC, NULL,
+	    ci->ci_cpuname, "vfp coproc use");
+	evcnt_attach_dynamic(&ci->ci_vfp_evs[1], EVCNT_TYPE_MISC, NULL,
+	    ci->ci_cpuname, "vfp coproc re-use");
+	evcnt_attach_dynamic(&ci->ci_vfp_evs[2], EVCNT_TYPE_TRAP, NULL,
+	    ci->ci_cpuname, "vfp coproc fault");
 }
 
 /* The real handler for VFP bounces.  */

Reply via email to