On Tue, Feb 20, 2018 at 08:52:20AM +0100, Mark Kettenis wrote:
> > Date: Mon, 19 Feb 2018 13:49:48 +0100 (CET)
> > From: Mark Kettenis <[email protected]>
> >
> > The diff below attempts to make the arm64 pmap "mpsafe" and enables MP
> > support. This diff survived a full build on my Firefly-RK3399 board.
> > It also seems to work on the Overdrive 1000. It might even work on
> > the Raspberry Pi 3. I'd appreciate it if people could play with this
> > on the Raspberry Pi and other arm64 hardware they have.
> >
> > I tried to follow a strategy for making the pmap "mpsafe" that more
> > closely resembles what we did for our other architectures, although I
> > looked at Dale Rahn's diff as well. It seems I fixed at least some
> > bugs in the process. But I'm sure there are bugs remaining. I may
> > even have introduced new ones. So do expect this to crash and burn,
> > or at least lock up.
> >
> > I'm also interested in people testing the normal GENERIC kernel with
> > this diff, especially building ports. There are some changes in there
> > that could affect non-MP as well.
>
> Here is a slightly better diff that removes a potential lock ordering
> problem, removes a redundant splvm and moves some code around to avoid
> a level of indentation. Gaining confidence, so I'm looking for ok's
> now.
I have been running your diff on my rpi (together with jsg's firmware
update). My rpi has hung twice now while doing a make -j4 in the clang
part of the tree.
The last top screen showed it using swap (around 400M), with three
c++ processes each having a resident size of around 200M.
I'll try this diff now.
-Otto
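
For anyone reading along: the reference-count change in the diff
(pmap_reference()/pmap_destroy() switching to atomic_inc_int() and
atomic_dec_int_nv()) is the usual lock-free pattern of bumping the
count with an atomic increment and tearing down only when an atomic
decrement reaches zero. A minimal sketch of that pattern in plain
C11, with made-up names standing in for the kernel primitives:

#include <stdatomic.h>
#include <stdlib.h>

/* Illustrative stand-in for a refcounted object; not the kernel pmap. */
struct refobj {
	atomic_uint refs;		/* starts at 1 for the creator */
};

struct refobj *
refobj_create(void)
{
	struct refobj *o = calloc(1, sizeof(*o));
	atomic_init(&o->refs, 1);
	return o;
}

/* pmap_reference() equivalent: a bare atomic bump, no lock needed. */
void
refobj_reference(struct refobj *o)
{
	atomic_fetch_add(&o->refs, 1);
}

/*
 * pmap_destroy() equivalent: atomic_fetch_sub() returns the old value,
 * so only the holder of the last reference sees 1 and frees the object.
 */
void
refobj_destroy(struct refobj *o)
{
	if (atomic_fetch_sub(&o->refs, 1) == 1)
		free(o);
}

The plain pm->pm_refs++/-- that the diff replaces is no longer safe
once two CPUs can run pmap_destroy() on the same pmap concurrently.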
>
> Index: arch/arm64/arm64/cpu.c
> ===================================================================
> RCS file: /cvs/src/sys/arch/arm64/arm64/cpu.c,v
> retrieving revision 1.13
> diff -u -p -r1.13 cpu.c
> --- arch/arm64/arm64/cpu.c 31 Jan 2018 10:52:12 -0000 1.13
> +++ arch/arm64/arm64/cpu.c 19 Feb 2018 12:28:05 -0000
> @@ -454,13 +454,8 @@ cpu_start_secondary(struct cpu_info *ci)
>
> spllower(IPL_NONE);
>
> -#ifdef notyet
> SCHED_LOCK(s);
> cpu_switchto(NULL, sched_chooseproc());
> -#else
> - for (;;)
> - __asm volatile("wfe");
> -#endif
> }
>
> void
> Index: arch/arm64/arm64/pmap.c
> ===================================================================
> RCS file: /cvs/src/sys/arch/arm64/arm64/pmap.c,v
> retrieving revision 1.47
> diff -u -p -r1.47 pmap.c
> --- arch/arm64/arm64/pmap.c 31 Jan 2018 23:23:16 -0000 1.47
> +++ arch/arm64/arm64/pmap.c 19 Feb 2018 12:28:05 -0000
> @@ -165,6 +165,19 @@ struct mem_region *pmap_allocated = &pma
> int pmap_cnt_avail, pmap_cnt_allocated;
> uint64_t pmap_avail_kvo;
>
> +static inline void
> +pmap_lock(struct pmap *pmap)
> +{
> + if (pmap != pmap_kernel())
> + mtx_enter(&pmap->pm_mtx);
> +}
> +
> +static inline void
> +pmap_unlock(struct pmap *pmap)
> +{
> + if (pmap != pmap_kernel())
> + mtx_leave(&pmap->pm_mtx);
> +}
>
> /* virtual to physical helpers */
> static inline int
> @@ -362,6 +375,7 @@ pmap_vp_page_alloc(struct pool *pp, int
> void *v;
>
> kd.kd_waitok = ISSET(flags, PR_WAITOK);
> + kd.kd_trylock = ISSET(flags, PR_NOWAIT);
> kd.kd_slowdown = slowdown;
>
> KERNEL_LOCK();
> @@ -439,14 +453,20 @@ pmap_enter_pv(struct pte_desc *pted, str
> if (__predict_false(!pmap_initialized))
> return;
>
> + mtx_enter(&pg->mdpage.pv_mtx);
> LIST_INSERT_HEAD(&(pg->mdpage.pv_list), pted, pted_pv_list);
> pted->pted_va |= PTED_VA_MANAGED_M;
> + mtx_leave(&pg->mdpage.pv_mtx);
> }
>
> void
> pmap_remove_pv(struct pte_desc *pted)
> {
> + struct vm_page *pg = PHYS_TO_VM_PAGE(pted->pted_pte & PTE_RPGN);
> +
> + mtx_enter(&pg->mdpage.pv_mtx);
> LIST_REMOVE(pted, pted_pv_list);
> + mtx_leave(&pg->mdpage.pv_mtx);
> }
>
> int
> @@ -454,7 +474,7 @@ pmap_enter(pmap_t pm, vaddr_t va, paddr_
> {
> struct pte_desc *pted;
> struct vm_page *pg;
> - int s, error;
> + int error;
> int cache = PMAP_CACHE_WB;
> int need_sync = 0;
>
> @@ -464,9 +484,7 @@ pmap_enter(pmap_t pm, vaddr_t va, paddr_
> cache = PMAP_CACHE_DEV;
> pg = PHYS_TO_VM_PAGE(pa);
>
> - /* MP - Acquire lock for this pmap */
> -
> - s = splvm();
> + pmap_lock(pm);
> pted = pmap_vp_lookup(pm, va, NULL);
> if (pted && PTED_VALID(pted)) {
> pmap_remove_pted(pm, pted);
> @@ -535,8 +553,7 @@ pmap_enter(pmap_t pm, vaddr_t va, paddr_
>
> error = 0;
> out:
> - splx(s);
> - /* MP - free pmap lock */
> + pmap_unlock(pm);
> return error;
> }
>
> @@ -550,6 +567,7 @@ pmap_remove(pmap_t pm, vaddr_t sva, vadd
> struct pte_desc *pted;
> vaddr_t va;
>
> + pmap_lock(pm);
> for (va = sva; va < eva; va += PAGE_SIZE) {
> pted = pmap_vp_lookup(pm, va, NULL);
>
> @@ -564,6 +582,7 @@ pmap_remove(pmap_t pm, vaddr_t sva, vadd
> if (PTED_VALID(pted))
> pmap_remove_pted(pm, pted);
> }
> + pmap_unlock(pm);
> }
>
> /*
> @@ -590,11 +609,12 @@ pmap_remove_pted(pmap_t pm, struct pte_d
> pted->pted_va &= ~PTED_VA_EXEC_M;
> }
>
> - pted->pted_pte = 0;
> -
> if (PTED_MANAGED(pted))
> pmap_remove_pv(pted);
>
> + pted->pted_pte = 0;
> + pted->pted_va = 0;
> +
> if (pm != pmap_kernel())
> pool_put(&pmap_pted_pool, pted);
> splx(s);
> @@ -615,10 +635,6 @@ _pmap_kenter_pa(vaddr_t va, paddr_t pa,
> {
> pmap_t pm = pmap_kernel();
> struct pte_desc *pted;
> - int s;
> -
> - /* MP - lock pmap. */
> - s = splvm();
>
> pted = pmap_vp_lookup(pm, va, NULL);
>
> @@ -645,8 +661,6 @@ _pmap_kenter_pa(vaddr_t va, paddr_t pa,
> pmap_pte_insert(pted);
>
> ttlb_flush(pm, va & ~PAGE_MASK);
> -
> - splx(s);
> }
>
> void
> @@ -839,6 +853,9 @@ pmap_create(void)
> pmap_t pmap;
>
> pmap = pool_get(&pmap_pmap_pool, PR_WAITOK | PR_ZERO);
> +
> + mtx_init(&pmap->pm_mtx, IPL_VM);
> +
> pmap_pinit(pmap);
> if (pmap_vp_poolcache == 0) {
> pool_setlowat(&pmap_vp_pool, 20);
> @@ -853,7 +870,7 @@ pmap_create(void)
> void
> pmap_reference(pmap_t pm)
> {
> - pm->pm_refs++;
> + atomic_inc_int(&pm->pm_refs);
> }
>
> /*
> @@ -865,7 +882,7 @@ pmap_destroy(pmap_t pm)
> {
> int refs;
>
> - refs = --pm->pm_refs;
> + refs = atomic_dec_int_nv(&pm->pm_refs);
> if (refs > 0)
> return;
>
> @@ -1384,41 +1401,67 @@ pmap_page_ro(pmap_t pm, vaddr_t va, vm_p
> void
> pmap_page_protect(struct vm_page *pg, vm_prot_t prot)
> {
> - int s;
> struct pte_desc *pted;
> -
> - /* need to lock for this pv */
> - s = splvm();
> + struct pmap *pm;
>
> if (prot == PROT_NONE) {
> - while (!LIST_EMPTY(&(pg->mdpage.pv_list))) {
> + mtx_enter(&pg->mdpage.pv_mtx);
> + while ((pted = LIST_FIRST(&(pg->mdpage.pv_list))) != NULL) {
> + pmap_reference(pted->pted_pmap);
> + pm = pted->pted_pmap;
> + mtx_leave(&pg->mdpage.pv_mtx);
> +
> + pmap_lock(pm);
> +
> + /*
> + * We dropped the pvlist lock before grabbing
> + * the pmap lock to avoid lock ordering
> + * problems. This means we have to check the
> + * pvlist again since somebody else might have
> + * modified it. All we care about is that the
> + * pvlist entry matches the pmap we just
> + * locked. If it doesn't, unlock the pmap and
> + * try again.
> + */
> + mtx_enter(&pg->mdpage.pv_mtx);
> pted = LIST_FIRST(&(pg->mdpage.pv_list));
> - pmap_remove_pted(pted->pted_pmap, pted);
> + if (pted == NULL || pted->pted_pmap != pm) {
> + mtx_leave(&pg->mdpage.pv_mtx);
> + pmap_unlock(pm);
> + pmap_destroy(pm);
> + mtx_enter(&pg->mdpage.pv_mtx);
> + continue;
> + }
> + mtx_leave(&pg->mdpage.pv_mtx);
> + pmap_remove_pted(pm, pted);
> + pmap_unlock(pm);
> + pmap_destroy(pm);
> +
> + mtx_enter(&pg->mdpage.pv_mtx);
> }
> /* page is being reclaimed, sync icache next use */
> atomic_clearbits_int(&pg->pg_flags, PG_PMAP_EXE);
> - splx(s);
> + mtx_leave(&pg->mdpage.pv_mtx);
> return;
> }
>
> + mtx_enter(&pg->mdpage.pv_mtx);
> LIST_FOREACH(pted, &(pg->mdpage.pv_list), pted_pv_list) {
> pmap_page_ro(pted->pted_pmap, pted->pted_va, prot);
> }
> - splx(s);
> + mtx_leave(&pg->mdpage.pv_mtx);
> }
>
> void
> pmap_protect(pmap_t pm, vaddr_t sva, vaddr_t eva, vm_prot_t prot)
> {
> - int s;
> -
> if (prot & (PROT_READ | PROT_EXEC)) {
> - s = splvm();
> + pmap_lock(pm);
> while (sva < eva) {
> pmap_page_ro(pm, sva, 0);
> sva += PAGE_SIZE;
> }
> - splx(s);
> + pmap_unlock(pm);
> return;
> }
> pmap_remove(pm, sva, eva);
> @@ -1446,8 +1489,8 @@ pmap_init(void)
> pool_init(&pmap_pted_pool, sizeof(struct pte_desc), 0, IPL_VM, 0,
> "pted", NULL);
> pool_setlowat(&pmap_pted_pool, 20);
> - pool_init(&pmap_vp_pool, sizeof(struct pmapvp0), PAGE_SIZE, IPL_VM,
> - PR_WAITOK, "vp", &pmap_vp_allocator);
> + pool_init(&pmap_vp_pool, sizeof(struct pmapvp0), PAGE_SIZE, IPL_VM, 0,
> + "vp", &pmap_vp_allocator);
> /* pool_setlowat(&pmap_vp_pool, 20); */
>
> pmap_initialized = 1;
> @@ -1692,12 +1735,10 @@ pmap_clear_modify(struct vm_page *pg)
> {
> struct pte_desc *pted;
> uint64_t *pl3 = NULL;
> - int s;
>
> - s = splvm();
> -
> - pg->pg_flags &= ~PG_PMAP_MOD;
> + atomic_clearbits_int(&pg->pg_flags, PG_PMAP_MOD);
>
> + mtx_enter(&pg->mdpage.pv_mtx);
> LIST_FOREACH(pted, &(pg->mdpage.pv_list), pted_pv_list) {
> if (pmap_vp_lookup(pted->pted_pmap, pted->pted_va & ~PAGE_MASK,
> &pl3) == NULL)
> panic("failed to look up pte\n");
> @@ -1706,7 +1747,7 @@ pmap_clear_modify(struct vm_page *pg)
>
> ttlb_flush(pted->pted_pmap, pted->pted_va & ~PAGE_MASK);
> }
> - splx(s);
> + mtx_leave(&pg->mdpage.pv_mtx);
>
> return 0;
> }
> @@ -1719,18 +1760,16 @@ int
> pmap_clear_reference(struct vm_page *pg)
> {
> struct pte_desc *pted;
> - int s;
> -
> - s = splvm();
>
> - pg->pg_flags &= ~PG_PMAP_REF;
> + atomic_clearbits_int(&pg->pg_flags, PG_PMAP_REF);
>
> + mtx_enter(&pg->mdpage.pv_mtx);
> LIST_FOREACH(pted, &(pg->mdpage.pv_list), pted_pv_list) {
> pted->pted_pte &= ~PROT_MASK;
> pmap_pte_insert(pted);
> ttlb_flush(pted->pted_pmap, pted->pted_va & ~PAGE_MASK);
> }
> - splx(s);
> + mtx_leave(&pg->mdpage.pv_mtx);
>
> return 0;
> }
> Index: arch/arm64/conf/GENERIC.MP
> ===================================================================
> RCS file: arch/arm64/conf/GENERIC.MP
> diff -N arch/arm64/conf/GENERIC.MP
> --- /dev/null 1 Jan 1970 00:00:00 -0000
> +++ arch/arm64/conf/GENERIC.MP 19 Feb 2018 12:28:05 -0000
> @@ -0,0 +1,8 @@
> +# $OpenBSD$
> +
> +include "arch/arm64/conf/GENERIC"
> +
> +option MULTIPROCESSOR
> +#option MP_LOCKDEBUG
> +
> +cpu* at mainbus?
> Index: arch/arm64/include/pmap.h
> ===================================================================
> RCS file: /cvs/src/sys/arch/arm64/include/pmap.h,v
> retrieving revision 1.9
> diff -u -p -r1.9 pmap.h
> --- arch/arm64/include/pmap.h 12 Jan 2018 14:52:55 -0000 1.9
> +++ arch/arm64/include/pmap.h 19 Feb 2018 12:28:05 -0000
> @@ -17,7 +17,9 @@
> #ifndef _ARM64_PMAP_H_
> #define _ARM64_PMAP_H_
>
> -#include <arm64/pte.h>
> +#include <sys/mutex.h>
> +
> +#include <machine/pte.h>
>
> #define PMAP_PA_MASK ~((paddr_t)PAGE_MASK) /* to remove the flags */
> #define PMAP_NOCACHE 0x1 /* non-cacheable memory */
> @@ -66,6 +68,7 @@ void pagezero_cache(vaddr_t);
> * Pmap stuff
> */
> struct pmap {
> + struct mutex pm_mtx;
> union {
> struct pmapvp0 *l0; /* virtual to physical table 4 lvl */
> struct pmapvp1 *l1; /* virtual to physical table 3 lvl */
> @@ -104,12 +107,14 @@ void pmap_map_early(paddr_t, psize_t);
>
> #define __HAVE_VM_PAGE_MD
> struct vm_page_md {
> + struct mutex pv_mtx;
> LIST_HEAD(,pte_desc) pv_list;
> int pvh_attrs; /* page attributes */
> };
>
> #define VM_MDPAGE_INIT(pg) do { \
> - LIST_INIT(&((pg)->mdpage.pv_list)); \
> + mtx_init(&(pg)->mdpage.pv_mtx, IPL_VM); \
> + LIST_INIT(&((pg)->mdpage.pv_list)); \
> (pg)->mdpage.pvh_attrs = 0; \
> } while (0)
>