Module Name:    src
Committed By:   riastradh
Date:           Sat Sep 24 11:05:18 UTC 2022

Modified Files:
        src/sys/arch/amd64/conf: ALL GENERIC
        src/sys/arch/i386/conf: ALL GENERIC
        src/sys/arch/x86/conf: files.x86
        src/sys/arch/x86/include: pmap_private.h
        src/sys/arch/x86/x86: cpu.c efi_machdep.c pmap.c svs.c
Added Files:
        src/sys/arch/amd64/include: efi.h
        src/sys/arch/i386/include: efi.h

Log Message:
x86: Support EFI runtime services.

This creates a special pmap, efi_runtime_pmap, which avoids setting
PTE_U but allows mappings to lie in what would normally be user VM --
this way we don't fall afoul of SMAP/SMEP when executing EFI runtime
services from CPL 0.  SVS does not apply to the EFI runtime pmap.

The mechanism is intended to work with either physical addressing or
virtual addressing; currently the bootloader does physical addressing
but in principle it could be modified to do virtual addressing
instead, if it allocated virtual pages, assigned them in the memory
map, and issued RT->SetVirtualAddressMap.

Not sure pmap_activate_sync and pmap_deactivate_sync are correct;
they need more review from an x86 wizard.

If this causes fallout, it can be disabled temporarily without
reverting anything by just making efi_runtime_init return immediately
without doing anything, or by removing options EFI_RUNTIME.

amd64-only for now pending type fixes and testing on i386.


To generate a diff of this commit:
cvs rdiff -u -r1.173 -r1.174 src/sys/arch/amd64/conf/ALL
cvs rdiff -u -r1.597 -r1.598 src/sys/arch/amd64/conf/GENERIC
cvs rdiff -u -r0 -r1.1 src/sys/arch/amd64/include/efi.h
cvs rdiff -u -r1.502 -r1.503 src/sys/arch/i386/conf/ALL
cvs rdiff -u -r1.1240 -r1.1241 src/sys/arch/i386/conf/GENERIC
cvs rdiff -u -r0 -r1.1 src/sys/arch/i386/include/efi.h
cvs rdiff -u -r1.123 -r1.124 src/sys/arch/x86/conf/files.x86
cvs rdiff -u -r1.3 -r1.4 src/sys/arch/x86/include/pmap_private.h
cvs rdiff -u -r1.205 -r1.206 src/sys/arch/x86/x86/cpu.c
cvs rdiff -u -r1.1 -r1.2 src/sys/arch/x86/x86/efi_machdep.c
cvs rdiff -u -r1.421 -r1.422 src/sys/arch/x86/x86/pmap.c
cvs rdiff -u -r1.41 -r1.42 src/sys/arch/x86/x86/svs.c

Please note that diffs are not public domain; they are subject to the
copyright notices on the relevant files.

Modified files:

Index: src/sys/arch/amd64/conf/ALL
diff -u src/sys/arch/amd64/conf/ALL:1.173 src/sys/arch/amd64/conf/ALL:1.174
--- src/sys/arch/amd64/conf/ALL:1.173	Sun Aug  7 02:52:23 2022
+++ src/sys/arch/amd64/conf/ALL	Sat Sep 24 11:05:17 2022
@@ -1,4 +1,4 @@
-# $NetBSD: ALL,v 1.173 2022/08/07 02:52:23 simonb Exp $
+# $NetBSD: ALL,v 1.174 2022/09/24 11:05:17 riastradh Exp $
 # From NetBSD: GENERIC,v 1.787 2006/10/01 18:37:54 bouyer Exp
 #
 # ALL machine description file
@@ -17,7 +17,7 @@ include 	"arch/amd64/conf/std.amd64"
 
 options 	INCLUDE_CONFIG_FILE	# embed config file in kernel binary
 
-#ident		"ALL-$Revision: 1.173 $"
+#ident		"ALL-$Revision: 1.174 $"
 
 maxusers	64		# estimated number of users
 
@@ -1708,6 +1708,10 @@ pseudo-device	pad
 # userland interface to drivers, including autoconf and properties retrieval
 pseudo-device	drvctl
 
+# EFI runtime support
+options 	EFI_RUNTIME
+pseudo-device 	efi			# /dev/efi
+
 # Pass-to-Userspace Transporter
 pseudo-device	putter
 

Index: src/sys/arch/amd64/conf/GENERIC
diff -u src/sys/arch/amd64/conf/GENERIC:1.597 src/sys/arch/amd64/conf/GENERIC:1.598
--- src/sys/arch/amd64/conf/GENERIC:1.597	Thu Sep  1 12:35:02 2022
+++ src/sys/arch/amd64/conf/GENERIC	Sat Sep 24 11:05:17 2022
@@ -1,4 +1,4 @@
-# $NetBSD: GENERIC,v 1.597 2022/09/01 12:35:02 bouyer Exp $
+# $NetBSD: GENERIC,v 1.598 2022/09/24 11:05:17 riastradh Exp $
 #
 # GENERIC machine description file
 #
@@ -22,7 +22,7 @@ include 	"arch/amd64/conf/std.amd64"
 
 options 	INCLUDE_CONFIG_FILE	# embed config file in kernel binary
 
-#ident		"GENERIC-$Revision: 1.597 $"
+#ident		"GENERIC-$Revision: 1.598 $"
 
 maxusers	64		# estimated number of users
 
@@ -1225,6 +1225,10 @@ pseudo-device	pad
 # userland interface to drivers, including autoconf and properties retrieval
 pseudo-device	drvctl
 
+# EFI runtime support
+options 	EFI_RUNTIME
+pseudo-device 	efi			# /dev/efi
+
 include "dev/veriexec.config"
 
 options 	PAX_SEGVGUARD=0		# PaX Segmentation fault guard

Index: src/sys/arch/i386/conf/ALL
diff -u src/sys/arch/i386/conf/ALL:1.502 src/sys/arch/i386/conf/ALL:1.503
--- src/sys/arch/i386/conf/ALL:1.502	Sun Aug  7 02:52:26 2022
+++ src/sys/arch/i386/conf/ALL	Sat Sep 24 11:05:17 2022
@@ -1,4 +1,4 @@
-# $NetBSD: ALL,v 1.502 2022/08/07 02:52:26 simonb Exp $
+# $NetBSD: ALL,v 1.503 2022/09/24 11:05:17 riastradh Exp $
 # From NetBSD: GENERIC,v 1.787 2006/10/01 18:37:54 bouyer Exp
 #
 # ALL machine description file
@@ -17,7 +17,7 @@ include 	"arch/i386/conf/std.i386"
 
 options 	INCLUDE_CONFIG_FILE	# embed config file in kernel binary
 
-#ident		"ALL-$Revision: 1.502 $"
+#ident		"ALL-$Revision: 1.503 $"
 
 maxusers	64		# estimated number of users
 
@@ -1805,6 +1805,10 @@ pseudo-device	pad
 # userland interface to drivers, including autoconf and properties retrieval
 pseudo-device	drvctl
 
+# EFI runtime support -- doesn't build yet
+#options 	EFI_RUNTIME
+#pseudo-device 	efi			# /dev/efi
+
 # Pass-to-Userspace Transporter
 pseudo-device	putter
 

Index: src/sys/arch/i386/conf/GENERIC
diff -u src/sys/arch/i386/conf/GENERIC:1.1240 src/sys/arch/i386/conf/GENERIC:1.1241
--- src/sys/arch/i386/conf/GENERIC:1.1240	Sun Aug  7 02:52:26 2022
+++ src/sys/arch/i386/conf/GENERIC	Sat Sep 24 11:05:17 2022
@@ -1,4 +1,4 @@
-# $NetBSD: GENERIC,v 1.1240 2022/08/07 02:52:26 simonb Exp $
+# $NetBSD: GENERIC,v 1.1241 2022/09/24 11:05:17 riastradh Exp $
 #
 # GENERIC machine description file
 #
@@ -22,7 +22,7 @@ include 	"arch/i386/conf/std.i386"
 
 options 	INCLUDE_CONFIG_FILE	# embed config file in kernel binary
 
-#ident		"GENERIC-$Revision: 1.1240 $"
+#ident		"GENERIC-$Revision: 1.1241 $"
 
 maxusers	64		# estimated number of users
 
@@ -1507,6 +1507,10 @@ pseudo-device	cmos
 # userland interface to drivers, including autoconf and properties retrieval
 pseudo-device	drvctl
 
+# EFI runtime support -- not yet tested
+#options 	EFI_RUNTIME
+#pseudo-device 	efi			# /dev/efi
+
 include "dev/veriexec.config"
 
 options 	PAX_ASLR_DEBUG=1	# PaX ASLR debug

Index: src/sys/arch/x86/conf/files.x86
diff -u src/sys/arch/x86/conf/files.x86:1.123 src/sys/arch/x86/conf/files.x86:1.124
--- src/sys/arch/x86/conf/files.x86:1.123	Tue Aug 30 11:03:36 2022
+++ src/sys/arch/x86/conf/files.x86	Sat Sep 24 11:05:18 2022
@@ -1,4 +1,4 @@
-#	$NetBSD: files.x86,v 1.123 2022/08/30 11:03:36 riastradh Exp $
+#	$NetBSD: files.x86,v 1.124 2022/09/24 11:05:18 riastradh Exp $
 
 # options for MP configuration through the MP spec
 defflag opt_mpbios.h MPBIOS MPDEBUG MPBIOS_SCANPCI
@@ -21,6 +21,8 @@ defflag	opt_xen.h		DO_NOT_DEFINE
 # Option to have a static kernel memory layout
 defflag opt_kaslr.h	NO_X86_ASLR
 
+defflag opt_efi.h	EFI_RUNTIME
+
 defflag	SVS
 
 defflag	PCPU_IDT

Index: src/sys/arch/x86/include/pmap_private.h
diff -u src/sys/arch/x86/include/pmap_private.h:1.3 src/sys/arch/x86/include/pmap_private.h:1.4
--- src/sys/arch/x86/include/pmap_private.h:1.3	Tue Sep 13 09:40:17 2022
+++ src/sys/arch/x86/include/pmap_private.h	Sat Sep 24 11:05:18 2022
@@ -1,4 +1,4 @@
-/*	$NetBSD: pmap_private.h,v 1.3 2022/09/13 09:40:17 riastradh Exp $	*/
+/*	$NetBSD: pmap_private.h,v 1.4 2022/09/24 11:05:18 riastradh Exp $	*/
 
 /*
  * Copyright (c) 1997 Charles D. Cranor and Washington University.
@@ -378,4 +378,22 @@ extern struct pcpu_area *pcpuarea;
 
 void	svs_quad_copy(void *, void *, long);
 
+#ifdef _KERNEL_OPT
+#include "opt_efi.h"
+#endif
+
+#ifdef EFI_RUNTIME
+void *		pmap_activate_sync(struct pmap *);
+void		pmap_deactivate_sync(struct pmap *, void *);
+bool		pmap_is_user(struct pmap *);
+#else
+static inline bool
+pmap_is_user(struct pmap *pmap)
+{
+
+	KASSERT(pmap != pmap_kernel());
+	return true;
+}
+#endif
+
 #endif	/* _X86_PMAP_PRIVATE_H_ */

Index: src/sys/arch/x86/x86/cpu.c
diff -u src/sys/arch/x86/x86/cpu.c:1.205 src/sys/arch/x86/x86/cpu.c:1.206
--- src/sys/arch/x86/x86/cpu.c:1.205	Sat Aug 20 23:48:51 2022
+++ src/sys/arch/x86/x86/cpu.c	Sat Sep 24 11:05:18 2022
@@ -1,4 +1,4 @@
-/*	$NetBSD: cpu.c,v 1.205 2022/08/20 23:48:51 riastradh Exp $	*/
+/*	$NetBSD: cpu.c,v 1.206 2022/09/24 11:05:18 riastradh Exp $	*/
 
 /*
  * Copyright (c) 2000-2020 NetBSD Foundation, Inc.
@@ -62,7 +62,7 @@
  */
 
 #include <sys/cdefs.h>
-__KERNEL_RCSID(0, "$NetBSD: cpu.c,v 1.205 2022/08/20 23:48:51 riastradh Exp $");
+__KERNEL_RCSID(0, "$NetBSD: cpu.c,v 1.206 2022/09/24 11:05:18 riastradh Exp $");
 
 #include "opt_ddb.h"
 #include "opt_mpbios.h"		/* for MPDEBUG */
@@ -1434,7 +1434,7 @@ void
 cpu_load_pmap(struct pmap *pmap, struct pmap *oldpmap)
 {
 #ifdef SVS
-	if (svs_enabled) {
+	if (svs_enabled && pmap_is_user(pmap)) {
 		svs_pdir_switch(pmap);
 	}
 #endif

Index: src/sys/arch/x86/x86/efi_machdep.c
diff -u src/sys/arch/x86/x86/efi_machdep.c:1.1 src/sys/arch/x86/x86/efi_machdep.c:1.2
--- src/sys/arch/x86/x86/efi_machdep.c:1.1	Tue Aug 30 11:03:36 2022
+++ src/sys/arch/x86/x86/efi_machdep.c	Sat Sep 24 11:05:18 2022
@@ -1,4 +1,4 @@
-/*	$NetBSD: efi_machdep.c,v 1.1 2022/08/30 11:03:36 riastradh Exp $	*/
+/*	$NetBSD: efi_machdep.c,v 1.2 2022/09/24 11:05:18 riastradh Exp $	*/
 
 /*-
  * Copyright (c) 2016 The NetBSD Foundation, Inc.
@@ -27,7 +27,10 @@
  */
 
 #include <sys/cdefs.h>
-__KERNEL_RCSID(0, "$NetBSD: efi_machdep.c,v 1.1 2022/08/30 11:03:36 riastradh Exp $");
+__KERNEL_RCSID(0, "$NetBSD: efi_machdep.c,v 1.2 2022/09/24 11:05:18 riastradh Exp $");
+
+#include "efi.h"
+#include "opt_efi.h"
 
 #include <sys/kmem.h>
 #include <sys/param.h>
@@ -37,9 +40,12 @@ __KERNEL_RCSID(0, "$NetBSD: efi_machdep.
 #include <uvm/uvm_extern.h>
 
 #include <machine/bootinfo.h>
+#include <machine/pmap_private.h>
+
 #include <x86/bus_defs.h>
 #include <x86/bus_funcs.h>
 #include <x86/efi.h>
+#include <x86/fpu.h>
 
 #include <dev/mm.h>
 #if NPCI > 0
@@ -67,6 +73,26 @@ static struct efi_e820memmap {
 	struct bi_memmap_entry entry[VM_PHYSSEG_MAX - 1];
 } efi_e820memmap;
 
+#ifdef EFI_RUNTIME
+
+#include <dev/efivar.h>
+
+#include <uvm/uvm_extern.h>
+
+#if !(NEFI > 0)
+#error options EFI_RUNTIME makes no sense without pseudo-device efi.
+#endif
+
+struct pmap *efi_runtime_pmap __read_mostly;
+
+static kmutex_t efi_runtime_lock __cacheline_aligned;
+static struct efi_rt efi_rt __read_mostly;
+static struct efi_ops efi_runtime_ops __read_mostly;
+
+static void efi_runtime_init(void);
+
+#endif
+
 /*
  * Map a physical address (PA) to a newly allocated virtual address (VA).
  * The VA must be freed using efi_relva().
@@ -408,6 +434,10 @@ efi_init(void)
 #if NPCI > 0
 	pci_mapreg_map_enable_decode = true; /* PR port-amd64/53286 */
 #endif
+
+#ifdef EFI_RUNTIME
+	efi_runtime_init();
+#endif
 }
 
 bool
@@ -548,3 +578,419 @@ efi_get_e820memmap(void)
 	efi_e820memmap.bim.common.type = BTINFO_MEMMAP;
 	return &efi_e820memmap.bim;
 }
+
+#ifdef EFI_RUNTIME
+
+/*
+ * XXX move to sys/dev/efi/efi.h
+ */
+#ifdef _LP64
+#define	EFIERR(x)	(0x8000000000000000ul | (x))
+#else
+#define	EFIERR(x)	(0x80000000ul | (x))
+#endif
+
+#define	EFI_UNSUPPORTED		EFIERR(3)
+#define	EFI_DEVICE_ERROR	EFIERR(7)
+
+/*
+ * efi_runtime_init()
+ *
+ *	Set up kernel access to EFI runtime services:
+ *
+ *	- Create efi_runtime_pmap.
+ *	- Enter all the EFI runtime memory mappings into it.
+ *	- Make a copy of the EFI runtime services table in efi_rt.
+ *	- Initialize efi_runtime_lock to serialize calls.
+ *	- Register EFI runtime service operations for /dev/efi.
+ *
+ *	On failure, leaves efi_rt zero-initialized and everything else
+ *	uninitialized.
+ */
+static void
+efi_runtime_init(void)
+{
+	struct efi_systbl *systbl;
+	struct btinfo_efimemmap *efimm;
+	uint32_t i;
+	int error;
+
+	/*
+	 * Refuse to handle EFI runtime services with cross-word-sizes
+	 * for now.  We would need logic to handle the cross table
+	 * types, and logic to translate between the calling
+	 * conventions -- might be easy for 32-bit EFI and 64-bit OS,
+	 * but sounds painful to contemplate for 64-bit EFI and 32-bit
+	 * OS.
+	 */
+	if (efi_is32x64) {
+		aprint_debug("%s: 32x64 runtime services not supported\n",
+		    __func__);
+		return;
+	}
+
+	/*
+	 * Verify that we have an EFI system table with runtime
+	 * services and an EFI memory map.
+	 */
+	systbl = efi_getsystbl();
+	if (systbl->st_rt == NULL) {
+		aprint_debug("%s: no runtime\n", __func__);
+		return;
+	}
+	if ((efimm = lookup_bootinfo(BTINFO_EFIMEMMAP)) == NULL) {
+		aprint_debug("%s: no efi memmap\n", __func__);
+		return;
+	}
+
+	/*
+	 * Create a pmap for EFI runtime services and switch to it to
+	 * enter all of the mappings needed for EFI runtime services
+	 * according to the EFI_MEMORY_DESCRIPTOR records.
+	 */
+	efi_runtime_pmap = pmap_create();
+	void *const cookie = pmap_activate_sync(efi_runtime_pmap);
+	for (i = 0; i < efimm->num; i++) {
+		struct efi_md *md = (void *)(efimm->memmap + efimm->size * i);
+		uint64_t j;
+		vaddr_t va;
+		paddr_t pa;
+		int prot, flags;
+
+		/*
+		 * Only enter mappings tagged EFI_MEMORY_RUNTIME.
+		 * Ignore all others.
+		 */
+		if ((md->md_attr & EFI_MD_ATTR_RT) == 0)
+			continue;
+
+		/*
+		 * For debug boots, print the memory descriptor.
+		 */
+		aprint_debug("%s: map %zu pages at %#"PRIxVADDR
+		    " to %#"PRIxPADDR" type %"PRIu32" attrs 0x%08"PRIx64"\n",
+		    __func__, (size_t)md->md_pages, (vaddr_t)md->md_virt,
+		    (paddr_t)md->md_phys, md->md_type, md->md_attr);
+
+		/*
+		 * Allow read and write access in all of the mappings.
+		 * For code mappings, also allow execution by default.
+		 *
+		 * Even code mappings must be writable, apparently.
+		 * The mappings can be marked RO or XP to prevent write
+		 * or execute, but the code mappings are usually at the
+		 * level of entire PECOFF objects containing both rw-
+		 * and r-x sections.  The EFI_MEMORY_ATTRIBUTES_TABLE
+		 * provides finer-grained mapping protections, but we
+		 * don't currently use it.
+		 *
+		 * XXX Should parse EFI_MEMORY_ATTRIBUTES_TABLE and use
+		 * it to nix W or X access when possible.
+		 */
+		prot = VM_PROT_READ|VM_PROT_WRITE;
+		switch (md->md_type) {
+		case EFI_MD_TYPE_RT_CODE:
+			prot |= VM_PROT_EXECUTE;
+			break;
+		}
+
+		/*
+		 * Additionally pass on:
+		 *
+		 *	EFI_MEMORY_UC (uncacheable) -> PMAP_NOCACHE
+		 *	EFI_MEMORY_WC (write-combining) -> PMAP_WRITE_COMBINE
+		 *	EFI_MEMORY_RO (read-only) -> clear VM_PROT_WRITE
+		 *	EFI_MEMORY_XP (exec protect) -> clear VM_PROT_EXECUTE
+		 */
+		flags = 0;
+		if (md->md_attr & EFI_MD_ATTR_UC)
+			flags |= PMAP_NOCACHE;
+		else if (md->md_attr & EFI_MD_ATTR_WC)
+			flags |= PMAP_WRITE_COMBINE;
+		if (md->md_attr & EFI_MD_ATTR_RO)
+			prot &= ~VM_PROT_WRITE;
+		if (md->md_attr & EFI_MD_ATTR_XP)
+			prot &= ~VM_PROT_EXECUTE;
+
+		/*
+		 * Get the physical address, and the virtual address
+		 * that the EFI runtime services want mapped to it.
+		 *
+		 * If the requested virtual address is zero, assume
+		 * we're using physical addressing, i.e., VA is the
+		 * same as PA.
+		 *
+		 * This logic is intended to allow the bootloader to
+		 * choose whether to use physical addressing or to use
+		 * virtual addressing with RT->SetVirtualAddressMap --
+		 * the kernel should work either way (although as of
+		 * time of writing it has only been tested with
+		 * physical addressing).
+		 */
+		pa = md->md_phys;
+		va = md->md_virt;
+		if (va == 0)
+			va = pa;
+
+		/*
+		 * Fail if EFI runtime services want any virtual pages
+		 * of the kernel map.
+		 */
+		if (VM_MIN_KERNEL_ADDRESS <= va &&
+		    va < VM_MAX_KERNEL_ADDRESS) {
+			aprint_debug("%s: efi runtime overlaps kernel map"
+			    " %"PRIxVADDR" in [%"PRIxVADDR", %"PRIxVADDR")\n",
+			    __func__,
+			    va,
+			    (vaddr_t)VM_MIN_KERNEL_ADDRESS,
+			    (vaddr_t)VM_MAX_KERNEL_ADDRESS);
+			goto fail;
+		}
+
+		/*
+		 * Fail if it would interfere with a direct map.
+		 *
+		 * (It's possible that it might happen to be identical
+		 * to the direct mapping, in which case we could skip
+		 * this entry.  Seems unlikely; let's deal with that
+		 * edge case as it comes up.)
+		 */
+#ifdef __HAVE_DIRECT_MAP
+		if (PMAP_DIRECT_BASE <= va && va < PMAP_DIRECT_END) {
+			aprint_debug("%s: efi runtime overlaps direct map"
+			    " %"PRIxVADDR" in [%"PRIxVADDR", %"PRIxVADDR")\n",
+			    __func__,
+			    va,
+			    (vaddr_t)PMAP_DIRECT_BASE,
+			    (vaddr_t)PMAP_DIRECT_END);
+			goto fail;
+		}
+#endif
+
+		/*
+		 * Enter each page in the range of this memory
+		 * descriptor into efi_runtime_pmap.
+		 */
+		for (j = 0; j < md->md_pages; j++) {
+			error = pmap_enter(efi_runtime_pmap,
+			    va + j*PAGE_SIZE, pa + j*PAGE_SIZE, prot, flags);
+			KASSERTMSG(error == 0, "error=%d", error);
+		}
+	}
+
+	/*
+	 * Commit the updates, make a copy of the EFI runtime services
+	 * for easy determination of unsupported ones without needing
+	 * the pmap, and deactivate the pmap since we're done with it
+	 * for now.
+	 */
+	pmap_update(efi_runtime_pmap);
+	memcpy(&efi_rt, systbl->st_rt, sizeof(efi_rt));
+	pmap_deactivate_sync(efi_runtime_pmap, cookie);
+
+	/*
+	 * Initialize efi_runtime_lock for serializing access to the
+	 * EFI runtime services from any context up to interrupts at
+	 * IPL_VM.
+	 */
+	mutex_init(&efi_runtime_lock, MUTEX_DEFAULT, IPL_VM);
+
+	/*
+	 * Register the EFI runtime operations for /dev/efi.
+	 */
+	efi_register_ops(&efi_runtime_ops);
+
+	return;
+
+fail:	/*
+	 * On failure, deactivate and destroy efi_runtime_pmap -- no
+	 * runtime services.
+	 */
+	pmap_deactivate_sync(efi_runtime_pmap, cookie);
+	pmap_destroy(efi_runtime_pmap);
+	efi_runtime_pmap = NULL;
+	/*
+	 * efi_rt is all zero, so calls return EFI_UNSUPPORTED even if made
+	 * outside efi_runtime_ops (which has not been registered).
+	 */
+}
+
+struct efi_runtime_cookie {
+	void	*erc_pmap_cookie;
+};
+
+/*
+ * efi_runtime_enter(cookie)
+ *
+ *	Prepare to call an EFI runtime service, storing state for the
+ *	context in cookie.  Caller must call efi_runtime_exit when
+ *	done.
+ */
+static void
+efi_runtime_enter(struct efi_runtime_cookie *cookie)
+{
+
+	KASSERT(efi_runtime_pmap != NULL);
+
+	/*
+	 * Serialize queries to the EFI runtime services.
+	 *
+	 * The UEFI spec allows some concurrency among them with rules
+	 * about which calls can run in parallel with which other
+	 * calls, but it is simplest if we just serialize everything --
+	 * none of this is performance-critical.
+	 */
+	mutex_enter(&efi_runtime_lock);
+
+	/*
+	 * EFI runtime services may use the FPU, so stash any user FPU
+	 * state and enable kernel use of it.  This has the side
+	 * effects of disabling preemption and of blocking interrupts
+	 * at up to and including IPL_VM.
+	 */
+	fpu_kern_enter();
+
+	/*
+	 * Activate the efi_runtime_pmap so that the EFI runtime
+	 * services have access to the memory mappings the firmware
+	 * requested, but not access to any user mappings.  They still,
+	 * however, have access to all kernel mappings, so we can pass
+	 * in pointers to buffers in KVA -- the EFI runtime services
+	 * run privileged, which they need in order to do I/O anyway.
+	 */
+	cookie->erc_pmap_cookie = pmap_activate_sync(efi_runtime_pmap);
+}
+
+/*
+ * efi_runtime_exit(cookie)
+ *
+ *	Restore state prior to efi_runtime_enter as stored in cookie
+ *	for a call to an EFI runtime service.
+ */
+static void
+efi_runtime_exit(struct efi_runtime_cookie *cookie)
+{
+
+	pmap_deactivate_sync(efi_runtime_pmap, cookie->erc_pmap_cookie);
+	fpu_kern_leave();
+	mutex_exit(&efi_runtime_lock);
+}
+
+/*
+ * efi_runtime_gettime(tm, tmcap)
+ *
+ *	Call RT->GetTime, or return EFI_UNSUPPORTED if unsupported.
+ */
+static efi_status
+efi_runtime_gettime(struct efi_tm *tm, struct efi_tmcap *tmcap)
+{
+	efi_status status;
+	struct efi_runtime_cookie cookie;
+
+	if (efi_rt.rt_gettime == NULL)
+		return EFI_UNSUPPORTED;
+
+	efi_runtime_enter(&cookie);
+	status = efi_rt.rt_gettime(tm, tmcap);
+	efi_runtime_exit(&cookie);
+
+	return status;
+}
+
+
+/*
+ * efi_runtime_settime(tm)
+ *
+ *	Call RT->SetTime, or return EFI_UNSUPPORTED if unsupported.
+ */
+static efi_status
+efi_runtime_settime(struct efi_tm *tm)
+{
+	efi_status status;
+	struct efi_runtime_cookie cookie;
+
+	if (efi_rt.rt_settime == NULL)
+		return EFI_UNSUPPORTED;
+
+	efi_runtime_enter(&cookie);
+	status = efi_rt.rt_settime(tm);
+	efi_runtime_exit(&cookie);
+
+	return status;
+}
+
+/*
+ * efi_runtime_getvar(name, vendor, attrib, datasize, data)
+ *
+ *	Call RT->GetVariable, or return EFI_UNSUPPORTED if unsupported.
+ */
+static efi_status
+efi_runtime_getvar(efi_char *name, struct uuid *vendor, uint32_t *attrib,
+    unsigned long *datasize, void *data)
+{
+	efi_status status;
+	struct efi_runtime_cookie cookie;
+
+	if (efi_rt.rt_getvar == NULL)
+		return EFI_UNSUPPORTED;
+
+	efi_runtime_enter(&cookie);
+	status = efi_rt.rt_getvar(name, vendor, attrib, datasize, data);
+	efi_runtime_exit(&cookie);
+
+	return status;
+}
+
+/*
+ * efi_runtime_nextvar(namesize, name, vendor)
+ *
+ *	Call RT->GetNextVariableName, or return EFI_UNSUPPORTED if unsupported.
+ */
+static efi_status
+efi_runtime_nextvar(unsigned long *namesize, efi_char *name,
+    struct uuid *vendor)
+{
+	efi_status status;
+	struct efi_runtime_cookie cookie;
+
+	if (efi_rt.rt_scanvar == NULL)
+		return EFI_UNSUPPORTED;
+
+	efi_runtime_enter(&cookie);
+	status = efi_rt.rt_scanvar(namesize, name, vendor);
+	efi_runtime_exit(&cookie);
+
+	return status;
+}
+
+/*
+ * efi_runtime_setvar(name, vendor, attrib, datasize, data)
+ *
+ *	Call RT->SetVariable, or return EFI_UNSUPPORTED if unsupported.
+ */
+static efi_status
+efi_runtime_setvar(efi_char *name, struct uuid *vendor, uint32_t attrib,
+    unsigned long datasize, void *data)
+{
+	efi_status status;
+	struct efi_runtime_cookie cookie;
+
+	if (efi_rt.rt_setvar == NULL)
+		return EFI_UNSUPPORTED;
+
+	efi_runtime_enter(&cookie);
+	status = efi_rt.rt_setvar(name, vendor, attrib, datasize, data);
+	efi_runtime_exit(&cookie);
+
+	return status;
+}
+
+static struct efi_ops efi_runtime_ops = {
+	.efi_gettime = efi_runtime_gettime,
+	.efi_settime = efi_runtime_settime,
+	.efi_getvar = efi_runtime_getvar,
+	.efi_setvar = efi_runtime_setvar,
+	.efi_nextvar = efi_runtime_nextvar,
+};
+
+#endif	/* EFI_RUNTIME */

Index: src/sys/arch/x86/x86/pmap.c
diff -u src/sys/arch/x86/x86/pmap.c:1.421 src/sys/arch/x86/x86/pmap.c:1.422
--- src/sys/arch/x86/x86/pmap.c:1.421	Wed Aug 31 12:51:56 2022
+++ src/sys/arch/x86/x86/pmap.c	Sat Sep 24 11:05:18 2022
@@ -1,4 +1,4 @@
-/*	$NetBSD: pmap.c,v 1.421 2022/08/31 12:51:56 bouyer Exp $	*/
+/*	$NetBSD: pmap.c,v 1.422 2022/09/24 11:05:18 riastradh Exp $	*/
 
 /*
  * Copyright (c) 2008, 2010, 2016, 2017, 2019, 2020 The NetBSD Foundation, Inc.
@@ -130,7 +130,7 @@
  */
 
 #include <sys/cdefs.h>
-__KERNEL_RCSID(0, "$NetBSD: pmap.c,v 1.421 2022/08/31 12:51:56 bouyer Exp $");
+__KERNEL_RCSID(0, "$NetBSD: pmap.c,v 1.422 2022/09/24 11:05:18 riastradh Exp $");
 
 #include "opt_user_ldt.h"
 #include "opt_lockdebug.h"
@@ -138,6 +138,7 @@ __KERNEL_RCSID(0, "$NetBSD: pmap.c,v 1.4
 #include "opt_xen.h"
 #include "opt_svs.h"
 #include "opt_kaslr.h"
+#include "opt_efi.h"
 
 #define	__MUTEX_PRIVATE	/* for assertions */
 
@@ -2497,7 +2498,8 @@ pmap_free_ptp(struct pmap *pmap, struct 
 			xen_kpm_sync(pmap, index);
 		}
 #elif defined(SVS)
-		if (svs_enabled && level == PTP_LEVELS - 1) {
+		if (svs_enabled && level == PTP_LEVELS - 1 &&
+		    pmap_is_user(pmap)) {
 			svs_pmap_sync(pmap, index);
 		}
 #endif
@@ -2633,7 +2635,8 @@ pmap_install_ptp(struct pmap *pmap, stru
 			xen_kpm_sync(pmap, index);
 		}
 #elif defined(SVS)
-		if (svs_enabled && i == PTP_LEVELS) {
+		if (svs_enabled && i == PTP_LEVELS &&
+		    pmap_is_user(pmap)) {
 			svs_pmap_sync(pmap, index);
 		}
 #endif
@@ -3741,6 +3744,111 @@ pmap_deactivate(struct lwp *l)
 	ci->ci_tlbstate = TLBSTATE_LAZY;
 }
 
+#ifdef EFI_RUNTIME
+
+extern struct pmap *efi_runtime_pmap;
+
+/*
+ * pmap_is_user: true if pmap, which must not be the kernel pmap, is
+ * for an unprivileged user process
+ */
+bool
+pmap_is_user(struct pmap *pmap)
+{
+
+	KASSERT(pmap != pmap_kernel());
+	return (pmap != efi_runtime_pmap);
+}
+
+/*
+ * pmap_activate_sync: synchronously activate specified pmap.
+ *
+ * => Must be called with kernel preemption disabled (high IPL is enough).
+ * => Must not sleep before pmap_deactivate_sync.
+ */
+void *
+pmap_activate_sync(struct pmap *pmap)
+{
+	struct cpu_info *ci = curcpu();
+	struct pmap *oldpmap = ci->ci_pmap;
+	unsigned cid = cpu_index(ci);
+
+	KASSERT(kpreempt_disabled());
+	KASSERT(pmap != pmap_kernel());
+
+	KASSERT(!kcpuset_isset(pmap->pm_cpus, cid));
+	KASSERT(!kcpuset_isset(pmap->pm_kernel_cpus, cid));
+
+	if (oldpmap) {
+		KASSERT_PDIRPA(oldpmap);
+		kcpuset_atomic_clear(oldpmap->pm_cpus, cid);
+		kcpuset_atomic_clear(oldpmap->pm_kernel_cpus, cid);
+	}
+
+	ci->ci_tlbstate = TLBSTATE_VALID;
+	kcpuset_atomic_set(pmap->pm_cpus, cid);
+	kcpuset_atomic_set(pmap->pm_kernel_cpus, cid);
+	ci->ci_pmap = pmap;
+
+#if defined(SVS) && defined(USER_LDT)
+	if (svs_enabled) {
+		svs_ldt_sync(pmap);
+	} else
+#endif
+	lldt(pmap->pm_ldt_sel);
+
+	cpu_load_pmap(pmap, oldpmap);
+
+	return oldpmap;
+}
+
+/*
+ * pmap_deactivate_sync: synchronously deactivate specified pmap and
+ * restore whatever was active before pmap_activate_sync.
+ *
+ * => Must be called with kernel preemption disabled (high IPL is enough).
+ * => Must not have slept since pmap_activate_sync.
+ */
+void
+pmap_deactivate_sync(struct pmap *pmap, void *cookie)
+{
+	struct cpu_info *ci = curcpu();
+	struct pmap *oldpmap = cookie;
+	unsigned cid = cpu_index(ci);
+
+	KASSERT(kpreempt_disabled());
+	KASSERT(pmap != pmap_kernel());
+	KASSERT(ci->ci_pmap == pmap);
+
+	KASSERT_PDIRPA(pmap);
+
+	KASSERT(kcpuset_isset(pmap->pm_cpus, cid));
+	KASSERT(kcpuset_isset(pmap->pm_kernel_cpus, cid));
+
+	pmap_tlb_shootnow();
+
+	kcpuset_atomic_clear(pmap->pm_cpus, cid);
+	kcpuset_atomic_clear(pmap->pm_kernel_cpus, cid);
+
+	ci->ci_tlbstate = TLBSTATE_VALID;
+	ci->ci_pmap = oldpmap;
+	if (oldpmap) {
+		kcpuset_atomic_set(oldpmap->pm_cpus, cid);
+		kcpuset_atomic_set(oldpmap->pm_kernel_cpus, cid);
+#if defined(SVS) && defined(USER_LDT)
+		if (svs_enabled) {
+			svs_ldt_sync(oldpmap);
+		} else
+#endif
+		lldt(oldpmap->pm_ldt_sel);
+		cpu_load_pmap(oldpmap, pmap);
+	} else {
+		lcr3(pmap_pdirpa(pmap_kernel(), 0));
+	}
+}
+
+#endif	/* EFI_RUNTIME */
+
 /*
  * some misc. functions
  */
@@ -4893,7 +5001,8 @@ pmap_enter_ma(struct pmap *pmap, vaddr_t
 	npte |= pmap_pat_flags(flags);
 	if (wired)
 		npte |= PTE_WIRED;
-	if (va < VM_MAXUSER_ADDRESS)
+	if (va < VM_MAXUSER_ADDRESS &&
+	    (pmap == pmap_kernel() || pmap_is_user(pmap)))
 		npte |= PTE_U;
 
 	if (pmap == pmap_kernel())

Index: src/sys/arch/x86/x86/svs.c
diff -u src/sys/arch/x86/x86/svs.c:1.41 src/sys/arch/x86/x86/svs.c:1.42
--- src/sys/arch/x86/x86/svs.c:1.41	Sat Aug 20 23:48:51 2022
+++ src/sys/arch/x86/x86/svs.c	Sat Sep 24 11:05:18 2022
@@ -1,4 +1,4 @@
-/*	$NetBSD: svs.c,v 1.41 2022/08/20 23:48:51 riastradh Exp $	*/
+/*	$NetBSD: svs.c,v 1.42 2022/09/24 11:05:18 riastradh Exp $	*/
 
 /*
  * Copyright (c) 2018-2020 The NetBSD Foundation, Inc.
@@ -30,7 +30,7 @@
  */
 
 #include <sys/cdefs.h>
-__KERNEL_RCSID(0, "$NetBSD: svs.c,v 1.41 2022/08/20 23:48:51 riastradh Exp $");
+__KERNEL_RCSID(0, "$NetBSD: svs.c,v 1.42 2022/09/24 11:05:18 riastradh Exp $");
 
 #include "opt_svs.h"
 #include "opt_user_ldt.h"
@@ -575,6 +575,7 @@ svs_pmap_sync(struct pmap *pmap, int ind
 
 	KASSERT(pmap != NULL);
 	KASSERT(pmap != pmap_kernel());
+	KASSERT(pmap_is_user(pmap));
 	KASSERT(mutex_owned(&pmap->pm_lock));
 	KASSERT(kpreempt_disabled());
 	KASSERT(index < PDIR_SLOT_USERLIM);
@@ -699,6 +700,7 @@ svs_pdir_switch(struct pmap *pmap)
 
 	KASSERT(kpreempt_disabled());
 	KASSERT(pmap != pmap_kernel());
+	KASSERT(pmap_is_user(pmap));
 
 	/* Update the info in the UTLS page */
 	utls = (struct svs_utls *)ci->ci_svs_utls;

Added files:

Index: src/sys/arch/amd64/include/efi.h
diff -u /dev/null src/sys/arch/amd64/include/efi.h:1.1
--- /dev/null	Sat Sep 24 11:05:18 2022
+++ src/sys/arch/amd64/include/efi.h	Sat Sep 24 11:05:17 2022
@@ -0,0 +1,3 @@
+/*	$NetBSD: efi.h,v 1.1 2022/09/24 11:05:17 riastradh Exp $	*/
+
+#include <x86/efi.h>

Index: src/sys/arch/i386/include/efi.h
diff -u /dev/null src/sys/arch/i386/include/efi.h:1.1
--- /dev/null	Sat Sep 24 11:05:18 2022
+++ src/sys/arch/i386/include/efi.h	Sat Sep 24 11:05:18 2022
@@ -0,0 +1,3 @@
+/*	$NetBSD: efi.h,v 1.1 2022/09/24 11:05:18 riastradh Exp $	*/
+
+#include <x86/efi.h>

Reply via email to