Our vm system has some optimization logic which, when servicing a page
fault, attempts to fault in the neighbouring pages as well, if this is
expected to speed things up in the long run.
That logic is controlled by the madvise() state of the memory area being
faulted in, with the defaults being that device mappings (being mmap'ed)
use MADV_RANDOM, while other mappings (such as binaries being faulted in
on-demand) use MADV_NORMAL.
The current logic is borrowed from FreeBSD, about 15 years ago, and goes
like this:
- for MADV_RANDOM areas, do not try to fault any other page.
- for MADV_NORMAL areas, try to fault the 3 preceding pages and the 4
following pages.
- for MADV_SEQUENTIAL areas (which do not exist unless explicit
madvise() calls are performed), try to fault the 8 preceding pages and
the 7 following pages.
These values were chosen because they seemed to work best, back when
FreeBSD was only running on i386 systems, with 4KB pages.
However, OpenBSD runs on many platforms, some of which use 8KB and 16KB
pages. Keeping these fixed numbers causes a lot of disk I/O on large
page platforms.
The following diff builds the numbers of neighbouring pages to be
faulted at run-time, depending upon the actual page size, to achieve the
same *sizes* being faulted, rather than the same *number of pages*. This
has little-to-no measurable effect on recent systems (because most
operations are cpu bound rather than i/o bound).
I am undecided whether this is worth doing - except perhaps for
getting rid of a KASSERT() in a frequently run code path. However, this
could be a start towards better fine-tuning (especially on some older
platforms, such as luna88k, which lack a DMA controller and where every
I/O is expensive).
Comments?
Index: uvm_fault.c
===================================================================
RCS file: /cvs/src/sys/uvm/uvm_fault.c,v
retrieving revision 1.69
diff -u -p -r1.69 uvm_fault.c
--- uvm_fault.c 30 May 2013 18:02:04 -0000 1.69
+++ uvm_fault.c 22 Mar 2014 21:32:19 -0000
@@ -152,22 +152,15 @@
*/
struct uvm_advice {
- int advice;
int nback;
int nforw;
};
/*
- * page range array:
- * note: index in array must match "advice" value
- * XXX: borrowed numbers from freebsd. do they work well for us?
+ * page range array: set up in uvmfault_init().
*/
-static struct uvm_advice uvmadvice[] = {
- { MADV_NORMAL, 3, 4 },
- { MADV_RANDOM, 0, 0 },
- { MADV_SEQUENTIAL, 8, 7},
-};
+static struct uvm_advice uvmadvice[UVM_ADV_MASK + 1];
#define UVM_MAXRANGE 16 /* must be max() of nback+nforw+1 */
@@ -220,6 +213,32 @@ uvmfault_anonflush(struct vm_anon **anon
*/
/*
+ * uvmfault_init: fill in uvmadvice[] from the run-time page size, so the
+ * window (faulting page included) spans 32KB for UVM_ADV_NORMAL and 64KB
+ * for UVM_ADV_SEQUENTIAL. entries not written here keep their zero value.
+ */
+
+void
+uvmfault_init(void)
+{
+ int npages;
+
+ if (uvmexp.pageshift <= 14) {
+ npages = 1 << (14 - uvmexp.pageshift); /* pages per 16KB */
+ KASSERT(npages <= UVM_MAXRANGE / 2);
+ uvmadvice[UVM_ADV_NORMAL].nforw = npages;
+ uvmadvice[UVM_ADV_NORMAL].nback = npages - 1;
+ }
+
+ if (uvmexp.pageshift <= 15) {
+ npages = 1 << (15 - uvmexp.pageshift); /* pages per 32KB */
+ KASSERT(npages <= UVM_MAXRANGE / 2);
+ uvmadvice[UVM_ADV_SEQUENTIAL].nforw = npages - 1;
+ uvmadvice[UVM_ADV_SEQUENTIAL].nback = npages;
+ }
+}
+
+/*
* uvmfault_amapcopy: clear "needs_copy" in a map.
*
* => if we are out of RAM we sleep (waiting for more)
@@ -687,8 +706,6 @@ ReFault:
if (narrow == FALSE) {
/* wide fault (!narrow) */
- KASSERT(uvmadvice[ufi.entry->advice].advice ==
- ufi.entry->advice);
nback = min(uvmadvice[ufi.entry->advice].nback,
(ufi.orig_rvaddr - ufi.entry->start) >> PAGE_SHIFT);
startva = ufi.orig_rvaddr - (nback << PAGE_SHIFT);
Index: uvm_fault.h
===================================================================
RCS file: /cvs/src/sys/uvm/uvm_fault.h,v
retrieving revision 1.13
diff -u -p -r1.13 uvm_fault.h
--- uvm_fault.h 25 Mar 2009 20:00:18 -0000 1.13
+++ uvm_fault.h 22 Mar 2014 21:32:20 -0000
@@ -72,6 +72,7 @@ struct uvm_faultinfo {
* fault prototypes
*/
+void uvmfault_init(void);
boolean_t uvmfault_lookup(struct uvm_faultinfo *, boolean_t);
boolean_t uvmfault_relock(struct uvm_faultinfo *);
Index: uvm_init.c
===================================================================
RCS file: /cvs/src/sys/uvm/uvm_init.c,v
retrieving revision 1.30
diff -u -p -r1.30 uvm_init.c
--- uvm_init.c 15 Mar 2012 17:52:28 -0000 1.30
+++ uvm_init.c 22 Mar 2014 21:32:20 -0000
@@ -114,6 +114,12 @@ uvm_init(void)
uvm_km_init(kvm_start, kvm_end);
/*
+ * step 4.5: init the fault code (tune the neighbouring-page ranges).
+ */
+
+ uvmfault_init();
+
+ /*
* step 5: init the pmap module. the pmap module is free to allocate
* memory for its private use (e.g. pvlists).
*/