Author: kib
Date: Sat Jun 24 17:01:11 2017
New Revision: 320317
URL: https://svnweb.freebsd.org/changeset/base/320317

Log:
  Implement address space guards.
  
  Guard, requested by the MAP_GUARD mmap(2) flag, prevents the reuse of
  the allocated address space, but does not allow instantiation of the
  pages in the range.  It is useful for more explicit support for usual
  two-stage reserve then commit allocators, since it prevents accidental
  instantiation of the mapping, e.g. by mprotect(2).
  
  Use guards to reimplement stack grow code.  Explicitly track stack
  grow area with the guard, including the stack guard page.  On stack
  grow, trivial shift of the guard map entry and stack map entry limits
  makes the stack expansion.  Move the code to detect stack grow and
  call vm_map_growstack(), from vm_fault() into vm_map_lookup().
  
  As a result, it is impossible to get a random mapping to occur in the
  stack grow area, or to overlap the stack guard page.
  
  Enable stack guard page by default.
  
  Reviewed by:  alc, markj
  Man page update reviewed by:  alc, bjk, emaste, markj, pho
  Tested by:    pho, Qualys
  Sponsored by: The FreeBSD Foundation
  MFC after:    1 week
  Differential revision:        https://reviews.freebsd.org/D11306 (man pages)

Modified:
  head/lib/libc/sys/mmap.2
  head/lib/libc/sys/munmap.2
  head/sys/sys/mman.h
  head/sys/sys/param.h
  head/sys/vm/vm.h
  head/sys/vm/vm_fault.c
  head/sys/vm/vm_map.c
  head/sys/vm/vm_map.h
  head/sys/vm/vm_mmap.c

Modified: head/lib/libc/sys/mmap.2
==============================================================================
--- head/lib/libc/sys/mmap.2    Sat Jun 24 16:47:41 2017        (r320316)
+++ head/lib/libc/sys/mmap.2    Sat Jun 24 17:01:11 2017        (r320317)
@@ -199,6 +199,21 @@ In contrast, if
 .Dv MAP_EXCL
 is specified, the request will fail if a mapping
 already exists within the range.
+.It Dv MAP_GUARD
+Instead of a mapping, create a guard of the specified size.
+Guards allow a process to create reservations in its address space,
+which can later be replaced by actual mappings.
+.Pp
+.Fa mmap
+will not create mappings in the address range of a guard unless
+the request specifies
+.Dv MAP_FIXED .
+Guards can be destroyed with
+.Xr munmap 2 .
+Any memory access by a thread to the guarded range results
+in the delivery of a
+.Dv SIGSEGV
+signal to that thread.
 .It Dv MAP_NOCORE
 Region is not included in a core file.
 .It Dv MAP_NOSYNC
@@ -303,6 +318,7 @@ must include at least
 .Dv PROT_READ
 and
 .Dv PROT_WRITE .
+.Pp
 This option creates
 a memory region that grows to at most
 .Fa len
@@ -313,6 +329,10 @@ stack top is the starting address returned by the call
 bytes.
 The bottom of the stack at maximum growth is the starting
 address returned by the call.
+The system uses guards to prevent the inadvertent use of
+regions into which stacks created with
+.Dv MAP_STACK
+will automatically grow, without mapping the whole stack in advance.
 .El
 .Pp
 The
@@ -406,6 +426,7 @@ were specified.
 .It Bq Er EINVAL
 None of
 .Dv MAP_ANON ,
+.Dv MAP_GUARD ,
 .Dv MAP_PRIVATE ,
 .Dv MAP_SHARED ,
 or
@@ -455,6 +476,25 @@ were specified, but the requested region is already us
 was specified, but
 .Dv MAP_FIXED
 was not.
+.It Bq Er EINVAL
+.Dv MAP_GUARD
+was specified, but the
+.Fa offset
+argument was not zero, the
+.Fa fd
+argument was not -1, or the
+.Fa prot
+argument was not
+.Dv PROT_NONE .
+.It Bq Er EINVAL
+.Dv MAP_GUARD
+was specified together with one of the flags
+.Dv MAP_ANON ,
+.Dv MAP_PREFAULT ,
+.Dv MAP_PREFAULT_READ ,
+.Dv MAP_PRIVATE ,
+.Dv MAP_SHARED ,
+.Dv MAP_STACK .
 .It Bq Er ENODEV
 .Dv MAP_ANON
 has not been specified and

Modified: head/lib/libc/sys/munmap.2
==============================================================================
--- head/lib/libc/sys/munmap.2  Sat Jun 24 16:47:41 2017        (r320316)
+++ head/lib/libc/sys/munmap.2  Sat Jun 24 17:01:11 2017        (r320317)
@@ -28,7 +28,7 @@
 .\"    @(#)munmap.2    8.3 (Berkeley) 5/27/94
 .\" $FreeBSD$
 .\"
-.Dd May 27, 1994
+.Dd June 22, 2017
 .Dt MUNMAP 2
 .Os
 .Sh NAME
@@ -44,7 +44,7 @@
 The
 .Fn munmap
 system call
-deletes the mappings for the specified address range,
+deletes the mappings and guards for the specified address range,
 and causes further references to addresses within the range
 to generate invalid memory references.
 .Sh RETURN VALUES

Modified: head/sys/sys/mman.h
==============================================================================
--- head/sys/sys/mman.h Sat Jun 24 16:47:41 2017        (r320316)
+++ head/sys/sys/mman.h Sat Jun 24 17:01:11 2017        (r320317)
@@ -90,6 +90,7 @@
 /*
  * Extended flags
  */
+#define        MAP_GUARD        0x00002000 /* reserve but don't map address 
range */
 #define        MAP_EXCL         0x00004000 /* for MAP_FIXED, fail if address 
is used */
 #define        MAP_NOCORE       0x00020000 /* dont include these pages in a 
coredump */
 #define        MAP_PREFAULT_READ 0x00040000 /* prefault mapping for reading */

Modified: head/sys/sys/param.h
==============================================================================
--- head/sys/sys/param.h        Sat Jun 24 16:47:41 2017        (r320316)
+++ head/sys/sys/param.h        Sat Jun 24 17:01:11 2017        (r320317)
@@ -58,7 +58,7 @@
  *             in the range 5 to 9.
  */
 #undef __FreeBSD_version
-#define __FreeBSD_version 1200034      /* Master, propagated to newvers */
+#define __FreeBSD_version 1200035      /* Master, propagated to newvers */
 
 /*
  * __FreeBSD_kernel__ indicates that this system uses the kernel of FreeBSD,
@@ -76,12 +76,13 @@
 #undef __FreeBSD_kernel__
 #define __FreeBSD_kernel__
 
-#ifdef _KERNEL
+#if defined(_KERNEL) || defined(IN_RTLD)
 #define        P_OSREL_SIGWAIT                 700000
 #define        P_OSREL_SIGSEGV                 700004
 #define        P_OSREL_MAP_ANON                800104
 #define        P_OSREL_MAP_FSTRICT             1100036
 #define        P_OSREL_SHUTDOWN_ENOTCONN       1100077
+#define        P_OSREL_MAP_GUARD               1200035
 
 #define        P_OSREL_MAJOR(x)                ((x) / 100000)
 #endif

Modified: head/sys/vm/vm.h
==============================================================================
--- head/sys/vm/vm.h    Sat Jun 24 16:47:41 2017        (r320316)
+++ head/sys/vm/vm.h    Sat Jun 24 17:01:11 2017        (r320317)
@@ -78,6 +78,7 @@ typedef u_char vm_prot_t;     /* protection codes */
 #define        VM_PROT_WRITE           ((vm_prot_t) 0x02)
 #define        VM_PROT_EXECUTE         ((vm_prot_t) 0x04)
 #define        VM_PROT_COPY            ((vm_prot_t) 0x08)      /* copy-on-read 
*/
+#define        VM_PROT_FAULT_LOOKUP    ((vm_prot_t) 0x010)
 
 #define        VM_PROT_ALL             
(VM_PROT_READ|VM_PROT_WRITE|VM_PROT_EXECUTE)
 #define VM_PROT_RW             (VM_PROT_READ|VM_PROT_WRITE)

Modified: head/sys/vm/vm_fault.c
==============================================================================
--- head/sys/vm/vm_fault.c      Sat Jun 24 16:47:41 2017        (r320316)
+++ head/sys/vm/vm_fault.c      Sat Jun 24 17:01:11 2017        (r320317)
@@ -495,13 +495,12 @@ vm_fault_hold(vm_map_t map, vm_offset_t vaddr, vm_prot
        int locked, nera, result, rv;
        u_char behavior;
        boolean_t wired;        /* Passed by reference. */
-       bool dead, growstack, hardfault, is_first_object_locked;
+       bool dead, hardfault, is_first_object_locked;
 
        VM_CNT_INC(v_vm_faults);
        fs.vp = NULL;
        faultcount = 0;
        nera = -1;
-       growstack = true;
        hardfault = false;
 
 RetryFault:;
@@ -511,17 +510,10 @@ RetryFault:;
         * search.
         */
        fs.map = map;
-       result = vm_map_lookup(&fs.map, vaddr, fault_type, &fs.entry,
-           &fs.first_object, &fs.first_pindex, &prot, &wired);
+       result = vm_map_lookup(&fs.map, vaddr, fault_type |
+           VM_PROT_FAULT_LOOKUP, &fs.entry, &fs.first_object,
+           &fs.first_pindex, &prot, &wired);
        if (result != KERN_SUCCESS) {
-               if (growstack && result == KERN_INVALID_ADDRESS &&
-                   map != kernel_map) {
-                       result = vm_map_growstack(curproc, vaddr);
-                       if (result != KERN_SUCCESS)
-                               return (KERN_FAILURE);
-                       growstack = false;
-                       goto RetryFault;
-               }
                unlock_vp(&fs);
                return (result);
        }
@@ -546,6 +538,8 @@ RetryFault:;
                        vm_map_unlock(fs.map);
                goto RetryFault;
        }
+
+       MPASS((fs.entry->eflags & MAP_ENTRY_GUARD) == 0);
 
        if (wired)
                fault_type = prot | (fault_type & VM_PROT_COPY);

Modified: head/sys/vm/vm_map.c
==============================================================================
--- head/sys/vm/vm_map.c        Sat Jun 24 16:47:41 2017        (r320316)
+++ head/sys/vm/vm_map.c        Sat Jun 24 17:01:11 2017        (r320317)
@@ -133,6 +133,8 @@ static void _vm_map_init(vm_map_t map, pmap_t pmap, vm
 static void vm_map_entry_deallocate(vm_map_entry_t entry, boolean_t 
system_map);
 static void vm_map_entry_dispose(vm_map_t map, vm_map_entry_t entry);
 static void vm_map_entry_unwire(vm_map_t map, vm_map_entry_t entry);
+static int vm_map_growstack(vm_map_t map, vm_offset_t addr,
+    vm_map_entry_t gap_entry);
 static void vm_map_pmap_enter(vm_map_t map, vm_offset_t addr, vm_prot_t prot,
     vm_object_t object, vm_pindex_t pindex, vm_size_t size, int flags);
 #ifdef INVARIANTS
@@ -1214,6 +1216,10 @@ vm_map_insert(vm_map_t map, vm_object_t object, vm_oof
        if (prev_entry->next != &map->header && prev_entry->next->start < end)
                return (KERN_NO_SPACE);
 
+       if ((cow & MAP_CREATE_GUARD) != 0 && (object != NULL ||
+           max != VM_PROT_NONE))
+               return (KERN_INVALID_ARGUMENT);
+
        protoeflags = 0;
        if (cow & MAP_COPY_ON_WRITE)
                protoeflags |= MAP_ENTRY_COW | MAP_ENTRY_NEEDS_COPY;
@@ -1229,13 +1235,19 @@ vm_map_insert(vm_map_t map, vm_object_t object, vm_oof
                protoeflags |= MAP_ENTRY_GROWS_UP;
        if (cow & MAP_VN_WRITECOUNT)
                protoeflags |= MAP_ENTRY_VN_WRITECNT;
+       if ((cow & MAP_CREATE_GUARD) != 0)
+               protoeflags |= MAP_ENTRY_GUARD;
+       if ((cow & MAP_CREATE_STACK_GAP_DN) != 0)
+               protoeflags |= MAP_ENTRY_STACK_GAP_DN;
+       if ((cow & MAP_CREATE_STACK_GAP_UP) != 0)
+               protoeflags |= MAP_ENTRY_STACK_GAP_UP;
        if (cow & MAP_INHERIT_SHARE)
                inheritance = VM_INHERIT_SHARE;
        else
                inheritance = VM_INHERIT_DEFAULT;
 
        cred = NULL;
-       if (cow & (MAP_ACC_NO_CHARGE | MAP_NOFAULT))
+       if ((cow & (MAP_ACC_NO_CHARGE | MAP_NOFAULT | MAP_CREATE_GUARD)) != 0)
                goto charged;
        if ((cow & MAP_ACC_CHARGED) || ((prot & VM_PROT_WRITE) &&
            ((protoeflags & MAP_ENTRY_NEEDS_COPY) || object == NULL))) {
@@ -1284,7 +1296,8 @@ charged:
                if (prev_entry->inheritance == inheritance &&
                    prev_entry->protection == prot &&
                    prev_entry->max_protection == max) {
-                       map->size += end - prev_entry->end;
+                       if ((prev_entry->eflags & MAP_ENTRY_GUARD) == 0)
+                               map->size += end - prev_entry->end;
                        prev_entry->end = end;
                        vm_map_entry_resize_free(map, prev_entry);
                        vm_map_simplify_entry(map, prev_entry);
@@ -1321,7 +1334,6 @@ charged:
        new_entry->eflags = protoeflags;
        new_entry->object.vm_object = object;
        new_entry->offset = offset;
-       new_entry->avail_ssize = 0;
 
        new_entry->inheritance = inheritance;
        new_entry->protection = prot;
@@ -1339,7 +1351,8 @@ charged:
         * Insert the new entry into the list
         */
        vm_map_entry_link(map, prev_entry, new_entry);
-       map->size += new_entry->end - new_entry->start;
+       if ((new_entry->eflags & MAP_ENTRY_GUARD) == 0)
+               map->size += new_entry->end - new_entry->start;
 
        /*
         * Try to coalesce the new entry with both the previous and next
@@ -1674,7 +1687,8 @@ _vm_map_clip_start(vm_map_t map, vm_map_entry_t entry,
         * map.  This is a bit of a hack, but is also about the best place to
         * put this improvement.
         */
-       if (entry->object.vm_object == NULL && !map->system_map) {
+       if (entry->object.vm_object == NULL && !map->system_map &&
+           (entry->eflags & MAP_ENTRY_GUARD) == 0) {
                vm_object_t object;
                object = vm_object_allocate(OBJT_DEFAULT,
                                atop(entry->end - entry->start));
@@ -1753,7 +1767,8 @@ _vm_map_clip_end(vm_map_t map, vm_map_entry_t entry, v
         * map.  This is a bit of a hack, but is also about the best place to
         * put this improvement.
         */
-       if (entry->object.vm_object == NULL && !map->system_map) {
+       if (entry->object.vm_object == NULL && !map->system_map &&
+           (entry->eflags & MAP_ENTRY_GUARD) == 0) {
                vm_object_t object;
                object = vm_object_allocate(OBJT_DEFAULT,
                                atop(entry->end - entry->start));
@@ -2010,7 +2025,8 @@ vm_map_protect(vm_map_t map, vm_offset_t start, vm_off
 
                if (set_max ||
                    ((new_prot & ~(current->protection)) & VM_PROT_WRITE) == 0 
||
-                   ENTRY_CHARGED(current)) {
+                   ENTRY_CHARGED(current) ||
+                   (current->eflags & MAP_ENTRY_GUARD) != 0) {
                        continue;
                }
 
@@ -2059,6 +2075,9 @@ vm_map_protect(vm_map_t map, vm_offset_t start, vm_off
         */
        for (current = entry; current != &map->header && current->start < end;
            current = current->next) {
+               if ((current->eflags & MAP_ENTRY_GUARD) != 0)
+                       continue;
+
                old_prot = current->protection;
 
                if (set_max)
@@ -2312,7 +2331,9 @@ vm_map_inherit(vm_map_t map, vm_offset_t start, vm_off
                entry = temp_entry->next;
        while ((entry != &map->header) && (entry->start < end)) {
                vm_map_clip_end(map, entry, end);
-               entry->inheritance = new_inheritance;
+               if ((entry->eflags & MAP_ENTRY_GUARD) == 0 ||
+                   new_inheritance != VM_INHERIT_ZERO)
+                       entry->inheritance = new_inheritance;
                vm_map_simplify_entry(map, entry);
                entry = entry->next;
        }
@@ -2918,6 +2939,15 @@ vm_map_entry_delete(vm_map_t map, vm_map_entry_t entry
 
        vm_map_entry_unlink(map, entry);
        object = entry->object.vm_object;
+
+       if ((entry->eflags & MAP_ENTRY_GUARD) != 0) {
+               MPASS(entry->cred == NULL);
+               MPASS((entry->eflags & MAP_ENTRY_IS_SUB_MAP) == 0);
+               MPASS(object == NULL);
+               vm_map_entry_deallocate(entry, map->system_map);
+               return;
+       }
+
        size = entry->end - entry->start;
        map->size -= size;
 
@@ -3276,6 +3306,8 @@ vmspace_map_entry_forked(const struct vmspace *vm1, st
        vm_size_t entrysize;
        vm_offset_t newend;
 
+       if ((entry->eflags & MAP_ENTRY_GUARD) != 0)
+               return;
        entrysize = entry->end - entry->start;
        vm2->vm_map.size += entrysize;
        if (entry->eflags & (MAP_ENTRY_GROWS_DOWN | MAP_ENTRY_GROWS_UP)) {
@@ -3312,6 +3344,7 @@ vmspace_fork(struct vmspace *vm1, vm_ooffset_t *fork_c
        vm_map_entry_t new_entry, old_entry;
        vm_object_t object;
        int locked;
+       vm_inherit_t inh;
 
        old_map = &vm1->vm_map;
        /* Copy immutable fields of vm1 to vm2. */
@@ -3334,7 +3367,12 @@ vmspace_fork(struct vmspace *vm1, vm_ooffset_t *fork_c
                if (old_entry->eflags & MAP_ENTRY_IS_SUB_MAP)
                        panic("vm_map_fork: encountered a submap");
 
-               switch (old_entry->inheritance) {
+               inh = old_entry->inheritance;
+               if ((old_entry->eflags & MAP_ENTRY_GUARD) != 0 &&
+                   inh != VM_INHERIT_NONE)
+                       inh = VM_INHERIT_COPY;
+
+               switch (inh) {
                case VM_INHERIT_NONE:
                        break;
 
@@ -3467,7 +3505,6 @@ vmspace_fork(struct vmspace *vm1, vm_ooffset_t *fork_c
 
                        new_entry->start = old_entry->start;
                        new_entry->end = old_entry->end;
-                       new_entry->avail_ssize = old_entry->avail_ssize;
                        new_entry->eflags = old_entry->eflags &
                            ~(MAP_ENTRY_USER_WIRED | MAP_ENTRY_IN_TRANSITION |
                            MAP_ENTRY_VN_WRITECNT);
@@ -3535,7 +3572,7 @@ vm_map_stack_locked(vm_map_t map, vm_offset_t addrbos,
     vm_size_t growsize, vm_prot_t prot, vm_prot_t max, int cow)
 {
        vm_map_entry_t new_entry, prev_entry;
-       vm_offset_t bot, top;
+       vm_offset_t bot, gap_bot, gap_top, top;
        vm_size_t init_ssize;
        int orient, rv;
 
@@ -3543,10 +3580,11 @@ vm_map_stack_locked(vm_map_t map, vm_offset_t addrbos,
         * The stack orientation is piggybacked with the cow argument.
         * Extract it into orient and mask the cow argument so that we
         * don't pass it around further.
-        * NOTE: We explicitly allow bi-directional stacks.
         */
-       orient = cow & (MAP_STACK_GROWS_DOWN|MAP_STACK_GROWS_UP);
+       orient = cow & (MAP_STACK_GROWS_DOWN | MAP_STACK_GROWS_UP);
        KASSERT(orient != 0, ("No stack grow direction"));
+       KASSERT(orient != (MAP_STACK_GROWS_DOWN | MAP_STACK_GROWS_UP),
+           ("bi-dir stack"));
 
        if (addrbos < vm_map_min(map) ||
            addrbos > vm_map_max(map) ||
@@ -3582,57 +3620,58 @@ vm_map_stack_locked(vm_map_t map, vm_offset_t addrbos,
         * and cow to be 0.  Possibly we should eliminate these as input
         * parameters, and just pass these values here in the insert call.
         */
-       if (orient == MAP_STACK_GROWS_DOWN)
+       if (orient == MAP_STACK_GROWS_DOWN) {
                bot = addrbos + max_ssize - init_ssize;
-       else if (orient == MAP_STACK_GROWS_UP)
+               top = bot + init_ssize;
+               gap_bot = addrbos;
+               gap_top = bot;
+       } else /* if (orient == MAP_STACK_GROWS_UP) */ {
                bot = addrbos;
-       else
-               bot = round_page(addrbos + max_ssize/2 - init_ssize/2);
-       top = bot + init_ssize;
-       rv = vm_map_insert(map, NULL, 0, bot, top, prot, max, cow);
-
-       /* Now set the avail_ssize amount. */
-       if (rv == KERN_SUCCESS) {
-               new_entry = prev_entry->next;
-               if (new_entry->end != top || new_entry->start != bot)
-                       panic("Bad entry start/end for new stack entry");
-
-               new_entry->avail_ssize = max_ssize - init_ssize;
-               KASSERT((orient & MAP_STACK_GROWS_DOWN) == 0 ||
-                   (new_entry->eflags & MAP_ENTRY_GROWS_DOWN) != 0,
-                   ("new entry lacks MAP_ENTRY_GROWS_DOWN"));
-               KASSERT((orient & MAP_STACK_GROWS_UP) == 0 ||
-                   (new_entry->eflags & MAP_ENTRY_GROWS_UP) != 0,
-                   ("new entry lacks MAP_ENTRY_GROWS_UP"));
+               top = bot + init_ssize;
+               gap_bot = top;
+               gap_top = addrbos + max_ssize;
        }
-
+       rv = vm_map_insert(map, NULL, 0, bot, top, prot, max, cow);
+       if (rv != KERN_SUCCESS)
+               return (rv);
+       new_entry = prev_entry->next;
+       KASSERT(new_entry->end == top || new_entry->start == bot,
+           ("Bad entry start/end for new stack entry"));
+       KASSERT((orient & MAP_STACK_GROWS_DOWN) == 0 ||
+           (new_entry->eflags & MAP_ENTRY_GROWS_DOWN) != 0,
+           ("new entry lacks MAP_ENTRY_GROWS_DOWN"));
+       KASSERT((orient & MAP_STACK_GROWS_UP) == 0 ||
+           (new_entry->eflags & MAP_ENTRY_GROWS_UP) != 0,
+           ("new entry lacks MAP_ENTRY_GROWS_UP"));
+       rv = vm_map_insert(map, NULL, 0, gap_bot, gap_top, VM_PROT_NONE,
+           VM_PROT_NONE, MAP_CREATE_GUARD | (orient == MAP_STACK_GROWS_DOWN ?
+           MAP_CREATE_STACK_GAP_DN : MAP_CREATE_STACK_GAP_UP));
+       if (rv != KERN_SUCCESS)
+               (void)vm_map_delete(map, bot, top);
        return (rv);
 }
 
-static int stack_guard_page = 0;
+static int stack_guard_page = 1;
 SYSCTL_INT(_security_bsd, OID_AUTO, stack_guard_page, CTLFLAG_RWTUN,
     &stack_guard_page, 0,
-    "Insert stack guard page ahead of the growable segments.");
+    "Specifies the number of guard pages for a stack that grows.");
 
-/* Attempts to grow a vm stack entry.  Returns KERN_SUCCESS if the
- * desired address is already mapped, or if we successfully grow
- * the stack.  Also returns KERN_SUCCESS if addr is outside the
- * stack range (this is strange, but preserves compatibility with
- * the grow function in vm_machdep.c).
+/*
+ * Attempts to grow a vm stack entry.  Returns KERN_SUCCESS if we
+ * successfully grow the stack.
  */
-int
-vm_map_growstack(struct proc *p, vm_offset_t addr)
+static int
+vm_map_growstack(vm_map_t map, vm_offset_t addr, vm_map_entry_t gap_entry)
 {
-       vm_map_entry_t next_entry, prev_entry;
-       vm_map_entry_t new_entry, stack_entry;
-       struct vmspace *vm = p->p_vmspace;
-       vm_map_t map = &vm->vm_map;
-       vm_offset_t end;
-       vm_size_t growsize;
+       vm_map_entry_t stack_entry;
+       struct proc *p;
+       struct vmspace *vm;
+       struct ucred *cred;
+       vm_offset_t gap_end, gap_start, grow_start;
        size_t grow_amount, max_grow;
        rlim_t lmemlim, stacklim, vmemlim;
-       int is_procstack, rv;
-       struct ucred *cred;
+       int rv, rv1;
+       bool gap_deleted, grow_down, is_procstack;
 #ifdef notyet
        uint64_t limit;
 #endif
@@ -3640,125 +3679,71 @@ vm_map_growstack(struct proc *p, vm_offset_t addr)
        int error;
 #endif
 
+       p = curproc;
+       vm = p->p_vmspace;
+       MPASS(map == &p->p_vmspace->vm_map);
+       MPASS(!map->system_map);
+
        lmemlim = lim_cur(curthread, RLIMIT_MEMLOCK);
        stacklim = lim_cur(curthread, RLIMIT_STACK);
        vmemlim = lim_cur(curthread, RLIMIT_VMEM);
-Retry:
-
-       vm_map_lock_read(map);
-
-       /* If addr is already in the entry range, no need to grow.*/
-       if (vm_map_lookup_entry(map, addr, &prev_entry)) {
-               vm_map_unlock_read(map);
+retry:
+       /* If addr is not in a hole for a stack grow area, no need to grow. */
+       if (gap_entry == NULL && !vm_map_lookup_entry(map, addr, &gap_entry))
+               return (KERN_FAILURE);
+       if ((gap_entry->eflags & MAP_ENTRY_GUARD) == 0)
                return (KERN_SUCCESS);
-       }
-
-       next_entry = prev_entry->next;
-       if (!(prev_entry->eflags & MAP_ENTRY_GROWS_UP)) {
-               /*
-                * This entry does not grow upwards. Since the address lies
-                * beyond this entry, the next entry (if one exists) has to
-                * be a downward growable entry. The entry list header is
-                * never a growable entry, so it suffices to check the flags.
-                */
-               if (!(next_entry->eflags & MAP_ENTRY_GROWS_DOWN)) {
-                       vm_map_unlock_read(map);
-                       return (KERN_SUCCESS);
-               }
-               stack_entry = next_entry;
+       if ((gap_entry->eflags & MAP_ENTRY_STACK_GAP_DN) != 0) {
+               stack_entry = gap_entry->next;
+               if ((stack_entry->eflags & MAP_ENTRY_GROWS_DOWN) == 0 ||
+                   stack_entry->start != gap_entry->end)
+                       return (KERN_FAILURE);
+               grow_amount = round_page(stack_entry->start - addr);
+               grow_down = true;
+       } else if ((gap_entry->eflags & MAP_ENTRY_STACK_GAP_UP) != 0) {
+               stack_entry = gap_entry->prev;
+               if ((stack_entry->eflags & MAP_ENTRY_GROWS_UP) == 0 ||
+                   stack_entry->end != gap_entry->start)
+                       return (KERN_FAILURE);
+               grow_amount = round_page(addr + 1 - stack_entry->end);
+               grow_down = false;
        } else {
-               /*
-                * This entry grows upward. If the next entry does not at
-                * least grow downwards, this is the entry we need to grow.
-                * otherwise we have two possible choices and we have to
-                * select one.
-                */
-               if (next_entry->eflags & MAP_ENTRY_GROWS_DOWN) {
-                       /*
-                        * We have two choices; grow the entry closest to
-                        * the address to minimize the amount of growth.
-                        */
-                       if (addr - prev_entry->end <= next_entry->start - addr)
-                               stack_entry = prev_entry;
-                       else
-                               stack_entry = next_entry;
-               } else
-                       stack_entry = prev_entry;
+               return (KERN_FAILURE);
        }
-
-       if (stack_entry == next_entry) {
-               KASSERT(stack_entry->eflags & MAP_ENTRY_GROWS_DOWN, ("foo"));
-               KASSERT(addr < stack_entry->start, ("foo"));
-               end = (prev_entry != &map->header) ? prev_entry->end :
-                   stack_entry->start - stack_entry->avail_ssize;
-               grow_amount = roundup(stack_entry->start - addr, PAGE_SIZE);
-               max_grow = stack_entry->start - end;
-       } else {
-               KASSERT(stack_entry->eflags & MAP_ENTRY_GROWS_UP, ("foo"));
-               KASSERT(addr >= stack_entry->end, ("foo"));
-               end = (next_entry != &map->header) ? next_entry->start :
-                   stack_entry->end + stack_entry->avail_ssize;
-               grow_amount = roundup(addr + 1 - stack_entry->end, PAGE_SIZE);
-               max_grow = end - stack_entry->end;
-       }
-
-       if (grow_amount > stack_entry->avail_ssize) {
-               vm_map_unlock_read(map);
+       max_grow = gap_entry->end - gap_entry->start - stack_guard_page *
+           PAGE_SIZE;
+       if (grow_amount > max_grow)
                return (KERN_NO_SPACE);
-       }
 
        /*
-        * If there is no longer enough space between the entries nogo, and
-        * adjust the available space.  Note: this  should only happen if the
-        * user has mapped into the stack area after the stack was created,
-        * and is probably an error.
-        *
-        * This also effectively destroys any guard page the user might have
-        * intended by limiting the stack size.
-        */
-       if (grow_amount + (stack_guard_page ? PAGE_SIZE : 0) > max_grow) {
-               if (vm_map_lock_upgrade(map))
-                       goto Retry;
-
-               stack_entry->avail_ssize = max_grow;
-
-               vm_map_unlock(map);
-               return (KERN_NO_SPACE);
-       }
-
-       is_procstack = (addr >= (vm_offset_t)vm->vm_maxsaddr &&
-           addr < (vm_offset_t)p->p_sysent->sv_usrstack) ? 1 : 0;
-
-       /*
         * If this is the main process stack, see if we're over the stack
         * limit.
         */
-       if (is_procstack && (ctob(vm->vm_ssize) + grow_amount > stacklim)) {
-               vm_map_unlock_read(map);
+       is_procstack = addr >= (vm_offset_t)vm->vm_maxsaddr &&
+           addr < (vm_offset_t)p->p_sysent->sv_usrstack;
+       if (is_procstack && (ctob(vm->vm_ssize) + grow_amount > stacklim))
                return (KERN_NO_SPACE);
-       }
+
 #ifdef RACCT
        if (racct_enable) {
                PROC_LOCK(p);
                if (is_procstack && racct_set(p, RACCT_STACK,
                    ctob(vm->vm_ssize) + grow_amount)) {
                        PROC_UNLOCK(p);
-                       vm_map_unlock_read(map);
                        return (KERN_NO_SPACE);
                }
                PROC_UNLOCK(p);
        }
 #endif
 
-       /* Round up the grow amount modulo sgrowsiz */
-       growsize = sgrowsiz;
-       grow_amount = roundup(grow_amount, growsize);
-       if (grow_amount > stack_entry->avail_ssize)
-               grow_amount = stack_entry->avail_ssize;
+       grow_amount = roundup(grow_amount, sgrowsiz);
+       if (grow_amount > max_grow)
+               grow_amount = max_grow;
        if (is_procstack && (ctob(vm->vm_ssize) + grow_amount > stacklim)) {
                grow_amount = trunc_page((vm_size_t)stacklim) -
                    ctob(vm->vm_ssize);
        }
+
 #ifdef notyet
        PROC_LOCK(p);
        limit = racct_get_available(p, RACCT_STACK);
@@ -3766,9 +3751,9 @@ Retry:
        if (is_procstack && (ctob(vm->vm_ssize) + grow_amount > limit))
                grow_amount = limit - ctob(vm->vm_ssize);
 #endif
-       if (!old_mlock && map->flags & MAP_WIREFUTURE) {
+
+       if (!old_mlock && (map->flags & MAP_WIREFUTURE) != 0) {
                if (ptoa(pmap_wired_count(map->pmap)) + grow_amount > lmemlim) {
-                       vm_map_unlock_read(map);
                        rv = KERN_NO_SPACE;
                        goto out;
                }
@@ -3778,7 +3763,6 @@ Retry:
                        if (racct_set(p, RACCT_MEMLOCK,
                            ptoa(pmap_wired_count(map->pmap)) + grow_amount)) {
                                PROC_UNLOCK(p);
-                               vm_map_unlock_read(map);
                                rv = KERN_NO_SPACE;
                                goto out;
                        }
@@ -3786,9 +3770,9 @@ Retry:
                }
 #endif
        }
+
        /* If we would blow our VMEM resource limit, no go */
        if (map->size + grow_amount > vmemlim) {
-               vm_map_unlock_read(map);
                rv = KERN_NO_SPACE;
                goto out;
        }
@@ -3797,7 +3781,6 @@ Retry:
                PROC_LOCK(p);
                if (racct_set(p, RACCT_VMEM, map->size + grow_amount)) {
                        PROC_UNLOCK(p);
-                       vm_map_unlock_read(map);
                        rv = KERN_NO_SPACE;
                        goto out;
                }
@@ -3805,62 +3788,42 @@ Retry:
        }
 #endif
 
-       if (vm_map_lock_upgrade(map))
-               goto Retry;
+       if (vm_map_lock_upgrade(map)) {
+               gap_entry = NULL;
+               vm_map_lock_read(map);
+               goto retry;
+       }
 
-       if (stack_entry == next_entry) {
-               /*
-                * Growing downward.
-                */
-               /* Get the preliminary new entry start value */
-               addr = stack_entry->start - grow_amount;
-
-               /*
-                * If this puts us into the previous entry, cut back our
-                * growth to the available space. Also, see the note above.
-                */
-               if (addr < end) {
-                       stack_entry->avail_ssize = max_grow;
-                       addr = end;
-                       if (stack_guard_page)
-                               addr += PAGE_SIZE;
+       if (grow_down) {
+               grow_start = gap_entry->end - grow_amount;
+               if (gap_entry->start + grow_amount == gap_entry->end) {
+                       gap_start = gap_entry->start;
+                       gap_end = gap_entry->end;
+                       vm_map_entry_delete(map, gap_entry);
+                       gap_deleted = true;
+               } else {
+                       MPASS(gap_entry->start < gap_entry->end - grow_amount);
+                       gap_entry->end -= grow_amount;
+                       vm_map_entry_resize_free(map, gap_entry);
+                       gap_deleted = false;
                }
-
-               rv = vm_map_insert(map, NULL, 0, addr, stack_entry->start,
-                   next_entry->protection, next_entry->max_protection,
+               rv = vm_map_insert(map, NULL, 0, grow_start,
+                   grow_start + grow_amount,
+                   stack_entry->protection, stack_entry->max_protection,
                    MAP_STACK_GROWS_DOWN);
-
-               /* Adjust the available stack space by the amount we grew. */
-               if (rv == KERN_SUCCESS) {
-                       new_entry = prev_entry->next;
-                       KASSERT(new_entry == stack_entry->prev, ("foo"));
-                       KASSERT(new_entry->end == stack_entry->start, ("foo"));
-                       KASSERT(new_entry->start == addr, ("foo"));
-                       KASSERT((new_entry->eflags & MAP_ENTRY_GROWS_DOWN) !=
-                           0, ("new entry lacks MAP_ENTRY_GROWS_DOWN"));
-                       grow_amount = new_entry->end - new_entry->start;
-                       new_entry->avail_ssize = stack_entry->avail_ssize -
-                           grow_amount;
-                       stack_entry->eflags &= ~MAP_ENTRY_GROWS_DOWN;
+               if (rv != KERN_SUCCESS) {
+                       if (gap_deleted) {
+                               rv1 = vm_map_insert(map, NULL, 0, gap_start,
+                                   gap_end, VM_PROT_NONE, VM_PROT_NONE,
+                                   MAP_CREATE_GUARD | MAP_CREATE_STACK_GAP_DN);
+                               MPASS(rv1 == KERN_SUCCESS);
+                       } else {
+                               gap_entry->end += grow_amount;
+                               vm_map_entry_resize_free(map, gap_entry);
+                       }
                }
        } else {
-               /*
-                * Growing upward.
-                */
-               addr = stack_entry->end + grow_amount;
-
-               /*
-                * If this puts us into the next entry, cut back our growth
-                * to the available space. Also, see the note above.
-                */
-               if (addr > end) {
-                       stack_entry->avail_ssize = end - stack_entry->end;
-                       addr = end;
-                       if (stack_guard_page)
-                               addr -= PAGE_SIZE;
-               }
-
-               grow_amount = addr - stack_entry->end;
+               grow_start = stack_entry->end;
                cred = stack_entry->cred;
                if (cred == NULL && stack_entry->object.vm_object != NULL)
                        cred = stack_entry->object.vm_object->cred;
@@ -3872,30 +3835,30 @@ Retry:
                    stack_entry->offset,
                    (vm_size_t)(stack_entry->end - stack_entry->start),
                    (vm_size_t)grow_amount, cred != NULL)) {
-                       map->size += (addr - stack_entry->end);
-                       /* Update the current entry. */
-                       stack_entry->end = addr;
-                       stack_entry->avail_ssize -= grow_amount;
+                       if (gap_entry->start + grow_amount == gap_entry->end)
+                               vm_map_entry_delete(map, gap_entry);
+                       else
+                               gap_entry->start += grow_amount;
+                       stack_entry->end += grow_amount;
+                       map->size += grow_amount;
                        vm_map_entry_resize_free(map, stack_entry);
                        rv = KERN_SUCCESS;
                } else
                        rv = KERN_FAILURE;
        }
-
        if (rv == KERN_SUCCESS && is_procstack)
                vm->vm_ssize += btoc(grow_amount);
 
-       vm_map_unlock(map);
-
        /*
         * Heed the MAP_WIREFUTURE flag if it was set for this process.
         */
-       if (rv == KERN_SUCCESS && (map->flags & MAP_WIREFUTURE)) {
-               vm_map_wire(map,
-                   (stack_entry == next_entry) ? addr : addr - grow_amount,
-                   (stack_entry == next_entry) ? stack_entry->start : addr,
+       if (rv == KERN_SUCCESS && (map->flags & MAP_WIREFUTURE) != 0) {
+               vm_map_unlock(map);
+               vm_map_wire(map, grow_start, grow_start + grow_amount,
                    VM_MAP_WIRE_USER | VM_MAP_WIRE_NOHOLES);
-       }
+               vm_map_lock_read(map);
+       } else
+               vm_map_lock_downgrade(map);
 
 out:
 #ifdef RACCT
@@ -4019,10 +3982,11 @@ vm_map_lookup(vm_map_t *var_map,                /* IN/OUT */
        vm_size_t size;
        struct ucred *cred;
 
-RetryLookup:;
+RetryLookup:
 
        vm_map_lock_read(map);
 
+RetryLookupLocked:
        /*
         * Lookup the faulting address.
         */
@@ -4048,7 +4012,16 @@ RetryLookup:;
         * Check whether this task is allowed to have this page.
         */
        prot = entry->protection;
-       fault_type &= (VM_PROT_READ|VM_PROT_WRITE|VM_PROT_EXECUTE);
+       if ((fault_typea & VM_PROT_FAULT_LOOKUP) != 0) {
+               fault_typea &= ~VM_PROT_FAULT_LOOKUP;
+               if (prot == VM_PROT_NONE && map != kernel_map &&
+                   (entry->eflags & MAP_ENTRY_GUARD) != 0 &&
+                   (entry->eflags & (MAP_ENTRY_STACK_GAP_DN |
+                   MAP_ENTRY_STACK_GAP_UP)) != 0 &&
+                   vm_map_growstack(map, vaddr, entry) == KERN_SUCCESS)
+                       goto RetryLookupLocked;
+       }
+       fault_type &= VM_PROT_READ | VM_PROT_WRITE | VM_PROT_EXECUTE;
        if ((fault_type & prot) != fault_type || prot == VM_PROT_NONE) {
                vm_map_unlock_read(map);
                return (KERN_PROTECTION_FAILURE);
@@ -4282,8 +4255,9 @@ vm_map_print(vm_map_t map)
        db_indent += 2;
        for (entry = map->header.next; entry != &map->header;
            entry = entry->next) {
-               db_iprintf("map entry %p: start=%p, end=%p\n",
-                   (void *)entry, (void *)entry->start, (void *)entry->end);
+               db_iprintf("map entry %p: start=%p, end=%p, eflags=%#x, \n",
+                   (void *)entry, (void *)entry->start, (void *)entry->end,
+                   entry->eflags);
                {
                        static char *inheritance_name[4] =
                        {"share", "copy", "none", "donate_copy"};

Modified: head/sys/vm/vm_map.h
==============================================================================
--- head/sys/vm/vm_map.h        Sat Jun 24 16:47:41 2017        (r320316)
+++ head/sys/vm/vm_map.h        Sat Jun 24 17:01:11 2017        (r320317)
@@ -103,7 +103,6 @@ struct vm_map_entry {
        struct vm_map_entry *right;     /* right child in binary search tree */
        vm_offset_t start;              /* start address */
        vm_offset_t end;                /* end address */
-       vm_offset_t avail_ssize;        /* amt can grow if this is a stack */
        vm_offset_t next_read;          /* vaddr of the next sequential read */
        vm_size_t adj_free;             /* amount of adjacent free space */
        vm_size_t max_free;             /* max free space in subtree */
@@ -142,6 +141,9 @@ struct vm_map_entry {
 
 #define        MAP_ENTRY_WIRE_SKIPPED          0x4000
 #define        MAP_ENTRY_VN_WRITECNT           0x8000  /* writeable vnode mapping */
+#define        MAP_ENTRY_GUARD                 0x10000
+#define        MAP_ENTRY_STACK_GAP_DN          0x20000
+#define        MAP_ENTRY_STACK_GAP_UP          0x40000
 
 #ifdef _KERNEL
 static __inline u_char
@@ -315,6 +317,7 @@ long vmspace_resident_count(struct vmspace *vmspace);
 #define MAP_PREFAULT_PARTIAL   0x0010
 #define MAP_DISABLE_SYNCER     0x0020
 #define        MAP_CHECK_EXCL          0x0040
+#define        MAP_CREATE_GUARD        0x0080
 #define MAP_DISABLE_COREDUMP   0x0100
 #define MAP_PREFAULT_MADVISE   0x0200  /* from (user) madvise request */
 #define        MAP_VN_WRITECOUNT       0x0400
@@ -322,6 +325,8 @@ long vmspace_resident_count(struct vmspace *vmspace);
 #define        MAP_STACK_GROWS_UP      0x2000
 #define        MAP_ACC_CHARGED         0x4000
 #define        MAP_ACC_NO_CHARGE       0x8000
+#define        MAP_CREATE_STACK_GAP_UP 0x10000
+#define        MAP_CREATE_STACK_GAP_DN 0x20000
 
 /*
  * vm_fault option flags
@@ -387,7 +392,6 @@ int vm_map_submap (vm_map_t, vm_offset_t, vm_offset_t,
 int vm_map_sync(vm_map_t, vm_offset_t, vm_offset_t, boolean_t, boolean_t);
 int vm_map_madvise (vm_map_t, vm_offset_t, vm_offset_t, int);
 int vm_map_stack (vm_map_t, vm_offset_t, vm_size_t, vm_prot_t, vm_prot_t, int);
-int vm_map_growstack (struct proc *p, vm_offset_t addr);
 int vm_map_unwire(vm_map_t map, vm_offset_t start, vm_offset_t end,
     int flags);
 int vm_map_wire(vm_map_t map, vm_offset_t start, vm_offset_t end,

Modified: head/sys/vm/vm_mmap.c
==============================================================================
--- head/sys/vm/vm_mmap.c       Sat Jun 24 16:47:41 2017        (r320316)
+++ head/sys/vm/vm_mmap.c       Sat Jun 24 17:01:11 2017        (r320317)
@@ -226,7 +226,7 @@ kern_mmap(struct thread *td, uintptr_t addr0, size_t s
        }
        if ((flags & ~(MAP_SHARED | MAP_PRIVATE | MAP_FIXED | MAP_HASSEMAPHORE |
            MAP_STACK | MAP_NOSYNC | MAP_ANON | MAP_EXCL | MAP_NOCORE |
-           MAP_PREFAULT_READ |
+           MAP_PREFAULT_READ | MAP_GUARD |
 #ifdef MAP_32BIT
            MAP_32BIT |
 #endif
@@ -239,6 +239,10 @@ kern_mmap(struct thread *td, uintptr_t addr0, size_t s
        if (prot != PROT_NONE &&
            (prot & ~(PROT_READ | PROT_WRITE | PROT_EXEC)) != 0)
                return (EINVAL);
+       if ((flags & MAP_GUARD) != 0 && (prot != PROT_NONE || fd != -1 ||
+           pos != 0 || (flags & (MAP_SHARED | MAP_PRIVATE | MAP_PREFAULT |
+           MAP_PREFAULT_READ | MAP_ANON | MAP_STACK)) != 0))
+               return (EINVAL);
 
        /*
         * Align the file position to a page boundary,
@@ -314,7 +318,10 @@ kern_mmap(struct thread *td, uintptr_t addr0, size_t s
                 * returns an error earlier.
                 */
                error = 0;
-       } else if (flags & MAP_ANON) {
+       } else if ((flags & MAP_GUARD) != 0) {
+               error = vm_mmap_object(&vms->vm_map, &addr, size, VM_PROT_NONE,
+                   VM_PROT_NONE, flags, NULL, pos, FALSE, td);
+       } else if ((flags & MAP_ANON) != 0) {
                /*
                 * Mapping blank space is trivial.
                 *
@@ -1511,6 +1518,8 @@ vm_mmap_object(vm_map_t map, vm_offset_t *addr, vm_siz
        }
        if ((flags & MAP_EXCL) != 0)
                docow |= MAP_CHECK_EXCL;
+       if ((flags & MAP_GUARD) != 0)
+               docow |= MAP_CREATE_GUARD;
 
        if (fitit) {
                if ((flags & MAP_ALIGNMENT_MASK) == MAP_ALIGNED_SUPER)
_______________________________________________
svn-src-head@freebsd.org mailing list
https://lists.freebsd.org/mailman/listinfo/svn-src-head
To unsubscribe, send any mail to "svn-src-head-unsubscribe@freebsd.org"

Reply via email to