Author: jeff
Date: Mon Jan 20 22:49:52 2020
New Revision: 356933
URL: https://svnweb.freebsd.org/changeset/base/356933

Log:
  Reduce object locking in vm_fault.  Once we have an exclusively busied page,
  we no longer need the object lock.  This reduces the longest lock hold times
  and eliminates some trylock code blocks.
  
  Reviewed by:  kib, markj
  Differential Revision:        https://reviews.freebsd.org/D23034
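
For readers following the diff below, a minimal sketch of the pattern the
change relies on (the helper name is hypothetical; vm_page_lookup(),
vm_page_tryxbusy(), and the VM_OBJECT_*LOCK macros are the standard vm(9)
KPIs): once a page is exclusively busied, the busy state rather than the
object lock prevents it from being freed, renamed, or collapsed away, so
the lock can be dropped early.

    /*
     * Sketch only: look the page up under the object lock, exclusive-busy
     * it, then drop the lock.  The xbusy state now protects the page.
     */
    static vm_page_t
    lookup_and_xbusy(vm_object_t object, vm_pindex_t pindex)
    {
            vm_page_t m;

            VM_OBJECT_WLOCK(object);
            m = vm_page_lookup(object, pindex);
            if (m != NULL && !vm_page_tryxbusy(m))
                    m = NULL;       /* busy contention; caller retries */
            VM_OBJECT_WUNLOCK(object);
            return (m);
    }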

Modified:
  head/sys/vm/vm_fault.c

Modified: head/sys/vm/vm_fault.c
==============================================================================
--- head/sys/vm/vm_fault.c      Mon Jan 20 22:15:33 2020        (r356932)
+++ head/sys/vm/vm_fault.c      Mon Jan 20 22:49:52 2020        (r356933)
@@ -342,10 +342,10 @@ vm_fault_soft_fast(struct faultstate *fs, vm_offset_t 
                *m_hold = m;
                vm_page_wire(m);
        }
-       vm_fault_dirty(fs->entry, m, prot, fault_type, fault_flags);
        if (psind == 0 && !wired)
                vm_fault_prefault(fs, vaddr, PFBAK, PFFOR, true);
        VM_OBJECT_RUNLOCK(fs->first_object);
+       vm_fault_dirty(fs->entry, m, prot, fault_type, fault_flags);
        vm_map_lookup_done(fs->map, fs->entry);
        curthread->td_ru.ru_minflt++;
 
@@ -632,7 +632,7 @@ vm_fault_trap(vm_map_t map, vm_offset_t vaddr, vm_prot
 }
 
 static int
-vm_fault_lock_vnode(struct faultstate *fs)
+vm_fault_lock_vnode(struct faultstate *fs, bool objlocked)
 {
        struct vnode *vp;
        int error, locked;
@@ -668,7 +668,10 @@ vm_fault_lock_vnode(struct faultstate *fs)
        }
 
        vhold(vp);
-       unlock_and_deallocate(fs);
+       if (objlocked)
+               unlock_and_deallocate(fs);
+       else
+               fault_deallocate(fs);
        error = vget(vp, locked | LK_RETRY | LK_CANRECURSE, curthread);
        vdrop(vp);
        fs->vp = vp;
@@ -863,9 +866,11 @@ RetryFault_oom:
                         */
                        if (!vm_page_all_valid(fs.m))
                                goto readrest;
-                       break; /* break to PAGE HAS BEEN FOUND */
+                       VM_OBJECT_WUNLOCK(fs.object);
+                       break; /* break to PAGE HAS BEEN FOUND. */
                }
                KASSERT(fs.m == NULL, ("fs.m should be NULL, not %p", fs.m));
+               VM_OBJECT_ASSERT_WLOCKED(fs.object);
 
                /*
                 * Page is not resident.  If the pager might contain the page
@@ -876,7 +881,7 @@ RetryFault_oom:
                if (fs.object->type != OBJT_DEFAULT ||
                    fs.object == fs.first_object) {
                        if ((fs.object->flags & OBJ_SIZEVNLOCK) != 0) {
-                               rv = vm_fault_lock_vnode(&fs);
+                               rv = vm_fault_lock_vnode(&fs, true);
                                MPASS(rv == KERN_SUCCESS ||
                                    rv == KERN_RESOURCE_SHORTAGE);
                                if (rv == KERN_RESOURCE_SHORTAGE)
@@ -956,12 +961,23 @@ RetryFault_oom:
 
 readrest:
                /*
+                * Default objects have no pager so no exclusive busy exists
+                * to protect this page in the chain.  Skip to the next
+                * object without dropping the lock to preserve atomicity of
+                * shadow faults.
+                */
+               if (fs.object->type == OBJT_DEFAULT)
+                       goto next;
+
+               /*
                 * At this point, we have either allocated a new page or found
                 * an existing page that is only partially valid.
                 *
                 * We hold a reference on the current object and the page is
-                * exclusive busied.
+                * exclusive busied.  The exclusive busy prevents simultaneous
+                * faults and collapses while the object lock is dropped.
                 */
+               VM_OBJECT_WUNLOCK(fs.object);
 
                /*
                 * If the pager for the current object might have the page,
@@ -972,8 +988,7 @@ readrest:
                 * have the page, the number of additional pages to read will
                 * apply to subsequent objects in the shadow chain.
                 */
-               if (fs.object->type != OBJT_DEFAULT && nera == -1 &&
-                   !P_KILLED(curproc)) {
+               if (nera == -1 && !P_KILLED(curproc)) {
                        KASSERT(fs.lookup_still_valid, ("map unlocked"));
                        era = fs.entry->read_ahead;
                        behavior = vm_map_entry_behavior(fs.entry);
@@ -1039,7 +1054,7 @@ readrest:
                         */
                        unlock_map(&fs);
 
-                       rv = vm_fault_lock_vnode(&fs);
+                       rv = vm_fault_lock_vnode(&fs, false);
                        MPASS(rv == KERN_SUCCESS ||
                            rv == KERN_RESOURCE_SHORTAGE);
                        if (rv == KERN_RESOURCE_SHORTAGE)
@@ -1080,15 +1095,14 @@ readrest:
                                }
                                ahead = ulmin(ahead, atop(e_end - vaddr) - 1);
                        }
-                       VM_OBJECT_WUNLOCK(fs.object);
                        rv = vm_pager_get_pages(fs.object, &fs.m, 1,
                            &behind, &ahead);
-                       VM_OBJECT_WLOCK(fs.object);
                        if (rv == VM_PAGER_OK) {
                                faultcount = behind + 1 + ahead;
                                hardfault = true;
-                               break; /* break to PAGE HAS BEEN FOUND */
+                               break; /* break to PAGE HAS BEEN FOUND. */
                        }
+                       VM_OBJECT_WLOCK(fs.object);
                        if (rv == VM_PAGER_ERROR)
                                printf("vm_fault: pager read error, pid %d 
(%s)\n",
                                    curproc->p_pid, curproc->p_comm);
@@ -1106,6 +1120,7 @@ readrest:
 
                }
 
+next:
                /*
                 * The requested page does not exist at this object/
                 * offset.  Remove the invalid page from the object,
@@ -1126,19 +1141,18 @@ readrest:
                 * Move on to the next object.  Lock the next object before
                 * unlocking the current one.
                 */
+               VM_OBJECT_ASSERT_WLOCKED(fs.object);
                next_object = fs.object->backing_object;
                if (next_object == NULL) {
                        /*
                         * If there's no object left, fill the page in the top
                         * object with zeros.
                         */
+                       VM_OBJECT_WUNLOCK(fs.object);
                        if (fs.object != fs.first_object) {
                                vm_object_pip_wakeup(fs.object);
-                               VM_OBJECT_WUNLOCK(fs.object);
-
                                fs.object = fs.first_object;
                                fs.pindex = fs.first_pindex;
-                               VM_OBJECT_WLOCK(fs.object);
                        }
                        MPASS(fs.first_m != NULL);
                        MPASS(fs.m == NULL);
@@ -1157,7 +1171,7 @@ readrest:
                        vm_page_valid(fs.m);
                        /* Don't try to prefault neighboring pages. */
                        faultcount = 1;
-                       break;  /* break to PAGE HAS BEEN FOUND */
+                       break;  /* break to PAGE HAS BEEN FOUND. */
                } else {
                        MPASS(fs.first_m != NULL);
                        KASSERT(fs.object != next_object,
@@ -1173,12 +1187,12 @@ readrest:
                }
        }
 
-       vm_page_assert_xbusied(fs.m);
-
        /*
-        * PAGE HAS BEEN FOUND. [Loop invariant still holds -- the object lock
-        * is held.]
+        * PAGE HAS BEEN FOUND.  A valid page has been found and exclusively
+        * busied.  The object lock must no longer be held.
         */
+       vm_page_assert_xbusied(fs.m);
+       VM_OBJECT_ASSERT_UNLOCKED(fs.object);
 
        /*
         * If the page is being written, but isn't already owned by the
@@ -1202,27 +1216,28 @@ readrest:
                         */
                        is_first_object_locked = false;
                        if (
-                               /*
-                                * Only one shadow object
-                                */
-                               (fs.object->shadow_count == 1) &&
-                               /*
-                                * No COW refs, except us
-                                */
-                               (fs.object->ref_count == 1) &&
-                               /*
-                                * No one else can look this object up
-                                */
-                               (fs.object->handle == NULL) &&
-                               /*
-                                * No other ways to look the object up
-                                */
-                               ((fs.object->flags & OBJ_ANON) != 0) &&
+                           /*
+                            * Only one shadow object
+                            */
+                           fs.object->shadow_count == 1 &&
+                           /*
+                            * No COW refs, except us
+                            */
+                           fs.object->ref_count == 1 &&
+                           /*
+                            * No one else can look this object up
+                            */
+                           fs.object->handle == NULL &&
+                           /*
+                            * No other ways to look the object up
+                            */
+                           (fs.object->flags & OBJ_ANON) != 0 &&
                            (is_first_object_locked = VM_OBJECT_TRYWLOCK(fs.first_object)) &&
-                               /*
-                                * We don't chase down the shadow chain
-                                */
-                           fs.object == fs.first_object->backing_object) {
+                           /*
+                            * We don't chase down the shadow chain
+                            */
+                           fs.object == fs.first_object->backing_object &&
+                           VM_OBJECT_TRYWLOCK(fs.object)) {
 
                                /*
                                 * Remove but keep xbusy for replace.  fs.m is
@@ -1242,11 +1257,13 @@ readrest:
                                    fs.first_object->backing_object_offset));
 #endif
                                VM_OBJECT_WUNLOCK(fs.object);
+                               VM_OBJECT_WUNLOCK(fs.first_object);
                                fs.first_m = fs.m;
                                fs.m = NULL;
                                VM_CNT_INC(v_cow_optim);
                        } else {
-                               VM_OBJECT_WUNLOCK(fs.object);
+                               if (is_first_object_locked)
+                                       VM_OBJECT_WUNLOCK(fs.first_object);
                                /*
                                 * Oh, well, lets copy it.
                                 */
@@ -1285,8 +1302,6 @@ readrest:
                        fs.object = fs.first_object;
                        fs.pindex = fs.first_pindex;
                        fs.m = fs.first_m;
-                       if (!is_first_object_locked)
-                               VM_OBJECT_WLOCK(fs.object);
                        VM_CNT_INC(v_cow_faults);
                        curthread->td_cow++;
                } else {
@@ -1300,7 +1315,7 @@ readrest:
         */
        if (!fs.lookup_still_valid) {
                if (!vm_map_trylock_read(fs.map)) {
-                       unlock_and_deallocate(&fs);
+                       fault_deallocate(&fs);
                        goto RetryFault;
                }
                fs.lookup_still_valid = true;
@@ -1314,7 +1329,7 @@ readrest:
                         * pageout will grab it eventually.
                         */
                        if (result != KERN_SUCCESS) {
-                               unlock_and_deallocate(&fs);
+                               fault_deallocate(&fs);
 
                                /*
                                 * If retry of map lookup would have blocked then
@@ -1326,7 +1341,7 @@ readrest:
                        }
                        if ((retry_object != fs.first_object) ||
                            (retry_pindex != fs.first_pindex)) {
-                               unlock_and_deallocate(&fs);
+                               fault_deallocate(&fs);
                                goto RetryFault;
                        }
 
@@ -1341,7 +1356,7 @@ readrest:
                        prot &= retry_prot;
                        fault_type &= retry_prot;
                        if (prot == 0) {
-                               unlock_and_deallocate(&fs);
+                               fault_deallocate(&fs);
                                goto RetryFault;
                        }
 
@@ -1350,6 +1365,7 @@ readrest:
                            ("!wired && VM_FAULT_WIRE"));
                }
        }
+       VM_OBJECT_ASSERT_UNLOCKED(fs.object);
 
        /*
         * If the page was filled by a pager, save the virtual address that
@@ -1360,17 +1376,16 @@ readrest:
        if (hardfault)
                fs.entry->next_read = vaddr + ptoa(ahead) + PAGE_SIZE;
 
-       vm_page_assert_xbusied(fs.m);
-       vm_fault_dirty(fs.entry, fs.m, prot, fault_type, fault_flags);
-
        /*
         * Page must be completely valid or it is not fit to
         * map into user space.  vm_pager_get_pages() ensures this.
         */
+       vm_page_assert_xbusied(fs.m);
        KASSERT(vm_page_all_valid(fs.m),
            ("vm_fault: page %p partially invalid", fs.m));
-       VM_OBJECT_WUNLOCK(fs.object);
 
+       vm_fault_dirty(fs.entry, fs.m, prot, fault_type, fault_flags);
+
        /*
         * Put this page into the physical map.  We had to do the unlock above
         * because pmap_enter() may sleep.  We don't put the page
@@ -1451,17 +1466,11 @@ vm_fault_dontneed(const struct faultstate *fs, vm_offs
        vm_size_t size;
 
        object = fs->object;
-       VM_OBJECT_ASSERT_WLOCKED(object);
+       VM_OBJECT_ASSERT_UNLOCKED(object);
        first_object = fs->first_object;
-       if (first_object != object) {
-               if (!VM_OBJECT_TRYWLOCK(first_object)) {
-                       VM_OBJECT_WUNLOCK(object);
-                       VM_OBJECT_WLOCK(first_object);
-                       VM_OBJECT_WLOCK(object);
-               }
-       }
        /* Neither fictitious nor unmanaged pages can be reclaimed. */
        if ((first_object->flags & (OBJ_FICTITIOUS | OBJ_UNMANAGED)) == 0) {
+               VM_OBJECT_RLOCK(first_object);
                size = VM_FAULT_DONTNEED_MIN;
                if (MAXPAGESIZES > 1 && size < pagesizes[1])
                        size = pagesizes[1];
@@ -1501,9 +1510,8 @@ vm_fault_dontneed(const struct faultstate *fs, vm_offs
                                        vm_page_deactivate(m);
                        }
                }
+               VM_OBJECT_RUNLOCK(first_object);
        }
-       if (first_object != object)
-               VM_OBJECT_WUNLOCK(first_object);
 }
 
 /*
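
A closing note on the copy-on-write fast path above: since the fault
handler now reaches it without holding any object lock, both objects must
be acquired by trylock, and the optimization is abandoned (falling back to
an ordinary copy) rather than sleeping while fs.m is busied.  A condensed
sketch of that discipline (hypothetical helper, not code from the patch):

    /*
     * Sketch only: opportunistically write-lock the pair of objects the
     * COW optimization needs; on any contention, undo and report failure
     * so the caller can take the plain copy path instead.
     */
    static bool
    cow_trylock_pair(vm_object_t first_object, vm_object_t object)
    {
            if (!VM_OBJECT_TRYWLOCK(first_object))
                    return (false);
            if (object != first_object->backing_object ||
                !VM_OBJECT_TRYWLOCK(object)) {
                    VM_OBJECT_WUNLOCK(first_object);
                    return (false);
            }
            return (true);  /* caller drops both locks when finished */
    }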