Author: attilio
Date: Mon Aug  5 08:55:35 2013
New Revision: 253953
URL: http://svnweb.freebsd.org/changeset/base/253953

Log:
  Revert r253939:
  We cannot busy a page before doing pagefaults.
  In fact, it can deadlock against the vnode lock, as it tries to vget().
  Other functions, right now, have an opposite lock ordering, like
  vm_object_sync(), which acquires the vnode lock first and then
  sleeps on the busy mechanism.
  
  Before this patch is reinserted we need to break this ordering.
  
  Sponsored by: EMC / Isilon storage division
  Reported by:  kib

Modified:
  head/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zfs_vnops.c
  head/sys/fs/tmpfs/tmpfs_vnops.c
  head/sys/kern/imgact_elf.c
  head/sys/kern/kern_exec.c
  head/sys/kern/sys_process.c
  head/sys/vm/vm_extern.h
  head/sys/vm/vm_fault.c
  head/sys/vm/vm_glue.c
  head/sys/vm/vm_map.h

Modified: head/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zfs_vnops.c
==============================================================================
--- head/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zfs_vnops.c     Mon Aug  5 08:27:35 2013        (r253952)
+++ head/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zfs_vnops.c     Mon Aug  5 08:55:35 2013        (r253953)
@@ -324,8 +324,7 @@ zfs_ioctl(vnode_t *vp, u_long com, intpt
 }
 
 static vm_page_t
-page_busy(vnode_t *vp, int64_t start, int64_t off, int64_t nbytes,
-    boolean_t alloc)
+page_busy(vnode_t *vp, int64_t start, int64_t off, int64_t nbytes)
 {
        vm_object_t obj;
        vm_page_t pp;
@@ -347,8 +346,6 @@ page_busy(vnode_t *vp, int64_t start, in
                                continue;
                        }
                } else if (pp == NULL) {
-                       if (!alloc)
-                               break;
                        pp = vm_page_alloc(obj, OFF_TO_IDX(start),
                            VM_ALLOC_SYSTEM | VM_ALLOC_IFCACHED |
                            VM_ALLOC_NOBUSY);
@@ -359,10 +356,8 @@ page_busy(vnode_t *vp, int64_t start, in
 
                if (pp != NULL) {
                        ASSERT3U(pp->valid, ==, VM_PAGE_BITS_ALL);
-                       vm_page_io_start(pp);
-                       if (!alloc)
-                               break;
                        vm_object_pip_add(obj, 1);
+                       vm_page_io_start(pp);
                        pmap_remove_write(pp);
                        vm_page_clear_dirty(pp, off, nbytes);
                }
@@ -372,12 +367,55 @@ page_busy(vnode_t *vp, int64_t start, in
 }
 
 static void
-page_unbusy(vm_page_t pp, boolean_t unalloc)
+page_unbusy(vm_page_t pp)
 {
 
        vm_page_io_finish(pp);
-       if (unalloc)
-               vm_object_pip_subtract(pp->object, 1);
+       vm_object_pip_subtract(pp->object, 1);
+}
+
+static vm_page_t
+page_hold(vnode_t *vp, int64_t start)
+{
+       vm_object_t obj;
+       vm_page_t pp;
+
+       obj = vp->v_object;
+       zfs_vmobject_assert_wlocked(obj);
+
+       for (;;) {
+               if ((pp = vm_page_lookup(obj, OFF_TO_IDX(start))) != NULL &&
+                   pp->valid) {
+                       if ((pp->oflags & VPO_BUSY) != 0) {
+                               /*
+                                * Reference the page before unlocking and
+                                * sleeping so that the page daemon is less
+                                * likely to reclaim it.
+                                */
+                               vm_page_reference(pp);
+                               vm_page_sleep(pp, "zfsmwb");
+                               continue;
+                       }
+
+                       ASSERT3U(pp->valid, ==, VM_PAGE_BITS_ALL);
+                       vm_page_lock(pp);
+                       vm_page_hold(pp);
+                       vm_page_unlock(pp);
+
+               } else
+                       pp = NULL;
+               break;
+       }
+       return (pp);
+}
+
+static void
+page_unhold(vm_page_t pp)
+{
+
+       vm_page_lock(pp);
+       vm_page_unhold(pp);
+       vm_page_unlock(pp);
 }
 
 static caddr_t
@@ -441,8 +479,7 @@ update_pages(vnode_t *vp, int64_t start,
 
                        zfs_vmobject_wlock(obj);
                        vm_page_undirty(pp);
-               } else if ((pp = page_busy(vp, start, off, nbytes,
-                   TRUE)) != NULL) {
+               } else if ((pp = page_busy(vp, start, off, nbytes)) != NULL) {
                        zfs_vmobject_wunlock(obj);
 
                        va = zfs_map_page(pp, &sf);
@@ -451,7 +488,7 @@ update_pages(vnode_t *vp, int64_t start,
                        zfs_unmap_page(sf);
 
                        zfs_vmobject_wlock(obj);
-                       page_unbusy(pp, TRUE);
+                       page_unbusy(pp);
                }
                len -= nbytes;
                off = 0;
@@ -561,7 +598,7 @@ mappedread(vnode_t *vp, int nbytes, uio_
                vm_page_t pp;
                uint64_t bytes = MIN(PAGESIZE - off, len);
 
-               if (pp = page_busy(vp, start, 0, 0, FALSE)) {
+               if (pp = page_hold(vp, start)) {
                        struct sf_buf *sf;
                        caddr_t va;
 
@@ -570,7 +607,7 @@ mappedread(vnode_t *vp, int nbytes, uio_
                        error = uiomove(va + off, bytes, UIO_READ, uio);
                        zfs_unmap_page(sf);
                        zfs_vmobject_wlock(obj);
-                       page_unbusy(pp, FALSE);
+                       page_unhold(pp);
                } else {
                        zfs_vmobject_wunlock(obj);
                        error = dmu_read_uio(os, zp->z_id, uio, bytes);

Modified: head/sys/fs/tmpfs/tmpfs_vnops.c
==============================================================================
--- head/sys/fs/tmpfs/tmpfs_vnops.c     Mon Aug  5 08:27:35 2013        (r253952)
+++ head/sys/fs/tmpfs/tmpfs_vnops.c     Mon Aug  5 08:55:35 2013        (r253953)
@@ -485,13 +485,13 @@ tmpfs_nocacheread(vm_object_t tobj, vm_p
                        vm_page_zero_invalid(m, TRUE);
                vm_page_wakeup(m);
        }
-       vm_page_io_start(m);
+       vm_page_lock(m);
+       vm_page_hold(m);
+       vm_page_unlock(m);
        VM_OBJECT_WUNLOCK(tobj);
        error = uiomove_fromphys(&m, offset, tlen, uio);
-       VM_OBJECT_WLOCK(tobj);
-       vm_page_io_finish(m);
-       VM_OBJECT_WUNLOCK(tobj);
        vm_page_lock(m);
+       vm_page_unhold(m);
        if (m->queue == PQ_NONE) {
                vm_page_deactivate(m);
        } else {
@@ -602,14 +602,16 @@ tmpfs_mappedwrite(vm_object_t tobj, size
                        vm_page_zero_invalid(tpg, TRUE);
                vm_page_wakeup(tpg);
        }
-       vm_page_io_start(tpg);
+       vm_page_lock(tpg);
+       vm_page_hold(tpg);
+       vm_page_unlock(tpg);
        VM_OBJECT_WUNLOCK(tobj);
        error = uiomove_fromphys(&tpg, offset, tlen, uio);
        VM_OBJECT_WLOCK(tobj);
-       vm_page_io_finish(tpg);
        if (error == 0)
                vm_page_dirty(tpg);
        vm_page_lock(tpg);
+       vm_page_unhold(tpg);
        if (tpg->queue == PQ_NONE) {
                vm_page_deactivate(tpg);
        } else {

Modified: head/sys/kern/imgact_elf.c
==============================================================================
--- head/sys/kern/imgact_elf.c  Mon Aug  5 08:27:35 2013        (r253952)
+++ head/sys/kern/imgact_elf.c  Mon Aug  5 08:55:35 2013        (r253953)
@@ -378,7 +378,7 @@ __elfN(map_partial)(vm_map_t map, vm_obj
                off = offset - trunc_page(offset);
                error = copyout((caddr_t)sf_buf_kva(sf) + off, (caddr_t)start,
                    end - start);
-               vm_imgact_unmap_page(object, sf);
+               vm_imgact_unmap_page(sf);
                if (error) {
                        return (KERN_FAILURE);
                }
@@ -433,7 +433,7 @@ __elfN(map_insert)(vm_map_t map, vm_obje
                                        sz = PAGE_SIZE - off;
                                error = copyout((caddr_t)sf_buf_kva(sf) + off,
                                    (caddr_t)start, sz);
-                               vm_imgact_unmap_page(object, sf);
+                               vm_imgact_unmap_page(sf);
                                if (error) {
                                        return (KERN_FAILURE);
                                }
@@ -553,7 +553,7 @@ __elfN(load_section)(struct image_params
                    trunc_page(offset + filsz);
                error = copyout((caddr_t)sf_buf_kva(sf) + off,
                    (caddr_t)map_addr, copy_len);
-               vm_imgact_unmap_page(object, sf);
+               vm_imgact_unmap_page(sf);
                if (error) {
                        return (error);
                }

Modified: head/sys/kern/kern_exec.c
==============================================================================
--- head/sys/kern/kern_exec.c   Mon Aug  5 08:27:35 2013        (r253952)
+++ head/sys/kern/kern_exec.c   Mon Aug  5 08:55:35 2013        (r253953)
@@ -973,7 +973,7 @@ exec_map_first_page(imgp)
                vm_page_wakeup(ma[0]);
        }
        vm_page_lock(ma[0]);
-       vm_page_wire(ma[0]);
+       vm_page_hold(ma[0]);
        vm_page_unlock(ma[0]);
        VM_OBJECT_WUNLOCK(object);
 
@@ -994,7 +994,7 @@ exec_unmap_first_page(imgp)
                sf_buf_free(imgp->firstpage);
                imgp->firstpage = NULL;
                vm_page_lock(m);
-               vm_page_unwire(m, 0);
+               vm_page_unhold(m);
                vm_page_unlock(m);
        }
 }

Modified: head/sys/kern/sys_process.c
==============================================================================
--- head/sys/kern/sys_process.c Mon Aug  5 08:27:35 2013        (r253952)
+++ head/sys/kern/sys_process.c Mon Aug  5 08:55:35 2013        (r253953)
@@ -263,7 +263,6 @@ proc_rwmem(struct proc *p, struct uio *u
        writing = uio->uio_rw == UIO_WRITE;
        reqprot = writing ? VM_PROT_COPY | VM_PROT_READ : VM_PROT_READ;
        fault_flags = writing ? VM_FAULT_DIRTY : VM_FAULT_NORMAL;
-       fault_flags |= VM_FAULT_IOBUSY;
 
        /*
         * Only map in one page at a time.  We don't have to, but it
@@ -288,9 +287,9 @@ proc_rwmem(struct proc *p, struct uio *u
                len = min(PAGE_SIZE - page_offset, uio->uio_resid);
 
                /*
-                * Fault and busy the page on behalf of the process.
+                * Fault and hold the page on behalf of the process.
                 */
-               error = vm_fault_handle(map, pageno, reqprot, fault_flags, &m);
+               error = vm_fault_hold(map, pageno, reqprot, fault_flags, &m);
                if (error != KERN_SUCCESS) {
                        if (error == KERN_RESOURCE_SHORTAGE)
                                error = ENOMEM;
@@ -316,9 +315,9 @@ proc_rwmem(struct proc *p, struct uio *u
                /*
                 * Release the page.
                 */
-               VM_OBJECT_WLOCK(m->object);
-               vm_page_io_finish(m);
-               VM_OBJECT_WUNLOCK(m->object);
+               vm_page_lock(m);
+               vm_page_unhold(m);
+               vm_page_unlock(m);
 
        } while (error == 0 && uio->uio_resid > 0);
 

Modified: head/sys/vm/vm_extern.h
==============================================================================
--- head/sys/vm/vm_extern.h     Mon Aug  5 08:27:35 2013        (r253952)
+++ head/sys/vm/vm_extern.h     Mon Aug  5 08:55:35 2013        (r253953)
@@ -63,7 +63,7 @@ void vm_fault_copy_entry(vm_map_t, vm_ma
     vm_ooffset_t *);
 int vm_fault_disable_pagefaults(void);
 void vm_fault_enable_pagefaults(int save);
-int vm_fault_handle(vm_map_t map, vm_offset_t vaddr, vm_prot_t fault_type,
+int vm_fault_hold(vm_map_t map, vm_offset_t vaddr, vm_prot_t fault_type,
     int fault_flags, vm_page_t *m_hold);
 int vm_fault_quick_hold_pages(vm_map_t map, vm_offset_t addr, vm_size_t len,
     vm_prot_t prot, vm_page_t *ma, int max_count);
@@ -87,7 +87,7 @@ void vnode_pager_setsize(struct vnode *,
 int vslock(void *, size_t);
 void vsunlock(void *, size_t);
 struct sf_buf *vm_imgact_map_page(vm_object_t object, vm_ooffset_t offset);
-void vm_imgact_unmap_page(vm_object_t, struct sf_buf *sf);
+void vm_imgact_unmap_page(struct sf_buf *sf);
 void vm_thread_dispose(struct thread *td);
 int vm_thread_new(struct thread *td, int pages);
 int vm_mlock(struct proc *, struct ucred *, const void *, size_t);

Modified: head/sys/vm/vm_fault.c
==============================================================================
--- head/sys/vm/vm_fault.c      Mon Aug  5 08:27:35 2013        (r253952)
+++ head/sys/vm/vm_fault.c      Mon Aug  5 08:55:35 2013        (r253953)
@@ -221,8 +221,8 @@ vm_fault(vm_map_t map, vm_offset_t vaddr
        if (map != kernel_map && KTRPOINT(td, KTR_FAULT))
                ktrfault(vaddr, fault_type);
 #endif
-       result = vm_fault_handle(map, trunc_page(vaddr), fault_type,
-           fault_flags, NULL);
+       result = vm_fault_hold(map, trunc_page(vaddr), fault_type, fault_flags,
+           NULL);
 #ifdef KTRACE
        if (map != kernel_map && KTRPOINT(td, KTR_FAULTEND))
                ktrfaultend(result);
@@ -231,7 +231,7 @@ vm_fault(vm_map_t map, vm_offset_t vaddr
 }
 
 int
-vm_fault_handle(vm_map_t map, vm_offset_t vaddr, vm_prot_t fault_type,
+vm_fault_hold(vm_map_t map, vm_offset_t vaddr, vm_prot_t fault_type,
     int fault_flags, vm_page_t *m_hold)
 {
        vm_prot_t prot;
@@ -943,10 +943,7 @@ vnode_locked:
                vm_page_activate(fs.m);
        if (m_hold != NULL) {
                *m_hold = fs.m;
-               if (fault_flags & VM_FAULT_IOBUSY)
-                       vm_page_io_start(fs.m);
-               else
-                       vm_page_hold(fs.m);
+               vm_page_hold(fs.m);
        }
        vm_page_unlock(fs.m);
        vm_page_wakeup(fs.m);
@@ -1148,7 +1145,7 @@ vm_fault_quick_hold_pages(vm_map_t map, 
                 * and hold these pages.
                 */
                for (mp = ma, va = addr; va < end; mp++, va += PAGE_SIZE)
-                       if (*mp == NULL && vm_fault_handle(map, va, prot,
+                       if (*mp == NULL && vm_fault_hold(map, va, prot,
                            VM_FAULT_NORMAL, mp) != KERN_SUCCESS)
                                goto error;
        }

Modified: head/sys/vm/vm_glue.c
==============================================================================
--- head/sys/vm/vm_glue.c       Mon Aug  5 08:27:35 2013        (r253952)
+++ head/sys/vm/vm_glue.c       Mon Aug  5 08:55:35 2013        (r253953)
@@ -223,7 +223,7 @@ vsunlock(void *addr, size_t len)
  * Return the pinned page if successful; otherwise, return NULL.
  */
 static vm_page_t
-vm_imgact_page_iostart(vm_object_t object, vm_ooffset_t offset)
+vm_imgact_hold_page(vm_object_t object, vm_ooffset_t offset)
 {
        vm_page_t m, ma[1];
        vm_pindex_t pindex;
@@ -249,7 +249,9 @@ vm_imgact_page_iostart(vm_object_t objec
                }
                vm_page_wakeup(m);
        }
-       vm_page_io_start(m);
+       vm_page_lock(m);
+       vm_page_hold(m);
+       vm_page_unlock(m);
 out:
        VM_OBJECT_WUNLOCK(object);
        return (m);
@@ -264,7 +266,7 @@ vm_imgact_map_page(vm_object_t object, v
 {
        vm_page_t m;
 
-       m = vm_imgact_page_iostart(object, offset);
+       m = vm_imgact_hold_page(object, offset);
        if (m == NULL)
                return (NULL);
        sched_pin();
@@ -275,16 +277,16 @@ vm_imgact_map_page(vm_object_t object, v
  * Destroy the given CPU private mapping and unpin the page that it mapped.
  */
 void
-vm_imgact_unmap_page(vm_object_t object, struct sf_buf *sf)
+vm_imgact_unmap_page(struct sf_buf *sf)
 {
        vm_page_t m;
 
        m = sf_buf_page(sf);
        sf_buf_free(sf);
        sched_unpin();
-       VM_OBJECT_WLOCK(object);
-       vm_page_io_finish(m);
-       VM_OBJECT_WUNLOCK(object);
+       vm_page_lock(m);
+       vm_page_unhold(m);
+       vm_page_unlock(m);
 }
 
 void

Modified: head/sys/vm/vm_map.h
==============================================================================
--- head/sys/vm/vm_map.h        Mon Aug  5 08:27:35 2013        (r253952)
+++ head/sys/vm/vm_map.h        Mon Aug  5 08:55:35 2013        (r253953)
@@ -329,7 +329,6 @@ long vmspace_resident_count(struct vmspa
 #define VM_FAULT_NORMAL 0              /* Nothing special */
 #define VM_FAULT_CHANGE_WIRING 1       /* Change the wiring as appropriate */
 #define        VM_FAULT_DIRTY 2                /* Dirty the page; use w/VM_PROT_COPY */
-#define        VM_FAULT_IOBUSY 4               /* Busy the faulted page */
 
 /*
  * Initially, mappings are slightly sequential.  The maximum window size must
_______________________________________________
svn-src-head@freebsd.org mailing list
http://lists.freebsd.org/mailman/listinfo/svn-src-head
To unsubscribe, send any mail to "svn-src-head-unsubscr...@freebsd.org"

Reply via email to