Author: alc
Date: Sat Dec 25 21:26:56 2010
New Revision: 216699
URL: http://svn.freebsd.org/changeset/base/216699

Log:
  Introduce and use a new VM interface for temporarily pinning pages.  This
  new interface replaces the combined use of vm_fault_quick() and
  pmap_extract_and_hold() throughout the kernel.
  
  In collaboration with:        kib@

Deleted:
  head/sys/dev/cxgb/ulp/tom/cxgb_vm.c
  head/sys/dev/cxgb/ulp/tom/cxgb_vm.h
Modified:
  head/sys/dev/cxgb/ulp/tom/cxgb_cpl_socket.c
  head/sys/dev/cxgb/ulp/tom/cxgb_ddp.c
  head/sys/dev/drm/via_dmablit.c
  head/sys/kern/sys_pipe.c
  head/sys/kern/uipc_cow.c
  head/sys/kern/vfs_bio.c
  head/sys/net/bpf_zerocopy.c
  head/sys/vm/vm_extern.h
  head/sys/vm/vm_fault.c

Modified: head/sys/dev/cxgb/ulp/tom/cxgb_cpl_socket.c
==============================================================================
--- head/sys/dev/cxgb/ulp/tom/cxgb_cpl_socket.c Sat Dec 25 17:35:30 2010        (r216698)
+++ head/sys/dev/cxgb/ulp/tom/cxgb_cpl_socket.c Sat Dec 25 21:26:56 2010        (r216699)
@@ -90,7 +90,6 @@ __FBSDID("$FreeBSD$");
 #include <ulp/tom/cxgb_t3_ddp.h>
 #include <ulp/tom/cxgb_toepcb.h>
 #include <ulp/tom/cxgb_tcp.h>
-#include <ulp/tom/cxgb_vm.h>
 
 
 static int     (*pru_sosend)(struct socket *so, struct sockaddr *addr,
@@ -218,8 +217,9 @@ cxgb_hold_iovec_pages(struct uio *uio, v
                
                count = min(count, npages);
 
-               err = vm_fault_hold_user_pages(map,
-                       (vm_offset_t)iov->iov_base, mp, count, prot);
+               /* The following return value is not used. XXX */
+               err = vm_fault_quick_hold_pages(map,
+                   (vm_offset_t)iov->iov_base, iov->iov_len, prot, mp, count);
                mp += count;
                totcount += count;
                curbytes = iov->iov_len;
@@ -503,7 +503,7 @@ cxgb_sosend(struct socket *so, struct so
         *  - the number of bytes to be transferred exceeds the threshold
         *  - the number of bytes currently in flight won't exceed the in-flight
         *    threshold XXX TODO
-        *  - vm_fault_hold_user_pages succeeds
+        *  - vm_fault_quick_hold_pages succeeds
         *  - blocking socket XXX for now
         *
         */
@@ -970,7 +970,7 @@ cxgb_soreceive(struct socket *so, struct
         *  - the number of bytes to be transferred exceeds the threshold
         *  - the number of bytes currently in flight won't exceed the in-flight
         *    threshold XXX TODO
-        *  - vm_fault_hold_user_pages succeeds
+        *  - vm_fault_quick_hold_pages succeeds
         *  - blocking socket XXX for now
         *  - iovcnt is 1
         *

Modified: head/sys/dev/cxgb/ulp/tom/cxgb_ddp.c
==============================================================================
--- head/sys/dev/cxgb/ulp/tom/cxgb_ddp.c        Sat Dec 25 17:35:30 2010        (r216698)
+++ head/sys/dev/cxgb/ulp/tom/cxgb_ddp.c        Sat Dec 25 21:26:56 2010        (r216699)
@@ -90,7 +90,6 @@ __FBSDID("$FreeBSD$");
 #include <ulp/tom/cxgb_t3_ddp.h>
 #include <ulp/tom/cxgb_toepcb.h>
 #include <ulp/tom/cxgb_tcp.h>
-#include <ulp/tom/cxgb_vm.h>
 
 
 #define MAX_SCHEDULE_TIMEOUT   300
@@ -130,14 +129,6 @@ t3_pin_pages(bus_dma_tag_t tag, bus_dmam
        struct ddp_gather_list *p;
        vm_map_t map;
        
-       /*
-        * XXX need x86 agnostic check
-        */
-       if (addr + len > VM_MAXUSER_ADDRESS)
-               return (EFAULT);
-
-
-       
        pg_off = addr & PAGE_MASK;
        npages = (pg_off + len + PAGE_SIZE - 1) >> PAGE_SHIFT;
        p = malloc(sizeof(struct ddp_gather_list) + npages * sizeof(vm_page_t 
*),
@@ -146,10 +137,11 @@ t3_pin_pages(bus_dma_tag_t tag, bus_dmam
                return (ENOMEM);
 
        map = &curthread->td_proc->p_vmspace->vm_map;
-       err = vm_fault_hold_user_pages(map, addr, p->dgl_pages, npages,
-           VM_PROT_READ | VM_PROT_WRITE);
-       if (err)
+       if (vm_fault_quick_hold_pages(map, addr, len, VM_PROT_READ |
+           VM_PROT_WRITE, p->dgl_pages, npages) < 0) {
+               err = EFAULT;
                goto free_gl;
+       }
 
        if (gl && gl->dgl_offset == pg_off && gl->dgl_nelem >= npages &&
            gl->dgl_length >= len) {

Modified: head/sys/dev/drm/via_dmablit.c
==============================================================================
--- head/sys/dev/drm/via_dmablit.c      Sat Dec 25 17:35:30 2010        (r216698)
+++ head/sys/dev/drm/via_dmablit.c      Sat Dec 25 21:26:56 2010        (r216699)
@@ -177,11 +177,10 @@ via_free_sg_info(drm_via_sg_info_t *vsg)
                free(vsg->desc_pages, DRM_MEM_DRIVER);
        case dr_via_pages_locked:
                for (i=0; i < vsg->num_pages; ++i) {
-                       if ( NULL != (page = vsg->pages[i])) {
-                               vm_page_lock(page);
-                               vm_page_unwire(page, 0);
-                               vm_page_unlock(page);
-                       }
+                       page = vsg->pages[i];
+                       vm_page_lock(page);
+                       vm_page_unwire(page, 0);
+                       vm_page_unlock(page);
                }
        case dr_via_pages_alloc:
                free(vsg->pages, DRM_MEM_DRIVER);
@@ -224,41 +223,31 @@ via_lock_all_dma_pages(drm_via_sg_info_t
 {
        unsigned long first_pfn = VIA_PFN(xfer->mem_addr);
        vm_page_t m;
-       vm_map_t map;
        int i;
 
-       map = &curproc->p_vmspace->vm_map;
-
        vsg->num_pages = VIA_PFN(xfer->mem_addr +
            (xfer->num_lines * xfer->mem_stride -1)) - first_pfn + 1;
 
-       /* Make sure that the user has access to these pages */
-       for(i = 0; i < vsg->num_pages; i++) {
-               if (vm_fault_quick((caddr_t)xfer->mem_addr + IDX_TO_OFF(i),
-                   VM_PROT_RW) < 0)
-                       return (-EACCES);
-       }
-
        if (NULL == (vsg->pages = malloc(sizeof(vm_page_t) * vsg->num_pages,
-           DRM_MEM_DRIVER, M_NOWAIT | M_ZERO)))
+           DRM_MEM_DRIVER, M_NOWAIT)))
                return -ENOMEM;
 
-       for(i = 0; i < vsg->num_pages; i++) {
-               m = pmap_extract_and_hold(map->pmap,
-                   (vm_offset_t)xfer->mem_addr + IDX_TO_OFF(i), VM_PROT_RW);
-               if (m == NULL)
-                       break;
+       vsg->state = dr_via_pages_alloc;
+
+       if (vm_fault_quick_hold_pages(&curproc->p_vmspace->vm_map,
+           (vm_offset_t)xfer->mem_addr, vsg->num_pages * PAGE_SIZE,
+           VM_PROT_READ | VM_PROT_WRITE, vsg->pages, vsg->num_pages) < 0)
+               return -EACCES;
+
+       for (i = 0; i < vsg->num_pages; i++) {
+               m = vsg->pages[i];
                vm_page_lock(m);
                vm_page_wire(m);
                vm_page_unhold(m);
                vm_page_unlock(m);
-               vsg->pages[i] = m;
        }
        vsg->state = dr_via_pages_locked;
 
-       if (i != vsg->num_pages)
-               return -EINVAL;
-
        DRM_DEBUG("DMA pages locked\n");
 
        return 0;

Modified: head/sys/kern/sys_pipe.c
==============================================================================
--- head/sys/kern/sys_pipe.c    Sat Dec 25 17:35:30 2010        (r216698)
+++ head/sys/kern/sys_pipe.c    Sat Dec 25 21:26:56 2010        (r216699)
@@ -747,10 +747,8 @@ pipe_build_write_buffer(wpipe, uio)
        struct pipe *wpipe;
        struct uio *uio;
 {
-       pmap_t pmap;
        u_int size;
        int i;
-       vm_offset_t addr, endaddr;
 
        PIPE_LOCK_ASSERT(wpipe, MA_NOTOWNED);
        KASSERT(wpipe->pipe_state & PIPE_DIRECTW,
@@ -760,25 +758,10 @@ pipe_build_write_buffer(wpipe, uio)
        if (size > wpipe->pipe_buffer.size)
                size = wpipe->pipe_buffer.size;
 
-       pmap = vmspace_pmap(curproc->p_vmspace);
-       endaddr = round_page((vm_offset_t)uio->uio_iov->iov_base + size);
-       addr = trunc_page((vm_offset_t)uio->uio_iov->iov_base);
-       if (endaddr < addr)
+       if ((i = vm_fault_quick_hold_pages(&curproc->p_vmspace->vm_map,
+           (vm_offset_t)uio->uio_iov->iov_base, size, VM_PROT_READ,
+           wpipe->pipe_map.ms, PIPENPAGES)) < 0)
                return (EFAULT);
-       for (i = 0; addr < endaddr; addr += PAGE_SIZE, i++) {
-               /*
-                * vm_fault_quick() can sleep.
-                */
-       race:
-               if (vm_fault_quick((caddr_t)addr, VM_PROT_READ) < 0) {
-                       vm_page_unhold_pages(wpipe->pipe_map.ms, i);
-                       return (EFAULT);
-               }
-               wpipe->pipe_map.ms[i] = pmap_extract_and_hold(pmap, addr,
-                   VM_PROT_READ);
-               if (wpipe->pipe_map.ms[i] == NULL)
-                       goto race;
-       }
 
 /*
  * set up the control block

Modified: head/sys/kern/uipc_cow.c
==============================================================================
--- head/sys/kern/uipc_cow.c    Sat Dec 25 17:35:30 2010        (r216698)
+++ head/sys/kern/uipc_cow.c    Sat Dec 25 21:26:56 2010        (r216699)
@@ -103,24 +103,20 @@ socow_setup(struct mbuf *m0, struct uio 
        struct vmspace *vmspace;
        struct vm_map *map;
        vm_offset_t offset, uva;
+       vm_size_t len;
 
        socow_stats.attempted++;
        vmspace = curproc->p_vmspace;
        map = &vmspace->vm_map;
        uva = (vm_offset_t) uio->uio_iov->iov_base;
        offset = uva & PAGE_MASK;
+       len = PAGE_SIZE - offset;
 
        /*
         * Verify that access to the given address is allowed from user-space.
         */
-       if (vm_fault_quick((caddr_t)uva, VM_PROT_READ) < 0)
-               return (0);
-
-       /* 
-       * verify page is mapped & not already wired for i/o
-       */
-       pp = pmap_extract_and_hold(map->pmap, uva, VM_PROT_READ);
-       if (pp == NULL) {
+       if (vm_fault_quick_hold_pages(map, uva, len, VM_PROT_READ, &pp, 1) <
+           0) {
                socow_stats.fail_not_mapped++;
                return(0);
        }
@@ -165,7 +161,7 @@ socow_setup(struct mbuf *m0, struct uio 
         */
        MEXTADD(m0, sf_buf_kva(sf), PAGE_SIZE, socow_iodone,
            (void*)sf_buf_kva(sf), sf, M_RDONLY, EXT_SFBUF);
-       m0->m_len = PAGE_SIZE - offset;
+       m0->m_len = len;
        m0->m_data = (caddr_t)sf_buf_kva(sf) + offset;
        socow_stats.success++;
 

Modified: head/sys/kern/vfs_bio.c
==============================================================================
--- head/sys/kern/vfs_bio.c     Sat Dec 25 17:35:30 2010        (r216698)
+++ head/sys/kern/vfs_bio.c     Sat Dec 25 21:26:56 2010        (r216699)
@@ -3855,46 +3855,19 @@ vm_hold_free_pages(struct buf *bp, int n
 int
 vmapbuf(struct buf *bp)
 {
-       caddr_t addr, kva;
+       caddr_t kva;
        vm_prot_t prot;
-       int pidx, i;
-       struct vm_page *m;
-       struct pmap *pmap = &curproc->p_vmspace->vm_pmap;
+       int pidx;
 
        if (bp->b_bufsize < 0)
                return (-1);
        prot = VM_PROT_READ;
        if (bp->b_iocmd == BIO_READ)
                prot |= VM_PROT_WRITE;  /* Less backwards than it looks */
-       for (addr = (caddr_t)trunc_page((vm_offset_t)bp->b_data), pidx = 0;
-            addr < bp->b_data + bp->b_bufsize;
-            addr += PAGE_SIZE, pidx++) {
-               /*
-                * Do the vm_fault if needed; do the copy-on-write thing
-                * when reading stuff off device into memory.
-                *
-                * NOTE! Must use pmap_extract() because addr may be in
-                * the userland address space, and kextract is only guarenteed
-                * to work for the kernland address space (see: sparc64 port).
-                */
-retry:
-               if (vm_fault_quick(addr >= bp->b_data ? addr : bp->b_data,
-                   prot) < 0) {
-                       for (i = 0; i < pidx; ++i) {
-                               vm_page_lock(bp->b_pages[i]);
-                               vm_page_unhold(bp->b_pages[i]);
-                               vm_page_unlock(bp->b_pages[i]);
-                               bp->b_pages[i] = NULL;
-                       }
-                       return(-1);
-               }
-               m = pmap_extract_and_hold(pmap, (vm_offset_t)addr, prot);
-               if (m == NULL)
-                       goto retry;
-               bp->b_pages[pidx] = m;
-       }
-       if (pidx > btoc(MAXPHYS))
-               panic("vmapbuf: mapped more than MAXPHYS");
+       if ((pidx = vm_fault_quick_hold_pages(&curproc->p_vmspace->vm_map,
+           (vm_offset_t)bp->b_data, bp->b_bufsize, prot, bp->b_pages,
+           btoc(MAXPHYS))) < 0)
+               return (-1);
        pmap_qenter((vm_offset_t)bp->b_saveaddr, bp->b_pages, pidx);
        
        kva = bp->b_saveaddr;

Modified: head/sys/net/bpf_zerocopy.c
==============================================================================
--- head/sys/net/bpf_zerocopy.c Sat Dec 25 17:35:30 2010        (r216698)
+++ head/sys/net/bpf_zerocopy.c Sat Dec 25 21:26:56 2010        (r216699)
@@ -161,12 +161,8 @@ zbuf_sfbuf_get(struct vm_map *map, vm_of
        struct sf_buf *sf;
        vm_page_t pp;
 
-       if (vm_fault_quick((caddr_t) uaddr, VM_PROT_READ | VM_PROT_WRITE) <
-           0)
-               return (NULL);
-       pp = pmap_extract_and_hold(map->pmap, uaddr, VM_PROT_READ |
-           VM_PROT_WRITE);
-       if (pp == NULL)
+       if (vm_fault_quick_hold_pages(map, uaddr, PAGE_SIZE, VM_PROT_READ |
+           VM_PROT_WRITE, &pp, 1) < 0)
                return (NULL);
        vm_page_lock(pp);
        vm_page_wire(pp);

Modified: head/sys/vm/vm_extern.h
==============================================================================
--- head/sys/vm/vm_extern.h     Sat Dec 25 17:35:30 2010        (r216698)
+++ head/sys/vm/vm_extern.h     Sat Dec 25 21:26:56 2010        (r216699)
@@ -63,6 +63,8 @@ void vm_fault_copy_entry(vm_map_t, vm_ma
     vm_ooffset_t *);
 int vm_fault_hold(vm_map_t map, vm_offset_t vaddr, vm_prot_t fault_type,
     int fault_flags, vm_page_t *m_hold);
+int vm_fault_quick_hold_pages(vm_map_t map, vm_offset_t addr, vm_size_t len,
+    vm_prot_t prot, vm_page_t *ma, int max_count);
 void vm_fault_unwire(vm_map_t, vm_offset_t, vm_offset_t, boolean_t);
 int vm_fault_wire(vm_map_t, vm_offset_t, vm_offset_t, boolean_t);
 int vm_forkproc(struct thread *, struct proc *, struct thread *, struct 
vmspace *, int);

Modified: head/sys/vm/vm_fault.c
==============================================================================
--- head/sys/vm/vm_fault.c      Sat Dec 25 17:35:30 2010        (r216698)
+++ head/sys/vm/vm_fault.c      Sat Dec 25 21:26:56 2010        (r216699)
@@ -1045,6 +1045,81 @@ vm_fault_prefault(pmap_t pmap, vm_offset
 }
 
 /*
+ * Hold each of the physical pages that are mapped by the specified range of
+ * virtual addresses, ["addr", "addr" + "len"), if those mappings are valid
+ * and allow the specified types of access, "prot".  If all of the implied
+ * pages are successfully held, then the number of held pages is returned
+ * together with pointers to those pages in the array "ma".  However, if any
+ * of the pages cannot be held, -1 is returned.
+ */
+int
+vm_fault_quick_hold_pages(vm_map_t map, vm_offset_t addr, vm_size_t len,
+    vm_prot_t prot, vm_page_t *ma, int max_count)
+{
+       vm_offset_t end, va;
+       vm_page_t *mp;
+       int count;
+       boolean_t pmap_failed;
+
+       end = round_page(addr + len);
+       addr = trunc_page(addr);
+
+       /*
+        * Check for illegal addresses.
+        */
+       if (addr < vm_map_min(map) || addr > end || end > vm_map_max(map))
+               return (-1);
+
+       count = howmany(end - addr, PAGE_SIZE);
+       if (count > max_count)
+               panic("vm_fault_quick_hold_pages: count > max_count");
+
+       /*
+        * Most likely, the physical pages are resident in the pmap, so it is
+        * faster to try pmap_extract_and_hold() first.
+        */
+       pmap_failed = FALSE;
+       for (mp = ma, va = addr; va < end; mp++, va += PAGE_SIZE) {
+               *mp = pmap_extract_and_hold(map->pmap, va, prot);
+               if (*mp == NULL)
+                       pmap_failed = TRUE;
+               else if ((prot & VM_PROT_WRITE) != 0 &&
+                   (*mp)->dirty != VM_PAGE_BITS_ALL) {
+                       /*
+                        * Explicitly dirty the physical page.  Otherwise, the
+                        * caller's changes may go unnoticed because they are
+                        * performed through an unmanaged mapping or by a DMA
+                        * operation.
+                        */
+                       vm_page_lock_queues();
+                       vm_page_dirty(*mp);
+                       vm_page_unlock_queues();
+               }
+       }
+       if (pmap_failed) {
+               /*
+                * One or more pages could not be held by the pmap.  Either no
+                * page was mapped at the specified virtual address or that
+                * mapping had insufficient permissions.  Attempt to fault in
+                * and hold these pages.
+                */
+               for (mp = ma, va = addr; va < end; mp++, va += PAGE_SIZE)
+                       if (*mp == NULL && vm_fault_hold(map, va, prot,
+                           VM_FAULT_NORMAL, mp) != KERN_SUCCESS)
+                               goto error;
+       }
+       return (count);
+error:
+       for (mp = ma; mp < ma + count; mp++)
+               if (*mp != NULL) {
+                       vm_page_lock(*mp);
+                       vm_page_unhold(*mp);
+                       vm_page_unlock(*mp);
+               }
+       return (-1);
+}
+
+/*
  *     vm_fault_quick:
  *
  *     Ensure that the requested virtual address, which may be in userland,
_______________________________________________
svn-src-all@freebsd.org mailing list
http://lists.freebsd.org/mailman/listinfo/svn-src-all
To unsubscribe, send any mail to "svn-src-all-unsubscr...@freebsd.org"

Reply via email to