Author: gjb
Date: Tue Sep 10 19:00:32 2013
New Revision: 255456
URL: http://svnweb.freebsd.org/changeset/base/255456

Log:
  Reintegrate r250907 (previously reverted as r254754) plus MFS r254947,
  r255443:
  
  r250907 (scottl):
    MFC r248830, r250027, r250409: Several fixes and improvements
    to sendfile()
  
  r254947 (kib):
    The NFS implementation of VOP_READ() sometimes upgrades the vnode
    lock, which drops the shared lock and sleeps waiting for the
    exclusive lock.  As a result, busying the page before the call to
    vn_rdwr() makes the NFS code wait for the vnode lock while the page
    is busy, which violates the proper lock order of vnode lock -> busy.
  
  r255443 (des):
    Fix the length calculation for the final block of a sendfile(2)
    transmission which could be tricked into rounding up to the nearest
    page size, leaking up to a page of kernel memory.  [13:11]
  
  Approved by:  re (delphij)
  Sponsored by: The FreeBSD Foundation

Modified:
  releng/9.2/sys/kern/uipc_syscalls.c
Directory Properties:
  releng/9.2/sys/   (props changed)

Modified: releng/9.2/sys/kern/uipc_syscalls.c
==============================================================================
--- releng/9.2/sys/kern/uipc_syscalls.c Tue Sep 10 18:40:43 2013        (r255455)
+++ releng/9.2/sys/kern/uipc_syscalls.c Tue Sep 10 19:00:32 2013        (r255456)
@@ -1835,9 +1835,11 @@ kern_sendfile(struct thread *td, struct 
        struct mbuf *m = NULL;
        struct sf_buf *sf;
        struct vm_page *pg;
+       struct vattr va;
        off_t off, xfsize, fsbytes = 0, sbytes = 0, rem = 0;
        int error, hdrlen = 0, mnw = 0;
        int vfslocked;
+       int bsize;
        struct sendfile_sync *sfs = NULL;
 
        /*
@@ -1852,6 +1854,18 @@ kern_sendfile(struct thread *td, struct 
        vfslocked = VFS_LOCK_GIANT(vp->v_mount);
        vn_lock(vp, LK_SHARED | LK_RETRY);
        if (vp->v_type == VREG) {
+               bsize = vp->v_mount->mnt_stat.f_iosize;
+               if (uap->nbytes == 0) {
+                       error = VOP_GETATTR(vp, &va, td->td_ucred);
+                       if (error != 0) {
+                               VOP_UNLOCK(vp, 0);
+                               VFS_UNLOCK_GIANT(vfslocked);
+                               obj = NULL;
+                               goto out;
+                       }
+                       rem = va.va_size;
+               } else
+                       rem = uap->nbytes;
                obj = vp->v_object;
                if (obj != NULL) {
                        /*
@@ -1869,7 +1883,8 @@ kern_sendfile(struct thread *td, struct 
                                obj = NULL;
                        }
                }
-       }
+       } else
+               bsize = 0;      /* silence gcc */
        VOP_UNLOCK(vp, 0);
        VFS_UNLOCK_GIANT(vfslocked);
        if (obj == NULL) {
@@ -1962,11 +1977,20 @@ kern_sendfile(struct thread *td, struct 
         * The outer loop checks the state and available space of the socket
         * and takes care of the overall progress.
         */
-       for (off = uap->offset, rem = uap->nbytes; ; ) {
-               struct mbuf *mtail = NULL;
-               int loopbytes = 0;
-               int space = 0;
-               int done = 0;
+       for (off = uap->offset; ; ) {
+               struct mbuf *mtail;
+               int loopbytes;
+               int space;
+               int done;
+
+               if ((uap->nbytes != 0 && uap->nbytes == fsbytes) ||
+                   (uap->nbytes == 0 && va.va_size == fsbytes))
+                       break;
+
+               mtail = NULL;
+               loopbytes = 0;
+               space = 0;
+               done = 0;
 
                /*
                 * Check the socket state for ongoing connection,
@@ -2034,6 +2058,20 @@ retry_space:
                 */
                space -= hdrlen;
 
+               vfslocked = VFS_LOCK_GIANT(vp->v_mount);
+               error = vn_lock(vp, LK_SHARED);
+               if (error != 0) {
+                       VFS_UNLOCK_GIANT(vfslocked);
+                       goto done;
+               }
+               error = VOP_GETATTR(vp, &va, td->td_ucred);
+               if (error != 0 || off >= va.va_size) {
+                       VOP_UNLOCK(vp, 0);
+                       VFS_UNLOCK_GIANT(vfslocked);
+                       goto done;
+               }
+               VFS_UNLOCK_GIANT(vfslocked);
+
                /*
                 * Loop and construct maximum sized mbuf chain to be bulk
                 * dumped into socket buffer.
@@ -2043,25 +2081,19 @@ retry_space:
                        vm_offset_t pgoff;
                        struct mbuf *m0;
 
-                       VM_OBJECT_LOCK(obj);
                        /*
                         * Calculate the amount to transfer.
                         * Not to exceed a page, the EOF,
                         * or the passed in nbytes.
                         */
                        pgoff = (vm_offset_t)(off & PAGE_MASK);
-                       xfsize = omin(PAGE_SIZE - pgoff,
-                           obj->un_pager.vnp.vnp_size - uap->offset -
-                           fsbytes - loopbytes);
-                       if (uap->nbytes)
-                               rem = (uap->nbytes - fsbytes - loopbytes);
-                       else
-                               rem = obj->un_pager.vnp.vnp_size -
-                                   uap->offset - fsbytes - loopbytes;
-                       xfsize = omin(rem, xfsize);
+                       rem = va.va_size - uap->offset;
+                       if (uap->nbytes != 0)
+                               rem = omin(rem, uap->nbytes);
+                       rem -= fsbytes + loopbytes;
+                       xfsize = omin(PAGE_SIZE - pgoff, rem);
                        xfsize = omin(space - loopbytes, xfsize);
                        if (xfsize <= 0) {
-                               VM_OBJECT_UNLOCK(obj);
                                done = 1;               /* all data sent */
                                break;
                        }
@@ -2071,6 +2103,7 @@ retry_space:
                         * if not found or wait and loop if busy.
                         */
                        pindex = OFF_TO_IDX(off);
+                       VM_OBJECT_LOCK(obj);
                        pg = vm_page_grab(obj, pindex, VM_ALLOC_NOBUSY |
                            VM_ALLOC_NORMAL | VM_ALLOC_WIRED | VM_ALLOC_RETRY);
 
@@ -2088,42 +2121,25 @@ retry_space:
                        else if (uap->flags & SF_NODISKIO)
                                error = EBUSY;
                        else {
-                               int bsize;
                                ssize_t resid;
 
-                               /*
-                                * Ensure that our page is still around
-                                * when the I/O completes.
-                                */
-                               vm_page_io_start(pg);
                                VM_OBJECT_UNLOCK(obj);
 
                                /*
                                 * Get the page from backing store.
-                                */
-                               vfslocked = VFS_LOCK_GIANT(vp->v_mount);
-                               error = vn_lock(vp, LK_SHARED);
-                               if (error != 0)
-                                       goto after_read;
-                               bsize = vp->v_mount->mnt_stat.f_iosize;
-
-                               /*
                                 * XXXMAC: Because we don't have fp->f_cred
                                 * here, we pass in NOCRED.  This is probably
                                 * wrong, but is consistent with our original
                                 * implementation.
                                 */
+                               vfslocked = VFS_LOCK_GIANT(vp->v_mount);
                                error = vn_rdwr(UIO_READ, vp, NULL, MAXBSIZE,
                                    trunc_page(off), UIO_NOCOPY, IO_NODELOCKED |
                                    IO_VMIO | ((MAXBSIZE / bsize) << IO_SEQSHIFT),
                                    td->td_ucred, NOCRED, &resid, td);
-                               VOP_UNLOCK(vp, 0);
-                       after_read:
                                VFS_UNLOCK_GIANT(vfslocked);
-                               VM_OBJECT_LOCK(obj);
-                               vm_page_io_finish(pg);
-                               if (!error)
-                                       VM_OBJECT_UNLOCK(obj);
+                               if (error)
+                                       VM_OBJECT_LOCK(obj);
                                mbstat.sf_iocnt++;
                        }
                        if (error) {
@@ -2174,7 +2190,7 @@ retry_space:
                        m0 = m_get((mnw ? M_NOWAIT : M_WAITOK), MT_DATA);
                        if (m0 == NULL) {
                                error = (mnw ? EAGAIN : ENOBUFS);
-                               sf_buf_mext((void *)sf_buf_kva(sf), sf);
+                               sf_buf_mext(NULL, sf);
                                break;
                        }
                        MEXTADD(m0, sf_buf_kva(sf), PAGE_SIZE, sf_buf_mext,
@@ -2202,6 +2218,8 @@ retry_space:
                        }
                }
 
+               VOP_UNLOCK(vp, 0);
+
                /* Add the buffer chain to the socket buffer. */
                if (m != NULL) {
                        int mlen, err;
_______________________________________________
svn-src-all@freebsd.org mailing list
http://lists.freebsd.org/mailman/listinfo/svn-src-all
To unsubscribe, send any mail to "svn-src-all-unsubscr...@freebsd.org"

Reply via email to