Author: mdf
Date: Mon Apr 18 16:32:22 2011
New Revision: 220791
URL: http://svn.freebsd.org/changeset/base/220791

Log:
  Add the posix_fallocate(2) syscall.  The default implementation in
  vop_stdallocate() is filesystem agnostic and will run as slow as a
  read/write loop in userspace; however, it serves to correctly
  implement the functionality for filesystems that do not implement a
  VOP_ALLOCATE.
  
  Note that __FreeBSD_version was already bumped today to 900036 for any
  ports which would like to use this function.
  
  Also reserve space in the syscall table for posix_fadvise(2).
  
  Reviewed by:  -arch (previous version)

Added:
  head/lib/libc/sys/posix_fallocate.2   (contents, props changed)
Modified:
  head/lib/libc/sys/Makefile.inc
  head/lib/libc/sys/Symbol.map
  head/sys/compat/freebsd32/freebsd32_misc.c
  head/sys/compat/freebsd32/syscalls.master
  head/sys/kern/syscalls.master
  head/sys/kern/vfs_default.c
  head/sys/kern/vfs_syscalls.c
  head/sys/kern/vnode_if.src
  head/sys/sys/fcntl.h
  head/sys/sys/vnode.h

Modified: head/lib/libc/sys/Makefile.inc
==============================================================================
--- head/lib/libc/sys/Makefile.inc      Mon Apr 18 16:15:59 2011        
(r220790)
+++ head/lib/libc/sys/Makefile.inc      Mon Apr 18 16:32:22 2011        
(r220791)
@@ -96,7 +96,7 @@ MAN+= abort2.2 accept.2 access.2 acct.2 
        mq_setattr.2 \
        msgctl.2 msgget.2 msgrcv.2 msgsnd.2 \
        msync.2 munmap.2 nanosleep.2 nfssvc.2 ntp_adjtime.2 open.2 \
-       pathconf.2 pipe.2 poll.2 posix_openpt.2 profil.2 \
+       pathconf.2 pipe.2 poll.2 posix_fallocate.2 posix_openpt.2 profil.2 \
        pselect.2 ptrace.2 quotactl.2 \
        read.2 readlink.2 reboot.2 recv.2 rename.2 revoke.2 rfork.2 rmdir.2 \
        rtprio.2

Modified: head/lib/libc/sys/Symbol.map
==============================================================================
--- head/lib/libc/sys/Symbol.map        Mon Apr 18 16:15:59 2011        
(r220790)
+++ head/lib/libc/sys/Symbol.map        Mon Apr 18 16:32:22 2011        
(r220791)
@@ -364,6 +364,7 @@ FBSD_1.2 {
        cap_enter;
        cap_getmode;
        getloginclass;
+       posix_fallocate;
        rctl_get_racct;
        rctl_get_rules;
        rctl_get_limits;

Added: head/lib/libc/sys/posix_fallocate.2
==============================================================================
--- /dev/null   00:00:00 1970   (empty, because file is newly added)
+++ head/lib/libc/sys/posix_fallocate.2 Mon Apr 18 16:32:22 2011        
(r220791)
@@ -0,0 +1,146 @@
+.\" Copyright (c) 1980, 1991, 1993
+.\"    The Regents of the University of California.  All rights reserved.
+.\"
+.\" Redistribution and use in source and binary forms, with or without
+.\" modification, are permitted provided that the following conditions
+.\" are met:
+.\" 1. Redistributions of source code must retain the above copyright
+.\"    notice, this list of conditions and the following disclaimer.
+.\" 2. Redistributions in binary form must reproduce the above copyright
+.\"    notice, this list of conditions and the following disclaimer in the
+.\"    documentation and/or other materials provided with the distribution.
+.\" 4. Neither the name of the University nor the names of its contributors
+.\"    may be used to endorse or promote products derived from this software
+.\"    without specific prior written permission.
+.\"
+.\" THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+.\" ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+.\" IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+.\" ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+.\" FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+.\" DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+.\" OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+.\" HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+.\" LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+.\" OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+.\" SUCH DAMAGE.
+.\"
+.\"     @(#)open.2     8.2 (Berkeley) 11/16/93
+.\" $FreeBSD$
+.\"
+.Dd April 13, 2011
+.Dt POSIX_FALLOCATE 2
+.Os
+.Sh NAME
+.Nm posix_fallocate
+.Nd pre-allocate storage for a range in a file
+.Sh LIBRARY
+.Lb libc
+.Sh SYNOPSIS
+.In fcntl.h
+.Ft int
+.Fn posix_fallocate "int fd" "off_t offset" "off_t len"
+.Sh DESCRIPTION
+Required storage for the range
+.Fa offset
+to
+.Fa offset +
+.Fa len
+in the file referenced by
+.Fa fd
+is guaranteed to be allocated upon successful return.
+That is, if
+.Fn posix_fallocate
+returns successfully, subsequent writes to the specified file data
+will not fail due to lack of free space on the file system storage
+media.
+Any existing file data in the specified range is unmodified.
+If
+.Fa offset +
+.Fa len
+is beyond the current file size, then
+.Fn posix_fallocate
+will adjust the file size to
+.Fa offset +
+.Fa len .
+Otherwise, the file size will not be changed.
+.Pp
+Space allocated by
+.Fn posix_fallocate
+will be freed by a successful call to
+.Xr creat 2
+or
+.Xr open 2
+that truncates the size of the file.
+Space allocated via
+.Fn posix_fallocate
+may be freed by a successful call to
+.Xr ftruncate 2
+that reduces the file size to a size smaller than
+.Fa offset +
+.Fa len .
+.Pp
+.Sh RETURN VALUES
+If successful,
+.Fn posix_fallocate
+returns zero.
+It returns -1 on failure, and sets
+.Va errno
+to indicate the error.
+.Sh ERRORS
+Possible failure conditions:
+.Bl -tag -width Er
+.It Bq Er EBADF
+The
+.Fa fd
+argument is not a valid file descriptor.
+.It Bq Er EBADF
+The
+.Fa fd
+argument references a file that was opened without write permission.
+.It Bq Er EFBIG
+The value of
+.Fa offset +
+.Fa len
+is greater than the maximum file size.
+.It Bq Er EINTR
+A signal was caught during execution.
+.It Bq Er EINVAL
+The
+.Fa len
+argument was less than or equal to zero or the
+.Fa offset
+argument was less than zero.
+.It Bq Er EIO
+An I/O error occurred while reading from or writing to a file system.
+.It Bq Er ENODEV
+The
+.Fa fd
+argument does not refer to a regular file.
+.It Bq Er ENOSPC
+There is insufficient free space remaining on the file system storage
+media.
+.It Bq Er ESPIPE
+The
+.Fa fd
+argument is associated with a pipe or FIFO.
+.El
+.Sh SEE ALSO
+.Xr creat 2 ,
+.Xr ftruncate 2 ,
+.Xr open 2 ,
+.Xr unlink 2
+.Sh STANDARDS
+The
+.Fn posix_fallocate
+system call conforms to
+.St -p1003.1-2004 .
+.Sh HISTORY
+The
+.Fn posix_fallocate
+function appeared in
+.Fx 9.0 .
+.Sh AUTHORS
+.Fn posix_fallocate
+and this manual page were initially written by
+.An Matthew Fleming Aq m...@freebsd.org .

Modified: head/sys/compat/freebsd32/freebsd32_misc.c
==============================================================================
--- head/sys/compat/freebsd32/freebsd32_misc.c  Mon Apr 18 16:15:59 2011        
(r220790)
+++ head/sys/compat/freebsd32/freebsd32_misc.c  Mon Apr 18 16:32:22 2011        
(r220791)
@@ -2790,3 +2790,15 @@ freebsd32_kldstat(struct thread *td, str
        bcopy(&stat.pathname[0], &stat32.pathname[0], sizeof(stat.pathname));
        return (copyout(&stat32, uap->stat, version));
 }
+/* 32-bit compat entry point: rebuild offset/len from lo/hi halves, then call posix_fallocate(). */
+int
+freebsd32_posix_fallocate(struct thread *td,
+    struct freebsd32_posix_fallocate_args *uap)
+{
+       struct posix_fallocate_args ap;
+
+       ap.fd = uap->fd;
+       ap.offset = (uap->offsetlo | ((off_t)uap->offsethi << 32));      /* hi half shifted up 32 bits, OR'ed with lo half */
+       ap.len = (uap->lenlo | ((off_t)uap->lenhi << 32));       /* same recombination for len */
+       return (posix_fallocate(td, &ap));
+}

Modified: head/sys/compat/freebsd32/syscalls.master
==============================================================================
--- head/sys/compat/freebsd32/syscalls.master   Mon Apr 18 16:15:59 2011        
(r220790)
+++ head/sys/compat/freebsd32/syscalls.master   Mon Apr 18 16:32:22 2011        
(r220791)
@@ -986,3 +986,7 @@
 529    AUE_NULL        NOPROTO { int rctl_remove_rule(const void *inbufp, \
                                    size_t inbuflen, void *outbufp, \
                                    size_t outbuflen); }
+530    AUE_NULL        STD     { int freebsd32_posix_fallocate(int fd,\
+                                    uint32_t offsetlo, uint32_t offsethi,\
+                                    uint32_t lenlo, uint32_t lenhi); }
+531    AUE_NULL        UNIMPL  posix_fadvise

Modified: head/sys/kern/syscalls.master
==============================================================================
--- head/sys/kern/syscalls.master       Mon Apr 18 16:15:59 2011        
(r220790)
+++ head/sys/kern/syscalls.master       Mon Apr 18 16:32:22 2011        
(r220791)
@@ -944,5 +944,8 @@
 529    AUE_NULL        STD     { int rctl_remove_rule(const void *inbufp, \
                                    size_t inbuflen, void *outbufp, \
                                    size_t outbuflen); }
+530    AUE_NULL        STD     { int posix_fallocate(int fd, \
+                                   off_t offset, off_t len); }
+531    AUE_NULL        UNIMPL  posix_fadvise
 ; Please copy any additions and changes to the following compatability tables:
 ; sys/compat/freebsd32/syscalls.master

Modified: head/sys/kern/vfs_default.c
==============================================================================
--- head/sys/kern/vfs_default.c Mon Apr 18 16:15:59 2011        (r220790)
+++ head/sys/kern/vfs_default.c Mon Apr 18 16:32:22 2011        (r220791)
@@ -99,6 +99,7 @@ struct vop_vector default_vnodeops = {
        .vop_advlock =          vop_stdadvlock,
        .vop_advlockasync =     vop_stdadvlockasync,
        .vop_advlockpurge =     vop_stdadvlockpurge,
+       .vop_allocate =         vop_stdallocate,
        .vop_bmap =             vop_stdbmap,
        .vop_close =            VOP_NULL,
        .vop_fsync =            VOP_NULL,
@@ -855,6 +856,136 @@ out:
        return (error);
 }
 
+int
+vop_stdallocate(struct vop_allocate_args *ap)
+{
+#ifdef __notyet__
+       struct statfs sfs;
+#endif
+       struct iovec aiov;
+       struct vattr vattr, *vap;
+       struct uio auio;
+       off_t len, cur, offset;
+       uint8_t *buf;
+       struct thread *td;
+       struct vnode *vp;
+       size_t iosize;
+       int error, locked;
+       /* Generic VOP_ALLOCATE fallback: read each chunk of the requested range and write it back. */
+       buf = NULL;
+       error = 0;
+       locked = 1;     /* vp is treated as locked on entry; cleared while yielding */
+       td = curthread;
+       vap = &vattr;
+       vp = ap->a_vp;
+       len = ap->a_len;
+       offset = ap->a_offset;
+       /* Size the I/O buffer from the file's block size (BLKDEV_IOSIZE if zero), capped at MAXPHYS. */
+       error = VOP_GETATTR(vp, vap, td->td_ucred);
+       if (error != 0)
+               goto out;
+       iosize = vap->va_blocksize;
+       if (iosize == 0)
+               iosize = BLKDEV_IOSIZE;
+       if (iosize > MAXPHYS)
+               iosize = MAXPHYS;
+       buf = malloc(iosize, M_TEMP, M_WAITOK);
+
+#ifdef __notyet__
+       /*
+        * Check if the filesystem sets f_maxfilesize; if not use
+        * VOP_SETATTR to perform the check.
+        */
+       error = VFS_STATFS(vp->v_mount, &sfs, td);
+       if (error != 0)
+               goto out;
+       if (sfs.f_maxfilesize) {
+               if (offset > sfs.f_maxfilesize || len > sfs.f_maxfilesize ||
+                   offset + len > sfs.f_maxfilesize) {
+                       error = EFBIG;
+                       goto out;
+               }
+       } else
+#endif
+       if (offset + len > vap->va_size) {      /* range ends past va_size: extend the file first */
+               VATTR_NULL(vap);
+               vap->va_size = offset + len;
+               error = VOP_SETATTR(vp, vap, td->td_ucred);
+               if (error != 0)
+                       goto out;
+       }
+       /* Process the range one chunk (at most iosize bytes) at a time. */
+       while (len > 0) {
+               if (should_yield()) {   /* drop the vnode lock around the yield */
+                       VOP_UNLOCK(vp, 0);
+                       locked = 0;
+                       kern_yield(-1);
+                       error = vn_lock(vp, LK_EXCLUSIVE);
+                       if (error != 0)
+                               break;
+                       locked = 1;
+                       error = VOP_GETATTR(vp, vap, td->td_ucred);     /* refresh attributes after relocking */
+                       if (error != 0)
+                               break;
+               }
+
+               /*
+                * Read and write back anything below the nominal file
+                * size.  There's currently no way outside the filesystem
+                * to know whether this area is sparse or not.
+                */
+               cur = iosize;   /* chunk runs to the next iosize-aligned offset ... */
+               if ((offset % iosize) != 0)
+                       cur -= (offset % iosize);
+               if (cur > len)  /* ... or to the end of the range, whichever is first */
+                       cur = len;
+               if (offset < vap->va_size) {    /* below va_size: read so existing data is preserved */
+                       aiov.iov_base = buf;
+                       aiov.iov_len = cur;
+                       auio.uio_iov = &aiov;
+                       auio.uio_iovcnt = 1;
+                       auio.uio_offset = offset;
+                       auio.uio_resid = cur;
+                       auio.uio_segflg = UIO_SYSSPACE;
+                       auio.uio_rw = UIO_READ;
+                       auio.uio_td = td;
+                       error = VOP_READ(vp, &auio, 0, td->td_ucred);
+                       if (error != 0)
+                               break;
+                       if (auio.uio_resid > 0) {       /* short read: zero the unread tail of the chunk */
+                               bzero(buf + cur - auio.uio_resid,
+                                   auio.uio_resid);
+                       }
+               } else {        /* NOTE(review): looks unreachable once va_size was raised above, unless the file shrinks across a yield -- confirm */
+                       bzero(buf, cur);
+               }
+               /* Write the chunk back at the same offset. */
+               aiov.iov_base = buf;
+               aiov.iov_len = cur;
+               auio.uio_iov = &aiov;
+               auio.uio_iovcnt = 1;
+               auio.uio_offset = offset;
+               auio.uio_resid = cur;
+               auio.uio_segflg = UIO_SYSSPACE;
+               auio.uio_rw = UIO_WRITE;
+               auio.uio_td = td;
+
+               error = VOP_WRITE(vp, &auio, 0, td->td_ucred);
+               if (error != 0)
+                       break;
+               /* Chunk done; advance to the next one. */
+               len -= cur;
+               offset += cur;
+       }
+
+ out:
+       KASSERT(locked || error != 0, ("How'd I get unlocked with no error?"));
+       if (locked && error != 0)       /* failure path: return with vp unlocked */
+               VOP_UNLOCK(vp, 0);
+       free(buf, M_TEMP);      /* buf is still NULL if the first VOP_GETATTR failed */
+       return (error);
+}
+
 /*
  * vfs default ops
  * used to fill the vfs function table to get reasonable default return values.

Modified: head/sys/kern/vfs_syscalls.c
==============================================================================
--- head/sys/kern/vfs_syscalls.c        Mon Apr 18 16:15:59 2011        
(r220790)
+++ head/sys/kern/vfs_syscalls.c        Mon Apr 18 16:32:22 2011        
(r220791)
@@ -4671,3 +4671,83 @@ out:
        VFS_UNLOCK_GIANT(vfslocked);
        return (error);
 }
+/* Check the descriptor and range, then call VOP_ALLOCATE() on its vnode under an exclusive vnode lock. */
+static int
+kern_posix_fallocate(struct thread *td, int fd, off_t offset, off_t len)
+{
+       struct file *fp;
+       struct mount *mp;
+       struct vnode *vp;
+       int error, vfslocked, vnlocked;
+
+       fp = NULL;
+       mp = NULL;
+       vfslocked = 0;
+       vnlocked = 0;
+       error = fget(td, fd, &fp);
+       if (error != 0)
+               goto out;
+       /* Only vnode-backed descriptors are accepted: pipes/FIFOs get ESPIPE, anything else ENODEV. */
+       switch (fp->f_type) {
+       case DTYPE_VNODE:
+               break;
+       case DTYPE_PIPE:
+       case DTYPE_FIFO:
+               error = ESPIPE;
+               goto out;
+       default:
+               error = ENODEV;
+               goto out;
+       }
+        if ((fp->f_flag & FWRITE) == 0) {      /* descriptor was not opened for writing */
+                error = EBADF;
+               goto out;
+        }
+       vp = fp->f_vnode;
+       if (vp->v_type != VREG) {       /* regular files only */
+               error = ENODEV;
+               goto out;
+       }
+       if (offset < 0 || len <= 0) {
+               error = EINVAL;
+               goto out;
+       }
+       /* Check for wrap. */
+       if (offset > OFF_MAX - len) {   /* i.e. offset + len > OFF_MAX, tested without overflowing */
+               error = EFBIG;
+               goto out;
+       }
+
+       bwillwrite();
+       vfslocked = VFS_LOCK_GIANT(vp->v_mount);
+       error = vn_start_write(vp, &mp, V_WAIT | PCATCH);
+       if (error != 0)
+               goto out;
+       error = vn_lock(vp, LK_EXCLUSIVE);
+       if (error != 0)
+               goto out;
+       vnlocked = 1;
+#ifdef MAC
+       error = mac_vnode_check_write(td->td_ucred, fp->f_cred, vp);
+       if (error != 0)
+               goto out;
+#endif
+       error = VOP_ALLOCATE(vp, offset, len);
+       if (error != 0)
+               vnlocked = 0;   /* skip the unlock below: the default vop_stdallocate() has already unlocked vp on its error path */
+ out:
+       if (vnlocked)
+               VOP_UNLOCK(vp, 0);
+       vn_finished_write(mp);  /* mp is still NULL if vn_start_write() was never reached */
+       VFS_UNLOCK_GIANT(vfslocked);
+       if (fp != NULL)
+               fdrop(fp, td);
+       return (error);
+}
+/* Syscall handler: pass the fd, offset and len from uap to kern_posix_fallocate(). */
+int
+posix_fallocate(struct thread *td, struct posix_fallocate_args *uap)
+{
+
+       return (kern_posix_fallocate(td, uap->fd, uap->offset, uap->len));
+}

Modified: head/sys/kern/vnode_if.src
==============================================================================
--- head/sys/kern/vnode_if.src  Mon Apr 18 16:15:59 2011        (r220790)
+++ head/sys/kern/vnode_if.src  Mon Apr 18 16:32:22 2011        (r220791)
@@ -608,6 +608,7 @@ vop_vptofh {
        IN struct fid *fhp;
 };
 
+
 %% vptocnp             vp      L L L
 %% vptocnp             vpp     - U -
 
@@ -618,3 +619,12 @@ vop_vptocnp {
        INOUT char *buf;
        INOUT int *buflen;
 };
+
+
+%% allocate    vp      E E U
+
+vop_allocate {
+       IN struct vnode *vp;
+       IN off_t offset;
+       IN off_t len;
+};

Modified: head/sys/sys/fcntl.h
==============================================================================
--- head/sys/sys/fcntl.h        Mon Apr 18 16:15:59 2011        (r220790)
+++ head/sys/sys/fcntl.h        Mon Apr 18 16:32:22 2011        (r220791)
@@ -278,7 +278,7 @@ struct oflock {
 #endif
 
 /*
- * XXX missing posix_fadvise() and posix_fallocate(), and POSIX_FADV_* macros.
+ * XXX missing posix_fadvise() and POSIX_FADV_* macros.
  */
 
 #ifndef _KERNEL
@@ -289,6 +289,9 @@ int fcntl(int, int, ...);
 #if __BSD_VISIBLE || __POSIX_VISIBLE >= 200809
 int    openat(int, const char *, int, ...);
 #endif
+#if __BSD_VISIBLE || __POSIX_VISIBLE >= 200112
+int    posix_fallocate(int, off_t, off_t);
+#endif
 #if __BSD_VISIBLE
 int    flock(int, int);
 #endif

Modified: head/sys/sys/vnode.h
==============================================================================
--- head/sys/sys/vnode.h        Mon Apr 18 16:15:59 2011        (r220790)
+++ head/sys/sys/vnode.h        Mon Apr 18 16:32:22 2011        (r220791)
@@ -689,6 +689,7 @@ int vop_stdaccessx(struct vop_accessx_ar
 int    vop_stdadvlock(struct vop_advlock_args *ap);
 int    vop_stdadvlockasync(struct vop_advlockasync_args *ap);
 int    vop_stdadvlockpurge(struct vop_advlockpurge_args *ap);
+int    vop_stdallocate(struct vop_allocate_args *ap);
 int    vop_stdpathconf(struct vop_pathconf_args *);
 int    vop_stdpoll(struct vop_poll_args *);
 int    vop_stdvptocnp(struct vop_vptocnp_args *ap);
_______________________________________________
svn-src-all@freebsd.org mailing list
http://lists.freebsd.org/mailman/listinfo/svn-src-all
To unsubscribe, send any mail to "svn-src-all-unsubscr...@freebsd.org"

Reply via email to