Author: mjg
Date: Fri Aug  7 23:06:40 2020
New Revision: 364044
URL: https://svnweb.freebsd.org/changeset/base/364044

Log:
  vfs: add VOP_STAT
  
  The current scheme of calling VOP_GETATTR adds avoidable overhead.
  
  An example with tmpfs doing fstat (ops/s):
  before: 7488958
  after:  7913833
  
  Reviewed by:  kib (previous version)
  Differential Revision:        https://reviews.freebsd.org/D25910

Modified:
  head/share/man/man9/Makefile
  head/share/man/man9/VOP_ATTRIB.9
  head/sys/compat/linuxkpi/common/src/linux_compat.c
  head/sys/kern/vfs_default.c
  head/sys/kern/vfs_syscalls.c
  head/sys/kern/vfs_vnops.c
  head/sys/kern/vnode_if.src
  head/sys/security/audit/audit_arg.c
  head/sys/sys/vnode.h

Modified: head/share/man/man9/Makefile
==============================================================================
--- head/share/man/man9/Makefile        Fri Aug  7 19:58:16 2020        (r364043)
+++ head/share/man/man9/Makefile        Fri Aug  7 23:06:40 2020        (r364044)
@@ -2308,7 +2308,8 @@ MLINKS+=vm_page_insert.9 vm_page_remove.9
 MLINKS+=vm_page_wire.9 vm_page_unwire.9
 MLINKS+=VOP_ACCESS.9 VOP_ACCESSX.9
 MLINKS+=VOP_ATTRIB.9 VOP_GETATTR.9 \
-       VOP_ATTRIB.9 VOP_SETATTR.9
+       VOP_ATTRIB.9 VOP_SETATTR.9 \
+       VOP_ATTRIB.9 VOP_STAT.9
 MLINKS+=VOP_CREATE.9 VOP_MKDIR.9 \
        VOP_CREATE.9 VOP_MKNOD.9 \
        VOP_CREATE.9 VOP_SYMLINK.9

Modified: head/share/man/man9/VOP_ATTRIB.9
==============================================================================
--- head/share/man/man9/VOP_ATTRIB.9    Fri Aug  7 19:58:16 2020        (r364043)
+++ head/share/man/man9/VOP_ATTRIB.9    Fri Aug  7 23:06:40 2020        (r364044)
@@ -28,7 +28,7 @@
 .\"
 .\" $FreeBSD$
 .\"
-.Dd August 29, 2008
+.Dd August 8, 2020
 .Dt VOP_ATTRIB 9
 .Os
 .Sh NAME
@@ -42,21 +42,51 @@
 .Fn VOP_GETATTR "struct vnode *vp" "struct vattr *vap" "struct ucred *cred"
 .Ft int
 .Fn VOP_SETATTR "struct vnode *vp" "struct vattr *vap" "struct ucred *cred"
+.Ft int
+.Fn VOP_STAT "struct vnode *vp" "struct stat *sb" "struct ucred *active_cred" \
+"struct ucred *file_cred" "struct thread *td"
 .Sh DESCRIPTION
 These entry points manipulate various attributes of a file or directory,
 including file permissions, owner, group, size,
 access time and modification time.
 .Pp
-The arguments are:
+.Fn VOP_STAT
+returns data in a format suitable for the
+.Xr stat 2
+system call and by default is implemented as a wrapper around
+.Fn VOP_GETATTR .
+Filesystems may want to implement their own variant for performance reasons.
+.Pp
+For
+.Fn VOP_GETATTR
+and
+.Fn VOP_SETATTR
+the arguments are:
 .Bl -tag -width cred
 .It Fa vp
 The vnode of the file.
 .It Fa vap
 The attributes of the file.
 .It Fa cred
-The user credentials of the calling process.
+The user credentials of the calling thread.
 .El
 .Pp
+For
+.Fn VOP_STAT
+the arguments are:
+.Bl -tag -width active_cred
+.It Fa vp
+The vnode of the file.
+.It Fa sb
+The attributes of the file.
+.It Fa active_cred
+The user credentials of the calling thread.
+.It Fa file_cred
+The credentials installed on the file description pointing to the vnode or NOCRED.
+.It Fa td
+The calling thread.
+.El
+.Pp
 Attributes which are not being modified by
 .Fn VOP_SETATTR
 should be set to the value
@@ -67,8 +97,11 @@ the contents of
 .Fa *vap
 prior to setting specific values.
 .Sh LOCKS
+Both
 .Fn VOP_GETATTR
-expects the vnode to be locked on entry and will leave the vnode locked on
+and
+.Fn VOP_STAT
+expect the vnode to be locked on entry and will leave the vnode locked on
 return.
 The lock type can be either shared or exclusive.
 .Pp
@@ -84,6 +117,10 @@ otherwise an appropriate error is returned.
 .Fn VOP_SETATTR
 returns zero if the attributes were changed successfully, otherwise an
 appropriate error is returned.
+.Fn VOP_STAT
+returns 0 if it was able to retrieve the attribute data into
+.Fa *sb ,
+otherwise an appropriate error is returned.
 .Sh ERRORS
 .Bl -tag -width Er
 .It Bq Er EPERM

Modified: head/sys/compat/linuxkpi/common/src/linux_compat.c
==============================================================================
--- head/sys/compat/linuxkpi/common/src/linux_compat.c  Fri Aug  7 19:58:16 2020        (r364043)
+++ head/sys/compat/linuxkpi/common/src/linux_compat.c  Fri Aug  7 23:06:40 2020        (r364044)
@@ -1691,7 +1691,7 @@ linux_file_stat(struct file *fp, struct stat *sb, stru
        vp = filp->f_vnode;
 
        vn_lock(vp, LK_SHARED | LK_RETRY);
-       error = vn_stat(vp, sb, td->td_ucred, NOCRED, td);
+       error = VOP_STAT(vp, sb, td->td_ucred, NOCRED, td);
        VOP_UNLOCK(vp);
 
        return (error);

Modified: head/sys/kern/vfs_default.c
==============================================================================
--- head/sys/kern/vfs_default.c Fri Aug  7 19:58:16 2020        (r364043)
+++ head/sys/kern/vfs_default.c Fri Aug  7 23:06:40 2020        (r364044)
@@ -57,6 +57,9 @@ __FBSDID("$FreeBSD$");
 #include <sys/vnode.h>
 #include <sys/dirent.h>
 #include <sys/poll.h>
+#include <sys/stat.h>
+#include <security/audit/audit.h>
+#include <sys/priv.h>
 
 #include <security/mac/mac_framework.h>
 
@@ -87,6 +90,7 @@ static int vop_stdadd_writecount(struct vop_add_writec
 static int vop_stdcopy_file_range(struct vop_copy_file_range_args *ap);
 static int vop_stdfdatasync(struct vop_fdatasync_args *ap);
 static int vop_stdgetpages_async(struct vop_getpages_async_args *ap);
+static int vop_stdstat(struct vop_stat_args *ap);
 
 /*
  * This vnode table stores what we want to do if the filesystem doesn't
@@ -114,6 +118,7 @@ struct vop_vector default_vnodeops = {
        .vop_bmap =             vop_stdbmap,
        .vop_close =            VOP_NULL,
        .vop_fsync =            VOP_NULL,
+       .vop_stat =             vop_stdstat,
        .vop_fdatasync =        vop_stdfdatasync,
        .vop_getpages =         vop_stdgetpages,
        .vop_getpages_async =   vop_stdgetpages_async,
@@ -1460,4 +1465,112 @@ vop_sigdefer(struct vop_vector *vop, struct vop_generi
        rc = bp(a);
        sigallowstop(prev_stops);
        return (rc);
+}
+
+static int
+vop_stdstat(struct vop_stat_args *a)
+{
+       struct vattr vattr;
+       struct vattr *vap;
+       struct vnode *vp;
+       struct stat *sb;
+       int error;
+       u_short mode;
+
+       vp = a->a_vp;
+       sb = a->a_sb;
+
+       error = vop_stat_helper_pre(a);
+       if (error != 0)
+               return (error);
+
+       vap = &vattr;
+
+       /*
+        * Initialize defaults for new and unusual fields, so that file
+        * systems which don't support these fields don't need to know
+        * about them.
+        */
+       vap->va_birthtime.tv_sec = -1;
+       vap->va_birthtime.tv_nsec = 0;
+       vap->va_fsid = VNOVAL;
+       vap->va_rdev = NODEV;
+
+       error = VOP_GETATTR(vp, vap, a->a_active_cred);
+       if (error)
+               goto out;
+
+       /*
+        * Zero the spare stat fields
+        */
+       bzero(sb, sizeof *sb);
+
+       /*
+        * Copy from vattr table
+        */
+       if (vap->va_fsid != VNOVAL)
+               sb->st_dev = vap->va_fsid;
+       else
+               sb->st_dev = vp->v_mount->mnt_stat.f_fsid.val[0];
+       sb->st_ino = vap->va_fileid;
+       mode = vap->va_mode;
+       switch (vap->va_type) {
+       case VREG:
+               mode |= S_IFREG;
+               break;
+       case VDIR:
+               mode |= S_IFDIR;
+               break;
+       case VBLK:
+               mode |= S_IFBLK;
+               break;
+       case VCHR:
+               mode |= S_IFCHR;
+               break;
+       case VLNK:
+               mode |= S_IFLNK;
+               break;
+       case VSOCK:
+               mode |= S_IFSOCK;
+               break;
+       case VFIFO:
+               mode |= S_IFIFO;
+               break;
+       default:
+               error = EBADF;
+               goto out;
+       }
+       sb->st_mode = mode;
+       sb->st_nlink = vap->va_nlink;
+       sb->st_uid = vap->va_uid;
+       sb->st_gid = vap->va_gid;
+       sb->st_rdev = vap->va_rdev;
+       if (vap->va_size > OFF_MAX) {
+               error = EOVERFLOW;
+               goto out;
+       }
+       sb->st_size = vap->va_size;
+       sb->st_atim.tv_sec = vap->va_atime.tv_sec;
+       sb->st_atim.tv_nsec = vap->va_atime.tv_nsec;
+       sb->st_mtim.tv_sec = vap->va_mtime.tv_sec;
+       sb->st_mtim.tv_nsec = vap->va_mtime.tv_nsec;
+       sb->st_ctim.tv_sec = vap->va_ctime.tv_sec;
+       sb->st_ctim.tv_nsec = vap->va_ctime.tv_nsec;
+       sb->st_birthtim.tv_sec = vap->va_birthtime.tv_sec;
+       sb->st_birthtim.tv_nsec = vap->va_birthtime.tv_nsec;
+
+       /*
+        * According to www.opengroup.org, the meaning of st_blksize is
+        *   "a filesystem-specific preferred I/O block size for this
+        *    object.  In some filesystem types, this may vary from file
+        *    to file"
+        * Use minimum/default of PAGE_SIZE (e.g. for VCHR).
+        */
+
+       sb->st_blksize = max(PAGE_SIZE, vap->va_blocksize);
+       sb->st_flags = vap->va_flags;
+       sb->st_blocks = vap->va_bytes / S_BLKSIZE;
+       sb->st_gen = vap->va_gen;
+out:
+       return (vop_stat_helper_post(a, error));
 }

Modified: head/sys/kern/vfs_syscalls.c
==============================================================================
--- head/sys/kern/vfs_syscalls.c        Fri Aug  7 19:58:16 2020        (r364043)
+++ head/sys/kern/vfs_syscalls.c        Fri Aug  7 23:06:40 2020        (r364044)
@@ -1867,7 +1867,7 @@ restart:
        if (vp->v_type == VDIR && oldinum == 0) {
                error = EPERM;          /* POSIX */
        } else if (oldinum != 0 &&
-                 ((error = vn_stat(vp, &sb, td->td_ucred, NOCRED, td)) == 0) &&
+                 ((error = VOP_STAT(vp, &sb, td->td_ucred, NOCRED, td)) == 0) &&
                  sb.st_ino != oldinum) {
                error = EIDRM;  /* Identifier removed */
        } else if (fp != NULL && fp->f_vnode != vp) {
@@ -2381,7 +2381,7 @@ kern_statat(struct thread *td, int flag, int fd, const
 
        if ((error = namei(&nd)) != 0)
                return (error);
-       error = vn_stat(nd.ni_vp, sbp, td->td_ucred, NOCRED, td);
+       error = VOP_STAT(nd.ni_vp, sbp, td->td_ucred, NOCRED, td);
        if (error == 0) {
                SDT_PROBE2(vfs, , stat, mode, path, sbp->st_mode);
                if (S_ISREG(sbp->st_mode))
@@ -4566,7 +4566,7 @@ kern_fhstat(struct thread *td, struct fhandle fh, stru
        vfs_unbusy(mp);
        if (error != 0)
                return (error);
-       error = vn_stat(vp, sb, td->td_ucred, NOCRED, td);
+       error = VOP_STAT(vp, sb, td->td_ucred, NOCRED, td);
        vput(vp);
        return (error);
 }

Modified: head/sys/kern/vfs_vnops.c
==============================================================================
--- head/sys/kern/vfs_vnops.c   Fri Aug  7 19:58:16 2020        (r364043)
+++ head/sys/kern/vfs_vnops.c   Fri Aug  7 23:06:40 2020        (r364044)
@@ -1455,121 +1455,10 @@ vn_statfile(struct file *fp, struct stat *sb, struct u
        int error;
 
        vn_lock(vp, LK_SHARED | LK_RETRY);
-       error = vn_stat(vp, sb, active_cred, fp->f_cred, td);
+       error = VOP_STAT(vp, sb, active_cred, fp->f_cred, td);
        VOP_UNLOCK(vp);
 
        return (error);
-}
-
-/*
- * Stat a vnode; implementation for the stat syscall
- */
-int
-vn_stat(struct vnode *vp, struct stat *sb, struct ucred *active_cred,
-    struct ucred *file_cred, struct thread *td)
-{
-       struct vattr vattr;
-       struct vattr *vap;
-       int error;
-       u_short mode;
-
-       AUDIT_ARG_VNODE1(vp);
-#ifdef MAC
-       error = mac_vnode_check_stat(active_cred, file_cred, vp);
-       if (error)
-               return (error);
-#endif
-
-       vap = &vattr;
-
-       /*
-        * Initialize defaults for new and unusual fields, so that file
-        * systems which don't support these fields don't need to know
-        * about them.
-        */
-       vap->va_birthtime.tv_sec = -1;
-       vap->va_birthtime.tv_nsec = 0;
-       vap->va_fsid = VNOVAL;
-       vap->va_rdev = NODEV;
-
-       error = VOP_GETATTR(vp, vap, active_cred);
-       if (error)
-               return (error);
-
-       /*
-        * Zero the spare stat fields
-        */
-       bzero(sb, sizeof *sb);
-
-       /*
-        * Copy from vattr table
-        */
-       if (vap->va_fsid != VNOVAL)
-               sb->st_dev = vap->va_fsid;
-       else
-               sb->st_dev = vp->v_mount->mnt_stat.f_fsid.val[0];
-       sb->st_ino = vap->va_fileid;
-       mode = vap->va_mode;
-       switch (vap->va_type) {
-       case VREG:
-               mode |= S_IFREG;
-               break;
-       case VDIR:
-               mode |= S_IFDIR;
-               break;
-       case VBLK:
-               mode |= S_IFBLK;
-               break;
-       case VCHR:
-               mode |= S_IFCHR;
-               break;
-       case VLNK:
-               mode |= S_IFLNK;
-               break;
-       case VSOCK:
-               mode |= S_IFSOCK;
-               break;
-       case VFIFO:
-               mode |= S_IFIFO;
-               break;
-       default:
-               return (EBADF);
-       }
-       sb->st_mode = mode;
-       sb->st_nlink = vap->va_nlink;
-       sb->st_uid = vap->va_uid;
-       sb->st_gid = vap->va_gid;
-       sb->st_rdev = vap->va_rdev;
-       if (vap->va_size > OFF_MAX)
-               return (EOVERFLOW);
-       sb->st_size = vap->va_size;
-       sb->st_atim.tv_sec = vap->va_atime.tv_sec;
-       sb->st_atim.tv_nsec = vap->va_atime.tv_nsec;
-       sb->st_mtim.tv_sec = vap->va_mtime.tv_sec;
-       sb->st_mtim.tv_nsec = vap->va_mtime.tv_nsec;
-       sb->st_ctim.tv_sec = vap->va_ctime.tv_sec;
-       sb->st_ctim.tv_nsec = vap->va_ctime.tv_nsec;
-       sb->st_birthtim.tv_sec = vap->va_birthtime.tv_sec;
-       sb->st_birthtim.tv_nsec = vap->va_birthtime.tv_nsec;
-
-       /*
-        * According to www.opengroup.org, the meaning of st_blksize is 
-        *   "a filesystem-specific preferred I/O block size for this 
-        *    object.  In some filesystem types, this may vary from file
-        *    to file"
-        * Use minimum/default of PAGE_SIZE (e.g. for VCHR).
-        */
-
-       sb->st_blksize = max(PAGE_SIZE, vap->va_blocksize);
-
-       sb->st_flags = vap->va_flags;
-       if (priv_check_cred_vfs_generation(td->td_ucred))
-               sb->st_gen = 0;
-       else
-               sb->st_gen = vap->va_gen;
-
-       sb->st_blocks = vap->va_bytes / S_BLKSIZE;
-       return (0);
 }
 
 /*

Modified: head/sys/kern/vnode_if.src
==============================================================================
--- head/sys/kern/vnode_if.src  Fri Aug  7 19:58:16 2020        (r364043)
+++ head/sys/kern/vnode_if.src  Fri Aug  7 23:06:40 2020        (r364044)
@@ -177,6 +177,17 @@ vop_accessx {
 };
 
 
+%% stat        vp      L L L
+
+vop_stat {
+       IN struct vnode *vp;
+       OUT struct stat *sb;
+       IN struct ucred *active_cred;
+       IN struct ucred *file_cred;
+       IN struct thread *td;
+};
+
+
 %% getattr     vp      L L L
 
 vop_getattr {

Modified: head/sys/security/audit/audit_arg.c
==============================================================================
--- head/sys/security/audit/audit_arg.c Fri Aug  7 19:58:16 2020        (r364043)
+++ head/sys/security/audit/audit_arg.c Fri Aug  7 23:06:40 2020        (r364044)
@@ -854,7 +854,7 @@ audit_arg_upath2_canon(char *upath)
  * It is assumed that the caller will hold any vnode locks necessary to
  * perform a VOP_GETATTR() on the passed vnode.
  *
- * XXX: The attr code is very similar to vfs_vnops.c:vn_stat(), but always
+ * XXX: The attr code is very similar to vfs_default.c:vop_stdstat(), but always
  * provides access to the generation number as we need that to construct the
  * BSM file ID.
  *

Modified: head/sys/sys/vnode.h
==============================================================================
--- head/sys/sys/vnode.h        Fri Aug  7 19:58:16 2020        (r364043)
+++ head/sys/sys/vnode.h        Fri Aug  7 23:06:40 2020        (r364044)
@@ -737,8 +737,6 @@ int vn_rdwr_inchunks(enum uio_rw rw, struct vnode *vp,
            struct thread *td);
 int    vn_rlimit_fsize(const struct vnode *vn, const struct uio *uio,
            struct thread *td);
-int    vn_stat(struct vnode *vp, struct stat *sb, struct ucred *active_cred,
-           struct ucred *file_cred, struct thread *td);
 int    vn_start_write(struct vnode *vp, struct mount **mpp, int flags);
 int    vn_start_secondary_write(struct vnode *vp, struct mount **mpp,
            int flags);
@@ -892,6 +890,22 @@ void       vop_need_inactive_debugpost(void *a, int rc);
 #endif
 
 void   vop_rename_fail(struct vop_rename_args *ap);
+
+#define        vop_stat_helper_pre(ap) ({                                      \
+       int _error;                                                             \
+       AUDIT_ARG_VNODE1(ap->a_vp);                                             \
+       _error = mac_vnode_check_stat(ap->a_active_cred, ap->a_file_cred, ap->a_vp);\
+       if (__predict_true(_error == 0))                                        \
+               bzero(ap->a_sb, sizeof(*ap->a_sb));                             \
+       _error;                                                                 \
+})
+
+#define        vop_stat_helper_post(ap, error) ({                              \
+       int _error = (error);                                                   \
+       if (priv_check_cred_vfs_generation(ap->a_td->td_ucred))                 \
+               ap->a_sb->st_gen = 0;                                           \
+       _error;                                                                 \
+})
 
 #define        VOP_WRITE_PRE(ap)                                               \
        struct vattr va;                                                \
_______________________________________________
svn-src-head@freebsd.org mailing list
https://lists.freebsd.org/mailman/listinfo/svn-src-head
To unsubscribe, send any mail to "svn-src-head-unsubscr...@freebsd.org"

Reply via email to