vfs_mkdir() does not guarantee to leave the child dentry hashed or make
it positive on success, and in many such cases the filesystem had to use
a different dentry which it can now return.

This patch changes vfs_mkdir() to return the dentry provided by the
filesystem, which is hashed and positive when provided.  This reduces
the number of cases where the resulting dentry is not positive to a
handful which don't deserve extra effort.

The only callers of vfs_mkdir() which are interested in the resulting
inode are in-kernel filesystem clients: cachefiles, nfsd, smb/server.
The only filesystems that don't reliably provide the inode are:
- kernfs, tracefs which these clients are unlikely to be interested in
- cifs in some configurations would need to do a lookup to find the
  created inode, but doesn't.  cifs cannot be exported via NFS, is
  unlikely to be used by cachefiles, and smb/server only has a soft
  requirement for the inode, so this is unlikely to be a problem in
  practice.
- hostfs, nfs, cifs may need to do a lookup (rarely for NFS) and it is
  possible for a race to make that lookup fail.  Actual failure
  is unlikely, and provided callers handle negative dentries gracefully
  they will fail safe.

So this patch removes the lookup code in nfsd and smb/server and adjusts
them to fail safe if a negative dentry is provided:
- cachefiles already fails safe by restarting the task from the
  top - it still does with this change, though it no longer calls
  cachefiles_put_directory() as that will crash if the dentry is
  negative.
- nfsd reports "Server-fault" which is what it used to do if the lookup
  failed. This will never happen on any file-systems that it can actually
  export, so this is of no consequence.  I removed the fh_update()
  call as that is not needed and out-of-place.  A subsequent
  nfsd_create_setattr() call will call fh_update() when needed.
- smb/server only wants the inode to call ksmbd_smb_inherit_owner()
  which updates ->i_uid (without calling notify_change() or similar)
  which can be safely skipped on cifs (I hope).

If a different dentry is returned, the first one is put.  If necessary
the fact that it is new can be determined by comparing pointers.  A new
dentry will certainly have a new pointer (as the old is put after the
new is obtained).
Similarly if an error is returned (via ERR_PTR()) the original dentry is
put.

Reviewed-by: Jeff Layton <jlay...@kernel.org>
Signed-off-by: NeilBrown <ne...@suse.de>
---
 drivers/base/devtmpfs.c  |  7 +++----
 fs/cachefiles/namei.c    | 16 +++++++-------
 fs/ecryptfs/inode.c      | 14 +++++++++----
 fs/init.c                |  7 +++++--
 fs/namei.c               | 45 ++++++++++++++++++++++++++--------------
 fs/nfsd/nfs4recover.c    |  7 +++++--
 fs/nfsd/vfs.c            | 31 ++++++++++-----------------
 fs/overlayfs/dir.c       | 37 ++++-----------------------------
 fs/overlayfs/overlayfs.h | 15 +++++++-------
 fs/overlayfs/super.c     |  7 ++++---
 fs/smb/server/vfs.c      | 32 +++++++++-------------------
 fs/xfs/scrub/orphanage.c |  9 ++++----
 include/linux/fs.h       |  4 ++--
 13 files changed, 104 insertions(+), 127 deletions(-)

diff --git a/drivers/base/devtmpfs.c b/drivers/base/devtmpfs.c
index 7a101009bee7..6dd1a8860f1c 100644
--- a/drivers/base/devtmpfs.c
+++ b/drivers/base/devtmpfs.c
@@ -175,18 +175,17 @@ static int dev_mkdir(const char *name, umode_t mode)
 {
        struct dentry *dentry;
        struct path path;
-       int err;
 
        dentry = kern_path_create(AT_FDCWD, name, &path, LOOKUP_DIRECTORY);
        if (IS_ERR(dentry))
                return PTR_ERR(dentry);
 
-       err = vfs_mkdir(&nop_mnt_idmap, d_inode(path.dentry), dentry, mode);
-       if (!err)
+       dentry = vfs_mkdir(&nop_mnt_idmap, d_inode(path.dentry), dentry, mode);
+       if (!IS_ERR(dentry))
                /* mark as kernel-created inode */
                d_inode(dentry)->i_private = &thread;
        done_path_create(&path, dentry);
-       return err;
+       return PTR_ERR_OR_ZERO(dentry);
 }
 
 static int create_path(const char *nodepath)
diff --git a/fs/cachefiles/namei.c b/fs/cachefiles/namei.c
index 7cf59713f0f7..83a60126de0f 100644
--- a/fs/cachefiles/namei.c
+++ b/fs/cachefiles/namei.c
@@ -128,18 +128,19 @@ struct dentry *cachefiles_get_directory(struct 
cachefiles_cache *cache,
                ret = security_path_mkdir(&path, subdir, 0700);
                if (ret < 0)
                        goto mkdir_error;
-               ret = cachefiles_inject_write_error();
-               if (ret == 0)
-                       ret = vfs_mkdir(&nop_mnt_idmap, d_inode(dir), subdir, 
0700);
-               if (ret < 0) {
+               subdir = ERR_PTR(cachefiles_inject_write_error());
+               if (!IS_ERR(subdir))
+                       subdir = vfs_mkdir(&nop_mnt_idmap, d_inode(dir), 
subdir, 0700);
+               ret = PTR_ERR(subdir);
+               if (IS_ERR(subdir)) {
                        trace_cachefiles_vfs_error(NULL, d_inode(dir), ret,
                                                   
cachefiles_trace_mkdir_error);
                        goto mkdir_error;
                }
                trace_cachefiles_mkdir(dir, subdir);
 
-               if (unlikely(d_unhashed(subdir))) {
-                       cachefiles_put_directory(subdir);
+               if (unlikely(d_unhashed(subdir) || d_is_negative(subdir))) {
+                       dput(subdir);
                        goto retry;
                }
                ASSERT(d_backing_inode(subdir));
@@ -195,7 +196,8 @@ struct dentry *cachefiles_get_directory(struct 
cachefiles_cache *cache,
 
 mkdir_error:
        inode_unlock(d_inode(dir));
-       dput(subdir);
+       if (!IS_ERR(subdir))
+               dput(subdir);
        pr_err("mkdir %s failed with error %d\n", dirname, ret);
        return ERR_PTR(ret);
 
diff --git a/fs/ecryptfs/inode.c b/fs/ecryptfs/inode.c
index 6315dd194228..51a5c54eb740 100644
--- a/fs/ecryptfs/inode.c
+++ b/fs/ecryptfs/inode.c
@@ -511,10 +511,16 @@ static struct dentry *ecryptfs_mkdir(struct mnt_idmap 
*idmap, struct inode *dir,
        struct inode *lower_dir;
 
        rc = lock_parent(dentry, &lower_dentry, &lower_dir);
-       if (!rc)
-               rc = vfs_mkdir(&nop_mnt_idmap, lower_dir,
-                              lower_dentry, mode);
-       if (rc || d_really_is_negative(lower_dentry))
+       if (rc)
+               goto out;
+
+       lower_dentry = vfs_mkdir(&nop_mnt_idmap, lower_dir,
+                                lower_dentry, mode);
+       rc = PTR_ERR(lower_dentry);
+       if (IS_ERR(lower_dentry))
+               goto out;
+       rc = 0;
+       if (d_unhashed(lower_dentry))
                goto out;
        rc = ecryptfs_interpose(lower_dentry, dentry, dir->i_sb);
        if (rc)
diff --git a/fs/init.c b/fs/init.c
index e9387b6c4f30..eef5124885e3 100644
--- a/fs/init.c
+++ b/fs/init.c
@@ -230,9 +230,12 @@ int __init init_mkdir(const char *pathname, umode_t mode)
                return PTR_ERR(dentry);
        mode = mode_strip_umask(d_inode(path.dentry), mode);
        error = security_path_mkdir(&path, dentry, mode);
-       if (!error)
-               error = vfs_mkdir(mnt_idmap(path.mnt), path.dentry->d_inode,
+       if (!error) {
+               dentry = vfs_mkdir(mnt_idmap(path.mnt), path.dentry->d_inode,
                                  dentry, mode);
+               if (IS_ERR(dentry))
+                       error = PTR_ERR(dentry);
+       }
        done_path_create(&path, dentry);
        return error;
 }
diff --git a/fs/namei.c b/fs/namei.c
index e26574651a28..d00443e38d3a 100644
--- a/fs/namei.c
+++ b/fs/namei.c
@@ -4125,7 +4125,8 @@ EXPORT_SYMBOL(kern_path_create);
 
 void done_path_create(struct path *path, struct dentry *dentry)
 {
-       dput(dentry);
+       if (!IS_ERR(dentry))
+               dput(dentry);
        inode_unlock(path->dentry->d_inode);
        mnt_drop_write(path->mnt);
        path_put(path);
@@ -4271,7 +4272,7 @@ SYSCALL_DEFINE3(mknod, const char __user *, filename, 
umode_t, mode, unsigned, d
 }
 
 /**
- * vfs_mkdir - create directory
+ * vfs_mkdir - create directory returning correct dentry if possible
  * @idmap:     idmap of the mount the inode was found from
  * @dir:       inode of the parent directory
  * @dentry:    dentry of the child directory
@@ -4284,9 +4285,15 @@ SYSCALL_DEFINE3(mknod, const char __user *, filename, 
umode_t, mode, unsigned, d
  * care to map the inode according to @idmap before checking permissions.
  * On non-idmapped mounts or if permission checking is to be performed on the
  * raw inode simply pass @nop_mnt_idmap.
+ *
+ * In the event that the filesystem does not use the *@dentry but leaves it
+ * negative or unhashes it and possibly splices a different one returning it,
+ * the original dentry is dput() and the alternate is returned.
+ *
+ * In case of an error the dentry is dput() and an ERR_PTR() is returned.
  */
-int vfs_mkdir(struct mnt_idmap *idmap, struct inode *dir,
-             struct dentry *dentry, umode_t mode)
+struct dentry *vfs_mkdir(struct mnt_idmap *idmap, struct inode *dir,
+                        struct dentry *dentry, umode_t mode)
 {
        int error;
        unsigned max_links = dir->i_sb->s_max_links;
@@ -4294,31 +4301,35 @@ int vfs_mkdir(struct mnt_idmap *idmap, struct inode 
*dir,
 
        error = may_create(idmap, dir, dentry);
        if (error)
-               return error;
+               goto err;
 
+       error = -EPERM;
        if (!dir->i_op->mkdir)
-               return -EPERM;
+               goto err;
 
        mode = vfs_prepare_mode(idmap, dir, mode, S_IRWXUGO | S_ISVTX, 0);
        error = security_inode_mkdir(dir, dentry, mode);
        if (error)
-               return error;
+               goto err;
 
+       error = -EMLINK;
        if (max_links && dir->i_nlink >= max_links)
-               return -EMLINK;
+               goto err;
 
        de = dir->i_op->mkdir(idmap, dir, dentry, mode);
+       error = PTR_ERR(de);
        if (IS_ERR(de))
-               return PTR_ERR(de);
+               goto err;
        if (de) {
-               fsnotify_mkdir(dir, de);
-               /* Cannot return de yet */
-               dput(de);
-       } else {
-               fsnotify_mkdir(dir, dentry);
+               dput(dentry);
+               dentry = de;
        }
+       fsnotify_mkdir(dir, dentry);
+       return dentry;
 
-       return 0;
+err:
+       dput(dentry);
+       return ERR_PTR(error);
 }
 EXPORT_SYMBOL(vfs_mkdir);
 
@@ -4338,8 +4349,10 @@ int do_mkdirat(int dfd, struct filename *name, umode_t 
mode)
        error = security_path_mkdir(&path, dentry,
                        mode_strip_umask(path.dentry->d_inode, mode));
        if (!error) {
-               error = vfs_mkdir(mnt_idmap(path.mnt), path.dentry->d_inode,
+               dentry = vfs_mkdir(mnt_idmap(path.mnt), path.dentry->d_inode,
                                  dentry, mode);
+               if (IS_ERR(dentry))
+                       error = PTR_ERR(dentry);
        }
        done_path_create(&path, dentry);
        if (retry_estale(error, lookup_flags)) {
diff --git a/fs/nfsd/nfs4recover.c b/fs/nfsd/nfs4recover.c
index 28f4d5311c40..c1d9bd07285f 100644
--- a/fs/nfsd/nfs4recover.c
+++ b/fs/nfsd/nfs4recover.c
@@ -233,9 +233,12 @@ nfsd4_create_clid_dir(struct nfs4_client *clp)
                 * as well be forgiving and just succeed silently.
                 */
                goto out_put;
-       status = vfs_mkdir(&nop_mnt_idmap, d_inode(dir), dentry, S_IRWXU);
+       dentry = vfs_mkdir(&nop_mnt_idmap, d_inode(dir), dentry, S_IRWXU);
+       if (IS_ERR(dentry))
+               status = PTR_ERR(dentry);
 out_put:
-       dput(dentry);
+       if (!status)
+               dput(dentry);
 out_unlock:
        inode_unlock(d_inode(dir));
        if (status == 0) {
diff --git a/fs/nfsd/vfs.c b/fs/nfsd/vfs.c
index 1035010f1198..34d7aa531662 100644
--- a/fs/nfsd/vfs.c
+++ b/fs/nfsd/vfs.c
@@ -1461,7 +1461,7 @@ nfsd_create_locked(struct svc_rqst *rqstp, struct svc_fh 
*fhp,
        struct inode    *dirp;
        struct iattr    *iap = attrs->na_iattr;
        __be32          err;
-       int             host_err;
+       int             host_err = 0;
 
        dentry = fhp->fh_dentry;
        dirp = d_inode(dentry);
@@ -1488,25 +1488,15 @@ nfsd_create_locked(struct svc_rqst *rqstp, struct 
svc_fh *fhp,
                        nfsd_check_ignore_resizing(iap);
                break;
        case S_IFDIR:
-               host_err = vfs_mkdir(&nop_mnt_idmap, dirp, dchild, 
iap->ia_mode);
-               if (!host_err && unlikely(d_unhashed(dchild))) {
-                       struct dentry *d;
-                       d = lookup_one_len(dchild->d_name.name,
-                                          dchild->d_parent,
-                                          dchild->d_name.len);
-                       if (IS_ERR(d)) {
-                               host_err = PTR_ERR(d);
-                               break;
-                       }
-                       if (unlikely(d_is_negative(d))) {
-                               dput(d);
-                               err = nfserr_serverfault;
-                               goto out;
-                       }
+               dchild = vfs_mkdir(&nop_mnt_idmap, dirp, dchild, iap->ia_mode);
+               if (IS_ERR(dchild)) {
+                       host_err = PTR_ERR(dchild);
+               } else if (d_is_negative(dchild)) {
+                       err = nfserr_serverfault;
+                       goto out;
+               } else if (unlikely(dchild != resfhp->fh_dentry)) {
                        dput(resfhp->fh_dentry);
-                       resfhp->fh_dentry = dget(d);
-                       dput(dchild);
-                       dchild = d;
+                       resfhp->fh_dentry = dget(dchild);
                }
                break;
        case S_IFCHR:
@@ -1527,7 +1517,8 @@ nfsd_create_locked(struct svc_rqst *rqstp, struct svc_fh 
*fhp,
        err = nfsd_create_setattr(rqstp, fhp, resfhp, attrs);
 
 out:
-       dput(dchild);
+       if (!IS_ERR(dchild))
+               dput(dchild);
        return err;
 
 out_nfserr:
diff --git a/fs/overlayfs/dir.c b/fs/overlayfs/dir.c
index 21c3aaf7b274..fe493f3ed6b6 100644
--- a/fs/overlayfs/dir.c
+++ b/fs/overlayfs/dir.c
@@ -138,37 +138,6 @@ int ovl_cleanup_and_whiteout(struct ovl_fs *ofs, struct 
inode *dir,
        goto out;
 }
 
-int ovl_mkdir_real(struct ovl_fs *ofs, struct inode *dir,
-                  struct dentry **newdentry, umode_t mode)
-{
-       int err;
-       struct dentry *d, *dentry = *newdentry;
-
-       err = ovl_do_mkdir(ofs, dir, dentry, mode);
-       if (err)
-               return err;
-
-       if (likely(!d_unhashed(dentry)))
-               return 0;
-
-       /*
-        * vfs_mkdir() may succeed and leave the dentry passed
-        * to it unhashed and negative. If that happens, try to
-        * lookup a new hashed and positive dentry.
-        */
-       d = ovl_lookup_upper(ofs, dentry->d_name.name, dentry->d_parent,
-                            dentry->d_name.len);
-       if (IS_ERR(d)) {
-               pr_warn("failed lookup after mkdir (%pd2, err=%i).\n",
-                       dentry, err);
-               return PTR_ERR(d);
-       }
-       dput(dentry);
-       *newdentry = d;
-
-       return 0;
-}
-
 struct dentry *ovl_create_real(struct ovl_fs *ofs, struct inode *dir,
                               struct dentry *newdentry, struct ovl_cattr *attr)
 {
@@ -191,7 +160,8 @@ struct dentry *ovl_create_real(struct ovl_fs *ofs, struct 
inode *dir,
 
                case S_IFDIR:
                        /* mkdir is special... */
-                       err =  ovl_mkdir_real(ofs, dir, &newdentry, attr->mode);
+                       newdentry =  ovl_do_mkdir(ofs, dir, newdentry, 
attr->mode);
+                       err = PTR_ERR_OR_ZERO(newdentry);
                        break;
 
                case S_IFCHR:
@@ -219,7 +189,8 @@ struct dentry *ovl_create_real(struct ovl_fs *ofs, struct 
inode *dir,
        }
 out:
        if (err) {
-               dput(newdentry);
+               if (!IS_ERR(newdentry))
+                       dput(newdentry);
                return ERR_PTR(err);
        }
        return newdentry;
diff --git a/fs/overlayfs/overlayfs.h b/fs/overlayfs/overlayfs.h
index 0021e2025020..6f2f8f4cfbbc 100644
--- a/fs/overlayfs/overlayfs.h
+++ b/fs/overlayfs/overlayfs.h
@@ -241,13 +241,14 @@ static inline int ovl_do_create(struct ovl_fs *ofs,
        return err;
 }
 
-static inline int ovl_do_mkdir(struct ovl_fs *ofs,
-                              struct inode *dir, struct dentry *dentry,
-                              umode_t mode)
+static inline struct dentry *ovl_do_mkdir(struct ovl_fs *ofs,
+                                         struct inode *dir,
+                                         struct dentry *dentry,
+                                         umode_t mode)
 {
-       int err = vfs_mkdir(ovl_upper_mnt_idmap(ofs), dir, dentry, mode);
-       pr_debug("mkdir(%pd2, 0%o) = %i\n", dentry, mode, err);
-       return err;
+       dentry = vfs_mkdir(ovl_upper_mnt_idmap(ofs), dir, dentry, mode);
+       pr_debug("mkdir(%pd2, 0%o) = %i\n", dentry, mode, 
PTR_ERR_OR_ZERO(dentry));
+       return dentry;
 }
 
 static inline int ovl_do_mknod(struct ovl_fs *ofs,
@@ -838,8 +839,6 @@ struct ovl_cattr {
 
 #define OVL_CATTR(m) (&(struct ovl_cattr) { .mode = (m) })
 
-int ovl_mkdir_real(struct ovl_fs *ofs, struct inode *dir,
-                  struct dentry **newdentry, umode_t mode);
 struct dentry *ovl_create_real(struct ovl_fs *ofs,
                               struct inode *dir, struct dentry *newdentry,
                               struct ovl_cattr *attr);
diff --git a/fs/overlayfs/super.c b/fs/overlayfs/super.c
index 61e21c3129e8..b63474d1b064 100644
--- a/fs/overlayfs/super.c
+++ b/fs/overlayfs/super.c
@@ -327,9 +327,10 @@ static struct dentry *ovl_workdir_create(struct ovl_fs 
*ofs,
                        goto retry;
                }
 
-               err = ovl_mkdir_real(ofs, dir, &work, attr.ia_mode);
-               if (err)
-                       goto out_dput;
+               work = ovl_do_mkdir(ofs, dir, work, attr.ia_mode);
+               err = PTR_ERR(work);
+               if (IS_ERR(work))
+                       goto out_err;
 
                /* Weird filesystem returning with hashed negative (kernfs)? */
                err = -EINVAL;
diff --git a/fs/smb/server/vfs.c b/fs/smb/server/vfs.c
index fe29acef5872..8554aa5a1059 100644
--- a/fs/smb/server/vfs.c
+++ b/fs/smb/server/vfs.c
@@ -206,8 +206,8 @@ int ksmbd_vfs_mkdir(struct ksmbd_work *work, const char 
*name, umode_t mode)
 {
        struct mnt_idmap *idmap;
        struct path path;
-       struct dentry *dentry;
-       int err;
+       struct dentry *dentry, *d;
+       int err = 0;
 
        dentry = ksmbd_vfs_kern_path_create(work, name,
                                            LOOKUP_NO_SYMLINKS | 
LOOKUP_DIRECTORY,
@@ -222,27 +222,15 @@ int ksmbd_vfs_mkdir(struct ksmbd_work *work, const char 
*name, umode_t mode)
 
        idmap = mnt_idmap(path.mnt);
        mode |= S_IFDIR;
-       err = vfs_mkdir(idmap, d_inode(path.dentry), dentry, mode);
-       if (!err && d_unhashed(dentry)) {
-               struct dentry *d;
-
-               d = lookup_one(idmap, dentry->d_name.name, dentry->d_parent,
-                              dentry->d_name.len);
-               if (IS_ERR(d)) {
-                       err = PTR_ERR(d);
-                       goto out_err;
-               }
-               if (unlikely(d_is_negative(d))) {
-                       dput(d);
-                       err = -ENOENT;
-                       goto out_err;
-               }
-
-               ksmbd_vfs_inherit_owner(work, d_inode(path.dentry), d_inode(d));
-               dput(d);
-       }
+       d = dentry;
+       dentry = vfs_mkdir(idmap, d_inode(path.dentry), dentry, mode);
+       if (IS_ERR(dentry))
+               err = PTR_ERR(dentry);
+       else if (d_is_negative(dentry))
+               err = -ENOENT;
+       if (!err && dentry != d)
+               ksmbd_vfs_inherit_owner(work, d_inode(path.dentry), 
d_inode(dentry));
 
-out_err:
        done_path_create(&path, dentry);
        if (err)
                pr_err("mkdir(%s): creation failed (err:%d)\n", name, err);
diff --git a/fs/xfs/scrub/orphanage.c b/fs/xfs/scrub/orphanage.c
index c287c755f2c5..3537f3cca6d5 100644
--- a/fs/xfs/scrub/orphanage.c
+++ b/fs/xfs/scrub/orphanage.c
@@ -167,10 +167,11 @@ xrep_orphanage_create(
         * directory to control access to a file we put in here.
         */
        if (d_really_is_negative(orphanage_dentry)) {
-               error = vfs_mkdir(&nop_mnt_idmap, root_inode, orphanage_dentry,
-                               0750);
-               if (error)
-                       goto out_dput_orphanage;
+               orphanage_dentry = vfs_mkdir(&nop_mnt_idmap, root_inode,
+                                            orphanage_dentry, 0750);
+               error = PTR_ERR(orphanage_dentry);
+               if (IS_ERR(orphanage_dentry))
+                       goto out_unlock_root;
        }
 
        /* Not a directory? Bail out. */
diff --git a/include/linux/fs.h b/include/linux/fs.h
index 4962f4a4e603..4c545c875efe 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -1971,8 +1971,8 @@ bool inode_owner_or_capable(struct mnt_idmap *idmap,
  */
 int vfs_create(struct mnt_idmap *, struct inode *,
               struct dentry *, umode_t, bool);
-int vfs_mkdir(struct mnt_idmap *, struct inode *,
-             struct dentry *, umode_t);
+struct dentry *vfs_mkdir(struct mnt_idmap *, struct inode *,
+                        struct dentry *, umode_t);
 int vfs_mknod(struct mnt_idmap *, struct inode *, struct dentry *,
               umode_t, dev_t);
 int vfs_symlink(struct mnt_idmap *, struct inode *,
-- 
2.48.1


Reply via email to