On Mon, 2024-05-27 at 13:04 +1000, NeilBrown wrote:
> 
> dentry->d_fsdata is set to NFS_FSDATA_BLOCKED while unlinking or
> renaming-over a file to ensure that no open succeeds while the NFS
> operation progressed on the server.
> 
> Setting dentry->d_fsdata to NFS_FSDATA_BLOCKED is done under ->d_lock
> after checking the refcount is not elevated.  Any attempt to open the
> file (through that name) will go through lookp_open() which will take
> ->d_lock while incrementing the refcount, we can be sure that once
> the
> new value is set, __nfs_lookup_revalidate() *will* see the new value
> and
> will block.
> 
> We don't have any locking guarantee that when we set ->d_fsdata to
> NULL,
> the wait_var_event() in __nfs_lookup_revalidate() will notice.
> wait/wake primitives do NOT provide barriers to guarantee order.  We
> must use smp_load_acquire() in wait_var_event() to ensure we look at
> an
> up-to-date value, and must use smp_store_release() before
> wake_up_var().
> 
> This patch adds those barrier functions and factors out
> block_revalidate() and unblock_revalidate() far clarity.
> 
> There is also a hypothetical bug in that if memory allocation fails
> (which never happens in practice) we might leave ->d_fsdata locked.
> This patch adds the missing call to unblock_revalidate().
> 
> Reported-and-tested-by: Richard Kojedzinszky
> <richard+debian+bugrep...@kojedz.in>
> Closes: https://bugs.debian.org/cgi-bin/bugreport.cgi?bug=1071501
> Fixes: 3c59366c207e ("NFS: don't unhash dentry during unlink/rename")
> Signed-off-by: NeilBrown <ne...@suse.de>
> ---
>  fs/nfs/dir.c | 44 +++++++++++++++++++++++++++++---------------
>  1 file changed, 29 insertions(+), 15 deletions(-)
> 
> diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c
> index ac505671efbd..c91dc36d41cc 100644
> --- a/fs/nfs/dir.c
> +++ b/fs/nfs/dir.c
> @@ -1802,9 +1802,10 @@ __nfs_lookup_revalidate(struct dentry *dentry,
> unsigned int flags,
>               if (parent != READ_ONCE(dentry->d_parent))
>                       return -ECHILD;
>       } else {
> -             /* Wait for unlink to complete */
> +             /* Wait for unlink to complete - see
> unblock_revalidate() */
>               wait_var_event(&dentry->d_fsdata,
> -                            dentry->d_fsdata !=
> NFS_FSDATA_BLOCKED);
> +                            smp_load_acquire(&dentry->d_fsdata)
> +                            != NFS_FSDATA_BLOCKED);

Doesn't this end up being a reversed ACQUIRE+RELEASE as described in
the "LOCK ACQUISITION FUNCTIONS" section of Documentation/memory-
barriers.txt?

IOW: Shouldn't the above rather be using READ_ONCE()?

>               parent = dget_parent(dentry);
>               ret = reval(d_inode(parent), dentry, flags);
>               dput(parent);
> @@ -1817,6 +1818,26 @@ static int nfs_lookup_revalidate(struct dentry
> *dentry, unsigned int flags)
>       return __nfs_lookup_revalidate(dentry, flags,
> nfs_do_lookup_revalidate);
>  }
>  
> +static void block_revalidate(struct dentry *dentry)
> +{
> +     /* old devname - just in case */
> +     kfree(dentry->d_fsdata);
> +
> +     /* Any new reference that could lead to an open
> +      * will take ->d_lock in lookup_open() -> d_lookup().
> +      */
> +     lockdep_assert_held(&dentry->d_lock);
> +
> +     dentry->d_fsdata = NULL;

Why are you doing a barrier free change to dentry->d_fsdata here when
you have the memory barrier protected change in unblock_revalidate()?

> +}
> +
> +static void unblock_revalidate(struct dentry *dentry)
> +{
> +     /* store_release ensures wait_var_event() sees the update */
> +     smp_store_release(&dentry->d_fsdata, NULL);

Shouldn't this be a WRITE_ONCE(), for the same reason as above?

> +     wake_up_var(&dentry->d_fsdata);
> +}
> +
>  /*
>   * A weaker form of d_revalidate for revalidating just the
> d_inode(dentry)
>   * when we don't really care about the dentry name. This is called
> when a
> @@ -2501,15 +2522,12 @@ int nfs_unlink(struct inode *dir, struct
> dentry *dentry)
>               spin_unlock(&dentry->d_lock);
>               goto out;
>       }
> -     /* old devname */
> -     kfree(dentry->d_fsdata);
> -     dentry->d_fsdata = NFS_FSDATA_BLOCKED;
> +     block_revalidate(dentry);
>  
>       spin_unlock(&dentry->d_lock);
>       error = nfs_safe_remove(dentry);
>       nfs_dentry_remove_handle_error(dir, dentry, error);
> -     dentry->d_fsdata = NULL;
> -     wake_up_var(&dentry->d_fsdata);
> +     unblock_revalidate(dentry);
>  out:
>       trace_nfs_unlink_exit(dir, dentry, error);
>       return error;
> @@ -2616,8 +2634,7 @@ nfs_unblock_rename(struct rpc_task *task,
> struct nfs_renamedata *data)
>  {
>       struct dentry *new_dentry = data->new_dentry;
>  
> -     new_dentry->d_fsdata = NULL;
> -     wake_up_var(&new_dentry->d_fsdata);
> +     unblock_revalidate(new_dentry);
>  }
>  
>  /*
> @@ -2679,11 +2696,6 @@ int nfs_rename(struct mnt_idmap *idmap, struct
> inode *old_dir,
>               if (WARN_ON(new_dentry->d_flags &
> DCACHE_NFSFS_RENAMED) ||
>                   WARN_ON(new_dentry->d_fsdata ==
> NFS_FSDATA_BLOCKED))
>                       goto out;
> -             if (new_dentry->d_fsdata) {
> -                     /* old devname */
> -                     kfree(new_dentry->d_fsdata);
> -                     new_dentry->d_fsdata = NULL;
> -             }
>  
>               spin_lock(&new_dentry->d_lock);
>               if (d_count(new_dentry) > 2) {
> @@ -2705,7 +2717,7 @@ int nfs_rename(struct mnt_idmap *idmap, struct
> inode *old_dir,
>                       new_dentry = dentry;
>                       new_inode = NULL;
>               } else {
> -                     new_dentry->d_fsdata = NFS_FSDATA_BLOCKED;
> +                     block_revalidate(new_dentry);
>                       must_unblock = true;
>                       spin_unlock(&new_dentry->d_lock);
>               }
> @@ -2717,6 +2729,8 @@ int nfs_rename(struct mnt_idmap *idmap, struct
> inode *old_dir,
>       task = nfs_async_rename(old_dir, new_dir, old_dentry,
> new_dentry,
>                               must_unblock ? nfs_unblock_rename :
> NULL);
>       if (IS_ERR(task)) {
> +             if (must_unblock)
> +                     unblock_revalidate(new_dentry);
>               error = PTR_ERR(task);
>               goto out;
>       }

-- 
Trond Myklebust
Linux NFS client maintainer, Hammerspace
trond.mykleb...@hammerspace.com


Reply via email to