From: NeilBrown <[email protected]> afs needs to block lookup of dentries during unlink and rename. There are two reasons: 1/ If the target is to be removed, not silly-renamed, the subsequent opens cannot be allowed as the file won't exist on the server. 2/ If the rename source is being moved between directories a lookup, particularly d_revalidate, might change ->d_time asynchronously with rename changing ->d_time with possible incorrect results.
afs current unhashes the dentry to force a lookup which will wait on the directory lock, and rehashes afterwards. This is incompatible with proposed changed to directory locking which will require a dentry to remain hashed throughout rename/unlink/etc operations. This patch copies a mechanism developed for NFS. ->d_fsdata which is currently unused is now set to a non-NULL value when lookups must be blocked. d_revalidate checks for this value, and waits for it to become NULL. ->d_lock is used to ensure d_revalidate never updates ->d_time while ->d_fsdata is set. Signed-off-by: NeilBrown <[email protected]> --- fs/afs/afs.h | 7 ++++++ fs/afs/dir.c | 64 +++++++++++++++++++++++++++++------------------ fs/afs/internal.h | 5 +--- 3 files changed, 47 insertions(+), 29 deletions(-) diff --git a/fs/afs/afs.h b/fs/afs/afs.h index ec3db00bd081..019e77b08458 100644 --- a/fs/afs/afs.h +++ b/fs/afs/afs.h @@ -26,6 +26,13 @@ typedef u64 afs_volid_t; typedef u64 afs_vnodeid_t; typedef u64 afs_dataversion_t; +/* This is stored in ->d_fsdata to stop d_revalidate looking at, + * and possibly changing, ->d_time on a dentry which is being moved + * between directories, and to block lookup for dentry that is + * being removed without silly-rename. + */ +#define AFS_FSDATA_BLOCKED ((void*)1) + typedef enum { AFSVL_RWVOL, /* read/write volume */ AFSVL_ROVOL, /* read-only volume */ diff --git a/fs/afs/dir.c b/fs/afs/dir.c index a0417292314c..9c57614feccf 100644 --- a/fs/afs/dir.c +++ b/fs/afs/dir.c @@ -1034,6 +1034,10 @@ static int afs_d_revalidate_rcu(struct afs_vnode *dvnode, struct dentry *dentry) if (!afs_check_validity(dvnode)) return -ECHILD; + /* A rename/unlink is pending */ + if (dentry->d_fsdata) + return -ECHILD; + /* We only need to invalidate a dentry if the server's copy changed * behind our back. If we made the change, it's no problem. Note that * on a 32-bit system, we only have 32 bits in the dentry to store the @@ -1069,6 +1073,10 @@ static int afs_d_revalidate(struct inode *parent_dir, const struct qstr *name, if (flags & LOOKUP_RCU) return afs_d_revalidate_rcu(dir, dentry); + /* Wait for rename/unlink to complete */ +wait_for_rename: + wait_var_event(&dentry->d_fsdata, dentry->d_fsdata == NULL); + if (d_really_is_positive(dentry)) { vnode = AFS_FS_I(d_inode(dentry)); _enter("{v={%llx:%llu} n=%pd fl=%lx},", @@ -1161,7 +1169,13 @@ static int afs_d_revalidate(struct inode *parent_dir, const struct qstr *name, } out_valid: + spin_lock(&dentry->d_lock); + if (dentry->d_fsdata) { + spin_unlock(&dentry->d_lock); + goto wait_for_rename; + } dentry->d_time = (unsigned long)dir_version; + spin_unlock(&dentry->d_lock); out_valid_noupdate: key_put(key); _leave(" = 1 [valid]"); @@ -1536,8 +1550,7 @@ static void afs_unlink_edit_dir(struct afs_operation *op) static void afs_unlink_put(struct afs_operation *op) { _enter("op=%08x", op->debug_id); - if (op->unlink.need_rehash && afs_op_error(op) < 0 && afs_op_error(op) != -ENOENT) - d_rehash(op->dentry); + store_release_wake_up(&op->dentry->d_fsdata, NULL); } static const struct afs_operation_ops afs_unlink_operation = { @@ -1591,11 +1604,7 @@ static int afs_unlink(struct inode *dir, struct dentry *dentry) afs_op_set_error(op, afs_sillyrename(dvnode, vnode, dentry, op->key)); goto error; } - if (!d_unhashed(dentry)) { - /* Prevent a race with RCU lookup. */ - __d_drop(dentry); - op->unlink.need_rehash = true; - } + dentry->d_fsdata = AFS_FSDATA_BLOCKED; spin_unlock(&dentry->d_lock); op->file[1].vnode = vnode; @@ -1885,9 +1894,10 @@ static void afs_rename_edit_dir(struct afs_operation *op) _enter("op=%08x", op->debug_id); - if (op->rename.rehash) { - d_rehash(op->rename.rehash); - op->rename.rehash = NULL; + if (op->rename.unblock) { + /* Rename has finished, so unlocks lookups to target */ + store_release_wake_up(&op->rename.unblock->d_fsdata, NULL); + op->rename.unblock = NULL; } fscache_begin_write_operation(&orig_cres, afs_vnode_cache(orig_dvnode)); @@ -1970,6 +1980,9 @@ static void afs_rename_exchange_edit_dir(struct afs_operation *op) d_exchange(old_dentry, new_dentry); up_write(&orig_dvnode->validate_lock); + /* dentry has been moved, so d_validate can safely proceed */ + store_release_wake_up(&old_dentry->d_fsdata, NULL); + } else { down_write(&orig_dvnode->validate_lock); if (test_bit(AFS_VNODE_DIR_VALID, &orig_dvnode->flags) && @@ -2009,11 +2022,10 @@ static void afs_rename_exchange_edit_dir(struct afs_operation *op) static void afs_rename_put(struct afs_operation *op) { _enter("op=%08x", op->debug_id); - if (op->rename.rehash) - d_rehash(op->rename.rehash); + if (op->rename.unblock) + store_release_wake_up(&op->rename.unblock->d_fsdata, NULL); + store_release_wake_up(&op->dentry->d_fsdata, NULL); dput(op->rename.tmp); - if (afs_op_error(op)) - d_rehash(op->dentry); } static const struct afs_operation_ops afs_rename_operation = { @@ -2121,7 +2133,6 @@ static int afs_rename(struct mnt_idmap *idmap, struct inode *old_dir, op->ops = &afs_rename_noreplace_operation; } else if (flags & RENAME_EXCHANGE) { op->ops = &afs_rename_exchange_operation; - d_drop(new_dentry); } else { /* If we might displace the target, we might need to do silly * rename. @@ -2135,14 +2146,12 @@ static int afs_rename(struct mnt_idmap *idmap, struct inode *old_dir, */ if (d_is_positive(new_dentry) && !d_is_dir(new_dentry)) { /* To prevent any new references to the target during - * the rename, we unhash the dentry in advance. + * the rename, we set d_fsdata which afs_d_revalidate will wait for. + * d_lock ensures d_count() and ->d_fsdata are consistent. */ - if (!d_unhashed(new_dentry)) { - d_drop(new_dentry); - op->rename.rehash = new_dentry; - } - + spin_lock(&new_dentry->d_lock); if (d_count(new_dentry) > 2) { + spin_unlock(&new_dentry->d_lock); /* copy the target dentry's name */ op->rename.tmp = d_alloc(new_dentry->d_parent, &new_dentry->d_name); @@ -2160,8 +2169,12 @@ static int afs_rename(struct mnt_idmap *idmap, struct inode *old_dir, } op->dentry_2 = op->rename.tmp; - op->rename.rehash = NULL; op->rename.new_negative = true; + } else { + /* Block any lookups to target until the rename completes */ + new_dentry->d_fsdata = AFS_FSDATA_BLOCKED; + op->rename.unblock = new_dentry; + spin_unlock(&new_dentry->d_lock); } } } @@ -2172,10 +2185,11 @@ static int afs_rename(struct mnt_idmap *idmap, struct inode *old_dir, * d_revalidate may see old_dentry between the op having taken place * and the version being updated. * - * So drop the old_dentry for now to make other threads go through - * lookup instead - which we hold a lock against. + * So block revalidate on the old_dentry until the rename completes. */ - d_drop(old_dentry); + spin_lock(&old_dentry->d_lock); + old_dentry->d_fsdata = AFS_FSDATA_BLOCKED; + spin_unlock(&old_dentry->d_lock); ret = afs_do_sync_operation(op); if (ret == -ENOTSUPP) diff --git a/fs/afs/internal.h b/fs/afs/internal.h index 106a7fe06b56..f2898ce9c0e6 100644 --- a/fs/afs/internal.h +++ b/fs/afs/internal.h @@ -891,10 +891,7 @@ struct afs_operation { const char *symlink; } create; struct { - bool need_rehash; - } unlink; - struct { - struct dentry *rehash; + struct dentry *unblock; struct dentry *tmp; unsigned int rename_flags; bool new_negative; -- 2.50.0.107.gf914562f5916.dirty
