If thread A is using an inode, it must not be deleted by thread B while thread B is processing a FUSE_FORGET request.
The FUSE protocol itself already has a counter called nlookup that is used in FUSE_FORGET messages. We cannot trust this counter since the untrusted client can manipulate it via FUSE_FORGET messages. Introduce a new refcount to keep inodes alive for the required lifespan. Every holder of a reference must call lo_inode_put() to release it. FUSE's nlookup counter holds exactly one reference so that the inode stays alive as long as the client still wants to remember it. Note that the lo_inode->is_symlink field is moved to avoid creating a hole in the struct due to struct field alignment. Signed-off-by: Stefan Hajnoczi <stefa...@redhat.com> --- contrib/virtiofsd/passthrough_ll.c | 212 ++++++++++++++++++++++++----- 1 file changed, 178 insertions(+), 34 deletions(-) diff --git a/contrib/virtiofsd/passthrough_ll.c b/contrib/virtiofsd/passthrough_ll.c index 125e9d9f96..0c90e352d2 100644 --- a/contrib/virtiofsd/passthrough_ll.c +++ b/contrib/virtiofsd/passthrough_ll.c @@ -95,7 +95,12 @@ struct lo_key { struct lo_inode { int fd; - bool is_symlink; + + /* Atomic reference count for this object. The nlookup field holds a + * reference and releases it when nlookup reaches 0. + */ + gint refcount; + struct lo_key key; /* This counter keeps the inode alive during the FUSE session. 
@@ -115,6 +120,8 @@ struct lo_inode { fuse_ino_t fuse_ino; pthread_mutex_t plock_mutex; GHashTable *posix_locks; /* protected by lo_inode->plock_mutex */ + + bool is_symlink; }; struct lo_cred { @@ -198,6 +205,7 @@ static const struct fuse_opt lo_opts[] = { FUSE_OPT_END }; static void unref_inode(struct lo_data *lo, struct lo_inode *inode, uint64_t n); +static void put_shared(struct lo_data *lo, struct lo_inode *inode); static struct lo_inode *lo_find(struct lo_data *lo, struct stat *st); @@ -359,6 +367,24 @@ static ssize_t lo_add_inode_mapping(fuse_req_t req, struct lo_inode *inode) return elem - lo_data(req)->ino_map.elems; } +static void lo_inode_put(struct lo_data *lo, struct lo_inode **inodep) +{ + struct lo_inode *inode = *inodep; + + if (!inode) { + return; + } + + *inodep = NULL; + + if (g_atomic_int_dec_and_test(&inode->refcount)) { + close(inode->fd); + put_shared(lo, inode); + free(inode); + } +} + +/* Caller must release refcount using lo_inode_put() */ static struct lo_inode *lo_inode(fuse_req_t req, fuse_ino_t ino) { struct lo_data *lo = lo_data(req); @@ -366,6 +392,9 @@ static struct lo_inode *lo_inode(fuse_req_t req, fuse_ino_t ino) pthread_mutex_lock(&lo->mutex); elem = lo_map_get(&lo->ino_map, ino); + if (elem) { + g_atomic_int_inc(&elem->inode->refcount); + } pthread_mutex_unlock(&lo->mutex); if (!elem) @@ -374,10 +403,22 @@ static struct lo_inode *lo_inode(fuse_req_t req, fuse_ino_t ino) return elem->inode; } +/* TODO Remove this helper and force callers to hold an inode refcount until + * they are done with the fd. This will be done in a later patch to make + * review easier. + */ static int lo_fd(fuse_req_t req, fuse_ino_t ino) { struct lo_inode *inode = lo_inode(req, ino); - return inode ? 
inode->fd : -1; + int fd; + + if (!inode) { + return -1; + } + + fd = inode->fd; + lo_inode_put(lo_data(req), &inode); + return fd; } static bool lo_debug(fuse_req_t req) @@ -463,6 +504,9 @@ static void lo_getattr(fuse_req_t req, fuse_ino_t ino, fuse_reply_attr(req, &buf, lo->timeout); } +/* Increments parent->nlookup and caller must release refcount using + * lo_inode_put(&parent). + */ static int lo_parent_and_name(struct lo_data *lo, struct lo_inode *inode, char path[PATH_MAX], struct lo_inode **parent) { @@ -498,6 +542,7 @@ retry: p = &lo->root; pthread_mutex_lock(&lo->mutex); p->nlookup++; + g_atomic_int_inc(&p->refcount); pthread_mutex_unlock(&lo->mutex); } else { *last = '\0'; @@ -570,6 +615,7 @@ fallback: if (res != -1) { res = utimensat(parent->fd, path, tv, AT_SYMLINK_NOFOLLOW); unref_inode(lo, parent, 1); + lo_inode_put(lo, &parent); } return res; @@ -683,11 +729,13 @@ static void lo_setattr(fuse_req_t req, fuse_ino_t ino, struct stat *attr, goto out_err; } update_version(lo, inode); + lo_inode_put(lo, &inode); return lo_getattr(req, ino, fi); out_err: saverr = errno; + lo_inode_put(lo, &inode); fuse_reply_err(req, saverr); } @@ -704,6 +752,7 @@ static struct lo_inode *lo_find(struct lo_data *lo, struct stat *st) if (p) { assert(p->nlookup > 0); p->nlookup++; + g_atomic_int_inc(&p->refcount); } pthread_mutex_unlock(&lo->mutex); @@ -771,6 +820,9 @@ static void put_shared(struct lo_data *lo, struct lo_inode *inode) } } +/* Increments nlookup and caller must release refcount using + * lo_inode_put(&parent). 
+ */ static int lo_do_lookup(fuse_req_t req, fuse_ino_t parent, const char *name, struct fuse_entry_param *e) { @@ -778,7 +830,8 @@ static int lo_do_lookup(fuse_req_t req, fuse_ino_t parent, const char *name, int res; int saverr; struct lo_data *lo = lo_data(req); - struct lo_inode *inode, *dir = lo_inode(req, parent); + struct lo_inode *inode = NULL; + struct lo_inode *dir = lo_inode(req, parent); if (!dir) { return EBADF; @@ -811,6 +864,10 @@ static int lo_do_lookup(fuse_req_t req, fuse_ino_t parent, const char *name, goto out_err; inode->is_symlink = S_ISLNK(e->attr.st_mode); + + /* One for the caller and one for nlookup (released in unref_inode()) */ + g_atomic_int_set(&inode->refcount, 2); + inode->nlookup = 1; inode->fd = newfd; newfd = -1; @@ -839,6 +896,8 @@ static int lo_do_lookup(fuse_req_t req, fuse_ino_t parent, const char *name, e->ino = inode->fuse_ino; e->version_offset = inode->version_offset; + lo_inode_put(lo, &inode); + lo_inode_put(lo, &dir); if (lo_debug(req)) fuse_debug(" %lli/%s -> %lli (version_table[%lli]=%lli)\n", @@ -853,6 +912,8 @@ out_err: saverr = errno; if (newfd != -1) close(newfd); + lo_inode_put(lo, &inode); + lo_inode_put(lo, &dir); return saverr; } @@ -963,7 +1024,7 @@ static void lo_mknod_symlink(fuse_req_t req, fuse_ino_t parent, if (res == -1) goto out; - update_version(lo, lo_inode(req, parent)); + update_version(lo, dir); saverr = lo_do_lookup(req, parent, name, &e); if (saverr) @@ -975,9 +1036,11 @@ static void lo_mknod_symlink(fuse_req_t req, fuse_ino_t parent, (unsigned long long) e.ino); fuse_reply_entry(req, &e, lo->shared); + lo_inode_put(lo, &dir); return; out: + lo_inode_put(lo, &dir); if (newfd != -1) close(newfd); fuse_reply_err(req, saverr); @@ -1029,6 +1092,7 @@ fallback: if (res != -1) { res = linkat(parent->fd, path, dfd, name, 0); unref_inode(lo, parent, 1); + lo_inode_put(lo, &parent); } return res; @@ -1039,6 +1103,7 @@ static void lo_link(fuse_req_t req, fuse_ino_t ino, fuse_ino_t parent, { int res; struct 
lo_data *lo = lo_data(req); + struct lo_inode *parent_inode; struct lo_inode *inode; struct fuse_entry_param e; int saverr; @@ -1048,17 +1113,18 @@ static void lo_link(fuse_req_t req, fuse_ino_t ino, fuse_ino_t parent, return; } + parent_inode = lo_inode(req, parent); inode = lo_inode(req, ino); - if (!inode) { - fuse_reply_err(req, EBADF); - return; + if (!parent_inode || !inode) { + errno = EBADF; + goto out_err; } memset(&e, 0, sizeof(struct fuse_entry_param)); e.attr_timeout = lo->timeout; e.entry_timeout = lo->timeout; - res = linkat_empty_nofollow(lo, inode, lo_fd(req, parent), name); + res = linkat_empty_nofollow(lo, inode, parent_inode->fd, name); if (res == -1) goto out_err; @@ -1071,7 +1137,7 @@ static void lo_link(fuse_req_t req, fuse_ino_t ino, fuse_ino_t parent, pthread_mutex_unlock(&lo->mutex); e.ino = inode->fuse_ino; update_version(lo, inode); - update_version(lo, lo_inode(req, parent)); + update_version(lo, parent_inode); if (lo_debug(req)) fuse_debug(" %lli/%s -> %lli\n", @@ -1079,13 +1145,18 @@ static void lo_link(fuse_req_t req, fuse_ino_t ino, fuse_ino_t parent, (unsigned long long) e.ino); fuse_reply_entry(req, &e, lo->shared); + lo_inode_put(lo, &parent_inode); + lo_inode_put(lo, &inode); return; out_err: saverr = errno; + lo_inode_put(lo, &parent_inode); + lo_inode_put(lo, &inode); fuse_reply_err(req, saverr); } +/* Increments nlookup and caller must release refcount using lo_inode_put() */ static struct lo_inode *lookup_name(fuse_req_t req, fuse_ino_t parent, const char *name) { @@ -1121,11 +1192,20 @@ static void lo_rmdir(fuse_req_t req, fuse_ino_t parent, const char *name) if (res == -1) { fuse_reply_err(req, errno); } else { + struct lo_inode *parent_inode; + update_version(lo, inode); - update_version(lo, lo_inode(req, parent)); + + parent_inode = lo_inode(req, parent); + if (parent_inode) { + update_version(lo, parent_inode); + lo_inode_put(lo, &parent_inode); + } + fuse_reply_err(req, 0); } unref_inode(lo, inode, 1); + 
lo_inode_put(lo, &inode); } static void lo_rename(fuse_req_t req, fuse_ino_t parent, const char *name, @@ -1133,8 +1213,10 @@ static void lo_rename(fuse_req_t req, fuse_ino_t parent, const char *name, unsigned int flags) { int res; - struct lo_inode *oldinode; - struct lo_inode *newinode; + struct lo_inode *parent_inode; + struct lo_inode *newparent_inode; + struct lo_inode *oldinode = NULL; + struct lo_inode *newinode = NULL; struct lo_data *lo = lo_data(req); if (!is_safe_path_component(name) || @@ -1143,6 +1225,13 @@ static void lo_rename(fuse_req_t req, fuse_ino_t parent, const char *name, return; } + parent_inode = lo_inode(req, parent); + newparent_inode = lo_inode(req, newparent); + if (!parent_inode || !newparent_inode) { + fuse_reply_err(req, EBADF); + goto out; + } + oldinode = lookup_name(req, parent, name); newinode = lookup_name(req, newparent, newname); if (!oldinode) { @@ -1155,8 +1244,8 @@ static void lo_rename(fuse_req_t req, fuse_ino_t parent, const char *name, #ifndef SYS_renameat2 fuse_reply_err(req, EINVAL); #else - res = syscall(SYS_renameat2, lo_fd(req, parent), name, - lo_fd(req, newparent), newname, flags); + res = syscall(SYS_renameat2, parent_inode->fd, name, + newparent_inode->fd, newname, flags); if (res == -1 && errno == ENOSYS) fuse_reply_err(req, EINVAL); else @@ -1165,21 +1254,24 @@ static void lo_rename(fuse_req_t req, fuse_ino_t parent, const char *name, goto out; } - res = renameat(lo_fd(req, parent), name, - lo_fd(req, newparent), newname); + res = renameat(parent_inode->fd, name, newparent_inode->fd, newname); if (res == -1) { fuse_reply_err(req, errno); } else { update_version(lo, oldinode); if (newinode) update_version(lo, newinode); - update_version(lo, lo_inode(req, parent)); - update_version(lo, lo_inode(req, newparent)); + update_version(lo, parent_inode); + update_version(lo, newparent_inode); fuse_reply_err(req, 0); } out: unref_inode(lo, oldinode, 1); unref_inode(lo, newinode, 1); + lo_inode_put(lo, &oldinode); + 
lo_inode_put(lo, &newinode); + lo_inode_put(lo, &parent_inode); + lo_inode_put(lo, &newparent_inode); } static void lo_unlink(fuse_req_t req, fuse_ino_t parent, const char *name) @@ -1203,11 +1295,20 @@ static void lo_unlink(fuse_req_t req, fuse_ino_t parent, const char *name) if (res == -1) { fuse_reply_err(req, errno); } else { + struct lo_inode *parent_inode; + update_version(lo, inode); - update_version(lo, lo_inode(req, parent)); + + parent_inode = lo_inode(req, parent); + if (parent_inode) { + update_version(lo, parent_inode); + lo_inode_put(lo, &parent_inode); + } + fuse_reply_err(req, 0); } unref_inode(lo, inode, 1); + lo_inode_put(lo, &inode); } static void unref_inode(struct lo_data *lo, struct lo_inode *inode, uint64_t n) @@ -1227,9 +1328,9 @@ static void unref_inode(struct lo_data *lo, struct lo_inode *inode, uint64_t n) g_hash_table_destroy(inode->posix_locks); pthread_mutex_destroy(&inode->plock_mutex); pthread_mutex_unlock(&lo->mutex); - close(inode->fd); - put_shared(lo, inode); - free(inode); + + /* Drop our refcount from lo_do_lookup() */ + lo_inode_put(lo, &inode); } else { pthread_mutex_unlock(&lo->mutex); } @@ -1244,6 +1345,7 @@ static int unref_all_inodes_cb(gpointer key, gpointer value, inode->nlookup = 0; lo_map_remove(&lo->ino_map, inode->fuse_ino); close(inode->fd); + lo_inode_put(lo, &inode); /* Drop our refcount from lo_do_lookup() */ return TRUE; } @@ -1273,6 +1375,7 @@ static void lo_forget_one(fuse_req_t req, fuse_ino_t ino, uint64_t nlookup) } unref_inode(lo, inode, nlookup); + lo_inode_put(lo, &inode); } static void lo_forget(fuse_req_t req, fuse_ino_t ino, uint64_t nlookup) @@ -1492,6 +1595,7 @@ static void lo_do_readdir(fuse_req_t req, fuse_ino_t ino, size_t size, err = 0; error: lo_dirp_put(&d); + lo_inode_put(lo, &dinode); // If there's an error, we can only signal it if we haven't stored // any entries yet - otherwise we'd end up with wrong lookup @@ -1546,6 +1650,7 @@ static void lo_create(fuse_req_t req, fuse_ino_t parent, 
const char *name, { int fd; struct lo_data *lo = lo_data(req); + struct lo_inode *parent_inode; struct fuse_entry_param e; int err; struct lo_cred old = {}; @@ -1559,11 +1664,17 @@ static void lo_create(fuse_req_t req, fuse_ino_t parent, const char *name, return; } + parent_inode = lo_inode(req, parent); + if (!parent_inode) { + fuse_reply_err(req, EBADF); + return; + } + err = lo_change_cred(req, &old); if (err) goto out; - fd = openat(lo_fd(req, parent), name, + fd = openat(parent_inode->fd, name, (fi->flags | O_CREAT) & ~O_NOFOLLOW, mode); err = fd == -1 ? errno : 0; lo_restore_cred(&old); @@ -1571,15 +1682,15 @@ static void lo_create(fuse_req_t req, fuse_ino_t parent, const char *name, if (!err) { ssize_t fh; - update_version(lo, lo_inode(req, parent)); + update_version(lo, parent_inode); pthread_mutex_lock(&lo->mutex); fh = lo_add_fd_mapping(req, fd); pthread_mutex_unlock(&lo->mutex); if (fh == -1) { close(fd); - fuse_reply_err(req, ENOMEM); - return; + err = ENOMEM; + goto out; } fi->fh = fh; @@ -1591,6 +1702,8 @@ static void lo_create(fuse_req_t req, fuse_ino_t parent, const char *name, fi->keep_cache = 1; out: + lo_inode_put(lo, &parent_inode); + if (err) fuse_reply_err(req, err); else @@ -1660,15 +1773,17 @@ static void lo_getlk(fuse_req_t req, fuse_ino_t ino, plock = lookup_create_plock_ctx(lo, inode, fi->lock_owner, lock->l_pid, &ret); if (!plock) { - pthread_mutex_unlock(&inode->plock_mutex); - fuse_reply_err(req, ret); - return; + saverr = ret; + goto out; } ret = fcntl(plock->fd, F_OFD_GETLK, lock); if (ret == -1) saverr = errno; + +out: pthread_mutex_unlock(&inode->plock_mutex); + lo_inode_put(lo, &inode); if (saverr) fuse_reply_err(req, saverr); @@ -1707,9 +1822,8 @@ static void lo_setlk(fuse_req_t req, fuse_ino_t ino, &ret); if (!plock) { - pthread_mutex_unlock(&inode->plock_mutex); - fuse_reply_err(req, ret); - return; + saverr = ret; + goto out; } /* TODO: Is it alright to modify flock? 
*/ @@ -1718,7 +1832,11 @@ static void lo_setlk(fuse_req_t req, fuse_ino_t ino, if (ret == -1) { saverr = errno; } + +out: pthread_mutex_unlock(&inode->plock_mutex); + lo_inode_put(lo, &inode); + fuse_reply_err(req, saverr); } @@ -1849,6 +1967,8 @@ static void lo_flush(fuse_req_t req, fuse_ino_t ino, struct fuse_file_info *fi) res = close(dup(lo_fi_fd(req, fi))); fuse_reply_err(req, res == -1 ? errno : 0); + + lo_inode_put(lo_data(req), &inode); } static void lo_fsync(fuse_req_t req, fuse_ino_t ino, int datasync, @@ -1921,7 +2041,14 @@ static void lo_write_buf(fuse_req_t req, fuse_ino_t ino, if(res < 0) { fuse_reply_err(req, -res); } else { - update_version(lo, lo_inode(req, ino)); + struct lo_inode *inode; + + inode = lo_inode(req, ino); + if (inode) { + update_version(lo, inode); + lo_inode_put(lo, &inode); + } + fuse_reply_write(req, (size_t) res); } } @@ -1948,7 +2075,13 @@ static void lo_fallocate(fuse_req_t req, fuse_ino_t ino, int mode, if (err < 0) { err = errno; } else { - update_version(lo, lo_inode(req, ino)); + struct lo_inode *inode; + + inode = lo_inode(req, ino); + if (inode) { + update_version(lo, inode); + lo_inode_put(lo, &inode); + } } fuse_reply_err(req, err); @@ -2029,11 +2162,14 @@ out_free: if (fd >= 0) { close(fd); } + + lo_inode_put(lo, &inode); return; out_err: saverr = errno; out: + lo_inode_put(lo, &inode); fuse_reply_err(req, saverr); goto out_free; } @@ -2101,11 +2237,14 @@ out_free: if (fd >= 0) { close(fd); } + + lo_inode_put(lo, &inode); return; out_err: saverr = errno; out: + lo_inode_put(lo, &inode); fuse_reply_err(req, saverr); goto out_free; } @@ -2157,6 +2296,8 @@ out: if (fd >= 0) { close(fd); } + + lo_inode_put(lo, &inode); fuse_reply_err(req, saverr); } @@ -2206,6 +2347,8 @@ out: if (fd >= 0) { close(fd); } + + lo_inode_put(lo, &inode); fuse_reply_err(req, saverr); } @@ -2598,6 +2741,7 @@ static void setup_root(struct lo_data *lo, struct lo_inode *root) root->key.ino = stat.st_ino; root->key.dev = stat.st_dev; root->nlookup = 
2; + g_atomic_int_set(&root->refcount, 2); } static void setup_proc_self_fd(struct lo_data *lo) -- 2.21.0