The commit is pushed to "branch-rh9-5.14.0-427.37.1.vz9.78.x-ovz" and will 
appear at g...@bitbucket.org:openvz/vzkernel.git
after rh9-5.14.0-427.37.1.vz9.78.3
------>
commit ead9b9aec946bcf8f4395f89e82310d180221f0b
Author: Alexey Kuznetsov <kuz...@virtuozzo.com>
Date:   Fri Nov 8 00:01:10 2024 +0800

    fs: fuse: kio: resolve race condition fallocate vs shrink
    
    It was missed kio/pcs module has warning on out of mutex execution
    of fallocate request. It is not a bug per se, yet the situation
    is dubious. The rationale was that logically fallocate(zerorange) is
    _not_ different of aio direct write of zeros to the same area,
    and that write used to be executed out of inode_lock. So asserting
    the lock in fallocate might be a noise.
    
    Yet we must comply completely. There is one difference, aio takes
    dio_write counter, which blocks concurrent truncates until write
    is completed. This is important mostly for shrinks, as expanding
    writes are not executed asynchronously, fallocates likewise.
    
    So, we take dio_write for async fallocate out of inode_lock,
    making it 100% equivalent to write.
    
    NOTE: the situation with expanding writes/fallocates is complicated.
    We might even have a zero day bug here, I do not see one now, but this does
    not mean we did not have there any holes. Just a note.
    
    Affects: #VSTOR-94829
    https://virtuozzo.atlassian.net/browse/VSTOR-94829
    
    Signed-off-by: Alexey Kuznetsov <kuz...@virtuozzo.com>
    Feature: vStorage
---
 fs/fuse/file.c                     | 11 +++++++++--
 fs/fuse/kio/pcs/pcs_fuse_kdirect.c | 21 ++++++++++++++++-----
 2 files changed, 25 insertions(+), 7 deletions(-)

diff --git a/fs/fuse/file.c b/fs/fuse/file.c
index 7bf7c9e45b98..195b77114d02 100644
--- a/fs/fuse/file.c
+++ b/fs/fuse/file.c
@@ -3450,8 +3450,13 @@ static long fuse_file_fallocate(struct file *file, int 
mode, loff_t offset,
        if (!(mode & FALLOC_FL_KEEP_SIZE))
                set_bit(FUSE_I_SIZE_UNSTABLE, &fi->state);
 
-       if (revoke_lock)
+       if (revoke_lock) {
+               /* This blocks truncates which might happen after
+                * inode_lock is released
+                */
+               fuse_write_dio_begin(fi);
                inode_unlock(inode);
+       }
 
        args.opcode = FUSE_FALLOCATE;
        args.io_inode = inode;
@@ -3460,8 +3465,10 @@ static long fuse_file_fallocate(struct file *file, int 
mode, loff_t offset,
        args.in_args[0].size = sizeof(inarg);
        args.in_args[0].value = &inarg;
        err = fuse_simple_request(fm, &args);
-       if (revoke_lock)
+       if (revoke_lock) {
                inode_lock(inode);
+               fuse_write_dio_end(fi);
+       }
        if (err == -ENOSYS) {
                fm->fc->no_fallocate = 1;
                err = -EOPNOTSUPP;
diff --git a/fs/fuse/kio/pcs/pcs_fuse_kdirect.c 
b/fs/fuse/kio/pcs/pcs_fuse_kdirect.c
index 1bd43ecf3b74..5b141fe5bbd4 100644
--- a/fs/fuse/kio/pcs/pcs_fuse_kdirect.c
+++ b/fs/fuse/kio/pcs/pcs_fuse_kdirect.c
@@ -1019,19 +1019,19 @@ static void pcs_fuse_submit(struct pcs_fuse_cluster 
*pfc, struct fuse_req *req)
                break;
        case FUSE_FALLOCATE: {
                struct fuse_fallocate_in *inarg = (void*) 
args->in_args[0].value;
+               size_t sz = READ_ONCE(di->fileinfo.attr.size);
 
                if (pfc->fc->no_fallocate) {
                        req->out.h.error = -EOPNOTSUPP;
                        goto error;
                }
 
-               if (inarg->offset >= di->fileinfo.attr.size)
+               if (inarg->offset >= sz)
                        inarg->mode &= ~FALLOC_FL_ZERO_RANGE;
 
                if (inarg->mode == FALLOC_FL_KEEP_SIZE)
                        break; /* NOPE */
 
-               WARN_ON_ONCE(!inode_is_locked(&fi->inode));
                if (inarg->mode & (FALLOC_FL_ZERO_RANGE|FALLOC_FL_PUNCH_HOLE)) {
                        if ((inarg->offset & (PAGE_SIZE - 1)) || (inarg->length 
& (PAGE_SIZE - 1))) {
                                req->out.h.error = -EINVAL;
@@ -1040,10 +1040,21 @@ static void pcs_fuse_submit(struct pcs_fuse_cluster 
*pfc, struct fuse_req *req)
                }
 
                if (inarg->mode & FALLOC_FL_KEEP_SIZE) {
-                       if (inarg->offset > di->fileinfo.attr.size)
+                       if (inarg->offset > sz)
                                break; /* NOPE */
-                       if (inarg->offset + inarg->length > 
di->fileinfo.attr.size)
-                               inarg->length = di->fileinfo.attr.size - 
inarg->offset;
+                       if (inarg->offset + inarg->length > sz)
+                               inarg->length = sz - inarg->offset;
+               } else {
+                       /* attr.size can expand, but not shrink. If the file is 
expanded
+                        * by fallocate inode_lock cannot be dropped.
+                        *
+                        * Interesting new situation to consider: update i_size 
happens of exit
+                        * from allocating write/falloc and we have a window 
where attr.size is ahead
+                        * of i_size. It looks safe as allocating call is 
deemed to be synchronous,
+                        * and as i_size is still not advanced all the 
following ones are.
+                        */
+                       WARN_ON_ONCE(inarg->offset + inarg->length > sz &&
+                                    !inode_is_locked(&fi->inode));
                }
 
                ret = pcs_fuse_prep_rw(r);
_______________________________________________
Devel mailing list
Devel@openvz.org
https://lists.openvz.org/mailman/listinfo/devel

Reply via email to