The commit is pushed to "branch-rh9-5.14.0-284.25.1.vz9.30.x-ovz" and will appear at https://src.openvz.org/scm/ovz/vzkernel.git after rh9-5.14.0-284.25.1.vz9.30.8 ------> commit 54e40be2a9b78ed72cfa8fd7a059aad3d7936036 Author: Alexey Kuznetsov <kuz...@virtuozzo.com> Date: Fri Oct 6 18:44:14 2023 +0800
fuse: skip bg_queue for async direct io pcs requests There is a fundamental problem in the fuse pcs implementation. While requests scale by cpu, we still have contention on bg_lock, and all the requests go through a single bottleneck at bg_queue. Of course we had inferior performance due to this, but we ignored the problem as the performance was still good. But recently it was found that under some realistic circumstances we get a collapse of performance: it drops by more than 10 times when the load on pcs increases. The reason is that the algorithm effectively reduces the number of cpus used to 1 or a few and, besides that, triggers extreme contention on bg_lock. Yet, bg_queue for kio pcs requests is entirely useless. The request is already allocated, resources are consumed. If we push it to kio pcs, it will be handled by the pcs fine-grained congestion avoidance. So, we can skip bg_queue. This patch does this only for async direct io requests. The reason is that for page cache reqs we must conform to invalidation rules, which need some serialization. It is not impossible, but it requires some work and is in fact not very useful. The patch is extreme. It removes not only bg_queue, which is no doubt a good thing, but also blocking at allocation of the aio request, which is dubious. Right now we are limited only by the system aio limit. 
https://pmc.acronis.work/browse/VSTOR-54040 Signed-off-by: Alexey Kuznetsov <kuz...@acronis.com> Feature: vStorage --- fs/fuse/dev.c | 26 +++++++++++++++++++++----- fs/fuse/file.c | 1 + fs/fuse/fuse_i.h | 2 ++ 3 files changed, 24 insertions(+), 5 deletions(-) diff --git a/fs/fuse/dev.c b/fs/fuse/dev.c index eb3fc44fe324..b93d77af2d24 100644 --- a/fs/fuse/dev.c +++ b/fs/fuse/dev.c @@ -362,7 +362,8 @@ void __fuse_request_end( struct fuse_req *req, bool flush_bg) flush_bg_queue_and_unlock(fc); else spin_unlock(&fc->bg_lock); - } + } else if (test_bit(FR_NO_ACCT, &req->flags)) + bg = true; if (test_bit(FR_ASYNC, &req->flags)) { req->args->end(fm, req->args, req->out.h.error); @@ -465,9 +466,10 @@ static void __fuse_request_send(struct fuse_req *req) if (fc->kio.op) { int ret = fc->kio.op->req_classify(req, false, false); - if (likely(!ret)) - return fc->kio.op->req_send(req, false); - else if (ret < 0) + if (likely(!ret)) { + fc->kio.op->req_send(req, false); + return; + } else if (ret < 0) return; } @@ -600,6 +602,7 @@ static int fuse_request_queue_background(struct fuse_req *req) struct fuse_conn *fc = fm->fc; struct fuse_file *ff = req->args->ff; struct fuse_iqueue *fiq = req->args->fiq; + int nonblocking = test_bit(FR_NONBLOCKING, &req->flags); int ret = -ENOTCONN; WARN_ON(!test_bit(FR_BACKGROUND, &req->flags)); @@ -609,6 +612,19 @@ static int fuse_request_queue_background(struct fuse_req *req) atomic_inc(&fc->num_waiting); } __set_bit(FR_ISREPLY, &req->flags); + + if (fc->kio.op && req->args->async && !nonblocking && + (!ff || !test_bit(FUSE_S_FAIL_IMMEDIATELY, &ff->ff_state))) { + int ret = fc->kio.op->req_classify(req, false, false); + if (likely(!ret)) { + __clear_bit(FR_BACKGROUND, &req->flags); + __set_bit(FR_NO_ACCT, &req->flags); + fc->kio.op->req_send(req, true); + return 0; + } else if (ret < 0) + return 0; + } + spin_lock(&fc->bg_lock); if (ff && test_bit(FUSE_S_FAIL_IMMEDIATELY, &ff->ff_state)) { ret = -EIO; @@ -622,7 +638,7 @@ static int 
fuse_request_queue_background(struct fuse_req *req) set_bdi_congested(fm->sb->s_bdi, BLK_RW_ASYNC); } - if (test_bit(FR_NONBLOCKING, &req->flags)) { + if (nonblocking) { fc->active_background++; spin_lock(&fiq->lock); req->in.h.unique = fuse_get_unique(fiq); diff --git a/fs/fuse/file.c b/fs/fuse/file.c index 869274095664..c685e019073d 100644 --- a/fs/fuse/file.c +++ b/fs/fuse/file.c @@ -998,6 +998,7 @@ static ssize_t fuse_async_req_send(struct fuse_mount *fm, ia->ap.args.end = fuse_aio_complete_req; ia->ap.args.may_block = io->should_dirty; + ia->ap.args.async = true; err = fuse_simple_background(fm, &ia->ap.args, GFP_KERNEL); if (err) fuse_aio_complete_req(fm, &ia->ap.args, err); diff --git a/fs/fuse/fuse_i.h b/fs/fuse/fuse_i.h index e3654005abef..d9e27b36784a 100644 --- a/fs/fuse/fuse_i.h +++ b/fs/fuse/fuse_i.h @@ -313,6 +313,7 @@ struct fuse_args { bool may_block:1; bool nonblocking:1; bool kio_internal:1; + bool async:1; struct fuse_in_arg in_args[3]; struct fuse_arg out_args[3]; void (*end)(struct fuse_mount *fm, struct fuse_args *args, int error); @@ -407,6 +408,7 @@ enum fuse_req_flag { FR_ASYNC, FR_NONBLOCKING, FR_KIO_INTERNAL, + FR_NO_ACCT, }; /** _______________________________________________ Devel mailing list Devel@openvz.org https://lists.openvz.org/mailman/listinfo/devel