The commit is pushed to "branch-rh9-5.14.0-284.25.1.vz9.30.x-ovz" and will appear at https://src.openvz.org/scm/ovz/vzkernel.git after rh9-5.14.0-284.25.1.vz9.30.19 ------> commit 31203864352ac758e9b586a8a3c40ca0f1e9a798 Author: Alexey Kuznetsov <kuz...@virtuozzo.com> Date: Tue Feb 6 01:34:44 2024 +0800
fs/fuse: hashed write buckets Previous write record was 3.5G/sec and this ceiling could not be penetrated even though the eventloop had lots of spare CPU. The bottleneck was diagnosed as saturation of the thread copying requests from the kernel. So, we have to switch to spreading the copying over multiple threads, similar to the scheme used for reads. So, for writes we introduce a two-level table, keyed by request size and by inode hash. New record is 4.8G/sec. Further progress is not going to be easy, since now we simultaneously: - saturate the eventloop - saturate the ec_offload thread; increasing the number of offload threads does not help, we just get 2 threads each 50% busy - are close to the 100Gbit Ethernet limit, with 9.6G/sec of network load https://pmc.acronis.work/browse/VSTOR-79527 Signed-off-by: Alexey Kuznetsov <kuz...@acronis.com> --- fs/fuse/file.c | 4 ++-- fs/fuse/fuse_i.h | 1 + fs/fuse/inode.c | 9 +++++++-- 3 files changed, 10 insertions(+), 4 deletions(-) diff --git a/fs/fuse/file.c b/fs/fuse/file.c index 071225151a5f..b5abc7b260b6 100644 --- a/fs/fuse/file.c +++ b/fs/fuse/file.c @@ -791,8 +791,8 @@ struct fuse_iqueue *fuse_route_io(struct fuse_conn *fc, struct fuse_rtable *rt, if (iosize == 0) return NULL; - for (i = 0; i < rt->rt_size; i++) { - fiq = rt->iqs_table + i; + for (i = 0; i < rt->rt_size; i += rt->divisor) { + fiq = rt->iqs_table + i + (jhash_1word((u32)inode->i_ino, 0) % rt->divisor); if (iosize <= fiq->size && fiq->handled_by_fud) return fiq; } diff --git a/fs/fuse/fuse_i.h b/fs/fuse/fuse_i.h index c8bc1f75f4a7..9a5a52afaaa6 100644 --- a/fs/fuse/fuse_i.h +++ b/fs/fuse/fuse_i.h @@ -673,6 +673,7 @@ static inline unsigned int fuse_qhash_bucket(void) struct fuse_rtable { int type; int rt_size; + int divisor; union { void *iqs; struct fuse_iqueue __percpu *iqs_cpu; diff --git a/fs/fuse/inode.c b/fs/fuse/inode.c index d5c3f7ef7da7..53ee80addbe2 100644 --- a/fs/fuse/inode.c +++ b/fs/fuse/inode.c @@ -1005,6 +1005,8 @@ static int alloc_rt_table(struct fuse_dev *fud, struct fuse_rtable *rt, int res = -EINVAL; 
int idx; + rt->divisor = 1; + switch (req->type) { case FUSE_ROUTING_CPU: if (req->index >= NR_CPUS || !cpu_possible(req->index)) @@ -1020,7 +1022,9 @@ static int alloc_rt_table(struct fuse_dev *fud, struct fuse_rtable *rt, res = 0; break; case FUSE_ROUTING_SIZE: - if (req->key > FUSE_MAX_MAX_PAGES*PAGE_SIZE || (req->key % PAGE_SIZE)) + rt->divisor = 1 + (req->key & (PAGE_SIZE - 1)); + if (rt->divisor > req->table_size || + (req->key & ~(PAGE_SIZE - 1)) > FUSE_MAX_MAX_PAGES*PAGE_SIZE) break; fallthrough; case FUSE_ROUTING_HASH: @@ -1044,7 +1048,7 @@ static int alloc_rt_table(struct fuse_dev *fud, struct fuse_rtable *rt, static void adjust_rt_table(struct fuse_dev *fud, struct fuse_iqueue *fiq, struct fuse_iq_routing *req) { - u32 size = req->key; + u32 size = req->key & ~(PAGE_SIZE - 1); fiq->size = size; @@ -1090,6 +1094,7 @@ int fuse_install_iq_route(struct fuse_dev *fud, struct fuse_iq_routing *req) if (rt->iqs == NULL) { rt->iqs = rtl.iqs; + rt->divisor = rtl.divisor; rt->type = rtl.type; rt->rt_size = rtl.rt_size; } else if (rt->iqs != rtl.iqs) { _______________________________________________ Devel mailing list Devel@openvz.org https://lists.openvz.org/mailman/listinfo/devel