The commit is pushed to "branch-rh9-5.14.0-427.44.1.vz9.80.x-ovz" and will appear at git@bitbucket.org:openvz/vzkernel.git after rh9-5.14.0-427.44.1.vz9.80.4 ------> commit 3ba3aabb0b42206cc13cee6ab7e456a50237d80c Author: Alexey Kuznetsov <kuz...@virtuozzo.com> Date: Sat Jan 18 02:08:58 2025 +0800
fs/fuse/kio: switch to TCP_NODELAY/MSG_MORE from TCP_CORK In user space we switched to this mode long ago, because it saves a syscall per message. In the kernel the syscall cost is not an issue, yet we still have to make a downcall into the network stack, taking the socket lock, etc. So let us do the same here. Signed-off-by: Alexey Kuznetsov <kuz...@virtuozzo.com> Feature: vStorage --- fs/fuse/kio/pcs/pcs_sock_conn.c | 21 ++++++++++++++++++--- fs/fuse/kio/pcs/pcs_sock_io.c | 14 +++++++++++--- fs/fuse/kio/pcs/pcs_sock_io.h | 1 + 3 files changed, 30 insertions(+), 6 deletions(-) diff --git a/fs/fuse/kio/pcs/pcs_sock_conn.c b/fs/fuse/kio/pcs/pcs_sock_conn.c index bce2d898fe5d..69cfe0026b98 100644 --- a/fs/fuse/kio/pcs/pcs_sock_conn.c +++ b/fs/fuse/kio/pcs/pcs_sock_conn.c @@ -9,6 +9,7 @@ #include <linux/module.h> #include <linux/types.h> #include <linux/tcp.h> +#include <linux/module.h> #include "pcs_types.h" #include "pcs_sock_io.h" @@ -18,6 +19,10 @@ #include "log.h" #include "fuse_ktrace.h" +static unsigned int pcs_use_cork; +module_param(pcs_use_cork, uint, 0644); +MODULE_PARM_DESC(pcs_use_cork, "Use TCP_CORK instead of TCP_NODELAY"); + static inline void pcs_sock_keepalive(struct socket *sock) { sock_set_keepalive(sock->sk); @@ -33,6 +38,11 @@ static inline void pcs_sock_cork(struct socket *sock) tcp_sock_set_cork(sock->sk, true); } +static inline void pcs_sock_nodelay(struct socket *sock) +{ + tcp_sock_set_nodelay(sock->sk); +} + static inline void set_sock_parameters(struct socket *sock, struct pcs_cluster_core *cc) { if (sock->sk->sk_family == PF_INET || sock->sk->sk_family == PF_INET6) { @@ -82,7 +92,7 @@ void pcs_sockconnect_start(struct pcs_rpc *ep) iov_iter_kvec(&sio->read_iter, READ, NULL, 0, 0); iov_iter_kvec(&sio->write_iter, WRITE, NULL, 0, 0); sio->hdr_max = sizeof(struct pcs_rpc_hdr); - sio->flags = sa->sa_family != AF_UNIX ? 
PCS_SOCK_F_CORK : 0; + sio->flags = 0; err = sock_create(sa->sa_family, SOCK_STREAM, 0, &sock); if (err < 0) { @@ -101,8 +111,13 @@ void pcs_sockconnect_start(struct pcs_rpc *ep) } pcs_sock_keepalive(sock); if (sa->sa_family == PF_INET || sa->sa_family == PF_INET6) { - pcs_sock_cork(sock); - sio->flags |= PCS_SOCK_F_CORK; + if (pcs_use_cork) { + pcs_sock_cork(sock); + sio->flags |= PCS_SOCK_F_CORK; + } else { + pcs_sock_nodelay(sock); + sio->flags |= PCS_SOCK_F_NODELAY; + } } set_sock_parameters(sock, container_of(ep->eng, struct pcs_cluster_core, eng)); diff --git a/fs/fuse/kio/pcs/pcs_sock_io.c b/fs/fuse/kio/pcs/pcs_sock_io.c index 29a5c272d6fe..7c62f483ea45 100644 --- a/fs/fuse/kio/pcs/pcs_sock_io.c +++ b/fs/fuse/kio/pcs/pcs_sock_io.c @@ -12,6 +12,7 @@ #include <linux/types.h> #include <linux/highmem.h> #include <linux/file.h> +#include <linux/module.h> #include "pcs_types.h" #include "pcs_sock_io.h" @@ -20,6 +21,9 @@ #include "log.h" #include "fuse_ktrace.h" +static unsigned int pcs_use_eor; +module_param(pcs_use_eor, uint, 0644); +MODULE_PARM_DESC(pcs_use_eor, "Use MSG_EOR"); void pcs_msg_sent(struct pcs_msg * msg) { @@ -118,12 +122,15 @@ static bool pcs_should_fail_sock_io(void) } #endif -static int do_send_one_seg(struct socket *sock, struct iov_iter *it, size_t left) +static int do_send_one_seg(struct socket *sock, struct iov_iter *it, size_t left, int has_more) { int ret = -EIO; size_t size = iov_iter_single_seg_count(it); bool more = (size < left); - int flags = (MSG_DONTWAIT | MSG_NOSIGNAL) | (more ? MSG_MORE : MSG_EOR); + int flags = (MSG_DONTWAIT | MSG_NOSIGNAL) | ((more || has_more) ? MSG_MORE : 0); + + if (unlikely(pcs_use_eor) && !more) + flags |= MSG_EOR; DTRACE("sock(%p) len:%ld, more:%d\n", sock, iov_iter_count(it), more); @@ -372,6 +379,7 @@ static void pcs_sockio_send(struct pcs_sockio *sio) /* TODO: cond resched here? 
*/ while (sio->write_offset < msg->size) { size_t left = msg->size - sio->write_offset; + int has_more = (msg->list.next != &sio->write_queue); int n; TRACE(PEER_FMT "offset:%d msg:%p left:%ld, it->len:%ld\n", PEER_ARGS(ep), sio->write_offset, msg, @@ -382,7 +390,7 @@ static void pcs_sockio_send(struct pcs_sockio *sio) msg->get_iter(msg, sio->write_offset, it, WRITE); } BUG_ON(iov_iter_count(it) > left); - n = do_send_one_seg(sio->socket, it, left); + n = do_send_one_seg(sio->socket, it, left, has_more); if (n > 0) { sio->write_offset += n; iov_iter_advance(it, n); diff --git a/fs/fuse/kio/pcs/pcs_sock_io.h b/fs/fuse/kio/pcs/pcs_sock_io.h index 872faffefe01..09870b38cdad 100644 --- a/fs/fuse/kio/pcs/pcs_sock_io.h +++ b/fs/fuse/kio/pcs/pcs_sock_io.h @@ -101,6 +101,7 @@ enum PCS_SOCK_F_EOF = 8, PCS_SOCK_F_POOLIN = 0x10, PCS_SOCK_F_POOLOUT = 0x20, + PCS_SOCK_F_NODELAY = 0x40, }; enum _______________________________________________ Devel mailing list Devel@openvz.org https://lists.openvz.org/mailman/listinfo/devel