The commit is pushed to "branch-rh9-5.14.0-427.44.1.vz9.80.x-ovz" and will 
appear at g...@bitbucket.org:openvz/vzkernel.git
after rh9-5.14.0-427.44.1.vz9.80.23
------>
commit 44b11631cff26667d0d2d63f1e3dd8cb3394d43a
Author: Alexey Kuznetsov <kuz...@virtuozzo.com>
Date:   Fri Mar 28 20:01:03 2025 +0800

    fs/fuse/kio: tidy up RPC_AFFINITY_RSS
    
    Also, enable it for rdma (tested with mellanox) and unix sockets.
    Now it provides essentially perfect affinity: socket contexts
    never hit lock contention or cache bouncing, provided RSS and XPS
    are configured correctly.
    
    Change fallback when rx_cpu is not available from RPC_AFFINITY_RETENT
    to RPC_AFFINITY_FAIR_SPREAD.
    
    Unfortunately, we cannot enable it by default, since enabling
    RSS/XPS is an advanced performance-tuning step.
    
    Signed-off-by: Alexey Kuznetsov <kuz...@virtuozzo.com>
    
    Feature: vStorage
---
 fs/fuse/kio/pcs/pcs_rdma_io.c |  1 +
 fs/fuse/kio/pcs/pcs_rpc.c     | 25 ++++++++++++++++---------
 fs/fuse/kio/pcs/pcs_rpc.h     |  1 +
 fs/fuse/kio/pcs/pcs_sock_io.c |  5 ++---
 4 files changed, 20 insertions(+), 12 deletions(-)

diff --git a/fs/fuse/kio/pcs/pcs_rdma_io.c b/fs/fuse/kio/pcs/pcs_rdma_io.c
index 2755b13fb8a5..d50f2c1e97e3 100644
--- a/fs/fuse/kio/pcs/pcs_rdma_io.c
+++ b/fs/fuse/kio/pcs/pcs_rdma_io.c
@@ -1096,6 +1096,7 @@ static void pcs_rdma_cq_comp_handler(struct ib_cq *cq, 
void *private)
 
        set_bit(PCS_RDMA_IO_CQE, &rio->io_flags);
        wake_up(&rio->waitq);
+       ep->rx_cpu = smp_processor_id();
        pcs_rpc_kick_queue(ep);
 }
 
diff --git a/fs/fuse/kio/pcs/pcs_rpc.c b/fs/fuse/kio/pcs/pcs_rpc.c
index b9774ce1ab34..71c2a3b54da7 100644
--- a/fs/fuse/kio/pcs/pcs_rpc.c
+++ b/fs/fuse/kio/pcs/pcs_rpc.c
@@ -339,6 +339,7 @@ void pcs_rpc_attach_new_ep(struct pcs_rpc * ep, struct 
pcs_rpc_engine * eng)
        atomic_set(&ep->netlat_cnt, 0);
        atomic64_set(&ep->netlat_avg, 0);
        ep->cpu = WORK_CPU_UNBOUND;
+       ep->rx_cpu = WORK_CPU_UNBOUND;
 
        ep->gc = NULL;
        if (eng->max_gc_index)
@@ -863,27 +864,33 @@ static void pcs_rpc_affinity(struct pcs_rpc *ep, bool 
was_idle)
                                ep->cpu = WORK_CPU_UNBOUND;
                        }
                        break;
-               case RPC_AFFINITY_RSS:
-                       if (!(ep->flags & PCS_RPC_F_LOCAL) && ep->addr.type != 
PCS_ADDRTYPE_RDMA)
-                               break;
+               case RPC_AFFINITY_RSS: {
+                       int rx_cpu = READ_ONCE(ep->rx_cpu);
+
+                       if (rx_cpu != WORK_CPU_UNBOUND && ep->cpu != rx_cpu)
+                               ep->cpu = rx_cpu;
                        fallthrough;
+               }
+               case RPC_AFFINITY_FAIR_SPREAD:
+                       if (ep->cpu == WORK_CPU_UNBOUND ||
+                           (time_is_before_jiffies(ep->cpu_stamp) && was_idle))
+                               pcs_rpc_cpu_select(ep);
+                       break;
                case RPC_AFFINITY_RETENT:
                        /* Naive socket-to-cpu binding approach */
-                       if (time_is_before_jiffies(ep->cpu_stamp) && was_idle) {
+                       if (ep->cpu == WORK_CPU_UNBOUND ||
+                           (time_is_before_jiffies(ep->cpu_stamp) && 
was_idle)) {
                                ep->cpu_stamp = jiffies + rpc_cpu_time_slice;
                                ep->cpu = smp_processor_id();
                        }
                        break;
                case RPC_AFFINITY_SPREAD:
-                       if (time_is_before_jiffies(ep->cpu_stamp) && was_idle) {
+                       if (ep->cpu == WORK_CPU_UNBOUND ||
+                           (time_is_before_jiffies(ep->cpu_stamp) && 
was_idle)) {
                                ep->cpu_stamp = jiffies + rpc_cpu_time_slice;
                                ep->cpu = pcs_rpc_cpu_next();
                        }
                        break;
-               case RPC_AFFINITY_FAIR_SPREAD:
-                       if (time_is_before_jiffies(ep->cpu_stamp) && was_idle)
-                               pcs_rpc_cpu_select(ep);
-                       break;
                default:
                        pr_err("Unknown affinity mode: %u\n", 
rpc_affinity_mode);
        }
diff --git a/fs/fuse/kio/pcs/pcs_rpc.h b/fs/fuse/kio/pcs/pcs_rpc.h
index cb18557a3da5..0bafc8a74263 100644
--- a/fs/fuse/kio/pcs/pcs_rpc.h
+++ b/fs/fuse/kio/pcs/pcs_rpc.h
@@ -142,6 +142,7 @@ struct pcs_rpc
        int                     cpu;
        unsigned long           cpu_stamp;
        struct delayed_work     cpu_timer_work; /* reset cpu affinity after 
being idle */
+       int                     rx_cpu;
 
        struct mutex            mutex;
        u64                     accounted;
diff --git a/fs/fuse/kio/pcs/pcs_sock_io.c b/fs/fuse/kio/pcs/pcs_sock_io.c
index 7c62f483ea45..805b8f1e56b0 100644
--- a/fs/fuse/kio/pcs/pcs_sock_io.c
+++ b/fs/fuse/kio/pcs/pcs_sock_io.c
@@ -561,9 +561,8 @@ static void pcs_sk_kick_queue(struct sock *sk)
        sio = rcu_dereference_sk_user_data(sk);
        if (sio) {
                struct pcs_rpc *ep = sio->netio.parent;
-               TRACE(PEER_FMT" queue cpu=%d\n", PEER_ARGS(ep), 
smp_processor_id());
-               if (rpc_affinity_mode == RPC_AFFINITY_RSS && !(ep->flags & 
PCS_RPC_F_LOCAL))
-                       ep->cpu = smp_processor_id();
+               DTRACE(PEER_FMT" queue cpu=%d\n", PEER_ARGS(ep), 
smp_processor_id());
+               ep->rx_cpu = smp_processor_id();
                pcs_rpc_kick_queue(ep);
        }
        rcu_read_unlock();
_______________________________________________
Devel mailing list
Devel@openvz.org
https://lists.openvz.org/mailman/listinfo/devel

Reply via email to