Support to select device queue for transmit based on the per thread transmit queue.
Patch includes: - Add a global queue (gqid) mapping to sock - Function to convert gqid in a sock to a device queue (dqid) by calling sk_tx_gqid_to_dqid_get - Function sock_record_tx_queue to record a queue in a socket taken from ptq_threads in struct task - Call sock_record_tx_queue from af_inet send, listen, and accept functions to populate the socket's gqid for steerig - In netdev_pick_tx try to take the queue index from the socket using sk_tx_gqid_to_dqid_get --- include/net/sock.h | 63 ++++++++++++++++++++++++++++++++++++++++++++++ net/core/dev.c | 9 ++++--- net/ipv4/af_inet.c | 6 +++++ 3 files changed, 75 insertions(+), 3 deletions(-) diff --git a/include/net/sock.h b/include/net/sock.h index acb76cfaae1b..5ec9d02e7ad0 100644 --- a/include/net/sock.h +++ b/include/net/sock.h @@ -140,6 +140,7 @@ typedef __u64 __bitwise __addrpair; * @skc_node: main hash linkage for various protocol lookup tables * @skc_nulls_node: main hash linkage for TCP/UDP/UDP-Lite protocol * @skc_tx_queue_mapping: tx queue number for this connection + * @skc_tx_gqid_mapping: global tx queue number for sending * @skc_rx_queue_mapping: rx queue number for this connection * @skc_flags: place holder for sk_flags * %SO_LINGER (l_onoff), %SO_BROADCAST, %SO_KEEPALIVE, @@ -225,6 +226,9 @@ struct sock_common { struct hlist_nulls_node skc_nulls_node; }; unsigned short skc_tx_queue_mapping; +#ifdef CONFIG_RPS + unsigned short skc_tx_gqid_mapping; +#endif #ifdef CONFIG_XPS unsigned short skc_rx_queue_mapping; #endif @@ -353,6 +357,9 @@ struct sock { #define sk_nulls_node __sk_common.skc_nulls_node #define sk_refcnt __sk_common.skc_refcnt #define sk_tx_queue_mapping __sk_common.skc_tx_queue_mapping +#ifdef CONFIG_RPS +#define sk_tx_gqid_mapping __sk_common.skc_tx_gqid_mapping +#endif #ifdef CONFIG_XPS #define sk_rx_queue_mapping __sk_common.skc_rx_queue_mapping #endif @@ -1792,6 +1799,34 @@ static inline int sk_receive_skb(struct sock *sk, struct sk_buff *skb, return __sk_receive_skb(sk, skb, nested, 1, true); } +static inline int sk_tx_gqid_get(const struct sock *sk) +{ +#ifdef CONFIG_RPS + if (sk && sk->sk_tx_gqid_mapping != NO_QUEUE) + return sk->sk_tx_gqid_mapping; +#endif + + return -1; +} + +static inline void sk_tx_gqid_set(struct sock *sk, int gqid) +{ +#ifdef CONFIG_RPS + /* sk_tx_queue_mapping accept only up to RPS_MAX_QID (0x7ffe) */ + if (WARN_ON_ONCE((unsigned int)gqid > RPS_MAX_QID && + gqid != NO_QUEUE)) + return; + sk->sk_tx_gqid_mapping = gqid; +#endif +} + +static inline void sk_tx_gqid_clear(struct sock *sk) +{ +#ifdef CONFIG_RPS + sk->sk_tx_gqid_mapping = NO_QUEUE; +#endif +} + static inline void sk_tx_queue_set(struct sock *sk, int tx_queue) { /* sk_tx_queue_mapping accept only upto a 16-bit value */ @@ -1803,6 +1838,9 @@ static inline void sk_tx_queue_set(struct sock *sk, int tx_queue) static inline void sk_tx_queue_clear(struct sock *sk) { sk->sk_tx_queue_mapping = NO_QUEUE; + + /* Clear tx_gqid at same points */ + sk_tx_gqid_clear(sk); } static inline int sk_tx_queue_get(const struct sock *sk) @@ -1813,6 +1851,31 @@ static inline int sk_tx_queue_get(const struct sock *sk) return -1; } +static inline int sk_tx_gqid_to_dqid_get(const struct net_device *dev, + const struct sock *sk) +{ + int ret = -1; +#ifdef CONFIG_RPS + int gqid; + u16 dqid; + + gqid = sk_tx_gqid_get(sk); + if (gqid >= 0) { + dqid = netdev_tx_gqid_to_dqid(dev, gqid); + if (dqid != NO_QUEUE) + ret = dqid; + } +#endif + return ret; +} + +static inline void sock_record_tx_queue(struct sock *sk) +{ +#ifdef CONFIG_PER_THREAD_QUEUES + sk_tx_gqid_set(sk, current->ptq_queues.txq_id); +#endif +} + static inline void sk_rx_queue_set(struct sock *sk, const struct sk_buff *skb) { #ifdef CONFIG_XPS diff --git a/net/core/dev.c b/net/core/dev.c index f64bf6608775..f4478c9b1c9c 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -3982,10 +3982,13 @@ u16 netdev_pick_tx(struct net_device *dev, struct sk_buff *skb, if (queue_index < 0 || skb->ooo_okay || queue_index >= dev->real_num_tx_queues) { - int new_index = get_xps_queue(dev, sb_dev, skb); + int new_index = sk_tx_gqid_to_dqid_get(dev, sk); - if (new_index < 0) - new_index = skb_tx_hash(dev, sb_dev, skb); + if (new_index < 0) { + new_index = get_xps_queue(dev, sb_dev, skb); + if (new_index < 0) + new_index = skb_tx_hash(dev, sb_dev, skb); + } if (queue_index != new_index && sk && sk_fullsock(sk) && diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c index 02aa5cb3a4fd..9b36aa3d1622 100644 --- a/net/ipv4/af_inet.c +++ b/net/ipv4/af_inet.c @@ -201,6 +201,8 @@ int inet_listen(struct socket *sock, int backlog) lock_sock(sk); + sock_record_tx_queue(sk); + err = -EINVAL; if (sock->state != SS_UNCONNECTED || sock->type != SOCK_STREAM) goto out; @@ -630,6 +632,8 @@ int __inet_stream_connect(struct socket *sock, struct sockaddr *uaddr, } } + sock_record_tx_queue(sk); + switch (sock->state) { default: err = -EINVAL; @@ -742,6 +746,7 @@ int inet_accept(struct socket *sock, struct socket *newsock, int flags, lock_sock(sk2); sock_rps_record_flow(sk2); + sock_record_tx_queue(sk2); WARN_ON(!((1 << sk2->sk_state) & (TCPF_ESTABLISHED | TCPF_SYN_RECV | TCPF_CLOSE_WAIT | TCPF_CLOSE))); @@ -794,6 +799,7 @@ EXPORT_SYMBOL(inet_getname); int inet_send_prepare(struct sock *sk) { sock_rps_record_flow(sk); + sock_record_tx_queue(sk); /* We may need to bind the socket. */ if (!inet_sk(sk)->inet_num && !sk->sk_prot->no_autobind && -- 2.25.1