This patch adds support for setting per-connection SYN and SYN-ACK RTOs from within a BPF_SOCKET_OPS program. For example, a program can set small RTOs when both hosts are known to be within the same datacenter.
Signed-off-by: Lawrence Brakmo <bra...@fb.com> --- include/net/tcp.h | 11 +++++++++++ include/uapi/linux/bpf.h | 3 +++ net/ipv4/tcp_input.c | 3 ++- net/ipv4/tcp_output.c | 2 +- 4 files changed, 17 insertions(+), 2 deletions(-) diff --git a/include/net/tcp.h b/include/net/tcp.h index bc1c92c..8353563 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -2019,4 +2019,15 @@ static inline int tcp_call_bpf(struct sock *sk, bool is_req_sock, int op) } #endif +static inline u32 tcp_timeout_init(struct sock *sk, bool is_req_sock) +{ + int timeout; + + timeout = tcp_call_bpf(sk, is_req_sock, BPF_SOCKET_OPS_TIMEOUT_INIT); + + if (timeout <= 0) + timeout = TCP_TIMEOUT_INIT; + return timeout; +} + #endif /* _TCP_H */ diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index 06e7ee1..c9da55d 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -699,6 +699,9 @@ struct bpf_socket_ops { */ enum { BPF_SOCKET_OPS_VOID, + BPF_SOCKET_OPS_TIMEOUT_INIT, /* Should return SYN-RTO value to use or + * -1 if default value should be used + */ }; #endif /* _UAPI__LINUX_BPF_H__ */ diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index 4ea8ec5..fefc062 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -6405,7 +6405,8 @@ int tcp_conn_request(struct request_sock_ops *rsk_ops, } else { tcp_rsk(req)->tfo_listener = false; if (!want_cookie) - inet_csk_reqsk_queue_hash_add(sk, req, TCP_TIMEOUT_INIT); + inet_csk_reqsk_queue_hash_add(sk, req, + tcp_timeout_init((struct sock *)req, true)); af_ops->send_synack(sk, dst, &fl, req, &foc, !want_cookie ? 
TCP_SYNACK_NORMAL : TCP_SYNACK_COOKIE); diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c index e3aab1c..503e478 100644 --- a/net/ipv4/tcp_output.c +++ b/net/ipv4/tcp_output.c @@ -3326,7 +3326,7 @@ static void tcp_connect_init(struct sock *sk) tp->rcv_wup = tp->rcv_nxt; tp->copied_seq = tp->rcv_nxt; - inet_csk(sk)->icsk_rto = TCP_TIMEOUT_INIT; + inet_csk(sk)->icsk_rto = tcp_timeout_init(sk, false); inet_csk(sk)->icsk_retransmits = 0; tcp_clear_retrans(tp); } -- 2.9.3