This commit adds a new sysctl option: net.ipv4.tcp_migrate_req. If this option is enabled and we call listen() on multiple SO_REUSEPORT-enabled sockets and then close one of them, we will be able to migrate its child sockets to another listener.
Reviewed-by: Benjamin Herrenschmidt <b...@amazon.com> Signed-off-by: Kuniyuki Iwashima <kun...@amazon.co.jp> --- Documentation/networking/ip-sysctl.rst | 15 +++++++++++++++ include/net/netns/ipv4.h | 1 + net/ipv4/sysctl_net_ipv4.c | 9 +++++++++ 3 files changed, 25 insertions(+) diff --git a/Documentation/networking/ip-sysctl.rst b/Documentation/networking/ip-sysctl.rst index dd2b12a32b73..4116771bf5ef 100644 --- a/Documentation/networking/ip-sysctl.rst +++ b/Documentation/networking/ip-sysctl.rst @@ -712,6 +712,21 @@ tcp_syncookies - INTEGER network connections you can set this knob to 2 to enable unconditionally generation of syncookies. +tcp_migrate_req - INTEGER + By default, when a listening socket is closed, child sockets are also + closed. If it has SO_REUSEPORT enabled, the dropped connections should + have been accepted by other listeners on the same port. This option + makes it possible to migrate child sockets to another listener when + calling close() or shutdown(). + + Default: 0 + + Note that the source and destination listeners _must_ have the same + settings at the socket API level. If there are different kinds of + sockets on the port, disable this option or use + BPF_PROG_TYPE_SK_REUSEPORT program to select the correct socket by + bpf_sk_select_reuseport() or to cancel migration by returning SK_DROP. + tcp_fastopen - INTEGER Enable TCP Fast Open (RFC7413) to send and accept data in the opening SYN packet. 
diff --git a/include/net/netns/ipv4.h b/include/net/netns/ipv4.h index 8e4fcac4df72..a3edc30d6a63 100644 --- a/include/net/netns/ipv4.h +++ b/include/net/netns/ipv4.h @@ -132,6 +132,7 @@ struct netns_ipv4 { int sysctl_tcp_syn_retries; int sysctl_tcp_synack_retries; int sysctl_tcp_syncookies; + int sysctl_tcp_migrate_req; int sysctl_tcp_reordering; int sysctl_tcp_retries1; int sysctl_tcp_retries2; diff --git a/net/ipv4/sysctl_net_ipv4.c b/net/ipv4/sysctl_net_ipv4.c index 3e5f4f2e705e..6b76298fa271 100644 --- a/net/ipv4/sysctl_net_ipv4.c +++ b/net/ipv4/sysctl_net_ipv4.c @@ -933,6 +933,15 @@ static struct ctl_table ipv4_net_table[] = { .proc_handler = proc_dointvec }, #endif + { + .procname = "tcp_migrate_req", + .data = &init_net.ipv4.sysctl_tcp_migrate_req, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = proc_dointvec_minmax, + .extra1 = SYSCTL_ZERO, + .extra2 = SYSCTL_ONE + }, { .procname = "tcp_reordering", .data = &init_net.ipv4.sysctl_tcp_reordering, -- 2.17.2 (Apple Git-113)