This commit adds a new sysctl option: net.ipv4.tcp_migrate_req. If this
option is enabled and we close one of several SO_REUSEPORT-enabled
sockets that are listen()ing on the same port, its child sockets can be
migrated to another listener.

Reviewed-by: Benjamin Herrenschmidt <b...@amazon.com>
Signed-off-by: Kuniyuki Iwashima <kun...@amazon.co.jp>
---
 Documentation/networking/ip-sysctl.rst | 15 +++++++++++++++
 include/net/netns/ipv4.h               |  1 +
 net/ipv4/sysctl_net_ipv4.c             |  9 +++++++++
 3 files changed, 25 insertions(+)

diff --git a/Documentation/networking/ip-sysctl.rst b/Documentation/networking/ip-sysctl.rst
index dd2b12a32b73..4116771bf5ef 100644
--- a/Documentation/networking/ip-sysctl.rst
+++ b/Documentation/networking/ip-sysctl.rst
@@ -712,6 +712,21 @@ tcp_syncookies - INTEGER
        network connections you can set this knob to 2 to enable
        unconditionally generation of syncookies.
 
+tcp_migrate_req - BOOLEAN
+       By default, when a listening socket is closed, child sockets are also
+       closed. If it has SO_REUSEPORT enabled, the dropped connections should
+       have been accepted by other listeners on the same port. This option
+       makes it possible to migrate child sockets to another listener when
+       calling close() or shutdown().
+
+       Default: 0
+
+       Note that the source and destination listeners _must_ have the same
+       settings at the socket API level. If there are different kinds of
+       sockets on the port, disable this option or use a
+       BPF_PROG_TYPE_SK_REUSEPORT program to select the correct socket with
+       bpf_sk_select_reuseport() or to cancel migration by returning SK_DROP.
+
 tcp_fastopen - INTEGER
        Enable TCP Fast Open (RFC7413) to send and accept data in the opening
        SYN packet.
diff --git a/include/net/netns/ipv4.h b/include/net/netns/ipv4.h
index 8e4fcac4df72..a3edc30d6a63 100644
--- a/include/net/netns/ipv4.h
+++ b/include/net/netns/ipv4.h
@@ -132,6 +132,7 @@ struct netns_ipv4 {
        int sysctl_tcp_syn_retries;
        int sysctl_tcp_synack_retries;
        int sysctl_tcp_syncookies;
+       int sysctl_tcp_migrate_req;
        int sysctl_tcp_reordering;
        int sysctl_tcp_retries1;
        int sysctl_tcp_retries2;
diff --git a/net/ipv4/sysctl_net_ipv4.c b/net/ipv4/sysctl_net_ipv4.c
index 3e5f4f2e705e..6b76298fa271 100644
--- a/net/ipv4/sysctl_net_ipv4.c
+++ b/net/ipv4/sysctl_net_ipv4.c
@@ -933,6 +933,15 @@ static struct ctl_table ipv4_net_table[] = {
                .proc_handler   = proc_dointvec
        },
 #endif
+       {
+               .procname       = "tcp_migrate_req",
+               .data           = &init_net.ipv4.sysctl_tcp_migrate_req,
+               .maxlen         = sizeof(int),
+               .mode           = 0644,
+               .proc_handler   = proc_dointvec_minmax,
+               .extra1         = SYSCTL_ZERO,
+               .extra2         = SYSCTL_ONE
+       },
        {
                .procname       = "tcp_reordering",
                .data           = &init_net.ipv4.sysctl_tcp_reordering,
-- 
2.17.2 (Apple Git-113)

Reply via email to