commit:     b40e4b7205dd73330cf29bf39590327f973a473b
Author:     Mike Pagano <mpagano <AT> gentoo <DOT> org>
AuthorDate: Tue Dec 16 17:29:50 2014 +0000
Commit:     Mike Pagano <mpagano <AT> gentoo <DOT> org>
CommitDate: Tue Dec 16 17:29:50 2014 +0000
URL:        http://sources.gentoo.org/gitweb/?p=proj/linux-patches.git;a=commit;h=b40e4b72
Updating multipath tcp patch --- 0000_README | 2 +- ... => 5010_multipath-tcp-v3.16-ac0ec67aa8bb.patch | 250 ++++++++++++--------- 2 files changed, 139 insertions(+), 113 deletions(-) diff --git a/0000_README b/0000_README index 8719a11..7122ab1 100644 --- a/0000_README +++ b/0000_README @@ -118,7 +118,7 @@ Patch: 5003_BFQ-3-block-add-Early-Queue-Merge-EQM-v7r6-for-3.16.0.patch From: http://algo.ing.unimo.it/people/paolo/disk_sched/ Desc: BFQ v7r6 patch 3 for 3.16: Early Queue Merge (EQM) -Patch: 5010_multipath-tcp-v3.16-075df3a63833.patch +Patch: 5010_multipath-tcp-v3.16-ac0ec67aa8bb.patch From: http://multipath-tcp.org/ Desc: Patch for simultaneous use of several IP-addresses/interfaces in TCP for better resource utilization, better throughput and smoother reaction to failures. diff --git a/5010_multipath-tcp-v3.16-075df3a63833.patch b/5010_multipath-tcp-v3.16-ac0ec67aa8bb.patch similarity index 98% rename from 5010_multipath-tcp-v3.16-075df3a63833.patch rename to 5010_multipath-tcp-v3.16-ac0ec67aa8bb.patch index 7520b4a..2858f5b 100644 --- a/5010_multipath-tcp-v3.16-075df3a63833.patch +++ b/5010_multipath-tcp-v3.16-ac0ec67aa8bb.patch @@ -1991,7 +1991,7 @@ index 156350745700..0e23cae8861f 100644 struct timewait_sock_ops; struct inet_hashinfo; diff --git a/include/net/tcp.h b/include/net/tcp.h -index 7286db80e8b8..ff92e74cd684 100644 +index 7286db80e8b8..2130c1c7fe6e 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -177,6 +177,7 @@ void tcp_time_wait(struct sock *sk, int state, int timeo); @@ -2030,7 +2030,7 @@ index 7286db80e8b8..ff92e74cd684 100644 extern struct inet_timewait_death_row tcp_death_row; /* sysctl variables for tcp */ -@@ -344,6 +366,107 @@ extern struct proto tcp_prot; +@@ -344,6 +366,108 @@ extern struct proto tcp_prot; #define TCP_ADD_STATS_USER(net, field, val) SNMP_ADD_STATS_USER((net)->mib.tcp_statistics, field, val) #define TCP_ADD_STATS(net, field, val) SNMP_ADD_STATS((net)->mib.tcp_statistics, field, val) @@ -2040,6 +2040,7 @@ index 7286db80e8b8..ff92e74cd684 100644 + +struct mptcp_options_received; + ++void tcp_cwnd_validate(struct sock *sk, bool is_cwnd_limited); +void tcp_enter_quickack_mode(struct sock *sk); +int tcp_close_state(struct sock *sk); +void tcp_minshall_update(struct tcp_sock *tp, unsigned int mss_now, @@ -2138,7 +2139,7 @@ index 7286db80e8b8..ff92e74cd684 100644 void tcp_tasklet_init(void); void tcp_v4_err(struct sk_buff *skb, u32); -@@ -440,6 +563,7 @@ int tcp_recvmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg, +@@ -440,6 +564,7 @@ int tcp_recvmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg, size_t len, int nonblock, int flags, int *addr_len); void tcp_parse_options(const struct sk_buff *skb, struct tcp_options_received *opt_rx, @@ -2146,7 +2147,7 @@ index 7286db80e8b8..ff92e74cd684 100644 int estab, struct tcp_fastopen_cookie *foc); const u8 *tcp_parse_md5sig_option(const struct tcphdr *th); -@@ -493,14 +617,8 @@ static inline u32 tcp_cookie_time(void) +@@ -493,14 +618,8 @@ static inline u32 tcp_cookie_time(void) u32 __cookie_v4_init_sequence(const struct iphdr *iph, const struct tcphdr *th, u16 *mssp); @@ -2163,7 +2164,7 @@ index 7286db80e8b8..ff92e74cd684 100644 #endif __u32 cookie_init_timestamp(struct request_sock *req); -@@ -516,13 +634,6 @@ u32 __cookie_v6_init_sequence(const struct ipv6hdr *iph, +@@ -516,13 +635,6 @@ u32 __cookie_v6_init_sequence(const struct ipv6hdr *iph, const struct tcphdr *th, u16 *mssp); __u32 cookie_v6_init_sequence(struct sock *sk, const struct sk_buff *skb, __u16 *mss); @@ -2177,7 
+2178,7 @@ index 7286db80e8b8..ff92e74cd684 100644 #endif /* tcp_output.c */ -@@ -551,10 +662,17 @@ void tcp_send_delayed_ack(struct sock *sk); +@@ -551,10 +663,17 @@ void tcp_send_delayed_ack(struct sock *sk); void tcp_send_loss_probe(struct sock *sk); bool tcp_schedule_loss_probe(struct sock *sk); @@ -2195,7 +2196,7 @@ index 7286db80e8b8..ff92e74cd684 100644 /* tcp_timer.c */ void tcp_init_xmit_timers(struct sock *); -@@ -703,14 +821,27 @@ void tcp_send_window_probe(struct sock *sk); +@@ -703,14 +822,27 @@ void tcp_send_window_probe(struct sock *sk); */ struct tcp_skb_cb { union { @@ -2226,7 +2227,7 @@ index 7286db80e8b8..ff92e74cd684 100644 __u8 tcp_flags; /* TCP header flags. (tcp[13]) */ __u8 sacked; /* State flags for SACK/FACK. */ -@@ -1075,7 +1206,8 @@ u32 tcp_default_init_rwnd(u32 mss); +@@ -1075,7 +1207,8 @@ u32 tcp_default_init_rwnd(u32 mss); /* Determine a window scaling and initial window to offer. */ void tcp_select_initial_window(int __space, __u32 mss, __u32 *rcv_wnd, __u32 *window_clamp, int wscale_ok, @@ -2236,7 +2237,7 @@ index 7286db80e8b8..ff92e74cd684 100644 static inline int tcp_win_from_space(int space) { -@@ -1084,15 +1216,34 @@ static inline int tcp_win_from_space(int space) +@@ -1084,6 +1217,19 @@ static inline int tcp_win_from_space(int space) space - (space>>sysctl_tcp_adv_win_scale); } @@ -2256,22 +2257,7 @@ index 7286db80e8b8..ff92e74cd684 100644 /* Note: caller must be prepared to deal with negative returns */ static inline int tcp_space(const struct sock *sk) { -+ if (mptcp(tcp_sk(sk))) -+ sk = tcp_sk(sk)->meta_sk; -+ - return tcp_win_from_space(sk->sk_rcvbuf - - atomic_read(&sk->sk_rmem_alloc)); - } - - static inline int tcp_full_space(const struct sock *sk) - { -+ if (mptcp(tcp_sk(sk))) -+ sk = tcp_sk(sk)->meta_sk; -+ - return tcp_win_from_space(sk->sk_rcvbuf); - } - -@@ -1115,6 +1266,8 @@ static inline void tcp_openreq_init(struct request_sock *req, +@@ -1115,6 +1261,8 @@ static inline void tcp_openreq_init(struct request_sock *req, ireq->wscale_ok = rx_opt->wscale_ok; ireq->acked = 0; ireq->ecn_ok = 0; @@ -2280,7 +2266,7 @@ index 7286db80e8b8..ff92e74cd684 100644 ireq->ir_rmt_port = tcp_hdr(skb)->source; ireq->ir_num = ntohs(tcp_hdr(skb)->dest); } -@@ -1585,6 +1738,11 @@ int tcp4_proc_init(void); +@@ -1585,6 +1733,11 @@ int tcp4_proc_init(void); void tcp4_proc_exit(void); #endif @@ -2292,7 +2278,7 @@ index 7286db80e8b8..ff92e74cd684 100644 /* TCP af-specific functions */ struct tcp_sock_af_ops { #ifdef CONFIG_TCP_MD5SIG -@@ -1601,7 +1759,32 @@ struct tcp_sock_af_ops { +@@ -1601,7 +1754,33 @@ struct tcp_sock_af_ops { #endif }; @@ -2317,6 +2303,7 @@ index 7286db80e8b8..ff92e74cd684 100644 + void (*time_wait)(struct sock *sk, int state, int timeo); + void (*cleanup_rbuf)(struct sock *sk, int copied); + void (*init_congestion_control)(struct sock *sk); ++ void (*cwnd_validate)(struct sock *sk, bool is_cwnd_limited); +}; +extern const struct tcp_sock_ops tcp_specific; + @@ -2325,7 +2312,7 @@ index 7286db80e8b8..ff92e74cd684 100644 #ifdef CONFIG_TCP_MD5SIG struct tcp_md5sig_key *(*md5_lookup) (struct sock *sk, struct request_sock *req); -@@ -1611,8 +1794,39 @@ struct tcp_request_sock_ops { +@@ -1611,8 +1790,39 @@ struct tcp_request_sock_ops { const struct request_sock *req, const struct sk_buff *skb); #endif @@ -2572,20 +2559,20 @@ index 4db3c2a1679c..04cb17d4b0ce 100644 goto drop; diff --git a/net/ipv4/Kconfig b/net/ipv4/Kconfig -index 05c57f0fcabe..811286a6aa9c 100644 +index 05c57f0fcabe..a1ba825c6acd 100644 --- a/net/ipv4/Kconfig +++ b/net/ipv4/Kconfig @@ 
-556,6 +556,38 @@ config TCP_CONG_ILLINOIS For further details see: http://www.ews.uiuc.edu/~shaoliu/tcpillinois/index.html -+config TCP_CONG_COUPLED -+ tristate "MPTCP COUPLED CONGESTION CONTROL" ++config TCP_CONG_LIA ++ tristate "MPTCP Linked Increase" + depends on MPTCP + default n + ---help--- -+ MultiPath TCP Coupled Congestion Control -+ To enable it, just put 'coupled' in tcp_congestion_control ++ MultiPath TCP Linked Increase Congestion Control ++ To enable it, just put 'lia' in tcp_congestion_control + +config TCP_CONG_OLIA + tristate "MPTCP Opportunistic Linked Increase" @@ -2618,8 +2605,8 @@ index 05c57f0fcabe..811286a6aa9c 100644 config DEFAULT_WESTWOOD bool "Westwood" if TCP_CONG_WESTWOOD=y -+ config DEFAULT_COUPLED -+ bool "Coupled" if TCP_CONG_COUPLED=y ++ config DEFAULT_LIA ++ bool "Lia" if TCP_CONG_LIA=y + + config DEFAULT_OLIA + bool "Olia" if TCP_CONG_OLIA=y @@ -2637,7 +2624,7 @@ index 05c57f0fcabe..811286a6aa9c 100644 default "vegas" if DEFAULT_VEGAS default "westwood" if DEFAULT_WESTWOOD default "veno" if DEFAULT_VENO -+ default "coupled" if DEFAULT_COUPLED ++ default "lia" if DEFAULT_LIA + default "wvegas" if DEFAULT_WVEGAS + default "balia" if DEFAULT_BALIA default "reno" if DEFAULT_RENO @@ -2815,7 +2802,7 @@ index c86624b36a62..0ff3fe004d62 100644 ireq->rcv_wscale = rcv_wscale; diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c -index 9d2118e5fbc7..2cb89f886d45 100644 +index 9d2118e5fbc7..cb59aef70d26 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -271,6 +271,7 @@ @@ -2826,7 +2813,7 @@ index 9d2118e5fbc7..2cb89f886d45 100644 #include <net/tcp.h> #include <net/xfrm.h> #include <net/ip.h> -@@ -371,6 +372,24 @@ static int retrans_to_secs(u8 retrans, int timeout, int rto_max) +@@ -371,6 +372,25 @@ static int retrans_to_secs(u8 retrans, int timeout, int rto_max) return period; } @@ -2846,12 +2833,13 @@ index 9d2118e5fbc7..2cb89f886d45 100644 + .retransmit_timer = tcp_retransmit_timer, + .time_wait = tcp_time_wait, + .cleanup_rbuf = tcp_cleanup_rbuf, ++ .cwnd_validate = tcp_cwnd_validate, +}; + /* Address-family independent initialization for a tcp_sock. 
* * NOTE: A lot of things set to zero explicitly by call to -@@ -419,6 +438,8 @@ void tcp_init_sock(struct sock *sk) +@@ -419,6 +439,8 @@ void tcp_init_sock(struct sock *sk) sk->sk_sndbuf = sysctl_tcp_wmem[1]; sk->sk_rcvbuf = sysctl_tcp_rmem[1]; @@ -2860,7 +2848,7 @@ index 9d2118e5fbc7..2cb89f886d45 100644 local_bh_disable(); sock_update_memcg(sk); sk_sockets_allocated_inc(sk); -@@ -726,6 +747,14 @@ ssize_t tcp_splice_read(struct socket *sock, loff_t *ppos, +@@ -726,6 +748,14 @@ ssize_t tcp_splice_read(struct socket *sock, loff_t *ppos, int ret; sock_rps_record_flow(sk); @@ -2875,7 +2863,7 @@ index 9d2118e5fbc7..2cb89f886d45 100644 /* * We can't seek on a socket input */ -@@ -821,8 +850,7 @@ struct sk_buff *sk_stream_alloc_skb(struct sock *sk, int size, gfp_t gfp) +@@ -821,8 +851,7 @@ struct sk_buff *sk_stream_alloc_skb(struct sock *sk, int size, gfp_t gfp) return NULL; } @@ -2885,7 +2873,7 @@ index 9d2118e5fbc7..2cb89f886d45 100644 { struct tcp_sock *tp = tcp_sk(sk); u32 xmit_size_goal, old_size_goal; -@@ -872,8 +900,13 @@ static int tcp_send_mss(struct sock *sk, int *size_goal, int flags) +@@ -872,8 +901,13 @@ static int tcp_send_mss(struct sock *sk, int *size_goal, int flags) { int mss_now; @@ -2901,7 +2889,7 @@ index 9d2118e5fbc7..2cb89f886d45 100644 return mss_now; } -@@ -892,11 +925,32 @@ static ssize_t do_tcp_sendpages(struct sock *sk, struct page *page, int offset, +@@ -892,11 +926,32 @@ static ssize_t do_tcp_sendpages(struct sock *sk, struct page *page, int offset, * is fully established. */ if (((1 << sk->sk_state) & ~(TCPF_ESTABLISHED | TCPF_CLOSE_WAIT)) && @@ -2935,7 +2923,7 @@ index 9d2118e5fbc7..2cb89f886d45 100644 clear_bit(SOCK_ASYNC_NOSPACE, &sk->sk_socket->flags); mss_now = tcp_send_mss(sk, &size_goal, flags); -@@ -1001,8 +1055,9 @@ int tcp_sendpage(struct sock *sk, struct page *page, int offset, +@@ -1001,8 +1056,9 @@ int tcp_sendpage(struct sock *sk, struct page *page, int offset, { ssize_t res; @@ -2947,7 +2935,7 @@ index 9d2118e5fbc7..2cb89f886d45 100644 return sock_no_sendpage(sk->sk_socket, page, offset, size, flags); -@@ -1018,6 +1073,9 @@ static inline int select_size(const struct sock *sk, bool sg) +@@ -1018,6 +1074,9 @@ static inline int select_size(const struct sock *sk, bool sg) const struct tcp_sock *tp = tcp_sk(sk); int tmp = tp->mss_cache; @@ -2957,7 +2945,7 @@ index 9d2118e5fbc7..2cb89f886d45 100644 if (sg) { if (sk_can_gso(sk)) { /* Small frames wont use a full page: -@@ -1100,11 +1158,18 @@ int tcp_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg, +@@ -1100,11 +1159,18 @@ int tcp_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg, * is fully established. */ if (((1 << sk->sk_state) & ~(TCPF_ESTABLISHED | TCPF_CLOSE_WAIT)) && @@ -2977,7 +2965,7 @@ index 9d2118e5fbc7..2cb89f886d45 100644 if (unlikely(tp->repair)) { if (tp->repair_queue == TCP_RECV_QUEUE) { copied = tcp_send_rcvq(sk, msg, size); -@@ -1132,7 +1197,10 @@ int tcp_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg, +@@ -1132,7 +1198,10 @@ int tcp_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg, if (sk->sk_err || (sk->sk_shutdown & SEND_SHUTDOWN)) goto out_err; @@ -2989,7 +2977,7 @@ index 9d2118e5fbc7..2cb89f886d45 100644 while (--iovlen >= 0) { size_t seglen = iov->iov_len; -@@ -1183,8 +1251,15 @@ new_segment: +@@ -1183,8 +1252,15 @@ new_segment: /* * Check whether we can use HW checksum. 
@@ -3006,7 +2994,7 @@ index 9d2118e5fbc7..2cb89f886d45 100644 skb->ip_summed = CHECKSUM_PARTIAL; skb_entail(sk, skb); -@@ -1422,7 +1497,7 @@ void tcp_cleanup_rbuf(struct sock *sk, int copied) +@@ -1422,7 +1498,7 @@ void tcp_cleanup_rbuf(struct sock *sk, int copied) /* Optimize, __tcp_select_window() is not cheap. */ if (2*rcv_window_now <= tp->window_clamp) { @@ -3015,7 +3003,7 @@ index 9d2118e5fbc7..2cb89f886d45 100644 /* Send ACK now, if this read freed lots of space * in our buffer. Certainly, new_window is new window. -@@ -1587,7 +1662,7 @@ int tcp_read_sock(struct sock *sk, read_descriptor_t *desc, +@@ -1587,7 +1663,7 @@ int tcp_read_sock(struct sock *sk, read_descriptor_t *desc, /* Clean up data we have read: This will do ACK frames. */ if (copied > 0) { tcp_recv_skb(sk, seq, &offset); @@ -3024,7 +3012,7 @@ index 9d2118e5fbc7..2cb89f886d45 100644 } return copied; } -@@ -1623,6 +1698,14 @@ int tcp_recvmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg, +@@ -1623,6 +1699,14 @@ int tcp_recvmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg, lock_sock(sk); @@ -3039,7 +3027,7 @@ index 9d2118e5fbc7..2cb89f886d45 100644 err = -ENOTCONN; if (sk->sk_state == TCP_LISTEN) goto out; -@@ -1761,7 +1844,7 @@ int tcp_recvmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg, +@@ -1761,7 +1845,7 @@ int tcp_recvmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg, } } @@ -3048,7 +3036,7 @@ index 9d2118e5fbc7..2cb89f886d45 100644 if (!sysctl_tcp_low_latency && tp->ucopy.task == user_recv) { /* Install new reader */ -@@ -1813,7 +1896,7 @@ int tcp_recvmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg, +@@ -1813,7 +1897,7 @@ int tcp_recvmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg, if (tp->rcv_wnd == 0 && !skb_queue_empty(&sk->sk_async_wait_queue)) { tcp_service_net_dma(sk, true); @@ -3057,7 +3045,7 @@ index 9d2118e5fbc7..2cb89f886d45 100644 } else dma_async_issue_pending(tp->ucopy.dma_chan); } -@@ -1993,7 +2076,7 @@ skip_copy: +@@ -1993,7 +2077,7 @@ skip_copy: */ /* Clean up data we have read: This will do ACK frames. */ @@ -3066,7 +3054,7 @@ index 9d2118e5fbc7..2cb89f886d45 100644 release_sock(sk); return copied; -@@ -2070,7 +2153,7 @@ static const unsigned char new_state[16] = { +@@ -2070,7 +2154,7 @@ static const unsigned char new_state[16] = { /* TCP_CLOSING */ TCP_CLOSING, }; @@ -3075,7 +3063,7 @@ index 9d2118e5fbc7..2cb89f886d45 100644 { int next = (int)new_state[sk->sk_state]; int ns = next & TCP_STATE_MASK; -@@ -2100,7 +2183,7 @@ void tcp_shutdown(struct sock *sk, int how) +@@ -2100,7 +2184,7 @@ void tcp_shutdown(struct sock *sk, int how) TCPF_SYN_RECV | TCPF_CLOSE_WAIT)) { /* Clear out any half completed packets. FIN if needed. */ if (tcp_close_state(sk)) @@ -3084,7 +3072,7 @@ index 9d2118e5fbc7..2cb89f886d45 100644 } } EXPORT_SYMBOL(tcp_shutdown); -@@ -2125,6 +2208,11 @@ void tcp_close(struct sock *sk, long timeout) +@@ -2125,6 +2209,11 @@ void tcp_close(struct sock *sk, long timeout) int data_was_unread = 0; int state; @@ -3096,7 +3084,7 @@ index 9d2118e5fbc7..2cb89f886d45 100644 lock_sock(sk); sk->sk_shutdown = SHUTDOWN_MASK; -@@ -2167,7 +2255,7 @@ void tcp_close(struct sock *sk, long timeout) +@@ -2167,7 +2256,7 @@ void tcp_close(struct sock *sk, long timeout) /* Unread data was tossed, zap the connection. 
*/ NET_INC_STATS_USER(sock_net(sk), LINUX_MIB_TCPABORTONCLOSE); tcp_set_state(sk, TCP_CLOSE); @@ -3105,7 +3093,7 @@ index 9d2118e5fbc7..2cb89f886d45 100644 } else if (sock_flag(sk, SOCK_LINGER) && !sk->sk_lingertime) { /* Check zero linger _after_ checking for unread data. */ sk->sk_prot->disconnect(sk, 0); -@@ -2247,7 +2335,7 @@ adjudge_to_death: +@@ -2247,7 +2336,7 @@ adjudge_to_death: struct tcp_sock *tp = tcp_sk(sk); if (tp->linger2 < 0) { tcp_set_state(sk, TCP_CLOSE); @@ -3114,7 +3102,7 @@ index 9d2118e5fbc7..2cb89f886d45 100644 NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPABORTONLINGER); } else { -@@ -2257,7 +2345,8 @@ adjudge_to_death: +@@ -2257,7 +2346,8 @@ adjudge_to_death: inet_csk_reset_keepalive_timer(sk, tmo - TCP_TIMEWAIT_LEN); } else { @@ -3124,7 +3112,7 @@ index 9d2118e5fbc7..2cb89f886d45 100644 goto out; } } -@@ -2266,7 +2355,7 @@ adjudge_to_death: +@@ -2266,7 +2356,7 @@ adjudge_to_death: sk_mem_reclaim(sk); if (tcp_check_oom(sk, 0)) { tcp_set_state(sk, TCP_CLOSE); @@ -3133,7 +3121,7 @@ index 9d2118e5fbc7..2cb89f886d45 100644 NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPABORTONMEMORY); } -@@ -2291,15 +2380,6 @@ out: +@@ -2291,15 +2381,6 @@ out: } EXPORT_SYMBOL(tcp_close); @@ -3149,7 +3137,7 @@ index 9d2118e5fbc7..2cb89f886d45 100644 int tcp_disconnect(struct sock *sk, int flags) { struct inet_sock *inet = inet_sk(sk); -@@ -2322,7 +2402,7 @@ int tcp_disconnect(struct sock *sk, int flags) +@@ -2322,7 +2403,7 @@ int tcp_disconnect(struct sock *sk, int flags) /* The last check adjusts for discrepancy of Linux wrt. RFC * states */ @@ -3158,7 +3146,7 @@ index 9d2118e5fbc7..2cb89f886d45 100644 sk->sk_err = ECONNRESET; } else if (old_state == TCP_SYN_SENT) sk->sk_err = ECONNRESET; -@@ -2340,6 +2420,13 @@ int tcp_disconnect(struct sock *sk, int flags) +@@ -2340,6 +2421,13 @@ int tcp_disconnect(struct sock *sk, int flags) if (!(sk->sk_userlocks & SOCK_BINDADDR_LOCK)) inet_reset_saddr(sk); @@ -3172,7 +3160,7 @@ index 9d2118e5fbc7..2cb89f886d45 100644 sk->sk_shutdown = 0; sock_reset_flag(sk, SOCK_DONE); tp->srtt_us = 0; -@@ -2632,6 +2719,12 @@ static int do_tcp_setsockopt(struct sock *sk, int level, +@@ -2632,6 +2720,12 @@ static int do_tcp_setsockopt(struct sock *sk, int level, break; case TCP_DEFER_ACCEPT: @@ -3185,7 +3173,7 @@ index 9d2118e5fbc7..2cb89f886d45 100644 /* Translate value in seconds to number of retransmits */ icsk->icsk_accept_queue.rskq_defer_accept = secs_to_retrans(val, TCP_TIMEOUT_INIT / HZ, -@@ -2659,7 +2752,7 @@ static int do_tcp_setsockopt(struct sock *sk, int level, +@@ -2659,7 +2753,7 @@ static int do_tcp_setsockopt(struct sock *sk, int level, (TCPF_ESTABLISHED | TCPF_CLOSE_WAIT) && inet_csk_ack_scheduled(sk)) { icsk->icsk_ack.pending |= ICSK_ACK_PUSHED; @@ -3194,7 +3182,7 @@ index 9d2118e5fbc7..2cb89f886d45 100644 if (!(val & 1)) icsk->icsk_ack.pingpong = 1; } -@@ -2699,6 +2792,18 @@ static int do_tcp_setsockopt(struct sock *sk, int level, +@@ -2699,6 +2793,18 @@ static int do_tcp_setsockopt(struct sock *sk, int level, tp->notsent_lowat = val; sk->sk_write_space(sk); break; @@ -3213,7 +3201,7 @@ index 9d2118e5fbc7..2cb89f886d45 100644 default: err = -ENOPROTOOPT; break; -@@ -2931,6 +3036,11 @@ static int do_tcp_getsockopt(struct sock *sk, int level, +@@ -2931,6 +3037,11 @@ static int do_tcp_getsockopt(struct sock *sk, int level, case TCP_NOTSENT_LOWAT: val = tp->notsent_lowat; break; @@ -3225,7 +3213,7 @@ index 9d2118e5fbc7..2cb89f886d45 100644 default: return -ENOPROTOOPT; } -@@ -3120,8 +3230,11 @@ void tcp_done(struct sock *sk) +@@ -3120,8 +3231,11 @@ void 
tcp_done(struct sock *sk) if (sk->sk_state == TCP_SYN_SENT || sk->sk_state == TCP_SYN_RECV) TCP_INC_STATS_BH(sock_net(sk), TCP_MIB_ATTEMPTFAILS); @@ -3299,7 +3287,7 @@ index 9771563ab564..5c230d96c4c1 100644 WARN_ON(req->sk == NULL); return true; diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c -index 40639c288dc2..3273bb69f387 100644 +index 40639c288dc2..71033189797d 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -74,6 +74,9 @@ @@ -3391,7 +3379,7 @@ index 40639c288dc2..3273bb69f387 100644 - if (tp->rcv_ssthresh < tp->window_clamp && - (int)tp->rcv_ssthresh < tcp_space(sk) && + if (meta_tp->rcv_ssthresh < meta_tp->window_clamp && -+ (int)meta_tp->rcv_ssthresh < tcp_space(sk) && ++ (int)meta_tp->rcv_ssthresh < tcp_space(meta_sk) && !sk_under_memory_pressure(sk)) { int incr; @@ -5203,7 +5191,7 @@ index e68e0d4af6c9..ae6946857dff 100644 return ret; } diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c -index 179b51e6bda3..efd31b6c5784 100644 +index 179b51e6bda3..267d5f7eb303 100644 --- a/net/ipv4/tcp_output.c +++ b/net/ipv4/tcp_output.c @@ -36,6 +36,12 @@ @@ -5559,6 +5547,15 @@ index 179b51e6bda3..efd31b6c5784 100644 /* RFC2861, slow part. Adjust cwnd, after it was not full during one rto. * As additional protections, we do not touch cwnd in retransmission phases, +@@ -1402,7 +1448,7 @@ static void tcp_cwnd_application_limited(struct sock *sk) + tp->snd_cwnd_stamp = tcp_time_stamp; + } + +-static void tcp_cwnd_validate(struct sock *sk, bool is_cwnd_limited) ++void tcp_cwnd_validate(struct sock *sk, bool is_cwnd_limited) + { + struct tcp_sock *tp = tcp_sk(sk); + @@ -1446,8 +1492,8 @@ static bool tcp_minshall_check(const struct tcp_sock *tp) * But we can avoid doing the divide again given we already have * skb_pcount = skb->len / mss_now @@ -5680,7 +5677,17 @@ index 179b51e6bda3..efd31b6c5784 100644 /* Do MTU probing. */ result = tcp_mtu_probe(sk); if (!result) { -@@ -2099,7 +2150,8 @@ void tcp_send_loss_probe(struct sock *sk) +@@ -2004,7 +2055,8 @@ repair: + /* Send one loss probe per tail loss episode. 
*/ + if (push_one != 2) + tcp_schedule_loss_probe(sk); +- tcp_cwnd_validate(sk, is_cwnd_limited); ++ if (tp->ops->cwnd_validate) ++ tp->ops->cwnd_validate(sk, is_cwnd_limited); + return false; + } + return (push_one == 2) || (!tp->packets_out && tcp_send_head(sk)); +@@ -2099,7 +2151,8 @@ void tcp_send_loss_probe(struct sock *sk) int err = -1; if (tcp_send_head(sk) != NULL) { @@ -5690,7 +5697,7 @@ index 179b51e6bda3..efd31b6c5784 100644 goto rearm_timer; } -@@ -2159,8 +2211,8 @@ void __tcp_push_pending_frames(struct sock *sk, unsigned int cur_mss, +@@ -2159,8 +2212,8 @@ void __tcp_push_pending_frames(struct sock *sk, unsigned int cur_mss, if (unlikely(sk->sk_state == TCP_CLOSE)) return; @@ -5701,7 +5708,7 @@ index 179b51e6bda3..efd31b6c5784 100644 tcp_check_probe_timer(sk); } -@@ -2173,7 +2225,8 @@ void tcp_push_one(struct sock *sk, unsigned int mss_now) +@@ -2173,7 +2226,8 @@ void tcp_push_one(struct sock *sk, unsigned int mss_now) BUG_ON(!skb || skb->len < mss_now); @@ -5711,7 +5718,7 @@ index 179b51e6bda3..efd31b6c5784 100644 } /* This function returns the amount that we can raise the -@@ -2386,6 +2439,10 @@ static void tcp_retrans_try_collapse(struct sock *sk, struct sk_buff *to, +@@ -2386,6 +2440,10 @@ static void tcp_retrans_try_collapse(struct sock *sk, struct sk_buff *to, if (TCP_SKB_CB(skb)->tcp_flags & TCPHDR_SYN) return; @@ -5722,7 +5729,7 @@ index 179b51e6bda3..efd31b6c5784 100644 tcp_for_write_queue_from_safe(skb, tmp, sk) { if (!tcp_can_collapse(sk, skb)) break; -@@ -2843,7 +2900,7 @@ struct sk_buff *tcp_make_synack(struct sock *sk, struct dst_entry *dst, +@@ -2843,7 +2901,7 @@ struct sk_buff *tcp_make_synack(struct sock *sk, struct dst_entry *dst, /* RFC1323: The window in SYN & SYN/ACK segments is never scaled. */ th->window = htons(min(req->rcv_wnd, 65535U)); @@ -5731,7 +5738,7 @@ index 179b51e6bda3..efd31b6c5784 100644 th->doff = (tcp_header_size >> 2); TCP_INC_STATS_BH(sock_net(sk), TCP_MIB_OUTSEGS); -@@ -2897,13 +2954,13 @@ static void tcp_connect_init(struct sock *sk) +@@ -2897,13 +2955,13 @@ static void tcp_connect_init(struct sock *sk) (tp->window_clamp > tcp_full_space(sk) || tp->window_clamp == 0)) tp->window_clamp = tcp_full_space(sk); @@ -5752,7 +5759,7 @@ index 179b51e6bda3..efd31b6c5784 100644 tp->rx_opt.rcv_wscale = rcv_wscale; tp->rcv_ssthresh = tp->rcv_wnd; -@@ -2927,6 +2984,36 @@ static void tcp_connect_init(struct sock *sk) +@@ -2927,6 +2985,36 @@ static void tcp_connect_init(struct sock *sk) inet_csk(sk)->icsk_rto = TCP_TIMEOUT_INIT; inet_csk(sk)->icsk_retransmits = 0; tcp_clear_retrans(tp); @@ -5789,7 +5796,7 @@ index 179b51e6bda3..efd31b6c5784 100644 } static void tcp_connect_queue_skb(struct sock *sk, struct sk_buff *skb) -@@ -3176,6 +3263,7 @@ void tcp_send_ack(struct sock *sk) +@@ -3176,6 +3264,7 @@ void tcp_send_ack(struct sock *sk) TCP_SKB_CB(buff)->when = tcp_time_stamp; tcp_transmit_skb(sk, buff, 0, sk_gfp_atomic(sk, GFP_ATOMIC)); } @@ -5797,7 +5804,7 @@ index 179b51e6bda3..efd31b6c5784 100644 /* This routine sends a packet with an out of date sequence * number. It assumes the other end will try to ack it. -@@ -3188,7 +3276,7 @@ void tcp_send_ack(struct sock *sk) +@@ -3188,7 +3277,7 @@ void tcp_send_ack(struct sock *sk) * one is with SEG.SEQ=SND.UNA to deliver urgent pointer, another is * out-of-date with SND.UNA-1 to probe window. 
*/ @@ -5806,7 +5813,7 @@ index 179b51e6bda3..efd31b6c5784 100644 { struct tcp_sock *tp = tcp_sk(sk); struct sk_buff *skb; -@@ -3270,7 +3358,7 @@ void tcp_send_probe0(struct sock *sk) +@@ -3270,7 +3359,7 @@ void tcp_send_probe0(struct sock *sk) struct tcp_sock *tp = tcp_sk(sk); int err; @@ -5815,7 +5822,7 @@ index 179b51e6bda3..efd31b6c5784 100644 if (tp->packets_out || !tcp_send_head(sk)) { /* Cancel probe timer, if it is not required. */ -@@ -3301,3 +3389,18 @@ void tcp_send_probe0(struct sock *sk) +@@ -3301,3 +3390,18 @@ void tcp_send_probe0(struct sock *sk) TCP_RTO_MAX); } } @@ -7099,7 +7106,7 @@ index 000000000000..cdfc03adabf8 + diff --git a/net/mptcp/Makefile b/net/mptcp/Makefile new file mode 100644 -index 000000000000..2feb3e873206 +index 000000000000..5c70e7cca3b3 --- /dev/null +++ b/net/mptcp/Makefile @@ -0,0 +1,21 @@ @@ -7113,7 +7120,7 @@ index 000000000000..2feb3e873206 +mptcp-y := mptcp_ctrl.o mptcp_ipv4.o mptcp_ofo_queue.o mptcp_pm.o \ + mptcp_output.o mptcp_input.o mptcp_sched.o + -+obj-$(CONFIG_TCP_CONG_COUPLED) += mptcp_coupled.o ++obj-$(CONFIG_TCP_CONG_LIA) += mptcp_coupled.o +obj-$(CONFIG_TCP_CONG_OLIA) += mptcp_olia.o +obj-$(CONFIG_TCP_CONG_WVEGAS) += mptcp_wvegas.o +obj-$(CONFIG_TCP_CONG_BALIA) += mptcp_balia.o @@ -7126,7 +7133,7 @@ index 000000000000..2feb3e873206 + diff --git a/net/mptcp/mptcp_balia.c b/net/mptcp/mptcp_balia.c new file mode 100644 -index 000000000000..5cc224d80b01 +index 000000000000..565cb75e2cea --- /dev/null +++ b/net/mptcp/mptcp_balia.c @@ -0,0 +1,267 @@ @@ -7156,8 +7163,9 @@ index 000000000000..5cc224d80b01 + * if max_rate > 2^rate_scale_limit + */ + -+static int rate_scale_limit = 30; -+static int scale_num = 10; ++static int rate_scale_limit = 25; ++static int alpha_scale = 10; ++static int scale_num = 5; + +struct mptcp_balia { + u64 ai; @@ -7210,7 +7218,6 @@ index 000000000000..5cc224d80b01 + const struct tcp_sock *tp = tcp_sk(sk); + const struct mptcp_cb *mpcb = tp->mpcb; + const struct sock *sub_sk; -+ int can_send = 0; + u64 max_rate = 0, rate = 0, sum_rate = 0; + u64 alpha = 0, ai = 0, md = 0; + int num_scale_down = 0; @@ -7230,27 +7237,24 @@ index 000000000000..5cc224d80b01 + if (!mptcp_balia_sk_can_send(sub_sk)) + continue; + -+ can_send++; -+ + tmp = div_u64((u64)tp->mss_cache * sub_tp->snd_cwnd + * (USEC_PER_SEC << 3), sub_tp->srtt_us); + sum_rate += tmp; + ++ if (tp == sub_tp) ++ rate = tmp; ++ + if (tmp >= max_rate) + max_rate = tmp; + } + -+ /* No subflow is able to send - we don't care anymore */ -+ if (unlikely(!can_send)) ++ /* At least, the current subflow should be able to send */ ++ if (unlikely(!rate)) + goto exit; + -+ rate = div_u64((u64)tp->mss_cache * tp->snd_cwnd * -+ (USEC_PER_SEC << 3), tp->srtt_us); + alpha = div64_u64(max_rate, rate); + -+ /* Scale down max_rate from B/s to KB/s, MB/s, or GB/s -+ * if max_rate is too high (i.e., >2^30) -+ */ ++ /* Scale down max_rate if it is too high (e.g., >2^25) */ + while (max_rate > mptcp_balia_scale(1, rate_scale_limit)) { + max_rate >>= scale_num; + num_scale_down++; @@ -7262,6 +7266,9 @@ index 000000000000..5cc224d80b01 + struct tcp_sock *sub_tp = tcp_sk(sub_sk); + u64 tmp; + ++ if (!mptcp_balia_sk_can_send(sub_sk)) ++ continue; ++ + tmp = div_u64((u64)tp->mss_cache * sub_tp->snd_cwnd + * (USEC_PER_SEC << 3), sub_tp->srtt_us); + tmp >>= (scale_num * num_scale_down); @@ -7283,9 +7290,9 @@ index 000000000000..5cc224d80b01 + if (unlikely(!ai)) + ai = tp->snd_cwnd; + -+ md = ((tp->snd_cwnd >> 1) * min(mptcp_balia_scale(alpha, scale_num), -+ mptcp_balia_scale(3, scale_num) >> 
1)) -+ >> scale_num; ++ md = ((tp->snd_cwnd >> 1) * min(mptcp_balia_scale(alpha, alpha_scale), ++ mptcp_balia_scale(3, alpha_scale) >> 1)) ++ >> alpha_scale; + +exit: + mptcp_set_ai(sk, ai); @@ -16520,10 +16527,10 @@ index 000000000000..53f5c43bb488 +MODULE_VERSION("0.1"); diff --git a/net/mptcp/mptcp_output.c b/net/mptcp/mptcp_output.c new file mode 100644 -index 000000000000..400ea254c078 +index 000000000000..e2a6a6d6522d --- /dev/null +++ b/net/mptcp/mptcp_output.c -@@ -0,0 +1,1743 @@ +@@ -0,0 +1,1758 @@ +/* + * MPTCP implementation - Sending side + * @@ -17181,11 +17188,9 @@ index 000000000000..400ea254c078 + struct sock *subsk = NULL; + struct mptcp_cb *mpcb = meta_tp->mpcb; + struct sk_buff *skb; -+ unsigned int sent_pkts; + int reinject = 0; + unsigned int sublimit; -+ -+ sent_pkts = 0; ++ __u32 path_mask = 0; + + while ((skb = mpcb->sched_ops->next_segment(meta_sk, &reinject, &subsk, + &sublimit))) { @@ -17266,6 +17271,7 @@ index 000000000000..400ea254c078 + * always push on the subflow + */ + __tcp_push_pending_frames(subsk, mss_now, TCP_NAGLE_PUSH); ++ path_mask |= mptcp_pi_to_flag(subtp->mptcp->path_index); + TCP_SKB_CB(skb)->when = tcp_time_stamp; + + if (!reinject) { @@ -17276,7 +17282,6 @@ index 000000000000..400ea254c078 + } + + tcp_minshall_update(meta_tp, mss_now, skb); -+ sent_pkts += tcp_skb_pcount(skb); + + if (reinject > 0) { + __skb_unlink(skb, &mpcb->reinject_queue); @@ -17287,6 +17292,22 @@ index 000000000000..400ea254c078 + break; + } + ++ mptcp_for_each_sk(mpcb, subsk) { ++ subtp = tcp_sk(subsk); ++ ++ if (!(path_mask & mptcp_pi_to_flag(subtp->mptcp->path_index))) ++ continue; ++ ++ /* We have pushed data on this subflow. We ignore the call to ++ * cwnd_validate in tcp_write_xmit as is_cwnd_limited will never ++ * be true (we never push more than what the cwnd can accept). ++ * We need to ensure that we call tcp_cwnd_validate with ++ * is_cwnd_limited set to true if we have filled the cwnd. ++ */ ++ tcp_cwnd_validate(subsk, tcp_packets_in_flight(subtp) >= ++ subtp->snd_cwnd); ++ } ++ + return !meta_tp->packets_out && tcp_send_head(meta_sk); +} + @@ -17299,6 +17320,7 @@ index 000000000000..400ea254c078 +{ + struct inet_connection_sock *icsk = inet_csk(sk); + struct tcp_sock *tp = tcp_sk(sk), *meta_tp = mptcp_meta_tp(tp); ++ struct sock *meta_sk = mptcp_meta_sk(sk); + int mss, free_space, full_space, window; + + /* MSS for the peer's data. Previous versions used mss_clamp @@ -17308,9 +17330,9 @@ index 000000000000..400ea254c078 + * fluctuations. --SAW 1998/11/1 + */ + mss = icsk->icsk_ack.rcv_mss; -+ free_space = tcp_space(sk); ++ free_space = tcp_space(meta_sk); + full_space = min_t(int, meta_tp->window_clamp, -+ tcp_full_space(sk)); ++ tcp_full_space(meta_sk)); + + if (mss > full_space) + mss = full_space; @@ -18751,10 +18773,10 @@ index 000000000000..93278f684069 +MODULE_VERSION("0.89"); diff --git a/net/mptcp/mptcp_sched.c b/net/mptcp/mptcp_sched.c new file mode 100644 -index 000000000000..6c7ff4eceac1 +index 000000000000..4a578821f50e --- /dev/null +++ b/net/mptcp/mptcp_sched.c -@@ -0,0 +1,493 @@ +@@ -0,0 +1,497 @@ +/* MPTCP Scheduler module selector. 
Highly inspired by tcp_cong.c */ + +#include <linux/module.h> @@ -18979,8 +19001,12 @@ index 000000000000..6c7ff4eceac1 + if (tp_it != tp && + TCP_SKB_CB(skb_head)->path_mask & mptcp_pi_to_flag(tp_it->mptcp->path_index)) { + if (tp->srtt_us < tp_it->srtt_us && inet_csk((struct sock *)tp_it)->icsk_ca_state == TCP_CA_Open) { ++ u32 prior_cwnd = tp_it->snd_cwnd; ++ + tp_it->snd_cwnd = max(tp_it->snd_cwnd >> 1U, 1U); -+ if (tp_it->snd_ssthresh != TCP_INFINITE_SSTHRESH) ++ ++ /* If in slow start, do not reduce the ssthresh */ ++ if (prior_cwnd >= tp_it->snd_ssthresh) + tp_it->snd_ssthresh = max(tp_it->snd_ssthresh >> 1U, 2U); + + dsp->last_rbuf_opti = tcp_time_stamp;