This patch keeps BBR from negotiating ECN when ECN is enabled via the tcp_ecn sysctl. Prior to this patch, BBR negotiated ECN if it was enabled and sent CWR upon receiving ECE ACKs, but did not otherwise react to them. This can cause confusion from the protocol perspective. Therefore, this patch prevents the connection from negotiating ECN if BBR is the congestion control in use during the handshake.
Note that after the handshake, the user can still switch to a different congestion control that supports or even requires ECN (e.g. DCTCP). In that case, the connection cannot re-negotiate ECN and has to use the ECN-free mode of that congestion control. There are other cases in which BBR will still respond to ECE ACKs with CWR but will not react to them, as was the behavior before this patch. First, when the user switches to BBR congestion control after the connection has already negotiated ECN. Second, when the system has configured the ip route and/or uses eBPF to enable ECN on a connection that uses BBR congestion control. Signed-off-by: Yuchung Cheng <ych...@google.com> Signed-off-by: Neal Cardwell <ncardw...@google.com> Acked-by: Yousuk Seung <ysse...@google.com> Acked-by: Eric Dumazet <eduma...@google.com> --- include/net/tcp.h | 7 +++++++ net/ipv4/tcp_bbr.c | 2 +- net/ipv4/tcp_input.c | 3 ++- net/ipv4/tcp_output.c | 6 ++++-- 4 files changed, 14 insertions(+), 4 deletions(-) diff --git a/include/net/tcp.h b/include/net/tcp.h index 6939e69d3c37..22345132d969 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -925,6 +925,8 @@ enum tcp_ca_ack_event_flags { #define TCP_CONG_NON_RESTRICTED 0x1 /* Requires ECN/ECT set on all packets */ #define TCP_CONG_NEEDS_ECN 0x2 +/* Does not use or react to ECN */ +#define TCP_CONG_DONT_USE_ECN 0x4 union tcp_cc_info; @@ -1033,6 +1035,11 @@ static inline bool tcp_ca_needs_ecn(const struct sock *sk) return icsk->icsk_ca_ops->flags & TCP_CONG_NEEDS_ECN; } +static inline bool tcp_ca_uses_ecn(const struct sock *sk) +{ + return !(inet_csk(sk)->icsk_ca_ops->flags & TCP_CONG_DONT_USE_ECN); +} + static inline void tcp_set_ca_state(struct sock *sk, const u8 ca_state) { struct inet_connection_sock *icsk = inet_csk(sk); diff --git a/net/ipv4/tcp_bbr.c b/net/ipv4/tcp_bbr.c index 8322f26e770e..27456554b113 100644 --- a/net/ipv4/tcp_bbr.c +++ b/net/ipv4/tcp_bbr.c @@ -926,7 +926,7 @@ static void bbr_set_state(struct sock *sk, u8 new_state) } 
static struct tcp_congestion_ops tcp_bbr_cong_ops __read_mostly = { - .flags = TCP_CONG_NON_RESTRICTED, + .flags = TCP_CONG_NON_RESTRICTED | TCP_CONG_DONT_USE_ECN, .name = "bbr", .owner = THIS_MODULE, .init = bbr_init, diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index ff71b18d9682..6731d0b9b146 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -6090,7 +6090,8 @@ static void tcp_ecn_create_request(struct request_sock *req, ect = !INET_ECN_is_not_ect(TCP_SKB_CB(skb)->ip_dsfield); ecn_ok_dst = dst_feature(dst, DST_FEATURE_ECN_MASK); - ecn_ok = net->ipv4.sysctl_tcp_ecn || ecn_ok_dst; + ecn_ok = ecn_ok_dst || + (net->ipv4.sysctl_tcp_ecn && tcp_ca_uses_ecn(listen_sk)); if ((!ect && ecn_ok) || tcp_ca_needs_ecn(listen_sk) || (ecn_ok_dst & DST_FEATURE_ECN_CA) || diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c index 95461f02ac9a..446cb65090f5 100644 --- a/net/ipv4/tcp_output.c +++ b/net/ipv4/tcp_output.c @@ -312,8 +312,10 @@ static void tcp_ecn_send_syn(struct sock *sk, struct sk_buff *skb) { struct tcp_sock *tp = tcp_sk(sk); bool bpf_needs_ecn = tcp_bpf_ca_needs_ecn(sk); - bool use_ecn = sock_net(sk)->ipv4.sysctl_tcp_ecn == 1 || - tcp_ca_needs_ecn(sk) || bpf_needs_ecn; + bool use_ecn = tcp_ca_needs_ecn(sk) || bpf_needs_ecn; + + if (sock_net(sk)->ipv4.sysctl_tcp_ecn == 1 && tcp_ca_uses_ecn(sk)) + use_ecn = true; if (!use_ecn) { const struct dst_entry *dst = __sk_dst_get(sk); -- 2.16.0.rc1.238.g530d649a79-goog