On Thu, Apr 11, 2019 at 8:55 AM Eric Dumazet <eduma...@google.com> wrote: > > After commit e21db6f69a95 ("tcp: track total bytes delivered with ECN CE > marks") > core TCP stack does a very good job tracking ECN signals. > > The "sender's best estimate of CE information" Yuchung mentioned in his > patch is indeed the best we can do. > > DCTCP can use tp->delivered_ce and tp->delivered to not duplicate the logic, > and use the existing best estimate. > > This solves some problems, since current DCTCP logic does not deal with losses > and/or GRO or ack aggregation very well. > > This also removes a dubious use of inet_csk(sk)->icsk_ack.rcv_mss > (this should have been tp->mss_cache), and a 64 bit divide. > > Finally, we can see that the DCTCP logic, calling dctcp_update_alpha() for > every ACK could be done differently, calling it only once per RTT. > > Signed-off-by: Eric Dumazet <eduma...@google.com> > Cc: Yuchung Cheng <ych...@google.com> > Cc: Neal Cardwell <ncardw...@google.com> > Cc: Soheil Hassas Yeganeh <soh...@google.com> > Cc: Florian Westphal <f...@strlen.de> > Cc: Daniel Borkmann <dan...@iogearbox.net> > Cc: Lawrence Brakmo <bra...@fb.com> > Cc: Abdul Kabbani <akabb...@google.com>
Acked-by: Soheil Hassas Yeganeh <soh...@google.com> Thank you! Nice idea! > --- > net/ipv4/tcp_dctcp.c | 45 +++++++++++++++++--------------------------- > 1 file changed, 17 insertions(+), 28 deletions(-) > > diff --git a/net/ipv4/tcp_dctcp.c b/net/ipv4/tcp_dctcp.c > index > 359da68d7c0628360d5f1b727c878f678e28d39a..477cb4aa456c11c70185a982cbadafba857d3619 > 100644 > --- a/net/ipv4/tcp_dctcp.c > +++ b/net/ipv4/tcp_dctcp.c > @@ -49,9 +49,8 @@ > #define DCTCP_MAX_ALPHA 1024U > > struct dctcp { > - u32 acked_bytes_ecn; > - u32 acked_bytes_total; > - u32 prior_snd_una; > + u32 old_delivered; > + u32 old_delivered_ce; > u32 prior_rcv_nxt; > u32 dctcp_alpha; > u32 next_seq; > @@ -73,8 +72,8 @@ static void dctcp_reset(const struct tcp_sock *tp, struct > dctcp *ca) > { > ca->next_seq = tp->snd_nxt; > > - ca->acked_bytes_ecn = 0; > - ca->acked_bytes_total = 0; > + ca->old_delivered = tp->delivered; > + ca->old_delivered_ce = tp->delivered_ce; > } > > static void dctcp_init(struct sock *sk) > @@ -86,7 +85,6 @@ static void dctcp_init(struct sock *sk) > sk->sk_state == TCP_CLOSE)) { > struct dctcp *ca = inet_csk_ca(sk); > > - ca->prior_snd_una = tp->snd_una; > ca->prior_rcv_nxt = tp->rcv_nxt; > > ca->dctcp_alpha = min(dctcp_alpha_on_init, DCTCP_MAX_ALPHA); > @@ -118,37 +116,25 @@ static void dctcp_update_alpha(struct sock *sk, u32 > flags) > { > const struct tcp_sock *tp = tcp_sk(sk); > struct dctcp *ca = inet_csk_ca(sk); > - u32 acked_bytes = tp->snd_una - ca->prior_snd_una; > - > - /* If ack did not advance snd_una, count dupack as MSS size. > - * If ack did update window, do not count it at all. > - */ > - if (acked_bytes == 0 && !(flags & CA_ACK_WIN_UPDATE)) > - acked_bytes = inet_csk(sk)->icsk_ack.rcv_mss; > - if (acked_bytes) { > - ca->acked_bytes_total += acked_bytes; > - ca->prior_snd_una = tp->snd_una; > - > - if (flags & CA_ACK_ECE) > - ca->acked_bytes_ecn += acked_bytes; > - } > > /* Expired RTT */ > if (!before(tp->snd_una, ca->next_seq)) { > - u64 bytes_ecn = ca->acked_bytes_ecn; > + u32 delivered_ce = tp->delivered_ce - ca->old_delivered_ce; > u32 alpha = ca->dctcp_alpha; > > /* alpha = (1 - g) * alpha + g * F */ > > alpha -= min_not_zero(alpha, alpha >> dctcp_shift_g); > - if (bytes_ecn) { > + if (delivered_ce) { > + u32 delivered = tp->delivered - ca->old_delivered; > + > /* If dctcp_shift_g == 1, a 32bit value would overflow > - * after 8 Mbytes. > + * after 8 M packets. > */ > - bytes_ecn <<= (10 - dctcp_shift_g); > - do_div(bytes_ecn, max(1U, ca->acked_bytes_total)); > + delivered_ce <<= (10 - dctcp_shift_g); > + delivered_ce /= max(1U, delivered); > > - alpha = min(alpha + (u32)bytes_ecn, DCTCP_MAX_ALPHA); > + alpha = min(alpha + delivered_ce, DCTCP_MAX_ALPHA); > } > /* dctcp_alpha can be read from dctcp_get_info() without > * synchro, so we ask compiler to not use dctcp_alpha > @@ -200,6 +186,7 @@ static size_t dctcp_get_info(struct sock *sk, u32 ext, > int *attr, > union tcp_cc_info *info) > { > const struct dctcp *ca = inet_csk_ca(sk); > + const struct tcp_sock *tp = tcp_sk(sk); > > /* Fill it also in case of VEGASINFO due to req struct limits. > * We can still correctly retrieve it later. > @@ -211,8 +198,10 @@ static size_t dctcp_get_info(struct sock *sk, u32 ext, > int *attr, > info->dctcp.dctcp_enabled = 1; > info->dctcp.dctcp_ce_state = (u16) ca->ce_state; > info->dctcp.dctcp_alpha = ca->dctcp_alpha; > - info->dctcp.dctcp_ab_ecn = ca->acked_bytes_ecn; > - info->dctcp.dctcp_ab_tot = ca->acked_bytes_total; > + info->dctcp.dctcp_ab_ecn = tp->mss_cache * > + (tp->delivered_ce - > ca->old_delivered_ce); > + info->dctcp.dctcp_ab_tot = tp->mss_cache * > + (tp->delivered - > ca->old_delivered); > } > > *attr = INET_DIAG_DCTCPINFO; > -- > 2.21.0.392.gf8f6787159e-goog >