On 24/10/17(Tue) 12:27, Mike Belopuhov wrote:
> On Tue, Oct 24, 2017 at 12:05 +0200, Martin Pieuchot wrote:
> > On 21/10/17(Sat) 15:17, Mike Belopuhov wrote:
> > > On Fri, Oct 20, 2017 at 22:59 +0200, Klemens Nanni wrote:
> > > > The comments for both void tcp_{sack,newreno}_partialack() still mention
> > > > tp->snd_last and return value bits.
> > > >
> > >
> > > Good eyes! It made me spot a mistake I made by folding two lines
> > > into an incorrect ifdef in tcp_sack_partialack. I expected it to
> > > say "ifdef TCP_FACK" while it says "ifNdef". The adjusted comment
> > > didn't make sense and I found the bug.
> >
> > Could you send the full/fixed diff?
> >
>
> Sure.
Diff is correct. I have two suggestions, but it's ok mpi@ either way.
> > And what about TCP_FACK? It is disabled by default, is there a
> > point in keeping it?
>
> Job has pointed out that RFC 6675 might be a better alternative
> so it might be a good idea to ditch it while we're at it. I'm
> not certain which parts need to be preserved (if any) however.
I'd say remove it. One can always look in the Attic if necessary.
> diff --git sys/netinet/tcp_input.c sys/netinet/tcp_input.c
> index 790e163975e..3809a2371f2 100644
> --- sys/netinet/tcp_input.c
> +++ sys/netinet/tcp_input.c
> @@ -1664,52 +1664,38 @@ trimthenstep6:
> }
> /*
> * If the congestion window was inflated to account
> * for the other side's cached packets, retract it.
> */
> - if (tp->sack_enable) {
> - if (tp->t_dupacks >= tcprexmtthresh) {
> - /* Check for a partial ACK */
> - if (tcp_sack_partialack(tp, th)) {
> -#ifdef TCP_FACK
> - /* Force call to tcp_output */
> - if (tp->snd_awnd < tp->snd_cwnd)
> - tp->t_flags |= TF_NEEDOUTPUT;
> -#else
> - tp->snd_cwnd += tp->t_maxseg;
> - tp->t_flags |= TF_NEEDOUTPUT;
> -#endif /* TCP_FACK */
> - } else {
> - /* Out of fast recovery */
> - tp->snd_cwnd = tp->snd_ssthresh;
> - if (tcp_seq_subtract(tp->snd_max,
> - th->th_ack) < tp->snd_ssthresh)
> - tp->snd_cwnd =
> - tcp_seq_subtract(tp->snd_max,
> - th->th_ack);
> - tp->t_dupacks = 0;
> -#ifdef TCP_FACK
> - if (SEQ_GT(th->th_ack, tp->snd_fack))
> - tp->snd_fack = th->th_ack;
> -#endif
> - }
> - }
> - } else {
> - if (tp->t_dupacks >= tcprexmtthresh &&
> - !tcp_newreno(tp, th)) {
> + if (tp->t_dupacks >= tcprexmtthresh) {
I'd keep the comment:
/* Check for a partial ACK */
> + if (SEQ_LT(th->th_ack, tp->snd_last)) {
> + if (tp->sack_enable)
> + tcp_sack_partialack(tp, th);
> + else
> + tcp_newreno_partialack(tp, th);
> + } else {
> /* Out of fast recovery */
> tp->snd_cwnd = tp->snd_ssthresh;
> if (tcp_seq_subtract(tp->snd_max, th->th_ack) <
> tp->snd_ssthresh)
> tp->snd_cwnd =
> tcp_seq_subtract(tp->snd_max,
> th->th_ack);
> tp->t_dupacks = 0;
> +#ifdef TCP_FACK
> + if (tp->sack_enable &&
> + SEQ_GT(th->th_ack, tp->snd_fack))
> + tp->snd_fack = th->th_ack;
> +#endif
> }
> - }
> - if (tp->t_dupacks < tcprexmtthresh)
> + } else {
> + /*
> + * Reset the duplicate ACK counter if we
> + * were not in fast recovery.
> + */
> tp->t_dupacks = 0;
> + }
> if (SEQ_GT(th->th_ack, tp->snd_max)) {
> tcpstat_inc(tcps_rcvacktoomuch);
> goto dropafterack_ratelim;
> }
> acked = th->th_ack - tp->snd_una;
> @@ -2703,36 +2689,38 @@ tcp_clean_sackreport(struct tcpcb *tp)
> tp->sackblks[i].start = tp->sackblks[i].end=0;
>
> }
>
> /*
> - * Checks for partial ack. If partial ack arrives, turn off retransmission
> - * timer, deflate the window, do not clear tp->t_dupacks, and return 1.
> - * If the ack advances at least to tp->snd_last, return 0.
> + * Partial ack handling within a sack recovery episode. When a partial ack
> + * arrives, turn off retransmission timer, deflate the window, do not clear
> + * tp->t_dupacks.
> */
> -int
> +void
> tcp_sack_partialack(struct tcpcb *tp, struct tcphdr *th)
> {
> - if (SEQ_LT(th->th_ack, tp->snd_last)) {
> - /* Turn off retx. timer (will start again next segment) */
> - TCP_TIMER_DISARM(tp, TCPT_REXMT);
> - tp->t_rtttime = 0;
> + /* Turn off retx. timer (will start again next segment) */
> + TCP_TIMER_DISARM(tp, TCPT_REXMT);
> + tp->t_rtttime = 0;
> #ifndef TCP_FACK
> - /*
> - * Partial window deflation. This statement relies on the
> - * fact that tp->snd_una has not been updated yet. In FACK
> - * hold snd_cwnd constant during fast recovery.
> - */
> - if (tp->snd_cwnd > (th->th_ack - tp->snd_una)) {
> - tp->snd_cwnd -= th->th_ack - tp->snd_una;
> - tp->snd_cwnd += tp->t_maxseg;
> - } else
> - tp->snd_cwnd = tp->t_maxseg;
> + /*
> + * Partial window deflation. This statement relies on the
> + * fact that tp->snd_una has not been updated yet. In FACK
> + * hold snd_cwnd constant during fast recovery.
> + */
> + if (tp->snd_cwnd > (th->th_ack - tp->snd_una)) {
> + tp->snd_cwnd -= th->th_ack - tp->snd_una;
> + tp->snd_cwnd += tp->t_maxseg;
> + } else
> + tp->snd_cwnd = tp->t_maxseg;
> + tp->snd_cwnd += tp->t_maxseg;
> + tp->t_flags |= TF_NEEDOUTPUT;
> +#else
> + /* Force call to tcp_output */
> + if (tp->snd_awnd < tp->snd_cwnd)
> + tp->t_flags |= TF_NEEDOUTPUT;
> #endif
> - return (1);
> - }
> - return (0);
> }
>
> /*
> * Pull out of band byte out of a segment so
> * it doesn't appear in the user's data queue.
> @@ -3089,52 +3077,48 @@ tcp_mss_update(struct tcpcb *tp)
> }
>
> }
>
> /*
> - * Checks for partial ack. If partial ack arrives, force the retransmission
> - * of the next unacknowledged segment, do not clear tp->t_dupacks, and return
> - * 1. By setting snd_nxt to ti_ack, this forces retransmission timer to
> - * be started again. If the ack advances at least to tp->snd_last, return 0.
> + * When a partial ack arrives, force the retransmission of the
> + * next unacknowledged segment. Do not clear tp->t_dupacks.
> + * By setting snd_nxt to ti_ack, this forces retransmission timer
> + * to be started again.
> */
> -int
> -tcp_newreno(struct tcpcb *tp, struct tcphdr *th)
> +void
> +tcp_newreno_partialack(struct tcpcb *tp, struct tcphdr *th)
> {
> - if (SEQ_LT(th->th_ack, tp->snd_last)) {
> - /*
> - * snd_una has not been updated and the socket send buffer
> - * not yet drained of the acked data, so we have to leave
> - * snd_una as it was to get the correct data offset in
> - * tcp_output().
> - */
> - tcp_seq onxt = tp->snd_nxt;
> - u_long ocwnd = tp->snd_cwnd;
> - TCP_TIMER_DISARM(tp, TCPT_REXMT);
> - tp->t_rtttime = 0;
> - tp->snd_nxt = th->th_ack;
> - /*
> - * Set snd_cwnd to one segment beyond acknowledged offset
> - * (tp->snd_una not yet updated when this function is called)
> - */
> - tp->snd_cwnd = tp->t_maxseg + (th->th_ack - tp->snd_una);
> - (void) tcp_output(tp);
> - tp->snd_cwnd = ocwnd;
> - if (SEQ_GT(onxt, tp->snd_nxt))
> - tp->snd_nxt = onxt;
> - /*
> - * Partial window deflation. Relies on fact that tp->snd_una
> - * not updated yet.
> - */
> - if (tp->snd_cwnd > th->th_ack - tp->snd_una)
> - tp->snd_cwnd -= th->th_ack - tp->snd_una;
> - else
> - tp->snd_cwnd = 0;
> - tp->snd_cwnd += tp->t_maxseg;
> + /*
> + * snd_una has not been updated and the socket send buffer
> + * not yet drained of the acked data, so we have to leave
> + * snd_una as it was to get the correct data offset in
> + * tcp_output().
> + */
> + tcp_seq onxt = tp->snd_nxt;
> + u_long ocwnd = tp->snd_cwnd;
>
> - return 1;
> - }
> - return 0;
> + TCP_TIMER_DISARM(tp, TCPT_REXMT);
> + tp->t_rtttime = 0;
> + tp->snd_nxt = th->th_ack;
> + /*
> + * Set snd_cwnd to one segment beyond acknowledged offset
> + * (tp->snd_una not yet updated when this function is called)
> + */
> + tp->snd_cwnd = tp->t_maxseg + (th->th_ack - tp->snd_una);
> + (void)tcp_output(tp);
> + tp->snd_cwnd = ocwnd;
> + if (SEQ_GT(onxt, tp->snd_nxt))
> + tp->snd_nxt = onxt;
> + /*
> + * Partial window deflation. Relies on fact that tp->snd_una
> + * not updated yet.
> + */
> + if (tp->snd_cwnd > th->th_ack - tp->snd_una)
> + tp->snd_cwnd -= th->th_ack - tp->snd_una;
> + else
> + tp->snd_cwnd = 0;
> + tp->snd_cwnd += tp->t_maxseg;
> }
>
> int
> tcp_mss_adv(struct mbuf *m, int af)
> {
> diff --git sys/netinet/tcp_var.h sys/netinet/tcp_var.h
> index 6b797fd48e7..97b04884879 100644
> --- sys/netinet/tcp_var.h
> +++ sys/netinet/tcp_var.h
> @@ -764,15 +764,15 @@ void tcp_update_sack_list(struct tcpcb *tp,
> tcp_seq, tcp_seq);
> void tcp_del_sackholes(struct tcpcb *, struct tcphdr *);
> void tcp_clean_sackreport(struct tcpcb *tp);
> void tcp_sack_adjust(struct tcpcb *tp);
> struct sackhole *
> tcp_sack_output(struct tcpcb *tp);
> -int tcp_sack_partialack(struct tcpcb *, struct tcphdr *);
> +void tcp_sack_partialack(struct tcpcb *, struct tcphdr *);
> +void tcp_newreno_partialack(struct tcpcb *, struct tcphdr *);
> #ifdef DEBUG
> void tcp_print_holes(struct tcpcb *tp);
> #endif
> -int tcp_newreno(struct tcpcb *, struct tcphdr *);
I'd love to see these definitions moved to netinet/tcp_input.c. It
makes code audit much more simpler as you know that their are local.
> u_long tcp_seq_subtract(u_long, u_long );
> #ifdef TCP_SIGNATURE
> int tcp_signature_apply(caddr_t, caddr_t, unsigned int);
> int tcp_signature(struct tdb *, int, struct mbuf *, struct tcphdr *,
> int, int, char *);
>