On Tue, Oct 24, 2017 at 13:37 +0200, Martin Pieuchot wrote:
> On 24/10/17(Tue) 12:27, Mike Belopuhov wrote:
> > On Tue, Oct 24, 2017 at 12:05 +0200, Martin Pieuchot wrote:
> > > On 21/10/17(Sat) 15:17, Mike Belopuhov wrote:
> > > > On Fri, Oct 20, 2017 at 22:59 +0200, Klemens Nanni wrote:
> > > > > The comments for both void tcp_{sack,newreno}_partialack() still
> > > > > mention
> > > > > tp->snd_last and return value bits.
> > > > >
> > > >
> > > > Good eyes! It made me spot a mistake I made by folding two lines
> > > > into an incorrect ifdef in tcp_sack_partialack. I expected it to
> > > > say "ifdef TCP_FACK" while it says "ifNdef". The adjusted comment
> > > > didn't make sense and I found the bug.
> > >
> > > Could you send the full/fixed diff?
> > >
> >
> > Sure.
>
> Diff is correct. I have two suggestions, but it's ok mpi@ either way.
>
> > > And what about TCP_FACK? It is disabled by default, is there a
> > > point in keeping it?
> >
> > Job has pointed out that RFC 6675 might be a better alternative
> > so it might be a good idea to ditch it while we're at it. I'm
> > not certain which parts need to be preserved (if any) however.
>
> I'd say remove it. One can always look in the Attic if necessary.
>
> > diff --git sys/netinet/tcp_input.c sys/netinet/tcp_input.c
> > index 790e163975e..3809a2371f2 100644
> > --- sys/netinet/tcp_input.c
> > +++ sys/netinet/tcp_input.c
> > @@ -1664,52 +1664,38 @@ trimthenstep6:
> > }
> > /*
> > * If the congestion window was inflated to account
> > * for the other side's cached packets, retract it.
> > */
> > - if (tp->sack_enable) {
> > - if (tp->t_dupacks >= tcprexmtthresh) {
> > - /* Check for a partial ACK */
> > - if (tcp_sack_partialack(tp, th)) {
> > -#ifdef TCP_FACK
> > - /* Force call to tcp_output */
> > - if (tp->snd_awnd < tp->snd_cwnd)
> > - tp->t_flags |= TF_NEEDOUTPUT;
> > -#else
> > - tp->snd_cwnd += tp->t_maxseg;
> > - tp->t_flags |= TF_NEEDOUTPUT;
> > -#endif /* TCP_FACK */
> > - } else {
> > - /* Out of fast recovery */
> > - tp->snd_cwnd = tp->snd_ssthresh;
> > - if (tcp_seq_subtract(tp->snd_max,
> > - th->th_ack) < tp->snd_ssthresh)
> > - tp->snd_cwnd =
> > - tcp_seq_subtract(tp->snd_max,
> > - th->th_ack);
> > - tp->t_dupacks = 0;
> > -#ifdef TCP_FACK
> > - if (SEQ_GT(th->th_ack, tp->snd_fack))
> > - tp->snd_fack = th->th_ack;
> > -#endif
> > - }
> > - }
> > - } else {
> > - if (tp->t_dupacks >= tcprexmtthresh &&
> > - !tcp_newreno(tp, th)) {
> > + if (tp->t_dupacks >= tcprexmtthresh) {
>
> I'd keep the comment:
>
> /* Check for a partial ACK */
Sure.
> > diff --git sys/netinet/tcp_var.h sys/netinet/tcp_var.h
> > index 6b797fd48e7..97b04884879 100644
> > --- sys/netinet/tcp_var.h
> > +++ sys/netinet/tcp_var.h
> > @@ -764,15 +764,15 @@ void tcp_update_sack_list(struct tcpcb *tp,
> > tcp_seq, tcp_seq);
> > void tcp_del_sackholes(struct tcpcb *, struct tcphdr *);
> > void tcp_clean_sackreport(struct tcpcb *tp);
> > void tcp_sack_adjust(struct tcpcb *tp);
> > struct sackhole *
> > tcp_sack_output(struct tcpcb *tp);
> > -int tcp_sack_partialack(struct tcpcb *, struct tcphdr *);
> > +void tcp_sack_partialack(struct tcpcb *, struct tcphdr *);
> > +void tcp_newreno_partialack(struct tcpcb *, struct tcphdr *);
> > #ifdef DEBUG
> > void tcp_print_holes(struct tcpcb *tp);
> > #endif
> > -int tcp_newreno(struct tcpcb *, struct tcphdr *);
>
> I'd love to see these definitions moved to netinet/tcp_input.c. It
> makes code audit much more simpler as you know that their are local.
>
I didn't do it because tcp_var.h is where tcp keeps all of it's prototypes
but I don't mind moving them into tcp_input.c. Any objections? Otherwise
I'll check in the diff below.
diff --git sys/netinet/tcp_input.c sys/netinet/tcp_input.c
index 790e163975e..8d172e2905c 100644
--- sys/netinet/tcp_input.c
+++ sys/netinet/tcp_input.c
@@ -183,10 +183,13 @@ do { \
else \
TCP_SET_DELACK(tp); \
if_put(ifp); \
} while (0)
+void tcp_sack_partialack(struct tcpcb *, struct tcphdr *);
+void tcp_newreno_partialack(struct tcpcb *, struct tcphdr *);
+
void syn_cache_put(struct syn_cache *);
void syn_cache_rm(struct syn_cache *);
int syn_cache_respond(struct syn_cache *, struct mbuf *);
void syn_cache_timer(void *);
void syn_cache_reaper(void *);
@@ -1664,52 +1667,39 @@ trimthenstep6:
}
/*
* If the congestion window was inflated to account
* for the other side's cached packets, retract it.
*/
- if (tp->sack_enable) {
- if (tp->t_dupacks >= tcprexmtthresh) {
- /* Check for a partial ACK */
- if (tcp_sack_partialack(tp, th)) {
-#ifdef TCP_FACK
- /* Force call to tcp_output */
- if (tp->snd_awnd < tp->snd_cwnd)
- tp->t_flags |= TF_NEEDOUTPUT;
-#else
- tp->snd_cwnd += tp->t_maxseg;
- tp->t_flags |= TF_NEEDOUTPUT;
-#endif /* TCP_FACK */
- } else {
- /* Out of fast recovery */
- tp->snd_cwnd = tp->snd_ssthresh;
- if (tcp_seq_subtract(tp->snd_max,
- th->th_ack) < tp->snd_ssthresh)
- tp->snd_cwnd =
- tcp_seq_subtract(tp->snd_max,
- th->th_ack);
- tp->t_dupacks = 0;
-#ifdef TCP_FACK
- if (SEQ_GT(th->th_ack, tp->snd_fack))
- tp->snd_fack = th->th_ack;
-#endif
- }
- }
- } else {
- if (tp->t_dupacks >= tcprexmtthresh &&
- !tcp_newreno(tp, th)) {
+ if (tp->t_dupacks >= tcprexmtthresh) {
+ /* Check for a partial ACK */
+ if (SEQ_LT(th->th_ack, tp->snd_last)) {
+ if (tp->sack_enable)
+ tcp_sack_partialack(tp, th);
+ else
+ tcp_newreno_partialack(tp, th);
+ } else {
/* Out of fast recovery */
tp->snd_cwnd = tp->snd_ssthresh;
if (tcp_seq_subtract(tp->snd_max, th->th_ack) <
tp->snd_ssthresh)
tp->snd_cwnd =
tcp_seq_subtract(tp->snd_max,
th->th_ack);
tp->t_dupacks = 0;
+#ifdef TCP_FACK
+ if (tp->sack_enable &&
+ SEQ_GT(th->th_ack, tp->snd_fack))
+ tp->snd_fack = th->th_ack;
+#endif
}
- }
- if (tp->t_dupacks < tcprexmtthresh)
+ } else {
+ /*
+ * Reset the duplicate ACK counter if we
+ * were not in fast recovery.
+ */
tp->t_dupacks = 0;
+ }
if (SEQ_GT(th->th_ack, tp->snd_max)) {
tcpstat_inc(tcps_rcvacktoomuch);
goto dropafterack_ratelim;
}
acked = th->th_ack - tp->snd_una;
@@ -2703,36 +2693,38 @@ tcp_clean_sackreport(struct tcpcb *tp)
tp->sackblks[i].start = tp->sackblks[i].end=0;
}
/*
- * Checks for partial ack. If partial ack arrives, turn off retransmission
- * timer, deflate the window, do not clear tp->t_dupacks, and return 1.
- * If the ack advances at least to tp->snd_last, return 0.
+ * Partial ack handling within a sack recovery episode. When a partial ack
+ * arrives, turn off retransmission timer, deflate the window, do not clear
+ * tp->t_dupacks.
*/
-int
+void
tcp_sack_partialack(struct tcpcb *tp, struct tcphdr *th)
{
- if (SEQ_LT(th->th_ack, tp->snd_last)) {
- /* Turn off retx. timer (will start again next segment) */
- TCP_TIMER_DISARM(tp, TCPT_REXMT);
- tp->t_rtttime = 0;
+ /* Turn off retx. timer (will start again next segment) */
+ TCP_TIMER_DISARM(tp, TCPT_REXMT);
+ tp->t_rtttime = 0;
#ifndef TCP_FACK
- /*
- * Partial window deflation. This statement relies on the
- * fact that tp->snd_una has not been updated yet. In FACK
- * hold snd_cwnd constant during fast recovery.
- */
- if (tp->snd_cwnd > (th->th_ack - tp->snd_una)) {
- tp->snd_cwnd -= th->th_ack - tp->snd_una;
- tp->snd_cwnd += tp->t_maxseg;
- } else
- tp->snd_cwnd = tp->t_maxseg;
+ /*
+ * Partial window deflation. This statement relies on the
+ * fact that tp->snd_una has not been updated yet. In FACK
+ * hold snd_cwnd constant during fast recovery.
+ */
+ if (tp->snd_cwnd > (th->th_ack - tp->snd_una)) {
+ tp->snd_cwnd -= th->th_ack - tp->snd_una;
+ tp->snd_cwnd += tp->t_maxseg;
+ } else
+ tp->snd_cwnd = tp->t_maxseg;
+ tp->snd_cwnd += tp->t_maxseg;
+ tp->t_flags |= TF_NEEDOUTPUT;
+#else
+ /* Force call to tcp_output */
+ if (tp->snd_awnd < tp->snd_cwnd)
+ tp->t_flags |= TF_NEEDOUTPUT;
#endif
- return (1);
- }
- return (0);
}
/*
* Pull out of band byte out of a segment so
* it doesn't appear in the user's data queue.
@@ -3089,52 +3081,48 @@ tcp_mss_update(struct tcpcb *tp)
}
}
/*
- * Checks for partial ack. If partial ack arrives, force the retransmission
- * of the next unacknowledged segment, do not clear tp->t_dupacks, and return
- * 1. By setting snd_nxt to ti_ack, this forces retransmission timer to
- * be started again. If the ack advances at least to tp->snd_last, return 0.
+ * When a partial ack arrives, force the retransmission of the
+ * next unacknowledged segment. Do not clear tp->t_dupacks.
+ * By setting snd_nxt to ti_ack, this forces retransmission timer
+ * to be started again.
*/
-int
-tcp_newreno(struct tcpcb *tp, struct tcphdr *th)
+void
+tcp_newreno_partialack(struct tcpcb *tp, struct tcphdr *th)
{
- if (SEQ_LT(th->th_ack, tp->snd_last)) {
- /*
- * snd_una has not been updated and the socket send buffer
- * not yet drained of the acked data, so we have to leave
- * snd_una as it was to get the correct data offset in
- * tcp_output().
- */
- tcp_seq onxt = tp->snd_nxt;
- u_long ocwnd = tp->snd_cwnd;
- TCP_TIMER_DISARM(tp, TCPT_REXMT);
- tp->t_rtttime = 0;
- tp->snd_nxt = th->th_ack;
- /*
- * Set snd_cwnd to one segment beyond acknowledged offset
- * (tp->snd_una not yet updated when this function is called)
- */
- tp->snd_cwnd = tp->t_maxseg + (th->th_ack - tp->snd_una);
- (void) tcp_output(tp);
- tp->snd_cwnd = ocwnd;
- if (SEQ_GT(onxt, tp->snd_nxt))
- tp->snd_nxt = onxt;
- /*
- * Partial window deflation. Relies on fact that tp->snd_una
- * not updated yet.
- */
- if (tp->snd_cwnd > th->th_ack - tp->snd_una)
- tp->snd_cwnd -= th->th_ack - tp->snd_una;
- else
- tp->snd_cwnd = 0;
- tp->snd_cwnd += tp->t_maxseg;
+ /*
+ * snd_una has not been updated and the socket send buffer
+ * not yet drained of the acked data, so we have to leave
+ * snd_una as it was to get the correct data offset in
+ * tcp_output().
+ */
+ tcp_seq onxt = tp->snd_nxt;
+ u_long ocwnd = tp->snd_cwnd;
- return 1;
- }
- return 0;
+ TCP_TIMER_DISARM(tp, TCPT_REXMT);
+ tp->t_rtttime = 0;
+ tp->snd_nxt = th->th_ack;
+ /*
+ * Set snd_cwnd to one segment beyond acknowledged offset
+ * (tp->snd_una not yet updated when this function is called)
+ */
+ tp->snd_cwnd = tp->t_maxseg + (th->th_ack - tp->snd_una);
+ (void)tcp_output(tp);
+ tp->snd_cwnd = ocwnd;
+ if (SEQ_GT(onxt, tp->snd_nxt))
+ tp->snd_nxt = onxt;
+ /*
+ * Partial window deflation. Relies on fact that tp->snd_una
+ * not updated yet.
+ */
+ if (tp->snd_cwnd > th->th_ack - tp->snd_una)
+ tp->snd_cwnd -= th->th_ack - tp->snd_una;
+ else
+ tp->snd_cwnd = 0;
+ tp->snd_cwnd += tp->t_maxseg;
}
int
tcp_mss_adv(struct mbuf *m, int af)
{
diff --git sys/netinet/tcp_var.h sys/netinet/tcp_var.h
index 6b797fd48e7..4c1b37b2b68 100644
--- sys/netinet/tcp_var.h
+++ sys/netinet/tcp_var.h
@@ -764,15 +764,13 @@ void tcp_update_sack_list(struct tcpcb *tp,
tcp_seq, tcp_seq);
void tcp_del_sackholes(struct tcpcb *, struct tcphdr *);
void tcp_clean_sackreport(struct tcpcb *tp);
void tcp_sack_adjust(struct tcpcb *tp);
struct sackhole *
tcp_sack_output(struct tcpcb *tp);
-int tcp_sack_partialack(struct tcpcb *, struct tcphdr *);
#ifdef DEBUG
void tcp_print_holes(struct tcpcb *tp);
#endif
-int tcp_newreno(struct tcpcb *, struct tcphdr *);
u_long tcp_seq_subtract(u_long, u_long );
#ifdef TCP_SIGNATURE
int tcp_signature_apply(caddr_t, caddr_t, unsigned int);
int tcp_signature(struct tdb *, int, struct mbuf *, struct tcphdr *,
int, int, char *);