svn commit: r362577 - head/sys/netinet

2020-06-24 Thread Richard Scheffenegger
Author: rscheff
Date: Wed Jun 24 13:42:42 2020
New Revision: 362577
URL: https://svnweb.freebsd.org/changeset/base/362577

Log:
  TCP: make after-idle work for transactional sessions.
  
  The use of t_rcvtime as proxy for the last transmission
  fails for transactional IO, where the client requests
  data before the server can respond with a bulk transfer.
  
  Set aside a dedicated variable to actually track the last
  locally sent segment going forward.
  
  Reported by:  rrs
  Reviewed by:  rrs, tuexen (mentor)
  Approved by:  tuexen (mentor), rgrimes (mentor)
  MFC after:2 weeks
  Sponsored by: NetApp, Inc.
  Differential Revision:https://reviews.freebsd.org/D25016

Modified:
  head/sys/netinet/tcp_output.c
  head/sys/netinet/tcp_var.h

Modified: head/sys/netinet/tcp_output.c
==
--- head/sys/netinet/tcp_output.c   Wed Jun 24 13:11:19 2020
(r362576)
+++ head/sys/netinet/tcp_output.c   Wed Jun 24 13:42:42 2020
(r362577)
@@ -260,7 +260,8 @@ tcp_output(struct tcpcb *tp)
 * to send, then transmit; otherwise, investigate further.
 */
idle = (tp->t_flags & TF_LASTIDLE) || (tp->snd_max == tp->snd_una);
-   if (idle && ticks - tp->t_rcvtime >= tp->t_rxtcur)
+   if (idle && (((ticks - tp->t_rcvtime) >= tp->t_rxtcur) ||
+   (tp->t_sndtime && ((ticks - tp->t_sndtime) >= tp->t_rxtcur))))
cc_after_idle(tp);
tp->t_flags &= ~TF_LASTIDLE;
if (idle) {
@@ -1502,6 +1503,7 @@ out:
 * Time this transmission if not a retransmission and
 * not currently timing anything.
 */
+   tp->t_sndtime = ticks;
if (tp->t_rtttime == 0) {
tp->t_rtttime = ticks;
tp->t_rtseq = startseq;

Modified: head/sys/netinet/tcp_var.h
==
--- head/sys/netinet/tcp_var.h  Wed Jun 24 13:11:19 2020(r362576)
+++ head/sys/netinet/tcp_var.h  Wed Jun 24 13:42:42 2020(r362577)
@@ -188,8 +188,9 @@ struct tcpcb {
tcp_seq snd_wl2;/* window update seg ack number */
 
tcp_seq irs;/* initial receive sequence number */
-   tcp_seq iss;/* initial send sequence number */
-   u_int   t_acktime;
+   tcp_seq iss;/* initial send sequence number */
+   u_int   t_acktime;  /* RACK and BBR incoming new data was 
acked */
+   u_int   t_sndtime;  /* time last data was sent */
u_int   ts_recent_age;  /* when last updated */
tcp_seq snd_recover;/* for use in NewReno Fast Recovery */
uint16_t cl4_spare; /* Spare to adjust CL 4 */
___
svn-src-head@freebsd.org mailing list
https://lists.freebsd.org/mailman/listinfo/svn-src-head
To unsubscribe, send any mail to "svn-src-head-unsubscr...@freebsd.org"


svn commit: r362580 - head/sys/netinet/cc

2020-06-24 Thread Richard Scheffenegger
Author: rscheff
Date: Wed Jun 24 13:52:53 2020
New Revision: 362580
URL: https://svnweb.freebsd.org/changeset/base/362580

Log:
  TCP: fix cubic RTO reaction.
  
  Proper TCP Cubic operation requires the knowledge
  of the maximum congestion window prior to the
  last congestion event.
  
  This restores and improves a bugfix previously added
  by jtl@ but subsequently removed due to a revert.
  
  Reported by:  chengc_netapp.com
  Reviewed by:  chengc_netapp.com, tuexen (mentor)
  Approved by:  tuexen (mentor), rgrimes (mentor)
  MFC after:2 weeks
  Sponsored by: NetApp, Inc.
  Differential Revision:https://reviews.freebsd.org/D25133

Modified:
  head/sys/netinet/cc/cc_cubic.c

Modified: head/sys/netinet/cc/cc_cubic.c
==
--- head/sys/netinet/cc/cc_cubic.c  Wed Jun 24 13:49:30 2020
(r362579)
+++ head/sys/netinet/cc/cc_cubic.c  Wed Jun 24 13:52:53 2020
(r362580)
@@ -313,10 +313,15 @@ cubic_cong_signal(struct cc_var *ccv, uint32_t type)
 * timeout has fired more than once, as there is a reasonable
 * chance the first one is a false alarm and may not indicate
 * congestion.
+* This will put Cubic firmly into the concave / TCP friendly
+* region, for a slower ramp-up after two consecutive RTOs.
 */
if (CCV(ccv, t_rxtshift) >= 2) {
cubic_data->flags |= CUBICFLAG_CONG_EVENT;
cubic_data->t_last_cong = ticks;
+   cubic_data->max_cwnd = CCV(ccv, snd_cwnd_prev);
+   cubic_data->K = cubic_k(cubic_data->max_cwnd /
+   CCV(ccv, t_maxseg));
}
break;
}
___
svn-src-head@freebsd.org mailing list
https://lists.freebsd.org/mailman/listinfo/svn-src-head
To unsubscribe, send any mail to "svn-src-head-unsubscr...@freebsd.org"


svn commit: r362988 - head/sys/netinet

2020-07-07 Thread Richard Scheffenegger
Author: rscheff
Date: Tue Jul  7 12:10:59 2020
New Revision: 362988
URL: https://svnweb.freebsd.org/changeset/base/362988

Log:
  Fix KASSERT during tcp_newtcpcb when low on memory
  
  While testing with system default cc set to cubic, and
  running a memory exhaustion validation, FreeBSD panics for a
  missing inpcb reference / lock.
  
  Reviewed by:  rgrimes (mentor), tuexen (mentor)
  Approved by:  rgrimes (mentor), tuexen (mentor)
  MFC after:3 weeks
  Sponsored by: NetApp, Inc.
  Differential Revision:https://reviews.freebsd.org/D25583

Modified:
  head/sys/netinet/tcp_subr.c

Modified: head/sys/netinet/tcp_subr.c
==
--- head/sys/netinet/tcp_subr.c Tue Jul  7 07:51:09 2020(r362987)
+++ head/sys/netinet/tcp_subr.c Tue Jul  7 12:10:59 2020(r362988)
@@ -1702,6 +1702,12 @@ tcp_newtcpcb(struct inpcb *inp)
KASSERT(!STAILQ_EMPTY(&cc_list), ("cc_list is empty!"));
CC_ALGO(tp) = CC_DEFAULT();
CC_LIST_RUNLOCK();
+   /*
+* The tcpcb will hold a reference on its inpcb until tcp_discardcb()
+* is called.
+*/
+   in_pcbref(inp); /* Reference for tcpcb */
+   tp->t_inpcb = inp;
 
if (CC_ALGO(tp)->cb_init != NULL)
if (CC_ALGO(tp)->cb_init(tp->ccv) > 0) {
@@ -1746,12 +1752,6 @@ tcp_newtcpcb(struct inpcb *inp)
if (V_tcp_do_sack)
tp->t_flags |= TF_SACK_PERMIT;
TAILQ_INIT(&tp->snd_holes);
-   /*
-* The tcpcb will hold a reference on its inpcb until tcp_discardcb()
-* is called.
-*/
-   in_pcbref(inp); /* Reference for tcpcb */
-   tp->t_inpcb = inp;
 
/*
 * Init srtt to TCPTV_SRTTBASE (0), so we can tell that we have no
___
svn-src-head@freebsd.org mailing list
https://lists.freebsd.org/mailman/listinfo/svn-src-head
To unsubscribe, send any mail to "svn-src-head-unsubscr...@freebsd.org"


svn commit: r363380 - head/sys/netinet/cc

2020-07-20 Thread Richard Scheffenegger
Author: rscheff
Date: Mon Jul 20 23:47:27 2020
New Revision: 363380
URL: https://svnweb.freebsd.org/changeset/base/363380

Log:
  Add MODULE_VERSION to TCP loadable congestion control modules.
  
  Without versioning information, using preexisting loader /
  linker code is not easily possible when another module may
  have dependencies on pre-loaded modules, and also doesn't
  allow the automatic loading of dependent modules.
  
  No functional change of the actual modules.
  
  Reviewed by:  tuexen (mentor), rgrimes (mentor)
  Approved by:  tuexen (mentor), rgrimes (mentor)
  MFC after:2 weeks
  Sponsored by: NetApp, Inc.
  Differential Revision:https://reviews.freebsd.org/D25744

Modified:
  head/sys/netinet/cc/cc_cdg.c
  head/sys/netinet/cc/cc_chd.c
  head/sys/netinet/cc/cc_cubic.c
  head/sys/netinet/cc/cc_dctcp.c
  head/sys/netinet/cc/cc_hd.c
  head/sys/netinet/cc/cc_htcp.c
  head/sys/netinet/cc/cc_newreno.c
  head/sys/netinet/cc/cc_vegas.c

Modified: head/sys/netinet/cc/cc_cdg.c
==
--- head/sys/netinet/cc/cc_cdg.cMon Jul 20 22:32:39 2020
(r363379)
+++ head/sys/netinet/cc/cc_cdg.cMon Jul 20 23:47:27 2020
(r363380)
@@ -714,5 +714,5 @@ SYSCTL_UINT(_net_inet_tcp_cc_cdg, OID_AUTO, loss_compe
 "the window backoff for loss based CC compatibility");
 
 DECLARE_CC_MODULE(cdg, &cdg_cc_algo);
-
+MODULE_VERSION(cdg, 1);
 MODULE_DEPEND(cdg, ertt, 1, 1, 1);

Modified: head/sys/netinet/cc/cc_chd.c
==
--- head/sys/netinet/cc/cc_chd.cMon Jul 20 22:32:39 2020
(r363379)
+++ head/sys/netinet/cc/cc_chd.cMon Jul 20 23:47:27 2020
(r363380)
@@ -493,4 +493,5 @@ SYSCTL_UINT(_net_inet_tcp_cc_chd,  OID_AUTO, use_max,
 "as the basic delay measurement for the algorithm.");
 
 DECLARE_CC_MODULE(chd, &chd_cc_algo);
+MODULE_VERSION(chd, 1);
 MODULE_DEPEND(chd, ertt, 1, 1, 1);

Modified: head/sys/netinet/cc/cc_cubic.c
==
--- head/sys/netinet/cc/cc_cubic.c  Mon Jul 20 22:32:39 2020
(r363379)
+++ head/sys/netinet/cc/cc_cubic.c  Mon Jul 20 23:47:27 2020
(r363380)
@@ -473,3 +473,4 @@ cubic_ssthresh_update(struct cc_var *ccv)
 
 
 DECLARE_CC_MODULE(cubic, &cubic_cc_algo);
+MODULE_VERSION(cubic, 1);

Modified: head/sys/netinet/cc/cc_dctcp.c
==
--- head/sys/netinet/cc/cc_dctcp.c  Mon Jul 20 22:32:39 2020
(r363379)
+++ head/sys/netinet/cc/cc_dctcp.c  Mon Jul 20 23:47:27 2020
(r363380)
@@ -464,3 +464,4 @@ SYSCTL_PROC(_net_inet_tcp_cc_dctcp, OID_AUTO, slowstar
 "half CWND reduction after the first slow start");
 
 DECLARE_CC_MODULE(dctcp, &dctcp_cc_algo);
+MODULE_VERSION(dctcp, 1);

Modified: head/sys/netinet/cc/cc_hd.c
==
--- head/sys/netinet/cc/cc_hd.c Mon Jul 20 22:32:39 2020(r363379)
+++ head/sys/netinet/cc/cc_hd.c Mon Jul 20 23:47:27 2020(r363380)
@@ -251,4 +251,5 @@ SYSCTL_PROC(_net_inet_tcp_cc_hd, OID_AUTO, queue_min,
 "minimum queueing delay threshold (qmin) in ticks");
 
 DECLARE_CC_MODULE(hd, &hd_cc_algo);
+MODULE_VERSION(hd, 1);
 MODULE_DEPEND(hd, ertt, 1, 1, 1);

Modified: head/sys/netinet/cc/cc_htcp.c
==
--- head/sys/netinet/cc/cc_htcp.c   Mon Jul 20 22:32:39 2020
(r363379)
+++ head/sys/netinet/cc/cc_htcp.c   Mon Jul 20 23:47:27 2020
(r363380)
@@ -530,3 +530,4 @@ SYSCTL_UINT(_net_inet_tcp_cc_htcp, OID_AUTO, rtt_scali
 "enable H-TCP RTT scaling");
 
 DECLARE_CC_MODULE(htcp, &htcp_cc_algo);
+MODULE_VERSION(htcp, 1);

Modified: head/sys/netinet/cc/cc_newreno.c
==
--- head/sys/netinet/cc/cc_newreno.cMon Jul 20 22:32:39 2020
(r363379)
+++ head/sys/netinet/cc/cc_newreno.cMon Jul 20 23:47:27 2020
(r363380)
@@ -396,3 +396,4 @@ SYSCTL_PROC(_net_inet_tcp_cc_newreno, OID_AUTO, beta_e
 "New Reno beta ecn, specified as number between 1 and 100");
 
 DECLARE_CC_MODULE(newreno, &newreno_cc_algo);
+MODULE_VERSION(newreno, 1);

Modified: head/sys/netinet/cc/cc_vegas.c
==
--- head/sys/netinet/cc/cc_vegas.c  Mon Jul 20 22:32:39 2020
(r363379)
+++ head/sys/netinet/cc/cc_vegas.c  Mon Jul 20 23:47:27 2020
(r363380)
@@ -301,4 +301,5 @@ SYSCTL_PROC(_net_inet_tcp_cc_vegas, OID_AUTO, beta,
 "vegas beta, specified as number of \"buffers\" (0 < alpha < beta)");
 
 DECLARE_CC_MODULE(vegas, &vegas_cc_algo);
+MODULE_VERSION(vegas, 1);
 MODULE_DEPEND(vegas, ertt, 1, 1, 1);
___
svn-src-hea

svn commit: r363397 - head/sys/netinet/cc

2020-07-21 Thread Richard Scheffenegger
Author: rscheff
Date: Tue Jul 21 16:21:52 2020
New Revision: 363397
URL: https://svnweb.freebsd.org/changeset/base/363397

Log:
  Fix style and comment around concave/convex regions in TCP cubic.
  
  In cubic, the concave region is when snd_cwnd starts growing slower
  towards max_cwnd (cwnd at the time of the congestion event), and
  the convex region is when snd_cwnd starts to grow faster,
  eventually resembling slow-start-like growth.
  
  PR:   238478
  Reviewed by:  tuexen (mentor), rgrimes (mentor)
  Approved by:  tuexen (mentor), rgrimes (mentor)
  MFC after:2 weeks
  Sponsored by: NetApp, Inc.
  Differential Revision:https://reviews.freebsd.org/D24657

Modified:
  head/sys/netinet/cc/cc_cubic.c

Modified: head/sys/netinet/cc/cc_cubic.c
==
--- head/sys/netinet/cc/cc_cubic.c  Tue Jul 21 16:17:23 2020
(r363396)
+++ head/sys/netinet/cc/cc_cubic.c  Tue Jul 21 16:21:52 2020
(r363397)
@@ -185,12 +185,11 @@ cubic_ack_received(struct cc_var *ccv, uint16_t type)
 */
if (CCV(ccv, snd_cwnd) < w_tf)
CCV(ccv, snd_cwnd) = ulmin(w_tf, 
INT_MAX);
-   }
-
-   else if (CCV(ccv, snd_cwnd) < w_cubic_next) {
+   } else if (CCV(ccv, snd_cwnd) < w_cubic_next) {
/*
 * Concave or convex region, follow CUBIC
 * cwnd growth.
+* Only update snd_cwnd, if it doesn't shrink.
 */
if (V_tcp_do_rfc3465)
CCV(ccv, snd_cwnd) = ulmin(w_cubic_next,
___
svn-src-head@freebsd.org mailing list
https://lists.freebsd.org/mailman/listinfo/svn-src-head
To unsubscribe, send any mail to "svn-src-head-unsubscr...@freebsd.org"


svn commit: r364195 - head/sys/netinet

2020-08-13 Thread Richard Scheffenegger
Author: rscheff
Date: Thu Aug 13 16:30:09 2020
New Revision: 364195
URL: https://svnweb.freebsd.org/changeset/base/364195

Log:
  Improve SACK support code for RFC6675 and PRR
  
  Adding proper accounting of sacked_bytes and (per-ACK)
  delivered data to the SACK scoreboard. This will
  allow more aspects of RFC6675 to be implemented as well
  as Proportional Rate Reduction (RFC6937).
  
  Prior to this change, the pipe calculation controlled with
  net.inet.tcp.rfc6675_pipe was also susceptible to incorrect
  results when more than 3 (or 4) holes in the sequence space
  were present, which can no longer all fit into a single
  ACK's SACK option.
  
  Reviewed by:  kbowling, rgrimes (mentor)
  Approved by:  rgrimes (mentor, blanket)
  MFC after:3 weeks
  Sponsored by: NetApp, Inc.
  Differential Revision:https://reviews.freebsd.org/D18624

Modified:
  head/sys/netinet/tcp_input.c
  head/sys/netinet/tcp_sack.c
  head/sys/netinet/tcp_var.h

Modified: head/sys/netinet/tcp_input.c
==
--- head/sys/netinet/tcp_input.cThu Aug 13 14:26:25 2020
(r364194)
+++ head/sys/netinet/tcp_input.cThu Aug 13 16:30:09 2020
(r364195)
@@ -2673,9 +2673,16 @@ tcp_do_segment(struct mbuf *m, struct tcphdr *th, stru
tp->t_dupacks = 0;
/*
 * If this ack also has new SACK info, increment the
-* counter as per rfc6675.
+* counter as per rfc6675. The variable
+* sack_changed tracks all changes to the SACK
+* scoreboard, including when partial ACKs without
+* SACK options are received, and clear the scoreboard
+* from the left side. Such partial ACKs should not be
+* counted as dupacks here.
 */
-   if ((tp->t_flags & TF_SACK_PERMIT) && sack_changed)
+   if ((tp->t_flags & TF_SACK_PERMIT) &&
+   (to.to_flags & TOF_SACK) &&
+   sack_changed)
tp->t_dupacks++;
}
 

Modified: head/sys/netinet/tcp_sack.c
==
--- head/sys/netinet/tcp_sack.c Thu Aug 13 14:26:25 2020(r364194)
+++ head/sys/netinet/tcp_sack.c Thu Aug 13 16:30:09 2020(r364195)
@@ -535,9 +535,7 @@ tcp_sackhole_remove(struct tcpcb *tp, struct sackhole 
  * tp->snd_holes is an ordered list of holes (oldest to newest, in terms of
  * the sequence space).
  * Returns 1 if incoming ACK has previously unknown SACK information,
- * 0 otherwise. Note: We treat (snd_una, th_ack) as a sack block so any changes
- * to that (i.e. left edge moving) would also be considered a change in SACK
- * information which is slightly different than rfc6675.
+ * 0 otherwise.
  */
 int
 tcp_sack_doack(struct tcpcb *tp, struct tcpopt *to, tcp_seq th_ack)
@@ -545,16 +543,21 @@ tcp_sack_doack(struct tcpcb *tp, struct tcpopt *to, tc
struct sackhole *cur, *temp;
struct sackblk sack, sack_blocks[TCP_MAX_SACK + 1], *sblkp;
int i, j, num_sack_blks, sack_changed;
+   int delivered_data, left_edge_delta;
 
INP_WLOCK_ASSERT(tp->t_inpcb);
 
num_sack_blks = 0;
sack_changed = 0;
+   delivered_data = 0;
+   left_edge_delta = 0;
/*
 * If SND.UNA will be advanced by SEG.ACK, and if SACK holes exist,
 * treat [SND.UNA, SEG.ACK) as if it is a SACK block.
+* Account changes to SND.UNA always in delivered data.
 */
if (SEQ_LT(tp->snd_una, th_ack) && !TAILQ_EMPTY(&tp->snd_holes)) {
+   left_edge_delta = th_ack - tp->snd_una;
sack_blocks[num_sack_blks].start = tp->snd_una;
sack_blocks[num_sack_blks++].end = th_ack;
}
@@ -563,7 +566,6 @@ tcp_sack_doack(struct tcpcb *tp, struct tcpopt *to, tc
 * received new blocks from the other side.
 */
if (to->to_flags & TOF_SACK) {
-   tp->sackhint.sacked_bytes = 0;  /* reset */
for (i = 0; i < to->to_nsacks; i++) {
bcopy((to->to_sacks + i * TCPOLEN_SACK),
&sack, sizeof(sack));
@@ -576,8 +578,6 @@ tcp_sack_doack(struct tcpcb *tp, struct tcpopt *to, tc
SEQ_GT(sack.end, tp->snd_una) &&
SEQ_LEQ(sack.end, tp->snd_max)) {
sack_blocks[num_sack_blks++] = sack;
-   tp->sackhint.sacked_bytes +=
-   (sack.end-sack.start);
}
}
}
@@ -602,7 +602,7 @@ tcp_sack_doack(struct tcpcb *tp, struct tcpopt *to, tc
}
}
}

svn commit: r364196 - head/sys/netinet/cc

2020-08-13 Thread Richard Scheffenegger
Author: rscheff
Date: Thu Aug 13 16:38:51 2020
New Revision: 364196
URL: https://svnweb.freebsd.org/changeset/base/364196

Log:
  TCP Cubic: After leaving slowstart fix unintended cwnd jump.
  
  Initializing K to zero in D23655 introduced a miscalculation,
  where cwnd would suddenly jump to cwnd_max instead of gradually
  increasing, after leaving slow-start.
  
  Properly calculating K instead of resetting it to zero resolves
  this issue. Also make sure that cwnd is recalculated at the
  earliest opportunity once slow-start is over.
  
  Reported by:  chengc_netapp.com
  Reviewed by:  chengc_netapp.com, tuexen (mentor), rgrimes (mentor)
  Approved by:  tuexen (mentor), rgrimes (mentor)
  MFC after:3 weeks
  Sponsored by: NetApp, Inc.
  Differential Revision:https://reviews.freebsd.org/D25746

Modified:
  head/sys/netinet/cc/cc_cubic.c

Modified: head/sys/netinet/cc/cc_cubic.c
==
--- head/sys/netinet/cc/cc_cubic.c  Thu Aug 13 16:30:09 2020
(r364195)
+++ head/sys/netinet/cc/cc_cubic.c  Thu Aug 13 16:38:51 2020
(r364196)
@@ -132,19 +132,29 @@ cubic_ack_received(struct cc_var *ccv, uint16_t type)
 
/*
 * Regular ACK and we're not in cong/fast recovery and we're cwnd
-* limited and we're either not doing ABC or are slow starting or are
-* doing ABC and we've sent a cwnd's worth of bytes.
+* limited and we're either not doing ABC or are just coming out
+* from slow-start or were application limited or are slow starting
+* or are doing ABC and we've sent a cwnd's worth of bytes.
 */
if (type == CC_ACK && !IN_RECOVERY(CCV(ccv, t_flags)) &&
(ccv->flags & CCF_CWND_LIMITED) && (!V_tcp_do_rfc3465 ||
+   (cubic_data->flags & (CUBICFLAG_IN_SLOWSTART | 
CUBICFLAG_IN_APPLIMIT)) ||
CCV(ccv, snd_cwnd) <= CCV(ccv, snd_ssthresh) ||
-   (V_tcp_do_rfc3465 && ccv->flags & CCF_ABC_SENTAWND))) {
+   (V_tcp_do_rfc3465 && (ccv->flags & CCF_ABC_SENTAWND)))) {
 /* Use the logic in NewReno ack_received() for slow start. */
if (CCV(ccv, snd_cwnd) <= CCV(ccv, snd_ssthresh) ||
cubic_data->min_rtt_ticks == TCPTV_SRTTBASE) {
cubic_data->flags |= CUBICFLAG_IN_SLOWSTART;
newreno_cc_algo.ack_received(ccv, type);
} else {
+   if (cubic_data->flags & (CUBICFLAG_IN_SLOWSTART |
+CUBICFLAG_IN_APPLIMIT)) {
+   cubic_data->flags &= ~(CUBICFLAG_IN_SLOWSTART |
+  CUBICFLAG_IN_APPLIMIT);
+   cubic_data->t_last_cong = ticks;
+   cubic_data->K = cubic_k(cubic_data->max_cwnd /
+   CCV(ccv, t_maxseg));
+   }
if ((ticks_since_cong =
ticks - cubic_data->t_last_cong) < 0) {
/*
@@ -152,14 +162,6 @@ cubic_ack_received(struct cc_var *ccv, uint16_t type)
 */
ticks_since_cong = INT_MAX;
cubic_data->t_last_cong = ticks - INT_MAX;
-   }
-
-   if (cubic_data->flags & (CUBICFLAG_IN_SLOWSTART |
-CUBICFLAG_IN_APPLIMIT)) {
-   cubic_data->flags &= ~(CUBICFLAG_IN_SLOWSTART |
-  CUBICFLAG_IN_APPLIMIT);
-   cubic_data->t_last_cong = ticks;
-   cubic_data->K = 0;
}
/*
 * The mean RTT is used to best reflect the equations in
___
svn-src-head@freebsd.org mailing list
https://lists.freebsd.org/mailman/listinfo/svn-src-head
To unsubscribe, send any mail to "svn-src-head-unsubscr...@freebsd.org"


svn commit: r364197 - head/sys/netinet/cc

2020-08-13 Thread Richard Scheffenegger
Author: rscheff
Date: Thu Aug 13 16:45:55 2020
New Revision: 364197
URL: https://svnweb.freebsd.org/changeset/base/364197

Log:
  TCP Cubic: Have Fast Convergence Heuristic work for ECN, and align concave region
  
  The Cubic concave region was not aligned nicely for the very first exit from
  slow start, where a 50% cwnd reduction is done instead of the normal 30%.
  
  This addresses an issue, where a short line-rate burst could result from that
  sudden jump of cwnd.
  
  In addition, the Fast Convergence Heuristic has been expanded to work also
  with ECN induced congestion response.
  
  Submitted by: chengc_netapp.com
  Reported by:  chengc_netapp.com
  Reviewed by:  tuexen (mentor), rgrimes (mentor)
  Approved by:  tuexen (mentor), rgrimes (mentor)
  MFC after:3 weeks
  Sponsored by: NetApp, Inc.
  Differential Revision:https://reviews.freebsd.org/D25976

Modified:
  head/sys/netinet/cc/cc_cubic.c

Modified: head/sys/netinet/cc/cc_cubic.c
==
--- head/sys/netinet/cc/cc_cubic.c  Thu Aug 13 16:38:51 2020
(r364196)
+++ head/sys/netinet/cc/cc_cubic.c  Thu Aug 13 16:45:55 2020
(r364197)
@@ -286,8 +286,7 @@ cubic_cong_signal(struct cc_var *ccv, uint32_t type)
if (!IN_CONGRECOVERY(CCV(ccv, t_flags))) {
cubic_ssthresh_update(ccv);
cubic_data->flags |= CUBICFLAG_CONG_EVENT;
-   cubic_data->prev_max_cwnd = 
cubic_data->max_cwnd;
-   cubic_data->max_cwnd = CCV(ccv, snd_cwnd);
+   cubic_data->t_last_cong = ticks;
cubic_data->K = cubic_k(cubic_data->max_cwnd / 
CCV(ccv, t_maxseg));
}
ENTER_RECOVERY(CCV(ccv, t_flags));
@@ -298,8 +297,6 @@ cubic_cong_signal(struct cc_var *ccv, uint32_t type)
if (!IN_CONGRECOVERY(CCV(ccv, t_flags))) {
cubic_ssthresh_update(ccv);
cubic_data->flags |= CUBICFLAG_CONG_EVENT;
-   cubic_data->prev_max_cwnd = cubic_data->max_cwnd;
-   cubic_data->max_cwnd = CCV(ccv, snd_cwnd);
cubic_data->t_last_cong = ticks;
cubic_data->K = cubic_k(cubic_data->max_cwnd / CCV(ccv, 
t_maxseg));
CCV(ccv, snd_cwnd) = CCV(ccv, snd_ssthresh);
@@ -361,11 +358,6 @@ cubic_post_recovery(struct cc_var *ccv)
cubic_data = ccv->cc_data;
pipe = 0;
 
-   /* Fast convergence heuristic. */
-   if (cubic_data->max_cwnd < cubic_data->prev_max_cwnd)
-   cubic_data->max_cwnd = (cubic_data->max_cwnd * CUBIC_FC_FACTOR)
-   >> CUBIC_SHIFT;
-
if (IN_FASTRECOVERY(CCV(ccv, t_flags))) {
/*
 * If inflight data is less than ssthresh, set cwnd
@@ -392,7 +384,6 @@ cubic_post_recovery(struct cc_var *ccv)
CUBIC_BETA) >> CUBIC_SHIFT,
2 * CCV(ccv, t_maxseg));
}
-   cubic_data->t_last_cong = ticks;
 
/* Calculate the average RTT between congestion epochs. */
if (cubic_data->epoch_ack_count > 0 &&
@@ -403,7 +394,6 @@ cubic_post_recovery(struct cc_var *ccv)
 
cubic_data->epoch_ack_count = 0;
cubic_data->sum_rtt_ticks = 0;
-   cubic_data->K = cubic_k(cubic_data->max_cwnd / CCV(ccv, t_maxseg));
 }
 
 /*
@@ -457,18 +447,32 @@ cubic_ssthresh_update(struct cc_var *ccv)
 {
struct cubic *cubic_data;
uint32_t ssthresh;
+   uint32_t cwnd;
 
cubic_data = ccv->cc_data;
+   cwnd = CCV(ccv, snd_cwnd);
 
+   /* Fast convergence heuristic. */
+   if (cwnd < cubic_data->max_cwnd) {
+   cwnd = ((uint64_t)cwnd * CUBIC_FC_FACTOR) >> CUBIC_SHIFT;
+   }
+   cubic_data->prev_max_cwnd = cubic_data->max_cwnd;
+   cubic_data->max_cwnd = cwnd;
+
/*
-* On the first congestion event, set ssthresh to cwnd * 0.5, on
-* subsequent congestion events, set it to cwnd * beta.
+* On the first congestion event, set ssthresh to cwnd * 0.5
+* and reduce max_cwnd to cwnd * beta. This aligns the cubic concave
+* region appropriately. On subsequent congestion events, set
+* ssthresh to cwnd * beta.
 */
-   if ((cubic_data->flags & CUBICFLAG_CONG_EVENT) == 0)
-   ssthresh = CCV(ccv, snd_cwnd) >> 1;
-   else
-   ssthresh = ((uint64_t)CCV(ccv, snd_cwnd) *
+   if ((cubic_data->flags & CUBICFLAG_CONG_EVENT) == 0) {
+   ssthresh = cwnd >> 1;
+   cubic_data->max_cwnd = ((uint64_t)cwnd *
CUBIC_BETA) >> CUBIC_SHIFT;
+   } else {
+   ssthresh = ((uint64_t)cwnd *
+   CUBIC_BETA) >> CUBIC_SHIFT;
+   }
CCV(ccv, snd_ssthresh) = 

svn commit: r364354 - head/sys/netinet/cc

2020-08-18 Thread Richard Scheffenegger
Author: rscheff
Date: Tue Aug 18 19:34:31 2020
New Revision: 364354
URL: https://svnweb.freebsd.org/changeset/base/364354

Log:
  TCP Cubic: recalculate cwnd for every ACK.
  
  Since cubic calculates cwnd based on absolute
  time, retaining RFC3465 (ABC) once-per-window updates
  can lead to dramatic changes of cwnd in the convex
  region. Updating cwnd for each incoming ack minimizes
  this delta, preventing unintentional line-rate bursts.
  
  Reviewed by:  chengc_netapp.com, tuexen (mentor)
  MFC after:2 weeks
  Sponsored by: NetApp, Inc.
  Differential Revision:https://reviews.freebsd.org/D26060

Modified:
  head/sys/netinet/cc/cc_cubic.c

Modified: head/sys/netinet/cc/cc_cubic.c
==
--- head/sys/netinet/cc/cc_cubic.c  Tue Aug 18 19:25:03 2020
(r364353)
+++ head/sys/netinet/cc/cc_cubic.c  Tue Aug 18 19:34:31 2020
(r364354)
@@ -131,16 +131,11 @@ cubic_ack_received(struct cc_var *ccv, uint16_t type)
cubic_record_rtt(ccv);
 
/*
-* Regular ACK and we're not in cong/fast recovery and we're cwnd
-* limited and we're either not doing ABC or are just coming out
-* from slow-start or were application limited or are slow starting
-* or are doing ABC and we've sent a cwnd's worth of bytes.
+* For a regular ACK and we're not in cong/fast recovery and
+* we're cwnd limited, always recalculate cwnd.
 */
if (type == CC_ACK && !IN_RECOVERY(CCV(ccv, t_flags)) &&
-   (ccv->flags & CCF_CWND_LIMITED) && (!V_tcp_do_rfc3465 ||
-   (cubic_data->flags & (CUBICFLAG_IN_SLOWSTART | 
CUBICFLAG_IN_APPLIMIT)) ||
-   CCV(ccv, snd_cwnd) <= CCV(ccv, snd_ssthresh) ||
-   (V_tcp_do_rfc3465 && (ccv->flags & CCF_ABC_SENTAWND)))) {
+   (ccv->flags & CCF_CWND_LIMITED)) {
 /* Use the logic in NewReno ack_received() for slow start. */
if (CCV(ccv, snd_cwnd) <= CCV(ccv, snd_ssthresh) ||
cubic_data->min_rtt_ticks == TCPTV_SRTTBASE) {
@@ -193,15 +188,8 @@ cubic_ack_received(struct cc_var *ccv, uint16_t type)
 * cwnd growth.
 * Only update snd_cwnd, if it doesn't shrink.
 */
-   if (V_tcp_do_rfc3465)
-   CCV(ccv, snd_cwnd) = ulmin(w_cubic_next,
-   INT_MAX);
-   else
-   CCV(ccv, snd_cwnd) += ulmax(1,
-   ((ulmin(w_cubic_next, INT_MAX) -
-   CCV(ccv, snd_cwnd)) *
-   CCV(ccv, t_maxseg)) /
-   CCV(ccv, snd_cwnd));
+   CCV(ccv, snd_cwnd) = ulmin(w_cubic_next,
+   INT_MAX);
}
 
/*
___
svn-src-head@freebsd.org mailing list
https://lists.freebsd.org/mailman/listinfo/svn-src-head
To unsubscribe, send any mail to "svn-src-head-unsubscr...@freebsd.org"


svn commit: r367007 - in head/sys/netinet: . cc

2020-10-24 Thread Richard Scheffenegger
Author: rscheff
Date: Sat Oct 24 16:09:18 2020
New Revision: 367007
URL: https://svnweb.freebsd.org/changeset/base/367007

Log:
  tcp: move cwnd and ssthresh updates into cc modules
  
  This will pave the way for setting ssthresh differently in TCP CUBIC, according
  to RFC8312 section 4.7.
  
  No functional change, only code movement.
  
  Submitted by: chengc_netapp.com
  Reviewed by:  rrs, tuexen, rscheff
  MFC after:2 weeks
  Sponsored by: NetApp, Inc.
  Differential Revision:https://reviews.freebsd.org/D26807

Modified:
  head/sys/netinet/cc/cc_cubic.c
  head/sys/netinet/cc/cc_dctcp.c
  head/sys/netinet/cc/cc_htcp.c
  head/sys/netinet/cc/cc_newreno.c
  head/sys/netinet/tcp_input.c

Modified: head/sys/netinet/cc/cc_cubic.c
==
--- head/sys/netinet/cc/cc_cubic.c  Sat Oct 24 16:05:37 2020
(r367006)
+++ head/sys/netinet/cc/cc_cubic.c  Sat Oct 24 16:09:18 2020
(r367007)
@@ -264,8 +264,10 @@ static void
 cubic_cong_signal(struct cc_var *ccv, uint32_t type)
 {
struct cubic *cubic_data;
+   u_int mss;
 
cubic_data = ccv->cc_data;
+   mss = tcp_maxseg(ccv->ccvc.tcp);
 
switch (type) {
case CC_NDUPACK:
@@ -292,6 +294,10 @@ cubic_cong_signal(struct cc_var *ccv, uint32_t type)
break;
 
case CC_RTO:
+   CCV(ccv, snd_ssthresh) = max(min(CCV(ccv, snd_wnd),
+CCV(ccv, snd_cwnd)) / 2 / mss,
+2) * mss;
+   CCV(ccv, snd_cwnd) = mss;
/*
 * Grab the current time and record it so we know when the
 * most recent congestion event was. Only record it when the

Modified: head/sys/netinet/cc/cc_dctcp.c
==
--- head/sys/netinet/cc/cc_dctcp.c  Sat Oct 24 16:05:37 2020
(r367006)
+++ head/sys/netinet/cc/cc_dctcp.c  Sat Oct 24 16:09:18 2020
(r367007)
@@ -235,7 +235,7 @@ dctcp_cong_signal(struct cc_var *ccv, uint32_t type)
if (CCV(ccv, t_flags2) & TF2_ECN_PERMIT) {
dctcp_data = ccv->cc_data;
cwin = CCV(ccv, snd_cwnd);
-   mss = CCV(ccv, t_maxseg);
+   mss = tcp_maxseg(ccv->ccvc.tcp);
 
switch (type) {
case CC_NDUPACK:
@@ -282,6 +282,10 @@ dctcp_cong_signal(struct cc_var *ccv, uint32_t type)
dctcp_data->ece_curr = 1;
break;
case CC_RTO:
+   CCV(ccv, snd_ssthresh) = max(min(CCV(ccv, snd_wnd),
+CCV(ccv, snd_cwnd)) / 
2 / mss,
+2) * mss;
+   CCV(ccv, snd_cwnd) = mss;
dctcp_update_alpha(ccv);
dctcp_data->save_sndnxt += CCV(ccv, t_maxseg);
dctcp_data->num_cong_events++;

Modified: head/sys/netinet/cc/cc_htcp.c
==
--- head/sys/netinet/cc/cc_htcp.c   Sat Oct 24 16:05:37 2020
(r367006)
+++ head/sys/netinet/cc/cc_htcp.c   Sat Oct 24 16:09:18 2020
(r367007)
@@ -271,8 +271,10 @@ static void
 htcp_cong_signal(struct cc_var *ccv, uint32_t type)
 {
struct htcp *htcp_data;
+   u_int mss;
 
htcp_data = ccv->cc_data;
+   mss = tcp_maxseg(ccv->ccvc.tcp);
 
switch (type) {
case CC_NDUPACK:
@@ -311,6 +313,10 @@ htcp_cong_signal(struct cc_var *ccv, uint32_t type)
break;
 
case CC_RTO:
+   CCV(ccv, snd_ssthresh) = max(min(CCV(ccv, snd_wnd),
+CCV(ccv, snd_cwnd)) / 2 / mss,
+2) * mss;
+   CCV(ccv, snd_cwnd) = mss;
/*
 * Grab the current time and record it so we know when the
 * most recent congestion event was. Only record it when the

Modified: head/sys/netinet/cc/cc_newreno.c
==
--- head/sys/netinet/cc/cc_newreno.cSat Oct 24 16:05:37 2020
(r367006)
+++ head/sys/netinet/cc/cc_newreno.cSat Oct 24 16:09:18 2020
(r367007)
@@ -237,7 +237,7 @@ newreno_cong_signal(struct cc_var *ccv, uint32_t type)
u_int mss;
 
cwin = CCV(ccv, snd_cwnd);
-   mss = CCV(ccv, t_maxseg);
+   mss = tcp_maxseg(ccv->ccvc.tcp);
nreno = ccv->cc_data;
beta = (nreno == NULL) ? V_newreno_beta : nreno->beta;
beta_ecn = (nreno == NULL) ? V_newreno_beta_ecn : nreno->beta_ecn;
@@ -274,6 +274,12 @@ newreno_cong_signal(struct cc_var *ccv, uint32_t type)
CCV(ccv, snd_cwnd) = cwin;
E

svn commit: r367008 - head/sys/netinet/cc

2020-10-24 Thread Richard Scheffenegger
Author: rscheff
Date: Sat Oct 24 16:11:46 2020
New Revision: 367008
URL: https://svnweb.freebsd.org/changeset/base/367008

Log:
  TCP Cubic: improve reaction to (and rollback from) RTO
  
  1. fix compliance issue of CUBIC RTO handling according to RFC8312 section 4.7
  2. add CUBIC CC_RTO_ERR handling
  
  Submitted by: chengc_netapp.com
  Reviewed by:  rrs, tuexen, rscheff
  MFC after:2 weeks
  Sponsored by: NetApp, Inc.
  Differential Revision:https://reviews.freebsd.org/D26808

Modified:
  head/sys/netinet/cc/cc_cubic.c

Modified: head/sys/netinet/cc/cc_cubic.c
==
--- head/sys/netinet/cc/cc_cubic.c  Sat Oct 24 16:09:18 2020
(r367007)
+++ head/sys/netinet/cc/cc_cubic.c  Sat Oct 24 16:11:46 2020
(r367008)
@@ -78,7 +78,7 @@ static void   cubic_conn_init(struct cc_var *ccv);
 static int cubic_mod_init(void);
 static voidcubic_post_recovery(struct cc_var *ccv);
 static voidcubic_record_rtt(struct cc_var *ccv);
-static voidcubic_ssthresh_update(struct cc_var *ccv);
+static voidcubic_ssthresh_update(struct cc_var *ccv, uint32_t maxseg);
 static voidcubic_after_idle(struct cc_var *ccv);
 
 struct cubic {
@@ -90,19 +90,28 @@ struct cubic {
unsigned long   max_cwnd;
/* cwnd at the previous congestion event. */
unsigned long   prev_max_cwnd;
+   /* A copy of prev_max_cwnd. Used for CC_RTO_ERR */
+   unsigned long   prev_max_cwnd_cp;
/* various flags */
uint32_tflags;
 #define CUBICFLAG_CONG_EVENT   0x0001  /* congestion experienced */
 #define CUBICFLAG_IN_SLOWSTART 0x0002  /* in slow start */
 #define CUBICFLAG_IN_APPLIMIT  0x0004  /* application limited */
+#define CUBICFLAG_RTO_EVENT0x0008  /* RTO experienced */
/* Minimum observed rtt in ticks. */
int min_rtt_ticks;
/* Mean observed rtt between congestion epochs. */
int mean_rtt_ticks;
/* ACKs since last congestion event. */
int epoch_ack_count;
-   /* Time of last congestion event in ticks. */
+   /* Timestamp (in ticks) of arriving in congestion avoidance from last
+* congestion event.
+*/
int t_last_cong;
+   /* Timestamp (in ticks) of a previous congestion event. Used for
+* CC_RTO_ERR.
+*/
+   int t_last_cong_prev;
 };
 
 static MALLOC_DEFINE(M_CUBIC, "cubic data",
@@ -142,7 +151,14 @@ cubic_ack_received(struct cc_var *ccv, uint16_t type)
cubic_data->flags |= CUBICFLAG_IN_SLOWSTART;
newreno_cc_algo.ack_received(ccv, type);
} else {
-   if (cubic_data->flags & (CUBICFLAG_IN_SLOWSTART |
+   if ((cubic_data->flags & CUBICFLAG_RTO_EVENT) &&
+   (cubic_data->flags & CUBICFLAG_IN_SLOWSTART)) {
+   /* RFC8312 Section 4.7 */
+   cubic_data->flags &= ~(CUBICFLAG_RTO_EVENT |
+  CUBICFLAG_IN_SLOWSTART);
+   cubic_data->max_cwnd = CCV(ccv, snd_cwnd);
+   cubic_data->K = 0;
+   } else if (cubic_data->flags & (CUBICFLAG_IN_SLOWSTART |
 CUBICFLAG_IN_APPLIMIT)) {
cubic_data->flags &= ~(CUBICFLAG_IN_SLOWSTART |
   CUBICFLAG_IN_APPLIMIT);
@@ -273,10 +289,10 @@ cubic_cong_signal(struct cc_var *ccv, uint32_t type)
case CC_NDUPACK:
if (!IN_FASTRECOVERY(CCV(ccv, t_flags))) {
if (!IN_CONGRECOVERY(CCV(ccv, t_flags))) {
-   cubic_ssthresh_update(ccv);
+   cubic_ssthresh_update(ccv, mss);
cubic_data->flags |= CUBICFLAG_CONG_EVENT;
cubic_data->t_last_cong = ticks;
-   cubic_data->K = cubic_k(cubic_data->max_cwnd / 
CCV(ccv, t_maxseg));
+   cubic_data->K = cubic_k(cubic_data->max_cwnd / 
mss);
}
ENTER_RECOVERY(CCV(ccv, t_flags));
}
@@ -284,37 +300,35 @@ cubic_cong_signal(struct cc_var *ccv, uint32_t type)
 
case CC_ECN:
if (!IN_CONGRECOVERY(CCV(ccv, t_flags))) {
-   cubic_ssthresh_update(ccv);
+   cubic_ssthresh_update(ccv, mss);
cubic_data->flags |= CUBICFLAG_CONG_EVENT;
cubic_data->t_last_cong = ticks;
-   cubic_data->K = cubic_k(cubic_data->max_cwnd / CCV(ccv, 
t_maxseg));
+   cubic_data->K = cubic_k(cubic_data->max_cwnd / mss)

svn commit: r367021 - in head/sbin: ping ping6

2020-10-24 Thread Richard Scheffenegger
Author: rscheff
Date: Sat Oct 24 21:01:18 2020
New Revision: 367021
URL: https://svnweb.freebsd.org/changeset/base/367021

Log:
  Make use of IP_VLAN_PCP setsockopt in ping and ping6.
  
  In order to validate the proper marking and use of a different
  ethernet priority class, add the new session-specific PCP
  feature to the ping/ping6 utilities.
  
  Reviewed by:  mav, bcr
  Sponsored by: NetApp, Inc.
  Differential Revision:https://reviews.freebsd.org/D26627

Modified:
  head/sbin/ping/ping.8
  head/sbin/ping/ping.c
  head/sbin/ping6/ping6.8
  head/sbin/ping6/ping6.c

Modified: head/sbin/ping/ping.8
==
--- head/sbin/ping/ping.8   Sat Oct 24 20:57:13 2020(r367020)
+++ head/sbin/ping/ping.8   Sat Oct 24 21:01:18 2020(r367021)
@@ -28,7 +28,7 @@
 .\" @(#)ping.8 8.2 (Berkeley) 12/11/93
 .\" $FreeBSD$
 .\"
-.Dd August 22, 2019
+.Dd October 2, 2020
 .Dt PING 8
 .Os
 .Sh NAME
@@ -39,6 +39,7 @@ packets to network hosts
 .Sh SYNOPSIS
 .Nm
 .Op Fl AaDdfHnoQqRrv
+.Op Fl C Ar pcp
 .Op Fl c Ar count
 .Op Fl G Ar sweepmaxsize
 .Op Fl g Ar sweepminsize
@@ -57,6 +58,7 @@ packets to network hosts
 .Ar host
 .Nm
 .Op Fl AaDdfHLnoQqRrv
+.Op Fl C Ar pcp
 .Op Fl c Ar count
 .Op Fl I Ar iface
 .Op Fl i Ar wait
@@ -112,6 +114,9 @@ Include a bell
 character in the output when any packet is received.
 This option is ignored
 if other format options are present.
+.It Fl C Ar pcp
+Add an 802.1p Ethernet Priority Code Point when sending a packet.
+0..7 uses that specific PCP, -1 uses the interface default PCP (or none).
 .It Fl c Ar count
 Stop after sending
 (and receiving)

Modified: head/sbin/ping/ping.c
==
--- head/sbin/ping/ping.c   Sat Oct 24 20:57:13 2020(r367020)
+++ head/sbin/ping/ping.c   Sat Oct 24 21:01:18 2020(r367021)
@@ -155,6 +155,7 @@ static int options;
 #defineF_TIME  0x10
 #defineF_SWEEP 0x20
 #defineF_WAITTIME  0x40
+#defineF_IP_VLAN_PCP   0x80
 
 /*
  * MAX_DUP_CHK is the number of bits in received table, i.e. the maximum
@@ -247,7 +248,7 @@ main(int argc, char *const *argv)
u_long alarmtimeout;
long ltmp;
int almost_done, ch, df, hold, i, icmp_len, mib[4], preload;
-   int ssend_errno, srecv_errno, tos, ttl;
+   int ssend_errno, srecv_errno, tos, ttl, pcp;
char ctrl[CMSG_SPACE(sizeof(struct timespec))];
char hnamebuf[MAXHOSTNAMELEN], snamebuf[MAXHOSTNAMELEN];
 #ifdef IP_OPTIONS
@@ -295,11 +296,11 @@ main(int argc, char *const *argv)
err(EX_OSERR, "srecv socket");
}
 
-   alarmtimeout = df = preload = tos = 0;
+   alarmtimeout = df = preload = tos = pcp = 0;
 
outpack = outpackhdr + sizeof(struct ip);
while ((ch = getopt(argc, argv,
-   "Aac:DdfG:g:Hh:I:i:Ll:M:m:nop:QqRrS:s:T:t:vW:z:"
+   "AaC:c:DdfG:g:Hh:I:i:Ll:M:m:nop:QqRrS:s:T:t:vW:z:"
 #ifdef IPSEC
 #ifdef IPSEC_POLICY_IPSEC
"P:"
@@ -314,6 +315,13 @@ main(int argc, char *const *argv)
case 'a':
options |= F_AUDIBLE;
break;
+   case 'C':
+   options |= F_IP_VLAN_PCP;
+   ltmp = strtol(optarg, &ep, 0);
+   if (*ep || ep == optarg || ltmp > 7 || ltmp < -1)
+   errx(EX_USAGE, "invalid PCP: `%s'", optarg);
+   pcp = ltmp;
+   break;
case 'c':
ltmp = strtol(optarg, &ep, 0);
if (*ep || ep == optarg || ltmp <= 0)
@@ -665,6 +673,10 @@ main(int argc, char *const *argv)
if (options & F_SO_DONTROUTE)
(void)setsockopt(ssend, SOL_SOCKET, SO_DONTROUTE, (char *)&hold,
sizeof(hold));
+   if (options & F_IP_VLAN_PCP) {
+   (void)setsockopt(ssend, IPPROTO_IP, IP_VLAN_PCP, (char *)&pcp,
+   sizeof(pcp));
+   }
 #ifdef IPSEC
 #ifdef IPSEC_POLICY_IPSEC
if (options & F_POLICY) {
@@ -1762,11 +1774,11 @@ usage(void)
 {
 
(void)fprintf(stderr, "%s\n%s\n%s\n%s\n%s\n%s\n%s\n%s\n",
-"usage: ping [-AaDdfHnoQqRrv] [-c count] [-G sweepmaxsize] [-g sweepminsize]",
+"usage: ping [-AaDdfHnoQqRrv] [-C pcp] [-c count] [-G sweepmaxsize] [-g 
sweepminsize]",
 "[-h sweepincrsize] [-i wait] [-l preload] [-M mask | time] [-m 
ttl]",
 "   " SECOPT " [-p pattern] [-S src_addr] [-s packetsize] [-t 
timeout]",
 "[-W waittime] [-z tos] host",
-"   ping [-AaDdfHLnoQqRrv] [-c count] [-I iface] [-i wait] [-l preload]",
+"   ping [-AaDdfHLnoQqRrv] [-C pcp] [-c count] [-I iface] [-i wait] [-l 
preload]",
 "[-M mask | time] [-m ttl]" SECOPT " [-p pattern] [-S src_addr]",
 "[-s

svn commit: r367023 - in head: sys/dev/iscsi usr.bin/iscsictl usr.sbin/iscsid

2020-10-24 Thread Richard Scheffenegger
Author: rscheff
Date: Sat Oct 24 21:07:13 2020
New Revision: 367023
URL: https://svnweb.freebsd.org/changeset/base/367023

Log:
  Add network QoS support for PCP to iscsi initiator.
  
  Make the Ethernet PCP codepoint configurable
  for L2 local traffic, to allow lower latency for
  iSCSI block IO. This addresses the initiator
  side only.
  
  Reviewed by:  mav, trasz, bcr
  Sponsored by: NetApp, Inc.
  Differential Revision:https://reviews.freebsd.org/D26739

Modified:
  head/sys/dev/iscsi/iscsi_ioctl.h
  head/usr.bin/iscsictl/iscsi.conf.5
  head/usr.bin/iscsictl/iscsictl.c
  head/usr.bin/iscsictl/iscsictl.h
  head/usr.bin/iscsictl/parse.y
  head/usr.bin/iscsictl/token.l
  head/usr.sbin/iscsid/iscsid.c

Modified: head/sys/dev/iscsi/iscsi_ioctl.h
==
--- head/sys/dev/iscsi/iscsi_ioctl.hSat Oct 24 21:07:10 2020
(r367022)
+++ head/sys/dev/iscsi/iscsi_ioctl.hSat Oct 24 21:07:13 2020
(r367023)
@@ -71,7 +71,8 @@ struct iscsi_session_conf {
charisc_offload[ISCSI_OFFLOAD_LEN];
int isc_enable;
int isc_dscp;
-   int isc_spare[3];
+   int isc_pcp;
+   int isc_spare[2];
 };
 
 /*

Modified: head/usr.bin/iscsictl/iscsi.conf.5
==
--- head/usr.bin/iscsictl/iscsi.conf.5  Sat Oct 24 21:07:10 2020
(r367022)
+++ head/usr.bin/iscsictl/iscsi.conf.5  Sat Oct 24 21:07:13 2020
(r367023)
@@ -155,6 +155,13 @@ and
 codepoints.
 Default is no specified dscp codepoint, which means the default
 of the outgoing interface is used.
+.It Cm pcp
+The 802.1Q Priority CodePoint used for sending packets.
+The PCP can be set to a value in the range between
+.Qq Ar 0
+to
+.Qq Ar 7 .
+When omitted, the default for the outgoing interface is used.
 .El
 .Sh FILES
 .Bl -tag -width indent

Modified: head/usr.bin/iscsictl/iscsictl.c
==
--- head/usr.bin/iscsictl/iscsictl.cSat Oct 24 21:07:10 2020
(r367022)
+++ head/usr.bin/iscsictl/iscsictl.cSat Oct 24 21:07:13 2020
(r367023)
@@ -88,6 +88,7 @@ target_new(struct conf *conf)
xo_err(1, "calloc");
targ->t_conf = conf;
targ->t_dscp = -1;
+   targ->t_pcp = -1;
TAILQ_INSERT_TAIL(&conf->conf_targets, targ, t_next);
 
return (targ);
@@ -360,6 +361,7 @@ conf_from_target(struct iscsi_session_conf *conf,
else
conf->isc_data_digest = ISCSI_DIGEST_NONE;
conf->isc_dscp = targ->t_dscp;
+   conf->isc_pcp = targ->t_pcp;
 }
 
 static int
@@ -540,6 +542,9 @@ kernel_list(int iscsi_fd, const struct target *targ __
if (conf->isc_dscp != -1)
xo_emit("{L:/%-26s}{V:dscp/0x%02x}\n",
"Target DSCP:", conf->isc_dscp);
+   if (conf->isc_pcp != -1)
+   xo_emit("{L:/%-26s}{V:pcp/0x%02x}\n",
+   "Target PCP:", conf->isc_pcp);
xo_close_container("target");
 
xo_open_container("auth");

Modified: head/usr.bin/iscsictl/iscsictl.h
==
--- head/usr.bin/iscsictl/iscsictl.hSat Oct 24 21:07:10 2020
(r367022)
+++ head/usr.bin/iscsictl/iscsictl.hSat Oct 24 21:07:13 2020
(r367023)
@@ -79,6 +79,7 @@ struct target {
int t_enable;
int t_protocol;
int t_dscp;
+   int t_pcp;
char*t_offload;
char*t_user;
char*t_secret;

Modified: head/usr.bin/iscsictl/parse.y
==
--- head/usr.bin/iscsictl/parse.y   Sat Oct 24 21:07:10 2020
(r367022)
+++ head/usr.bin/iscsictl/parse.y   Sat Oct 24 21:07:13 2020
(r367023)
@@ -133,6 +133,8 @@ target_entry:
ignored
|
dscp
+   |
+   pcp
;
 
 target_name:   TARGET_NAME EQUALS STR
@@ -306,6 +308,8 @@ dscp:   DSCP EQUALS STR
{
uint64_t tmp;
 
+   if (target->t_dscp != -1)
+   xo_errx(1, "duplicated dscp at line %d", lineno);
if (strcmp($3, "0x") == 0) {
tmp = strtol($3 + 2, NULL, 16);
} else if (expand_number($3, &tmp) != 0) {
@@ -342,6 +346,27 @@ dscp:  DSCP EQUALS STR
| DSCP EQUALS AF41  { target->t_dscp = IPTOS_DSCP_AF41 >> 2 ; }
| DSCP EQUALS AF42  { target->t_dscp = IPTOS_DSCP_AF42 >> 2 ; }
| DSCP EQUALS AF43  { target->t_dscp =

svn commit: r367024 - head/usr.sbin/ctld

2020-10-24 Thread Richard Scheffenegger
Author: rscheff
Date: Sat Oct 24 21:10:53 2020
New Revision: 367024
URL: https://svnweb.freebsd.org/changeset/base/367024

Log:
  Add network QoS support for PCP to iscsi target.
  
  Make the Ethernet PCP codepoint configurable
  for L2 local traffic, to allow lower latency for
  iSCSI block IO. This addresses the target
  side only.
  
  Reviewed by:  mav, trasz, bcr
  Sponsored by: NetApp, Inc.
  Differential Revision:https://reviews.freebsd.org/D26740

Modified:
  head/usr.sbin/ctld/ctl.conf.5
  head/usr.sbin/ctld/ctld.c
  head/usr.sbin/ctld/ctld.h
  head/usr.sbin/ctld/parse.y
  head/usr.sbin/ctld/token.l
  head/usr.sbin/ctld/uclparse.c

Modified: head/usr.sbin/ctld/ctl.conf.5
==
--- head/usr.sbin/ctld/ctl.conf.5   Sat Oct 24 21:07:13 2020
(r367023)
+++ head/usr.sbin/ctld/ctl.conf.5   Sat Oct 24 21:10:53 2020
(r367024)
@@ -258,6 +258,13 @@ well-defined
 and
 .Qq Ar AFxx
 codepoints.
+.It Ic pcp Ar value
+The 802.1Q Priority CodePoint used for sending packets.
+The PCP can be set to a value in the range between
+.Qq Ar 0
+to
+.Qq Ar 7 .
+When omitted, the default for the outgoing interface is used.
 .El
 .Ss target Context
 .Bl -tag -width indent

Modified: head/usr.sbin/ctld/ctld.c
==
--- head/usr.sbin/ctld/ctld.c   Sat Oct 24 21:07:13 2020(r367023)
+++ head/usr.sbin/ctld/ctld.c   Sat Oct 24 21:10:53 2020(r367024)
@@ -626,6 +626,7 @@ portal_group_new(struct conf *conf, const char *name)
pg->pg_conf = conf;
pg->pg_tag = 0; /* Assigned later in conf_apply(). */
pg->pg_dscp = -1;
+   pg->pg_pcp = -1;
TAILQ_INSERT_TAIL(&conf->conf_portal_groups, pg, pg_next);
 
return (pg);
@@ -2210,6 +2211,32 @@ conf_apply(struct conf *oldconf, struct conf *newconf)
IPPROTO_IPV6, IPV6_TCLASS,
&tos, sizeof(tos)) == -1)

log_warn("setsockopt(IPV6_TCLASS) "
+   "failed for %s",
+   newp->p_listen);
+   }
+   }
+   if (newpg->pg_pcp != -1) {
+   struct sockaddr sa;
+   int len = sizeof(sa);
+   getsockname(newp->p_socket, &sa, &len);
+   /*
+* Only allow the 6-bit DSCP
+* field to be modified
+*/
+   int pcp = newpg->pg_pcp;
+   if (sa.sa_family == AF_INET) {
+   if (setsockopt(newp->p_socket,
+   IPPROTO_IP, IP_VLAN_PCP,
+   &pcp, sizeof(pcp)) == -1)
+   
log_warn("setsockopt(IP_VLAN_PCP) "
+   "failed for %s",
+   newp->p_listen);
+   } else
+   if (sa.sa_family == AF_INET6) {
+   if (setsockopt(newp->p_socket,
+   IPPROTO_IPV6, IPV6_VLAN_PCP,
+   &pcp, sizeof(pcp)) == -1)
+   
log_warn("setsockopt(IPV6_VLAN_PCP) "
"failed for %s",
newp->p_listen);
}

Modified: head/usr.sbin/ctld/ctld.h
==
--- head/usr.sbin/ctld/ctld.h   Sat Oct 24 21:07:13 2020(r367023)
+++ head/usr.sbin/ctld/ctld.h   Sat Oct 24 21:10:53 2020(r367024)
@@ -128,6 +128,7 @@ struct portal_group {
char*pg_offload;
char*pg_redirection;
int pg_dscp;
+   int pg_pcp;
 
uint16_tpg_tag;
 };

Modified: head/usr.sbin/ctld/parse.y
==
--- head/usr.sbin/ctld/parse.y  Sat Oct 24 21:07:13 2020(r367023)
+++ head/usr.sbin/ctld/parse.y  Sat Oct 24 21:10:53 2020(r367024)
@@ -65,8 +65,8 @@ extern void   yyrestart(FILE *);
 %token DISCOVERY_AUTH_GROUP DISCOVERY_FILTER DSCP FOREIGN
 %token INITIATOR_NAME INITIATOR_PORTAL ISNS_SERVER ISNS_PERIOD ISNS_TIMEOUT
 %token LISTEN LISTEN_ISER LUN MAXPROC OFFLOAD OPENING_BRACKET OPTION
-%token PA

svn commit: r367492 - in head/sys/netinet: . tcp_stacks

2020-11-08 Thread Richard Scheffenegger
Author: rscheff
Date: Sun Nov  8 18:47:05 2020
New Revision: 367492
URL: https://svnweb.freebsd.org/changeset/base/367492

Log:
  Prevent premature SACK block transmission during loss recovery
  
  Under specific conditions, a window update can be sent with
  outdated SACK information. Some clients react to this by
  subsequently delaying loss recovery, making TCP perform very
  poorly.
  
  Reported by:  chengc_netapp.com
  Reviewed by:  rrs, jtl
  MFC after:2 weeks
  Sponsored by: NetApp, Inc.
  Differential Revision:https://reviews.freebsd.org/D24237

Modified:
  head/sys/netinet/tcp_input.c
  head/sys/netinet/tcp_reass.c
  head/sys/netinet/tcp_stacks/bbr.c
  head/sys/netinet/tcp_stacks/rack.c
  head/sys/netinet/tcp_stacks/rack_bbr_common.c
  head/sys/netinet/tcp_var.h

Modified: head/sys/netinet/tcp_input.c
==
--- head/sys/netinet/tcp_input.cSun Nov  8 18:27:49 2020
(r367491)
+++ head/sys/netinet/tcp_input.cSun Nov  8 18:47:05 2020
(r367492)
@@ -1462,6 +1462,29 @@ tcp_autorcvbuf(struct mbuf *m, struct tcphdr *th, stru
 }
 
 void
+tcp_handle_wakeup(struct tcpcb *tp, struct socket *so)
+{
+   /*
+* Since tp might be gone if the session entered
+* the TIME_WAIT state before coming here, we need
+* to check if the socket is still connected.
+*/
+   if ((so->so_state & SS_ISCONNECTED) == 0)
+   return;
+   INP_LOCK_ASSERT(tp->t_inpcb);
+   if (tp->t_flags & TF_WAKESOR) {
+   tp->t_flags &= ~TF_WAKESOR;
+   SOCKBUF_UNLOCK_ASSERT(&so->so_rcv);
+   sorwakeup(so);
+   }
+   if (tp->t_flags & TF_WAKESOW) {
+   tp->t_flags &= ~TF_WAKESOW;
+   SOCKBUF_UNLOCK_ASSERT(&so->so_snd);
+   sowwakeup(so);
+   }
+}
+
+void
 tcp_do_segment(struct mbuf *m, struct tcphdr *th, struct socket *so,
 struct tcpcb *tp, int drop_hdrlen, int tlen, uint8_t iptos)
 {
@@ -1811,7 +1834,7 @@ tcp_do_segment(struct mbuf *m, struct tcphdr *th, stru
else if (!tcp_timer_active(tp, TT_PERSIST))
tcp_timer_activate(tp, TT_REXMT,
  tp->t_rxtcur);
-   sowwakeup(so);
+   tp->t_flags |= TF_WAKESOW;
if (sbavail(&so->so_snd))
(void) tp->t_fb->tfb_tcp_output(tp);
goto check_delack;
@@ -1876,8 +1899,8 @@ tcp_do_segment(struct mbuf *m, struct tcphdr *th, stru
m_adj(m, drop_hdrlen);  /* delayed header drop 
*/
sbappendstream_locked(&so->so_rcv, m, 0);
}
-   /* NB: sorwakeup_locked() does an implicit unlock. */
-   sorwakeup_locked(so);
+   SOCKBUF_UNLOCK(&so->so_rcv);
+   tp->t_flags |= TF_WAKESOR;
if (DELAY_ACK(tp, tlen)) {
tp->t_flags |= TF_DELACK;
} else {
@@ -2811,8 +2834,8 @@ process_ACK:
tp->snd_wnd = 0;
ourfinisacked = 0;
}
-   /* NB: sowwakeup_locked() does an implicit unlock. */
-   sowwakeup_locked(so);
+   SOCKBUF_UNLOCK(&so->so_snd);
+   tp->t_flags |= TF_WAKESOW;
m_freem(mfree);
/* Detect una wraparound. */
if (!IN_RECOVERY(tp->t_flags) &&
@@ -3033,8 +3056,8 @@ dodata:   
/* XXX */
m_freem(m);
else
sbappendstream_locked(&so->so_rcv, m, 0);
-   /* NB: sorwakeup_locked() does an implicit unlock. */
-   sorwakeup_locked(so);
+   SOCKBUF_UNLOCK(&so->so_rcv);
+   tp->t_flags |= TF_WAKESOR;
} else {
/*
 * XXX: Due to the header drop above "th" is
@@ -3101,6 +3124,8 @@ dodata:   
/* XXX */
if (thflags & TH_FIN) {
if (TCPS_HAVERCVDFIN(tp->t_state) == 0) {
socantrcvmore(so);
+   /* The socket upcall is handled by socantrcvmore. */
+   tp->t_flags &= ~TF_WAKESOR;
/*
 * If connection is half-synchronized
 * (ie NEEDSYN flag on) then delay ACK,
@@ -3164,6 +3189,7 @@ check_delack:
tp->t_flags &= ~TF_DELACK;
tcp_timer_activate(tp, TT_DELACK, tcp_delacktime);
}
+   tcp_handle_wakeup(tp, 

svn commit: r368327 - head/sys/netinet

2020-12-04 Thread Richard Scheffenegger
Author: rscheff
Date: Fri Dec  4 11:29:27 2020
New Revision: 368327
URL: https://svnweb.freebsd.org/changeset/base/368327

Log:
  Add TCP feature Proportional Rate Reduction (PRR) - RFC6937
  
  PRR improves loss recovery and avoids RTOs in a wide range
  of scenarios (ACK thinning) over regular SACK loss recovery.
  
  PRR is disabled by default, enable by net.inet.tcp.do_prr = 1.
  Performance may be impeded by token bucket rate policers at
  the bottleneck, where net.inet.tcp.do_prr_conservative = 1
  should be enabled in addition.
  
  Submitted by: Aris Angelogiannopoulos
  Sponsored by: NetApp, Inc.
  Differential Revision:https://reviews.freebsd.org/D18892

Modified:
  head/sys/netinet/tcp_input.c
  head/sys/netinet/tcp_var.h

Modified: head/sys/netinet/tcp_input.c
==
--- head/sys/netinet/tcp_input.cFri Dec  4 04:39:48 2020
(r368326)
+++ head/sys/netinet/tcp_input.cFri Dec  4 11:29:27 2020
(r368327)
@@ -153,6 +153,16 @@ SYSCTL_INT(_net_inet_tcp, OID_AUTO, drop_synfin, CTLFL
 &VNET_NAME(drop_synfin), 0,
 "Drop TCP packets with SYN+FIN set");
 
+VNET_DEFINE(int, tcp_do_prr_conservative) = 0;
+SYSCTL_INT(_net_inet_tcp, OID_AUTO, do_prr_conservative, CTLFLAG_VNET | 
CTLFLAG_RW,
+&VNET_NAME(tcp_do_prr_conservative), 0,
+"Do conservative Proportional Rate Reduction");
+
+VNET_DEFINE(int, tcp_do_prr) = 1;
+SYSCTL_INT(_net_inet_tcp, OID_AUTO, do_prr, CTLFLAG_VNET | CTLFLAG_RW,
+&VNET_NAME(tcp_do_prr), 1,
+"Enable Proportional Rate Reduction per RFC 6937");
+
 VNET_DEFINE(int, tcp_do_newcwv) = 0;
 SYSCTL_INT(_net_inet_tcp, OID_AUTO, newcwv, CTLFLAG_VNET | CTLFLAG_RW,
 &VNET_NAME(tcp_do_newcwv), 0,
@@ -2554,7 +2564,55 @@ tcp_do_segment(struct mbuf *m, struct tcphdr *th, stru
 IN_FASTRECOVERY(tp->t_flags)) {
cc_ack_received(tp, th, nsegs,
CC_DUPACK);
-   if ((tp->t_flags & TF_SACK_PERMIT) &&
+   if (V_tcp_do_prr &&
+   IN_FASTRECOVERY(tp->t_flags) &&
+   (tp->t_flags & TF_SACK_PERMIT)) {
+   long snd_cnt = 0, limit = 0;
+   long del_data = 0, pipe = 0;
+   /*
+* In a duplicate ACK del_data 
is only the
+* diff_in_sack. If no SACK is 
used del_data
+* will be 0. Pipe is the 
amount of data we
+* estimate to be in the 
network.
+*/
+   del_data = 
tp->sackhint.delivered_data;
+   pipe = (tp->snd_nxt - 
tp->snd_fack) +
+   
tp->sackhint.sack_bytes_rexmit;
+   tp->sackhint.prr_delivered += 
del_data;
+   if (pipe > tp->snd_ssthresh) {
+   snd_cnt = 
(tp->sackhint.prr_delivered *
+   tp->snd_ssthresh /
+   
tp->sackhint.recover_fs) +
+   1 - 
tp->sackhint.sack_bytes_rexmit;
+   } else {
+   if 
(V_tcp_do_prr_conservative)
+   limit = 
tp->sackhint.prr_delivered -
+   
tp->sackhint.sack_bytes_rexmit;
+   else
+   if 
((tp->sackhint.prr_delivered -
+   
tp->sackhint.sack_bytes_rexmit) >
+   del_data)
+   limit = 
tp->sackhint.prr_delivered -
+   
tp->sackhint.sack_bytes_rexmit +
+   
maxseg;
+   else
+   limit = 
del_data + maxseg;
+   if ((tp->snd_ssthresh - 
pipe) <

svn commit: r360010 - in head: share/man/man7 sys/netinet

2020-04-16 Thread Richard Scheffenegger
Author: rscheff
Date: Thu Apr 16 15:59:23 2020
New Revision: 360010
URL: https://svnweb.freebsd.org/changeset/base/360010

Log:
  Reduce default TCP delayed ACK timeout to 40ms.
  
  Reviewed by:  kbowling, tuexen
  Approved by:  tuexen (mentor)
  MFC after:2 weeks
  Sponsored by: NetApp, Inc.
  Differential Revision:https://reviews.freebsd.org/D23281

Modified:
  head/share/man/man7/tuning.7
  head/sys/netinet/tcp_timer.h

Modified: head/share/man/man7/tuning.7
==
--- head/share/man/man7/tuning.7Thu Apr 16 15:58:58 2020
(r360009)
+++ head/share/man/man7/tuning.7Thu Apr 16 15:59:23 2020
(r360010)
@@ -24,7 +24,7 @@
 .\"
 .\" $FreeBSD$
 .\"
-.Dd October 30, 2017
+.Dd April 16, 2020
 .Dt TUNING 7
 .Os
 .Sh NAME
@@ -435,7 +435,7 @@ number of tiny packets flowing across the network in h
 The
 .Fx
 delayed ACK implementation also follows the TCP protocol rule that
-at least every other packet be acknowledged even if the standard 100ms
+at least every other packet be acknowledged even if the standard 40ms
 timeout has not yet passed.
 Normally the worst a delayed ACK can do is
 slightly delay the teardown of a connection, or slightly delay the ramp-up

Modified: head/sys/netinet/tcp_timer.h
==
--- head/sys/netinet/tcp_timer.hThu Apr 16 15:58:58 2020
(r360009)
+++ head/sys/netinet/tcp_timer.hThu Apr 16 15:59:23 2020
(r360010)
@@ -119,7 +119,7 @@
 
 #defineTCP_MAXRXTSHIFT 12  /* maximum retransmits 
*/
 
-#defineTCPTV_DELACK( hz/10 )   /* 100ms timeout */
+#defineTCPTV_DELACK( hz/25 )   /* 40ms timeout */
 
 /*
  * If we exceed this number of retransmits for a single segment, we'll consider
___
svn-src-head@freebsd.org mailing list
https://lists.freebsd.org/mailman/listinfo/svn-src-head
To unsubscribe, send any mail to "svn-src-head-unsubscr...@freebsd.org"


svn commit: r360143 - in head/sys/netinet: . tcp_stacks

2020-04-21 Thread Richard Scheffenegger
Author: rscheff
Date: Tue Apr 21 13:05:44 2020
New Revision: 360143
URL: https://svnweb.freebsd.org/changeset/base/360143

Log:
  Correctly set up the initial TCP congestion window
  in all cases, by adjusting snd_una right after the
  connection initialization, to include the one byte
  in sequence space occupied by the SYN bit.
  
  This does not change the regular ACK processing,
  while making the BYTES_THIS_ACK macro work properly.
  
  PR:   235256
  Reviewed by:  tuexen (mentor), rgrimes (mentor)
  Approved by:  tuexen (mentor), rgrimes (mentor)
  MFC after:2 weeks
  Sponsored by: NetApp, Inc.
  Differential Revision:https://reviews.freebsd.org/D19000

Modified:
  head/sys/netinet/tcp_input.c
  head/sys/netinet/tcp_stacks/bbr.c
  head/sys/netinet/tcp_stacks/rack.c

Modified: head/sys/netinet/tcp_input.c
==
--- head/sys/netinet/tcp_input.cTue Apr 21 05:00:35 2020
(r360142)
+++ head/sys/netinet/tcp_input.cTue Apr 21 13:05:44 2020
(r360143)
@@ -2374,12 +2374,6 @@ tcp_do_segment(struct mbuf *m, struct tcphdr *th, stru
if (IS_FASTOPEN(tp->t_flags) && tp->t_tfo_pending) {
tcp_fastopen_decrement_counter(tp->t_tfo_pending);
tp->t_tfo_pending = NULL;
-
-   /*
-* Account for the ACK of our SYN prior to
-* regular ACK processing below.
-*/
-   tp->snd_una++;
}
if (tp->t_flags & TF_NEEDFIN) {
tcp_state_change(tp, TCPS_FIN_WAIT_1);
@@ -2399,6 +2393,12 @@ tcp_do_segment(struct mbuf *m, struct tcphdr *th, stru
cc_conn_init(tp);
tcp_timer_activate(tp, TT_KEEP, TP_KEEPIDLE(tp));
}
+   if (SEQ_GT(th->th_ack, tp->snd_una))
+   /*
+* Account for the ACK of our SYN prior to
+* regular ACK processing below.
+*/
+   tp->snd_una++;
/*
 * If segment contains data or ACK, will call tcp_reass()
 * later; if not, do so now to pass queued data to user.

Modified: head/sys/netinet/tcp_stacks/bbr.c
==
--- head/sys/netinet/tcp_stacks/bbr.c   Tue Apr 21 05:00:35 2020
(r360142)
+++ head/sys/netinet/tcp_stacks/bbr.c   Tue Apr 21 13:05:44 2020
(r360143)
@@ -9325,11 +9325,6 @@ bbr_do_syn_recv(struct mbuf *m, struct tcphdr *th, str
 
tcp_fastopen_decrement_counter(tp->t_tfo_pending);
tp->t_tfo_pending = NULL;
-   /*
-* Account for the ACK of our SYN prior to regular
-* ACK processing below.
-*/
-   tp->snd_una++;
}
/*
 * Make transitions: SYN-RECEIVED  -> ESTABLISHED SYN-RECEIVED* ->
@@ -9352,6 +9347,12 @@ bbr_do_syn_recv(struct mbuf *m, struct tcphdr *th, str
if (!IS_FASTOPEN(tp->t_flags))
cc_conn_init(tp);
}
+   if (SEQ_GT(th->th_ack, tp->snd_una))
+   /*
+* Account for the ACK of our SYN prior to
+* regular ACK processing below.
+*/
+   tp->snd_una++;
/*
 * If segment contains data or ACK, will call tcp_reass() later; if
 * not, do so now to pass queued data to user.

Modified: head/sys/netinet/tcp_stacks/rack.c
==
--- head/sys/netinet/tcp_stacks/rack.c  Tue Apr 21 05:00:35 2020
(r360142)
+++ head/sys/netinet/tcp_stacks/rack.c  Tue Apr 21 13:05:44 2020
(r360143)
@@ -6539,12 +6539,6 @@ rack_do_syn_recv(struct mbuf *m, struct tcphdr *th, st
if (IS_FASTOPEN(tp->t_flags) && tp->t_tfo_pending) {
tcp_fastopen_decrement_counter(tp->t_tfo_pending);
tp->t_tfo_pending = NULL;
-
-   /*
-* Account for the ACK of our SYN prior to
-* regular ACK processing below.
-*/
-   tp->snd_una++;
}
if (tp->t_flags & TF_NEEDFIN) {
tcp_state_change(tp, TCPS_FIN_WAIT_1);
@@ -6562,6 +6556,12 @@ rack_do_syn_recv(struct mbuf *m, struct tcphdr *th, st
if (!IS_FASTOPEN(tp->t_flags))
cc_conn_init(tp);
}
+   if (SEQ_GT(th->th_ack, tp->snd_una))
+   /*
+* Account for the ACK of our SYN prior to
+* regular ACK processing below.
+*/
+   tp->snd_una++;
/*
 * If segment contains data or ACK, will call tcp_reass() later; if
 * not, do so now to

svn commit: r360180 - in head/sys/netinet: . tcp_stacks

2020-04-21 Thread Richard Scheffenegger
Author: rscheff
Date: Wed Apr 22 00:16:42 2020
New Revision: 360180
URL: https://svnweb.freebsd.org/changeset/base/360180

Log:
  revert rS360143 - Correctly set up  initial cwnd
  due to syzkaller panics found
  
  Reported by:  tuexen
  Approved by:  tuexen (mentor)
  Sponsored by: NetApp, Inc.

Modified:
  head/sys/netinet/tcp_input.c
  head/sys/netinet/tcp_stacks/bbr.c
  head/sys/netinet/tcp_stacks/rack.c

Modified: head/sys/netinet/tcp_input.c
==
--- head/sys/netinet/tcp_input.cTue Apr 21 23:38:54 2020
(r360179)
+++ head/sys/netinet/tcp_input.cWed Apr 22 00:16:42 2020
(r360180)
@@ -2374,6 +2374,12 @@ tcp_do_segment(struct mbuf *m, struct tcphdr *th, stru
if (IS_FASTOPEN(tp->t_flags) && tp->t_tfo_pending) {
tcp_fastopen_decrement_counter(tp->t_tfo_pending);
tp->t_tfo_pending = NULL;
+
+   /*
+* Account for the ACK of our SYN prior to
+* regular ACK processing below.
+*/
+   tp->snd_una++;
}
if (tp->t_flags & TF_NEEDFIN) {
tcp_state_change(tp, TCPS_FIN_WAIT_1);
@@ -2393,12 +2399,6 @@ tcp_do_segment(struct mbuf *m, struct tcphdr *th, stru
cc_conn_init(tp);
tcp_timer_activate(tp, TT_KEEP, TP_KEEPIDLE(tp));
}
-   if (SEQ_GT(th->th_ack, tp->snd_una))
-   /*
-* Account for the ACK of our SYN prior to
-* regular ACK processing below.
-*/
-   tp->snd_una++;
/*
 * If segment contains data or ACK, will call tcp_reass()
 * later; if not, do so now to pass queued data to user.

Modified: head/sys/netinet/tcp_stacks/bbr.c
==
--- head/sys/netinet/tcp_stacks/bbr.c   Tue Apr 21 23:38:54 2020
(r360179)
+++ head/sys/netinet/tcp_stacks/bbr.c   Wed Apr 22 00:16:42 2020
(r360180)
@@ -9325,6 +9325,11 @@ bbr_do_syn_recv(struct mbuf *m, struct tcphdr *th, str
 
tcp_fastopen_decrement_counter(tp->t_tfo_pending);
tp->t_tfo_pending = NULL;
+   /*
+* Account for the ACK of our SYN prior to regular
+* ACK processing below.
+*/
+   tp->snd_una++;
}
/*
 * Make transitions: SYN-RECEIVED  -> ESTABLISHED SYN-RECEIVED* ->
@@ -9347,12 +9352,6 @@ bbr_do_syn_recv(struct mbuf *m, struct tcphdr *th, str
if (!IS_FASTOPEN(tp->t_flags))
cc_conn_init(tp);
}
-   if (SEQ_GT(th->th_ack, tp->snd_una))
-   /*
-* Account for the ACK of our SYN prior to
-* regular ACK processing below.
-*/
-   tp->snd_una++;
/*
 * If segment contains data or ACK, will call tcp_reass() later; if
 * not, do so now to pass queued data to user.

Modified: head/sys/netinet/tcp_stacks/rack.c
==
--- head/sys/netinet/tcp_stacks/rack.c  Tue Apr 21 23:38:54 2020
(r360179)
+++ head/sys/netinet/tcp_stacks/rack.c  Wed Apr 22 00:16:42 2020
(r360180)
@@ -6539,6 +6539,12 @@ rack_do_syn_recv(struct mbuf *m, struct tcphdr *th, st
if (IS_FASTOPEN(tp->t_flags) && tp->t_tfo_pending) {
tcp_fastopen_decrement_counter(tp->t_tfo_pending);
tp->t_tfo_pending = NULL;
+
+   /*
+* Account for the ACK of our SYN prior to
+* regular ACK processing below.
+*/
+   tp->snd_una++;
}
if (tp->t_flags & TF_NEEDFIN) {
tcp_state_change(tp, TCPS_FIN_WAIT_1);
@@ -6556,12 +6562,6 @@ rack_do_syn_recv(struct mbuf *m, struct tcphdr *th, st
if (!IS_FASTOPEN(tp->t_flags))
cc_conn_init(tp);
}
-   if (SEQ_GT(th->th_ack, tp->snd_una))
-   /*
-* Account for the ACK of our SYN prior to
-* regular ACK processing below.
-*/
-   tp->snd_una++;
/*
 * If segment contains data or ACK, will call tcp_reass() later; if
 * not, do so now to pass queued data to user.
___
svn-src-head@freebsd.org mailing list
https://lists.freebsd.org/mailman/listinfo/svn-src-head
To unsubscribe, send any mail to "svn-src-head-unsubscr...@freebsd.org"


svn commit: r360477 - in head/sys/netinet: . tcp_stacks

2020-04-29 Thread Richard Scheffenegger
Author: rscheff
Date: Wed Apr 29 21:48:52 2020
New Revision: 360477
URL: https://svnweb.freebsd.org/changeset/base/360477

Log:
  Correctly set up the initial TCP congestion window in all cases,
  by not including the SYN bit sequence space in cwnd related calculations.
  Instead, snd_una is adjusted explicitly in all cases, outside the cwnd update.
  
  This fixes an off-by-one conformance issue with regular TCP sessions not
  using Appropriate Byte Counting (RFC3465), sending one more packet during
  the initial window than expected.
  
  PR:   235256
  Reviewed by:  tuexen (mentor), rgrimes (mentor)
  Approved by:  tuexen (mentor), rgrimes (mentor)
  MFC after:3 weeks
  Sponsored by: NetApp, Inc.
  Differential Revision:https://reviews.freebsd.org/D19000

Modified:
  head/sys/netinet/tcp_input.c
  head/sys/netinet/tcp_stacks/bbr.c
  head/sys/netinet/tcp_stacks/rack.c

Modified: head/sys/netinet/tcp_input.c
==
--- head/sys/netinet/tcp_input.cWed Apr 29 21:12:32 2020
(r360476)
+++ head/sys/netinet/tcp_input.cWed Apr 29 21:48:52 2020
(r360477)
@@ -1470,7 +1470,7 @@ tcp_do_segment(struct mbuf *m, struct tcphdr *th, stru
 struct tcpcb *tp, int drop_hdrlen, int tlen, uint8_t iptos)
 {
int thflags, acked, ourfinisacked, needoutput = 0, sack_changed;
-   int rstreason, todrop, win;
+   int rstreason, todrop, win, incforsyn = 0;
uint32_t tiwin;
uint16_t nsegs;
char *s;
@@ -2374,12 +2374,6 @@ tcp_do_segment(struct mbuf *m, struct tcphdr *th, stru
if (IS_FASTOPEN(tp->t_flags) && tp->t_tfo_pending) {
tcp_fastopen_decrement_counter(tp->t_tfo_pending);
tp->t_tfo_pending = NULL;
-
-   /*
-* Account for the ACK of our SYN prior to
-* regular ACK processing below.
-*/
-   tp->snd_una++;
}
if (tp->t_flags & TF_NEEDFIN) {
tcp_state_change(tp, TCPS_FIN_WAIT_1);
@@ -2400,6 +2394,13 @@ tcp_do_segment(struct mbuf *m, struct tcphdr *th, stru
tcp_timer_activate(tp, TT_KEEP, TP_KEEPIDLE(tp));
}
/*
+* Account for the ACK of our SYN prior to
+* regular ACK processing below, except for
+* simultaneous SYN, which is handled later.
+*/
+   if (SEQ_GT(th->th_ack, tp->snd_una) && !(tp->t_flags & 
TF_NEEDSYN))
+   incforsyn = 1;
+   /*
 * If segment contains data or ACK, will call tcp_reass()
 * later; if not, do so now to pass queued data to user.
 */
@@ -2693,6 +2694,15 @@ tcp_do_segment(struct mbuf *m, struct tcphdr *th, stru
 process_ACK:
INP_WLOCK_ASSERT(tp->t_inpcb);
 
+   /*
+* Adjust for the SYN bit in sequence space,
+* but don't account for it in cwnd calculations.
+* This is for the SYN_RECEIVED, non-simultaneous
+* SYN case. SYN_SENT and simultaneous SYN are
+* treated elsewhere.
+*/
+   if (incforsyn)
+   tp->snd_una++;
acked = BYTES_THIS_ACK(tp, th);
KASSERT(acked >= 0, ("%s: acked unexepectedly negative "
"(tp->snd_una=%u, th->th_ack=%u, tp=%p, m=%p)", __func__,

Modified: head/sys/netinet/tcp_stacks/bbr.c
==
--- head/sys/netinet/tcp_stacks/bbr.c   Wed Apr 29 21:12:32 2020
(r360476)
+++ head/sys/netinet/tcp_stacks/bbr.c   Wed Apr 29 21:48:52 2020
(r360477)
@@ -9326,11 +9326,6 @@ bbr_do_syn_recv(struct mbuf *m, struct tcphdr *th, str
 
tcp_fastopen_decrement_counter(tp->t_tfo_pending);
tp->t_tfo_pending = NULL;
-   /*
-* Account for the ACK of our SYN prior to regular
-* ACK processing below.
-*/
-   tp->snd_una++;
}
/*
 * Make transitions: SYN-RECEIVED  -> ESTABLISHED SYN-RECEIVED* ->
@@ -9353,6 +9348,13 @@ bbr_do_syn_recv(struct mbuf *m, struct tcphdr *th, str
if (!IS_FASTOPEN(tp->t_flags))
cc_conn_init(tp);
}
+   /*
+* Account for the ACK of our SYN prior to
+* regular ACK processing below, except for
+* simultaneous SYN, which is handled later.
+*/
+   if (SEQ_GT(th->th_ack, tp->snd_una) && !(tp->t_flags & TF_NEEDSYN))
+   tp->snd_una++;
/*
 * If segment contains data or ACK, will call tcp_reass() later; if
 * not, do so now to pass queued data to user.

Modified: head/sys/

svn commit: r360479 - in head/sys/netinet: . tcp_stacks

2020-04-29 Thread Richard Scheffenegger
Author: rscheff
Date: Wed Apr 29 22:01:33 2020
New Revision: 360479
URL: https://svnweb.freebsd.org/changeset/base/360479

Log:
  Prevent premature shrinking of the scaled receive window,
  which can cause a TCP client to use invalid or stale TCP
  sequence numbers for ACK packets.
  
  Packets with old sequence numbers are ignored and not used
  to update the send window size. This might cause the TCP
  session to hang indefinitely under some circumstances.
  
  Reported by:  Cui Cheng
  Reviewed by:  tuexen (mentor), rgrimes (mentor)
  Approved by:  tuexen (mentor), rgrimes (mentor)
  MFC after:3 weeks
  Sponsored by: NetApp, Inc.
  Differential Revision:https://reviews.freebsd.org/D24515

Modified:
  head/sys/netinet/tcp_output.c
  head/sys/netinet/tcp_stacks/bbr.c
  head/sys/netinet/tcp_stacks/rack.c

Modified: head/sys/netinet/tcp_output.c
==
--- head/sys/netinet/tcp_output.c   Wed Apr 29 21:54:09 2020
(r360478)
+++ head/sys/netinet/tcp_output.c   Wed Apr 29 22:01:33 2020
(r360479)
@@ -1238,8 +1238,11 @@ send:
if (flags & TH_SYN)
th->th_win = htons((u_short)
(min(sbspace(&so->so_rcv), TCP_MAXWIN)));
-   else
+   else {
+   /* Avoid shrinking window with window scaling. */
+   recwin = roundup2(recwin, 1 << tp->rcv_scale);
th->th_win = htons((u_short)(recwin >> tp->rcv_scale));
+   }
 
/*
 * Adjust the RXWIN0SENT flag - indicate that we have advertised

Modified: head/sys/netinet/tcp_stacks/bbr.c
==
--- head/sys/netinet/tcp_stacks/bbr.c   Wed Apr 29 21:54:09 2020
(r360478)
+++ head/sys/netinet/tcp_stacks/bbr.c   Wed Apr 29 22:01:33 2020
(r360479)
@@ -13756,8 +13756,11 @@ send:
if (flags & TH_SYN)
th->th_win = htons((u_short)
(min(sbspace(&so->so_rcv), TCP_MAXWIN)));
-   else
+   else {
+   /* Avoid shrinking window with window scaling. */
+   recwin = roundup2(recwin, 1 << tp->rcv_scale);
th->th_win = htons((u_short)(recwin >> tp->rcv_scale));
+   }
/*
 * Adjust the RXWIN0SENT flag - indicate that we have advertised a 0
 * window.  This may cause the remote transmitter to stall.  This

Modified: head/sys/netinet/tcp_stacks/rack.c
==
--- head/sys/netinet/tcp_stacks/rack.c  Wed Apr 29 21:54:09 2020
(r360478)
+++ head/sys/netinet/tcp_stacks/rack.c  Wed Apr 29 22:01:33 2020
(r360479)
@@ -9572,8 +9572,11 @@ send:
if (flags & TH_SYN)
th->th_win = htons((u_short)
(min(sbspace(&so->so_rcv), TCP_MAXWIN)));
-   else
+   else {
+   /* Avoid shrinking window with window scaling. */
+   recwin = roundup2(recwin, 1 << tp->rcv_scale);
th->th_win = htons((u_short)(recwin >> tp->rcv_scale));
+   }
/*
 * Adjust the RXWIN0SENT flag - indicate that we have advertised a 0
 * window.  This may cause the remote transmitter to stall.  This
___
svn-src-head@freebsd.org mailing list
https://lists.freebsd.org/mailman/listinfo/svn-src-head
To unsubscribe, send any mail to "svn-src-head-unsubscr...@freebsd.org"


svn commit: r360491 - head/sys/netinet/cc

2020-04-30 Thread Richard Scheffenegger
Author: rscheff
Date: Thu Apr 30 11:11:28 2020
New Revision: 360491
URL: https://svnweb.freebsd.org/changeset/base/360491

Log:
  Introduce a lower bound of 2 MSS to TCP Cubic.
  
  Running TCP Cubic together with ECN could end up reducing cwnd
  down to 1 byte if the receiver continuously sets the ECE flag,
  resulting in very poor transmission speeds.
  
  In line with RFC6582 App. B, a lower bound of 2 MSS is introduced,
  as well as a typecast to prevent any potential integer overflows
  during intermediate calculation steps of the adjusted cwnd.
  
  Reported by:  Cheng Cui
  Reviewed by:  tuexen (mentor)
  Approved by:  tuexen (mentor)
  MFC after:2 weeks
  Sponsored by: NetApp, Inc.
  Differential Revision:https://reviews.freebsd.org/D23353

Modified:
  head/sys/netinet/cc/cc_cubic.c

Modified: head/sys/netinet/cc/cc_cubic.c
==
--- head/sys/netinet/cc/cc_cubic.c  Thu Apr 30 06:34:34 2020
(r360490)
+++ head/sys/netinet/cc/cc_cubic.c  Thu Apr 30 11:11:28 2020
(r360491)
@@ -366,8 +366,9 @@ cubic_post_recovery(struct cc_var *ccv)
CCV(ccv, t_maxseg);
else
/* Update cwnd based on beta and adjusted max_cwnd. */
-   CCV(ccv, snd_cwnd) = max(1, ((CUBIC_BETA *
-   cubic_data->max_cwnd) >> CUBIC_SHIFT));
+   CCV(ccv, snd_cwnd) = 
max(((uint64_t)cubic_data->max_cwnd *
+   CUBIC_BETA) >> CUBIC_SHIFT,
+   2 * CCV(ccv, t_maxseg));
}
cubic_data->t_last_cong = ticks;
 
@@ -433,6 +434,7 @@ static void
 cubic_ssthresh_update(struct cc_var *ccv)
 {
struct cubic *cubic_data;
+   uint32_t ssthresh;
 
cubic_data = ccv->cc_data;
 
@@ -441,10 +443,11 @@ cubic_ssthresh_update(struct cc_var *ccv)
 * subsequent congestion events, set it to cwnd * beta.
 */
if (cubic_data->num_cong_events == 0)
-   CCV(ccv, snd_ssthresh) = CCV(ccv, snd_cwnd) >> 1;
+   ssthresh = CCV(ccv, snd_cwnd) >> 1;
else
-   CCV(ccv, snd_ssthresh) = ((u_long)CCV(ccv, snd_cwnd) *
+   ssthresh = ((uint64_t)CCV(ccv, snd_cwnd) *
CUBIC_BETA) >> CUBIC_SHIFT;
+   CCV(ccv, snd_ssthresh) = max(ssthresh, 2 * CCV(ccv, t_maxseg));
 }
 
 
___
svn-src-head@freebsd.org mailing list
https://lists.freebsd.org/mailman/listinfo/svn-src-head
To unsubscribe, send any mail to "svn-src-head-unsubscr...@freebsd.org"


svn commit: r361345 - in head/sys/netinet: . tcp_stacks

2020-05-21 Thread Richard Scheffenegger
Author: rscheff
Date: Thu May 21 21:15:25 2020
New Revision: 361345
URL: https://svnweb.freebsd.org/changeset/base/361345

Log:
  Handle ECN handshake in simultaneous open
  
  While testing simultaneous open TCP with ECN, found that
  negotiation fails to arrive at the expected final state.
  
  Reviewed by:  tuexen (mentor)
  Approved by:  tuexen (mentor), rgrimes (mentor)
  MFC after:2 weeks
  Sponsored by: NetApp, Inc.
  Differential Revision:https://reviews.freebsd.org/D23373

Modified:
  head/sys/netinet/tcp_input.c
  head/sys/netinet/tcp_output.c
  head/sys/netinet/tcp_stacks/rack.c

Modified: head/sys/netinet/tcp_input.c
==
--- head/sys/netinet/tcp_input.cThu May 21 21:00:46 2020
(r361344)
+++ head/sys/netinet/tcp_input.cThu May 21 21:15:25 2020
(r361345)
@@ -1611,6 +1611,14 @@ tcp_do_segment(struct mbuf *m, struct tcphdr *th, stru
 * XXX this is traditional behavior, may need to be cleaned up.
 */
if (tp->t_state == TCPS_SYN_SENT && (thflags & TH_SYN)) {
+   /* Handle parallel SYN for ECN */
+   if (!(thflags & TH_ACK) &&
+   ((thflags & (TH_CWR | TH_ECE)) == (TH_CWR | TH_ECE)) &&
+   ((V_tcp_do_ecn == 1) || (V_tcp_do_ecn == 2))) {
+   tp->t_flags2 |= TF2_ECN_PERMIT;
+   tp->t_flags2 |= TF2_ECN_SND_ECE;
+   TCPSTAT_INC(tcps_ecn_shs);
+   }
if ((to.to_flags & TOF_SCALE) &&
(tp->t_flags & TF_REQ_SCALE)) {
tp->t_flags |= TF_RCVD_SCALE;

Modified: head/sys/netinet/tcp_output.c
==
--- head/sys/netinet/tcp_output.c   Thu May 21 21:00:46 2020
(r361344)
+++ head/sys/netinet/tcp_output.c   Thu May 21 21:15:25 2020
(r361345)
@@ -1154,6 +1154,12 @@ send:
} else
flags |= TH_ECE|TH_CWR;
}
+   /* Handle parallel SYN for ECN */
+   if ((tp->t_state == TCPS_SYN_RECEIVED) &&
+   (tp->t_flags2 & TF2_ECN_SND_ECE)) {
+   flags |= TH_ECE;
+   tp->t_flags2 &= ~TF2_ECN_SND_ECE;
+   }
 
if (tp->t_state == TCPS_ESTABLISHED &&
(tp->t_flags2 & TF2_ECN_PERMIT)) {

Modified: head/sys/netinet/tcp_stacks/rack.c
==
--- head/sys/netinet/tcp_stacks/rack.c  Thu May 21 21:00:46 2020
(r361344)
+++ head/sys/netinet/tcp_stacks/rack.c  Thu May 21 21:15:25 2020
(r361345)
@@ -11070,6 +11070,14 @@ rack_do_segment_nounlock(struct mbuf *m, struct tcphdr
 * this is traditional behavior, may need to be cleaned up.
 */
if (tp->t_state == TCPS_SYN_SENT && (thflags & TH_SYN)) {
+   /* Handle parallel SYN for ECN */
+   if (!(thflags & TH_ACK) &&
+   ((thflags & (TH_CWR | TH_ECE)) == (TH_CWR | 
TH_ECE)) &&
+   ((V_tcp_do_ecn == 1) || (V_tcp_do_ecn == 2))) {
+   tp->t_flags2 |= TF2_ECN_PERMIT;
+   tp->t_flags2 |= TF2_ECN_SND_ECE;
+   TCPSTAT_INC(tcps_ecn_shs);
+   }
if ((to.to_flags & TOF_SCALE) &&
(tp->t_flags & TF_REQ_SCALE)) {
tp->t_flags |= TF_RCVD_SCALE;
@@ -13522,6 +13530,12 @@ send:
flags |= TH_ECE | TH_CWR;
} else
flags |= TH_ECE | TH_CWR;
+   }
+   /* Handle parallel SYN for ECN */
+   if ((tp->t_state == TCPS_SYN_RECEIVED) &&
+   (tp->t_flags2 & TF2_ECN_SND_ECE)) {
+   flags |= TH_ECE;
+   tp->t_flags2 &= ~TF2_ECN_SND_ECE;
}
if (tp->t_state == TCPS_ESTABLISHED &&
(tp->t_flags2 & TF2_ECN_PERMIT)) {
___
svn-src-head@freebsd.org mailing list
https://lists.freebsd.org/mailman/listinfo/svn-src-head
To unsubscribe, send any mail to "svn-src-head-unsubscr...@freebsd.org"


svn commit: r361346 - in head/sys/netinet: . tcp_stacks

2020-05-21 Thread Richard Scheffenegger
Author: rscheff
Date: Thu May 21 21:26:21 2020
New Revision: 361346
URL: https://svnweb.freebsd.org/changeset/base/361346

Log:
  Retain only mutually supported TCP options after simultaneous SYN
  
  When receiving a parallel SYN in SYN-SENT state, remove all the
  options that only we support locally before sending the SYN,ACK.
  
  This addresses a consistency issue on parallel opens.
  
  Also, on such a parallel open, the stack could be coaxed into
  running with timestamps enabled, even if administratively disabled.
  
  Reviewed by:  tuexen (mentor)
  Approved by:  tuexen (mentor)
  MFC after:2 weeks
  Sponsored by: NetApp, Inc.
  Differential Revision:https://reviews.freebsd.org/D23371

Modified:
  head/sys/netinet/tcp_input.c
  head/sys/netinet/tcp_stacks/bbr.c
  head/sys/netinet/tcp_stacks/rack.c

Modified: head/sys/netinet/tcp_input.c
==
--- head/sys/netinet/tcp_input.cThu May 21 21:15:25 2020
(r361345)
+++ head/sys/netinet/tcp_input.cThu May 21 21:26:21 2020
(r361346)
@@ -1623,17 +1623,20 @@ tcp_do_segment(struct mbuf *m, struct tcphdr *th, stru
(tp->t_flags & TF_REQ_SCALE)) {
tp->t_flags |= TF_RCVD_SCALE;
tp->snd_scale = to.to_wscale;
-   }
+   } else
+   tp->t_flags &= ~TF_REQ_SCALE;
/*
 * Initial send window.  It will be updated with
 * the next incoming segment to the scaled value.
 */
tp->snd_wnd = th->th_win;
-   if (to.to_flags & TOF_TS) {
+   if ((to.to_flags & TOF_TS) &&
+   (tp->t_flags & TF_REQ_TSTMP)) {
tp->t_flags |= TF_RCVD_TSTMP;
tp->ts_recent = to.to_tsval;
tp->ts_recent_age = tcp_ts_getticks();
-   }
+   } else
+   tp->t_flags &= ~TF_REQ_TSTMP;
if (to.to_flags & TOF_MSS)
tcp_mss(tp, to.to_mss);
if ((tp->t_flags & TF_SACK_PERMIT) &&

Modified: head/sys/netinet/tcp_stacks/bbr.c
==
--- head/sys/netinet/tcp_stacks/bbr.c   Thu May 21 21:15:25 2020
(r361345)
+++ head/sys/netinet/tcp_stacks/bbr.c   Thu May 21 21:26:21 2020
(r361346)
@@ -11595,17 +11595,20 @@ bbr_do_segment_nounlock(struct mbuf *m, struct tcphdr 
(tp->t_flags & TF_REQ_SCALE)) {
tp->t_flags |= TF_RCVD_SCALE;
tp->snd_scale = to.to_wscale;
-   }
+   } else
+   tp->t_flags &= ~TF_REQ_SCALE;
/*
 * Initial send window.  It will be updated with the
 * next incoming segment to the scaled value.
 */
tp->snd_wnd = th->th_win;
-   if (to.to_flags & TOF_TS) {
+   if ((to.to_flags & TOF_TS) &&
+   (tp->t_flags & TF_REQ_TSTMP)) {
tp->t_flags |= TF_RCVD_TSTMP;
tp->ts_recent = to.to_tsval;
tp->ts_recent_age = 
tcp_tv_to_mssectick(&bbr->rc_tv);
-   }
+   } else
+   tp->t_flags &= ~TF_REQ_TSTMP;
if (to.to_flags & TOF_MSS)
tcp_mss(tp, to.to_mss);
if ((tp->t_flags & TF_SACK_PERMIT) &&

Modified: head/sys/netinet/tcp_stacks/rack.c
==
--- head/sys/netinet/tcp_stacks/rack.c  Thu May 21 21:15:25 2020
(r361345)
+++ head/sys/netinet/tcp_stacks/rack.c  Thu May 21 21:26:21 2020
(r361346)
@@ -11082,17 +11082,20 @@ rack_do_segment_nounlock(struct mbuf *m, struct tcphdr
(tp->t_flags & TF_REQ_SCALE)) {
tp->t_flags |= TF_RCVD_SCALE;
tp->snd_scale = to.to_wscale;
-   }
+   } else
+   tp->t_flags &= ~TF_REQ_SCALE;
/*
 * Initial send window.  It will be updated with the
 * next incoming segment to the scaled value.
 */
tp->snd_wnd = th->th_win;
-   if (to.to_flags & TOF_TS) {
+   if ((to.to_flags & TOF_TS) &&
+   (tp->t_flags & TF_REQ_TSTMP)) {
tp->t_flags |= TF_RCVD_TSTMP;
tp->ts_recent = 

svn commit: r361347 - in head/sys/netinet: . tcp_stacks

2020-05-21 Thread Richard Scheffenegger
Author: rscheff
Date: Thu May 21 21:33:15 2020
New Revision: 361347
URL: https://svnweb.freebsd.org/changeset/base/361347

Log:
  With RFC3168 ECN, CWR SHOULD only be sent with new data
  
  Overly conservative data receivers may ignore the CWR flag
  on other packets, and keep ECE latched. This can result in
  continuous reduction of the congestion window, and very poor
  performance when ECN is enabled.
  
  Reviewed by:  rgrimes (mentor), rrs
  Approved by:  rgrimes (mentor), tuexen (mentor)
  MFC after:3 days
  Sponsored by: NetApp, Inc.
  Differential Revision:https://reviews.freebsd.org/D23364

Modified:
  head/sys/netinet/tcp_input.c
  head/sys/netinet/tcp_output.c
  head/sys/netinet/tcp_stacks/rack.c

Modified: head/sys/netinet/tcp_input.c
==
--- head/sys/netinet/tcp_input.cThu May 21 21:26:21 2020
(r361346)
+++ head/sys/netinet/tcp_input.cThu May 21 21:33:15 2020
(r361347)
@@ -447,9 +447,15 @@ cc_cong_signal(struct tcpcb *tp, struct tcphdr *th, ui
}
break;
case CC_ECN:
-   if (!IN_CONGRECOVERY(tp->t_flags)) {
+   if (!IN_CONGRECOVERY(tp->t_flags) ||
+   /*
+* Allow ECN reaction on ACK to CWR, if
+* that data segment was also CE marked.
+*/
+   SEQ_GEQ(th->th_ack, tp->snd_recover)) {
+   EXIT_CONGRECOVERY(tp->t_flags);
TCPSTAT_INC(tcps_ecn_rcwnd);
-   tp->snd_recover = tp->snd_max;
+   tp->snd_recover = tp->snd_max + 1;
if (tp->t_flags2 & TF2_ECN_PERMIT)
tp->t_flags2 |= TF2_ECN_SND_CWR;
}

Modified: head/sys/netinet/tcp_output.c
==
--- head/sys/netinet/tcp_output.c   Thu May 21 21:26:21 2020
(r361346)
+++ head/sys/netinet/tcp_output.c   Thu May 21 21:33:15 2020
(r361347)
@@ -1170,7 +1170,8 @@ send:
 */
if (len > 0 && SEQ_GEQ(tp->snd_nxt, tp->snd_max) &&
(sack_rxmit == 0) &&
-   !((tp->t_flags & TF_FORCEDATA) && len == 1)) {
+   !((tp->t_flags & TF_FORCEDATA) && len == 1 &&
+   SEQ_LT(tp->snd_una, tp->snd_max))) {
 #ifdef INET6
if (isipv6)
ip6->ip6_flow |= htonl(IPTOS_ECN_ECT0 << 20);
@@ -1178,14 +1179,14 @@ send:
 #endif
ip->ip_tos |= IPTOS_ECN_ECT0;
TCPSTAT_INC(tcps_ecn_ect0);
-   }
-
-   /*
-* Reply with proper ECN notifications.
-*/
-   if (tp->t_flags2 & TF2_ECN_SND_CWR) {
-   flags |= TH_CWR;
-   tp->t_flags2 &= ~TF2_ECN_SND_CWR;
+   /*
+* Reply with proper ECN notifications.
+* Only set CWR on new data segments.
+*/
+   if (tp->t_flags2 & TF2_ECN_SND_CWR) {
+   flags |= TH_CWR;
+   tp->t_flags2 &= ~TF2_ECN_SND_CWR;
+   }
}
if (tp->t_flags2 & TF2_ECN_SND_ECE)
flags |= TH_ECE;

Modified: head/sys/netinet/tcp_stacks/rack.c
==
--- head/sys/netinet/tcp_stacks/rack.c  Thu May 21 21:26:21 2020
(r361346)
+++ head/sys/netinet/tcp_stacks/rack.c  Thu May 21 21:33:15 2020
(r361347)
@@ -4095,9 +4095,15 @@ rack_cong_signal(struct tcpcb *tp, struct tcphdr *th, 
}
break;
case CC_ECN:
-   if (!IN_CONGRECOVERY(tp->t_flags)) {
+   if (!IN_CONGRECOVERY(tp->t_flags) ||
+   /*
+* Allow ECN reaction on ACK to CWR, if
+* that data segment was also CE marked.
+*/
+   SEQ_GEQ(th->th_ack, tp->snd_recover)) {
+   EXIT_CONGRECOVERY(tp->t_flags);
KMOD_TCPSTAT_INC(tcps_ecn_rcwnd);
-   tp->snd_recover = tp->snd_max;
+   tp->snd_recover = tp->snd_max + 1;
if (tp->t_flags2 & TF2_ECN_PERMIT)
tp->t_flags2 |= TF2_ECN_SND_CWR;
}
@@ -13556,13 +13562,14 @@ send:
 #endif
ip->ip_tos |= IPTOS_ECN_ECT0;
KMOD_TCPSTAT_INC(tcps_ecn_ect0);
-   }
-   /*
-* Reply with proper ECN notifications.
-*/
-   if (tp->t_flags2 & TF2_ECN_SND_CWR) {
-  

svn commit: r361348 - head/sys/netinet/cc

2020-05-21 Thread Richard Scheffenegger
Author: rscheff
Date: Thu May 21 21:42:49 2020
New Revision: 361348
URL: https://svnweb.freebsd.org/changeset/base/361348

Log:
  DCTCP: update alpha only once after loss recovery.
  
  In mixed ECN marking and loss scenarios it was found that
  the alpha value of DCTCP is updated two times. The second
  update happens with freshly initialized counters, indicating
  no ECN loss. Overall this leads to alpha not adjusting as
  quickly as expected to ECN markings, and therefore leads to
  excessive loss.
  
  Reported by:  Cheng Cui
  Reviewed by:  chengc_netapp.com, rrs, tuexen (mentor)
  Approved by:  tuexen (mentor)
  MFC after:2 weeks
  Sponsored by: NetApp, Inc.
  Differential Revision:https://reviews.freebsd.org/D24817

Modified:
  head/sys/netinet/cc/cc_dctcp.c

Modified: head/sys/netinet/cc/cc_dctcp.c
==
--- head/sys/netinet/cc/cc_dctcp.c  Thu May 21 21:33:15 2020
(r361347)
+++ head/sys/netinet/cc/cc_dctcp.c  Thu May 21 21:42:49 2020
(r361348)
@@ -154,10 +154,8 @@ dctcp_ack_received(struct cc_var *ccv, uint16_t type)
 * Update the fraction of marked bytes at the end of
 * current window size.
 */
-   if ((IN_FASTRECOVERY(CCV(ccv, t_flags)) &&
-   SEQ_GEQ(ccv->curack, CCV(ccv, snd_recover))) ||
-   (!IN_FASTRECOVERY(CCV(ccv, t_flags)) &&
-   SEQ_GT(ccv->curack, dctcp_data->save_sndnxt)))
+   if (!IN_FASTRECOVERY(CCV(ccv, t_flags)) &&
+   SEQ_GT(ccv->curack, dctcp_data->save_sndnxt))
dctcp_update_alpha(ccv);
} else
newreno_cc_algo.ack_received(ccv, type);
___
svn-src-head@freebsd.org mailing list
https://lists.freebsd.org/mailman/listinfo/svn-src-head
To unsubscribe, send any mail to "svn-src-head-unsubscr...@freebsd.org"


svn commit: r361806 - head/bin/dd

2020-06-04 Thread Richard Scheffenegger
Author: rscheff
Date: Thu Jun  4 20:47:11 2020
New Revision: 361806
URL: https://svnweb.freebsd.org/changeset/base/361806

Log:
  Add O_DIRECT flag to DD for cache bypass
  
  FreeBSD DD utility has not had support for the O_DIRECT flag, which
  is useful to bypass local caching, e.g. for unconditionally issuing
  NFS IO requests during testing.
  
  Reviewed by:  rgrimes (mentor)
  Approved by:  rgrimes (mentor, blanket)
  MFC after:3 weeks
  Sponsored by: NetApp, Inc.
  Differential Revision:https://reviews.freebsd.org/D25066

Modified:
  head/bin/dd/args.c
  head/bin/dd/dd.1
  head/bin/dd/dd.c
  head/bin/dd/dd.h

Modified: head/bin/dd/args.c
==
--- head/bin/dd/args.c  Thu Jun  4 20:39:28 2020(r361805)
+++ head/bin/dd/args.c  Thu Jun  4 20:47:11 2020(r361806)
@@ -266,6 +266,7 @@ static const struct iflag {
const char *name;
uint64_t set, noset;
 } ilist[] = {
+   { "direct", C_IDIRECT,  0 },
{ "fullblock",  C_IFULLBLOCK,   C_SYNC },
 };
 
@@ -410,6 +411,7 @@ static const struct oflag {
const char *name;
uint64_t set;
 } olist[] = {
+   { "direct", C_ODIRECT },
{ "fsync",  C_OFSYNC },
{ "sync",   C_OFSYNC },
 };

Modified: head/bin/dd/dd.1
==
--- head/bin/dd/dd.1Thu Jun  4 20:39:28 2020(r361805)
+++ head/bin/dd/dd.1Thu Jun  4 20:47:11 2020(r361806)
@@ -32,7 +32,7 @@
 .\" @(#)dd.1   8.2 (Berkeley) 1/13/94
 .\" $FreeBSD$
 .\"
-.Dd March 26, 2019
+.Dd June 4, 2020
 .Dt DD 1
 .Os
 .Sh NAME
@@ -117,6 +117,8 @@ limits the number of times
 is called on the input rather than the number of blocks copied in full.
 May not be combined with
 .Cm conv=sync .
+.It Cm direct
+Set the O_DIRECT flag on the input file to make reads bypass any local caching.
 .El
 .It Cm iseek Ns = Ns Ar n
 Seek on the input file
@@ -143,7 +145,7 @@ the output file is truncated at that point.
 Where
 .Cm value
 is one of the symbols from the following list.
-.Bl -tag -width "fsync"
+.Bl -tag -width "direct"
 .It Cm fsync
 Set the O_FSYNC flag on the output file to make writes synchronous.
 .It Cm sync
@@ -151,6 +153,8 @@ Set the O_SYNC flag on the output file to make writes 
 This is synonymous with the
 .Cm fsync
 value.
+.It Cm direct
+Set the O_DIRECT flag on the output file to make writes bypass any local caching.
 .El
 .It Cm oseek Ns = Ns Ar n
 Seek on the output file

Modified: head/bin/dd/dd.c
==
--- head/bin/dd/dd.cThu Jun  4 20:39:28 2020(r361805)
+++ head/bin/dd/dd.cThu Jun  4 20:47:11 2020(r361806)
@@ -143,7 +143,7 @@ static void
 setup(void)
 {
u_int cnt;
-   int oflags;
+   int iflags, oflags;
cap_rights_t rights;
unsigned long cmds[] = { FIODTYPE, MTIOCTOP };
 
@@ -151,7 +151,10 @@ setup(void)
in.name = "stdin";
in.fd = STDIN_FILENO;
} else {
-   in.fd = open(in.name, O_RDONLY, 0);
+   iflags = 0;
+   if (ddflags & C_IDIRECT)
+   iflags |= O_DIRECT;
+   in.fd = open(in.name, O_RDONLY | iflags, 0);
if (in.fd == -1)
err(1, "%s", in.name);
}
@@ -186,6 +189,8 @@ setup(void)
oflags |= O_TRUNC;
if (ddflags & C_OFSYNC)
oflags |= O_FSYNC;
+   if (ddflags & C_ODIRECT)
+   oflags |= O_DIRECT;
out.fd = open(out.name, O_RDWR | oflags, DEFFILEMODE);
/*
 * May not have read access, so try again with write only.

Modified: head/bin/dd/dd.h
==
--- head/bin/dd/dd.hThu Jun  4 20:39:28 2020(r361805)
+++ head/bin/dd/dd.hThu Jun  4 20:47:11 2020(r361806)
@@ -105,6 +105,8 @@ typedef struct {
 #defineC_FDATASYNC 0x0001ULL
 #defineC_OFSYNC0x0002ULL
 #defineC_IFULLBLOCK0x0004ULL
+#defineC_IDIRECT   0x0008ULL
+#defineC_ODIRECT   0x0010ULL
 
 #defineC_PARITY(C_PAREVEN | C_PARODD | C_PARNONE | C_PARSET)
 
___
svn-src-head@freebsd.org mailing list
https://lists.freebsd.org/mailman/listinfo/svn-src-head
To unsubscribe, send any mail to "svn-src-head-unsubscr...@freebsd.org"


svn commit: r361987 - head/sys/netinet/cc

2020-06-09 Thread Richard Scheffenegger
Author: rscheff
Date: Tue Jun  9 21:07:58 2020
New Revision: 361987
URL: https://svnweb.freebsd.org/changeset/base/361987

Log:
  Prevent TCP Cubic to abruptly increase cwnd after slow-start
  
  Introduce flags to track the initial Wmax dragging and exit
  from slow-start in TCP Cubic. This prevents sudden jumps in the
  calculated cwnd by cubic, especially when the flow is application
  limited during slow start (cwnd can not grow as fast as expected).
  The downside is that cubic may remain slightly longer in the
  concave region before starting the convex region beyond Wmax again.
  
  Reviewed by:  chengc_netapp.com, tuexen (mentor)
  Approved by:  tuexen (mentor), rgrimes (mentor, blanket)
  MFC after:3 weeks
  Sponsored by: NetApp, Inc.
  Differential Revision:https://reviews.freebsd.org/D23655

Modified:
  head/sys/netinet/cc/cc_cubic.c

Modified: head/sys/netinet/cc/cc_cubic.c
==
--- head/sys/netinet/cc/cc_cubic.c  Tue Jun  9 20:52:35 2020
(r361986)
+++ head/sys/netinet/cc/cc_cubic.c  Tue Jun  9 21:07:58 2020
(r361987)
@@ -90,8 +90,10 @@ struct cubic {
unsigned long   max_cwnd;
/* cwnd at the previous congestion event. */
unsigned long   prev_max_cwnd;
-   /* Number of congestion events. */
-   uint32_tnum_cong_events;
+   /* various flags */
+   uint32_tflags;
+#define CUBICFLAG_CONG_EVENT   0x0001  /* congestion experienced */
+#define CUBICFLAG_IN_SLOWSTART 0x0002  /* in slow start */
/* Minimum observed rtt in ticks. */
int min_rtt_ticks;
/* Mean observed rtt between congestion epochs. */
@@ -138,9 +140,10 @@ cubic_ack_received(struct cc_var *ccv, uint16_t type)
(V_tcp_do_rfc3465 && ccv->flags & CCF_ABC_SENTAWND))) {
 /* Use the logic in NewReno ack_received() for slow start. */
if (CCV(ccv, snd_cwnd) <= CCV(ccv, snd_ssthresh) ||
-   cubic_data->min_rtt_ticks == TCPTV_SRTTBASE)
+   cubic_data->min_rtt_ticks == TCPTV_SRTTBASE) {
+   cubic_data->flags |= CUBICFLAG_IN_SLOWSTART;
newreno_cc_algo.ack_received(ccv, type);
-   else {
+   } else {
if ((ticks_since_cong =
ticks - cubic_data->t_last_cong) < 0) {
/*
@@ -150,6 +153,11 @@ cubic_ack_received(struct cc_var *ccv, uint16_t type)
cubic_data->t_last_cong = ticks - INT_MAX;
}
 
+   if (cubic_data->flags & CUBICFLAG_IN_SLOWSTART) {
+   cubic_data->flags &= ~CUBICFLAG_IN_SLOWSTART;
+   cubic_data->t_last_cong = ticks;
+   cubic_data->K = 0;
+   }
/*
 * The mean RTT is used to best reflect the equations in
 * the I-D. Using min_rtt in the tf_cwnd calculation
@@ -199,7 +207,7 @@ cubic_ack_received(struct cc_var *ccv, uint16_t type)
 * keep updating our current estimate of the
 * max_cwnd.
 */
-   if (cubic_data->num_cong_events == 0 &&
+   if (((cubic_data->flags & CUBICFLAG_CONG_EVENT) == 0) &&
cubic_data->max_cwnd < CCV(ccv, snd_cwnd)) {
cubic_data->max_cwnd = CCV(ccv, snd_cwnd);
cubic_data->K = cubic_k(cubic_data->max_cwnd /
@@ -270,9 +278,10 @@ cubic_cong_signal(struct cc_var *ccv, uint32_t type)
if (!IN_FASTRECOVERY(CCV(ccv, t_flags))) {
if (!IN_CONGRECOVERY(CCV(ccv, t_flags))) {
cubic_ssthresh_update(ccv);
-   cubic_data->num_cong_events++;
+   cubic_data->flags |= CUBICFLAG_CONG_EVENT;
cubic_data->prev_max_cwnd = 
cubic_data->max_cwnd;
cubic_data->max_cwnd = CCV(ccv, snd_cwnd);
+   cubic_data->K = cubic_k(cubic_data->max_cwnd / 
CCV(ccv, t_maxseg));
}
ENTER_RECOVERY(CCV(ccv, t_flags));
}
@@ -281,10 +290,11 @@ cubic_cong_signal(struct cc_var *ccv, uint32_t type)
case CC_ECN:
if (!IN_CONGRECOVERY(CCV(ccv, t_flags))) {
cubic_ssthresh_update(ccv);
-   cubic_data->num_cong_events++;
+   cubic_data->flags |= CUBICFLAG_CONG_EVENT;
cubic_data->prev_max_cwnd = cubic_data->max_cwnd;
cubic_data->max_cwnd = CCV(ccv, snd_cwnd);
cubi

svn commit: r362006 - head/sys/netinet/cc

2020-06-10 Thread Richard Scheffenegger
Author: rscheff
Date: Wed Jun 10 07:32:02 2020
New Revision: 362006
URL: https://svnweb.freebsd.org/changeset/base/362006

Log:
  Prevent TCP Cubic to abruptly increase cwnd after app-limited
  
  Cubic calculates the new cwnd based on absolute time
  elapsed since the start of an epoch. A cubic epoch is
  started on congestion events, or once the congestion
  avoidance phase is started, after slow-start has
  completed.
  
  When a sender is application limited for an extended
  amount of time and subsequently a larger volume of data
  becomes ready for sending, Cubic recalculates cwnd
  with a lingering cubic epoch. This recalculation
  of the cwnd can induce a massive increase in cwnd,
  causing a burst of data to be sent at line rate by
  the sender.
  
  This adds a flag to reset the cubic epoch once a
  session transitions from app-limited to cwnd-limited
  to prevent the above effect.
  
  Reviewed by:  chengc_netapp.com, tuexen (mentor)
  Approved by:  tuexen (mentor), rgrimes (mentor)
  MFC after:3 weeks
  Sponsored by: NetApp, Inc.
  Differential Revision:https://reviews.freebsd.org/D25065

Modified:
  head/sys/netinet/cc/cc_cubic.c

Modified: head/sys/netinet/cc/cc_cubic.c
==
--- head/sys/netinet/cc/cc_cubic.c  Wed Jun 10 05:01:00 2020
(r362005)
+++ head/sys/netinet/cc/cc_cubic.c  Wed Jun 10 07:32:02 2020
(r362006)
@@ -94,6 +94,7 @@ struct cubic {
uint32_tflags;
 #define CUBICFLAG_CONG_EVENT   0x0001  /* congestion experienced */
 #define CUBICFLAG_IN_SLOWSTART 0x0002  /* in slow start */
+#define CUBICFLAG_IN_APPLIMIT  0x0004  /* application limited */
/* Minimum observed rtt in ticks. */
int min_rtt_ticks;
/* Mean observed rtt between congestion epochs. */
@@ -153,8 +154,10 @@ cubic_ack_received(struct cc_var *ccv, uint16_t type)
cubic_data->t_last_cong = ticks - INT_MAX;
}
 
-   if (cubic_data->flags & CUBICFLAG_IN_SLOWSTART) {
-   cubic_data->flags &= ~CUBICFLAG_IN_SLOWSTART;
+   if (cubic_data->flags & (CUBICFLAG_IN_SLOWSTART |
+CUBICFLAG_IN_APPLIMIT)) {
+   cubic_data->flags &= ~(CUBICFLAG_IN_SLOWSTART |
+  CUBICFLAG_IN_APPLIMIT);
cubic_data->t_last_cong = ticks;
cubic_data->K = 0;
}
@@ -214,6 +217,9 @@ cubic_ack_received(struct cc_var *ccv, uint16_t type)
CCV(ccv, t_maxseg));
}
}
+   } else if (type == CC_ACK && !IN_RECOVERY(CCV(ccv, t_flags)) &&
+   !(ccv->flags & CCF_CWND_LIMITED)) {
+   cubic_data->flags |= CUBICFLAG_IN_APPLIMIT;
}
 }
 
___
svn-src-head@freebsd.org mailing list
https://lists.freebsd.org/mailman/listinfo/svn-src-head
To unsubscribe, send any mail to "svn-src-head-unsubscr...@freebsd.org"


svn commit: r359764 - head/share/misc

2020-04-09 Thread Richard Scheffenegger
Author: rscheff
Date: Fri Apr 10 00:31:52 2020
New Revision: 359764
URL: https://svnweb.freebsd.org/changeset/base/359764

Log:
  add myself (rscheff) as a src committer.
  
  Reviewed by:  rgrimes (mentor), tuexen (mentor)
  Approved by:  rgrimes (mentor), tuexen (mentor)
  MFC after:3 days
  Sponsored by: NetApp, Inc.
  Differential Revision:https://reviews.freebsd.org/D24318

Modified:
  head/share/misc/committers-src.dot

Modified: head/share/misc/committers-src.dot
==
--- head/share/misc/committers-src.dot  Fri Apr 10 00:27:19 2020
(r359763)
+++ head/share/misc/committers-src.dot  Fri Apr 10 00:31:52 2020
(r359764)
@@ -312,6 +312,7 @@ royger [label="Roger Pau Monne\nroy...@freebsd.org\n20
 rpaulo [label="Rui Paulo\nrpa...@freebsd.org\n2007/09/25"]
 rpokala [label="Ravi Pokala\nrpok...@freebsd.org\n2015/11/19"]
 rrs [label="Randall R Stewart\n...@freebsd.org\n2007/02/08"]
+rscheff [label="Richard Scheffenegger\nrsch...@freebsd.org\n2020/04/06"]
 rse [label="Ralf S. Engelschall\n...@freebsd.org\n1997/07/31"]
 rstone [label="Ryan Stone\nrst...@freebsd.org\n2010/04/19"]
 ru [label="Ruslan Ermilov\n...@freebsd.org\n1999/05/27"]
@@ -792,6 +793,7 @@ pjd -> smh
 pjd -> trociny
 
 rgrimes -> markm
+rgrimes -> rscheff
 
 rmacklem -> jwd
 
@@ -882,6 +884,8 @@ thompsa -> eri
 
 trasz -> jh
 trasz -> mjg
+
+tuexen -> rscheff
 
 ume -> jinmei
 ume -> suz
___
svn-src-head@freebsd.org mailing list
https://lists.freebsd.org/mailman/listinfo/svn-src-head
To unsubscribe, send any mail to "svn-src-head-unsubscr...@freebsd.org"


svn commit: r365546 - head/sys/netinet/cc

2020-09-09 Thread Richard Scheffenegger
Author: rscheff
Date: Thu Sep 10 00:46:38 2020
New Revision: 365546
URL: https://svnweb.freebsd.org/changeset/base/365546

Log:
  cc_mod: remove unused CCF_DELACK definition
  
  During the DCTCP improvements, use of CCF_DELACK was
  removed. This change is just to rename the unused flag
  bit to prevent use of it, without also re-implementing
  the tcp_input and tcp_output interfaces.
  
  No functional change.
  
  Reviewed by:  chengc_netapp.com, tuexen
  MFC after:2 weeks
  Sponsored by: NetApp, Inc.
  Differential Revision:https://reviews.freebsd.org/D26181

Modified:
  head/sys/netinet/cc/cc.h

Modified: head/sys/netinet/cc/cc.h
==
--- head/sys/netinet/cc/cc.hWed Sep  9 23:11:55 2020(r365545)
+++ head/sys/netinet/cc/cc.hThu Sep 10 00:46:38 2020(r365546)
@@ -96,7 +96,7 @@ struct cc_var {
 /* cc_var flags. */
 #defineCCF_ABC_SENTAWND0x0001  /* ABC counted cwnd worth of 
bytes? */
 #defineCCF_CWND_LIMITED0x0002  /* Are we currently cwnd 
limited? */
-#defineCCF_DELACK  0x0004  /* Is this ack delayed? */
+#defineCCF_UNUSED1 0x0004  /* unused */
 #defineCCF_ACKNOW  0x0008  /* Will this ack be sent now? */
 #defineCCF_IPHDR_CE0x0010  /* Does this packet set CE bit? 
*/
 #defineCCF_TCPHDR_CWR  0x0020  /* Does this packet set CWR 
bit? */
___
svn-src-head@freebsd.org mailing list
https://lists.freebsd.org/mailman/listinfo/svn-src-head
To unsubscribe, send any mail to "svn-src-head-unsubscr...@freebsd.org"


svn commit: r365547 - head/sbin/ping6

2020-09-09 Thread Richard Scheffenegger
Author: rscheff
Date: Thu Sep 10 00:50:18 2020
New Revision: 365547
URL: https://svnweb.freebsd.org/changeset/base/365547

Log:
  Add -z "TOS" option to ping6, to test DSCP/ECN values
  
  ping has the option to set the (deprecated) TOS byte
  using the -z option. Add the same option, setting the
  (deprecated) Traffic Class byte (nowadays actually the
  DSCP and ECN fields), to ping6 in order to validate proper
  QoS processing in network switches.
  
  Reviewed by:  tuexen
  MFC after:2 weeks
  Sponsored by: NetApp, Inc.
  Differential Revision:https://reviews.freebsd.org/D26384

Modified:
  head/sbin/ping6/ping6.8
  head/sbin/ping6/ping6.c

Modified: head/sbin/ping6/ping6.8
==
--- head/sbin/ping6/ping6.8 Thu Sep 10 00:46:38 2020(r365546)
+++ head/sbin/ping6/ping6.8 Thu Sep 10 00:50:18 2020(r365547)
@@ -29,7 +29,7 @@
 .\"
 .\" $FreeBSD$
 .\"
-.Dd October 20, 2019
+.Dd September 10, 2020
 .Dt PING6 8
 .Os
 .Sh NAME
@@ -87,6 +87,9 @@ packets to network hosts
 .Op Fl W Ar waittime
 .Ek
 .Bk -words
+.Op Fl z Ar tclass
+.Ek
+.Bk -words
 .Op Ar hops ...
 .Ek
 .Bk -words
@@ -329,6 +332,8 @@ This option is present for backward compatibility.
 has no effect if
 .Fl y
 is specified.
+.It Fl z Ar tclass
+Use the specified traffic class when sending.
 .It Ar hops
 IPv6 addresses for intermediate nodes,
 which will be put into type 0 routing header.

Modified: head/sbin/ping6/ping6.c
==
--- head/sbin/ping6/ping6.c Thu Sep 10 00:46:38 2020(r365546)
+++ head/sbin/ping6/ping6.c Thu Sep 10 00:50:18 2020(r365547)
@@ -229,6 +229,7 @@ static char *hostname;
 static int ident;  /* process id to identify our packets */
 static u_int8_t nonce[8];  /* nonce field for node information */
 static int hoplimit = -1;  /* hoplimit */
+static int tclass = -1;/* traffic class */
 static u_char *packet = NULL;
 static cap_channel_t *capdns;
 
@@ -352,7 +353,7 @@ main(int argc, char *argv[])
 #endif /*IPSEC_POLICY_IPSEC*/
 #endif
while ((ch = getopt(argc, argv,
-   "k:b:c:DdfHe:m:I:i:l:unNop:qaAS:s:OvyYW:t:" ADDOPTS)) != -1) {
+   "k:b:c:DdfHe:m:I:i:l:unNop:qaAS:s:OvyYW:t:z:" ADDOPTS)) != -1) {
 #undef ADDOPTS
switch (ch) {
case 'k':
@@ -585,6 +586,14 @@ main(int argc, char *argv[])
err(1, "setitimer");
}
break;
+   case 'z':   /* traffic class */
+   tclass = strtol(optarg, &e, 10);
+   if (*optarg == '\0' || *e != '\0')
+   errx(1, "illegal traffic class %s", optarg);
+   if (255 < tclass || tclass < -1)
+   errx(1,
+   "illegal traffic class -- %s", optarg);
+   break;
 #ifdef IPSEC
 #ifdef IPSEC_POLICY_IPSEC
case 'P':
@@ -935,6 +944,12 @@ main(int argc, char *argv[])
memcpy(CMSG_DATA(scmsgp), &hoplimit, sizeof(hoplimit));
 
scmsgp = CMSG_NXTHDR(&smsghdr, scmsgp);
+   }
+
+   if (tclass != -1) {
+   if (setsockopt(ssend, IPPROTO_IPV6, IPV6_TCLASS,
+   &tclass, sizeof(tclass)) == -1)
+   err(1, "setsockopt(IPV6_TCLASS)");
}
 
if (argc > 1) { /* some intermediate addrs are specified */
___
svn-src-head@freebsd.org mailing list
https://lists.freebsd.org/mailman/listinfo/svn-src-head
To unsubscribe, send any mail to "svn-src-head-unsubscr...@freebsd.org"


svn commit: r366149 - head/sys/netinet/cc

2020-09-25 Thread Richard Scheffenegger
Author: rscheff
Date: Fri Sep 25 10:23:14 2020
New Revision: 366149
URL: https://svnweb.freebsd.org/changeset/base/366149

Log:
  TCP newreno: improve after_idle ssthresh
  
  Adjust ssthresh in after_idle to the maximum of
  the prior ssthresh, or 3/4 of the prior cwnd. See
  RFC2861 section 2 for an in-depth explanation of
  the rationale behind this.
  
  As newreno is the default "fall-through" reaction,
  most tcp variants will benefit from this.
  
  Reviewed by:  tuexen
  MFC after:2 weeks
  Sponsored by: NetApp, Inc.
  Differential Revision:https://reviews.freebsd.org/D22438

Modified:
  head/sys/netinet/cc/cc_newreno.c

Modified: head/sys/netinet/cc/cc_newreno.c
==
--- head/sys/netinet/cc/cc_newreno.cFri Sep 25 10:20:12 2020
(r366148)
+++ head/sys/netinet/cc/cc_newreno.cFri Sep 25 10:23:14 2020
(r366149)
@@ -213,8 +213,15 @@ newreno_after_idle(struct cc_var *ccv)
 * wirespeed, overloading router and switch buffers along the way.
 *
 * See RFC5681 Section 4.1. "Restarting Idle Connections".
+*
+* In addition, per RFC2861 Section 2, the ssthresh is set to the
+* maximum of the former ssthresh or 3/4 of the old cwnd, to
+* not exit slow-start prematurely.
 */
rw = tcp_compute_initwnd(tcp_maxseg(ccv->ccvc.tcp));
+
+   CCV(ccv, snd_ssthresh) = max(CCV(ccv, snd_ssthresh),
+   CCV(ccv, snd_cwnd)-(CCV(ccv, snd_cwnd)>>2));
 
CCV(ccv, snd_cwnd) = min(rw, CCV(ccv, snd_cwnd));
 }
___
svn-src-head@freebsd.org mailing list
https://lists.freebsd.org/mailman/listinfo/svn-src-head
To unsubscribe, send any mail to "svn-src-head-unsubscr...@freebsd.org"


svn commit: r366150 - head/sys/netinet

2020-09-25 Thread Richard Scheffenegger
Author: rscheff
Date: Fri Sep 25 10:38:19 2020
New Revision: 366150
URL: https://svnweb.freebsd.org/changeset/base/366150

Log:
  TCP: send full initial window when timestamps are in use
  
  The fastpath in tcp_output tries to send out
  full segments, and avoid sending partial segments by
  comparing against the static t_maxseg variable.
  That value does not consider tcp options like timestamps,
  while the initial window calculation is using
  the correct dynamic tcp_maxseg() function.
  
  Due to this interaction, the last, full size segment
  is considered too short and not sent out immediately.
  
  Reviewed by:  tuexen
  MFC after:2 weeks
  Sponsored by: NetApp, Inc.
  Differential Revision:https://reviews.freebsd.org/D26478

Modified:
  head/sys/netinet/tcp.h
  head/sys/netinet/tcp_output.c
  head/sys/netinet/tcp_subr.c

Modified: head/sys/netinet/tcp.h
==
--- head/sys/netinet/tcp.h  Fri Sep 25 10:23:14 2020(r366149)
+++ head/sys/netinet/tcp.h  Fri Sep 25 10:38:19 2020(r366150)
@@ -80,6 +80,8 @@ struct tcphdr {
u_short th_urp; /* urgent pointer */
 };
 
+#define PADTCPOLEN(len) ((((len) / 4) + !!((len) % 4)) * 4)
+
 #defineTCPOPT_EOL  0
 #define   TCPOLEN_EOL  1
 #defineTCPOPT_PAD  0   /* padding after EOL */

Modified: head/sys/netinet/tcp_output.c
==
--- head/sys/netinet/tcp_output.c   Fri Sep 25 10:23:14 2020
(r366149)
+++ head/sys/netinet/tcp_output.c   Fri Sep 25 10:38:19 2020
(r366150)
@@ -591,6 +591,20 @@ after_sack_rexmit:
if (len >= tp->t_maxseg)
goto send;
/*
+* As the TCP header options are now
+* considered when setting up the initial
+* window, we would not send the last segment
+* if we skip considering the option length here.
+* Note: this may not work when tcp headers change
+* very dynamically in the future.
+*/
+   if ((((tp->t_flags & TF_SIGNATURE) ?
+   PADTCPOLEN(TCPOLEN_SIGNATURE) : 0) +
+   ((tp->t_flags & TF_RCVD_TSTMP) ?
+   PADTCPOLEN(TCPOLEN_TIMESTAMP) : 0) +
+   len) >= tp->t_maxseg)
+   goto send;
+   /*
 * NOTE! on localhost connections an 'ack' from the remote
 * end may occur synchronously with the output and cause
 * us to flush a buffer queued with moretocome.  XXX

Modified: head/sys/netinet/tcp_subr.c
==
--- head/sys/netinet/tcp_subr.c Fri Sep 25 10:23:14 2020(r366149)
+++ head/sys/netinet/tcp_subr.c Fri Sep 25 10:38:19 2020(r366150)
@@ -3013,7 +3013,6 @@ tcp_maxseg(const struct tcpcb *tp)
 * but this is harmless, since result of tcp_maxseg() is used
 * only in cwnd and ssthresh estimations.
 */
-#define PAD(len) ((((len) / 4) + !!((len) % 4)) * 4)
if (TCPS_HAVEESTABLISHED(tp->t_state)) {
if (tp->t_flags & TF_RCVD_TSTMP)
optlen = TCPOLEN_TSTAMP_APPA;
@@ -3021,26 +3020,26 @@ tcp_maxseg(const struct tcpcb *tp)
optlen = 0;
 #if defined(IPSEC_SUPPORT) || defined(TCP_SIGNATURE)
if (tp->t_flags & TF_SIGNATURE)
-   optlen += PAD(TCPOLEN_SIGNATURE);
+   optlen += PADTCPOLEN(TCPOLEN_SIGNATURE);
 #endif
if ((tp->t_flags & TF_SACK_PERMIT) && tp->rcv_numsacks > 0) {
optlen += TCPOLEN_SACKHDR;
optlen += tp->rcv_numsacks * TCPOLEN_SACK;
-   optlen = PAD(optlen);
+   optlen = PADTCPOLEN(optlen);
}
} else {
if (tp->t_flags & TF_REQ_TSTMP)
optlen = TCPOLEN_TSTAMP_APPA;
else
-   optlen = PAD(TCPOLEN_MAXSEG);
+   optlen = PADTCPOLEN(TCPOLEN_MAXSEG);
if (tp->t_flags & TF_REQ_SCALE)
-   optlen += PAD(TCPOLEN_WINDOW);
+   optlen += PADTCPOLEN(TCPOLEN_WINDOW);
 #if defined(IPSEC_SUPPORT) || defined(TCP_SIGNATURE)
if (tp->t_flags & TF_SIGNATURE)
-   optlen += PAD(TCPOLEN_SIGNATURE);
+   optlen += PADTCPOLEN(TCPOLEN_SIGNATURE);
 #endif
if (tp->t_flags & TF_SACK_PERMIT)
-   optlen += PAD(TCPOLEN_SACK_PERMITTED);
+   optlen += PADTCPOLEN(TCPOLEN_SACK_PERMITTED);
}
 #undef PAD
optlen = min(

svn commit: r366206 - head/usr.sbin/ctld

2020-09-27 Thread Richard Scheffenegger
Author: rscheff
Date: Sun Sep 27 21:43:19 2020
New Revision: 366206
URL: https://svnweb.freebsd.org/changeset/base/366206

Log:
  Add DSCP support for network QoS to iscsi target.
  
  In order to prioritize iSCSI traffic across a network,
  DSCP can be used. In order not to rely on "ipfw setdscp"
  or in-network reclassification, this adds the dscp value
  directly to the portal group (where TCP sessions are accepted).
  
  The incoming iSCSI session is first handled by ctld for any
  CHAP authentication and the socket is then handed off to the
  in-kernel iscsi driver without modification of the socket
  parameters. Simply setting up the socket in ctld is sufficient
  to keep sending outgoing iSCSI related traffic with the
  configured DSCP value.
  
  Reviewed by:  mav, trasz
  MFC after:2 weeks
  Sponsored by: NetApp, Inc.
  Differential Revision:https://reviews.freebsd.org/D26385

Modified:
  head/usr.sbin/ctld/ctl.conf.5
  head/usr.sbin/ctld/ctld.c
  head/usr.sbin/ctld/ctld.h
  head/usr.sbin/ctld/parse.y
  head/usr.sbin/ctld/token.l
  head/usr.sbin/ctld/uclparse.c

Modified: head/usr.sbin/ctld/ctl.conf.5
==
--- head/usr.sbin/ctld/ctl.conf.5   Sun Sep 27 18:47:06 2020
(r366205)
+++ head/usr.sbin/ctld/ctl.conf.5   Sun Sep 27 21:43:19 2020
(r366206)
@@ -250,6 +250,14 @@ Specifies that this
 .Sy portal-group
 is listened by some other host.
 This host will announce it on discovery stage, but won't listen.
+.It Ic dscp Ar value
+The DiffServ Codepoint used for sending data. The DSCP can be
+set to numeric, or hexadecimal values directly, as well as the
+well-defined
+.Qq Ar CSx
+and
+.Qq Ar AFxx
+codepoints.
 .El
 .Ss target Context
 .Bl -tag -width indent

Modified: head/usr.sbin/ctld/ctld.c
==
--- head/usr.sbin/ctld/ctld.c   Sun Sep 27 18:47:06 2020(r366205)
+++ head/usr.sbin/ctld/ctld.c   Sun Sep 27 21:43:19 2020(r366206)
@@ -625,6 +625,7 @@ portal_group_new(struct conf *conf, const char *name)
TAILQ_INIT(&pg->pg_ports);
pg->pg_conf = conf;
pg->pg_tag = 0; /* Assigned later in conf_apply(). */
+   pg->pg_dscp = -1;
TAILQ_INSERT_TAIL(&conf->conf_portal_groups, pg, pg_next);
 
return (pg);
@@ -2180,6 +2181,32 @@ conf_apply(struct conf *oldconf, struct conf *newconf)
newp->p_socket = 0;
cumulated_error++;
continue;
+   }
+   if (newpg->pg_dscp != -1) {
+   struct sockaddr sa;
+   int len = sizeof(sa);
+   getsockname(newp->p_socket, &sa, &len);
+   /*
+* Only allow the 6-bit DSCP
+* field to be modified
+*/
+   int tos = newpg->pg_dscp << 2;
+   if (sa.sa_family == AF_INET) {
+   if (setsockopt(newp->p_socket,
+   IPPROTO_IP, IP_TOS,
+   &tos, sizeof(tos)) == -1)
+   log_warn("setsockopt(IP_TOS) "
+   "failed for %s",
+   newp->p_listen);
+   } else
+   if (sa.sa_family == AF_INET6) {
+   if (setsockopt(newp->p_socket,
+   IPPROTO_IPV6, IPV6_TCLASS,
+   &tos, sizeof(tos)) == -1)
+   
log_warn("setsockopt(IPV6_TCLASS) "
+   "failed for %s",
+   newp->p_listen);
+   }
}
error = bind(newp->p_socket, newp->p_ai->ai_addr,
newp->p_ai->ai_addrlen);

Modified: head/usr.sbin/ctld/ctld.h
==
--- head/usr.sbin/ctld/ctld.h   Sun Sep 27 18:47:06 2020(r366205)
+++ head/usr.sbin/ctld/ctld.h   Sun Sep 27 21:43:19 2020(r366206)
@@ -127,6 +127,7 @@ struct portal_group {
TAILQ_HEAD(, port)  pg_ports;
char*pg_offload;
char*pg_redirection;
+   int pg_dscp;
 
uint16_tpg_tag;
 };

Modified: head/usr.sbin/ctld/parse.y
===

svn commit: r366566 - head/usr.bin/netstat

2020-10-09 Thread Richard Scheffenegger
Author: rscheff
Date: Fri Oct  9 10:07:41 2020
New Revision: 366566
URL: https://svnweb.freebsd.org/changeset/base/366566

Log:
  Extend netstat to display TCP stack and detailed congestion state
  
  Add the "-c" option, which shows detailed per-connection
  congestion control state for TCP sessions.
  
  This is one summary patch, which adds the relevant variables into
  xtcpcb. As previous "spare" space is used, these changes are ABI
  compatible.
  
  Reviewed by:  tuexen
  MFC after:2 weeks
  Sponsored by: NetApp, Inc.
  Differential Revision:https://reviews.freebsd.org/D26518

Modified:
  head/usr.bin/netstat/inet.c
  head/usr.bin/netstat/main.c
  head/usr.bin/netstat/netstat.1
  head/usr.bin/netstat/netstat.h

Modified: head/usr.bin/netstat/inet.c
==
--- head/usr.bin/netstat/inet.c Fri Oct  9 09:37:43 2020(r366565)
+++ head/usr.bin/netstat/inet.c Fri Oct  9 10:07:41 2020(r366566)
@@ -85,6 +85,8 @@ __FBSDID("$FreeBSD$");
 #include "netstat.h"
 #include "nl_defs.h"
 
+#define max(a, b) (((a) > (b)) ? (a) : (b))
+
 #ifdef INET
 static void inetprint(const char *, struct in_addr *, int, const char *, int,
 const int);
@@ -204,6 +206,7 @@ protopr(u_long off, const char *name, int af1, int pro
struct xinpcb *inp;
struct xinpgen *xig, *oxig;
struct xsocket *so;
+   int fnamelen, cnamelen;
 
istcp = 0;
switch (proto) {
@@ -236,6 +239,28 @@ protopr(u_long off, const char *name, int af1, int pro
if (!pcblist_sysctl(proto, name, &buf))
return;
 
+   if (cflag || Cflag) {
+   fnamelen = strlen("Stack");
+   cnamelen = strlen("CC");
+   oxig = xig = (struct xinpgen *)buf;
+   for (xig = (struct xinpgen*)((char *)xig + xig->xig_len);
+   xig->xig_len > sizeof(struct xinpgen);
+   xig = (struct xinpgen *)((char *)xig + xig->xig_len)) {
+   if (istcp) {
+   tp = (struct xtcpcb *)xig;
+   inp = &tp->xt_inp;
+   } else {
+   continue;
+   }
+   if (so->xso_protocol != proto)
+   continue;
+   if (inp->inp_gencnt > oxig->xig_gen)
+   continue;
+   fnamelen = max(fnamelen, (int)strlen(tp->xt_stack));
+   cnamelen = max(cnamelen, (int)strlen(tp->xt_cc));
+   }
+   }
+
oxig = xig = (struct xinpgen *)buf;
for (xig = (struct xinpgen *)((char *)xig + xig->xig_len);
xig->xig_len > sizeof(struct xinpgen);
@@ -341,9 +366,19 @@ protopr(u_long off, const char *name, int af1, int pro
xo_emit("  {T:/%8.8s} {T:/%5.5s}",
"flowid", "ftype");
}
+   if (cflag) {
+   xo_emit(" {T:/%-*.*s}",
+   fnamelen, fnamelen, "Stack");
+   }
if (Cflag)
-   xo_emit(" {T:/%-*.*s}", TCP_CA_NAME_MAX,
-   TCP_CA_NAME_MAX, "CC");
+   xo_emit(" {T:/%-*.*s} {T:/%10.10s}"
+   " {T:/%10.10s} {T:/%5.5s}"
+   " {T:/%3.3s}", cnamelen,
+   cnamelen, "CC",
+   "cwin",
+   "ssthresh",
+   "MSS",
+   "ECN");
if (Pflag)
xo_emit(" {T:/%s}", "Log ID");
xo_emit("\n");
@@ -518,9 +553,24 @@ protopr(u_long off, const char *name, int af1, int pro
inp->inp_flowtype);
}
if (istcp) {
+   if (cflag)
+   xo_emit(" {:stack/%-*.*s}",
+   
+   fnamelen, fnamelen, tp->xt_stack);
if (Cflag)
-   xo_emit(" {:cc/%-*.*s}", TCP_CA_NAME_MAX,
-   TCP_CA_NAME_MAX, tp->xt_cc);
+   xo_emit(" {:cc/%-*.*s}"
+   " {:snd-cwnd/%10lu}"
+   " {:snd-ssthresh/%10lu}"
+   " {:t-maxseg/%5u} {:ecn/%3s}",
+   cnamelen, cnamelen, tp->xt_cc,
+   tp->t_snd_cwnd, tp->t_snd_ssthresh,
+   tp->t_maxseg,
+  

svn commit: r366567 - head/sys/netinet

2020-10-09 Thread Richard Scheffenegger
Author: rscheff
Date: Fri Oct  9 10:55:19 2020
New Revision: 366567
URL: https://svnweb.freebsd.org/changeset/base/366567

Log:
  Extend netstat to display TCP stack and detailed congestion state (2)
  
  Extend netstat to display TCP stack and detailed congestion state
  
  Add the "-c" option, which shows detailed per-connection
  congestion control state for TCP sessions.
  
  This is one summary patch, which adds the relevant variables into
  xtcpcb. As previous "spare" space is used, these changes are ABI
  compatible.
  
  Reviewed by:  tuexen
  MFC after:2 weeks
  Sponsored by: NetApp, Inc.
  Differential Revision:https://reviews.freebsd.org/D26518

Modified:
  head/sys/netinet/tcp_subr.c
  head/sys/netinet/tcp_var.h

Modified: head/sys/netinet/tcp_subr.c
==
--- head/sys/netinet/tcp_subr.c Fri Oct  9 10:07:41 2020(r366566)
+++ head/sys/netinet/tcp_subr.c Fri Oct  9 10:55:19 2020(r366567)
@@ -3437,6 +3437,13 @@ tcp_inptoxtp(const struct inpcb *inp, struct xtcpcb *x
xt->t_sndzerowin = tp->t_sndzerowin;
xt->t_sndrexmitpack = tp->t_sndrexmitpack;
xt->t_rcvoopack = tp->t_rcvoopack;
+   xt->t_rcv_wnd = tp->rcv_wnd;
+   xt->t_snd_wnd = tp->snd_wnd;
+   xt->t_snd_cwnd = tp->snd_cwnd;
+   xt->t_snd_ssthresh = tp->snd_ssthresh;
+   xt->t_maxseg = tp->t_maxseg;
+   xt->xt_ecn = (tp->t_flags2 & TF2_ECN_PERMIT) ? 1 : 0 +
+(tp->t_flags2 & TF2_ACE_PERMIT) ? 2 : 0;
 
now = getsbinuptime();
 #defineCOPYTIMER(ttt)  do {
\

Modified: head/sys/netinet/tcp_var.h
==
--- head/sys/netinet/tcp_var.h  Fri Oct  9 10:07:41 2020(r366566)
+++ head/sys/netinet/tcp_var.h  Fri Oct  9 10:55:19 2020(r366567)
@@ -768,7 +768,13 @@ struct xtcpcb {
int32_t tt_2msl;/* (s) */
int32_t tt_delack;  /* (s) */
int32_t t_logstate; /* (3) */
-   int32_t spare32[32];
+   uint32_tt_snd_cwnd; /* (s) */
+   uint32_tt_snd_ssthresh; /* (s) */
+   uint32_tt_maxseg;   /* (s) */
+   uint32_tt_rcv_wnd;  /* (s) */
+   uint32_tt_snd_wnd;  /* (s) */
+   uint32_txt_ecn; /* (s) */
+   int32_t spare32[26];
 } __aligned(8);
 
 #ifdef _KERNEL
___
svn-src-head@freebsd.org mailing list
https://lists.freebsd.org/mailman/listinfo/svn-src-head
To unsubscribe, send any mail to "svn-src-head-unsubscr...@freebsd.org"


svn commit: r366569 - in head/sys: net netinet netinet6

2020-10-09 Thread Richard Scheffenegger
Author: rscheff
Date: Fri Oct  9 12:06:43 2020
New Revision: 366569
URL: https://svnweb.freebsd.org/changeset/base/366569

Log:
  Add IP(V6)_VLAN_PCP to set 802.1 priority per-flow.
  
  This adds a new IPPROTO_IP / IPPROTO_IPV6 setsockopt (getsockopt)
  option IP(V6)_VLAN_PCP, which can be set to -1 (interface
  default), or explicitly to any priority between 0 and 7.
  
  Note that for untagged traffic, explicitly adding a
  priority will insert a special 802.1Q vlan header with
  vlan ID = 0 to carry the priority setting.
  
  Reviewed by:  gallatin, rrs
  MFC after:2 weeks
  Sponsored by: NetApp, Inc.
  Differential Revision:https://reviews.freebsd.org/D26409

Modified:
  head/sys/net/if_ethersubr.c
  head/sys/netinet/in.h
  head/sys/netinet/in_pcb.h
  head/sys/netinet/ip_output.c
  head/sys/netinet6/in6.h
  head/sys/netinet6/ip6_output.c

Modified: head/sys/net/if_ethersubr.c
==
--- head/sys/net/if_ethersubr.c Fri Oct  9 11:24:19 2020(r366568)
+++ head/sys/net/if_ethersubr.c Fri Oct  9 12:06:43 2020(r366569)
@@ -1388,6 +1388,13 @@ ether_8021q_frame(struct mbuf **mp, struct ifnet *ife,
}
 
/*
+* If PCP is set in mbuf, use it
+*/
+   if ((*mp)->m_flags & M_VLANTAG) {
+   pcp = EVL_PRIOFTAG((*mp)->m_pkthdr.ether_vtag);
+   }
+
+   /*
 * If underlying interface can do VLAN tag insertion itself,
 * just pass the packet along. However, we need some way to
 * tell the interface where the packet came from so that it

Modified: head/sys/netinet/in.h
==
--- head/sys/netinet/in.h   Fri Oct  9 11:24:19 2020(r366568)
+++ head/sys/netinet/in.h   Fri Oct  9 12:06:43 2020(r366569)
@@ -483,6 +483,10 @@ __END_DECLS
 /* The following option is private; do not use it from user applications. */
 #defineIP_MSFILTER 74   /* set/get filter list */
 
+/* The following option deals with the 802.1Q Ethernet Priority Code Point */
+#defineIP_VLAN_PCP 75   /* int; set/get PCP used for 
packet, */
+/*  -1 use interface default */
+
 /* Protocol Independent Multicast API [RFC3678] */
 #defineMCAST_JOIN_GROUP80   /* join an any-source 
group */
 #defineMCAST_LEAVE_GROUP   81   /* leave all sources for 
group */

Modified: head/sys/netinet/in_pcb.h
==
--- head/sys/netinet/in_pcb.h   Fri Oct  9 11:24:19 2020(r366568)
+++ head/sys/netinet/in_pcb.h   Fri Oct  9 12:06:43 2020(r366569)
@@ -748,6 +748,13 @@ intinp_so_options(const struct inpcb *inp);
 #define INP_SUPPORTS_MBUFQ 0x4000 /* Supports the mbuf queue method of 
LRO */
 #define INP_MBUF_QUEUE_READY   0x8000 /* The transport is pacing, inputs 
can be queued */
 #define INP_DONT_SACK_QUEUE0x0001 /* If a sack arrives do not wake me 
*/
+#define INP_2PCP_SET   0x0002 /* If the Eth PCP should be set 
explicitly */
+#define INP_2PCP_BIT0  0x0004 /* Eth PCP Bit 0 */
+#define INP_2PCP_BIT1  0x0008 /* Eth PCP Bit 1 */
+#define INP_2PCP_BIT2  0x0010 /* Eth PCP Bit 2 */
+#define INP_2PCP_BASE  INP_2PCP_BIT0
+#define INP_2PCP_MASK  (INP_2PCP_BIT0 | INP_2PCP_BIT1 | INP_2PCP_BIT2)
+#define INP_2PCP_SHIFT 18 /* shift PCP field in/out of 
inp_flags2 */
 /*
  * Flags passed to in_pcblookup*() functions.
  */

Modified: head/sys/netinet/ip_output.c
==
--- head/sys/netinet/ip_output.cFri Oct  9 11:24:19 2020
(r366568)
+++ head/sys/netinet/ip_output.cFri Oct  9 12:06:43 2020
(r366569)
@@ -62,7 +62,9 @@ __FBSDID("$FreeBSD$");
 
 #include 
 #include 
+#include 
 #include 
+#include 
 #include 
 #include 
 #include 
@@ -324,6 +326,7 @@ ip_output(struct mbuf *m, struct mbuf *opt, struct rou
int hlen = sizeof (struct ip);
int mtu = 0;
int error = 0;
+   int vlan_pcp = -1;
struct sockaddr_in *dst, sin;
const struct sockaddr_in *gw;
struct in_ifaddr *ia = NULL;
@@ -345,6 +348,9 @@ ip_output(struct mbuf *m, struct mbuf *opt, struct rou
m->m_pkthdr.flowid = inp->inp_flowid;
M_HASHTYPE_SET(m, inp->inp_flowtype);
}
+   if ((inp->inp_flags2 & INP_2PCP_SET) != 0)
+   vlan_pcp = (inp->inp_flags2 & INP_2PCP_MASK) >>
+   INP_2PCP_SHIFT;
 #ifdef NUMA
m->m_pkthdr.numa_domain = inp->inp_numa_domain;
 #endif
@@ -717,6 +723,9 @@ sendit:
}
}
 
+   if (vlan_pcp > -1)
+   EVL_APPLY_PRI(m, vlan_pcp);
+

svn commit: r366570 - head/sys/netinet

2020-10-09 Thread Richard Scheffenegger
Author: rscheff
Date: Fri Oct  9 12:44:56 2020
New Revision: 366570
URL: https://svnweb.freebsd.org/changeset/base/366570

Log:
  Stop sending tiny new data segments during SACK recovery
  
  Consider the currently in-use TCP options when
  calculating the amount of new data to be injected during
  SACK loss recovery. That addresses the effect that very small
  (new) segments could be injected on partial ACKs while
  still performing a SACK loss recovery.
  
  Reported by:  Liang Tian
  Reviewed by:  tuexen, chengc_netapp.com
  MFC after:2 weeks
  Sponsored by: NetApp, Inc.
  Differential Revision:https://reviews.freebsd.org/D26446

Modified:
  head/sys/netinet/tcp_output.c
  head/sys/netinet/tcp_sack.c

Modified: head/sys/netinet/tcp_output.c
==
--- head/sys/netinet/tcp_output.c   Fri Oct  9 12:06:43 2020
(r366569)
+++ head/sys/netinet/tcp_output.c   Fri Oct  9 12:44:56 2020
(r366570)
@@ -336,7 +336,7 @@ again:
sendalot = 1;
TCPSTAT_INC(tcps_sack_rexmits);
TCPSTAT_ADD(tcps_sack_rexmit_bytes,
-   min(len, tp->t_maxseg));
+   min(len, tcp_maxseg(tp)));
}
}
 after_sack_rexmit:
@@ -858,7 +858,6 @@ send:
if (flags & TH_SYN)
to.to_flags |= TOF_SACKPERM;
else if (TCPS_HAVEESTABLISHED(tp->t_state) &&
-   (tp->t_flags & TF_SACK_PERMIT) &&
tp->rcv_numsacks > 0) {
to.to_flags |= TOF_SACK;
to.to_nsacks = tp->rcv_numsacks;

Modified: head/sys/netinet/tcp_sack.c
==
--- head/sys/netinet/tcp_sack.c Fri Oct  9 12:06:43 2020(r366569)
+++ head/sys/netinet/tcp_sack.c Fri Oct  9 12:44:56 2020(r366570)
@@ -787,15 +787,16 @@ void
 tcp_sack_partialack(struct tcpcb *tp, struct tcphdr *th)
 {
int num_segs = 1;
+   u_int maxseg = tcp_maxseg(tp);
 
INP_WLOCK_ASSERT(tp->t_inpcb);
tcp_timer_activate(tp, TT_REXMT, 0);
tp->t_rtttime = 0;
/* Send one or 2 segments based on how much new data was acked. */
-   if ((BYTES_THIS_ACK(tp, th) / tp->t_maxseg) >= 2)
+   if ((BYTES_THIS_ACK(tp, th) / maxseg) >= 2)
num_segs = 2;
tp->snd_cwnd = (tp->sackhint.sack_bytes_rexmit +
-   (tp->snd_nxt - tp->snd_recover) + num_segs * tp->t_maxseg);
+   (tp->snd_nxt - tp->snd_recover) + num_segs * maxseg);
if (tp->snd_cwnd > tp->snd_ssthresh)
tp->snd_cwnd = tp->snd_ssthresh;
tp->t_flags |= TF_ACKNOW;
___
svn-src-head@freebsd.org mailing list
https://lists.freebsd.org/mailman/listinfo/svn-src-head
To unsubscribe, send any mail to "svn-src-head-unsubscr...@freebsd.org"


svn commit: r366573 - in head: sys/dev/iscsi usr.bin/iscsictl usr.sbin/iscsid

2020-10-09 Thread Richard Scheffenegger
Author: rscheff
Date: Fri Oct  9 14:33:09 2020
New Revision: 366573
URL: https://svnweb.freebsd.org/changeset/base/366573

Log:
  Add DSCP support for network QoS to iscsi initiator.
  
  Allow the DSCP codepoint also to be configurable
  for the traffic in the direction from the initiator
  to the target, such that writes and any requests
  are also treated in the appropriate QoS class.
  
  Reviewed by:  mav
  MFC after:2 weeks
  Sponsored by: NetApp, Inc.
  Differential Revision:https://reviews.freebsd.org/D26714

Modified:
  head/sys/dev/iscsi/iscsi_ioctl.h
  head/usr.bin/iscsictl/Makefile
  head/usr.bin/iscsictl/iscsi.conf.5
  head/usr.bin/iscsictl/iscsictl.c
  head/usr.bin/iscsictl/iscsictl.h
  head/usr.bin/iscsictl/parse.y
  head/usr.bin/iscsictl/token.l
  head/usr.sbin/iscsid/iscsid.c

Modified: head/sys/dev/iscsi/iscsi_ioctl.h
==
--- head/sys/dev/iscsi/iscsi_ioctl.hFri Oct  9 14:03:45 2020
(r366572)
+++ head/sys/dev/iscsi/iscsi_ioctl.hFri Oct  9 14:33:09 2020
(r366573)
@@ -70,7 +70,8 @@ struct iscsi_session_conf {
int isc_iser;
charisc_offload[ISCSI_OFFLOAD_LEN];
int isc_enable;
-   int isc_spare[4];
+   int isc_dscp;
+   int isc_spare[3];
 };
 
 /*

Modified: head/usr.bin/iscsictl/Makefile
==
--- head/usr.bin/iscsictl/Makefile  Fri Oct  9 14:03:45 2020
(r366572)
+++ head/usr.bin/iscsictl/Makefile  Fri Oct  9 14:33:09 2020
(r366573)
@@ -7,7 +7,7 @@ CFLAGS+=-I${.CURDIR}
 CFLAGS+=   -I${SRCTOP}/sys/dev/iscsi
 MAN=   iscsi.conf.5 iscsictl.8
 
-LIBADD=xo
+LIBADD=util xo
 
 YFLAGS+=   -v
 LFLAGS+=   -i

Modified: head/usr.bin/iscsictl/iscsi.conf.5
==
--- head/usr.bin/iscsictl/iscsi.conf.5  Fri Oct  9 14:03:45 2020
(r366572)
+++ head/usr.bin/iscsictl/iscsi.conf.5  Fri Oct  9 14:33:09 2020
(r366573)
@@ -145,6 +145,16 @@ for iSCSI over RDMA, or
 .Qq Ar iSCSI .
 Default is
 .Qq Ar iSCSI .
+.It Cm dscp
+The DiffServ Codepoint used for sending data. The DSCP can be
+set to numeric, or hexadecimal values directly, as well as the
+well-defined
+.Qq Ar cs
+and
+.Qq Ar af
+codepoints.
+Default is no specified dscp codepoint, which means the default
+of the outgoing interface is used.
 .El
 .Sh FILES
 .Bl -tag -width indent

Modified: head/usr.bin/iscsictl/iscsictl.c
==
--- head/usr.bin/iscsictl/iscsictl.cFri Oct  9 14:03:45 2020
(r366572)
+++ head/usr.bin/iscsictl/iscsictl.cFri Oct  9 14:33:09 2020
(r366573)
@@ -87,6 +87,7 @@ target_new(struct conf *conf)
if (targ == NULL)
xo_err(1, "calloc");
targ->t_conf = conf;
+   targ->t_dscp = -1;
TAILQ_INSERT_TAIL(&conf->conf_targets, targ, t_next);
 
return (targ);
@@ -358,6 +359,7 @@ conf_from_target(struct iscsi_session_conf *conf,
conf->isc_data_digest = ISCSI_DIGEST_CRC32C;
else
conf->isc_data_digest = ISCSI_DIGEST_NONE;
+   conf->isc_dscp = targ->t_dscp;
 }
 
 static int
@@ -535,6 +537,9 @@ kernel_list(int iscsi_fd, const struct target *targ __
"Target portal:", conf->isc_target_addr);
xo_emit("{L:/%-26s}{V:alias/%s}\n",
"Target alias:", state->iss_target_alias);
+   if (conf->isc_dscp != -1)
+   xo_emit("{L:/%-26s}{V:dscp/0x%02x}\n",
+   "Target DSCP:", conf->isc_dscp);
xo_close_container("target");
 
xo_open_container("auth");

Modified: head/usr.bin/iscsictl/iscsictl.h
==
--- head/usr.bin/iscsictl/iscsictl.hFri Oct  9 14:03:45 2020
(r366572)
+++ head/usr.bin/iscsictl/iscsictl.hFri Oct  9 14:33:09 2020
(r366573)
@@ -78,6 +78,7 @@ struct target {
int t_session_type;
int t_enable;
int t_protocol;
+   int t_dscp;
char*t_offload;
char*t_user;
char*t_secret;

Modified: head/usr.bin/iscsictl/parse.y
==
--- head/usr.bin/iscsictl/parse.y   Fri Oct  9 14:03:45 2020
(r366572)
+++ head/usr.bin/iscsictl/parse.y   Fri Oct  9 14:33:09 2020
(r366573)
@@ -44,6 +44,8 @@
 #include 
 
 #include "iscsictl.h"
+#include 
+#include 
 
 extern