Hi, The attached patch implements the TCP User Timeout Option (RFC 5482 [0]) in the FreeBSD TCP stack. This patch comes from my GSoC 2009 project -- Implement TCP UTO (mentor: Rui Paulo). I would be very grateful for any tips, suggestions and questions.
Brief introduction about TCP UTO: The TCP user timeout controls how long transmitted data may remain unacknowledged before a connection is forcefully closed. It is a local, per-connection parameter. The TCP User Timeout Option allows one end of a TCP connection to advertise its current user timeout value. This information provides advice to the other end of the TCP connection to adapt its user timeout accordingly. Increasing the user timeouts on both ends of a TCP connection allows it to survive extended periods without end-to-end connectivity. Decreasing the user timeouts allows busy servers to explicitly notify their clients that they will maintain the connection state only for a short time without connectivity. [0] - http://tools.ietf.org/html/rfc5482 Best Regards, Fang Wang
Index: sys/netinet/tcp_input.c =================================================================== --- sys/netinet/tcp_input.c (revision 196455) +++ sys/netinet/tcp_input.c (working copy) @@ -1196,6 +1196,19 @@ (thflags & TH_SYN) ? TO_SYN : 0); /* + * If the TCP user timeout option is present, record it but + * do nothing because it's an optional option. + * We will process on the following cases: + * 1. We need do retransmission. + * 2. Users request a UTO value. + */ + if (to.to_flags & TOF_UTO) { + tp->uto_flags |= TCPUTO_RCVD; + tp->rcv_uto = to.to_uto; + TCPSTAT_INC(tcps_rcvuto); + } + + /* * If echoed timestamp is later than the current time, * fall back to non RFC1323 RTT calculation. Normalize * timestamp if syncookies were used when this connection @@ -2247,6 +2260,14 @@ } process_ACK: + /* + * If received an ACK for a previously sent TCP UTO option, + * stop including the TCP UTO option on output packets. + */ + if (tp->uto_flags & TCPUTO_SENDING) + if (SEQ_GEQ(th->th_ack, tp->uto_carrier)) + tp->uto_flags &= ~TCPUTO_SENDING; + INP_INFO_LOCK_ASSERT(&V_tcbinfo); KASSERT(ti_locked == TI_RLOCKED || ti_locked == TI_WLOCKED, ("tcp_input: process_ACK ti_locked %d", ti_locked)); @@ -2953,6 +2974,14 @@ to->to_sacks = cp + 2; TCPSTAT_INC(tcps_sack_rcv_blocks); break; + case TCPOPT_UTO: + if (optlen != TCPOLEN_UTO) + continue; + to->to_flags |= TOF_UTO; + bcopy((char *)cp + 2, + (char *)&to->to_uto, sizeof(to->to_uto)); + to->to_uto = ntohs(to->to_uto); + break; default: continue; } Index: sys/netinet/tcp_subr.c =================================================================== --- sys/netinet/tcp_subr.c (revision 196455) +++ sys/netinet/tcp_subr.c (working copy) @@ -336,6 +336,7 @@ V_tcp_autorcvbuf_max = 256*1024; V_tcp_do_rfc3465 = 1; V_tcp_abc_l_var = 2; + V_tcp_uto_enable = 0; V_tcp_mssdflt = TCP_MSS; #ifdef INET6 @@ -419,6 +420,8 @@ tcp_rexmit_slop = TCPTV_CPU_VAR; tcp_finwait2_timeout = TCPTV_FINWAIT2_TIMEOUT; tcp_tcbhashsize = hashsize; + V_tcp_uto_min = 
TCPTV_UTO_MIN; + V_tcp_uto_max = TCPTV_UTO_MAX; #ifdef INET6 #define TCP_MINPROTOHDR (sizeof(struct ip6_hdr) + sizeof(struct tcphdr)) @@ -773,7 +776,18 @@ tp->snd_ssthresh = TCP_MAXWIN << TCP_MAX_WINSHIFT; tp->t_rcvtime = ticks; tp->t_bw_rtttime = ticks; + + tp->uto_flags = TCPUTO_CHANGEABLE; + if (V_tcp_uto_enable) + tp->uto_flags |= TCPUTO_ENABLE; /* + * According to RFC 5482, t_uto_adv is UTO option advertised to the + * remote TCP peer. It defaults to the default system-wide USER + * TIMEOUT. + */ + tp->t_uto_adv = TCPTV_UTO_DEFAULT; + + /* * IPv4 TTL initialization is necessary for an IPv6 socket as well, * because the socket may be bound to an IPv6 wildcard address, * which may match an IPv4-mapped IPv6 address. Index: sys/netinet/tcp_timer.c =================================================================== --- sys/netinet/tcp_timer.c (revision 196455) +++ sys/netinet/tcp_timer.c (working copy) @@ -110,7 +110,21 @@ SYSCTL_PROC(_net_inet_tcp, OID_AUTO, finwait2_timeout, CTLTYPE_INT|CTLFLAG_RW, &tcp_finwait2_timeout, 0, sysctl_msec_to_ticks, "I", "FIN-WAIT2 timeout"); +VNET_DEFINE(int, tcp_uto_min); +SYSCTL_VNET_PROC(_net_inet_tcp, OID_AUTO, uto_min, CTLTYPE_INT|CTLFLAG_RW, + &VNET_NAME(tcp_uto_min), 0, sysctl_msec_to_ticks, "I", + "Minimun User Timeout"); +VNET_DEFINE(int, tcp_uto_max); +SYSCTL_VNET_PROC(_net_inet_tcp, OID_AUTO, uto_max, CTLTYPE_INT|CTLFLAG_RW, + &VNET_NAME(tcp_uto_max), 0, sysctl_msec_to_ticks, "I", + "Maximum User Timeout"); + +VNET_DEFINE(int, tcp_uto_enable); +SYSCTL_VNET_INT(_net_inet_tcp, OID_AUTO, uto_always, CTLTYPE_INT|CTLFLAG_RW, + &VNET_NAME(tcp_uto_enable), 0, + "Enable TCP UTO (RFC5482) on every socket"); + static int tcp_keepcnt = TCPTV_KEEPCNT; /* max idle probes */ int tcp_maxpersistidle; @@ -473,12 +487,20 @@ } callout_deactivate(&tp->t_timers->tt_rexmt); tcp_free_sackholes(tp); + + if (tp->t_rxtshift == 0) { + TCPT_RESOLVE_UTO(tp); + tp->t_uto_left = tp->t_uto_impl / hz; + tp->t_uto_left -= tcp_backoff[0]; + } /* * 
Retransmission timer went off. Message has not * been acked within retransmit interval. Back off * to a longer retransmit interval and retransmit one segment. */ - if (++tp->t_rxtshift > TCP_MAXRXTSHIFT) { + if ((++tp->t_rxtshift > TCP_MAXRXTSHIFT && + (tp->uto_flags & TCPUTO_IMPL) == 0) || + (tp->t_uto_left == 0 && tp->uto_flags & TCPUTO_IMPL)) { tp->t_rxtshift = TCP_MAXRXTSHIFT; TCPSTAT_INC(tcps_timeoutdrop); tp = tcp_drop(tp, tp->t_softerror ? @@ -510,9 +532,27 @@ if (tp->t_state == TCPS_SYN_SENT) rexmt = TCP_REXMTVAL(tp) * tcp_syn_backoff[tp->t_rxtshift]; else - rexmt = TCP_REXMTVAL(tp) * tcp_backoff[tp->t_rxtshift]; - TCPT_RANGESET(tp->t_rxtcur, rexmt, - tp->t_rttmin, TCPTV_REXMTMAX); + if ((tp->uto_flags & TCPUTO_IMPL) == 0) { + rexmt = TCP_REXMTVAL(tp) * tcp_backoff[tp->t_rxtshift]; + TCPT_RANGESET(tp->t_rxtcur, rexmt, tp->t_rttmin, + TCPTV_REXMTMAX); + } else { + int rxtshift, interval; + rxtshift = min(TCP_MAXRXTSHIFT, tp->t_rxtshift); + interval = min(TCP_REXMTMAX, tcp_backoff[rxtshift]); + rexmt = TCP_REXMTVAL(tp) * tcp_backoff[rxtshift]; + TCPT_RANGESET(tp->t_rxtcur, rexmt, + tp->t_rttmin, TCPTV_REXMTMAX); + if (tp->t_uto_left < interval) { + tp->t_rxtcur = (tp->t_rxtcur * tp->t_uto_left) + / interval; + /* Prevent t_rxtcur from reaching zero */ + TCPT_RANGESET(tp->t_rxtcur, tp->t_rxtcur, + tp->t_rttmin, TCPTV_REXMTMAX); + } + tp->t_uto_left -= min(tp->t_uto_left, interval); + } + /* * Disable rfc1323 if we havn't got any response to * our third SYN to work-around some broken terminal servers Index: sys/netinet/tcp_timer.h =================================================================== --- sys/netinet/tcp_timer.h (revision 196455) +++ sys/netinet/tcp_timer.h (working copy) @@ -91,6 +91,10 @@ #define TCPTV_FINWAIT2_TIMEOUT (60*hz) /* FIN_WAIT_2 timeout if no receiver */ +#define TCPTV_UTO_MIN ( 120*hz) /* min user timeout */ +#define TCPTV_UTO_MAX (1020*hz) /* max user timeout */ +#define TCPTV_UTO_DEFAULT ( 511*hz) /* default user timeout */ + /* * 
Minimum retransmit timer is 3 ticks, for algorithmic stability. * TCPT_RANGESET() will add another TCPTV_CPU_VAR to deal with @@ -113,8 +117,9 @@ */ #define TCPTV_MIN ( hz/33 ) /* minimum allowable value */ #define TCPTV_CPU_VAR ( hz/5 ) /* cpu variance allowed (200ms) */ -#define TCPTV_REXMTMAX ( 64*hz) /* max allowable REXMT value */ - +#define TCP_REXMTMAX 64 /* max allowable REXMT value + in seconds */ +#define TCPTV_REXMTMAX ( TCP_REXMTMAX*hz ) /* max allowable REXMT value */ #define TCPTV_TWTRUNC 8 /* RTO factor to truncate TW */ #define TCP_LINGERTIME 120 /* linger at most 2 minutes */ @@ -168,7 +173,6 @@ extern int tcp_finwait2_timeout; extern int tcp_fast_finwait2_recycle; - void tcp_timer_init(void); void tcp_timer_2msl(void *xtp); struct tcptw * Index: sys/netinet/tcp_var.h =================================================================== --- sys/netinet/tcp_var.h (revision 196455) +++ sys/netinet/tcp_var.h (working copy) @@ -200,9 +200,21 @@ void *t_toe; /* TOE pcb pointer */ int t_bytes_acked; /* # bytes acked during current RTT */ - int t_ispare; /* explicit pad for 64bit alignment */ + /* TCP User Timeout variables (RFC 5482) */ + uint16_t rcv_uto; /* received user timeout */ + uint16_t snd_uto; /* send user timeout */ + uint8_t uto_flags; + /* All values in ticks */ + u_int t_uto_adv; /* user timeout sent to remote peer + (ticks) */ + u_int t_uto_impl; /* implemented user timeout (ticks) */ + u_int t_uto_left; /* remained user timeout value + (seconds) */ + tcp_seq uto_carrier; /* max sequence number that carry user + timeout */ + void *t_pspare2[6]; /* 2 CC / 4 TBD */ - uint64_t _pad[12]; /* 7 UTO, 5 TBD (1-2 CC/RTT?) */ + uint64_t _pad[8]; /* 7 TBD (1-2 CC/RTT?) */ }; /* @@ -245,6 +257,35 @@ #define TCPOOB_HAVEDATA 0x01 #define TCPOOB_HADDATA 0x02 +/* + * Flags for the uto_flags field. 
+ */ +#define TCPUTO_ENABLE 0x01 /* enable tcp user timeout */ +#define TCPUTO_CHANGEABLE 0x02 /* user timeout can be changed by other side */ +#define TCPUTO_IMPL 0x04 /* implement user timeout */ +#define TCPUTO_RCVD 0x08 /* other side has requested user timeout */ +#define TCPUTO_NEED 0x10 /* user timeout needs to be sent */ +#define TCPUTO_SENDING 0x20 /* user timeout is in the process of sending */ + +/* + * Resolve user timeout value(ticks). +*/ +#define TCPT_RESOLVE_UTO(tp) do { \ + if ((tp)->uto_flags & TCPUTO_ENABLE && \ + (tp)->uto_flags & TCPUTO_RCVD && \ + (tp)->uto_flags & TCPUTO_CHANGEABLE) { \ + (tp)->t_uto_impl = (tp)->rcv_uto >> 1; \ + if ((tp)->rcv_uto & 1) \ + (tp)->t_uto_impl *= 60; \ + (tp)->t_uto_impl *= hz; \ + (tp)->t_uto_impl = min(tcp_uto_max, \ + max((tp)->t_uto_adv, \ + max((tp)->t_uto_impl, tcp_uto_min))); \ + (tp)->uto_flags &= ~TCPUTO_RCVD; \ + (tp)->uto_flags |= TCPUTO_IMPL; \ + } \ +} while(0) + #ifdef TCP_SIGNATURE /* * Defines which are needed by the xform_tcp module and tcp_[in|out]put @@ -276,7 +317,8 @@ #define TOF_TS 0x0010 /* timestamp */ #define TOF_SIGNATURE 0x0040 /* TCP-MD5 signature option (RFC2385) */ #define TOF_SACK 0x0080 /* Peer sent SACK option */ -#define TOF_MAXOPT 0x0100 +#define TOF_UTO 0x0100 /* user timeout (RFC5482) */ +#define TOF_MAXOPT 0x0200 u_int32_t to_tsval; /* new timestamp */ u_int32_t to_tsecr; /* reflected timestamp */ u_char *to_sacks; /* pointer to the first SACK blocks */ @@ -284,6 +326,7 @@ u_int16_t to_mss; /* maximum segment size */ u_int8_t to_wscale; /* window scaling */ u_int8_t to_nsacks; /* number of SACK blocks */ + u_int16_t to_uto; /* UTO option (RFC5482) */ }; /* @@ -395,6 +438,7 @@ u_long tcps_sndurg; /* packets sent with URG only */ u_long tcps_sndwinup; /* window update-only packets sent */ u_long tcps_sndctrl; /* control (SYN|FIN|RST) packets sent */ + u_long tcps_snduto; /* packets sent with tcp UTO option */ u_long tcps_rcvtotal; /* total packets received */ u_long 
tcps_rcvpack; /* packets received in sequence */ @@ -418,6 +462,7 @@ u_long tcps_rcvackpack; /* rcvd ack packets */ u_long tcps_rcvackbyte; /* bytes acked by rcvd acks */ u_long tcps_rcvwinupd; /* rcvd window update packets */ + u_long tcps_rcvuto; /* packets received with tcp UTO option */ u_long tcps_pawsdrop; /* segments dropped due to PAWS */ u_long tcps_predack; /* times hdr predict ok for acks */ u_long tcps_preddat; /* times hdr predict ok for data pkts */ @@ -616,6 +661,9 @@ VNET_DECLARE(int, tcp_sc_rst_sock_fail); /* RST on sock alloc failure */ VNET_DECLARE(int, tcp_do_ecn); /* TCP ECN enabled/disabled */ VNET_DECLARE(int, tcp_ecn_maxretries); +VNET_DECLARE(int, tcp_uto_min); +VNET_DECLARE(int, tcp_uto_max); +VNET_DECLARE(int, tcp_uto_enable); #define V_tcp_do_sack VNET(tcp_do_sack) #define V_tcp_sack_maxholes VNET(tcp_sack_maxholes) @@ -624,6 +672,9 @@ #define V_tcp_sc_rst_sock_fail VNET(tcp_sc_rst_sock_fail) #define V_tcp_do_ecn VNET(tcp_do_ecn) #define V_tcp_ecn_maxretries VNET(tcp_ecn_maxretries) +#define V_tcp_uto_min VNET(tcp_uto_min) +#define V_tcp_uto_max VNET(tcp_uto_max) +#define V_tcp_uto_enable VNET(tcp_uto_enable) int tcp_addoptions(struct tcpopt *, u_char *); struct tcpcb * Index: sys/netinet/tcp_output.c =================================================================== --- sys/netinet/tcp_output.c (revision 196455) +++ sys/netinet/tcp_output.c (working copy) @@ -694,9 +694,40 @@ if (tp->t_flags & TF_SIGNATURE) to.to_flags |= TOF_SIGNATURE; #endif /* TCP_SIGNATURE */ + /* + * We set the UTO option in TCP header in two cases: the + * segment has a SYN, a SYN | ACK, or a normal data segment. + */ + if (flags & TH_SYN || + (len && (tp->t_flags & TF_FORCEDATA) == 0)) + if (tp->uto_flags & (TCPUTO_NEED | TCPUTO_SENDING)) { + to.to_uto = tp->snd_uto; + to.to_flags |= TOF_UTO; + } /* Processing the options. 
*/ hdrlen += optlen = tcp_addoptions(&to, opt); + + /* + * According to RFC 5482: + * "In addition to exchanging UTO options in the SYN segments, + * a connection that has enabled UTO options SHOULD include a + * UTO option in the first packet that does not have the SYN + * flag set. This helps to minimize the amount of state + * information TCP must keep for connections in + * non-synchronized states." + * So even though the UTO option is set in the SYN segment, + * we we shall retransmit it. + */ + if (tp->uto_flags & (TCPUTO_NEED | TCPUTO_SENDING) && + (to.to_flags & TOF_UTO) == 0) { + if ((flags & TH_SYN) == 0) { + tp->uto_flags &= ~(TCPUTO_NEED | TCPUTO_SENDING); + tp->uto_flags |= TCPUTO_SENDING; + tp->uto_carrier = tp->snd_nxt + len; + } + TCPSTAT_INC(tcps_snduto); + } } #ifdef INET6 @@ -1323,6 +1354,11 @@ * At minimum we need 10 bytes (to generate 1 SACK block). If both * TCP Timestamps (12 bytes) and TCP Signatures (18 bytes) are present, * we only have 10 bytes for SACK options (40 - (12 + 18)). + * + * TCP option UTO (user timeout, defined in RFC 5482), is an optional option + * that consumes 4 bytes. We attach the UTO option only when there is enough + * free space in the TCP header. + * Although UTO is optional, we should try our best to transmit it. 
*/ int tcp_addoptions(struct tcpopt *to, u_char *optp) @@ -1437,6 +1473,19 @@ TCPSTAT_INC(tcps_sack_send_blocks); break; } + case TOF_UTO: + { + if (TCP_MAXOLEN - optlen < TCPOLEN_UTO) + continue; + *optp++ = TCPOPT_UTO; + *optp++ = TCPOLEN_UTO; + optlen += TCPOLEN_UTO; + to->to_uto = htons(to->to_uto); + bcopy((u_char *)&to->to_uto, optp, sizeof(to->to_uto)); + optp += sizeof(to->to_uto); + to->to_flags &= ~TOF_UTO; + break; + } default: panic("%s: unknown TCP option type", __func__); break; Index: sys/netinet/tcp.h =================================================================== --- sys/netinet/tcp.h (revision 196455) +++ sys/netinet/tcp.h (working copy) @@ -96,6 +96,8 @@ #define TCPOLEN_TSTAMP_APPA (TCPOLEN_TIMESTAMP+2) /* appendix A */ #define TCPOPT_SIGNATURE 19 /* Keyed MD5: RFC 2385 */ #define TCPOLEN_SIGNATURE 18 +#define TCPOPT_UTO 28 +#define TCPOLEN_UTO 4 /* Miscellaneous constants */ #define MAX_SACK_BLKS 6 /* Max # SACK blocks stored at receiver side */ @@ -150,6 +152,14 @@ #define TCP_MD5SIG 0x10 /* use MD5 digests (RFC2385) */ #define TCP_INFO 0x20 /* retrieve tcp_info structure */ #define TCP_CONGESTION 0x40 /* get/set congestion control algorithm */ +#define TCP_UTO 0x80 /* set tcp user timeout */ +struct tcputo { + int uto; + int flags; +}; +#define TCP_UTO_STORE 0x01 +#define TCP_UTO_ENABLE 0x02 +#define TCP_UTO_CHANGE 0x04 #define TCP_CA_NAME_MAX 16 /* max congestion control name length */ @@ -158,6 +168,7 @@ #define TCPI_OPT_WSCALE 0x04 #define TCPI_OPT_ECN 0x08 #define TCPI_OPT_TOE 0x10 +#define TCPI_OPT_UTO 0x20 /* * The TCP_INFO socket option comes from the Linux 2.6 TCP API, and permits @@ -217,9 +228,10 @@ u_int32_t tcpi_snd_nxt; /* Next egress seqno */ u_int32_t tcpi_rcv_nxt; /* Next ingress seqno */ u_int32_t tcpi_toe_tid; /* HWTID for TOE endpoints */ - + u_int32_t tcpi_uto; /* tcp user timeout value */ + /* Padding to grow without breaking ABI. */ - u_int32_t __tcpi_pad[29]; /* Padding. */ + u_int32_t __tcpi_pad[28]; /* Padding. 
*/ }; #endif Index: sys/netinet/tcp_syncache.c =================================================================== --- sys/netinet/tcp_syncache.c (revision 196455) +++ sys/netinet/tcp_syncache.c (working copy) @@ -774,6 +774,10 @@ #endif if (sc->sc_flags & SCF_SACK) tp->t_flags |= TF_SACK_PERMIT; + if (sc->sc_flags & SCF_UTO) { + tp->uto_flags |= TCPUTO_RCVD; + tp->rcv_uto = sc->sc_peer_uto; + } } if (sc->sc_flags & SCF_ECN) @@ -1212,6 +1216,11 @@ sc->sc_flags |= SCF_NOOPT; if ((th->th_flags & (TH_ECE|TH_CWR)) && V_tcp_do_ecn) sc->sc_flags |= SCF_ECN; + if (to->to_flags & TOF_UTO) { + sc->sc_peer_uto = to->to_uto; + sc->sc_flags |= SCF_UTO; + TCPSTAT_INC(tcps_rcvuto); + } if (V_tcp_syncookies) { syncookie_generate(sch, sc, &flowtmp); Index: sys/netinet/tcp_syncache.h =================================================================== --- sys/netinet/tcp_syncache.h (revision 196455) +++ sys/netinet/tcp_syncache.h (working copy) @@ -74,6 +74,7 @@ u_int8_t sc_ip_tos; /* IPv4 TOS */ u_int8_t sc_requested_s_scale:4, sc_requested_r_scale:4; + u_int16_t sc_peer_uto; /* peer's user timeout */ u_int16_t sc_flags; #ifndef TCP_OFFLOAD_DISABLE struct toe_usrreqs *sc_tu; /* TOE operations */ @@ -94,6 +95,7 @@ #define SCF_SIGNATURE 0x20 /* send MD5 digests */ #define SCF_SACK 0x80 /* send SACK option */ #define SCF_ECN 0x100 /* send ECN setup packet */ +#define SCF_UTO 0x200 /* UTO option received */ #define SYNCOOKIE_SECRET_SIZE 8 /* dwords */ #define SYNCOOKIE_LIFETIME 16 /* seconds */ Index: sys/netinet/tcp_usrreq.c =================================================================== --- sys/netinet/tcp_usrreq.c (revision 196455) +++ sys/netinet/tcp_usrreq.c (working copy) @@ -1198,6 +1198,11 @@ ti->tcpi_options |= TCPI_OPT_WSCALE; ti->tcpi_snd_wscale = tp->snd_scale; ti->tcpi_rcv_wscale = tp->rcv_scale; + } + TCPT_RESOLVE_UTO(tp); + if (tp->uto_flags & TCPUTO_IMPL) { + ti->tcpi_options |= TCPI_OPT_UTO; + ti->tcpi_uto = tp->t_uto_impl / hz; } ti->tcpi_rtt = 
((u_int64_t)tp->t_srtt * tick) >> TCP_RTT_SHIFT; @@ -1242,6 +1247,7 @@ struct inpcb *inp; struct tcpcb *tp; struct tcp_info ti; + struct tcputo tu; error = 0; inp = sotoinpcb(so); @@ -1351,6 +1357,53 @@ error = EINVAL; break; + case TCP_UTO: + INP_WUNLOCK(inp); + error = sooptcopyin(sopt, &tu, sizeof tu, + sizeof tu); + if (error) + return (error); + + INP_WLOCK_RECHECK(inp); + if (tu.flags & ~(TCP_UTO_ENABLE | TCP_UTO_STORE | + TCP_UTO_CHANGE)) { + error = EINVAL; + break; + } + if (tu.flags & TCP_UTO_ENABLE) + tp->uto_flags |= TCPUTO_ENABLE; + if (tu.flags & TCP_UTO_STORE) { + tp->uto_flags |= TCPUTO_NEED; + if (tu.uto > 0 && tu.uto <= 0x7FFF * 60) { + if (tu.uto > 0x7FFF) { + tp->snd_uto = tu.uto / 60; + tp->snd_uto <<= 1; + tp->snd_uto |= 1; + } else { + tp->snd_uto = tu.uto; + tp->snd_uto <<= 1; + } + if (tp->uto_flags & TCPUTO_ENABLE && + tp->uto_flags & TCPUTO_NEED) { + uint32_t maxtime; + + maxtime = max(tu.uto*hz, + tcp_uto_min); + tp->t_uto_impl = + min(tcp_uto_max, maxtime); + tp->t_uto_adv = tp->t_uto_impl; + tp->uto_flags &= + ~TCPUTO_CHANGEABLE; + tp->uto_flags |= TCPUTO_IMPL; + } + } else + error = EINVAL; + } + if (tu.flags & TCP_UTO_CHANGE) + tp->uto_flags |= TCPUTO_CHANGEABLE; + INP_WUNLOCK(inp); + break; + default: INP_WUNLOCK(inp); error = ENOPROTOOPT; @@ -1394,6 +1447,20 @@ INP_WUNLOCK(inp); error = sooptcopyout(sopt, &ti, sizeof ti); break; + case TCP_UTO: + tu.flags = 0; + if (tp->uto_flags & TCPUTO_ENABLE) + tu.flags |= TCP_UTO_ENABLE; + if (tp->uto_flags & TCPUTO_CHANGEABLE) + tu.flags |= TCP_UTO_CHANGE; + TCPT_RESOLVE_UTO(tp); + if (tp->uto_flags & TCPUTO_IMPL) { + tu.flags |= TCP_UTO_STORE; + tu.uto = tp->t_uto_impl / hz; + } + INP_WUNLOCK(inp); + error = sooptcopyout(sopt, &tu, sizeof tu); + break; default: INP_WUNLOCK(inp); error = ENOPROTOOPT; Index: usr.bin/netstat/inet.c =================================================================== --- usr.bin/netstat/inet.c (revision 196455) +++ usr.bin/netstat/inet.c (working copy) @@ -607,6 
+607,7 @@ p(tcps_sndprobe, "\t\t%lu window probe packet%s\n"); p(tcps_sndwinup, "\t\t%lu window update packet%s\n"); p(tcps_sndctrl, "\t\t%lu control packet%s\n"); + p(tcps_snduto, "\t\t%lu tcp uto enabled packet%s\n"); p(tcps_rcvtotal, "\t%lu packet%s received\n"); p2(tcps_rcvackpack, tcps_rcvackbyte, "\t\t%lu ack%s (for %lu byte%s)\n"); @@ -623,6 +624,7 @@ "\t\t%lu out-of-order packet%s (%lu byte%s)\n"); p2(tcps_rcvpackafterwin, tcps_rcvbyteafterwin, "\t\t%lu packet%s (%lu byte%s) of data after window\n"); + p(tcps_rcvuto, "\t\t%lu tcp uto enabled packet%s\n"); p(tcps_rcvwinprobe, "\t\t%lu window probe%s\n"); p(tcps_rcvwinupd, "\t\t%lu window update packet%s\n"); p(tcps_rcvafterclose, "\t\t%lu packet%s received after close\n"); Index: contrib/tcpdump/print-tcp.c =================================================================== --- contrib/tcpdump/print-tcp.c (revision 196455) +++ contrib/tcpdump/print-tcp.c (working copy) @@ -124,6 +124,7 @@ { TCPOPT_CCECHO, "" }, { TCPOPT_SIGNATURE, "md5" }, { TCPOPT_AUTH, "enhanced auth" }, + { TCPOPT_UTO, "uto" }, { 0, NULL } }; @@ -613,6 +614,17 @@ */ break; + case TCPOPT_UTO: + datalen = 2; + LENCHECK(datalen); + uint utoval = EXTRACT_16BITS(cp); + if (utoval & 0x0001) + utoval = (utoval >> 1) * 60; + else + utoval >>= 1; + (void)printf(" %u", utoval); + break; + default: datalen = len - 2; for (i = 0; i < datalen; ++i) { Index: contrib/tcpdump/tcp.h =================================================================== --- contrib/tcpdump/tcp.h (revision 196455) +++ contrib/tcpdump/tcp.h (working copy) @@ -83,6 +83,8 @@ #define TCPOLEN_SIGNATURE 18 #define TCP_SIGLEN 16 /* length of an option 19 digest */ #define TCPOPT_AUTH 20 /* Enhanced AUTH option */ +#define TCPOPT_UTO 28 /* tcp user timeout (rfc5482) */ +#define TCPOLEN_UTO 4 #define TCPOPT_TSTAMP_HDR \ (TCPOPT_NOP<<24|TCPOPT_NOP<<16|TCPOPT_TIMESTAMP<<8|TCPOLEN_TIMESTAMP) Index: tools/regression/netinet/tcputo/tcputo.c 
=================================================================== --- tools/regression/netinet/tcputo/tcputo.c (revision 0) +++ tools/regression/netinet/tcputo/tcputo.c (revision 0) @@ -0,0 +1,565 @@ +/*- + * Copyright (c) 2009 Fang Wang <fangw...@freebsd.org> + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * $FreeBSD$ + */ + +/* + * TCP regression test for tcp user timeout; build a TCP connection, interrupt + * it without detected by operating system, and make sure the timeout time and + * retransmission times has been changed appropriately. 
+ */ + + +#include <sys/types.h> +#include <sys/socket.h> + +#include <net/ethernet.h> +#include <netinet/in.h> +#include <netinet/ip.h> +#include <netinet/tcp.h> + +#include <arpa/inet.h> + +#include <err.h> +#include <errno.h> +#include <fcntl.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <unistd.h> +#include <time.h> +#include <signal.h> + +#include <pcap.h> +#include <pthread.h> + +#define SIZE_ETHERNET sizeof(struct ether_header) +#define MAX_RXT 200 + +struct tcprxt { + struct timeval ts; + u_int th_off; + tcp_seq th_seq; + tcp_seq th_ack; + u_short th_win; + u_char th_flags; +}; + +static struct tcprxt rxts[MAX_RXT]; + +static void +parse_packet(u_char *args, const struct pcap_pkthdr *pkt_header, + const u_char *packet) +{ + const struct ip *ip; + const struct tcphdr *tcp; + const u_char *tcpopt; + struct tcprxt rxt; + u_int opt, optlen; + u_int hlen; + u_int length; + + length = pkt_header->len - SIZE_ETHERNET; + ip = (struct ip *)(packet + SIZE_ETHERNET); + hlen = ip->ip_hl * 4; + length -= hlen; + if (hlen < 20) { + printf(" * Invalid IP header length: %u bytes\n", hlen); + return; + } + tcp = (struct tcphdr *)((u_char *)ip + hlen); + hlen = tcp->th_off * 4; + length -= hlen; + if (hlen < 20) { + printf(" * Invalid TCP header length: %u bytes\n", hlen); + return; + } + tcpopt = (u_char *)tcp + sizeof(*tcp); + hlen -= sizeof(*tcp); + while (hlen > 0) { + opt = *tcpopt++; + hlen--; + if (opt == TCPOPT_EOL) + break; + if (opt == TCPOPT_NOP) + continue; + optlen = *tcpopt++; + if (opt == TCPOPT_UTO) { + u_int uto = htons(*(u_short *)tcpopt); + struct tm *p; + if (uto & 1) + uto = (uto >> 1) * 60; + else + uto >>= 1; + p = localtime(&pkt_header->ts.tv_sec); + printf("uto packet: "); + printf("%02d:%02d:%02d.%-6u ", p->tm_hour, p->tm_min, + p->tm_sec, pkt_header->ts.tv_usec); + printf("%s.%d > ", inet_ntoa(ip->ip_src), + htons(tcp->th_sport)); + printf("%s.%d, ", inet_ntoa(ip->ip_dst), + htons(tcp->th_dport)); + printf("flags ["); + 
if (tcp->th_flags & TH_SYN) + printf("S"); + if (tcp->th_flags & TH_RST) + printf("R"); + if (tcp->th_flags & TH_PUSH) + printf("P"); + if (tcp->th_flags & TH_ACK) + printf("."); + if (tcp->th_flags & TH_URG) + printf("U"); + printf("], "); + printf("uto %u, win %u, length %u\n", uto, + htons(tcp->th_win), length); + } + hlen -= optlen - 1; + tcpopt += optlen - 2; + } + if (length > 0 || tcp->th_flags & TH_RST) { + memset(&rxt, 0, sizeof(rxt)); + memcpy(&rxt.ts, &pkt_header->ts, sizeof(rxt.ts)); + rxt.th_win = htons(tcp->th_win); + rxt.th_off = tcp->th_off; + rxt.th_ack = htonl(tcp->th_ack); + rxt.th_seq = htonl(tcp->th_seq); + rxt.th_flags = tcp->th_flags; + memcpy(&rxts[0], &rxts[1], + sizeof(struct tcprxt) * (MAX_RXT - 1)); + memcpy(&rxts[MAX_RXT - 1], &rxt, sizeof(rxt)); + } +} + +static void * +dump_packet(void *arg) +{ + pcap_t *handle; + char errbuf[PCAP_ERRBUF_SIZE]; + struct bpf_program fp; + char filter_exp[128]; + bpf_u_int32 mask; + bpf_u_int32 net; + struct pcap_pkthdr header; + const u_char *packet; + struct sockaddr_in srcaddr, dstaddr, *devaddrp; + int optlen; + pcap_if_t *alldevsp, *devp; + pcap_addr_t *addrp; + int flag; + + optlen = sizeof(srcaddr); + if (getsockname(*((int *)arg), (struct sockaddr *)&srcaddr, + &optlen) == -1) + err(-1, "getsockname"); + optlen = sizeof(dstaddr); + if (getpeername(*((int *)arg), (struct sockaddr *)&dstaddr, + &optlen) == -1) + err(-1, "getsockname"); + + if (pcap_findalldevs(&alldevsp, errbuf) == -1) { + fprintf(stderr, "Couldn't get all device: %s\n", errbuf); + exit(-1); + } + + devp = alldevsp; + flag = 1; + while (devp != NULL) { + addrp = devp->addresses; + while (addrp != NULL && flag) { + devaddrp = (struct sockaddr_in *)addrp->addr; + if (!memcmp(&devaddrp->sin_addr, &srcaddr.sin_addr, + sizeof(srcaddr.sin_addr))) { + flag = 0; + break; + } + addrp = addrp->next; + } + if (!flag) + break; + devp = devp->next; + } + + if (devp == NULL) { + fprintf(stderr, "Couldn't find using device\n"); + exit(-1); + } + 
+ if (pcap_lookupnet(devp->name, &net, &mask, errbuf) == -1) { + fprintf(stderr, "Couldn't get netmask for device %s: %s\n", + devp->name, errbuf); + net = 0; + mask = 0; + } + + handle = pcap_open_live(devp->name, BUFSIZ, 1, 1000, errbuf); + if (handle == NULL) { + fprintf(stderr, "Couldn't open device %s: %s\n", + devp->name, errbuf); + exit(-1); + } + + pcap_freealldevs(alldevsp); + + snprintf(filter_exp, sizeof(filter_exp), + "(tcp src port %d and dst port %d) or" + "(tcp src port %d and dst port %d)", + ntohs(srcaddr.sin_port), ntohs(dstaddr.sin_port), + ntohs(dstaddr.sin_port), ntohs(srcaddr.sin_port)); + + if (pcap_compile(handle, &fp, filter_exp, 0, net) == -1) { + fprintf(stderr, "Couldn't parse filter %s: %s\n", + filter_exp, pcap_geterr(handle)); + exit(-1); + } + if (pcap_setfilter(handle, &fp) == -1) { + fprintf(stderr, "Couldn't install filter %s: %s\n", + filter_exp, pcap_geterr(handle)); + exit(-1); + } + + pcap_loop(handle, -1, parse_packet, NULL); + pcap_close(handle); + + return NULL; +} + +static void +print_result() +{ + tcp_seq rxt_seq; + int i, last, rxt_nr; + struct tm *p; + + /* Get the retransmit sequence number */ + rxt_seq = rxts[MAX_RXT - 2].th_seq; + for (last = -1, rxt_nr = 0, i = 0; i < MAX_RXT; i++) { + if (rxts[i].th_seq && + (rxts[i].th_seq == rxt_seq || rxts[i].th_flags & TH_RST)) { + if (rxts[i].th_flags & TH_RST) + printf("reset packet, "); + else if (rxt_nr) + printf("retransmit %02d, ", rxt_nr); + else if (!rxt_nr) + printf("send packet, "); + p = localtime(&rxts[i].ts.tv_sec); + printf("%02d:%02d:%02d.%-6u ", p->tm_hour, p->tm_min, + p->tm_sec, rxts[i].ts.tv_usec); + if (last != -1) { + /* print interval between two packets */ + if (rxts[i].ts.tv_usec < rxts[last].ts.tv_usec) + printf("(%2u.%-6u) ", + rxts[i].ts.tv_sec - + rxts[last].ts.tv_sec - 1, + 1000000 + rxts[i].ts.tv_usec - + rxts[last].ts.tv_usec); + else + printf("(%2u.%-6u) ", + rxts[i].ts.tv_sec - + rxts[last].ts.tv_sec, + rxts[i].ts.tv_usec - + 
rxts[last].ts.tv_usec); + } + printf("seq %u, ack %u, ", + rxts[i].th_seq, rxts[i].th_ack); + printf("win %u\n", rxts[i].th_win); + last = i; + rxt_nr++; + } + } +} + +static int +print_uto_status(int sock, int print) +{ + static struct tcputo tu = {0, 0}; + int default_uto = 1 + 2 + 4 + 8 + 16 + 32 + 64 + + 64 + 64 + 64 + 64 + 64 + 64; + int optlen; + int utoval; + + optlen = sizeof(tu); + (void)getsockopt(sock, IPPROTO_TCP, TCP_UTO, &tu, &optlen); + if (print) { + if (tu.flags & TCP_UTO_ENABLE) { + printf("tcputo: enabled, "); + if (tu.flags & TCP_UTO_STORE) { + printf("user timeout(changed): %d seconds\n", + tu.uto); + utoval = tu.uto; + } else { + printf("user timeout(default): %d seconds\n", + default_uto); + utoval = default_uto; + } + } else { + printf("tcputo: disabled, default timeout: %d " + "seconds\n", default_uto); + utoval = default_uto; + } + } + + return (utoval); +} + +static void +usage(void) +{ + fprintf(stderr, "tcputo server port [uto [timeout]]\n"); + fprintf(stderr, "tcputo client ip port [uto [timeout]]\n"); + exit(-1); +} + +static int +tcputo_timer(void) +{ + static time_t start_time = 0; + time_t end_time; + time_t interval; + + if (start_time == 0) { + time(&start_time); + interval = 0; + } else { + time(&end_time); + interval = end_time - start_time; + start_time = 0; + } + + return (int)(interval); +} + +static void +tcputo_server(int argc, char *argv[]) +{ + int listen_sock, accept_sock; + struct sockaddr_in sin; + char *dummy; + char buf[8*1024]; + long port; + int user_timeout; + int optval; + struct tcputo uto; + pthread_t tid; + + if (argc < 1 && argc > 3) + usage(); + + bzero(&sin, sizeof(sin)); + sin.sin_len = sizeof(sin); + sin.sin_family = AF_INET; + sin.sin_addr.s_addr = htonl(INADDR_ANY); + + port = strtoul(argv[0], &dummy, 10); + if (port < 1 || port > 65535 || *dummy != '\0') + usage(); + sin.sin_port = htons(port); + + listen_sock = socket(PF_INET, SOCK_STREAM, 0); + if (listen_sock == -1) + err(-1, "socket"); + optval = 
1; + if (setsockopt(listen_sock, SOL_SOCKET, SO_REUSEADDR, &optval, + sizeof(optval)) == -1) + err(-1, "setsockopt"); + + if (bind(listen_sock, (struct sockaddr *)&sin, sizeof(sin)) == -1) + err(-1, "bind"); + + if (listen(listen_sock, -1) == -1) + err(-1, "listen"); + + accept_sock = accept(listen_sock, NULL, NULL); + if (accept_sock == -1) + err(-1, "accept"); + close(listen_sock); + + if (pthread_create(&tid, NULL, dump_packet, (void *)&accept_sock)) + err(-1, "create thread"); + + if (argc >= 2) { + memset(&uto, 0, sizeof(uto)); + /* + * If TCP UTO is enabled but not set, make it changeable, + * otherwise, make it unchangeable. + */ + if (!strcmp(argv[1], "uto")) { + uto.flags |= TCP_UTO_ENABLE; + uto.flags |= TCP_UTO_CHANGE; + } else + usage(); + if (argc == 3) { + uto.uto = strtoul(argv[2], &dummy, 10); + if (uto.uto <= 0 || *dummy != '\0') + usage(); + uto.flags |= TCP_UTO_STORE; + uto.flags &= ~TCP_UTO_CHANGE; + } + if (setsockopt(accept_sock, IPPROTO_TCP, TCP_UTO, &uto, + sizeof(uto)) == -1) + err(-1, "setsockopt"); + } + + optval = 4*1024; + if (setsockopt(accept_sock, SOL_SOCKET, SO_SNDBUF, &optval, + sizeof(optval)) == -1) + err(-1, "setsockopt"); + + for (;;) { + sleep(1); + while (recv(accept_sock, buf, 8*1024, MSG_DONTWAIT) > 0) + ; + (void)tcputo_timer(); + /* + * Send more data than socket send buffer, + * so that data are not buffered. 
+ */ + if (send(accept_sock, buf, 8*1024, MSG_NOSIGNAL) >= 0) { + (void)tcputo_timer(); + (void)print_uto_status(accept_sock, 0); + continue; + } + user_timeout = tcputo_timer(); + printf("Connection timeout, %d seconds.\n", user_timeout); + break; + } + /* wait for the reset packet to be captured */ + sleep(1); + (void)pthread_kill(tid, SIGTERM); + + close(accept_sock); +} + +static void +tcputo_client(int argc, char *argv[]) +{ + struct sockaddr_in sin; + long port; + char *dummy; + char buf[8*1024]; + int sock; + int user_timeout; + int optval; + struct tcputo uto; + pthread_t tid; + + if (argc < 2 && argc > 4) + usage(); + + bzero(&sin, sizeof(sin)); + sin.sin_len = sizeof(sin); + sin.sin_family = AF_INET; + if (inet_aton(argv[0], &sin.sin_addr) == 0) + err(-1, "convert address"); + + port = strtoul(argv[1], &dummy, 10); + if (port < 1 || port > 65535 || *dummy != '\0') + usage(); + sin.sin_port = htons(port); + sock = socket(PF_INET, SOCK_STREAM, 0); + if (sock == -1) + err(-1, "socket"); + + optval = 4*1024; + if (setsockopt(sock, SOL_SOCKET, SO_SNDBUF, &optval, + sizeof(optval)) == -1) + err(-1, "setsockopt"); + + if (argc >= 3) { + memset(&uto, 0, sizeof(uto)); + /* + * If TCP UTO is enabled but not set, make it changeable, + * otherwise, make it unchangeable. 
+ */ + if (!strcmp(argv[2], "uto")) { + uto.flags |= TCP_UTO_ENABLE; + uto.flags |= TCP_UTO_CHANGE; + } else + usage(); + if (argc == 4) { + uto.uto = strtoul(argv[3], &dummy, 10); + if (uto.uto <= 0 || *dummy != '\0') + usage(); + uto.flags |= TCP_UTO_STORE; + uto.flags &= ~TCP_UTO_CHANGE; + } + if (setsockopt(sock, IPPROTO_TCP, TCP_UTO, &uto, + sizeof(uto)) == -1) + err(-1, "setsockopt"); + } + + if (connect(sock, (struct sockaddr *)&sin, sizeof(sin)) == -1) + err(-1, "connect"); + + if (pthread_create(&tid, NULL, dump_packet, (void *)&sock)) + err(-1, "create thread"); + + for (;;) { + sleep(1); + while(recv(sock, buf, 8*1024, MSG_DONTWAIT) > 0) + ; + (void)tcputo_timer(); + /* + * Send more data than socket send buffer, + * so that data are not buffered. + */ + if (send(sock, buf, 8*1024, MSG_NOSIGNAL) > 0) { + (void)tcputo_timer(); + (void)print_uto_status(sock, 0); + continue; + } + user_timeout = tcputo_timer(); + printf("Connection timeout, %d seconds.\n", user_timeout); + break; + } + /* wait for the reset packet to be captured */ + sleep(1); + (void)pthread_kill(tid, SIGTERM); + + close(sock); +} + +int +main(int argc, char *argv[]) +{ + int utoval; + + if (argc < 2) + usage(); + + if (strcmp(argv[1], "server") == 0) + tcputo_server(argc - 2, argv + 2); + else if (strcmp(argv[1], "client") == 0) + tcputo_client(argc - 2, argv + 2); + else + usage(); + + (void)print_uto_status(-1, 1); + print_result(); + + exit(0); +} + Property changes on: tools/regression/netinet/tcputo/tcputo.c ___________________________________________________________________ Added: svn:mime-type + text/plain Added: svn:keywords + FreeBSD=%H Added: svn:eol-style + native Index: tools/regression/netinet/tcputo/Makefile =================================================================== --- tools/regression/netinet/tcputo/Makefile (revision 0) +++ tools/regression/netinet/tcputo/Makefile (revision 0) @@ -0,0 +1,8 @@ +# $FreeBSD$ + +PROG= tcputo +DPADD= ${LIBPCAP} ${LIBPTHREAD} +LDADD= 
-lpcap -lpthread +NO_MAN= + +.include <bsd.prog.mk> Property changes on: tools/regression/netinet/tcputo/Makefile ___________________________________________________________________ Added: svn:mime-type + text/plain Added: svn:keywords + FreeBSD=%H Added: svn:eol-style + native Index: tools/regression/netinet/tcputo/README =================================================================== --- tools/regression/netinet/tcputo/README (revision 0) +++ tools/regression/netinet/tcputo/README (revision 0) @@ -0,0 +1,23 @@ +tcputo - a simple TCP user timeout test tool +-------------------------------------------- + +tcputo generates TCP connections between a 'client' and a 'server'. The +client and server each write a fixed byte stream using write sizes greater +than the socket output buffer and read all available bytes in non-blocking mode. +To run a TCP user timeout test, the TCP connection must be broken without +being detected by the operating system, for example, by bringing down an intermediate +router that is not directly connected to the client or server. Then, select +a port number (for the client and server) and decide whether to enable TCP user +timeout and which user timeout value to send. Typical use might be: + +Run the server on port 8080 and enable TCP user timeout: + + tcputo server 8080 uto + +Now run the client on a second machine with the server's IP, port 8080, enable +TCP user timeout, and send a user timeout value of 700 seconds: + + tcputo client 192.168.10.10 8080 uto 700 + +After that, interrupt the connection without it being detected by the operating system, and +wait until the process exits.
_______________________________________________ freebsd-net@freebsd.org mailing list http://lists.freebsd.org/mailman/listinfo/freebsd-net To unsubscribe, send any mail to "freebsd-net-unsubscr...@freebsd.org"