Author: glebius
Date: Sun Feb  5 16:53:02 2012
New Revision: 231025
URL: http://svn.freebsd.org/changeset/base/231025

Log:
  Add new socket options: TCP_KEEPINIT, TCP_KEEPIDLE, TCP_KEEPINTVL and
  TCP_KEEPCNT, which allow control of the initial timeout, idle time, idle
  re-send interval and idle send count on a per-socket basis.
  
  Reviewed by:  andre, bz, lstewart

Modified:
  head/share/man/man4/tcp.4
  head/sys/netinet/tcp.h
  head/sys/netinet/tcp_input.c
  head/sys/netinet/tcp_syncache.c
  head/sys/netinet/tcp_timer.c
  head/sys/netinet/tcp_timer.h
  head/sys/netinet/tcp_usrreq.c
  head/sys/netinet/tcp_var.h
  head/sys/sys/param.h

Modified: head/share/man/man4/tcp.4
==============================================================================
--- head/share/man/man4/tcp.4   Sun Feb  5 16:41:06 2012        (r231024)
+++ head/share/man/man4/tcp.4   Sun Feb  5 16:53:02 2012        (r231025)
@@ -38,7 +38,7 @@
 .\"     From: @(#)tcp.4        8.1 (Berkeley) 6/5/93
 .\" $FreeBSD$
 .\"
-.Dd November 14, 2011
+.Dd February 5, 2012
 .Dt TCP 4
 .Os
 .Sh NAME
@@ -146,6 +146,65 @@ connection.
 See
 .Xr mod_cc 4
 for details.
+.It Dv TCP_KEEPINIT
+This write-only 
+.Xr setsockopt 2
+option accepts a per-socket timeout argument of
+.Vt "u_int"
+in seconds, for new, non-established
+.Tn TCP
+connections.
+For the global default in milliseconds see
+.Va keepinit
+in the
+.Sx MIB Variables
+section further down.
+.It Dv TCP_KEEPIDLE
+This write-only 
+.Xr setsockopt 2
+option accepts an argument of
+.Vt "u_int"
+for the amount of time, in seconds, that the connection must be idle
+before keepalive probes (if enabled) are sent for the connection of this
+socket.
+If set on a listening socket, the value is inherited by the newly created
+socket upon
+.Xr accept 2 .
+For the global default in milliseconds see
+.Va keepidle
+in the
+.Sx MIB Variables
+section further down.
+.It Dv TCP_KEEPINTVL
+This write-only 
+.Xr setsockopt 2
+option accepts an argument of
+.Vt "u_int"
+to set the per-socket interval, in seconds, between keepalive probes sent
+to a peer.
+If set on a listening socket, the value is inherited by the newly created
+socket upon
+.Xr accept 2 .
+For the global default in milliseconds see
+.Va keepintvl
+in the
+.Sx MIB Variables
+section further down.
+.It Dv TCP_KEEPCNT
+This write-only 
+.Xr setsockopt 2
+option accepts an argument of
+.Vt "u_int"
+and allows a per-socket tuning of the number of probes sent, with no response,
+before the connection will be dropped.
+If set on a listening socket, the value is inherited by the newly created
+socket upon
+.Xr accept 2 .
+For the global default see
+.Va keepcnt
+in the
+.Sx MIB Variables
+section further down.
 .It Dv TCP_NODELAY
 Under most circumstances,
 .Tn TCP
@@ -296,17 +355,21 @@ The Maximum Segment Lifetime, in millise
 Timeout, in milliseconds, for new, non-established
 .Tn TCP
 connections.
+The default is 75000 msec.
 .It Va keepidle
 Amount of time, in milliseconds, that the connection must be idle
 before keepalive probes (if enabled) are sent.
+The default is 7200000 msec (2 hours).
 .It Va keepintvl
 The interval, in milliseconds, between keepalive probes sent to remote
 machines, when no response is received on a
 .Va keepidle
 probe.
-After
-.Dv TCPTV_KEEPCNT
-(default 8) probes are sent, with no response, the connection is dropped.
+The default is 75000 msec.
+.It Va keepcnt
+Number of probes sent, with no response, before a connection
+is dropped.
+The default is 8 packets.
 .It Va always_keepalive
 Assume that
 .Dv SO_KEEPALIVE

Modified: head/sys/netinet/tcp.h
==============================================================================
--- head/sys/netinet/tcp.h      Sun Feb  5 16:41:06 2012        (r231024)
+++ head/sys/netinet/tcp.h      Sun Feb  5 16:53:02 2012        (r231025)
@@ -159,6 +159,10 @@ struct tcphdr {
 #define TCP_MD5SIG     0x10    /* use MD5 digests (RFC2385) */
 #define        TCP_INFO        0x20    /* retrieve tcp_info structure */
 #define        TCP_CONGESTION  0x40    /* get/set congestion control algorithm */
+#define        TCP_KEEPINIT    0x80    /* N, time to establish connection */
+#define        TCP_KEEPIDLE    0x100   /* L,N,X start keepalives after this period */
+#define        TCP_KEEPINTVL   0x200   /* L,N interval between keepalives */
+#define        TCP_KEEPCNT     0x400   /* L,N number of keepalives before close */
 
 #define        TCP_CA_NAME_MAX 16      /* max congestion control name length */
 

Modified: head/sys/netinet/tcp_input.c
==============================================================================
--- head/sys/netinet/tcp_input.c        Sun Feb  5 16:41:06 2012        (r231024)
+++ head/sys/netinet/tcp_input.c        Sun Feb  5 16:53:02 2012        (r231025)
@@ -1446,7 +1446,7 @@ tcp_do_segment(struct mbuf *m, struct tc
         */
        tp->t_rcvtime = ticks;
        if (TCPS_HAVEESTABLISHED(tp->t_state))
-               tcp_timer_activate(tp, TT_KEEP, tcp_keepidle);
+               tcp_timer_activate(tp, TT_KEEP, TP_KEEPIDLE(tp));
 
        /*
         * Unscale the window into a 32-bit value.
@@ -1889,7 +1889,8 @@ tcp_do_segment(struct mbuf *m, struct tc
                        } else {
                                tp->t_state = TCPS_ESTABLISHED;
                                cc_conn_init(tp);
-                               tcp_timer_activate(tp, TT_KEEP, tcp_keepidle);
+                               tcp_timer_activate(tp, TT_KEEP,
+                                   TP_KEEPIDLE(tp));
                        }
                } else {
                        /*
@@ -2293,7 +2294,7 @@ tcp_do_segment(struct mbuf *m, struct tc
                } else {
                        tp->t_state = TCPS_ESTABLISHED;
                        cc_conn_init(tp);
-                       tcp_timer_activate(tp, TT_KEEP, tcp_keepidle);
+                       tcp_timer_activate(tp, TT_KEEP, TP_KEEPIDLE(tp));
                }
                /*
                 * If segment contains data or ACK, will call tcp_reass()
@@ -2630,12 +2631,11 @@ process_ACK:
                                 * compressed state.
                                 */
                                if (so->so_rcv.sb_state & SBS_CANTRCVMORE) {
-                                       int timeout;
-
                                        soisdisconnected(so);
-                                       timeout = (tcp_fast_finwait2_recycle) ? 
-                                               tcp_finwait2_timeout : tcp_maxidle;
-                                       tcp_timer_activate(tp, TT_2MSL, timeout);
+                                       tcp_timer_activate(tp, TT_2MSL,
+                                           (tcp_fast_finwait2_recycle ?
+                                           tcp_finwait2_timeout :
+                                           TP_MAXIDLE(tp)));
                                }
                                tp->t_state = TCPS_FIN_WAIT_2;
                        }

Modified: head/sys/netinet/tcp_syncache.c
==============================================================================
--- head/sys/netinet/tcp_syncache.c     Sun Feb  5 16:41:06 2012        (r231024)
+++ head/sys/netinet/tcp_syncache.c     Sun Feb  5 16:53:02 2012        (r231025)
@@ -845,7 +845,15 @@ syncache_socket(struct syncache *sc, str
         */
        if (sc->sc_rxmits > 1)
                tp->snd_cwnd = tp->t_maxseg;
-       tcp_timer_activate(tp, TT_KEEP, tcp_keepinit);
+
+       /*
+        * Copy and activate timers.
+        */
+       tp->t_keepinit = sototcpcb(lso)->t_keepinit;
+       tp->t_keepidle = sototcpcb(lso)->t_keepidle;
+       tp->t_keepintvl = sototcpcb(lso)->t_keepintvl;
+       tp->t_keepcnt = sototcpcb(lso)->t_keepcnt;
+       tcp_timer_activate(tp, TT_KEEP, TP_KEEPINIT(tp));
 
        INP_WUNLOCK(inp);
 

Modified: head/sys/netinet/tcp_timer.c
==============================================================================
--- head/sys/netinet/tcp_timer.c        Sun Feb  5 16:41:06 2012        (r231024)
+++ head/sys/netinet/tcp_timer.c        Sun Feb  5 16:53:02 2012        (r231025)
@@ -111,12 +111,12 @@ int    tcp_finwait2_timeout;
 SYSCTL_PROC(_net_inet_tcp, OID_AUTO, finwait2_timeout, CTLTYPE_INT|CTLFLAG_RW,
     &tcp_finwait2_timeout, 0, sysctl_msec_to_ticks, "I", "FIN-WAIT2 timeout");
 
+int    tcp_keepcnt = TCPTV_KEEPCNT;
+SYSCTL_INT(_net_inet_tcp, OID_AUTO, keepcnt, CTLFLAG_RW, &tcp_keepcnt, 0,
+    "Number of keepalive probes to send");
 
-static int     tcp_keepcnt = TCPTV_KEEPCNT;
        /* max idle probes */
 int    tcp_maxpersistidle;
-       /* max idle time in persist */
-int    tcp_maxidle;
 
 static int     per_cpu_timers = 0;
 SYSCTL_INT(_net_inet_tcp, OID_AUTO, per_cpu_timers, CTLFLAG_RW,
@@ -138,7 +138,6 @@ tcp_slowtimo(void)
        VNET_LIST_RLOCK_NOSLEEP();
        VNET_FOREACH(vnet_iter) {
                CURVNET_SET(vnet_iter);
-               tcp_maxidle = tcp_keepcnt * tcp_keepintvl;
                INP_INFO_WLOCK(&V_tcbinfo);
                (void) tcp_tw_2msl_scan(0);
                INP_INFO_WUNLOCK(&V_tcbinfo);
@@ -255,9 +254,9 @@ tcp_timer_2msl(void *xtp)
                tp = tcp_close(tp);             
        } else {
                if (tp->t_state != TCPS_TIME_WAIT &&
-                  ticks - tp->t_rcvtime <= tcp_maxidle)
-                      callout_reset_on(&tp->t_timers->tt_2msl, tcp_keepintvl,
-                          tcp_timer_2msl, tp, INP_CPU(inp));
+                  ticks - tp->t_rcvtime <= TP_MAXIDLE(tp))
+                      callout_reset_on(&tp->t_timers->tt_2msl,
+                          TP_KEEPINTVL(tp), tcp_timer_2msl, tp, INP_CPU(inp));
               else
                       tp = tcp_close(tp);
        }
@@ -318,7 +317,7 @@ tcp_timer_keep(void *xtp)
                goto dropit;
        if ((always_keepalive || inp->inp_socket->so_options & SO_KEEPALIVE) &&
            tp->t_state <= TCPS_CLOSING) {
-               if (ticks - tp->t_rcvtime >= tcp_keepidle + tcp_maxidle)
+               if (ticks - tp->t_rcvtime >= TP_KEEPIDLE(tp) + TP_MAXIDLE(tp))
                        goto dropit;
                /*
                 * Send a packet designed to force a response
@@ -340,9 +339,11 @@ tcp_timer_keep(void *xtp)
                                    tp->rcv_nxt, tp->snd_una - 1, 0);
                        free(t_template, M_TEMP);
                }
-               callout_reset_on(&tp->t_timers->tt_keep, tcp_keepintvl, tcp_timer_keep, tp, INP_CPU(inp));
+               callout_reset_on(&tp->t_timers->tt_keep, TP_KEEPINTVL(tp),
+                   tcp_timer_keep, tp, INP_CPU(inp));
        } else
-               callout_reset_on(&tp->t_timers->tt_keep, tcp_keepidle, tcp_timer_keep, tp, INP_CPU(inp));
+               callout_reset_on(&tp->t_timers->tt_keep, TP_KEEPIDLE(tp),
+                   tcp_timer_keep, tp, INP_CPU(inp));
 
 #ifdef TCPDEBUG
        if (inp->inp_socket->so_options & SO_DEBUG)

Modified: head/sys/netinet/tcp_timer.h
==============================================================================
--- head/sys/netinet/tcp_timer.h        Sun Feb  5 16:41:06 2012        (r231024)
+++ head/sys/netinet/tcp_timer.h        Sun Feb  5 16:53:02 2012        (r231025)
@@ -153,10 +153,16 @@ struct tcp_timer {
 #define TT_KEEP                0x08
 #define TT_2MSL                0x10
 
+#define        TP_KEEPINIT(tp) ((tp)->t_keepinit ? (tp)->t_keepinit : tcp_keepinit)
+#define        TP_KEEPIDLE(tp) ((tp)->t_keepidle ? (tp)->t_keepidle : tcp_keepidle)
+#define        TP_KEEPINTVL(tp) ((tp)->t_keepintvl ? (tp)->t_keepintvl : tcp_keepintvl)
+#define        TP_KEEPCNT(tp)  ((tp)->t_keepcnt ? (tp)->t_keepcnt : tcp_keepcnt)
+#define        TP_MAXIDLE(tp)  (TP_KEEPCNT(tp) * TP_KEEPINTVL(tp))
+
 extern int tcp_keepinit;               /* time to establish connection */
 extern int tcp_keepidle;               /* time before keepalive probes begin */
 extern int tcp_keepintvl;              /* time between keepalive probes */
-extern int tcp_maxidle;                        /* time to drop after starting probes */
+extern int tcp_keepcnt;                        /* number of keepalives */
 extern int tcp_delacktime;             /* time before sending a delayed ACK */
 extern int tcp_maxpersistidle;
 extern int tcp_rexmit_min;

Modified: head/sys/netinet/tcp_usrreq.c
==============================================================================
--- head/sys/netinet/tcp_usrreq.c       Sun Feb  5 16:41:06 2012        (r231024)
+++ head/sys/netinet/tcp_usrreq.c       Sun Feb  5 16:53:02 2012        (r231025)
@@ -45,6 +45,7 @@ __FBSDID("$FreeBSD$");
 
 #include <sys/param.h>
 #include <sys/systm.h>
+#include <sys/limits.h>
 #include <sys/malloc.h>
 #include <sys/kernel.h>
 #include <sys/sysctl.h>
@@ -1118,7 +1119,7 @@ tcp_connect(struct tcpcb *tp, struct soc
        soisconnecting(so);
        TCPSTAT_INC(tcps_connattempt);
        tp->t_state = TCPS_SYN_SENT;
-       tcp_timer_activate(tp, TT_KEEP, tcp_keepinit);
+       tcp_timer_activate(tp, TT_KEEP, TP_KEEPINIT(tp));
        tp->iss = tcp_new_isn(tp);
        tcp_sendseqinit(tp);
 
@@ -1191,7 +1192,7 @@ tcp6_connect(struct tcpcb *tp, struct so
        soisconnecting(so);
        TCPSTAT_INC(tcps_connattempt);
        tp->t_state = TCPS_SYN_SENT;
-       tcp_timer_activate(tp, TT_KEEP, tcp_keepinit);
+       tcp_timer_activate(tp, TT_KEEP, TP_KEEPINIT(tp));
        tp->iss = tcp_new_isn(tp);
        tcp_sendseqinit(tp);
 
@@ -1272,6 +1273,7 @@ int
 tcp_ctloutput(struct socket *so, struct sockopt *sopt)
 {
        int     error, opt, optval;
+       u_int   ui;
        struct  inpcb *inp;
        struct  tcpcb *tp;
        struct  tcp_info ti;
@@ -1439,6 +1441,59 @@ tcp_ctloutput(struct socket *so, struct 
                        INP_WUNLOCK(inp);
                        break;
 
+               case TCP_KEEPIDLE:
+               case TCP_KEEPINTVL:
+               case TCP_KEEPCNT:
+               case TCP_KEEPINIT:
+                       INP_WUNLOCK(inp);
+                       error = sooptcopyin(sopt, &ui, sizeof(ui), sizeof(ui));
+                       if (error)
+                               return (error);
+
+                       if (ui > (UINT_MAX / hz)) {
+                               error = EINVAL;
+                               break;
+                       }
+                       ui *= hz;
+
+                       INP_WLOCK_RECHECK(inp);
+                       switch (sopt->sopt_name) {
+                       case TCP_KEEPIDLE:
+                               tp->t_keepidle = ui;
+                               /*
+                                * XXX: better check current remaining
+                                * timeout and "merge" it with new value.
+                                */
+                               if ((tp->t_state > TCPS_LISTEN) &&
+                                   (tp->t_state <= TCPS_CLOSING))
+                                       tcp_timer_activate(tp, TT_KEEP,
+                                           TP_KEEPIDLE(tp));
+                               break;
+                       case TCP_KEEPINTVL:
+                               tp->t_keepintvl = ui;
+                               if ((tp->t_state == TCPS_FIN_WAIT_2) &&
+                                   (TP_MAXIDLE(tp) > 0))
+                                       tcp_timer_activate(tp, TT_2MSL,
+                                           TP_MAXIDLE(tp));
+                               break;
+                       case TCP_KEEPCNT:
+                               tp->t_keepcnt = ui;
+                               if ((tp->t_state == TCPS_FIN_WAIT_2) &&
+                                   (TP_MAXIDLE(tp) > 0))
+                                       tcp_timer_activate(tp, TT_2MSL,
+                                           TP_MAXIDLE(tp));
+                               break;
+                       case TCP_KEEPINIT:
+                               tp->t_keepinit = ui;
+                               if (tp->t_state == TCPS_SYN_RECEIVED ||
+                                   tp->t_state == TCPS_SYN_SENT)
+                                       tcp_timer_activate(tp, TT_KEEP,
+                                           TP_KEEPINIT(tp));
+                               break;
+                       }
+                       INP_WUNLOCK(inp);
+                       break;
+
                default:
                        INP_WUNLOCK(inp);
                        error = ENOPROTOOPT;
@@ -1636,7 +1691,7 @@ tcp_usrclosed(struct tcpcb *tp)
                        int timeout;
 
                        timeout = (tcp_fast_finwait2_recycle) ? 
-                           tcp_finwait2_timeout : tcp_maxidle;
+                           tcp_finwait2_timeout : TP_MAXIDLE(tp);
                        tcp_timer_activate(tp, TT_2MSL, timeout);
                }
        }

Modified: head/sys/netinet/tcp_var.h
==============================================================================
--- head/sys/netinet/tcp_var.h  Sun Feb  5 16:41:06 2012        (r231024)
+++ head/sys/netinet/tcp_var.h  Sun Feb  5 16:53:02 2012        (r231025)
@@ -203,7 +203,12 @@ struct tcpcb {
        struct cc_var   *ccv;           /* congestion control specific vars */
        struct osd      *osd;           /* storage for Khelp module data */
 
-       uint32_t t_ispare[12];          /* 4 keep timers, 5 UTO, 3 TBD */
+       u_int   t_keepinit;             /* time to establish connection */
+       u_int   t_keepidle;             /* time before keepalive probes begin */
+       u_int   t_keepintvl;            /* interval between keepalives */
+       u_int   t_keepcnt;              /* number of keepalives before close */
+
+       uint32_t t_ispare[8];           /* 5 UTO, 3 TBD */
        void    *t_pspare2[4];          /* 4 TBD */
        uint64_t _pad[6];               /* 6 TBD (1-2 CC/RTT?) */
 };

Modified: head/sys/sys/param.h
==============================================================================
--- head/sys/sys/param.h        Sun Feb  5 16:41:06 2012        (r231024)
+++ head/sys/sys/param.h        Sun Feb  5 16:53:02 2012        (r231025)
@@ -58,7 +58,7 @@
  *             in the range 5 to 9.
  */
 #undef __FreeBSD_version
-#define __FreeBSD_version 1000006      /* Master, propagated to newvers */
+#define __FreeBSD_version 1000007      /* Master, propagated to newvers */
 
 /*
  * __FreeBSD_kernel__ indicates that this system uses the kernel of FreeBSD,
_______________________________________________
svn-src-all@freebsd.org mailing list
http://lists.freebsd.org/mailman/listinfo/svn-src-all
To unsubscribe, send any mail to "svn-src-all-unsubscr...@freebsd.org"

Reply via email to