On Tue, 4 Jul 2023 12:14:47 +0300 Alexander Bluhm <alexander.bl...@gmx.net> wrote: > After changing tcp now tick to milliseconds, it will wrap around > after 49 days of uptime. That may be a problem in some places of > our stack. Better use a 64 bit counter.
I agree since we sometimes hit a problem from where we could not see in advance. > As timestamp option is 32 bit in TCP protocol, we have to use the > lower 32 bit there. There are casts to 32 bits that should behave > correctly. More eyes to review would be helpful. > > As a bonus, start with random 63 bit offset to avoid uptime leakage. > I am not aware that we leak anywhere, but more random is always > good. 2^63 milliseconds gives us 2.9*10^8 years of possible uptime. > > ok? ok yasuoka > bluhm > > Index: netinet/tcp_input.c > =================================================================== > RCS file: /data/mirror/openbsd/cvs/src/sys/netinet/tcp_input.c,v > retrieving revision 1.388 > diff -u -p -r1.388 tcp_input.c > --- netinet/tcp_input.c 30 May 2023 19:32:57 -0000 1.388 > +++ netinet/tcp_input.c 4 Jul 2023 08:49:47 -0000 > @@ -130,8 +130,8 @@ struct timeval tcp_ackdrop_ppslim_last; > #define TCP_PAWS_IDLE TCP_TIME(24 * 24 * 60 * 60) > > /* for modulo comparisons of timestamps */ > -#define TSTMP_LT(a,b) ((int)((a)-(b)) < 0) > -#define TSTMP_GEQ(a,b) ((int)((a)-(b)) >= 0) > +#define TSTMP_LT(a,b) ((int32_t)((a)-(b)) < 0) > +#define TSTMP_GEQ(a,b) ((int32_t)((a)-(b)) >= 0) > > /* for TCP SACK comparisons */ > #define SEQ_MIN(a,b) (SEQ_LT(a,b) ? (a) : (b)) > @@ -190,7 +190,7 @@ void tcp_newreno_partialack(struct tcpc > > void syn_cache_put(struct syn_cache *); > void syn_cache_rm(struct syn_cache *); > -int syn_cache_respond(struct syn_cache *, struct mbuf *, uint32_t); > +int syn_cache_respond(struct syn_cache *, struct mbuf *, uint64_t); > void syn_cache_timer(void *); > void syn_cache_reaper(void *); > void syn_cache_insert(struct syn_cache *, struct tcpcb *); > @@ -198,10 +198,10 @@ void syn_cache_reset(struct sockaddr *, > struct tcphdr *, u_int); > int syn_cache_add(struct sockaddr *, struct sockaddr *, struct tcphdr *, > unsigned int, struct socket *, struct mbuf *, u_char *, int, > - struct tcp_opt_info *, tcp_seq *, uint32_t); > + struct tcp_opt_info *, tcp_seq *, uint64_t); > struct socket *syn_cache_get(struct sockaddr *, struct sockaddr *, > struct tcphdr *, unsigned int, unsigned int, struct socket *, > - struct mbuf *, uint32_t); > + struct mbuf *, uint64_t); > struct syn_cache *syn_cache_lookup(struct sockaddr *, struct sockaddr *, > struct syn_cache_head **, u_int); > > @@ -375,7 +375,7 @@ tcp_input(struct mbuf **mp, int *offp, i > short ostate; > caddr_t saveti; > tcp_seq iss, *reuse = NULL; > - uint32_t now; > + uint64_t now; > u_long tiwin; > struct tcp_opt_info opti; > struct tcphdr *th; > @@ -885,7 +885,7 @@ findpcb: > goto drop; > > if (opti.ts_present && opti.ts_ecr) { > - int rtt_test; > + int32_t rtt_test; > > /* subtract out the tcp timestamp modulator */ > opti.ts_ecr -= tp->ts_modulate; > @@ -1272,7 +1272,7 @@ trimthenstep6: > TSTMP_LT(opti.ts_val, tp->ts_recent)) { > > /* Check to see if ts_recent is over 24 days old. */ > - if ((int)(now - tp->ts_recent_age) > TCP_PAWS_IDLE) { > + if (now - tp->ts_recent_age > TCP_PAWS_IDLE) { > /* > * Invalidate ts_recent. If this segment updates > * ts_recent, the age will be reset later and ts_recent > @@ -2120,7 +2120,7 @@ drop: > int > tcp_dooptions(struct tcpcb *tp, u_char *cp, int cnt, struct tcphdr *th, > struct mbuf *m, int iphlen, struct tcp_opt_info *oi, > - u_int rtableid, uint32_t now) > + u_int rtableid, uint64_t now) > { > u_int16_t mss = 0; > int opt, optlen; > @@ -2686,7 +2686,7 @@ tcp_pulloutofband(struct socket *so, u_i > * and update averages and current timeout. > */ > void > -tcp_xmit_timer(struct tcpcb *tp, int rtt) > +tcp_xmit_timer(struct tcpcb *tp, int32_t rtt) > { > int delta, rttmin; > > @@ -3335,7 +3335,7 @@ void > syn_cache_timer(void *arg) > { > struct syn_cache *sc = arg; > - uint32_t now; > + uint64_t now; > > NET_LOCK(); > if (sc->sc_flags & SCF_DEAD) > @@ -3469,7 +3469,7 @@ syn_cache_lookup(struct sockaddr *src, s > */ > struct socket * > syn_cache_get(struct sockaddr *src, struct sockaddr *dst, struct tcphdr *th, > - u_int hlen, u_int tlen, struct socket *so, struct mbuf *m, uint32_t now) > + u_int hlen, u_int tlen, struct socket *so, struct mbuf *m, uint64_t now) > { > struct syn_cache *sc; > struct syn_cache_head *scp; > @@ -3744,7 +3744,7 @@ syn_cache_unreach(struct sockaddr *src, > int > syn_cache_add(struct sockaddr *src, struct sockaddr *dst, struct tcphdr *th, > u_int iphlen, struct socket *so, struct mbuf *m, u_char *optp, int > optlen, > - struct tcp_opt_info *oi, tcp_seq *issp, uint32_t now) > + struct tcp_opt_info *oi, tcp_seq *issp, uint64_t now) > { > struct tcpcb tb, *tp; > long win; > @@ -3911,7 +3911,7 @@ syn_cache_add(struct sockaddr *src, stru > } > > int > -syn_cache_respond(struct syn_cache *sc, struct mbuf *m, uint32_t now) > +syn_cache_respond(struct syn_cache *sc, struct mbuf *m, uint64_t now) > { > u_int8_t *optp; > int optlen, error; > Index: netinet/tcp_output.c > =================================================================== > RCS file: /data/mirror/openbsd/cvs/src/sys/netinet/tcp_output.c,v > retrieving revision 1.138 > diff -u -p -r1.138 tcp_output.c > --- netinet/tcp_output.c 15 May 2023 16:34:56 -0000 1.138 > +++ netinet/tcp_output.c 4 Jul 2023 08:49:47 -0000 > @@ -204,7 +204,7 @@ tcp_output(struct tcpcb *tp) > int idle, sendalot = 0; > int i, sack_rxmit = 0; > struct sackhole *p; > - uint32_t now; > + uint64_t now; > #ifdef TCP_SIGNATURE > unsigned int sigoff; > #endif /* TCP_SIGNATURE */ > Index: netinet/tcp_subr.c > =================================================================== > RCS file: /data/mirror/openbsd/cvs/src/sys/netinet/tcp_subr.c,v > retrieving revision 1.191 > diff -u -p -r1.191 tcp_subr.c > --- netinet/tcp_subr.c 10 May 2023 12:07:16 -0000 1.191 > +++ netinet/tcp_subr.c 4 Jul 2023 08:49:47 -0000 > @@ -137,6 +137,7 @@ struct cpumem *tcpcounters; /* tcp stat > u_char tcp_secret[16]; /* [I] */ > SHA2_CTX tcp_secret_ctx; /* [I] */ > tcp_seq tcp_iss; /* [T] updated by timer and connection > */ > +uint64_t tcp_starttime; /* [I] random offset for tcp_now() */ > > /* > * Tcp initialization > @@ -145,6 +146,9 @@ void > tcp_init(void) > { > tcp_iss = 1; /* wrong */ > + /* 0 is treated special so add 1, 63 bits to count is enough */ > + arc4random_buf(&tcp_starttime, sizeof(tcp_starttime)); > + tcp_starttime = 1ULL + (tcp_starttime / 2); > pool_init(&tcpcb_pool, sizeof(struct tcpcb), 0, IPL_SOFTNET, 0, > "tcpcb", NULL); > pool_init(&tcpqe_pool, sizeof(struct tcpqent), 0, IPL_SOFTNET, 0, > @@ -289,7 +293,7 @@ tcp_template(struct tcpcb *tp) > */ > void > tcp_respond(struct tcpcb *tp, caddr_t template, struct tcphdr *th0, > - tcp_seq ack, tcp_seq seq, int flags, u_int rtableid, uint32_t now) > + tcp_seq ack, tcp_seq seq, int flags, u_int rtableid, uint64_t now) > { > int tlen; > int win = 0; > Index: netinet/tcp_timer.c > =================================================================== > RCS file: /data/mirror/openbsd/cvs/src/sys/netinet/tcp_timer.c,v > retrieving revision 1.72 > diff -u -p -r1.72 tcp_timer.c > --- netinet/tcp_timer.c 14 Mar 2023 00:24:05 -0000 1.72 > +++ netinet/tcp_timer.c 4 Jul 2023 08:49:47 -0000 > @@ -394,7 +394,7 @@ tcp_timer_persist(void *arg) > struct tcpcb *otp = NULL, *tp = arg; > uint32_t rto; > short ostate; > - uint32_t now; > + uint64_t now; > > NET_LOCK(); > /* Ignore canceled timeouts or timeouts that have been rescheduled. */ > @@ -463,7 +463,7 @@ tcp_timer_keep(void *arg) > tp->t_inpcb->inp_socket->so_options & SO_KEEPALIVE) && > tp->t_state <= TCPS_CLOSING) { > int maxidle; > - uint32_t now; > + uint64_t now; > > maxidle = READ_ONCE(tcp_maxidle); > now = tcp_now(); > @@ -506,7 +506,7 @@ tcp_timer_2msl(void *arg) > struct tcpcb *otp = NULL, *tp = arg; > short ostate; > int maxidle; > - uint32_t now; > + uint64_t now; > > NET_LOCK(); > /* Ignore canceled timeouts or timeouts that have been rescheduled. */ > Index: netinet/tcp_usrreq.c > =================================================================== > RCS file: /data/mirror/openbsd/cvs/src/sys/netinet/tcp_usrreq.c,v > retrieving revision 1.220 > diff -u -p -r1.220 tcp_usrreq.c > --- netinet/tcp_usrreq.c 2 Jul 2023 19:59:15 -0000 1.220 > +++ netinet/tcp_usrreq.c 4 Jul 2023 08:49:47 -0000 > @@ -211,7 +211,7 @@ tcp_fill_info(struct tcpcb *tp, struct s > struct proc *p = curproc; > struct tcp_info *ti; > u_int t = 1000; /* msec => usec */ > - uint32_t now; > + uint64_t now; > > if (sizeof(*ti) > MLEN) { > MCLGETL(m, M_WAITOK, sizeof(*ti)); > Index: netinet/tcp_var.h > =================================================================== > RCS file: /data/mirror/openbsd/cvs/src/sys/netinet/tcp_var.h,v > retrieving revision 1.168 > diff -u -p -r1.168 tcp_var.h > --- netinet/tcp_var.h 2 Jul 2023 19:59:15 -0000 1.168 > +++ netinet/tcp_var.h 4 Jul 2023 08:49:47 -0000 > @@ -150,8 +150,8 @@ struct tcpcb { > */ > > /* auto-sizing variables */ > + uint64_t rfbuf_ts; /* recv buffer autoscaling time stamp */ > u_int rfbuf_cnt; /* recv buffer autoscaling byte count */ > - u_int32_t rfbuf_ts; /* recv buffer autoscaling time stamp */ > > u_short t_maxopd; /* mss plus options */ > u_short t_peermss; /* peer's maximum segment size */ > @@ -160,11 +160,11 @@ struct tcpcb { > * transmit timing stuff. See below for scale of srtt and rttvar. > * "Variance" is actually smoothed difference. > */ > - uint32_t t_rcvtime; /* time last segment received */ > - uint32_t t_rcvacktime; /* time last ack received */ > - uint32_t t_sndtime; /* time last segment sent */ > - uint32_t t_sndacktime; /* time last ack sent */ > - uint32_t t_rtttime; /* time we started measuring rtt */ > + uint64_t t_rcvtime; /* time last segment received */ > + uint64_t t_rcvacktime; /* time last ack received */ > + uint64_t t_sndtime; /* time last segment sent */ > + uint64_t t_sndacktime; /* time last ack sent */ > + uint64_t t_rtttime; /* time we started measuring rtt */ > tcp_seq t_rtseq; /* sequence number being timed */ > int t_srtt; /* smoothed round-trip time */ > int t_rttvar; /* variance in round-trip time */ > @@ -183,9 +183,9 @@ struct tcpcb { > u_char rcv_scale; /* window scaling for recv window */ > u_char request_r_scale; /* pending window scaling */ > u_char requested_s_scale; > - u_int32_t ts_recent; /* timestamp echo data */ > - u_int32_t ts_modulate; /* modulation on timestamp */ > - u_int32_t ts_recent_age; /* when last updated */ > + uint32_t ts_recent; /* timestamp echo data */ > + uint32_t ts_modulate; /* modulation on timestamp */ > + uint64_t ts_recent_age; /* when last updated */ > tcp_seq last_ack_sent; > > /* pointer for syn cache entries*/ > @@ -250,12 +250,9 @@ struct syn_cache { > long sc_win; /* advertised window */ > struct syn_cache_head *sc_buckethead; /* our bucket index */ > struct syn_cache_set *sc_set; /* our syn cache set */ > + u_int64_t sc_timestamp; /* timestamp from SYN */ > u_int32_t sc_hash; > - u_int32_t sc_timestamp; /* timestamp from SYN */ > u_int32_t sc_modulate; /* our timestamp modulator */ > -#if 0 > - u_int32_t sc_timebase; /* our local timebase */ > -#endif > union syn_cache_sa sc_src; > union syn_cache_sa sc_dst; > tcp_seq sc_irs; > @@ -657,10 +654,13 @@ tcpstat_pkt(enum tcpstat_counters pcount > counters_pkt(tcpcounters, pcounter, bcounter, v); > } > > -static inline uint32_t > +extern uint64_t tcp_starttime; > + > +static inline uint64_t > tcp_now(void) > { > - return (getnsecruntime() / 1000000); > + /* TCP time ticks in 63 bit milliseconds with 63 bit random offset. */ > + return tcp_starttime + (getnsecruntime() / 1000000ULL); > } > > #define TCP_TIME(_sec) ((_sec) * 1000) /* tcp_now() is in milliseconds > */ > @@ -712,7 +712,7 @@ struct tcpcb * > struct tcpcb * > tcp_drop(struct tcpcb *, int); > int tcp_dooptions(struct tcpcb *, u_char *, int, struct tcphdr *, > - struct mbuf *, int, struct tcp_opt_info *, u_int, uint32_t); > + struct mbuf *, int, struct tcp_opt_info *, u_int, uint64_t); > void tcp_init(void); > int tcp_input(struct mbuf **, int *, int, int); > int tcp_mss(struct tcpcb *, int); > @@ -735,7 +735,7 @@ void tcp_pulloutofband(struct socket *, > int tcp_reass(struct tcpcb *, struct tcphdr *, struct mbuf *, int *); > void tcp_rscale(struct tcpcb *, u_long); > void tcp_respond(struct tcpcb *, caddr_t, struct tcphdr *, tcp_seq, > - tcp_seq, int, u_int, uint32_t); > + tcp_seq, int, u_int, uint64_t); > void tcp_setpersist(struct tcpcb *); > void tcp_update_sndspace(struct tcpcb *); > void tcp_update_rcvspace(struct tcpcb *); > @@ -767,7 +767,7 @@ int tcp_sense(struct socket *, struct s > int tcp_rcvoob(struct socket *, struct mbuf *, int); > int tcp_sendoob(struct socket *, struct mbuf *, struct mbuf *, > struct mbuf *); > -void tcp_xmit_timer(struct tcpcb *, int); > +void tcp_xmit_timer(struct tcpcb *, int32_t); > void tcpdropoldhalfopen(struct tcpcb *, u_int16_t); > void tcp_sack_option(struct tcpcb *,struct tcphdr *,u_char *,int); > void tcp_update_sack_list(struct tcpcb *tp, tcp_seq, tcp_seq);