> On 7. Jun 2021, at 11:01, Michael Tuexen <tue...@freebsd.org> wrote: > > The branch stable/13 has been updated by tuexen: > > URL: > https://cgit.FreeBSD.org/src/commit/?id=fa50e98328b48da4fa8dbd97d0a787962cf249f5 > > commit fa50e98328b48da4fa8dbd97d0a787962cf249f5 > Author: Michael Tuexen <tue...@freebsd.org> > AuthorDate: 2021-04-18 14:08:08 +0000 > Commit: Michael Tuexen <tue...@freebsd.org> > CommitDate: 2021-06-07 09:01:28 +0000 > > mend Not sure how the commit ended up this way, but it is MFCing https://cgit.FreeBSD.org/src/commit/?id=9e644c23000c2f5028b235f6263d17ffb24d3605 and manually resolving the merge conflicts.
Best regards Michael > --- > share/man/man4/tcp.4 | 15 +- > sys/netinet/tcp.h | 1 + > sys/netinet/tcp_input.c | 48 ++++- > sys/netinet/tcp_output.c | 80 ++++++-- > sys/netinet/tcp_stacks/bbr.c | 38 +--- > sys/netinet/tcp_stacks/rack.c | 26 +-- > sys/netinet/tcp_subr.c | 462 ++++++++++++++++++++++++++++++++++++++++-- > sys/netinet/tcp_syncache.c | 127 +++++++++--- > sys/netinet/tcp_syncache.h | 12 +- > sys/netinet/tcp_timewait.c | 84 ++++++-- > sys/netinet/tcp_usrreq.c | 30 +++ > sys/netinet/tcp_var.h | 27 ++- > sys/netinet/toecore.c | 4 +- > sys/netinet6/tcp6_var.h | 2 + > sys/sys/mbuf.h | 1 + > usr.bin/netstat/inet.c | 4 + > usr.bin/sockstat/sockstat.1 | 6 +- > usr.bin/sockstat/sockstat.c | 13 +- > 18 files changed, 822 insertions(+), 158 deletions(-) > > diff --git a/share/man/man4/tcp.4 b/share/man/man4/tcp.4 > index d01505e58427..b5735a40b320 100644 > --- a/share/man/man4/tcp.4 > +++ b/share/man/man4/tcp.4 > @@ -34,7 +34,7 @@ > .\" From: @(#)tcp.4 8.1 (Berkeley) 6/5/93 > .\" $FreeBSD$ > .\" > -.Dd April 8, 2021 > +.Dd April 18, 2021 > .Dt TCP 4 > .Os > .Sh NAME > @@ -329,6 +329,9 @@ currently executing. > This is typically used after a process or thread inherits a listen > socket from its parent, and sets its CPU affinity to a particular core. > .El > +.It Dv TCP_REMOTE_UDP_ENCAPS_PORT > +Set and get the remote UDP encapsulation port. > +It can only be set on a closed TCP socket. > .El > .Pp > The option level for the > @@ -752,6 +755,16 @@ A CSV list of template_spec=percent key-value pairs > which controls the per > template sampling rates when > .Xr stats 3 > sampling is enabled. > +.It Va udp_tunneling_port > +The local UDP encapsulation port. > +A value of 0 indicates that UDP encapsulation is disabled. > +The default is 0. > +.It Va udp_tunneling_overhead > +The overhead taken into account when using UDP encapsulation. > +Since MSS clamping by middleboxes will most likely not work, values larger > than > +8 (the size of the UDP header) are also supported. > +Supported values are between 8 and 1024. > +The default is 8. > .El > .Sh ERRORS > A socket operation may fail with one of the following errors returned: > diff --git a/sys/netinet/tcp.h b/sys/netinet/tcp.h > index 0b71bd4658f8..d2bf1f8431fd 100644 > --- a/sys/netinet/tcp.h > +++ b/sys/netinet/tcp.h > @@ -183,6 +183,7 @@ struct tcphdr { > #define TCP_RXTLS_MODE 42 /* Receive TLS mode */ > #define TCP_CONGESTION 64 /* get/set congestion control algorithm > */ > #define TCP_CCALGOOPT 65 /* get/set cc algorithm specific > options */ > +#define TCP_REMOTE_UDP_ENCAPS_PORT 71 /* Enable TCP over UDP > tunneling via the specified port */ > #define TCP_DELACK 72 /* socket option for delayed ack */ > #define TCP_FIN_IS_RST 73 /* A fin from the peer is treated has a RST */ > #define TCP_LOG_LIMIT 74 /* Limit to number of records in tcp-log */ > diff --git a/sys/netinet/tcp_input.c b/sys/netinet/tcp_input.c > index 397cbc5084e6..d36f9566ffba 100644 > --- a/sys/netinet/tcp_input.c > +++ b/sys/netinet/tcp_input.c > @@ -123,6 +123,7 @@ __FBSDID("$FreeBSD$"); > #ifdef TCP_OFFLOAD > #include <netinet/tcp_offload.h> > #endif > +#include <netinet/udp.h> > > #include <netipsec/ipsec_support.h> > > @@ -573,7 +574,7 @@ cc_ecnpkt_handler(struct tcpcb *tp, struct tcphdr *th, > uint8_t iptos) > */ > #ifdef INET6 > int > -tcp6_input(struct mbuf **mp, int *offp, int proto) > +tcp6_input_with_port(struct mbuf **mp, int *offp, int proto, uint16_t port) > { > struct mbuf *m; > struct in6_ifaddr *ia6; > @@ -603,12 +604,19 @@ tcp6_input(struct mbuf **mp, int *offp, int proto) > } > > *mp = m; > - return (tcp_input(mp, offp, proto)); > + return (tcp_input_with_port(mp, offp, proto, port)); > +} > + > +int > +tcp6_input(struct mbuf **mp, int *offp, int proto) > +{ > + > + return(tcp6_input_with_port(mp, offp, proto, 0)); > } > #endif /* INET6 */ > > int > -tcp_input(struct mbuf **mp, int *offp, int proto) > +tcp_input_with_port(struct mbuf **mp, int *offp, int proto, uint16_t port) > { > struct mbuf *m = *mp; > struct tcphdr *th = NULL; > @@ -664,6 +672,8 @@ tcp_input(struct mbuf **mp, int *offp, int proto) > ip6 = mtod(m, struct ip6_hdr *); > th = (struct tcphdr *)((caddr_t)ip6 + off0); > tlen = sizeof(*ip6) + ntohs(ip6->ip6_plen) - off0; > + if (port) > + goto skip6_csum; > if (m->m_pkthdr.csum_flags & CSUM_DATA_VALID_IPV6) { > if (m->m_pkthdr.csum_flags & CSUM_PSEUDO_HDR) > th->th_sum = m->m_pkthdr.csum_data; > @@ -677,7 +687,7 @@ tcp_input(struct mbuf **mp, int *offp, int proto) > TCPSTAT_INC(tcps_rcvbadsum); > goto drop; > } > - > + skip6_csum: > /* > * Be proactive about unspecified IPv6 address in source. > * As we use all-zero to indicate unbounded/unconnected pcb, > @@ -718,6 +728,8 @@ tcp_input(struct mbuf **mp, int *offp, int proto) > tlen = ntohs(ip->ip_len) - off0; > > iptos = ip->ip_tos; > + if (port) > + goto skip_csum; > if (m->m_pkthdr.csum_flags & CSUM_DATA_VALID) { > if (m->m_pkthdr.csum_flags & CSUM_PSEUDO_HDR) > th->th_sum = m->m_pkthdr.csum_data; > @@ -747,8 +759,8 @@ tcp_input(struct mbuf **mp, int *offp, int proto) > ip->ip_v = IPVERSION; > ip->ip_hl = off0 >> 2; > } > - > - if (th->th_sum) { > + skip_csum: > + if (th->th_sum && (port == 0)) { > TCPSTAT_INC(tcps_rcvbadsum); > goto drop; > } > @@ -1006,6 +1018,11 @@ findpcb: > goto dropwithreset; > } > > + if ((tp->t_port != port) && (tp->t_state > TCPS_LISTEN)) { > + rstreason = BANDLIM_RST_CLOSEDPORT; > + goto dropwithreset; > + } > + > #ifdef TCP_OFFLOAD > if (tp->t_flags & TF_TOE) { > tcp_offload_input(tp, m); > @@ -1077,7 +1094,7 @@ findpcb: > * NB: syncache_expand() doesn't unlock > * inp and tcpinfo locks. > */ > - rstreason = syncache_expand(&inc, &to, th, &so, m); > + rstreason = syncache_expand(&inc, &to, th, &so, m, > port); > if (rstreason < 0) { > /* > * A failing TCP MD5 signature comparison > @@ -1157,7 +1174,7 @@ tfo_socket_result: > * causes. > */ > if (thflags & TH_RST) { > - syncache_chkrst(&inc, th, m); > + syncache_chkrst(&inc, th, m, port); > goto dropunlock; > } > /* > @@ -1179,7 +1196,7 @@ tfo_socket_result: > log(LOG_DEBUG, "%s; %s: Listen socket: " > "SYN|ACK invalid, segment rejected\n", > s, __func__); > - syncache_badack(&inc); /* XXX: Not needed! */ > + syncache_badack(&inc, port); /* XXX: Not needed! */ > TCPSTAT_INC(tcps_badsyn); > rstreason = BANDLIM_RST_OPENPORT; > goto dropwithreset; > @@ -1336,7 +1353,8 @@ tfo_socket_result: > #endif > TCP_PROBE3(debug__input, tp, th, m); > tcp_dooptions(&to, optp, optlen, TO_SYN); > - if (syncache_add(&inc, &to, th, inp, &so, m, NULL, NULL, iptos)) > + if (syncache_add(&inc, &to, th, inp, &so, m, NULL, NULL, iptos, > + port)) > goto tfo_socket_result; > > /* > @@ -1467,6 +1485,12 @@ tcp_autorcvbuf(struct mbuf *m, struct tcphdr *th, > struct socket *so, > return (newsize); > } > > +int > +tcp_input(struct mbuf **mp, int *offp, int proto) > +{ > + return(tcp_input_with_port(mp, offp, proto, 0)); > +} > + > void > tcp_handle_wakeup(struct tcpcb *tp, struct socket *so) > { > @@ -3671,11 +3695,13 @@ tcp_mss_update(struct tcpcb *tp, int offer, int > mtuoffer, > sizeof (struct ip6_hdr) + sizeof (struct tcphdr) : > sizeof (struct tcpiphdr); > #else > - const size_t min_protoh = sizeof(struct tcpiphdr); > + size_t min_protoh = sizeof(struct tcpiphdr); > #endif > > INP_WLOCK_ASSERT(tp->t_inpcb); > > + if (tp->t_port) > + min_protoh += V_tcp_udp_tunneling_overhead; > if (mtuoffer != -1) { > KASSERT(offer == -1, ("%s: conflict", __func__)); > offer = mtuoffer - min_protoh; > diff --git a/sys/netinet/tcp_output.c b/sys/netinet/tcp_output.c > index e23cdc749e98..5bda2be14df0 100644 > --- a/sys/netinet/tcp_output.c > +++ b/sys/netinet/tcp_output.c > @@ -101,6 +101,8 @@ __FBSDID("$FreeBSD$"); > > #include <netipsec/ipsec_support.h> > > +#include <netinet/udp.h> > +#include <netinet/udp_var.h> > #include <machine/in_cksum.h> > > #include <security/mac/mac_framework.h> > @@ -207,7 +209,7 @@ tcp_output(struct tcpcb *tp) > #endif > struct tcphdr *th; > u_char opt[TCP_MAXOLEN]; > - unsigned ipoptlen, optlen, hdrlen; > + unsigned ipoptlen, optlen, hdrlen, ulen; > #if defined(IPSEC) || defined(IPSEC_SUPPORT) > unsigned ipsec_optlen = 0; > #endif > @@ -216,6 +218,7 @@ tcp_output(struct tcpcb *tp) > struct sackhole *p; > int tso, mtu; > struct tcpopt to; > + struct udphdr *udp = NULL; > unsigned int wanted_cookie = 0; > unsigned int dont_sendalot = 0; > #if 0 > @@ -558,6 +561,7 @@ after_sack_rexmit: > #endif > > if ((tp->t_flags & TF_TSO) && V_tcp_do_tso && len > tp->t_maxseg && > + (tp->t_port == 0) && > ((tp->t_flags & TF_SIGNATURE) == 0) && > tp->rcv_numsacks == 0 && sack_rxmit == 0 && > ipoptlen == 0 && !(flags & TH_SYN)) > @@ -800,6 +804,8 @@ send: > /* Maximum segment size. */ > if (flags & TH_SYN) { > to.to_mss = tcp_mssopt(&tp->t_inpcb->inp_inc); > + if (tp->t_port) > + to.to_mss -= V_tcp_udp_tunneling_overhead; > to.to_flags |= TOF_MSS; > > /* > @@ -887,7 +893,14 @@ send: > !(to.to_flags & TOF_FASTOPEN)) > len = 0; > } > - > + if (tp->t_port) { > + if (V_tcp_udp_tunneling_port == 0) { > + /* The port was removed?? */ > + SOCKBUF_UNLOCK(&so->so_snd); > + return (EHOSTUNREACH); > + } > + hdrlen += sizeof(struct udphdr); > + } > /* > * Adjust data length if insertion of options will > * bump the packet length beyond the t_maxseg length. > @@ -1140,8 +1153,17 @@ send: > #ifdef INET6 > if (isipv6) { > ip6 = mtod(m, struct ip6_hdr *); > - th = (struct tcphdr *)(ip6 + 1); > - tcpip_fillheaders(tp->t_inpcb, ip6, th); > + if (tp->t_port) { > + udp = (struct udphdr *)((caddr_t)ip6 + ipoptlen + > sizeof(struct ip6_hdr)); > + udp->uh_sport = htons(V_tcp_udp_tunneling_port); > + udp->uh_dport = tp->t_port; > + ulen = hdrlen + len - sizeof(struct ip6_hdr); > + udp->uh_ulen = htons(ulen); > + th = (struct tcphdr *)(udp + 1); > + } else { > + th = (struct tcphdr *)(ip6 + 1); > + } > + tcpip_fillheaders(tp->t_inpcb, tp->t_port, ip6, th); > } else > #endif /* INET6 */ > { > @@ -1149,8 +1171,16 @@ send: > #ifdef TCPDEBUG > ipov = (struct ipovly *)ip; > #endif > - th = (struct tcphdr *)(ip + 1); > - tcpip_fillheaders(tp->t_inpcb, ip, th); > + if (tp->t_port) { > + udp = (struct udphdr *)((caddr_t)ip + ipoptlen + > sizeof(struct ip)); > + udp->uh_sport = htons(V_tcp_udp_tunneling_port); > + udp->uh_dport = tp->t_port; > + ulen = hdrlen + len - sizeof(struct ip); > + udp->uh_ulen = htons(ulen); > + th = (struct tcphdr *)(udp + 1); > + } else > + th = (struct tcphdr *)(ip + 1); > + tcpip_fillheaders(tp->t_inpcb, tp->t_port, ip, th); > } > > /* > @@ -1309,7 +1339,6 @@ send: > * checksum extended header and data. > */ > m->m_pkthdr.len = hdrlen + len; /* in6_cksum() need this */ > - m->m_pkthdr.csum_data = offsetof(struct tcphdr, th_sum); > > #if defined(IPSEC_SUPPORT) || defined(TCP_SIGNATURE) > if (to.to_flags & TOF_SIGNATURE) { > @@ -1336,9 +1365,19 @@ send: > * There is no need to fill in ip6_plen right now. > * It will be filled later by ip6_output. > */ > - m->m_pkthdr.csum_flags = CSUM_TCP_IPV6; > - th->th_sum = in6_cksum_pseudo(ip6, sizeof(struct tcphdr) + > - optlen + len, IPPROTO_TCP, 0); > + if (tp->t_port) { > + m->m_pkthdr.csum_flags = CSUM_UDP_IPV6; > + m->m_pkthdr.csum_data = offsetof(struct udphdr, uh_sum); > + udp->uh_sum = in6_cksum_pseudo(ip6, ulen, IPPROTO_UDP, > 0); > + th->th_sum = htons(0); > + UDPSTAT_INC(udps_opackets); > + } else { > + m->m_pkthdr.csum_flags = CSUM_TCP_IPV6; > + m->m_pkthdr.csum_data = offsetof(struct tcphdr, th_sum); > + th->th_sum = in6_cksum_pseudo(ip6, > + sizeof(struct tcphdr) + optlen + len, IPPROTO_TCP, > + 0); > + } > } > #endif > #if defined(INET6) && defined(INET) > @@ -1346,9 +1385,20 @@ send: > #endif > #ifdef INET > { > - m->m_pkthdr.csum_flags = CSUM_TCP; > - th->th_sum = in_pseudo(ip->ip_src.s_addr, ip->ip_dst.s_addr, > - htons(sizeof(struct tcphdr) + IPPROTO_TCP + len + optlen)); > + if (tp->t_port) { > + m->m_pkthdr.csum_flags = CSUM_UDP; > + m->m_pkthdr.csum_data = offsetof(struct udphdr, uh_sum); > + udp->uh_sum = in_pseudo(ip->ip_src.s_addr, > + ip->ip_dst.s_addr, htons(ulen + IPPROTO_UDP)); > + th->th_sum = htons(0); > + UDPSTAT_INC(udps_opackets); > + } else { > + m->m_pkthdr.csum_flags = CSUM_TCP; > + m->m_pkthdr.csum_data = offsetof(struct tcphdr, th_sum); > + th->th_sum = in_pseudo(ip->ip_src.s_addr, > + ip->ip_dst.s_addr, htons(sizeof(struct tcphdr) + > + IPPROTO_TCP + len + optlen)); > + } > > /* IP version must be set here for ipv4/ipv6 checking later */ > KASSERT(ip->ip_v == IPVERSION, > @@ -1473,8 +1523,10 @@ send: > * NB: Don't set DF on small MTU/MSS to have a safe fallback. > */ > if (V_path_mtu_discovery && tp->t_maxseg > V_tcp_minmss) { > - ip->ip_off |= htons(IP_DF); > tp->t_flags2 |= TF2_PLPMTU_PMTUD; > + if (tp->t_port == 0 || len < V_tcp_minmss) { > + ip->ip_off |= htons(IP_DF); > + } > } else { > tp->t_flags2 &= ~TF2_PLPMTU_PMTUD; > } > diff --git a/sys/netinet/tcp_stacks/bbr.c b/sys/netinet/tcp_stacks/bbr.c > index cc20d6bf52ca..1ee8d26446fd 100644 > --- a/sys/netinet/tcp_stacks/bbr.c > +++ b/sys/netinet/tcp_stacks/bbr.c > @@ -11969,14 +11969,10 @@ bbr_output_wtime(struct tcpcb *tp, const struct > timeval *tv) > #endif > struct tcp_bbr *bbr; > struct tcphdr *th; > -#ifdef NETFLIX_TCPOUDP > struct udphdr *udp = NULL; > -#endif > u_char opt[TCP_MAXOLEN]; > unsigned ipoptlen, optlen, hdrlen; > -#ifdef NETFLIX_TCPOUDP > unsigned ulen; > -#endif > uint32_t bbr_seq; > uint32_t delay_calc=0; > uint8_t doing_tlp = 0; > @@ -12991,10 +12987,8 @@ send: > /* Maximum segment size. */ > if (flags & TH_SYN) { > to.to_mss = tcp_mssopt(&inp->inp_inc); > -#ifdef NETFLIX_TCPOUDP > if (tp->t_port) > to.to_mss -= V_tcp_udp_tunneling_overhead; > -#endif > to.to_flags |= TOF_MSS; > /* > * On SYN or SYN|ACK transmits on TFO connections, > @@ -13063,7 +13057,6 @@ send: > !(to.to_flags & TOF_FASTOPEN)) > len = 0; > } > -#ifdef NETFLIX_TCPOUDP > if (tp->t_port) { > if (V_tcp_udp_tunneling_port == 0) { > /* The port was removed?? */ > @@ -13072,7 +13065,6 @@ send: > } > hdrlen += sizeof(struct udphdr); > } > -#endif > #ifdef INET6 > if (isipv6) > ipoptlen = ip6_optlen(tp->t_inpcb); > @@ -13408,7 +13400,6 @@ send: > #ifdef INET6 > if (isipv6) { > ip6 = mtod(m, struct ip6_hdr *); > -#ifdef NETFLIX_TCPOUDP > if (tp->t_port) { > udp = (struct udphdr *)((caddr_t)ip6 + ipoptlen + > sizeof(struct ip6_hdr)); > udp->uh_sport = htons(V_tcp_udp_tunneling_port); > @@ -13417,17 +13408,9 @@ send: > udp->uh_ulen = htons(ulen); > th = (struct tcphdr *)(udp + 1); > } else { > -#endif > th = (struct tcphdr *)(ip6 + 1); > - > -#ifdef NETFLIX_TCPOUDP > } > -#endif > - tcpip_fillheaders(inp, > -#ifdef NETFLIX_TCPOUDP > - tp->t_port, > -#endif > - ip6, th); > + tcpip_fillheaders(inp, tp->t_port, ip6, th); > } else > #endif /* INET6 */ > { > @@ -13435,7 +13418,6 @@ send: > #ifdef TCPDEBUG > ipov = (struct ipovly *)ip; > #endif > -#ifdef NETFLIX_TCPOUDP > if (tp->t_port) { > udp = (struct udphdr *)((caddr_t)ip + ipoptlen + > sizeof(struct ip)); > udp->uh_sport = htons(V_tcp_udp_tunneling_port); > @@ -13443,14 +13425,10 @@ send: > ulen = hdrlen + len - sizeof(struct ip); > udp->uh_ulen = htons(ulen); > th = (struct tcphdr *)(udp + 1); > - } else > -#endif > + } else { > th = (struct tcphdr *)(ip + 1); > - tcpip_fillheaders(inp, > -#ifdef NETFLIX_TCPOUDP > - tp->t_port, > -#endif > - ip, th); > + } > + tcpip_fillheaders(inp, tp->t_port, ip, th); > } > /* > * If we are doing retransmissions, then snd_nxt will not reflect > @@ -13600,7 +13578,6 @@ send: > * ip6_plen is not need to be filled now, and will be filled > * in ip6_output. > */ > -#ifdef NETFLIX_TCPOUDP > if (tp->t_port) { > m->m_pkthdr.csum_flags = CSUM_UDP_IPV6; > m->m_pkthdr.csum_data = offsetof(struct udphdr, uh_sum); > @@ -13608,14 +13585,11 @@ send: > th->th_sum = htons(0); > UDPSTAT_INC(udps_opackets); > } else { > -#endif > csum_flags = m->m_pkthdr.csum_flags = CSUM_TCP_IPV6; > m->m_pkthdr.csum_data = offsetof(struct tcphdr, th_sum); > th->th_sum = in6_cksum_pseudo(ip6, sizeof(struct > tcphdr) + > optlen + len, IPPROTO_TCP, 0); > -#ifdef NETFLIX_TCPOUDP > } > -#endif > } > #endif > #if defined(INET6) && defined(INET) > @@ -13623,7 +13597,6 @@ send: > #endif > #ifdef INET > { > -#ifdef NETFLIX_TCPOUDP > if (tp->t_port) { > m->m_pkthdr.csum_flags = CSUM_UDP; > m->m_pkthdr.csum_data = offsetof(struct udphdr, uh_sum); > @@ -13632,15 +13605,12 @@ send: > th->th_sum = htons(0); > UDPSTAT_INC(udps_opackets); > } else { > -#endif > csum_flags = m->m_pkthdr.csum_flags = CSUM_TCP; > m->m_pkthdr.csum_data = offsetof(struct tcphdr, th_sum); > th->th_sum = in_pseudo(ip->ip_src.s_addr, > ip->ip_dst.s_addr, htons(sizeof(struct tcphdr) + > IPPROTO_TCP + len + optlen)); > -#ifdef NETFLIX_TCPOUDP > } > -#endif > /* IP version must be set here for ipv4/ipv6 checking later */ > KASSERT(ip->ip_v == IPVERSION, > ("%s: IP version incorrect: %d", __func__, ip->ip_v)); > diff --git a/sys/netinet/tcp_stacks/rack.c b/sys/netinet/tcp_stacks/rack.c > index 0ee73a95a6d7..12827d1699d0 100644 > --- a/sys/netinet/tcp_stacks/rack.c > +++ b/sys/netinet/tcp_stacks/rack.c > @@ -13008,10 +13008,8 @@ send: > if (flags & TH_SYN) { > tp->snd_nxt = tp->iss; > to.to_mss = tcp_mssopt(&inp->inp_inc); > -#ifdef NETFLIX_TCPOUDP > if (tp->t_port) > to.to_mss -= V_tcp_udp_tunneling_overhead; > -#endif > to.to_flags |= TOF_MSS; > > /* > @@ -13088,7 +13086,6 @@ send: > !(to.to_flags & TOF_FASTOPEN)) > len = 0; > } > -#ifdef NETFLIX_TCPOUDP > if (tp->t_port) { > if (V_tcp_udp_tunneling_port == 0) { > /* The port was removed?? */ > @@ -13097,7 +13094,6 @@ send: > } > hdrlen += sizeof(struct udphdr); > } > -#endif > #ifdef INET6 > if (isipv6) > ipoptlen = ip6_optlen(tp->t_inpcb); > @@ -13372,7 +13368,6 @@ send: > #ifdef INET6 > if (isipv6) { > ip6 = mtod(m, struct ip6_hdr *); > -#ifdef NETFLIX_TCPOUDP > if (tp->t_port) { > udp = (struct udphdr *)((caddr_t)ip6 + ipoptlen + > sizeof(struct ip6_hdr)); > udp->uh_sport = htons(V_tcp_udp_tunneling_port); > @@ -13380,14 +13375,10 @@ send: > ulen = hdrlen + len - sizeof(struct ip6_hdr); > udp->uh_ulen = htons(ulen); > th = (struct tcphdr *)(udp + 1); > - } else > -#endif > + } else { > th = (struct tcphdr *)(ip6 + 1); > - tcpip_fillheaders(inp, > -#ifdef NETFLIX_TCPOUDP > - tp->t_port, > -#endif > - ip6, th); > + } > + tcpip_fillheaders(inp, tp->t_port, ip6, th); > } else > #endif /* INET6 */ > { > @@ -13395,7 +13386,6 @@ send: > #ifdef TCPDEBUG > ipov = (struct ipovly *)ip; > #endif > -#ifdef NETFLIX_TCPOUDP > if (tp->t_port) { > udp = (struct udphdr *)((caddr_t)ip + ipoptlen + > sizeof(struct ip)); > udp->uh_sport = htons(V_tcp_udp_tunneling_port); > @@ -13403,14 +13393,10 @@ send: > ulen = hdrlen + len - sizeof(struct ip); > udp->uh_ulen = htons(ulen); > th = (struct tcphdr *)(udp + 1); > - } else > -#endif > + } else { > th = (struct tcphdr *)(ip + 1); > - tcpip_fillheaders(inp, > -#ifdef NETFLIX_TCPOUDP > - tp->t_port, > -#endif > - ip, th); > + } > + tcpip_fillheaders(inp, tp->t_port, ip, th); > } > /* > * Fill in fields, remembering maximum advertised window for use in > diff --git a/sys/netinet/tcp_subr.c b/sys/netinet/tcp_subr.c > index dff7767cd9cf..6bdeb3984aee 100644 > --- a/sys/netinet/tcp_subr.c > +++ b/sys/netinet/tcp_subr.c > @@ -126,6 +126,8 @@ __FBSDID("$FreeBSD$"); > #ifdef TCP_OFFLOAD > #include <netinet/tcp_offload.h> > #endif > +#include <netinet/udp.h> > +#include <netinet/udp_var.h> > > #include <netipsec/ipsec_support.h> > > @@ -501,6 +503,80 @@ tcp_switch_back_to_default(struct tcpcb *tp) > } > } > > +static void > +tcp_recv_udp_tunneled_packet(struct mbuf *m, int off, struct inpcb *inp, > + const struct sockaddr *sa, void *ctx) > +{ > + struct ip *iph; > +#ifdef INET6 > + struct ip6_hdr *ip6; > +#endif > + struct udphdr *uh; > + struct tcphdr *th; > + int thlen; > + uint16_t port; > + > + TCPSTAT_INC(tcps_tunneled_pkts); > + if ((m->m_flags & M_PKTHDR) == 0) { > + /* Can't handle one that is not a pkt hdr */ > + TCPSTAT_INC(tcps_tunneled_errs); > + goto out; > + } > + thlen = sizeof(struct tcphdr); > + if (m->m_len < off + sizeof(struct udphdr) + thlen && > + (m = m_pullup(m, off + sizeof(struct udphdr) + thlen)) == NULL) { > + TCPSTAT_INC(tcps_tunneled_errs); > + goto out; > + } > + iph = mtod(m, struct ip *); > + uh = (struct udphdr *)((caddr_t)iph + off); > + th = (struct tcphdr *)(uh + 1); > + thlen = th->th_off << 2; > + if (m->m_len < off + sizeof(struct udphdr) + thlen) { > + m = m_pullup(m, off + sizeof(struct udphdr) + thlen); > + if (m == NULL) { > + TCPSTAT_INC(tcps_tunneled_errs); > + goto out; > + } else { > + iph = mtod(m, struct ip *); > + uh = (struct udphdr *)((caddr_t)iph + off); > + th = (struct tcphdr *)(uh + 1); > + } > + } > + m->m_pkthdr.tcp_tun_port = port = uh->uh_sport; > + bcopy(th, uh, m->m_len - off); > + m->m_len -= sizeof(struct udphdr); > + m->m_pkthdr.len -= sizeof(struct udphdr); > + /* > + * We use the same algorithm for > + * both UDP and TCP for c-sum. So > + * the code in tcp_input will skip > + * the checksum. So we do nothing > + * with the flag (m->m_pkthdr.csum_flags). > + */ > + switch (iph->ip_v) { > +#ifdef INET > + case IPVERSION: > + iph->ip_len = htons(ntohs(iph->ip_len) - sizeof(struct udphdr)); > + tcp_input_with_port(&m, &off, IPPROTO_TCP, port); > + break; > +#endif > +#ifdef INET6 > + case IPV6_VERSION >> 4: > + ip6 = mtod(m, struct ip6_hdr *); > + ip6->ip6_plen = htons(ntohs(ip6->ip6_plen) - sizeof(struct > udphdr)); > + tcp6_input_with_port(&m, &off, IPPROTO_TCP, port); > + break; > +#endif > + default: > + goto out; > + break; > + } > + return; > +out: > + m_freem(m); > +} > + > static int > sysctl_net_inet_default_tcp_functions(SYSCTL_HANDLER_ARGS) > { > @@ -598,6 +674,183 @@ SYSCTL_PROC(_net_inet_tcp, OID_AUTO, > functions_available, > NULL, 0, sysctl_net_inet_list_available, "A", > "list available TCP Function sets"); > > +VNET_DEFINE(int, tcp_udp_tunneling_port) = TCP_TUNNELING_PORT_DEFAULT; > + > +#ifdef INET > +VNET_DEFINE(struct socket *, udp4_tun_socket) = NULL; > +#define V_udp4_tun_socket VNET(udp4_tun_socket) > +#endif > +#ifdef INET6 > +VNET_DEFINE(struct socket *, udp6_tun_socket) = NULL; > +#define V_udp6_tun_socket VNET(udp6_tun_socket) > +#endif > + > +static void > +tcp_over_udp_stop(void) > +{ > + /* > + * This function assumes sysctl caller holds inp_rinfo_lock() > + * for writting! > + */ > +#ifdef INET > + if (V_udp4_tun_socket != NULL) { > + soclose(V_udp4_tun_socket); > + V_udp4_tun_socket = NULL; > + } > +#endif > +#ifdef INET6 > + if (V_udp6_tun_socket != NULL) { > + soclose(V_udp6_tun_socket); > + V_udp6_tun_socket = NULL; > + } > +#endif > +} > + > +static int > +tcp_over_udp_start(void) > +{ > + uint16_t port; > + int ret; > +#ifdef INET > + struct sockaddr_in sin; > +#endif > +#ifdef INET6 > + struct sockaddr_in6 sin6; > +#endif > + /* > + * This function assumes sysctl caller holds inp_info_rlock() > + * for writting! > + */ > + port = V_tcp_udp_tunneling_port; > + if (ntohs(port) == 0) { > + /* Must have a port set */ > + return (EINVAL); > + } > +#ifdef INET > + if (V_udp4_tun_socket != NULL) { > + /* Already running -- must stop first */ > + return (EALREADY); > + } > +#endif > +#ifdef INET6 > + if (V_udp6_tun_socket != NULL) { > + /* Already running -- must stop first */ > + return (EALREADY); > + } > +#endif > +#ifdef INET > + if ((ret = socreate(PF_INET, &V_udp4_tun_socket, > + SOCK_DGRAM, IPPROTO_UDP, > + curthread->td_ucred, curthread))) { > + tcp_over_udp_stop(); > + return (ret); > + } > + /* Call the special UDP hook. */ > + if ((ret = udp_set_kernel_tunneling(V_udp4_tun_socket, > + tcp_recv_udp_tunneled_packet, > + tcp_ctlinput_viaudp, > + NULL))) { > + tcp_over_udp_stop(); > + return (ret); > + } > + /* Ok, we have a socket, bind it to the port. */ > + memset(&sin, 0, sizeof(struct sockaddr_in)); > + sin.sin_len = sizeof(struct sockaddr_in); > + sin.sin_family = AF_INET; > + sin.sin_port = htons(port); > + if ((ret = sobind(V_udp4_tun_socket, > + (struct sockaddr *)&sin, curthread))) { > + tcp_over_udp_stop(); > + return (ret); > + } > +#endif > +#ifdef INET6 > + if ((ret = socreate(PF_INET6, &V_udp6_tun_socket, > + SOCK_DGRAM, IPPROTO_UDP, > + curthread->td_ucred, curthread))) { > + tcp_over_udp_stop(); > + return (ret); > + } > + /* Call the special UDP hook. */ > + if ((ret = udp_set_kernel_tunneling(V_udp6_tun_socket, > + tcp_recv_udp_tunneled_packet, > + tcp6_ctlinput_viaudp, > + NULL))) { > + tcp_over_udp_stop(); > + return (ret); > + } > + /* Ok, we have a socket, bind it to the port. */ > + memset(&sin6, 0, sizeof(struct sockaddr_in6)); > + sin6.sin6_len = sizeof(struct sockaddr_in6); > + sin6.sin6_family = AF_INET6; > + sin6.sin6_port = htons(port); > + if ((ret = sobind(V_udp6_tun_socket, > + (struct sockaddr *)&sin6, curthread))) { > + tcp_over_udp_stop(); > + return (ret); > + } > +#endif > + return (0); > +} > + > +static int > +sysctl_net_inet_tcp_udp_tunneling_port_check(SYSCTL_HANDLER_ARGS) > +{ > + int error; > + uint32_t old, new; > + > + old = V_tcp_udp_tunneling_port; > + new = old; > + error = sysctl_handle_int(oidp, &new, 0, req); > + if ((error == 0) && > + (req->newptr != NULL)) { > + if ((new < TCP_TUNNELING_PORT_MIN) || > + (new > TCP_TUNNELING_PORT_MAX)) { > + error = EINVAL; > + } else { > + V_tcp_udp_tunneling_port = new; > + if (old != 0) { > + tcp_over_udp_stop(); > + } > + if (new != 0) { > + error = tcp_over_udp_start(); > + } > + } > + } > + return (error); > +} > + > +SYSCTL_PROC(_net_inet_tcp, OID_AUTO, udp_tunneling_port, > + CTLFLAG_VNET | CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_MPSAFE, > + &VNET_NAME(tcp_udp_tunneling_port), > + 0, &sysctl_net_inet_tcp_udp_tunneling_port_check, "IU", > + "Tunneling port for tcp over udp"); > + > +VNET_DEFINE(int, tcp_udp_tunneling_overhead) = > TCP_TUNNELING_OVERHEAD_DEFAULT; > + > +static int > +sysctl_net_inet_tcp_udp_tunneling_overhead_check(SYSCTL_HANDLER_ARGS) > +{ > + int error, new; > + > + new = V_tcp_udp_tunneling_overhead; > + error = sysctl_handle_int(oidp, &new, 0, req); > + if (error == 0 && req->newptr) { > + if ((new < TCP_TUNNELING_OVERHEAD_MIN) || > + (new > TCP_TUNNELING_OVERHEAD_MAX)) > + error = EINVAL; > + else > + V_tcp_udp_tunneling_overhead = new; > + } > + return (error); > +} > + > +SYSCTL_PROC(_net_inet_tcp, OID_AUTO, udp_tunneling_overhead, > + CTLFLAG_VNET | CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_MPSAFE, > + &VNET_NAME(tcp_udp_tunneling_overhead), > + 0, &sysctl_net_inet_tcp_udp_tunneling_overhead_check, "IU", > + "MSS reduction when using tcp over udp"); > + > /* > * Exports one (struct tcp_function_info) for each alias/name. > */ > @@ -1305,7 +1558,7 @@ tcp_fini(void *xtp) > * of the tcpcb each time to conserve mbufs. > */ > void > -tcpip_fillheaders(struct inpcb *inp, void *ip_ptr, void *tcp_ptr) > +tcpip_fillheaders(struct inpcb *inp, uint16_t port, void *ip_ptr, void > *tcp_ptr) > { > struct tcphdr *th = (struct tcphdr *)tcp_ptr; > > @@ -1320,7 +1573,10 @@ tcpip_fillheaders(struct inpcb *inp, void *ip_ptr, > void *tcp_ptr) > (inp->inp_flow & IPV6_FLOWINFO_MASK); > ip6->ip6_vfc = (ip6->ip6_vfc & ~IPV6_VERSION_MASK) | > (IPV6_VERSION & IPV6_VERSION_MASK); > - ip6->ip6_nxt = IPPROTO_TCP; > + if (port == 0) > + ip6->ip6_nxt = IPPROTO_TCP; > + else > + ip6->ip6_nxt = IPPROTO_UDP; > ip6->ip6_plen = htons(sizeof(struct tcphdr)); > ip6->ip6_src = inp->in6p_laddr; > ip6->ip6_dst = inp->in6p_faddr; > @@ -1342,7 +1598,10 @@ tcpip_fillheaders(struct inpcb *inp, void *ip_ptr, > void *tcp_ptr) > ip->ip_off = 0; > ip->ip_ttl = inp->inp_ip_ttl; > ip->ip_sum = 0; > - ip->ip_p = IPPROTO_TCP; > + if (port == 0) > + ip->ip_p = IPPROTO_TCP; > + else > + ip->ip_p = IPPROTO_UDP; > ip->ip_src = inp->inp_laddr; > ip->ip_dst = inp->inp_faddr; > } > @@ -1372,7 +1631,7 @@ tcpip_maketemplate(struct inpcb *inp) > t = malloc(sizeof(*t), M_TEMP, M_NOWAIT); > if (t == NULL) > return (NULL); > - tcpip_fillheaders(inp, (void *)&t->tt_ipgen, (void *)&t->tt_t); > + tcpip_fillheaders(inp, 0, (void *)&t->tt_ipgen, (void *)&t->tt_t); > return (t); > } > > @@ -1398,14 +1657,16 @@ tcp_respond(struct tcpcb *tp, void *ipgen, struct > tcphdr *th, struct mbuf *m, > struct inpcb *inp; > struct ip *ip; > struct mbuf *optm; > + struct udphdr *uh = NULL; > struct tcphdr *nth; > u_char *optp; > #ifdef INET6 > struct ip6_hdr *ip6; > int isipv6; > #endif /* INET6 */ > - int optlen, tlen, win; > + int optlen, tlen, win, ulen; > bool incl_opts; > + uint16_t port; > > KASSERT(tp != NULL || m != NULL, ("tcp_respond: tp and m both NULL")); > NET_EPOCH_ASSERT(); > @@ -1423,6 +1684,19 @@ tcp_respond(struct tcpcb *tp, void *ipgen, struct > tcphdr *th, struct mbuf *m, > } else > inp = NULL; > > + if (m != NULL) { > +#ifdef INET6 > + if (isipv6 && ip6 && (ip6->ip6_nxt == IPPROTO_UDP)) > + port = m->m_pkthdr.tcp_tun_port; > + else > *** 1128 LINES SKIPPED *** _______________________________________________ dev-commits-src-all@freebsd.org mailing list https://lists.freebsd.org/mailman/listinfo/dev-commits-src-all To unsubscribe, send any mail to "dev-commits-src-all-unsubscr...@freebsd.org"