> On 7. Jun 2021, at 11:01, Michael Tuexen <tue...@freebsd.org> wrote:
> 
> The branch stable/13 has been updated by tuexen:
> 
> URL: 
> https://cgit.FreeBSD.org/src/commit/?id=fa50e98328b48da4fa8dbd97d0a787962cf249f5
> 
> commit fa50e98328b48da4fa8dbd97d0a787962cf249f5
> Author:     Michael Tuexen <tue...@freebsd.org>
> AuthorDate: 2021-04-18 14:08:08 +0000
> Commit:     Michael Tuexen <tue...@freebsd.org>
> CommitDate: 2021-06-07 09:01:28 +0000
> 
>    mend
I am not sure how the commit ended up this way, but it is an MFC of
https://cgit.FreeBSD.org/src/commit/?id=9e644c23000c2f5028b235f6263d17ffb24d3605
with the merge conflicts resolved manually.

Best regards
Michael
> ---
> share/man/man4/tcp.4          |  15 +-
> sys/netinet/tcp.h             |   1 +
> sys/netinet/tcp_input.c       |  48 ++++-
> sys/netinet/tcp_output.c      |  80 ++++++--
> sys/netinet/tcp_stacks/bbr.c  |  38 +---
> sys/netinet/tcp_stacks/rack.c |  26 +--
> sys/netinet/tcp_subr.c        | 462 ++++++++++++++++++++++++++++++++++++++++--
> sys/netinet/tcp_syncache.c    | 127 +++++++++---
> sys/netinet/tcp_syncache.h    |  12 +-
> sys/netinet/tcp_timewait.c    |  84 ++++++--
> sys/netinet/tcp_usrreq.c      |  30 +++
> sys/netinet/tcp_var.h         |  27 ++-
> sys/netinet/toecore.c         |   4 +-
> sys/netinet6/tcp6_var.h       |   2 +
> sys/sys/mbuf.h                |   1 +
> usr.bin/netstat/inet.c        |   4 +
> usr.bin/sockstat/sockstat.1   |   6 +-
> usr.bin/sockstat/sockstat.c   |  13 +-
> 18 files changed, 822 insertions(+), 158 deletions(-)
> 
> diff --git a/share/man/man4/tcp.4 b/share/man/man4/tcp.4
> index d01505e58427..b5735a40b320 100644
> --- a/share/man/man4/tcp.4
> +++ b/share/man/man4/tcp.4
> @@ -34,7 +34,7 @@
> .\"     From: @(#)tcp.4       8.1 (Berkeley) 6/5/93
> .\" $FreeBSD$
> .\"
> -.Dd April 8, 2021
> +.Dd April 18, 2021
> .Dt TCP 4
> .Os
> .Sh NAME
> @@ -329,6 +329,9 @@ currently executing.
> This is typically used after a process or thread inherits a listen
> socket from its parent, and sets its CPU affinity to a particular core.
> .El
> +.It Dv TCP_REMOTE_UDP_ENCAPS_PORT
> +Set and get the remote UDP encapsulation port.
> +It can only be set on a closed TCP socket.
> .El
> .Pp
> The option level for the
> @@ -752,6 +755,16 @@ A CSV list of template_spec=percent key-value pairs which controls the per
> template sampling rates when
> .Xr stats 3
> sampling is enabled.
> +.It Va udp_tunneling_port
> +The local UDP encapsulation port.
> +A value of 0 indicates that UDP encapsulation is disabled.
> +The default is 0.
> +.It Va udp_tunneling_overhead
> +The overhead taken into account when using UDP encapsulation.
> +Since MSS clamping by middleboxes will most likely not work, values larger than
> +8 (the size of the UDP header) are also supported.
> +Supported values are between 8 and 1024.
> +The default is 8.
> .El
> .Sh ERRORS
> A socket operation may fail with one of the following errors returned:
> diff --git a/sys/netinet/tcp.h b/sys/netinet/tcp.h
> index 0b71bd4658f8..d2bf1f8431fd 100644
> --- a/sys/netinet/tcp.h
> +++ b/sys/netinet/tcp.h
> @@ -183,6 +183,7 @@ struct tcphdr {
> #define       TCP_RXTLS_MODE  42      /* Receive TLS mode */
> #define       TCP_CONGESTION  64      /* get/set congestion control algorithm */
> #define       TCP_CCALGOOPT   65      /* get/set cc algorithm specific options */
> +#define TCP_REMOTE_UDP_ENCAPS_PORT 71        /* Enable TCP over UDP tunneling via the specified port */
> #define TCP_DELACK    72      /* socket option for delayed ack */
> #define TCP_FIN_IS_RST 73     /* A fin from the peer is treated has a RST */
> #define TCP_LOG_LIMIT  74     /* Limit to number of records in tcp-log */
> diff --git a/sys/netinet/tcp_input.c b/sys/netinet/tcp_input.c
> index 397cbc5084e6..d36f9566ffba 100644
> --- a/sys/netinet/tcp_input.c
> +++ b/sys/netinet/tcp_input.c
> @@ -123,6 +123,7 @@ __FBSDID("$FreeBSD$");
> #ifdef TCP_OFFLOAD
> #include <netinet/tcp_offload.h>
> #endif
> +#include <netinet/udp.h>
> 
> #include <netipsec/ipsec_support.h>
> 
> @@ -573,7 +574,7 @@ cc_ecnpkt_handler(struct tcpcb *tp, struct tcphdr *th, uint8_t iptos)
>  */
> #ifdef INET6
> int
> -tcp6_input(struct mbuf **mp, int *offp, int proto)
> +tcp6_input_with_port(struct mbuf **mp, int *offp, int proto, uint16_t port)
> {
>       struct mbuf *m;
>       struct in6_ifaddr *ia6;
> @@ -603,12 +604,19 @@ tcp6_input(struct mbuf **mp, int *offp, int proto)
>       }
> 
>       *mp = m;
> -     return (tcp_input(mp, offp, proto));
> +     return (tcp_input_with_port(mp, offp, proto, port));
> +}
> +
> +int
> +tcp6_input(struct mbuf **mp, int *offp, int proto)
> +{
> +
> +     return(tcp6_input_with_port(mp, offp, proto, 0));
> }
> #endif /* INET6 */
> 
> int
> -tcp_input(struct mbuf **mp, int *offp, int proto)
> +tcp_input_with_port(struct mbuf **mp, int *offp, int proto, uint16_t port)
> {
>       struct mbuf *m = *mp;
>       struct tcphdr *th = NULL;
> @@ -664,6 +672,8 @@ tcp_input(struct mbuf **mp, int *offp, int proto)
>               ip6 = mtod(m, struct ip6_hdr *);
>               th = (struct tcphdr *)((caddr_t)ip6 + off0);
>               tlen = sizeof(*ip6) + ntohs(ip6->ip6_plen) - off0;
> +             if (port)
> +                     goto skip6_csum;
>               if (m->m_pkthdr.csum_flags & CSUM_DATA_VALID_IPV6) {
>                       if (m->m_pkthdr.csum_flags & CSUM_PSEUDO_HDR)
>                               th->th_sum = m->m_pkthdr.csum_data;
> @@ -677,7 +687,7 @@ tcp_input(struct mbuf **mp, int *offp, int proto)
>                       TCPSTAT_INC(tcps_rcvbadsum);
>                       goto drop;
>               }
> -
> +     skip6_csum:
>               /*
>                * Be proactive about unspecified IPv6 address in source.
>                * As we use all-zero to indicate unbounded/unconnected pcb,
> @@ -718,6 +728,8 @@ tcp_input(struct mbuf **mp, int *offp, int proto)
>               tlen = ntohs(ip->ip_len) - off0;
> 
>               iptos = ip->ip_tos;
> +             if (port)
> +                     goto skip_csum;
>               if (m->m_pkthdr.csum_flags & CSUM_DATA_VALID) {
>                       if (m->m_pkthdr.csum_flags & CSUM_PSEUDO_HDR)
>                               th->th_sum = m->m_pkthdr.csum_data;
> @@ -747,8 +759,8 @@ tcp_input(struct mbuf **mp, int *offp, int proto)
>                       ip->ip_v = IPVERSION;
>                       ip->ip_hl = off0 >> 2;
>               }
> -
> -             if (th->th_sum) {
> +     skip_csum:
> +             if (th->th_sum && (port == 0)) {
>                       TCPSTAT_INC(tcps_rcvbadsum);
>                       goto drop;
>               }
> @@ -1006,6 +1018,11 @@ findpcb:
>               goto dropwithreset;
>       }
> 
> +     if ((tp->t_port != port) && (tp->t_state > TCPS_LISTEN)) {
> +             rstreason = BANDLIM_RST_CLOSEDPORT;
> +             goto dropwithreset;
> +     }
> +
> #ifdef TCP_OFFLOAD
>       if (tp->t_flags & TF_TOE) {
>               tcp_offload_input(tp, m);
> @@ -1077,7 +1094,7 @@ findpcb:
>                        * NB: syncache_expand() doesn't unlock
>                        * inp and tcpinfo locks.
>                        */
> -                     rstreason = syncache_expand(&inc, &to, th, &so, m);
> +                     rstreason = syncache_expand(&inc, &to, th, &so, m, port);
>                       if (rstreason < 0) {
>                               /*
>                                * A failing TCP MD5 signature comparison
> @@ -1157,7 +1174,7 @@ tfo_socket_result:
>                * causes.
>                */
>               if (thflags & TH_RST) {
> -                     syncache_chkrst(&inc, th, m);
> +                     syncache_chkrst(&inc, th, m, port);
>                       goto dropunlock;
>               }
>               /*
> @@ -1179,7 +1196,7 @@ tfo_socket_result:
>                               log(LOG_DEBUG, "%s; %s: Listen socket: "
>                                   "SYN|ACK invalid, segment rejected\n",
>                                   s, __func__);
> -                     syncache_badack(&inc);  /* XXX: Not needed! */
> +                     syncache_badack(&inc, port);    /* XXX: Not needed! */
>                       TCPSTAT_INC(tcps_badsyn);
>                       rstreason = BANDLIM_RST_OPENPORT;
>                       goto dropwithreset;
> @@ -1336,7 +1353,8 @@ tfo_socket_result:
> #endif
>               TCP_PROBE3(debug__input, tp, th, m);
>               tcp_dooptions(&to, optp, optlen, TO_SYN);
> -             if (syncache_add(&inc, &to, th, inp, &so, m, NULL, NULL, iptos))
> +             if (syncache_add(&inc, &to, th, inp, &so, m, NULL, NULL, iptos,
> +                 port))
>                       goto tfo_socket_result;
> 
>               /*
> @@ -1467,6 +1485,12 @@ tcp_autorcvbuf(struct mbuf *m, struct tcphdr *th, struct socket *so,
>       return (newsize);
> }
> 
> +int
> +tcp_input(struct mbuf **mp, int *offp, int proto)
> +{
> +     return(tcp_input_with_port(mp, offp, proto, 0));
> +}
> +
> void
> tcp_handle_wakeup(struct tcpcb *tp, struct socket *so)
> {
> @@ -3671,11 +3695,13 @@ tcp_mss_update(struct tcpcb *tp, int offer, int mtuoffer,
>                           sizeof (struct ip6_hdr) + sizeof (struct tcphdr) :
>                           sizeof (struct tcpiphdr);
> #else
> -     const size_t min_protoh = sizeof(struct tcpiphdr);
> +      size_t min_protoh = sizeof(struct tcpiphdr);
> #endif
> 
>       INP_WLOCK_ASSERT(tp->t_inpcb);
> 
> +     if (tp->t_port)
> +             min_protoh += V_tcp_udp_tunneling_overhead;
>       if (mtuoffer != -1) {
>               KASSERT(offer == -1, ("%s: conflict", __func__));
>               offer = mtuoffer - min_protoh;
> diff --git a/sys/netinet/tcp_output.c b/sys/netinet/tcp_output.c
> index e23cdc749e98..5bda2be14df0 100644
> --- a/sys/netinet/tcp_output.c
> +++ b/sys/netinet/tcp_output.c
> @@ -101,6 +101,8 @@ __FBSDID("$FreeBSD$");
> 
> #include <netipsec/ipsec_support.h>
> 
> +#include <netinet/udp.h>
> +#include <netinet/udp_var.h>
> #include <machine/in_cksum.h>
> 
> #include <security/mac/mac_framework.h>
> @@ -207,7 +209,7 @@ tcp_output(struct tcpcb *tp)
> #endif
>       struct tcphdr *th;
>       u_char opt[TCP_MAXOLEN];
> -     unsigned ipoptlen, optlen, hdrlen;
> +     unsigned ipoptlen, optlen, hdrlen, ulen;
> #if defined(IPSEC) || defined(IPSEC_SUPPORT)
>       unsigned ipsec_optlen = 0;
> #endif
> @@ -216,6 +218,7 @@ tcp_output(struct tcpcb *tp)
>       struct sackhole *p;
>       int tso, mtu;
>       struct tcpopt to;
> +     struct udphdr *udp = NULL;
>       unsigned int wanted_cookie = 0;
>       unsigned int dont_sendalot = 0;
> #if 0
> @@ -558,6 +561,7 @@ after_sack_rexmit:
> #endif
> 
>       if ((tp->t_flags & TF_TSO) && V_tcp_do_tso && len > tp->t_maxseg &&
> +         (tp->t_port == 0) &&
>           ((tp->t_flags & TF_SIGNATURE) == 0) &&
>           tp->rcv_numsacks == 0 && sack_rxmit == 0 &&
>           ipoptlen == 0 && !(flags & TH_SYN))
> @@ -800,6 +804,8 @@ send:
>               /* Maximum segment size. */
>               if (flags & TH_SYN) {
>                       to.to_mss = tcp_mssopt(&tp->t_inpcb->inp_inc);
> +                     if (tp->t_port)
> +                             to.to_mss -= V_tcp_udp_tunneling_overhead;
>                       to.to_flags |= TOF_MSS;
> 
>                       /*
> @@ -887,7 +893,14 @@ send:
>                   !(to.to_flags & TOF_FASTOPEN))
>                       len = 0;
>       }
> -
> +     if (tp->t_port) {
> +             if (V_tcp_udp_tunneling_port == 0) {
> +                     /* The port was removed?? */
> +                     SOCKBUF_UNLOCK(&so->so_snd);
> +                     return (EHOSTUNREACH);
> +             }
> +             hdrlen += sizeof(struct udphdr);
> +     }
>       /*
>        * Adjust data length if insertion of options will
>        * bump the packet length beyond the t_maxseg length.
> @@ -1140,8 +1153,17 @@ send:
> #ifdef INET6
>       if (isipv6) {
>               ip6 = mtod(m, struct ip6_hdr *);
> -             th = (struct tcphdr *)(ip6 + 1);
> -             tcpip_fillheaders(tp->t_inpcb, ip6, th);
> +             if (tp->t_port) {
> +                     udp = (struct udphdr *)((caddr_t)ip6 + ipoptlen + sizeof(struct ip6_hdr));
> +                     udp->uh_sport = htons(V_tcp_udp_tunneling_port);
> +                     udp->uh_dport = tp->t_port;
> +                     ulen = hdrlen + len - sizeof(struct ip6_hdr);
> +                     udp->uh_ulen = htons(ulen);
> +                     th = (struct tcphdr *)(udp + 1);
> +             } else {
> +                     th = (struct tcphdr *)(ip6 + 1);
> +             }
> +             tcpip_fillheaders(tp->t_inpcb, tp->t_port, ip6, th);
>       } else
> #endif /* INET6 */
>       {
> @@ -1149,8 +1171,16 @@ send:
> #ifdef TCPDEBUG
>               ipov = (struct ipovly *)ip;
> #endif
> -             th = (struct tcphdr *)(ip + 1);
> -             tcpip_fillheaders(tp->t_inpcb, ip, th);
> +             if (tp->t_port) {
> +                     udp = (struct udphdr *)((caddr_t)ip + ipoptlen + sizeof(struct ip));
> +                     udp->uh_sport = htons(V_tcp_udp_tunneling_port);
> +                     udp->uh_dport = tp->t_port;
> +                     ulen = hdrlen + len - sizeof(struct ip);
> +                     udp->uh_ulen = htons(ulen);
> +                     th = (struct tcphdr *)(udp + 1);
> +             } else
> +                     th = (struct tcphdr *)(ip + 1);
> +             tcpip_fillheaders(tp->t_inpcb, tp->t_port, ip, th);
>       }
> 
>       /*
> @@ -1309,7 +1339,6 @@ send:
>        * checksum extended header and data.
>        */
>       m->m_pkthdr.len = hdrlen + len; /* in6_cksum() need this */
> -     m->m_pkthdr.csum_data = offsetof(struct tcphdr, th_sum);
> 
> #if defined(IPSEC_SUPPORT) || defined(TCP_SIGNATURE)
>       if (to.to_flags & TOF_SIGNATURE) {
> @@ -1336,9 +1365,19 @@ send:
>                * There is no need to fill in ip6_plen right now.
>                * It will be filled later by ip6_output.
>                */
> -             m->m_pkthdr.csum_flags = CSUM_TCP_IPV6;
> -             th->th_sum = in6_cksum_pseudo(ip6, sizeof(struct tcphdr) +
> -                 optlen + len, IPPROTO_TCP, 0);
> +             if (tp->t_port) {
> +                     m->m_pkthdr.csum_flags = CSUM_UDP_IPV6;
> +                     m->m_pkthdr.csum_data = offsetof(struct udphdr, uh_sum);
> +                     udp->uh_sum = in6_cksum_pseudo(ip6, ulen, IPPROTO_UDP, 0);
> +                     th->th_sum = htons(0);
> +                     UDPSTAT_INC(udps_opackets);
> +             } else {
> +                     m->m_pkthdr.csum_flags = CSUM_TCP_IPV6;
> +                     m->m_pkthdr.csum_data = offsetof(struct tcphdr, th_sum);
> +                     th->th_sum = in6_cksum_pseudo(ip6,
> +                         sizeof(struct tcphdr) + optlen + len, IPPROTO_TCP,
> +                         0);
> +             }
>       }
> #endif
> #if defined(INET6) && defined(INET)
> @@ -1346,9 +1385,20 @@ send:
> #endif
> #ifdef INET
>       {
> -             m->m_pkthdr.csum_flags = CSUM_TCP;
> -             th->th_sum = in_pseudo(ip->ip_src.s_addr, ip->ip_dst.s_addr,
> -                 htons(sizeof(struct tcphdr) + IPPROTO_TCP + len + optlen));
> +             if (tp->t_port) {
> +                     m->m_pkthdr.csum_flags = CSUM_UDP;
> +                     m->m_pkthdr.csum_data = offsetof(struct udphdr, uh_sum);
> +                     udp->uh_sum = in_pseudo(ip->ip_src.s_addr,
> +                        ip->ip_dst.s_addr, htons(ulen + IPPROTO_UDP));
> +                     th->th_sum = htons(0);
> +                     UDPSTAT_INC(udps_opackets);
> +             } else {
> +                     m->m_pkthdr.csum_flags = CSUM_TCP;
> +                     m->m_pkthdr.csum_data = offsetof(struct tcphdr, th_sum);
> +                     th->th_sum = in_pseudo(ip->ip_src.s_addr,
> +                         ip->ip_dst.s_addr, htons(sizeof(struct tcphdr) +
> +                         IPPROTO_TCP + len + optlen));
> +             }
> 
>               /* IP version must be set here for ipv4/ipv6 checking later */
>               KASSERT(ip->ip_v == IPVERSION,
> @@ -1473,8 +1523,10 @@ send:
>        * NB: Don't set DF on small MTU/MSS to have a safe fallback.
>        */
>       if (V_path_mtu_discovery && tp->t_maxseg > V_tcp_minmss) {
> -             ip->ip_off |= htons(IP_DF);
>               tp->t_flags2 |= TF2_PLPMTU_PMTUD;
> +             if (tp->t_port == 0 || len < V_tcp_minmss) {
> +                     ip->ip_off |= htons(IP_DF);
> +             }
>       } else {
>               tp->t_flags2 &= ~TF2_PLPMTU_PMTUD;
>       }
> diff --git a/sys/netinet/tcp_stacks/bbr.c b/sys/netinet/tcp_stacks/bbr.c
> index cc20d6bf52ca..1ee8d26446fd 100644
> --- a/sys/netinet/tcp_stacks/bbr.c
> +++ b/sys/netinet/tcp_stacks/bbr.c
> @@ -11969,14 +11969,10 @@ bbr_output_wtime(struct tcpcb *tp, const struct timeval *tv)
> #endif
>       struct tcp_bbr *bbr;
>       struct tcphdr *th;
> -#ifdef NETFLIX_TCPOUDP
>       struct udphdr *udp = NULL;
> -#endif
>       u_char opt[TCP_MAXOLEN];
>       unsigned ipoptlen, optlen, hdrlen;
> -#ifdef NETFLIX_TCPOUDP
>       unsigned ulen;
> -#endif
>       uint32_t bbr_seq;
>       uint32_t delay_calc=0;
>       uint8_t doing_tlp = 0;
> @@ -12991,10 +12987,8 @@ send:
>               /* Maximum segment size. */
>               if (flags & TH_SYN) {
>                       to.to_mss = tcp_mssopt(&inp->inp_inc);
> -#ifdef NETFLIX_TCPOUDP
>                       if (tp->t_port)
>                               to.to_mss -= V_tcp_udp_tunneling_overhead;
> -#endif
>                       to.to_flags |= TOF_MSS;
>                       /*
>                        * On SYN or SYN|ACK transmits on TFO connections,
> @@ -13063,7 +13057,6 @@ send:
>                   !(to.to_flags & TOF_FASTOPEN))
>                       len = 0;
>       }
> -#ifdef NETFLIX_TCPOUDP
>       if (tp->t_port) {
>               if (V_tcp_udp_tunneling_port == 0) {
>                       /* The port was removed?? */
> @@ -13072,7 +13065,6 @@ send:
>               }
>               hdrlen += sizeof(struct udphdr);
>       }
> -#endif
> #ifdef INET6
>       if (isipv6)
>               ipoptlen = ip6_optlen(tp->t_inpcb);
> @@ -13408,7 +13400,6 @@ send:
> #ifdef INET6
>       if (isipv6) {
>               ip6 = mtod(m, struct ip6_hdr *);
> -#ifdef NETFLIX_TCPOUDP
>               if (tp->t_port) {
>                       udp = (struct udphdr *)((caddr_t)ip6 + ipoptlen + sizeof(struct ip6_hdr));
>                       udp->uh_sport = htons(V_tcp_udp_tunneling_port);
> @@ -13417,17 +13408,9 @@ send:
>                       udp->uh_ulen = htons(ulen);
>                       th = (struct tcphdr *)(udp + 1);
>               } else {
> -#endif
>                       th = (struct tcphdr *)(ip6 + 1);
> -
> -#ifdef NETFLIX_TCPOUDP
>               }
> -#endif
> -             tcpip_fillheaders(inp,
> -#ifdef NETFLIX_TCPOUDP
> -                               tp->t_port,
> -#endif
> -                               ip6, th);
> +             tcpip_fillheaders(inp, tp->t_port, ip6, th);
>       } else
> #endif                                /* INET6 */
>       {
> @@ -13435,7 +13418,6 @@ send:
> #ifdef TCPDEBUG
>               ipov = (struct ipovly *)ip;
> #endif
> -#ifdef NETFLIX_TCPOUDP
>               if (tp->t_port) {
>                       udp = (struct udphdr *)((caddr_t)ip + ipoptlen + sizeof(struct ip));
>                       udp->uh_sport = htons(V_tcp_udp_tunneling_port);
> @@ -13443,14 +13425,10 @@ send:
>                       ulen = hdrlen + len - sizeof(struct ip);
>                       udp->uh_ulen = htons(ulen);
>                       th = (struct tcphdr *)(udp + 1);
> -             } else
> -#endif
> +             } else {
>                       th = (struct tcphdr *)(ip + 1);
> -             tcpip_fillheaders(inp,
> -#ifdef NETFLIX_TCPOUDP
> -                               tp->t_port,
> -#endif
> -                               ip, th);
> +             }
> +             tcpip_fillheaders(inp, tp->t_port, ip, th);
>       }
>       /*
>        * If we are doing retransmissions, then snd_nxt will not reflect
> @@ -13600,7 +13578,6 @@ send:
>                * ip6_plen is not need to be filled now, and will be filled
>                * in ip6_output.
>                */
> -#ifdef NETFLIX_TCPOUDP
>               if (tp->t_port) {
>                       m->m_pkthdr.csum_flags = CSUM_UDP_IPV6;
>                       m->m_pkthdr.csum_data = offsetof(struct udphdr, uh_sum);
> @@ -13608,14 +13585,11 @@ send:
>                       th->th_sum = htons(0);
>                       UDPSTAT_INC(udps_opackets);
>               } else {
> -#endif
>                       csum_flags = m->m_pkthdr.csum_flags = CSUM_TCP_IPV6;
>                       m->m_pkthdr.csum_data = offsetof(struct tcphdr, th_sum);
>                       th->th_sum = in6_cksum_pseudo(ip6, sizeof(struct tcphdr) +
>                           optlen + len, IPPROTO_TCP, 0);
> -#ifdef NETFLIX_TCPOUDP
>               }
> -#endif
>       }
> #endif
> #if defined(INET6) && defined(INET)
> @@ -13623,7 +13597,6 @@ send:
> #endif
> #ifdef INET
>       {
> -#ifdef NETFLIX_TCPOUDP
>               if (tp->t_port) {
>                       m->m_pkthdr.csum_flags = CSUM_UDP;
>                       m->m_pkthdr.csum_data = offsetof(struct udphdr, uh_sum);
> @@ -13632,15 +13605,12 @@ send:
>                       th->th_sum = htons(0);
>                       UDPSTAT_INC(udps_opackets);
>               } else {
> -#endif
>                       csum_flags = m->m_pkthdr.csum_flags = CSUM_TCP;
>                       m->m_pkthdr.csum_data = offsetof(struct tcphdr, th_sum);
>                       th->th_sum = in_pseudo(ip->ip_src.s_addr,
>                           ip->ip_dst.s_addr, htons(sizeof(struct tcphdr) +
>                           IPPROTO_TCP + len + optlen));
> -#ifdef NETFLIX_TCPOUDP
>               }
> -#endif
>               /* IP version must be set here for ipv4/ipv6 checking later */
>               KASSERT(ip->ip_v == IPVERSION,
>                   ("%s: IP version incorrect: %d", __func__, ip->ip_v));
> diff --git a/sys/netinet/tcp_stacks/rack.c b/sys/netinet/tcp_stacks/rack.c
> index 0ee73a95a6d7..12827d1699d0 100644
> --- a/sys/netinet/tcp_stacks/rack.c
> +++ b/sys/netinet/tcp_stacks/rack.c
> @@ -13008,10 +13008,8 @@ send:
>               if (flags & TH_SYN) {
>                       tp->snd_nxt = tp->iss;
>                       to.to_mss = tcp_mssopt(&inp->inp_inc);
> -#ifdef NETFLIX_TCPOUDP
>                       if (tp->t_port)
>                               to.to_mss -= V_tcp_udp_tunneling_overhead;
> -#endif
>                       to.to_flags |= TOF_MSS;
> 
>                       /*
> @@ -13088,7 +13086,6 @@ send:
>                   !(to.to_flags & TOF_FASTOPEN))
>                       len = 0;
>       }
> -#ifdef NETFLIX_TCPOUDP
>       if (tp->t_port) {
>               if (V_tcp_udp_tunneling_port == 0) {
>                       /* The port was removed?? */
> @@ -13097,7 +13094,6 @@ send:
>               }
>               hdrlen += sizeof(struct udphdr);
>       }
> -#endif
> #ifdef INET6
>       if (isipv6)
>               ipoptlen = ip6_optlen(tp->t_inpcb);
> @@ -13372,7 +13368,6 @@ send:
> #ifdef INET6
>       if (isipv6) {
>               ip6 = mtod(m, struct ip6_hdr *);
> -#ifdef NETFLIX_TCPOUDP
>               if (tp->t_port) {
>                       udp = (struct udphdr *)((caddr_t)ip6 + ipoptlen + sizeof(struct ip6_hdr));
>                       udp->uh_sport = htons(V_tcp_udp_tunneling_port);
> @@ -13380,14 +13375,10 @@ send:
>                       ulen = hdrlen + len - sizeof(struct ip6_hdr);
>                       udp->uh_ulen = htons(ulen);
>                       th = (struct tcphdr *)(udp + 1);
> -             } else
> -#endif
> +             } else {
>                       th = (struct tcphdr *)(ip6 + 1);
> -             tcpip_fillheaders(inp,
> -#ifdef NETFLIX_TCPOUDP
> -                               tp->t_port,
> -#endif
> -                               ip6, th);
> +             }
> +             tcpip_fillheaders(inp, tp->t_port, ip6, th);
>       } else
> #endif                                /* INET6 */
>       {
> @@ -13395,7 +13386,6 @@ send:
> #ifdef TCPDEBUG
>               ipov = (struct ipovly *)ip;
> #endif
> -#ifdef NETFLIX_TCPOUDP
>               if (tp->t_port) {
>                       udp = (struct udphdr *)((caddr_t)ip + ipoptlen + sizeof(struct ip));
>                       udp->uh_sport = htons(V_tcp_udp_tunneling_port);
> @@ -13403,14 +13393,10 @@ send:
>                       ulen = hdrlen + len - sizeof(struct ip);
>                       udp->uh_ulen = htons(ulen);
>                       th = (struct tcphdr *)(udp + 1);
> -             } else
> -#endif
> +             } else {
>                       th = (struct tcphdr *)(ip + 1);
> -             tcpip_fillheaders(inp,
> -#ifdef NETFLIX_TCPOUDP
> -                               tp->t_port,
> -#endif
> -                               ip, th);
> +             }
> +             tcpip_fillheaders(inp, tp->t_port, ip, th);
>       }
>       /*
>        * Fill in fields, remembering maximum advertised window for use in
> diff --git a/sys/netinet/tcp_subr.c b/sys/netinet/tcp_subr.c
> index dff7767cd9cf..6bdeb3984aee 100644
> --- a/sys/netinet/tcp_subr.c
> +++ b/sys/netinet/tcp_subr.c
> @@ -126,6 +126,8 @@ __FBSDID("$FreeBSD$");
> #ifdef TCP_OFFLOAD
> #include <netinet/tcp_offload.h>
> #endif
> +#include <netinet/udp.h>
> +#include <netinet/udp_var.h>
> 
> #include <netipsec/ipsec_support.h>
> 
> @@ -501,6 +503,80 @@ tcp_switch_back_to_default(struct tcpcb *tp)
>       }
> }
> 
> +static void
> +tcp_recv_udp_tunneled_packet(struct mbuf *m, int off, struct inpcb *inp,
> +    const struct sockaddr *sa, void *ctx)
> +{
> +     struct ip *iph;
> +#ifdef INET6
> +     struct ip6_hdr *ip6;
> +#endif
> +     struct udphdr *uh;
> +     struct tcphdr *th;
> +     int thlen;
> +     uint16_t port;
> +
> +     TCPSTAT_INC(tcps_tunneled_pkts);
> +     if ((m->m_flags & M_PKTHDR) == 0) {
> +             /* Can't handle one that is not a pkt hdr */
> +             TCPSTAT_INC(tcps_tunneled_errs);
> +             goto out;
> +     }
> +     thlen = sizeof(struct tcphdr);
> +     if (m->m_len < off + sizeof(struct udphdr) + thlen &&
> +         (m =  m_pullup(m, off + sizeof(struct udphdr) + thlen)) == NULL) {
> +             TCPSTAT_INC(tcps_tunneled_errs);
> +             goto out;
> +     }
> +     iph = mtod(m, struct ip *);
> +     uh = (struct udphdr *)((caddr_t)iph + off);
> +     th = (struct tcphdr *)(uh + 1);
> +     thlen = th->th_off << 2;
> +     if (m->m_len < off + sizeof(struct udphdr) + thlen) {
> +             m =  m_pullup(m, off + sizeof(struct udphdr) + thlen);
> +             if (m == NULL) {
> +                     TCPSTAT_INC(tcps_tunneled_errs);
> +                     goto out;
> +             } else {
> +                     iph = mtod(m, struct ip *);
> +                     uh = (struct udphdr *)((caddr_t)iph + off);
> +                     th = (struct tcphdr *)(uh + 1);
> +             }
> +     }
> +     m->m_pkthdr.tcp_tun_port = port = uh->uh_sport;
> +     bcopy(th, uh, m->m_len - off);
> +     m->m_len -= sizeof(struct udphdr);
> +     m->m_pkthdr.len -= sizeof(struct udphdr);
> +     /*
> +      * We use the same algorithm for
> +      * both UDP and TCP for c-sum. So
> +      * the code in tcp_input will skip
> +      * the checksum. So we do nothing
> +      * with the flag (m->m_pkthdr.csum_flags).
> +      */
> +     switch (iph->ip_v) {
> +#ifdef INET
> +     case IPVERSION:
> +             iph->ip_len = htons(ntohs(iph->ip_len) - sizeof(struct udphdr));
> +             tcp_input_with_port(&m, &off, IPPROTO_TCP, port);
> +             break;
> +#endif
> +#ifdef INET6
> +     case IPV6_VERSION >> 4:
> +             ip6 = mtod(m, struct ip6_hdr *);
> +             ip6->ip6_plen = htons(ntohs(ip6->ip6_plen) - sizeof(struct udphdr));
> +             tcp6_input_with_port(&m, &off, IPPROTO_TCP, port);
> +             break;
> +#endif
> +     default:
> +             goto out;
> +             break;
> +     }
> +     return;
> +out:
> +     m_freem(m);
> +}
> +
> static int
> sysctl_net_inet_default_tcp_functions(SYSCTL_HANDLER_ARGS)
> {
> @@ -598,6 +674,183 @@ SYSCTL_PROC(_net_inet_tcp, OID_AUTO, functions_available,
>     NULL, 0, sysctl_net_inet_list_available, "A",
>     "list available TCP Function sets");
> 
> +VNET_DEFINE(int, tcp_udp_tunneling_port) = TCP_TUNNELING_PORT_DEFAULT;
> +
> +#ifdef INET
> +VNET_DEFINE(struct socket *, udp4_tun_socket) = NULL;
> +#define      V_udp4_tun_socket       VNET(udp4_tun_socket)
> +#endif
> +#ifdef INET6
> +VNET_DEFINE(struct socket *, udp6_tun_socket) = NULL;
> +#define      V_udp6_tun_socket       VNET(udp6_tun_socket)
> +#endif
> +
> +static void
> +tcp_over_udp_stop(void)
> +{
> +     /*
> +      * This function assumes sysctl caller holds inp_rinfo_lock()
> +      * for writting!
> +      */
> +#ifdef INET
> +     if (V_udp4_tun_socket != NULL) {
> +             soclose(V_udp4_tun_socket);
> +             V_udp4_tun_socket = NULL;
> +     }
> +#endif
> +#ifdef INET6
> +     if (V_udp6_tun_socket != NULL) {
> +             soclose(V_udp6_tun_socket);
> +             V_udp6_tun_socket = NULL;
> +     }
> +#endif
> +}
> +
> +static int
> +tcp_over_udp_start(void)
> +{
> +     uint16_t port;
> +     int ret;
> +#ifdef INET
> +     struct sockaddr_in sin;
> +#endif
> +#ifdef INET6
> +     struct sockaddr_in6 sin6;
> +#endif
> +     /*
> +      * This function assumes sysctl caller holds inp_info_rlock()
> +      * for writting!
> +      */
> +     port = V_tcp_udp_tunneling_port;
> +     if (ntohs(port) == 0) {
> +             /* Must have a port set */
> +             return (EINVAL);
> +     }
> +#ifdef INET
> +     if (V_udp4_tun_socket != NULL) {
> +             /* Already running -- must stop first */
> +             return (EALREADY);
> +     }
> +#endif
> +#ifdef INET6
> +     if (V_udp6_tun_socket != NULL) {
> +             /* Already running -- must stop first */
> +             return (EALREADY);
> +     }
> +#endif
> +#ifdef INET
> +     if ((ret = socreate(PF_INET, &V_udp4_tun_socket,
> +         SOCK_DGRAM, IPPROTO_UDP,
> +         curthread->td_ucred, curthread))) {
> +             tcp_over_udp_stop();
> +             return (ret);
> +     }
> +     /* Call the special UDP hook. */
> +     if ((ret = udp_set_kernel_tunneling(V_udp4_tun_socket,
> +         tcp_recv_udp_tunneled_packet,
> +         tcp_ctlinput_viaudp,
> +         NULL))) {
> +             tcp_over_udp_stop();
> +             return (ret);
> +     }
> +     /* Ok, we have a socket, bind it to the port. */
> +     memset(&sin, 0, sizeof(struct sockaddr_in));
> +     sin.sin_len = sizeof(struct sockaddr_in);
> +     sin.sin_family = AF_INET;
> +     sin.sin_port = htons(port);
> +     if ((ret = sobind(V_udp4_tun_socket,
> +         (struct sockaddr *)&sin, curthread))) {
> +             tcp_over_udp_stop();
> +             return (ret);
> +     }
> +#endif
> +#ifdef INET6
> +     if ((ret = socreate(PF_INET6, &V_udp6_tun_socket,
> +         SOCK_DGRAM, IPPROTO_UDP,
> +         curthread->td_ucred, curthread))) {
> +             tcp_over_udp_stop();
> +             return (ret);
> +     }
> +     /* Call the special UDP hook. */
> +     if ((ret = udp_set_kernel_tunneling(V_udp6_tun_socket,
> +         tcp_recv_udp_tunneled_packet,
> +         tcp6_ctlinput_viaudp,
> +         NULL))) {
> +             tcp_over_udp_stop();
> +             return (ret);
> +     }
> +     /* Ok, we have a socket, bind it to the port. */
> +     memset(&sin6, 0, sizeof(struct sockaddr_in6));
> +     sin6.sin6_len = sizeof(struct sockaddr_in6);
> +     sin6.sin6_family = AF_INET6;
> +     sin6.sin6_port = htons(port);
> +     if ((ret = sobind(V_udp6_tun_socket,
> +         (struct sockaddr *)&sin6, curthread))) {
> +             tcp_over_udp_stop();
> +             return (ret);
> +     }
> +#endif
> +     return (0);
> +}
> +
> +static int
> +sysctl_net_inet_tcp_udp_tunneling_port_check(SYSCTL_HANDLER_ARGS)
> +{
> +     int error;
> +     uint32_t old, new;
> +
> +     old = V_tcp_udp_tunneling_port;
> +     new = old;
> +     error = sysctl_handle_int(oidp, &new, 0, req);
> +     if ((error == 0) &&
> +         (req->newptr != NULL)) {
> +             if ((new < TCP_TUNNELING_PORT_MIN) ||
> +                 (new > TCP_TUNNELING_PORT_MAX)) {
> +                     error = EINVAL;
> +             } else {
> +                     V_tcp_udp_tunneling_port = new;
> +                     if (old != 0) {
> +                             tcp_over_udp_stop();
> +                     }
> +                     if (new != 0) {
> +                             error = tcp_over_udp_start();
> +                     }
> +             }
> +     }
> +     return (error);
> +}
> +
> +SYSCTL_PROC(_net_inet_tcp, OID_AUTO, udp_tunneling_port,
> +    CTLFLAG_VNET | CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_MPSAFE,
> +    &VNET_NAME(tcp_udp_tunneling_port),
> +    0, &sysctl_net_inet_tcp_udp_tunneling_port_check, "IU",
> +    "Tunneling port for tcp over udp");
> +
> +VNET_DEFINE(int, tcp_udp_tunneling_overhead) = TCP_TUNNELING_OVERHEAD_DEFAULT;
> +
> +static int
> +sysctl_net_inet_tcp_udp_tunneling_overhead_check(SYSCTL_HANDLER_ARGS)
> +{
> +     int error, new;
> +
> +     new = V_tcp_udp_tunneling_overhead;
> +     error = sysctl_handle_int(oidp, &new, 0, req);
> +     if (error == 0 && req->newptr) {
> +             if ((new < TCP_TUNNELING_OVERHEAD_MIN) ||
> +                 (new > TCP_TUNNELING_OVERHEAD_MAX))
> +                     error = EINVAL;
> +             else
> +                     V_tcp_udp_tunneling_overhead = new;
> +     }
> +     return (error);
> +}
> +
> +SYSCTL_PROC(_net_inet_tcp, OID_AUTO, udp_tunneling_overhead,
> +    CTLFLAG_VNET | CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_MPSAFE,
> +    &VNET_NAME(tcp_udp_tunneling_overhead),
> +    0, &sysctl_net_inet_tcp_udp_tunneling_overhead_check, "IU",
> +    "MSS reduction when using tcp over udp");
> +
> /*
>  * Exports one (struct tcp_function_info) for each alias/name.
>  */
> @@ -1305,7 +1558,7 @@ tcp_fini(void *xtp)
>  * of the tcpcb each time to conserve mbufs.
>  */
> void
> -tcpip_fillheaders(struct inpcb *inp, void *ip_ptr, void *tcp_ptr)
> +tcpip_fillheaders(struct inpcb *inp, uint16_t port, void *ip_ptr, void *tcp_ptr)
> {
>       struct tcphdr *th = (struct tcphdr *)tcp_ptr;
> 
> @@ -1320,7 +1573,10 @@ tcpip_fillheaders(struct inpcb *inp, void *ip_ptr, void *tcp_ptr)
>                       (inp->inp_flow & IPV6_FLOWINFO_MASK);
>               ip6->ip6_vfc = (ip6->ip6_vfc & ~IPV6_VERSION_MASK) |
>                       (IPV6_VERSION & IPV6_VERSION_MASK);
> -             ip6->ip6_nxt = IPPROTO_TCP;
> +             if (port == 0)
> +                     ip6->ip6_nxt = IPPROTO_TCP;
> +             else
> +                     ip6->ip6_nxt = IPPROTO_UDP;
>               ip6->ip6_plen = htons(sizeof(struct tcphdr));
>               ip6->ip6_src = inp->in6p_laddr;
>               ip6->ip6_dst = inp->in6p_faddr;
> @@ -1342,7 +1598,10 @@ tcpip_fillheaders(struct inpcb *inp, void *ip_ptr, void *tcp_ptr)
>               ip->ip_off = 0;
>               ip->ip_ttl = inp->inp_ip_ttl;
>               ip->ip_sum = 0;
> -             ip->ip_p = IPPROTO_TCP;
> +             if (port == 0)
> +                     ip->ip_p = IPPROTO_TCP;
> +             else
> +                     ip->ip_p = IPPROTO_UDP;
>               ip->ip_src = inp->inp_laddr;
>               ip->ip_dst = inp->inp_faddr;
>       }
> @@ -1372,7 +1631,7 @@ tcpip_maketemplate(struct inpcb *inp)
>       t = malloc(sizeof(*t), M_TEMP, M_NOWAIT);
>       if (t == NULL)
>               return (NULL);
> -     tcpip_fillheaders(inp, (void *)&t->tt_ipgen, (void *)&t->tt_t);
> +     tcpip_fillheaders(inp, 0, (void *)&t->tt_ipgen, (void *)&t->tt_t);
>       return (t);
> }
> 
> @@ -1398,14 +1657,16 @@ tcp_respond(struct tcpcb *tp, void *ipgen, struct tcphdr *th, struct mbuf *m,
>       struct inpcb *inp;
>       struct ip *ip;
>       struct mbuf *optm;
> +     struct udphdr *uh = NULL;
>       struct tcphdr *nth;
>       u_char *optp;
> #ifdef INET6
>       struct ip6_hdr *ip6;
>       int isipv6;
> #endif /* INET6 */
> -     int optlen, tlen, win;
> +     int optlen, tlen, win, ulen;
>       bool incl_opts;
> +     uint16_t port;
> 
>       KASSERT(tp != NULL || m != NULL, ("tcp_respond: tp and m both NULL"));
>       NET_EPOCH_ASSERT();
> @@ -1423,6 +1684,19 @@ tcp_respond(struct tcpcb *tp, void *ipgen, struct tcphdr *th, struct mbuf *m,
>       } else
>               inp = NULL;
> 
> +     if (m != NULL) {
> +#ifdef INET6
> +             if (isipv6 && ip6 && (ip6->ip6_nxt == IPPROTO_UDP))
> +                     port = m->m_pkthdr.tcp_tun_port;
> +             else
> *** 1128 LINES SKIPPED ***

_______________________________________________
dev-commits-src-all@freebsd.org mailing list
https://lists.freebsd.org/mailman/listinfo/dev-commits-src-all
To unsubscribe, send any mail to "dev-commits-src-all-unsubscr...@freebsd.org"

Reply via email to