The branch stable/13 has been updated by tuexen:

URL: https://cgit.FreeBSD.org/src/commit/?id=fa50e98328b48da4fa8dbd97d0a787962cf249f5

commit fa50e98328b48da4fa8dbd97d0a787962cf249f5
Author:     Michael Tuexen <tue...@freebsd.org>
AuthorDate: 2021-04-18 14:08:08 +0000
Commit:     Michael Tuexen <tue...@freebsd.org>
CommitDate: 2021-06-07 09:01:28 +0000

    mend
---
 share/man/man4/tcp.4          |  15 +-
 sys/netinet/tcp.h             |   1 +
 sys/netinet/tcp_input.c       |  48 ++++-
 sys/netinet/tcp_output.c      |  80 ++++++--
 sys/netinet/tcp_stacks/bbr.c  |  38 +---
 sys/netinet/tcp_stacks/rack.c |  26 +--
 sys/netinet/tcp_subr.c        | 462 ++++++++++++++++++++++++++++++++++++++++--
 sys/netinet/tcp_syncache.c    | 127 +++++++++---
 sys/netinet/tcp_syncache.h    |  12 +-
 sys/netinet/tcp_timewait.c    |  84 ++++++--
 sys/netinet/tcp_usrreq.c      |  30 +++
 sys/netinet/tcp_var.h         |  27 ++-
 sys/netinet/toecore.c         |   4 +-
 sys/netinet6/tcp6_var.h       |   2 +
 sys/sys/mbuf.h                |   1 +
 usr.bin/netstat/inet.c        |   4 +
 usr.bin/sockstat/sockstat.1   |   6 +-
 usr.bin/sockstat/sockstat.c   |  13 +-
 18 files changed, 822 insertions(+), 158 deletions(-)

diff --git a/share/man/man4/tcp.4 b/share/man/man4/tcp.4
index d01505e58427..b5735a40b320 100644
--- a/share/man/man4/tcp.4
+++ b/share/man/man4/tcp.4
@@ -34,7 +34,7 @@
 .\"     From: @(#)tcp.4        8.1 (Berkeley) 6/5/93
 .\" $FreeBSD$
 .\"
-.Dd April 8, 2021
+.Dd April 18, 2021
 .Dt TCP 4
 .Os
 .Sh NAME
@@ -329,6 +329,9 @@ currently executing.
 This is typically used after a process or thread inherits a listen
 socket from its parent, and sets its CPU affinity to a particular core.
 .El
+.It Dv TCP_REMOTE_UDP_ENCAPS_PORT
+Set and get the remote UDP encapsulation port.
+It can only be set on a closed TCP socket.
 .El
 .Pp
 The option level for the
@@ -752,6 +755,16 @@ A CSV list of template_spec=percent key-value pairs which controls the per
 template sampling rates when
 .Xr stats 3
 sampling is enabled.
+.It Va udp_tunneling_port
+The local UDP encapsulation port.
+A value of 0 indicates that UDP encapsulation is disabled.
+The default is 0.
+.It Va udp_tunneling_overhead
+The overhead taken into account when using UDP encapsulation.
+Since MSS clamping by middleboxes will most likely not work, values larger than
+8 (the size of the UDP header) are also supported.
+Supported values are between 8 and 1024.
+The default is 8.
 .El
 .Sh ERRORS
 A socket operation may fail with one of the following errors returned:
diff --git a/sys/netinet/tcp.h b/sys/netinet/tcp.h
index 0b71bd4658f8..d2bf1f8431fd 100644
--- a/sys/netinet/tcp.h
+++ b/sys/netinet/tcp.h
@@ -183,6 +183,7 @@ struct tcphdr {
 #define        TCP_RXTLS_MODE  42      /* Receive TLS mode */
 #define        TCP_CONGESTION  64      /* get/set congestion control algorithm */
 #define        TCP_CCALGOOPT   65      /* get/set cc algorithm specific options */
+#define TCP_REMOTE_UDP_ENCAPS_PORT 71  /* Enable TCP over UDP tunneling via the specified port */
 #define TCP_DELACK     72      /* socket option for delayed ack */
 #define TCP_FIN_IS_RST 73      /* A fin from the peer is treated has a RST */
 #define TCP_LOG_LIMIT  74      /* Limit to number of records in tcp-log */
diff --git a/sys/netinet/tcp_input.c b/sys/netinet/tcp_input.c
index 397cbc5084e6..d36f9566ffba 100644
--- a/sys/netinet/tcp_input.c
+++ b/sys/netinet/tcp_input.c
@@ -123,6 +123,7 @@ __FBSDID("$FreeBSD$");
 #ifdef TCP_OFFLOAD
 #include <netinet/tcp_offload.h>
 #endif
+#include <netinet/udp.h>
 
 #include <netipsec/ipsec_support.h>
 
@@ -573,7 +574,7 @@ cc_ecnpkt_handler(struct tcpcb *tp, struct tcphdr *th, uint8_t iptos)
  */
 #ifdef INET6
 int
-tcp6_input(struct mbuf **mp, int *offp, int proto)
+tcp6_input_with_port(struct mbuf **mp, int *offp, int proto, uint16_t port)
 {
        struct mbuf *m;
        struct in6_ifaddr *ia6;
@@ -603,12 +604,19 @@ tcp6_input(struct mbuf **mp, int *offp, int proto)
        }
 
        *mp = m;
-       return (tcp_input(mp, offp, proto));
+       return (tcp_input_with_port(mp, offp, proto, port));
+}
+
+int
+tcp6_input(struct mbuf **mp, int *offp, int proto)
+{
+
+       return(tcp6_input_with_port(mp, offp, proto, 0));
 }
 #endif /* INET6 */
 
 int
-tcp_input(struct mbuf **mp, int *offp, int proto)
+tcp_input_with_port(struct mbuf **mp, int *offp, int proto, uint16_t port)
 {
        struct mbuf *m = *mp;
        struct tcphdr *th = NULL;
@@ -664,6 +672,8 @@ tcp_input(struct mbuf **mp, int *offp, int proto)
                ip6 = mtod(m, struct ip6_hdr *);
                th = (struct tcphdr *)((caddr_t)ip6 + off0);
                tlen = sizeof(*ip6) + ntohs(ip6->ip6_plen) - off0;
+               if (port)
+                       goto skip6_csum;
                if (m->m_pkthdr.csum_flags & CSUM_DATA_VALID_IPV6) {
                        if (m->m_pkthdr.csum_flags & CSUM_PSEUDO_HDR)
                                th->th_sum = m->m_pkthdr.csum_data;
@@ -677,7 +687,7 @@ tcp_input(struct mbuf **mp, int *offp, int proto)
                        TCPSTAT_INC(tcps_rcvbadsum);
                        goto drop;
                }
-
+       skip6_csum:
                /*
                 * Be proactive about unspecified IPv6 address in source.
                 * As we use all-zero to indicate unbounded/unconnected pcb,
@@ -718,6 +728,8 @@ tcp_input(struct mbuf **mp, int *offp, int proto)
                tlen = ntohs(ip->ip_len) - off0;
 
                iptos = ip->ip_tos;
+               if (port)
+                       goto skip_csum;
                if (m->m_pkthdr.csum_flags & CSUM_DATA_VALID) {
                        if (m->m_pkthdr.csum_flags & CSUM_PSEUDO_HDR)
                                th->th_sum = m->m_pkthdr.csum_data;
@@ -747,8 +759,8 @@ tcp_input(struct mbuf **mp, int *offp, int proto)
                        ip->ip_v = IPVERSION;
                        ip->ip_hl = off0 >> 2;
                }
-
-               if (th->th_sum) {
+       skip_csum:
+               if (th->th_sum && (port == 0)) {
                        TCPSTAT_INC(tcps_rcvbadsum);
                        goto drop;
                }
@@ -1006,6 +1018,11 @@ findpcb:
                goto dropwithreset;
        }
 
+       if ((tp->t_port != port) && (tp->t_state > TCPS_LISTEN)) {
+               rstreason = BANDLIM_RST_CLOSEDPORT;
+               goto dropwithreset;
+       }
+
 #ifdef TCP_OFFLOAD
        if (tp->t_flags & TF_TOE) {
                tcp_offload_input(tp, m);
@@ -1077,7 +1094,7 @@ findpcb:
                         * NB: syncache_expand() doesn't unlock
                         * inp and tcpinfo locks.
                         */
-                       rstreason = syncache_expand(&inc, &to, th, &so, m);
+                       rstreason = syncache_expand(&inc, &to, th, &so, m, port);
                        if (rstreason < 0) {
                                /*
                                 * A failing TCP MD5 signature comparison
@@ -1157,7 +1174,7 @@ tfo_socket_result:
                 * causes.
                 */
                if (thflags & TH_RST) {
-                       syncache_chkrst(&inc, th, m);
+                       syncache_chkrst(&inc, th, m, port);
                        goto dropunlock;
                }
                /*
@@ -1179,7 +1196,7 @@ tfo_socket_result:
                                log(LOG_DEBUG, "%s; %s: Listen socket: "
                                    "SYN|ACK invalid, segment rejected\n",
                                    s, __func__);
-                       syncache_badack(&inc);  /* XXX: Not needed! */
+                       syncache_badack(&inc, port);    /* XXX: Not needed! */
                        TCPSTAT_INC(tcps_badsyn);
                        rstreason = BANDLIM_RST_OPENPORT;
                        goto dropwithreset;
@@ -1336,7 +1353,8 @@ tfo_socket_result:
 #endif
                TCP_PROBE3(debug__input, tp, th, m);
                tcp_dooptions(&to, optp, optlen, TO_SYN);
-               if (syncache_add(&inc, &to, th, inp, &so, m, NULL, NULL, iptos))
+               if (syncache_add(&inc, &to, th, inp, &so, m, NULL, NULL, iptos,
+                   port))
                        goto tfo_socket_result;
 
                /*
@@ -1467,6 +1485,12 @@ tcp_autorcvbuf(struct mbuf *m, struct tcphdr *th, struct socket *so,
        return (newsize);
 }
 
+int
+tcp_input(struct mbuf **mp, int *offp, int proto)
+{
+       return(tcp_input_with_port(mp, offp, proto, 0));
+}
+
 void
 tcp_handle_wakeup(struct tcpcb *tp, struct socket *so)
 {
@@ -3671,11 +3695,13 @@ tcp_mss_update(struct tcpcb *tp, int offer, int mtuoffer,
                            sizeof (struct ip6_hdr) + sizeof (struct tcphdr) :
                            sizeof (struct tcpiphdr);
 #else
-       const size_t min_protoh = sizeof(struct tcpiphdr);
+        size_t min_protoh = sizeof(struct tcpiphdr);
 #endif
 
        INP_WLOCK_ASSERT(tp->t_inpcb);
 
+       if (tp->t_port)
+               min_protoh += V_tcp_udp_tunneling_overhead;
        if (mtuoffer != -1) {
                KASSERT(offer == -1, ("%s: conflict", __func__));
                offer = mtuoffer - min_protoh;
diff --git a/sys/netinet/tcp_output.c b/sys/netinet/tcp_output.c
index e23cdc749e98..5bda2be14df0 100644
--- a/sys/netinet/tcp_output.c
+++ b/sys/netinet/tcp_output.c
@@ -101,6 +101,8 @@ __FBSDID("$FreeBSD$");
 
 #include <netipsec/ipsec_support.h>
 
+#include <netinet/udp.h>
+#include <netinet/udp_var.h>
 #include <machine/in_cksum.h>
 
 #include <security/mac/mac_framework.h>
@@ -207,7 +209,7 @@ tcp_output(struct tcpcb *tp)
 #endif
        struct tcphdr *th;
        u_char opt[TCP_MAXOLEN];
-       unsigned ipoptlen, optlen, hdrlen;
+       unsigned ipoptlen, optlen, hdrlen, ulen;
 #if defined(IPSEC) || defined(IPSEC_SUPPORT)
        unsigned ipsec_optlen = 0;
 #endif
@@ -216,6 +218,7 @@ tcp_output(struct tcpcb *tp)
        struct sackhole *p;
        int tso, mtu;
        struct tcpopt to;
+       struct udphdr *udp = NULL;
        unsigned int wanted_cookie = 0;
        unsigned int dont_sendalot = 0;
 #if 0
@@ -558,6 +561,7 @@ after_sack_rexmit:
 #endif
 
        if ((tp->t_flags & TF_TSO) && V_tcp_do_tso && len > tp->t_maxseg &&
+           (tp->t_port == 0) &&
            ((tp->t_flags & TF_SIGNATURE) == 0) &&
            tp->rcv_numsacks == 0 && sack_rxmit == 0 &&
            ipoptlen == 0 && !(flags & TH_SYN))
@@ -800,6 +804,8 @@ send:
                /* Maximum segment size. */
                if (flags & TH_SYN) {
                        to.to_mss = tcp_mssopt(&tp->t_inpcb->inp_inc);
+                       if (tp->t_port)
+                               to.to_mss -= V_tcp_udp_tunneling_overhead;
                        to.to_flags |= TOF_MSS;
 
                        /*
@@ -887,7 +893,14 @@ send:
                    !(to.to_flags & TOF_FASTOPEN))
                        len = 0;
        }
-
+       if (tp->t_port) {
+               if (V_tcp_udp_tunneling_port == 0) {
+                       /* The port was removed?? */
+                       SOCKBUF_UNLOCK(&so->so_snd);
+                       return (EHOSTUNREACH);
+               }
+               hdrlen += sizeof(struct udphdr);
+       }
        /*
         * Adjust data length if insertion of options will
         * bump the packet length beyond the t_maxseg length.
@@ -1140,8 +1153,17 @@ send:
 #ifdef INET6
        if (isipv6) {
                ip6 = mtod(m, struct ip6_hdr *);
-               th = (struct tcphdr *)(ip6 + 1);
-               tcpip_fillheaders(tp->t_inpcb, ip6, th);
+               if (tp->t_port) {
+                       udp = (struct udphdr *)((caddr_t)ip6 + ipoptlen + sizeof(struct ip6_hdr));
+                       udp->uh_sport = htons(V_tcp_udp_tunneling_port);
+                       udp->uh_dport = tp->t_port;
+                       ulen = hdrlen + len - sizeof(struct ip6_hdr);
+                       udp->uh_ulen = htons(ulen);
+                       th = (struct tcphdr *)(udp + 1);
+               } else {
+                       th = (struct tcphdr *)(ip6 + 1);
+               }
+               tcpip_fillheaders(tp->t_inpcb, tp->t_port, ip6, th);
        } else
 #endif /* INET6 */
        {
@@ -1149,8 +1171,16 @@ send:
 #ifdef TCPDEBUG
                ipov = (struct ipovly *)ip;
 #endif
-               th = (struct tcphdr *)(ip + 1);
-               tcpip_fillheaders(tp->t_inpcb, ip, th);
+               if (tp->t_port) {
+                       udp = (struct udphdr *)((caddr_t)ip + ipoptlen + sizeof(struct ip));
+                       udp->uh_sport = htons(V_tcp_udp_tunneling_port);
+                       udp->uh_dport = tp->t_port;
+                       ulen = hdrlen + len - sizeof(struct ip);
+                       udp->uh_ulen = htons(ulen);
+                       th = (struct tcphdr *)(udp + 1);
+               } else
+                       th = (struct tcphdr *)(ip + 1);
+               tcpip_fillheaders(tp->t_inpcb, tp->t_port, ip, th);
        }
 
        /*
@@ -1309,7 +1339,6 @@ send:
         * checksum extended header and data.
         */
        m->m_pkthdr.len = hdrlen + len; /* in6_cksum() need this */
-       m->m_pkthdr.csum_data = offsetof(struct tcphdr, th_sum);
 
 #if defined(IPSEC_SUPPORT) || defined(TCP_SIGNATURE)
        if (to.to_flags & TOF_SIGNATURE) {
@@ -1336,9 +1365,19 @@ send:
                 * There is no need to fill in ip6_plen right now.
                 * It will be filled later by ip6_output.
                 */
-               m->m_pkthdr.csum_flags = CSUM_TCP_IPV6;
-               th->th_sum = in6_cksum_pseudo(ip6, sizeof(struct tcphdr) +
-                   optlen + len, IPPROTO_TCP, 0);
+               if (tp->t_port) {
+                       m->m_pkthdr.csum_flags = CSUM_UDP_IPV6;
+                       m->m_pkthdr.csum_data = offsetof(struct udphdr, uh_sum);
+                       udp->uh_sum = in6_cksum_pseudo(ip6, ulen, IPPROTO_UDP, 0);
+                       th->th_sum = htons(0);
+                       UDPSTAT_INC(udps_opackets);
+               } else {
+                       m->m_pkthdr.csum_flags = CSUM_TCP_IPV6;
+                       m->m_pkthdr.csum_data = offsetof(struct tcphdr, th_sum);
+                       th->th_sum = in6_cksum_pseudo(ip6,
+                           sizeof(struct tcphdr) + optlen + len, IPPROTO_TCP,
+                           0);
+               }
        }
 #endif
 #if defined(INET6) && defined(INET)
@@ -1346,9 +1385,20 @@ send:
 #endif
 #ifdef INET
        {
-               m->m_pkthdr.csum_flags = CSUM_TCP;
-               th->th_sum = in_pseudo(ip->ip_src.s_addr, ip->ip_dst.s_addr,
-                   htons(sizeof(struct tcphdr) + IPPROTO_TCP + len + optlen));
+               if (tp->t_port) {
+                       m->m_pkthdr.csum_flags = CSUM_UDP;
+                       m->m_pkthdr.csum_data = offsetof(struct udphdr, uh_sum);
+                       udp->uh_sum = in_pseudo(ip->ip_src.s_addr,
+                          ip->ip_dst.s_addr, htons(ulen + IPPROTO_UDP));
+                       th->th_sum = htons(0);
+                       UDPSTAT_INC(udps_opackets);
+               } else {
+                       m->m_pkthdr.csum_flags = CSUM_TCP;
+                       m->m_pkthdr.csum_data = offsetof(struct tcphdr, th_sum);
+                       th->th_sum = in_pseudo(ip->ip_src.s_addr,
+                           ip->ip_dst.s_addr, htons(sizeof(struct tcphdr) +
+                           IPPROTO_TCP + len + optlen));
+               }
 
                /* IP version must be set here for ipv4/ipv6 checking later */
                KASSERT(ip->ip_v == IPVERSION,
@@ -1473,8 +1523,10 @@ send:
         * NB: Don't set DF on small MTU/MSS to have a safe fallback.
         */
        if (V_path_mtu_discovery && tp->t_maxseg > V_tcp_minmss) {
-               ip->ip_off |= htons(IP_DF);
                tp->t_flags2 |= TF2_PLPMTU_PMTUD;
+               if (tp->t_port == 0 || len < V_tcp_minmss) {
+                       ip->ip_off |= htons(IP_DF);
+               }
        } else {
                tp->t_flags2 &= ~TF2_PLPMTU_PMTUD;
        }
diff --git a/sys/netinet/tcp_stacks/bbr.c b/sys/netinet/tcp_stacks/bbr.c
index cc20d6bf52ca..1ee8d26446fd 100644
--- a/sys/netinet/tcp_stacks/bbr.c
+++ b/sys/netinet/tcp_stacks/bbr.c
@@ -11969,14 +11969,10 @@ bbr_output_wtime(struct tcpcb *tp, const struct timeval *tv)
 #endif
        struct tcp_bbr *bbr;
        struct tcphdr *th;
-#ifdef NETFLIX_TCPOUDP
        struct udphdr *udp = NULL;
-#endif
        u_char opt[TCP_MAXOLEN];
        unsigned ipoptlen, optlen, hdrlen;
-#ifdef NETFLIX_TCPOUDP
        unsigned ulen;
-#endif
        uint32_t bbr_seq;
        uint32_t delay_calc=0;
        uint8_t doing_tlp = 0;
@@ -12991,10 +12987,8 @@ send:
                /* Maximum segment size. */
                if (flags & TH_SYN) {
                        to.to_mss = tcp_mssopt(&inp->inp_inc);
-#ifdef NETFLIX_TCPOUDP
                        if (tp->t_port)
                                to.to_mss -= V_tcp_udp_tunneling_overhead;
-#endif
                        to.to_flags |= TOF_MSS;
                        /*
                         * On SYN or SYN|ACK transmits on TFO connections,
@@ -13063,7 +13057,6 @@ send:
                    !(to.to_flags & TOF_FASTOPEN))
                        len = 0;
        }
-#ifdef NETFLIX_TCPOUDP
        if (tp->t_port) {
                if (V_tcp_udp_tunneling_port == 0) {
                        /* The port was removed?? */
@@ -13072,7 +13065,6 @@ send:
                }
                hdrlen += sizeof(struct udphdr);
        }
-#endif
 #ifdef INET6
        if (isipv6)
                ipoptlen = ip6_optlen(tp->t_inpcb);
@@ -13408,7 +13400,6 @@ send:
 #ifdef INET6
        if (isipv6) {
                ip6 = mtod(m, struct ip6_hdr *);
-#ifdef NETFLIX_TCPOUDP
                if (tp->t_port) {
                        udp = (struct udphdr *)((caddr_t)ip6 + ipoptlen + sizeof(struct ip6_hdr));
                        udp->uh_sport = htons(V_tcp_udp_tunneling_port);
@@ -13417,17 +13408,9 @@ send:
                        udp->uh_ulen = htons(ulen);
                        th = (struct tcphdr *)(udp + 1);
                } else {
-#endif
                        th = (struct tcphdr *)(ip6 + 1);
-
-#ifdef NETFLIX_TCPOUDP
                }
-#endif
-               tcpip_fillheaders(inp,
-#ifdef NETFLIX_TCPOUDP
-                                 tp->t_port,
-#endif
-                                 ip6, th);
+               tcpip_fillheaders(inp, tp->t_port, ip6, th);
        } else
 #endif                         /* INET6 */
        {
@@ -13435,7 +13418,6 @@ send:
 #ifdef TCPDEBUG
                ipov = (struct ipovly *)ip;
 #endif
-#ifdef NETFLIX_TCPOUDP
                if (tp->t_port) {
                        udp = (struct udphdr *)((caddr_t)ip + ipoptlen + sizeof(struct ip));
                        udp->uh_sport = htons(V_tcp_udp_tunneling_port);
@@ -13443,14 +13425,10 @@ send:
                        ulen = hdrlen + len - sizeof(struct ip);
                        udp->uh_ulen = htons(ulen);
                        th = (struct tcphdr *)(udp + 1);
-               } else
-#endif
+               } else {
                        th = (struct tcphdr *)(ip + 1);
-               tcpip_fillheaders(inp,
-#ifdef NETFLIX_TCPOUDP
-                                 tp->t_port,
-#endif
-                                 ip, th);
+               }
+               tcpip_fillheaders(inp, tp->t_port, ip, th);
        }
        /*
         * If we are doing retransmissions, then snd_nxt will not reflect
@@ -13600,7 +13578,6 @@ send:
                 * ip6_plen is not need to be filled now, and will be filled
                 * in ip6_output.
                 */
-#ifdef NETFLIX_TCPOUDP
                if (tp->t_port) {
                        m->m_pkthdr.csum_flags = CSUM_UDP_IPV6;
                        m->m_pkthdr.csum_data = offsetof(struct udphdr, uh_sum);
@@ -13608,14 +13585,11 @@ send:
                        th->th_sum = htons(0);
                        UDPSTAT_INC(udps_opackets);
                } else {
-#endif
                        csum_flags = m->m_pkthdr.csum_flags = CSUM_TCP_IPV6;
                        m->m_pkthdr.csum_data = offsetof(struct tcphdr, th_sum);
                        th->th_sum = in6_cksum_pseudo(ip6, sizeof(struct tcphdr) +
                            optlen + len, IPPROTO_TCP, 0);
-#ifdef NETFLIX_TCPOUDP
                }
-#endif
        }
 #endif
 #if defined(INET6) && defined(INET)
@@ -13623,7 +13597,6 @@ send:
 #endif
 #ifdef INET
        {
-#ifdef NETFLIX_TCPOUDP
                if (tp->t_port) {
                        m->m_pkthdr.csum_flags = CSUM_UDP;
                        m->m_pkthdr.csum_data = offsetof(struct udphdr, uh_sum);
@@ -13632,15 +13605,12 @@ send:
                        th->th_sum = htons(0);
                        UDPSTAT_INC(udps_opackets);
                } else {
-#endif
                        csum_flags = m->m_pkthdr.csum_flags = CSUM_TCP;
                        m->m_pkthdr.csum_data = offsetof(struct tcphdr, th_sum);
                        th->th_sum = in_pseudo(ip->ip_src.s_addr,
                            ip->ip_dst.s_addr, htons(sizeof(struct tcphdr) +
                            IPPROTO_TCP + len + optlen));
-#ifdef NETFLIX_TCPOUDP
                }
-#endif
                /* IP version must be set here for ipv4/ipv6 checking later */
                KASSERT(ip->ip_v == IPVERSION,
                    ("%s: IP version incorrect: %d", __func__, ip->ip_v));
diff --git a/sys/netinet/tcp_stacks/rack.c b/sys/netinet/tcp_stacks/rack.c
index 0ee73a95a6d7..12827d1699d0 100644
--- a/sys/netinet/tcp_stacks/rack.c
+++ b/sys/netinet/tcp_stacks/rack.c
@@ -13008,10 +13008,8 @@ send:
                if (flags & TH_SYN) {
                        tp->snd_nxt = tp->iss;
                        to.to_mss = tcp_mssopt(&inp->inp_inc);
-#ifdef NETFLIX_TCPOUDP
                        if (tp->t_port)
                                to.to_mss -= V_tcp_udp_tunneling_overhead;
-#endif
                        to.to_flags |= TOF_MSS;
 
                        /*
@@ -13088,7 +13086,6 @@ send:
                    !(to.to_flags & TOF_FASTOPEN))
                        len = 0;
        }
-#ifdef NETFLIX_TCPOUDP
        if (tp->t_port) {
                if (V_tcp_udp_tunneling_port == 0) {
                        /* The port was removed?? */
@@ -13097,7 +13094,6 @@ send:
                }
                hdrlen += sizeof(struct udphdr);
        }
-#endif
 #ifdef INET6
        if (isipv6)
                ipoptlen = ip6_optlen(tp->t_inpcb);
@@ -13372,7 +13368,6 @@ send:
 #ifdef INET6
        if (isipv6) {
                ip6 = mtod(m, struct ip6_hdr *);
-#ifdef NETFLIX_TCPOUDP
                if (tp->t_port) {
                        udp = (struct udphdr *)((caddr_t)ip6 + ipoptlen + sizeof(struct ip6_hdr));
                        udp->uh_sport = htons(V_tcp_udp_tunneling_port);
@@ -13380,14 +13375,10 @@ send:
                        ulen = hdrlen + len - sizeof(struct ip6_hdr);
                        udp->uh_ulen = htons(ulen);
                        th = (struct tcphdr *)(udp + 1);
-               } else
-#endif
+               } else {
                        th = (struct tcphdr *)(ip6 + 1);
-               tcpip_fillheaders(inp,
-#ifdef NETFLIX_TCPOUDP
-                                 tp->t_port,
-#endif
-                                 ip6, th);
+               }
+               tcpip_fillheaders(inp, tp->t_port, ip6, th);
        } else
 #endif                         /* INET6 */
        {
@@ -13395,7 +13386,6 @@ send:
 #ifdef TCPDEBUG
                ipov = (struct ipovly *)ip;
 #endif
-#ifdef NETFLIX_TCPOUDP
                if (tp->t_port) {
                        udp = (struct udphdr *)((caddr_t)ip + ipoptlen + sizeof(struct ip));
                        udp->uh_sport = htons(V_tcp_udp_tunneling_port);
@@ -13403,14 +13393,10 @@ send:
                        ulen = hdrlen + len - sizeof(struct ip);
                        udp->uh_ulen = htons(ulen);
                        th = (struct tcphdr *)(udp + 1);
-               } else
-#endif
+               } else {
                        th = (struct tcphdr *)(ip + 1);
-               tcpip_fillheaders(inp,
-#ifdef NETFLIX_TCPOUDP
-                                 tp->t_port,
-#endif
-                                 ip, th);
+               }
+               tcpip_fillheaders(inp, tp->t_port, ip, th);
        }
        /*
         * Fill in fields, remembering maximum advertised window for use in
diff --git a/sys/netinet/tcp_subr.c b/sys/netinet/tcp_subr.c
index dff7767cd9cf..6bdeb3984aee 100644
--- a/sys/netinet/tcp_subr.c
+++ b/sys/netinet/tcp_subr.c
@@ -126,6 +126,8 @@ __FBSDID("$FreeBSD$");
 #ifdef TCP_OFFLOAD
 #include <netinet/tcp_offload.h>
 #endif
+#include <netinet/udp.h>
+#include <netinet/udp_var.h>
 
 #include <netipsec/ipsec_support.h>
 
@@ -501,6 +503,80 @@ tcp_switch_back_to_default(struct tcpcb *tp)
        }
 }
 
+static void
+tcp_recv_udp_tunneled_packet(struct mbuf *m, int off, struct inpcb *inp,
+    const struct sockaddr *sa, void *ctx)
+{
+       struct ip *iph;
+#ifdef INET6
+       struct ip6_hdr *ip6;
+#endif
+       struct udphdr *uh;
+       struct tcphdr *th;
+       int thlen;
+       uint16_t port;
+
+       TCPSTAT_INC(tcps_tunneled_pkts);
+       if ((m->m_flags & M_PKTHDR) == 0) {
+               /* Can't handle one that is not a pkt hdr */
+               TCPSTAT_INC(tcps_tunneled_errs);
+               goto out;
+       }
+       thlen = sizeof(struct tcphdr);
+       if (m->m_len < off + sizeof(struct udphdr) + thlen &&
+           (m =  m_pullup(m, off + sizeof(struct udphdr) + thlen)) == NULL) {
+               TCPSTAT_INC(tcps_tunneled_errs);
+               goto out;
+       }
+       iph = mtod(m, struct ip *);
+       uh = (struct udphdr *)((caddr_t)iph + off);
+       th = (struct tcphdr *)(uh + 1);
+       thlen = th->th_off << 2;
+       if (m->m_len < off + sizeof(struct udphdr) + thlen) {
+               m =  m_pullup(m, off + sizeof(struct udphdr) + thlen);
+               if (m == NULL) {
+                       TCPSTAT_INC(tcps_tunneled_errs);
+                       goto out;
+               } else {
+                       iph = mtod(m, struct ip *);
+                       uh = (struct udphdr *)((caddr_t)iph + off);
+                       th = (struct tcphdr *)(uh + 1);
+               }
+       }
+       m->m_pkthdr.tcp_tun_port = port = uh->uh_sport;
+       bcopy(th, uh, m->m_len - off);
+       m->m_len -= sizeof(struct udphdr);
+       m->m_pkthdr.len -= sizeof(struct udphdr);
+       /*
+        * We use the same algorithm for
+        * both UDP and TCP for c-sum. So
+        * the code in tcp_input will skip
+        * the checksum. So we do nothing
+        * with the flag (m->m_pkthdr.csum_flags).
+        */
+       switch (iph->ip_v) {
+#ifdef INET
+       case IPVERSION:
+               iph->ip_len = htons(ntohs(iph->ip_len) - sizeof(struct udphdr));
+               tcp_input_with_port(&m, &off, IPPROTO_TCP, port);
+               break;
+#endif
+#ifdef INET6
+       case IPV6_VERSION >> 4:
+               ip6 = mtod(m, struct ip6_hdr *);
+               ip6->ip6_plen = htons(ntohs(ip6->ip6_plen) - sizeof(struct udphdr));
+               tcp6_input_with_port(&m, &off, IPPROTO_TCP, port);
+               break;
+#endif
+       default:
+               goto out;
+               break;
+       }
+       return;
+out:
+       m_freem(m);
+}
+
 static int
 sysctl_net_inet_default_tcp_functions(SYSCTL_HANDLER_ARGS)
 {
@@ -598,6 +674,183 @@ SYSCTL_PROC(_net_inet_tcp, OID_AUTO, functions_available,
     NULL, 0, sysctl_net_inet_list_available, "A",
     "list available TCP Function sets");
 
+VNET_DEFINE(int, tcp_udp_tunneling_port) = TCP_TUNNELING_PORT_DEFAULT;
+
+#ifdef INET
+VNET_DEFINE(struct socket *, udp4_tun_socket) = NULL;
+#define        V_udp4_tun_socket       VNET(udp4_tun_socket)
+#endif
+#ifdef INET6
+VNET_DEFINE(struct socket *, udp6_tun_socket) = NULL;
+#define        V_udp6_tun_socket       VNET(udp6_tun_socket)
+#endif
+
+static void
+tcp_over_udp_stop(void)
+{
+       /*
+        * This function assumes sysctl caller holds inp_rinfo_lock()
+        * for writting!
+        */
+#ifdef INET
+       if (V_udp4_tun_socket != NULL) {
+               soclose(V_udp4_tun_socket);
+               V_udp4_tun_socket = NULL;
+       }
+#endif
+#ifdef INET6
+       if (V_udp6_tun_socket != NULL) {
+               soclose(V_udp6_tun_socket);
+               V_udp6_tun_socket = NULL;
+       }
+#endif
+}
+
+static int
+tcp_over_udp_start(void)
+{
+       uint16_t port;
+       int ret;
+#ifdef INET
+       struct sockaddr_in sin;
+#endif
+#ifdef INET6
+       struct sockaddr_in6 sin6;
+#endif
+       /*
+        * This function assumes sysctl caller holds inp_info_rlock()
+        * for writting!
+        */
+       port = V_tcp_udp_tunneling_port;
+       if (ntohs(port) == 0) {
+               /* Must have a port set */
+               return (EINVAL);
+       }
+#ifdef INET
+       if (V_udp4_tun_socket != NULL) {
+               /* Already running -- must stop first */
+               return (EALREADY);
+       }
+#endif
+#ifdef INET6
+       if (V_udp6_tun_socket != NULL) {
+               /* Already running -- must stop first */
+               return (EALREADY);
+       }
+#endif
+#ifdef INET
+       if ((ret = socreate(PF_INET, &V_udp4_tun_socket,
+           SOCK_DGRAM, IPPROTO_UDP,
+           curthread->td_ucred, curthread))) {
+               tcp_over_udp_stop();
+               return (ret);
+       }
+       /* Call the special UDP hook. */
+       if ((ret = udp_set_kernel_tunneling(V_udp4_tun_socket,
+           tcp_recv_udp_tunneled_packet,
+           tcp_ctlinput_viaudp,
+           NULL))) {
+               tcp_over_udp_stop();
+               return (ret);
+       }
+       /* Ok, we have a socket, bind it to the port. */
+       memset(&sin, 0, sizeof(struct sockaddr_in));
+       sin.sin_len = sizeof(struct sockaddr_in);
+       sin.sin_family = AF_INET;
+       sin.sin_port = htons(port);
+       if ((ret = sobind(V_udp4_tun_socket,
+           (struct sockaddr *)&sin, curthread))) {
+               tcp_over_udp_stop();
+               return (ret);
+       }
+#endif
+#ifdef INET6
+       if ((ret = socreate(PF_INET6, &V_udp6_tun_socket,
+           SOCK_DGRAM, IPPROTO_UDP,
+           curthread->td_ucred, curthread))) {
+               tcp_over_udp_stop();
+               return (ret);
+       }
+       /* Call the special UDP hook. */
+       if ((ret = udp_set_kernel_tunneling(V_udp6_tun_socket,
+           tcp_recv_udp_tunneled_packet,
+           tcp6_ctlinput_viaudp,
+           NULL))) {
+               tcp_over_udp_stop();
+               return (ret);
+       }
+       /* Ok, we have a socket, bind it to the port. */
+       memset(&sin6, 0, sizeof(struct sockaddr_in6));
+       sin6.sin6_len = sizeof(struct sockaddr_in6);
+       sin6.sin6_family = AF_INET6;
+       sin6.sin6_port = htons(port);
+       if ((ret = sobind(V_udp6_tun_socket,
+           (struct sockaddr *)&sin6, curthread))) {
+               tcp_over_udp_stop();
+               return (ret);
+       }
+#endif
+       return (0);
+}
+
+static int
+sysctl_net_inet_tcp_udp_tunneling_port_check(SYSCTL_HANDLER_ARGS)
+{
+       int error;
+       uint32_t old, new;
+
+       old = V_tcp_udp_tunneling_port;
+       new = old;
+       error = sysctl_handle_int(oidp, &new, 0, req);
+       if ((error == 0) &&
+           (req->newptr != NULL)) {
+               if ((new < TCP_TUNNELING_PORT_MIN) ||
+                   (new > TCP_TUNNELING_PORT_MAX)) {
+                       error = EINVAL;
+               } else {
+                       V_tcp_udp_tunneling_port = new;
+                       if (old != 0) {
+                               tcp_over_udp_stop();
+                       }
+                       if (new != 0) {
+                               error = tcp_over_udp_start();
+                       }
+               }
+       }
+       return (error);
+}
+
+SYSCTL_PROC(_net_inet_tcp, OID_AUTO, udp_tunneling_port,
+    CTLFLAG_VNET | CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_MPSAFE,
+    &VNET_NAME(tcp_udp_tunneling_port),
+    0, &sysctl_net_inet_tcp_udp_tunneling_port_check, "IU",
+    "Tunneling port for tcp over udp");
+
+VNET_DEFINE(int, tcp_udp_tunneling_overhead) = TCP_TUNNELING_OVERHEAD_DEFAULT;
+
+static int
+sysctl_net_inet_tcp_udp_tunneling_overhead_check(SYSCTL_HANDLER_ARGS)
+{
+       int error, new;
+
+       new = V_tcp_udp_tunneling_overhead;
+       error = sysctl_handle_int(oidp, &new, 0, req);
+       if (error == 0 && req->newptr) {
+               if ((new < TCP_TUNNELING_OVERHEAD_MIN) ||
+                   (new > TCP_TUNNELING_OVERHEAD_MAX))
+                       error = EINVAL;
+               else
+                       V_tcp_udp_tunneling_overhead = new;
+       }
+       return (error);
+}
+
+SYSCTL_PROC(_net_inet_tcp, OID_AUTO, udp_tunneling_overhead,
+    CTLFLAG_VNET | CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_MPSAFE,
+    &VNET_NAME(tcp_udp_tunneling_overhead),
+    0, &sysctl_net_inet_tcp_udp_tunneling_overhead_check, "IU",
+    "MSS reduction when using tcp over udp");
+
 /*
  * Exports one (struct tcp_function_info) for each alias/name.
  */
@@ -1305,7 +1558,7 @@ tcp_fini(void *xtp)
  * of the tcpcb each time to conserve mbufs.
  */
 void
-tcpip_fillheaders(struct inpcb *inp, void *ip_ptr, void *tcp_ptr)
+tcpip_fillheaders(struct inpcb *inp, uint16_t port, void *ip_ptr, void *tcp_ptr)
 {
        struct tcphdr *th = (struct tcphdr *)tcp_ptr;
 
@@ -1320,7 +1573,10 @@ tcpip_fillheaders(struct inpcb *inp, void *ip_ptr, void *tcp_ptr)
                        (inp->inp_flow & IPV6_FLOWINFO_MASK);
                ip6->ip6_vfc = (ip6->ip6_vfc & ~IPV6_VERSION_MASK) |
                        (IPV6_VERSION & IPV6_VERSION_MASK);
-               ip6->ip6_nxt = IPPROTO_TCP;
+               if (port == 0)
+                       ip6->ip6_nxt = IPPROTO_TCP;
+               else
+                       ip6->ip6_nxt = IPPROTO_UDP;
                ip6->ip6_plen = htons(sizeof(struct tcphdr));
                ip6->ip6_src = inp->in6p_laddr;
                ip6->ip6_dst = inp->in6p_faddr;
@@ -1342,7 +1598,10 @@ tcpip_fillheaders(struct inpcb *inp, void *ip_ptr, void *tcp_ptr)
                ip->ip_off = 0;
                ip->ip_ttl = inp->inp_ip_ttl;
                ip->ip_sum = 0;
-               ip->ip_p = IPPROTO_TCP;
+               if (port == 0)
+                       ip->ip_p = IPPROTO_TCP;
+               else
+                       ip->ip_p = IPPROTO_UDP;
                ip->ip_src = inp->inp_laddr;
                ip->ip_dst = inp->inp_faddr;
        }
@@ -1372,7 +1631,7 @@ tcpip_maketemplate(struct inpcb *inp)
        t = malloc(sizeof(*t), M_TEMP, M_NOWAIT);
        if (t == NULL)
                return (NULL);
-       tcpip_fillheaders(inp, (void *)&t->tt_ipgen, (void *)&t->tt_t);
+       tcpip_fillheaders(inp, 0, (void *)&t->tt_ipgen, (void *)&t->tt_t);
        return (t);
 }
 
@@ -1398,14 +1657,16 @@ tcp_respond(struct tcpcb *tp, void *ipgen, struct tcphdr *th, struct mbuf *m,
        struct inpcb *inp;
        struct ip *ip;
        struct mbuf *optm;
+       struct udphdr *uh = NULL;
        struct tcphdr *nth;
        u_char *optp;
 #ifdef INET6
        struct ip6_hdr *ip6;
        int isipv6;
 #endif /* INET6 */
-       int optlen, tlen, win;
+       int optlen, tlen, win, ulen;
        bool incl_opts;
+       uint16_t port;
 
        KASSERT(tp != NULL || m != NULL, ("tcp_respond: tp and m both NULL"));
        NET_EPOCH_ASSERT();
@@ -1423,6 +1684,19 @@ tcp_respond(struct tcpcb *tp, void *ipgen, struct tcphdr *th, struct mbuf *m,
        } else
                inp = NULL;
 
+       if (m != NULL) {
+#ifdef INET6
+               if (isipv6 && ip6 && (ip6->ip6_nxt == IPPROTO_UDP))
+                       port = m->m_pkthdr.tcp_tun_port;
+               else
*** 1128 LINES SKIPPED ***
_______________________________________________
dev-commits-src-all@freebsd.org mailing list
https://lists.freebsd.org/mailman/listinfo/dev-commits-src-all
To unsubscribe, send any mail to "dev-commits-src-all-unsubscr...@freebsd.org"

Reply via email to