Hi,

Claudio suggested implementing TCP segmentation offloading (TSO) in
software as a fallback if the hardware cannot do it.  So I took Jan's
diff, ripped that part out and polished it.

The result is that software TSO can be a bit faster than regular TCP.

http://bluhm.genua.de/perform/results/2023-05-04T07%3A05%3A11Z/perform.html

The left column is OpenBSD-current, the middle one is with this diff
but TSO disabled, and the right one is with software TSO enabled.  So
comparing the middle and right columns shows the effect.

http://bluhm.genua.de/perform/results/2023-05-04T07%3A05%3A11Z/patch-sys-tso-soft-disable.0/btrace/tcpbench_-S1000000_-t10_10.3.45.35-btrace-kstack.0.svg
http://bluhm.genua.de/perform/results/2023-05-04T07%3A05%3A11Z/patch-sys-tso-soft.0/btrace/tcpbench_-S1000000_-t10_10.3.45.35-btrace-kstack.0.svg

When looking at kstack you see that the m_copym() load moves from
tcp_output() to ip_output(), where the large packet is chopped.  So
pf_test() needs fewer CPU cycles, which explains the increased
throughput.

I round the large TCP packet down to a multiple of the maximum segment
size and require at least two segments.  This avoids small residual
packets.

The implementation in pf_route() is still missing.  Also, IP option
handling has to be fixed and the TCP output size decisions have to be
rechecked.

I just wanted to show where this is heading.  When software TSO is
done, it should be easy to use network interface hardware to speed
things up.

bluhm

Index: netinet/in.h
===================================================================
RCS file: /data/mirror/openbsd/cvs/src/sys/netinet/in.h,v
retrieving revision 1.142
diff -u -p -r1.142 in.h
--- netinet/in.h        11 Apr 2023 00:45:09 -0000      1.142
+++ netinet/in.h        5 May 2023 15:02:12 -0000
@@ -780,6 +780,7 @@ int    in_canforward(struct in_addr);
 int       in_cksum(struct mbuf *, int);
 int       in4_cksum(struct mbuf *, u_int8_t, int, int);
 void      in_proto_cksum_out(struct mbuf *, struct ifnet *);
+int       in_ifcap_cksum(struct mbuf *, struct ifnet *, int);
 void      in_ifdetach(struct ifnet *);
 int       in_mask2len(struct in_addr *);
 void      in_len2mask(struct in_addr *, int);
Index: netinet/ip_output.c
===================================================================
RCS file: /data/mirror/openbsd/cvs/src/sys/netinet/ip_output.c,v
retrieving revision 1.382
diff -u -p -r1.382 ip_output.c
--- netinet/ip_output.c 12 Aug 2022 17:04:16 -0000      1.382
+++ netinet/ip_output.c 5 May 2023 15:02:12 -0000
@@ -84,7 +84,6 @@ void ip_mloopback(struct ifnet *, struct
 static __inline u_int16_t __attribute__((__unused__))
     in_cksum_phdr(u_int32_t, u_int32_t, u_int32_t);
 void in_delayed_cksum(struct mbuf *);
-int in_ifcap_cksum(struct mbuf *, struct ifnet *, int);
 
 int ip_output_ipsec_lookup(struct mbuf *m, int hlen, struct inpcb *inp,
     struct tdb **, int ipsecflowinfo);
@@ -104,7 +103,7 @@ ip_output(struct mbuf *m, struct mbuf *o
 {
        struct ip *ip;
        struct ifnet *ifp = NULL;
-       struct mbuf_list fml;
+       struct mbuf_list ml;
        int hlen = sizeof (struct ip);
        int error = 0;
        struct route iproute;
@@ -469,6 +468,24 @@ sendit:
                goto done;
        }
 
+       if (ISSET(m->m_pkthdr.csum_flags, M_TCP_TSO) &&
+           m->m_pkthdr.ph_mss <= mtu) {
+               error = tcp_chopper(m, &ml, ifp, m->m_pkthdr.ph_mss);
+               if (error)
+                       goto done;
+
+               while ((m = ml_dequeue(&ml)) != NULL) {
+                       error = ifp->if_output(ifp, m, sintosa(dst), ro->ro_rt);
+                       if (error)
+                               break;
+               }
+               if (error)
+                       ml_purge(&ml);
+               else
+                       tcpstat_inc(tcps_outswtso);
+               goto done;
+       }
+
        /*
         * Too large for interface; fragment if possible.
         * Must be able to put at least 8 bytes per fragment.
@@ -505,17 +522,17 @@ sendit:
                goto bad;
        }
 
-       error = ip_fragment(m, &fml, ifp, mtu);
+       error = ip_fragment(m, &ml, ifp, mtu);
        if (error)
                goto done;
 
-       while ((m = ml_dequeue(&fml)) != NULL) {
+       while ((m = ml_dequeue(&ml)) != NULL) {
                error = ifp->if_output(ifp, m, sintosa(dst), ro->ro_rt);
                if (error)
                        break;
        }
        if (error)
-               ml_purge(&fml);
+               ml_purge(&ml);
        else
                ipstat_inc(ips_fragmented);
 
@@ -677,16 +694,15 @@ ip_output_ipsec_send(struct tdb *tdb, st
 #endif /* IPSEC */
 
 int
-ip_fragment(struct mbuf *m0, struct mbuf_list *fml, struct ifnet *ifp,
+ip_fragment(struct mbuf *m0, struct mbuf_list *ml, struct ifnet *ifp,
     u_long mtu)
 {
-       struct mbuf *m;
        struct ip *ip;
        int firstlen, hlen, tlen, len, off;
        int error;
 
-       ml_init(fml);
-       ml_enqueue(fml, m0);
+       ml_init(ml);
+       ml_enqueue(ml, m0);
 
        ip = mtod(m0, struct ip *);
        hlen = ip->ip_hl << 2;
@@ -705,10 +721,11 @@ ip_fragment(struct mbuf *m0, struct mbuf
        in_proto_cksum_out(m0, NULL);
 
        /*
-        * Loop through length of segment after first fragment,
+        * Loop through length of payload after first fragment,
         * make new header and copy data of each part and link onto chain.
         */
        for (off = hlen + firstlen; off < tlen; off += len) {
+               struct mbuf *m;
                struct ip *mhip;
                int mhlen;
 
@@ -717,8 +734,7 @@ ip_fragment(struct mbuf *m0, struct mbuf
                        error = ENOBUFS;
                        goto bad;
                }
-               ml_enqueue(fml, m);
-
+               ml_enqueue(ml, m);
                if ((error = m_dup_pkthdr(m, m0, M_DONTWAIT)) != 0)
                        goto bad;
                m->m_data += max_linkhdr;
@@ -762,25 +778,26 @@ ip_fragment(struct mbuf *m0, struct mbuf
         * Update first fragment by trimming what's been copied out
         * and updating header, then send each fragment (in order).
         */
-       m = m0;
-       m_adj(m, hlen + firstlen - tlen);
-       ip->ip_off |= htons(IP_MF);
-       ip->ip_len = htons(m->m_pkthdr.len);
+       if (hlen + firstlen < tlen) {
+               m_adj(m0, hlen + firstlen - tlen);
+               ip->ip_off |= htons(IP_MF);
+       }
+       ip->ip_len = htons(m0->m_pkthdr.len);
 
        ip->ip_sum = 0;
-       if (in_ifcap_cksum(m, ifp, IFCAP_CSUM_IPv4))
-               m->m_pkthdr.csum_flags |= M_IPV4_CSUM_OUT;
+       if (in_ifcap_cksum(m0, ifp, IFCAP_CSUM_IPv4))
+               m0->m_pkthdr.csum_flags |= M_IPV4_CSUM_OUT;
        else {
                ipstat_inc(ips_outswcsum);
-               ip->ip_sum = in_cksum(m, hlen);
+               ip->ip_sum = in_cksum(m0, hlen);
        }
 
-       ipstat_add(ips_ofragments, ml_len(fml));
+       ipstat_add(ips_ofragments, ml_len(ml));
        return (0);
 
 bad:
        ipstat_inc(ips_odropped);
-       ml_purge(fml);
+       ml_purge(ml);
        return (error);
 }
 
@@ -1870,7 +1887,11 @@ in_proto_cksum_out(struct mbuf *m, struc
                u_int16_t csum = 0, offset;
 
                offset = ip->ip_hl << 2;
-               if (m->m_pkthdr.csum_flags & (M_TCP_CSUM_OUT|M_UDP_CSUM_OUT))
+               if (m->m_pkthdr.csum_flags & M_TCP_TSO)
+                       csum = in_cksum_phdr(ip->ip_src.s_addr,
+                           ip->ip_dst.s_addr, htonl(ip->ip_p));
+               else if (m->m_pkthdr.csum_flags &
+                   (M_TCP_CSUM_OUT|M_UDP_CSUM_OUT))
                        csum = in_cksum_phdr(ip->ip_src.s_addr,
                            ip->ip_dst.s_addr, htonl(ntohs(ip->ip_len) -
                            offset + ip->ip_p));
Index: netinet/tcp_output.c
===================================================================
RCS file: /data/mirror/openbsd/cvs/src/sys/netinet/tcp_output.c,v
retrieving revision 1.135
diff -u -p -r1.135 tcp_output.c
--- netinet/tcp_output.c        25 Apr 2023 22:56:28 -0000      1.135
+++ netinet/tcp_output.c        5 May 2023 15:02:12 -0000
@@ -210,6 +210,7 @@ tcp_output(struct tcpcb *tp)
 #ifdef TCP_ECN
        int needect;
 #endif
+       int tso;
 
        if (tp->t_flags & TF_BLOCKOUTPUT) {
                tp->t_flags |= TF_NEEDOUTPUT;
@@ -279,6 +280,7 @@ again:
        }
 
        sendalot = 0;
+       tso = 0;
        /*
         * If in persist timeout with window of 0, send 1 byte.
         * Otherwise, if window is small but nonzero
@@ -346,8 +348,23 @@ again:
        txmaxseg = ulmin(so->so_snd.sb_hiwat / 2, tp->t_maxseg);
 
        if (len > txmaxseg) {
-               len = txmaxseg;
-               sendalot = 1;
+               if (1 &&
+#ifdef TCP_SIGNATURE
+                   ((tp->t_flags & TF_SIGNATURE) == 0) &&
+#endif
+                   len >= 2 * tp->t_maxseg &&
+                   tp->rcv_numsacks == 0 && sack_rxmit == 0 &&
+                   !(flags & (TH_SYN|TH_RST|TH_FIN))) {
+                       tso = 1;
+                       /* avoid small chopped packets */
+                       if (len > (len / tp->t_maxseg) * tp->t_maxseg) {
+                               len = (len / tp->t_maxseg) * tp->t_maxseg;
+                               sendalot = 1;
+                       }
+               } else {
+                       len = txmaxseg;
+                       sendalot = 1;
+               }
        }
        if (off + len < so->so_snd.sb_cc)
                flags &= ~TH_FIN;
@@ -365,7 +382,7 @@ again:
         * to send into a small window), then must resend.
         */
        if (len) {
-               if (len == txmaxseg)
+               if (len >= txmaxseg)
                        goto send;
                if ((idle || (tp->t_flags & TF_NODELAY)) &&
                    len + off >= so->so_snd.sb_cc && !soissending(so) &&
@@ -616,10 +633,19 @@ send:
        /*
         * Adjust data length if insertion of options will
         * bump the packet length beyond the t_maxopd length.
+        * Clear the FIN bit because we cut off the tail of
+        * the segment.
         */
        if (len > tp->t_maxopd - optlen) {
-               len = tp->t_maxopd - optlen;
-               sendalot = 1;
+               if (tso) {
+                       if (len + hdrlen + max_linkhdr > MAXMCLBYTES) {
+                               len = MAXMCLBYTES - hdrlen - max_linkhdr;
+                               sendalot = 1;
+                       }
+               } else {
+                       len = tp->t_maxopd - optlen;
+                       sendalot = 1;
+               }
                flags &= ~TH_FIN;
        }
 
@@ -723,6 +749,12 @@ send:
        m->m_pkthdr.ph_ifidx = 0;
        m->m_pkthdr.len = hdrlen + len;
 
+       /* Enable TSO and specify the size of the resulting segments. */
+       if (tso) {
+               m->m_pkthdr.csum_flags |= M_TCP_TSO;
+               m->m_pkthdr.ph_mss = tp->t_maxseg;
+       }
+
        if (!tp->t_template)
                panic("tcp_output");
 #ifdef DIAGNOSTIC
@@ -1152,4 +1184,180 @@ tcp_setpersist(struct tcpcb *tp)
        TCP_TIMER_ARM(tp, TCPT_PERSIST, msec);
        if (tp->t_rxtshift < TCP_MAXRXTSHIFT)
                tp->t_rxtshift++;
+}
+
+int
+tcp_chopper(struct mbuf *m0, struct mbuf_list *ml, struct ifnet *ifp,
+    u_long mss)
+{
+       struct ip *ip = NULL;
+#ifdef INET6
+       struct ip6_hdr *ip6 = NULL;
+#endif
+       struct tcphdr *th;
+       int firstlen, iphlen, hlen, tlen, off;
+       int error;
+
+       ml_init(ml);
+       ml_enqueue(ml, m0);
+
+       ip = mtod(m0, struct ip *);
+       switch (ip->ip_v) {
+       case 4:
+               iphlen = ip->ip_hl << 2;
+               if (ISSET(ip->ip_off, htons(IP_OFFMASK | IP_MF)) ||
+                   iphlen != sizeof(struct ip) || ip->ip_p != IPPROTO_TCP) {
+                       /* only TCP without fragment or IP option supported */
+                       error = EPROTOTYPE;
+                       goto bad;
+               }
+               break;
+#ifdef INET6
+       case 6:
+               ip = NULL;
+               ip6 = mtod(m0, struct ip6_hdr *);
+               iphlen = sizeof(struct ip6_hdr);
+               if (ip6->ip6_nxt != IPPROTO_TCP) {
+                       /* only TCP without IPv6 header chain supported */
+                       error = EPROTOTYPE;
+                       goto bad;
+               }
+               break;
+#endif
+       default:
+               panic("%s: unknown ip version %d", __func__, ip->ip_v);
+       }
+
+       tlen = m0->m_pkthdr.len;
+       if (tlen < iphlen + sizeof(struct tcphdr)) {
+               error = EMSGSIZE;
+               goto bad;
+       }
+       /* IP and TCP header should be contiguous, this check is paranoia */
+       if (m0->m_len < iphlen + sizeof(*th)) {
+               ml_dequeue(ml);
+               if ((m0 = m_pullup(m0, iphlen + sizeof(*th))) == NULL) {
+                       error = ENOBUFS;
+                       goto bad;
+               }
+               ml_enqueue(ml, m0);
+       }
+       th = (struct tcphdr *)(mtod(m0, caddr_t) + iphlen);
+       hlen = iphlen + (th->th_off << 2);
+       if (tlen < hlen) {
+               error = EMSGSIZE;
+               goto bad;
+       }
+       firstlen = MIN(tlen - hlen, mss);
+
+       CLR(m0->m_pkthdr.csum_flags, M_TCP_TSO);
+
+       /*
+        * Loop through length of payload after first segment,
+        * make new header and copy data of each part and link onto chain.
+        */
+       for (off = hlen + firstlen; off < tlen; off += mss) {
+               struct mbuf *m;
+               struct tcphdr *mhth;
+               int len;
+
+               len = MIN(tlen - off, mss);
+
+               MGETHDR(m, M_DONTWAIT, MT_HEADER);
+               if (m == NULL) {
+                       error = ENOBUFS;
+                       goto bad;
+               }
+               ml_enqueue(ml, m);
+               if ((error = m_dup_pkthdr(m, m0, M_DONTWAIT)) != 0)
+                       goto bad;
+
+               /* IP and TCP header to the end, space for link layer header */
+               m->m_len = hlen;
+               m_align(m, hlen);
+
+               /* copy and adjust TCP header */
+               mhth = (struct tcphdr *)(mtod(m, caddr_t) + iphlen);
+               memcpy(mhth, th, hlen - iphlen);
+               mhth->th_seq = htonl(ntohl(th->th_seq) + (off - hlen));
+               if (off + len < tlen)
+                       CLR(mhth->th_flags, TH_PUSH|TH_FIN);
+
+               /* add mbuf chain with payload */
+               m->m_pkthdr.len = hlen + len;
+               if ((m->m_next = m_copym(m0, off, len, M_DONTWAIT)) == NULL) {
+                       error = ENOBUFS;
+                       goto bad;
+               }
+
+               /* copy and adjust IP header, calculate checksum */
+               SET(m->m_pkthdr.csum_flags, M_TCP_CSUM_OUT);
+               mhth->th_sum = 0;
+               if (ip) {
+                       struct ip *mhip;
+
+                       mhip = mtod(m, struct ip *);
+                       *mhip = *ip;
+                       mhip->ip_len = htons(hlen + len);
+                       mhip->ip_id = htons(ip_randomid());
+                       mhip->ip_sum = 0;
+                       if (ifp && in_ifcap_cksum(m, ifp, IFCAP_CSUM_IPv4)) {
+                               m->m_pkthdr.csum_flags |= M_IPV4_CSUM_OUT;
+                       } else {
+                               ipstat_inc(ips_outswcsum);
+                               mhip->ip_sum = in_cksum(m, iphlen);
+                       }
+                       in_proto_cksum_out(m, ifp);
+               }
+#ifdef INET6
+               if (ip6) {
+                       struct ip6_hdr *mhip6;
+
+                       mhip6 = mtod(m, struct ip6_hdr *);
+                       *mhip6 = *ip6;
+                       mhip6->ip6_plen = htons(hlen - iphlen + len);
+                       in6_proto_cksum_out(m, ifp);
+               }
+#endif
+       }
+
+       /*
+        * Update first segment by trimming what's been copied out
+        * and updating header, then send each segment (in order).
+        */
+       if (hlen + firstlen < tlen) {
+               m_adj(m0, hlen + firstlen - tlen);
+               CLR(th->th_flags, TH_PUSH|TH_FIN);
+       }
+       /* adjust IP header, calculate checksum */
+       SET(m0->m_pkthdr.csum_flags, M_TCP_CSUM_OUT);
+       th->th_sum = 0;
+       if (ip) {
+               ip->ip_len = htons(m0->m_pkthdr.len);
+               ip->ip_sum = 0;
+               if (ifp && in_ifcap_cksum(m0, ifp, IFCAP_CSUM_IPv4)) {
+                       m0->m_pkthdr.csum_flags |= M_IPV4_CSUM_OUT;
+               } else {
+                       ipstat_inc(ips_outswcsum);
+                       ip->ip_sum = in_cksum(m0, iphlen);
+               }
+               in_proto_cksum_out(m0, ifp);
+       }
+#ifdef INET6
+       if (ip6) {
+               ip6->ip6_plen = htons(m0->m_pkthdr.len - iphlen);
+               in6_proto_cksum_out(m0, ifp);
+       }
+#endif
+       return 0;
+
+ bad:
+       if (ip)
+               ipstat_inc(ips_odropped);
+#ifdef INET6
+       if (ip6)
+               ip6stat_inc(ip6s_odropped);
+#endif
+       ml_purge(ml);
+       return error;
 }
Index: netinet/tcp_usrreq.c
===================================================================
RCS file: /data/mirror/openbsd/cvs/src/sys/netinet/tcp_usrreq.c,v
retrieving revision 1.217
diff -u -p -r1.217 tcp_usrreq.c
--- netinet/tcp_usrreq.c        14 Mar 2023 00:24:05 -0000      1.217
+++ netinet/tcp_usrreq.c        5 May 2023 15:02:12 -0000
@@ -1335,6 +1335,7 @@ tcp_sysctl_tcpstat(void *oldp, size_t *o
        ASSIGN(tcps_sack_rcv_opts);
        ASSIGN(tcps_sack_snd_opts);
        ASSIGN(tcps_sack_drop_opts);
+       ASSIGN(tcps_outswtso);
 
 #undef ASSIGN
 
Index: netinet/tcp_var.h
===================================================================
RCS file: /data/mirror/openbsd/cvs/src/sys/netinet/tcp_var.h,v
retrieving revision 1.163
diff -u -p -r1.163 tcp_var.h
--- netinet/tcp_var.h   14 Mar 2023 00:24:05 -0000      1.163
+++ netinet/tcp_var.h   5 May 2023 15:02:12 -0000
@@ -442,6 +442,8 @@ struct      tcpstat {
        u_int64_t tcps_sack_rcv_opts;           /* SACK options received */
        u_int64_t tcps_sack_snd_opts;           /* SACK options sent */
        u_int64_t tcps_sack_drop_opts;          /* SACK options dropped */
+
+       u_int32_t tcps_outswtso;        /* output software chopped packets */
 };
 
 /*
@@ -614,6 +616,7 @@ enum tcpstat_counters {
        tcps_sack_rcv_opts,
        tcps_sack_snd_opts,
        tcps_sack_drop_opts,
+       tcps_outswtso,
        tcps_ncounters,
 };
 
@@ -706,6 +709,8 @@ struct tcpcb *
         tcp_newtcpcb(struct inpcb *, int);
 void    tcp_notify(struct inpcb *, int);
 int     tcp_output(struct tcpcb *);
+int     tcp_chopper(struct mbuf *, struct mbuf_list *, struct ifnet *,
+            u_long);
 void    tcp_pulloutofband(struct socket *, u_int, struct mbuf *, int);
 int     tcp_reass(struct tcpcb *, struct tcphdr *, struct mbuf *, int *);
 void    tcp_rscale(struct tcpcb *, u_long);
Index: netinet6/ip6_output.c
===================================================================
RCS file: /data/mirror/openbsd/cvs/src/sys/netinet6/ip6_output.c,v
retrieving revision 1.272
diff -u -p -r1.272 ip6_output.c
--- netinet6/ip6_output.c       12 Nov 2022 02:50:59 -0000      1.272
+++ netinet6/ip6_output.c       5 May 2023 15:05:28 -0000
@@ -165,7 +165,7 @@ ip6_output(struct mbuf *m, struct ip6_pk
 {
        struct ip6_hdr *ip6;
        struct ifnet *ifp = NULL;
-       struct mbuf_list fml;
+       struct mbuf_list ml;
        int hlen, tlen;
        struct route_in6 ip6route;
        struct rtentry *rt = NULL;
@@ -688,7 +688,8 @@ reroute:
                dontfrag = 1;
        else
                dontfrag = 0;
-       if (dontfrag && tlen > ifp->if_mtu) {   /* case 2-b */
+       if (dontfrag && tlen > ifp->if_mtu &&
+           !ISSET(m->m_pkthdr.csum_flags, M_TCP_TSO)) {        /* case 2-b */
 #ifdef IPSEC
                if (ip_mtudisc)
                        ipsec_adjust_mtu(m, mtu);
@@ -705,6 +706,24 @@ reroute:
                goto done;
        }
 
+       if (ISSET(m->m_pkthdr.csum_flags, M_TCP_TSO) &&
+           m->m_pkthdr.ph_mss <= mtu) {
+               error = tcp_chopper(m, &ml, ifp, m->m_pkthdr.ph_mss);
+               if (error)
+                       goto done;
+
+               while ((m = ml_dequeue(&ml)) != NULL) {
+                       error = ifp->if_output(ifp, m, sin6tosa(dst), ro->ro_rt);
+                       if (error)
+                               break;
+               }
+               if (error)
+                       ml_purge(&ml);
+               else
+                       tcpstat_inc(tcps_outswtso);
+               goto done;
+       }
+
        /*
         * try to fragment the packet.  case 1-b
         */
@@ -751,17 +770,17 @@ reroute:
                ip6->ip6_nxt = IPPROTO_FRAGMENT;
        }
 
-       error = ip6_fragment(m, &fml, hlen, nextproto, mtu);
+       error = ip6_fragment(m, &ml, hlen, nextproto, mtu);
        if (error)
                goto done;
 
-       while ((m = ml_dequeue(&fml)) != NULL) {
+       while ((m = ml_dequeue(&ml)) != NULL) {
                error = ifp->if_output(ifp, m, sin6tosa(dst), ro->ro_rt);
                if (error)
                        break;
        }
        if (error)
-               ml_purge(&fml);
+               ml_purge(&ml);
        else
                ip6stat_inc(ip6s_fragmented);
 
@@ -789,16 +808,15 @@ bad:
 }
 
 int
-ip6_fragment(struct mbuf *m0, struct mbuf_list *fml, int hlen,
-    u_char nextproto, u_long mtu)
+ip6_fragment(struct mbuf *m0, struct mbuf_list *ml, int hlen, u_char nextproto,
+    u_long mtu)
 {
-       struct mbuf *m;
        struct ip6_hdr *ip6;
        u_int32_t id;
        int tlen, len, off;
        int error;
 
-       ml_init(fml);
+       ml_init(ml);
 
        ip6 = mtod(m0, struct ip6_hdr *);
        tlen = m0->m_pkthdr.len;
@@ -810,10 +828,11 @@ ip6_fragment(struct mbuf *m0, struct mbu
        id = htonl(ip6_randomid());
 
        /*
-        * Loop through length of segment,
+        * Loop through length of payload,
         * make new header and copy data of each part and link onto chain.
         */
        for (off = hlen; off < tlen; off += len) {
+               struct mbuf *m;
                struct mbuf *mlast;
                struct ip6_hdr *mhip6;
                struct ip6_frag *ip6f;
@@ -823,8 +842,7 @@ ip6_fragment(struct mbuf *m0, struct mbu
                        error = ENOBUFS;
                        goto bad;
                }
-               ml_enqueue(fml, m);
-
+               ml_enqueue(ml, m);
                if ((error = m_dup_pkthdr(m, m0, M_DONTWAIT)) != 0)
                        goto bad;
                m->m_data += max_linkhdr;
@@ -856,13 +874,13 @@ ip6_fragment(struct mbuf *m0, struct mbu
                ip6f->ip6f_nxt = nextproto;
        }
 
-       ip6stat_add(ip6s_ofragments, ml_len(fml));
+       ip6stat_add(ip6s_ofragments, ml_len(ml));
        m_freem(m0);
        return (0);
 
 bad:
        ip6stat_inc(ip6s_odropped);
-       ml_purge(fml);
+       ml_purge(ml);
        m_freem(m0);
        return (error);
 }
@@ -2714,8 +2732,12 @@ in6_proto_cksum_out(struct mbuf *m, stru
                u_int16_t csum;
 
                offset = ip6_lasthdr(m, 0, IPPROTO_IPV6, &nxt);
-               csum = in6_cksum_phdr(&ip6->ip6_src, &ip6->ip6_dst,
-                   htonl(m->m_pkthdr.len - offset), htonl(nxt));
+               if (m->m_pkthdr.csum_flags & M_TCP_TSO)
+                       csum = in6_cksum_phdr(&ip6->ip6_src, &ip6->ip6_dst, 0,
+                           htonl(nxt));
+               else
+                       csum = in6_cksum_phdr(&ip6->ip6_src, &ip6->ip6_dst,
+                           htonl(m->m_pkthdr.len - offset), htonl(nxt));
                if (nxt == IPPROTO_TCP)
                        offset += offsetof(struct tcphdr, th_sum);
                else if (nxt == IPPROTO_UDP)
Index: sys/mbuf.h
===================================================================
RCS file: /data/mirror/openbsd/cvs/src/sys/sys/mbuf.h,v
retrieving revision 1.256
diff -u -p -r1.256 mbuf.h
--- sys/mbuf.h  5 May 2023 01:19:51 -0000       1.256
+++ sys/mbuf.h  5 May 2023 15:01:39 -0000
@@ -129,12 +129,13 @@ struct    pkthdr {
        SLIST_HEAD(, m_tag)      ph_tags;       /* list of packet tags */
        int64_t                  ph_timestamp;  /* packet timestamp */
        int                      len;           /* total packet length */
+       u_int                    ph_rtableid;   /* routing table id */
+       u_int                    ph_ifidx;      /* rcv interface index */
        u_int16_t                ph_tagsset;    /* mtags attached */
        u_int16_t                ph_flowid;     /* pseudo unique flow id */
        u_int16_t                csum_flags;    /* checksum flags */
        u_int16_t                ether_vtag;    /* Ethernet 802.1p+Q vlan tag */
-       u_int                    ph_rtableid;   /* routing table id */
-       u_int                    ph_ifidx;      /* rcv interface index */
+       u_int16_t                ph_mss;        /* TCP max segment size */
        u_int8_t                 ph_loopcnt;    /* mbuf is looping in kernel */
        u_int8_t                 ph_family;     /* af, used when queueing */
        struct pkthdr_pf         pf;
@@ -226,6 +227,7 @@ struct mbuf {
 #define        M_IPV6_DF_OUT           0x1000  /* don't fragment outgoing IPv6 */
 #define        M_TIMESTAMP             0x2000  /* ph_timestamp is set */
 #define        M_FLOWID                0x4000  /* ph_flowid is set */
+#define        M_TCP_TSO               0x8000  /* TCP Segmentation Offload needed */
 
 #ifdef _KERNEL
 #define MCS_BITS \

Reply via email to