hselasky updated this revision to Diff 5720.
hselasky added a comment.
Herald added a subscriber: imp.

Minor fix to compute the correct ip6_plen in ip6_input().
Previously, only the first 64K of the payload was consumed.


REPOSITORY
  rS FreeBSD src repository

CHANGES SINCE LAST UPDATE
  https://reviews.freebsd.org/D1761?vs=3630&id=5720

REVISION DETAIL
  https://reviews.freebsd.org/D1761

AFFECTED FILES
  sys/conf/options
  sys/netinet/ip_input.c
  sys/netinet/ip_output.c
  sys/netinet/tcp_input.c
  sys/netinet/tcp_lro.c
  sys/netinet6/ip6_input.c
  sys/sys/mbuf.h

EMAIL PREFERENCES
  https://reviews.freebsd.org/settings/panel/emailpreferences/

To: hselasky, rrs, glebius, gnn, emaste, lstewart, rwatson, bz, imp, np, jfv, 
adrian
Cc: imp, freebsd-net
diff --git a/sys/sys/mbuf.h b/sys/sys/mbuf.h
--- a/sys/sys/mbuf.h
+++ b/sys/sys/mbuf.h
@@ -306,6 +306,7 @@
 #define	M_HASHTYPE_RSS_UDP_IPV6		9	/* IPv6 UDP 4-tuple */
 #define	M_HASHTYPE_RSS_UDP_IPV6_EX	10	/* IPv6 UDP 4-tuple + ext hdrs */
 
+#define	M_HASHTYPE_LRO_TCP		254	/* TCP large receive offload */
 #define	M_HASHTYPE_OPAQUE		255	/* ordering, not affinity */
 
 #define	M_HASHTYPE_CLEAR(m)	((m)->m_pkthdr.rsstype = 0)
diff --git a/sys/netinet6/ip6_input.c b/sys/netinet6/ip6_input.c
--- a/sys/netinet6/ip6_input.c
+++ b/sys/netinet6/ip6_input.c
@@ -694,7 +694,13 @@
 	 * m may be modified in ip6_hopopts_input().
 	 * If a JumboPayload option is included, plen will also be modified.
 	 */
-	plen = (u_int32_t)ntohs(ip6->ip6_plen);
+	if (M_HASHTYPE_GET(m) == M_HASHTYPE_LRO_TCP) {
+		if (m->m_pkthdr.len < sizeof(struct ip6_hdr))
+			plen = 0;
+		else
+			plen = m->m_pkthdr.len - sizeof(struct ip6_hdr);
+	} else
+		plen = (u_int32_t)ntohs(ip6->ip6_plen);
 	if (ip6->ip6_nxt == IPPROTO_HOPOPTS) {
 		if (ip6_input_hbh(m, &plen, &rtalert, &off, &nxt, &ours) != 0)
 			return;
diff --git a/sys/netinet/tcp_lro.c b/sys/netinet/tcp_lro.c
--- a/sys/netinet/tcp_lro.c
+++ b/sys/netinet/tcp_lro.c
@@ -32,6 +32,7 @@
 #include <sys/cdefs.h>
 __FBSDID("$FreeBSD$");
 
+#include "opt_lro.h"
 #include "opt_inet.h"
 #include "opt_inet6.h"
 
@@ -62,6 +63,14 @@
 #define	LRO_ENTRIES	8	/* # of LRO entries per RX queue. */
 #endif
 
+#ifndef	LRO_PAYLOAD_MAX
+#define	LRO_PAYLOAD_MAX	IP_MAXPACKET
+#endif
+
+#if (LRO_PAYLOAD_MAX < 65535)
+#error "LRO_PAYLOAD_MAX must be at least 65535 bytes"
+#endif
+
 #define	TCP_LRO_UPDATE_CSUM	1
 #ifndef	TCP_LRO_UPDATE_CSUM
 #define	TCP_LRO_INVALID_CSUM	0x0000
@@ -219,8 +228,20 @@
 	if (le->append_cnt > 0) {
 		struct tcphdr *th;
 		uint16_t p_len;
-
-		p_len = htons(le->p_len);
+		/*
+		 * The TCP/IP stack should use the "m_pkthdr.len"
+		 * field instead of the IP-payload length field to
+		 * compute the total TCP payload length when it
+		 * recognizes the M_HASHTYPE_LRO_TCP hash type. This
+		 * allows accumulation of more than 64Kbytes worth of
+		 * payload data.
+		 */
+		if (le->p_len > IP_MAXPACKET) {
+			M_HASHTYPE_SET(le->m_head, M_HASHTYPE_LRO_TCP);
+			p_len = htons(IP_MAXPACKET);
+		} else {
+			p_len = htons(le->p_len);
+		}
 		switch (le->eh_type) {
 #ifdef INET6
 		case ETHERTYPE_IPV6:
@@ -501,7 +522,7 @@
 		}
 
 		/* Flush now if appending will result in overflow. */
-		if (le->p_len > (65535 - tcp_data_len)) {
+		if (le->p_len > (LRO_PAYLOAD_MAX - tcp_data_len)) {
 			SLIST_REMOVE(&lc->lro_active, le, lro_entry, next);
 			tcp_lro_flush(lc, le);
 			break;
@@ -559,7 +580,7 @@
 		 * If a possible next full length packet would cause an
 		 * overflow, pro-actively flush now.
 		 */
-		if (le->p_len > (65535 - lc->ifp->if_mtu)) {
+		if (le->p_len > (LRO_PAYLOAD_MAX - lc->ifp->if_mtu)) {
 			SLIST_REMOVE(&lc->lro_active, le, lro_entry, next);
 			tcp_lro_flush(lc, le);
 		} else
diff --git a/sys/netinet/tcp_input.c b/sys/netinet/tcp_input.c
--- a/sys/netinet/tcp_input.c
+++ b/sys/netinet/tcp_input.c
@@ -644,7 +644,10 @@
 
 		ip6 = mtod(m, struct ip6_hdr *);
 		th = (struct tcphdr *)((caddr_t)ip6 + off0);
-		tlen = sizeof(*ip6) + ntohs(ip6->ip6_plen) - off0;
+		if (M_HASHTYPE_GET(m) == M_HASHTYPE_LRO_TCP)
+			tlen = m->m_pkthdr.len - off0;
+		else
+			tlen = sizeof(*ip6) + ntohs(ip6->ip6_plen) - off0;
 		if (m->m_pkthdr.csum_flags & CSUM_DATA_VALID_IPV6) {
 			if (m->m_pkthdr.csum_flags & CSUM_PSEUDO_HDR)
 				th->th_sum = m->m_pkthdr.csum_data;
@@ -695,8 +698,10 @@
 		}
 		ip = mtod(m, struct ip *);
 		th = (struct tcphdr *)((caddr_t)ip + off0);
-		tlen = ntohs(ip->ip_len) - off0;
-
+		if (M_HASHTYPE_GET(m) == M_HASHTYPE_LRO_TCP)
+			tlen = m->m_pkthdr.len - off0;
+		else
+			tlen = ntohs(ip->ip_len) - off0;
 		if (m->m_pkthdr.csum_flags & CSUM_DATA_VALID) {
 			if (m->m_pkthdr.csum_flags & CSUM_PSEUDO_HDR)
 				th->th_sum = m->m_pkthdr.csum_data;
diff --git a/sys/netinet/ip_output.c b/sys/netinet/ip_output.c
--- a/sys/netinet/ip_output.c
+++ b/sys/netinet/ip_output.c
@@ -124,13 +124,14 @@
 	struct ifnet *ifp = NULL;	/* keep compiler happy */
 	struct mbuf *m0;
 	int hlen = sizeof (struct ip);
+	int ip_len;
 	int mtu;
 	int error = 0;
 	struct sockaddr_in *dst;
 	const struct sockaddr_in *gw;
 	struct in_ifaddr *ia;
 	int isbroadcast;
-	uint16_t ip_len, ip_off;
+	uint16_t ip_off;
 	struct route iproute;
 	struct rtentry *rte;	/* cache for ro->ro_rt */
 	struct in_addr odst;
@@ -169,7 +170,11 @@
 			hlen = len; /* ip->ip_hl is updated above */
 	}
 	ip = mtod(m, struct ip *);
-	ip_len = ntohs(ip->ip_len);
+
+	if (M_HASHTYPE_GET(m) == M_HASHTYPE_LRO_TCP)
+		ip_len = m->m_pkthdr.len;
+	else
+		ip_len = ntohs(ip->ip_len);
 	ip_off = ntohs(ip->ip_off);
 
 	if ((flags & (IP_FORWARDING|IP_RAWOUTPUT)) == 0) {
@@ -688,9 +693,13 @@
 	int firstlen;
 	struct mbuf **mnext;
 	int nfrags;
-	uint16_t ip_len, ip_off;
+	int ip_len;
+	uint16_t ip_off;
 
-	ip_len = ntohs(ip->ip_len);
+	if (M_HASHTYPE_GET(m0) == M_HASHTYPE_LRO_TCP)
+		ip_len = m0->m_pkthdr.len;
+	else
+		ip_len = ntohs(ip->ip_len);
 	ip_off = ntohs(ip->ip_off);
 
 	if (ip_off & IP_DF) {	/* Fragmentation not allowed */
diff --git a/sys/netinet/ip_input.c b/sys/netinet/ip_input.c
--- a/sys/netinet/ip_input.c
+++ b/sys/netinet/ip_input.c
@@ -397,7 +397,8 @@
 	struct ifaddr *ifa;
 	struct ifnet *ifp;
 	int    checkif, hlen = 0;
-	uint16_t sum, ip_len;
+	uint32_t ip_len;
+	uint16_t sum;
 	int dchg = 0;				/* dest changed after fw */
 	struct in_addr odst;			/* original dst address */
 
@@ -474,7 +475,10 @@
 		return;
 #endif
 
-	ip_len = ntohs(ip->ip_len);
+	if (M_HASHTYPE_GET(m) == M_HASHTYPE_LRO_TCP)
+		ip_len = m->m_pkthdr.len;
+	else
+		ip_len = ntohs(ip->ip_len);
 	if (ip_len < hlen) {
 		IPSTAT_INC(ips_badlen);
 		goto bad;
@@ -900,6 +904,7 @@
 	struct in_addr dest;
 	struct route ro;
 	int error, type = 0, code = 0, mtu = 0;
+	int ip_len;
 
 	if (m->m_flags & (M_BCAST|M_MCAST) || in_canforward(ip->ip_dst) == 0) {
 		IPSTAT_INC(ips_cantforward);
@@ -965,8 +970,14 @@
 		m_free(mcopy);
 		mcopy = NULL;
 	}
+
+	if (M_HASHTYPE_GET(m) == M_HASHTYPE_LRO_TCP)
+		ip_len = m->m_pkthdr.len;
+	else
+		ip_len = ntohs(ip->ip_len);
+
 	if (mcopy != NULL) {
-		mcopy->m_len = min(ntohs(ip->ip_len), M_TRAILINGSPACE(mcopy));
+		mcopy->m_len = min(ip_len, M_TRAILINGSPACE(mcopy));
 		mcopy->m_pkthdr.len = mcopy->m_len;
 		m_copydata(m, 0, mcopy->m_len, mtod(mcopy, caddr_t));
 	}
@@ -1094,7 +1105,7 @@
 			if (ia != NULL)
 				mtu = ia->ia_ifp->if_mtu;
 			else
-				mtu = ip_next_mtu(ntohs(ip->ip_len), 0);
+				mtu = ip_next_mtu(ip_len, 0);
 		}
 		IPSTAT_INC(ips_cantfrag);
 		break;
diff --git a/sys/conf/options b/sys/conf/options
--- a/sys/conf/options
+++ b/sys/conf/options
@@ -402,6 +402,8 @@
 DUMMYNET		opt_ipdn.h
 INET			opt_inet.h
 INET6			opt_inet6.h
+LRO_ENTRIES		opt_lro.h
+LRO_PAYLOAD_MAX		opt_lro.h
 IPDIVERT
 IPFILTER		opt_ipfilter.h
 IPFILTER_DEFAULT_BLOCK	opt_ipfilter.h

_______________________________________________
freebsd-net@freebsd.org mailing list
http://lists.freebsd.org/mailman/listinfo/freebsd-net
To unsubscribe, send any mail to "freebsd-net-unsubscr...@freebsd.org"

Reply via email to