Module Name: src Committed By: msaitoh Date: Thu Sep 16 09:55:28 UTC 2021
Modified Files: src/sys/dev/pci/ixgbe: ixgbe.c ixgbe.h ixv.c Log Message: Fix a bug where an mbuf chain holding more than 63488 bytes may fail on TX. - Currently, the TX buffer dmamap's maximum number of segments is set to 32. MCLBYTES (== 2048) * 32 = 65536, which is enough for IP_MAXPACKET (65535). If an mbuf chain has more than 32 mbufs, we call m_defrag() to reduce it to 32 or fewer, but that might not work. The reason is that our m_defrag() doesn't modify the first mbuf of the chain. E.g.: if an mbuf chain contains 63600 bytes of data and the first mbuf has 100 bytes in its m_data, the new chain is 100+2048+2048+...+12 and the number of mbufs in the chain is not 32 but 33. It results in 43 TCP packets being dropped. - One way to fix this problem is to change m_defrag() so that it adds a new mbuf cluster to the first mbuf. This needs discussion. - Another solution is to change the maximum number of TX DMA segments. It should be at least 33 to avoid m_defrag()'s current limitation. The document (82599-X550 DS 7.2.1.1 "Transmit Storage in System Memory") says that a packet can be fragmented into at most 40 - WTHRESH - 2 descriptors (for 82598, the document says nothing, and some people and code say it's unlimited). Currently WTHRESH is set to 8. 40 - 8 - 2 = 30. !?!?!? {Net,Free,Open}BSD and Linux use 32. Is that safe? Anyway, we change WTHRESH from 8 to 5 so that 33 segments fit (40 - 5 - 2 = 33). The added comment in ixgbe.h is based on DragonFly's, though they don't use WTHRESH. - Yet another solution is to use the Tx Head Pointer Write Back function instead of WTHRESH-based write back (see 82599-X550 DS 7.2.3.5.2 "Tx Head Pointer Write Back" or 82598 "Transmit Completions Head Write Back"). DragonFly, illumos and DPDK use it. - Yet another solution is to add a tso_maxsize entry to struct ifnet and use it in the TCP stack.
To generate a diff of this commit: cvs rdiff -u -r1.291 -r1.292 src/sys/dev/pci/ixgbe/ixgbe.c cvs rdiff -u -r1.80 -r1.81 src/sys/dev/pci/ixgbe/ixgbe.h cvs rdiff -u -r1.167 -r1.168 src/sys/dev/pci/ixgbe/ixv.c Please note that diffs are not public domain; they are subject to the copyright notices on the relevant files.
Modified files: Index: src/sys/dev/pci/ixgbe/ixgbe.c diff -u src/sys/dev/pci/ixgbe/ixgbe.c:1.291 src/sys/dev/pci/ixgbe/ixgbe.c:1.292 --- src/sys/dev/pci/ixgbe/ixgbe.c:1.291 Thu Sep 16 07:08:26 2021 +++ src/sys/dev/pci/ixgbe/ixgbe.c Thu Sep 16 09:55:28 2021 @@ -1,4 +1,4 @@ -/* $NetBSD: ixgbe.c,v 1.291 2021/09/16 07:08:26 msaitoh Exp $ */ +/* $NetBSD: ixgbe.c,v 1.292 2021/09/16 09:55:28 msaitoh Exp $ */ /****************************************************************************** @@ -64,7 +64,7 @@ */ #include <sys/cdefs.h> -__KERNEL_RCSID(0, "$NetBSD: ixgbe.c,v 1.291 2021/09/16 07:08:26 msaitoh Exp $"); +__KERNEL_RCSID(0, "$NetBSD: ixgbe.c,v 1.292 2021/09/16 09:55:28 msaitoh Exp $"); #ifdef _KERNEL_OPT #include "opt_inet.h" @@ -927,11 +927,12 @@ ixgbe_attach(device_t parent, device_t d hw->allow_unsupported_sfp = allow_unsupported_sfp; /* Pick up the 82599 settings */ - if (hw->mac.type != ixgbe_mac_82598EB) { + if (hw->mac.type != ixgbe_mac_82598EB) hw->phy.smart_speed = ixgbe_smart_speed; - adapter->num_segs = IXGBE_82599_SCATTER; - } else - adapter->num_segs = IXGBE_82598_SCATTER; + + /* Set the right number of segments */ + KASSERT(IXGBE_82599_SCATTER_MAX >= IXGBE_SCATTER_DEFAULT); + adapter->num_segs = IXGBE_SCATTER_DEFAULT; /* Ensure SW/FW semaphore is free */ ixgbe_init_swfw_semaphore(hw); @@ -4102,7 +4103,7 @@ ixgbe_init_locked(struct adapter *adapte txdctl = IXGBE_READ_REG(hw, IXGBE_TXDCTL(txr->me)); txdctl |= IXGBE_TXDCTL_ENABLE; /* Set WTHRESH to 8, burst writeback */ - txdctl |= (8 << IXGBE_TXDCTL_WTHRESH_SHIFT); + txdctl |= IXGBE_TX_WTHRESH << IXGBE_TXDCTL_WTHRESH_SHIFT; /* * When the internal queue falls below PTHRESH (32), * start prefetching as long as there are at least Index: src/sys/dev/pci/ixgbe/ixgbe.h diff -u src/sys/dev/pci/ixgbe/ixgbe.h:1.80 src/sys/dev/pci/ixgbe/ixgbe.h:1.81 --- src/sys/dev/pci/ixgbe/ixgbe.h:1.80 Fri Sep 3 08:57:58 2021 +++ src/sys/dev/pci/ixgbe/ixgbe.h Thu Sep 16 09:55:28 2021 @@ -1,4 +1,4 @@ -/* $NetBSD: ixgbe.h,v 1.80 
2021/09/03 08:57:58 msaitoh Exp $ */ +/* $NetBSD: ixgbe.h,v 1.81 2021/09/16 09:55:28 msaitoh Exp $ */ /****************************************************************************** SPDX-License-Identifier: BSD-3-Clause @@ -185,6 +185,23 @@ */ #define IXGBE_RX_COPY_LEN_MAX (MHLEN - ETHER_ALIGN) +/* + * Default TX WTHRESH value. + * Currently, we don't use the Tx Head Pointer Write Back function. + */ +#define IXGBE_TX_WTHRESH 5 + +/* + * The max number of descriptors that one packet can use is 40 - WTHRESH - 2. + * Though 82598 does not have this limit, we don't want long TX chain. + * 33 should be large enough even for 64K TSO + * (32 * 2K mbuf cluster and 1 x mbuf header). + * + * Rerefence: 82599-X550 dataseet 7.2.1.1 "Transmit Storage in System Memory". + */ +#define IXGBE_82599_SCATTER_MAX (40 - IXGBE_TX_WTHRESH - 2) +#define IXGBE_SCATTER_DEFAULT 33 + /* Keep older OS drivers building... */ #if !defined(SYSCTL_ADD_UQUAD) #define SYSCTL_ADD_UQUAD SYSCTL_ADD_QUAD @@ -206,8 +223,6 @@ #define HW_DEBUGOUT2(S, A, B) if (DEBUG_HW) printf(S "\n", A, B) #define MAX_NUM_MULTICAST_ADDRESSES 128 -#define IXGBE_82598_SCATTER 100 -#define IXGBE_82599_SCATTER 32 #define MSIX_82598_BAR 3 #define MSIX_82599_BAR 4 #define IXGBE_TSO_SIZE 262140 Index: src/sys/dev/pci/ixgbe/ixv.c diff -u src/sys/dev/pci/ixgbe/ixv.c:1.167 src/sys/dev/pci/ixgbe/ixv.c:1.168 --- src/sys/dev/pci/ixgbe/ixv.c:1.167 Thu Sep 16 07:08:26 2021 +++ src/sys/dev/pci/ixgbe/ixv.c Thu Sep 16 09:55:28 2021 @@ -1,4 +1,4 @@ -/* $NetBSD: ixv.c,v 1.167 2021/09/16 07:08:26 msaitoh Exp $ */ +/* $NetBSD: ixv.c,v 1.168 2021/09/16 09:55:28 msaitoh Exp $ */ /****************************************************************************** @@ -35,7 +35,7 @@ /*$FreeBSD: head/sys/dev/ixgbe/if_ixv.c 331224 2018-03-19 20:55:05Z erj $*/ #include <sys/cdefs.h> -__KERNEL_RCSID(0, "$NetBSD: ixv.c,v 1.167 2021/09/16 07:08:26 msaitoh Exp $"); +__KERNEL_RCSID(0, "$NetBSD: ixv.c,v 1.168 2021/09/16 09:55:28 msaitoh Exp $"); #ifdef 
_KERNEL_OPT #include "opt_inet.h" @@ -431,7 +431,8 @@ ixv_attach(device_t parent, device_t dev ixgbe_init_mbx_params_vf(hw); /* Set the right number of segments */ - adapter->num_segs = IXGBE_82599_SCATTER; + KASSERT(IXGBE_82599_SCATTER_MAX >= IXGBE_SCATTER_DEFAULT); + adapter->num_segs = IXGBE_SCATTER_DEFAULT; /* Reset mbox api to 1.0 */ error = hw->mac.ops.reset_hw(hw); @@ -1716,7 +1717,7 @@ ixv_initialize_transmit_units(struct ada /* Set WTHRESH to 8, burst writeback */ txdctl = IXGBE_READ_REG(hw, IXGBE_VFTXDCTL(j)); - txdctl |= (8 << IXGBE_TXDCTL_WTHRESH_SHIFT); + txdctl |= IXGBE_TX_WTHRESH << IXGBE_TXDCTL_WTHRESH_SHIFT; IXGBE_WRITE_REG(hw, IXGBE_VFTXDCTL(j), txdctl); /* Set the HW Tx Head and Tail indices */