Hello guys.
(CC'ing people involved in the previous round of this topic, sorry
for that)
There have been several problem descriptions/discussions about using 9k+ mbufs
with the current allocator in:
if_em: kern/183381
cxgbe:
http://lists.freebsd.org/pipermail/freebsd-net/2014-February/037834.html
general one:
http://lists.freebsd.org/pipermail/freebsd-net/2014-January/037673.html
I'd like to add ixgbe (and i40e with igb) to the list. We have been facing the
same problem for a long time.
As far as I can understand,
a) everyone (tm) is aware of the current 9k/16k allocation problems leading
to sudden network failures.
b) such mbuf sizes are not an absolute evil and can be useful on 40/100G
and for TSO cases.
c) however, no one is able / willing to fix our allocator to
pre-allocate a special arena for mbufs >= 4k page size.
d) so most people have written their own local hacks to disable 9k mbufs
and use 4k ones.
e) our list is not complete: people with
mellanox/solarflare/broadcom/emulex/etc NICs are not on it yet (and most if
not all 10g NICs support scatter/gather).
Can we add a more generic hack that moves the default mbuf size decision from
the NIC driver to the OS and makes it tunable by the user?
An example patch for the Intel drivers is attached.
Index: sys/kern/kern_mbuf.c
===================================================================
--- sys/kern/kern_mbuf.c (revision 265236)
+++ sys/kern/kern_mbuf.c (working copy)
@@ -103,6 +103,11 @@ int nmbjumbop; /* limits number of page size jum
int nmbjumbo9; /* limits number of 9k jumbo clusters */
int nmbjumbo16; /* limits number of 16k jumbo clusters */
+static int nojumbobuf; /* Use MCLBYTES mbufs */
+static int nojumbo9buf; /* Use either MCLBYTES or MJUMPAGESIZE */
+static int nojumbo16buf; /* Use any mbuf size less than MJUM16BYTES */
+
+
static quad_t maxmbufmem; /* overall real memory limit for all mbufs */
SYSCTL_QUAD(_kern_ipc, OID_AUTO, maxmbufmem, CTLFLAG_RDTUN, &maxmbufmem, 0,
@@ -151,6 +156,17 @@ tunable_mbinit(void *dummy)
if (nmbufs < nmbclusters + nmbjumbop + nmbjumbo9 + nmbjumbo16)
nmbufs = lmax(maxmbufmem / MSIZE / 5,
nmbclusters + nmbjumbop + nmbjumbo9 + nmbjumbo16);
+
+ /*
+ * Defaults to disable 9/16-kbyte pages
+ */
+ nojumbobuf = 0;
+ nojumbo9buf = 1;
+ nojumbo16buf = 1;
+
+ TUNABLE_INT_FETCH("kern.ipc.nojumbobuf", &nojumbobuf);
+ TUNABLE_INT_FETCH("kern.ipc.nojumbo9buf", &nojumbo9buf);
+ TUNABLE_INT_FETCH("kern.ipc.nojumbo16buf", &nojumbo16buf);
}
SYSINIT(tunable_mbinit, SI_SUB_KMEM, SI_ORDER_MIDDLE, tunable_mbinit, NULL);
@@ -261,6 +277,27 @@ SYSCTL_PROC(_kern_ipc, OID_AUTO, nmbufs, CTLTYPE_I
"Maximum number of mbufs allowed");
/*
+ * Choose an mbuf cluster size for the given mtu (thresholds 2048/4096/9216),
+ * capped by the nojumbobuf/nojumbo9buf/nojumbo16buf knobs; may be < mtu.
+ */
+int
+m_preferredsize(int mtu)
+{
+ int size;
+
+ if (mtu <= 2048 || nojumbobuf != 0) /* small mtu, or every jumbo size disabled */
+ size = MCLBYTES;
+ else if (mtu <= 4096 || nojumbo9buf != 0) /* 9k disabled: stop at MJUMPAGESIZE */
+ size = MJUMPAGESIZE;
+ else if (mtu <= 9216 || nojumbo16buf != 0) /* 16k disabled: stop at MJUM9BYTES */
+ size = MJUM9BYTES;
+ else
+ size = MJUM16BYTES;
+
+ return (size);
+}
+
+/*
* Zones from which we allocate.
*/
uma_zone_t zone_mbuf;
Index: sys/dev/ixgbe/ixgbe.c
===================================================================
--- sys/dev/ixgbe/ixgbe.c (revision 265236)
+++ sys/dev/ixgbe/ixgbe.c (working copy)
@@ -1138,14 +1138,7 @@ ixgbe_init_locked(struct adapter *adapter)
** Determine the correct mbuf pool
** for doing jumbo frames
*/
- if (adapter->max_frame_size <= 2048)
- adapter->rx_mbuf_sz = MCLBYTES;
- else if (adapter->max_frame_size <= 4096)
- adapter->rx_mbuf_sz = MJUMPAGESIZE;
- else if (adapter->max_frame_size <= 9216)
- adapter->rx_mbuf_sz = MJUM9BYTES;
- else
- adapter->rx_mbuf_sz = MJUM16BYTES;
+ adapter->rx_mbuf_sz = m_preferredsize(adapter->max_frame_size);
/* Prepare receive descriptors and buffers */
if (ixgbe_setup_receive_structures(adapter)) {
Index: sys/dev/e1000/if_em.c
===================================================================
--- sys/dev/e1000/if_em.c (revision 265236)
+++ sys/dev/e1000/if_em.c (working copy)
@@ -1342,12 +1342,7 @@ em_init_locked(struct adapter *adapter)
** Figure out the desired mbuf
** pool for doing jumbos
*/
- if (adapter->hw.mac.max_frame_size <= 2048)
- adapter->rx_mbuf_sz = MCLBYTES;
- else if (adapter->hw.mac.max_frame_size <= 4096)
- adapter->rx_mbuf_sz = MJUMPAGESIZE;
- else
- adapter->rx_mbuf_sz = MJUM9BYTES;
+ adapter->rx_mbuf_sz = m_preferredsize(adapter->hw.mac.max_frame_size);
/* Prepare receive descriptors and buffers */
if (em_setup_receive_structures(adapter)) {
Index: sys/dev/e1000/if_igb.c
===================================================================
--- sys/dev/e1000/if_igb.c (revision 265236)
+++ sys/dev/e1000/if_igb.c (working copy)
@@ -1335,12 +1335,7 @@ igb_init_locked(struct adapter *adapter)
** Figure out the desired mbuf pool
** for doing jumbo/packetsplit
*/
- if (adapter->max_frame_size <= 2048)
- adapter->rx_mbuf_sz = MCLBYTES;
- else if (adapter->max_frame_size <= 4096)
- adapter->rx_mbuf_sz = MJUMPAGESIZE;
- else
- adapter->rx_mbuf_sz = MJUM9BYTES;
+ adapter->rx_mbuf_sz = m_preferredsize(adapter->max_frame_size);
/* Prepare receive descriptors and buffers */
if (igb_setup_receive_structures(adapter)) {
_______________________________________________
freebsd-net@freebsd.org mailing list
http://lists.freebsd.org/mailman/listinfo/freebsd-net
To unsubscribe, send any mail to "freebsd-net-unsubscr...@freebsd.org"