  Andrew,

On Tue, Mar 18, 2014 at 01:55:01PM +0400, Andrew Rybchenko wrote:
A> sfxge: limit software Tx queue size
A>
A> The previous implementation limits the put queue size only (when the Tx
A> lock can't be acquired), but the get queue may grow unboundedly, which
A> results in mbuf pool exhaustion and latency growth.
A>
A> Submitted-by: Andrew Rybchenko <andrew.rybche...@oktetlabs.ru>
A> Sponsored by: Solarflare Communications, Inc.
The interaction between sfxge_tx_qdpl_put() and sfxge_tx_packet_add() is quite
complex, and I couldn't resist suggesting that you simplify the code. Can you
please look into the attached patch? It does the following:

- Inlines sfxge_tx_qdpl_put() into sfxge_tx_packet_add().
- Simplifies the 'locked' logic.
- Incorporates your PATCH 1/6, the mbuf leak fix.
- Incorporates your PATCH 2/6, the SFXGE_TX_DPL_GET_PKT_LIMIT_DEFAULT check.

-- 
Totus tuus, Glebius.
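For orientation, here is a compact standalone sketch of the control flow that
the attached patch gives sfxge_tx_packet_add(). It is a model only, assuming
nothing beyond what the diff below shows: the model_txq type, the pthread
mutex, and the enqueue_get()/push_put()/service() helpers (whose bodies are
stubbed out) are illustrative stand-ins for the sfxge txq, its mutex, and the
deferred-packet-list operations.

/*
 * Standalone model (not the sfxge code) of the merged transmit entry point:
 * one trylock chooses between the bounded "get list" append and the lock-free
 * "put list" push; a second trylock decides who services the list.
 */
#include <pthread.h>
#include <stddef.h>

struct model_txq {
        pthread_mutex_t lock;
        /* get list, put list and their limits omitted for brevity */
};

/* Stubs standing in for the real deferred-packet-list operations. */
static int enqueue_get(struct model_txq *q, void *m) { (void)q; (void)m; return (0); }
static int push_put(struct model_txq *q, void *m) { (void)q; (void)m; return (0); }
static void service(struct model_txq *q) { pthread_mutex_unlock(&q->lock); }

int
model_packet_add(struct model_txq *q, void *m)
{
        int rc;

        if (pthread_mutex_trylock(&q->lock) == 0) {
                /*
                 * Lock taken: append to the get list.  The list is bounded,
                 * so this can fail; in the patch the packet is then freed
                 * with m_freem() rather than leaked.
                 */
                if ((rc = enqueue_get(q, m)) != 0) {
                        pthread_mutex_unlock(&q->lock);
                        return (rc);
                }
        } else {
                /* Lock busy: push onto the put list, which is also bounded. */
                if ((rc = push_put(q, m)) != 0)
                        return (rc);
                /* If the lock is still busy, its holder will see our packet. */
                if (pthread_mutex_trylock(&q->lock) != 0)
                        return (0);
        }

        /* We hold the lock: drain the deferred list; service() drops the lock. */
        service(q);
        return (0);
}

int
main(void)
{
        struct model_txq q = { .lock = PTHREAD_MUTEX_INITIALIZER };

        return (model_packet_add(&q, NULL));
}

After the merge there are only three outcomes: the packet is appended under
the lock, or it is pushed and left for the current lock holder, or it is
pushed and we win the second trylock and service the list ourselves.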
Index: sfxge_tx.c
===================================================================
--- sfxge_tx.c  (revision 263297)
+++ sfxge_tx.c  (working copy)
@@ -437,32 +437,39 @@ sfxge_tx_qdpl_service(struct sfxge_txq *txq)
 }
 
 /*
- * Put a packet on the deferred packet list.
- *
- * If we are called with the txq lock held, we put the packet on the "get
- * list", otherwise we atomically push it on the "put list". The swizzle
- * function takes care of ordering.
- *
- * The length of the put list is bounded by SFXGE_TX_MAX_DEFFERED. We
- * overload the csum_data field in the mbuf to keep track of this length
- * because there is no cheap alternative to avoid races.
+ * Called from if_transmit - will try to grab the txq lock and enqueue to the
+ * put list if it succeeds, otherwise will push onto the defer list.
  */
-static inline int
-sfxge_tx_qdpl_put(struct sfxge_txq *txq, struct mbuf *mbuf, int locked)
+int
+sfxge_tx_packet_add(struct sfxge_txq *txq, struct mbuf *m)
 {
-        struct sfxge_tx_dpl *stdp;
+        struct sfxge_tx_dpl *stdp = &txq->dpl;
 
-        stdp = &txq->dpl;
+        KASSERT(m->m_nextpkt == NULL, ("m->m_nextpkt != NULL"));
 
-        KASSERT(mbuf->m_nextpkt == NULL, ("mbuf->m_nextpkt != NULL"));
+        /*
+         * Try to grab the txq lock. If we are able to get the lock,
+         * the packet will be appended to the "get list" of the deferred
+         * packet list. Otherwise, it will be pushed on the "put list".
+         * The swizzle function takes care of ordering.
+         *
+         * The length of the get and put lists are bounded by
+         * SFXGE_TX_DPL_GET_PKT_LIMIT_DEFAULT and
+         * SFXGE_TX_DPL_PUT_PKT_LIMIT_DEFAULT respectively.
+         * We overload the csum_data field in the mbuf to keep track of this
+         * length because there is no cheap alternative to avoid races.
+         */
+        if (mtx_trylock(&txq->lock)) {
+                sfxge_tx_qdpl_swizzle(txq);
 
-        if (locked) {
-                mtx_assert(&txq->lock, MA_OWNED);
+                if (stdp->std_count >= SFXGE_TX_DPL_GET_PKT_LIMIT_DEFAULT) {
+                        mtx_unlock(&txq->lock);
+                        m_freem(m);
+                        return (ENOBUFS);
+                }
 
-                sfxge_tx_qdpl_swizzle(txq);
-
-                *(stdp->std_getp) = mbuf;
-                stdp->std_getp = &mbuf->m_nextpkt;
+                *(stdp->std_getp) = m;
+                stdp->std_getp = &m->m_nextpkt;
                 stdp->std_count++;
         } else {
                 volatile uintptr_t *putp;
@@ -471,7 +478,7 @@ sfxge_tx_qdpl_service(struct sfxge_txq *txq)
                 unsigned old_len;
 
                 putp = &stdp->std_put;
-                new = (uintptr_t)mbuf;
+                new = (uintptr_t)m;
 
                 do {
                         old = *putp;
@@ -480,64 +487,30 @@ sfxge_tx_qdpl_service(struct sfxge_txq *txq)
                                 old_len = mp->m_pkthdr.csum_data;
                         } else
                                 old_len = 0;
-                        if (old_len >= SFXGE_TX_MAX_DEFERRED)
-                                return ENOBUFS;
-                        mbuf->m_pkthdr.csum_data = old_len + 1;
-                        mbuf->m_nextpkt = (void *)old;
+                        if (old_len >= SFXGE_TX_DPL_PUT_PKT_LIMIT_DEFAULT) {
+                                m_freem(m);
+                                return (ENOBUFS);
+                        }
+                        m->m_pkthdr.csum_data = old_len + 1;
+                        m->m_nextpkt = (void *)old;
                 } while (atomic_cmpset_ptr(putp, old, new) == 0);
-        }
 
-        return (0);
-}
-
-/*
- * Called from if_transmit - will try to grab the txq lock and enqueue to the
- * put list if it succeeds, otherwise will push onto the defer list.
- */
-int
-sfxge_tx_packet_add(struct sfxge_txq *txq, struct mbuf *m)
-{
-        int locked;
-        int rc;
-
-        /*
-         * Try to grab the txq lock. If we are able to get the lock,
-         * the packet will be appended to the "get list" of the deferred
-         * packet list. Otherwise, it will be pushed on the "put list".
-         */
-        locked = mtx_trylock(&txq->lock);
-
-        /*
-         * Can only fail if we weren't able to get the lock.
-         */
-        if (sfxge_tx_qdpl_put(txq, m, locked) != 0) {
-                KASSERT(!locked,
-                    ("sfxge_tx_qdpl_put() failed locked"));
-                rc = ENOBUFS;
-                goto fail;
+                /*
+                 * Again try to grab the lock.
+                 *
+                 * If we are able to get the lock, we need to process the
+                 * deferred packet list. If we are not able to get the lock,
+                 * another thread is processing the list, and we return.
+                 */
+                if (mtx_trylock(&txq->lock) == 0)
+                        return (0);
         }
 
-        /*
-         * Try to grab the lock again.
-         *
-         * If we are able to get the lock, we need to process the deferred
-         * packet list. If we are not able to get the lock, another thread
-         * is processing the list.
-         */
-        if (!locked)
-                locked = mtx_trylock(&txq->lock);
+        /* Try to service the list. */
+        sfxge_tx_qdpl_service(txq);
+        /* Lock has been dropped. */
 
-        if (locked) {
-                /* Try to service the list. */
-                sfxge_tx_qdpl_service(txq);
-                /* Lock has been dropped. */
-        }
-
         return (0);
-
-fail:
-        return (rc);
-
 }
 
 static void
Index: sfxge_tx.h
===================================================================
--- sfxge_tx.h  (revision 263297)
+++ sfxge_tx.h  (working copy)
@@ -75,7 +75,8 @@ struct sfxge_tx_mapping {
         enum sfxge_tx_buf_flags flags;
 };
 
-#define SFXGE_TX_MAX_DEFERRED   64
+#define SFXGE_TX_DPL_GET_PKT_LIMIT_DEFAULT      64
+#define SFXGE_TX_DPL_PUT_PKT_LIMIT_DEFAULT      64
 
 /*
  * Deferred packet list.
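To isolate the put-list technique used in the diff above: it is a lock-free
LIFO push in which the head element carries the current list length, so a
producer that cannot take the txq lock can still enforce a bound and free,
rather than leak or endlessly queue, the excess. Below is a standalone
C11-atomics illustration, not driver code; struct pkt, its len field,
put_list and PUT_LIMIT are assumed stand-ins for struct mbuf, the overloaded
csum_data field, stdp->std_put and SFXGE_TX_DPL_PUT_PKT_LIMIT_DEFAULT.

/*
 * Standalone illustration of the bounded, lock-free "put list" push from the
 * patch above: the head element carries the current list length, so packets
 * past the limit are refused and freed without taking a lock.
 */
#include <errno.h>
#include <stdatomic.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>

#define PUT_LIMIT 64                    /* stand-in for SFXGE_TX_DPL_PUT_PKT_LIMIT_DEFAULT */

struct pkt {
        struct pkt *next;               /* plays the role of m_nextpkt */
        unsigned len;                   /* plays the role of the overloaded csum_data */
};

static _Atomic uintptr_t put_list;      /* plays the role of stdp->std_put */

static int
put_push(struct pkt *p)
{
        uintptr_t old, new = (uintptr_t)p;
        unsigned old_len;

        do {
                old = atomic_load(&put_list);
                old_len = (old != 0) ? ((struct pkt *)old)->len : 0;
                if (old_len >= PUT_LIMIT) {
                        free(p);        /* like m_freem(m): drop, don't leak */
                        return (ENOBUFS);
                }
                p->len = old_len + 1;   /* new head records the new length */
                p->next = (struct pkt *)old;
        } while (!atomic_compare_exchange_weak(&put_list, &old, new));

        return (0);
}

int
main(void)
{
        int dropped = 0;

        /* Push more packets than the bound allows; the excess is dropped. */
        for (int i = 0; i < 100; i++) {
                struct pkt *p = calloc(1, sizeof(*p));

                if (p == NULL || put_push(p) == ENOBUFS)
                        dropped++;
        }
        printf("dropped %d of 100 packets at the put-list limit\n", dropped);
        return (0);
}

Because the compare-and-swap only succeeds when the head observed at the start
of the iteration is still the head, the length recorded in each new head stays
exact without taking a lock; the consumer that drains the list (the swizzle
step in the driver) is omitted here.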