On Tue, May 03, 2011 at 07:53:56PM +0200, Mike Belopuhov wrote:
> hi,
>
> recently me and jsg have figured out that a traffic burst can kill
> your interface (deplete the rx ring) if you're using ipsec. the
> problem is that there's no limit on number of outstanding crypto
> operations. every job contains a pointer to a cluster that comes
> from the nic driver.
>
> under high load a number of jobs on a crypto queue reaches the
> limit for the mclpool and network card fails to allocate clusters
> for the rx ring and stalls (both bnx and ix don't recover).
>
> we experimented with large watermarks (>20000 clusters) but that
> just delays the problem a bit. real solution is to prevent an
> excessive amount of operations to be enqueued to the crypto queue.
> the problem is as usual: how to define a limit? in this diff i've
> used a 1/3 of the cluster pool hardlimit which behaves pretty good
> with different kern.maxclusters values.
>
> opinions? OKs?
While reading this text I only had one thought. That is why we have
ifqueues. Like for example the IP input queue. Sure the crypto thread
works differently but it should behave similar. Does it actually make
sense to queue 2000+ packets on the crypto thread? Aren't the packets
delayed for a very long time?
> (i'm not sure that using `min' in the m_clhardlimit is anyhow better
> than `max', so please speak up if you know how to improve this.)
>
> Index: crypto/crypto.c
> ===================================================================
> RCS file: /home/cvs/src/sys/crypto/crypto.c,v
> retrieving revision 1.59
> diff -u -p -r1.59 crypto.c
> --- crypto/crypto.c 11 Jan 2011 15:42:05 -0000 1.59
> +++ crypto/crypto.c 3 May 2011 13:56:41 -0000
> @@ -677,3 +677,14 @@ out:
> *featp = feat;
> return (0);
> }
> +
> +int
> +crypto_onqueue(u_int64_t sid)
> +{
> + u_int32_t hid;
> +
> + hid = (sid >> 32) & 0xffffffff;
> + if (hid < crypto_drivers_num)
> + return (crypto_drivers[hid].cc_queued);
> + return (0);
> +}
> Index: crypto/cryptodev.h
> ===================================================================
> RCS file: /home/cvs/src/sys/crypto/cryptodev.h,v
> retrieving revision 1.55
> diff -u -p -r1.55 cryptodev.h
> --- crypto/cryptodev.h 16 Dec 2010 16:56:08 -0000 1.55
> +++ crypto/cryptodev.h 3 May 2011 13:21:36 -0000
> @@ -323,6 +323,7 @@ int crypto_kinvoke(struct cryptkop *);
> void crypto_done(struct cryptop *);
> void crypto_kdone(struct cryptkop *);
> int crypto_getfeat(int *);
> +int crypto_onqueue(u_int64_t);
>
> void cuio_copydata(struct uio *, int, int, caddr_t);
> void cuio_copyback(struct uio *, int, int, const void *);
> Index: kern/uipc_mbuf.c
> ===================================================================
> RCS file: /home/cvs/src/sys/kern/uipc_mbuf.c,v
> retrieving revision 1.156
> diff -u -p -r1.156 uipc_mbuf.c
> --- kern/uipc_mbuf.c 18 Apr 2011 19:23:46 -0000 1.156
> +++ kern/uipc_mbuf.c 3 May 2011 17:01:14 -0000
> @@ -454,6 +454,30 @@ m_clget(struct mbuf *m, int how, struct
> return (m);
> }
>
> +int
> +m_clhardlimit(struct mbuf *m)
> +{
> + struct pool *pp;
> + int limit = 0;
> +
> + if (m == NULL)
> + return (-1);
> +
> + do {
> + if ((m->m_flags & (M_EXT|M_CLUSTER)) != (M_EXT|M_CLUSTER))
> + continue;
> + pp = &mclpools[m->m_ext.ext_backend];
> + if (limit)
> + limit = min(limit, pp->pr_hardlimit);
> + else
> + limit = pp->pr_hardlimit;
> + } while ((m = m->m_next) != NULL);
> +
> + if (!limit)
> + limit = -1;
> + return (limit);
> +}
> +
> struct mbuf *
> m_free_unlocked(struct mbuf *m)
> {
> Index: netinet/ip_ah.c
> ===================================================================
> RCS file: /home/cvs/src/sys/netinet/ip_ah.c,v
> retrieving revision 1.99
> diff -u -p -r1.99 ip_ah.c
> --- netinet/ip_ah.c 11 Jan 2011 15:42:05 -0000 1.99
> +++ netinet/ip_ah.c 3 May 2011 17:35:48 -0000
> @@ -548,7 +548,7 @@ ah_input(struct mbuf *m, struct tdb *tdb
> int rplen;
>
> struct cryptodesc *crda = NULL;
> - struct cryptop *crp;
> + struct cryptop *crp = NULL;
>
> if (!(tdb->tdb_flags & TDBF_NOREPLAY))
> rplen = AH_FLENGTH + sizeof(u_int32_t);
> @@ -632,7 +632,9 @@ ah_input(struct mbuf *m, struct tdb *tdb
> }
>
> /* Get crypto descriptors. */
> - crp = crypto_getreq(1);
> + if (m_clhardlimit(m) == -1 ||
> + crypto_onqueue(tdb->tdb_cryptoid) < m_clhardlimit(m) / 3)
> + crp = crypto_getreq(1);
> if (crp == NULL) {
> m_freem(m);
> DPRINTF(("ah_input(): failed to acquire crypto "
> @@ -982,7 +984,7 @@ ah_output(struct mbuf *m, struct tdb *td
> struct cryptodesc *crda;
> struct tdb_crypto *tc;
> struct mbuf *mo, *mi;
> - struct cryptop *crp;
> + struct cryptop *crp = NULL;
> u_int16_t iplen;
> int len, rplen;
> u_int8_t prot;
> @@ -1151,7 +1153,9 @@ ah_output(struct mbuf *m, struct tdb *td
> }
>
> /* Get crypto descriptors. */
> - crp = crypto_getreq(1);
> + if (m_clhardlimit(m) == -1 ||
> + crypto_onqueue(tdb->tdb_cryptoid) < m_clhardlimit(m) / 3)
> + crp = crypto_getreq(1);
> if (crp == NULL) {
> m_freem(m);
> DPRINTF(("ah_output(): failed to acquire crypto "
> Index: netinet/ip_esp.c
> ===================================================================
> RCS file: /home/cvs/src/sys/netinet/ip_esp.c,v
> retrieving revision 1.116
> diff -u -p -r1.116 ip_esp.c
> --- netinet/ip_esp.c 11 Jan 2011 15:42:05 -0000 1.116
> +++ netinet/ip_esp.c 3 May 2011 17:35:41 -0000
> @@ -324,7 +324,7 @@ esp_input(struct mbuf *m, struct tdb *td
> struct auth_hash *esph = (struct auth_hash *) tdb->tdb_authalgxform;
> struct enc_xform *espx = (struct enc_xform *) tdb->tdb_encalgxform;
> struct cryptodesc *crde = NULL, *crda = NULL;
> - struct cryptop *crp;
> + struct cryptop *crp = NULL;
> struct tdb_crypto *tc;
> int plen, alen, hlen;
> struct m_tag *mtag;
> @@ -427,7 +427,9 @@ esp_input(struct mbuf *m, struct tdb *td
> #endif
>
> /* Get crypto descriptors */
> - crp = crypto_getreq(esph && espx ? 2 : 1);
> + if (m_clhardlimit(m) == -1 ||
> + crypto_onqueue(tdb->tdb_cryptoid) < m_clhardlimit(m) / 3)
> + crp = crypto_getreq(esph && espx ? 2 : 1);
> if (crp == NULL) {
> m_freem(m);
> DPRINTF(("esp_input(): failed to acquire crypto
> descriptors\n"));
> @@ -771,7 +773,7 @@ esp_output(struct mbuf *m, struct tdb *t
> u_int8_t prot;
>
> struct cryptodesc *crde = NULL, *crda = NULL;
> - struct cryptop *crp;
> + struct cryptop *crp = NULL;
> #if NBPFILTER > 0
> struct ifnet *encif;
>
> @@ -953,7 +955,9 @@ esp_output(struct mbuf *m, struct tdb *t
> m_copyback(m, protoff, sizeof(u_int8_t), &prot, M_NOWAIT);
>
> /* Get crypto descriptors. */
> - crp = crypto_getreq(esph && espx ? 2 : 1);
> + if (m_clhardlimit(m) == -1 ||
> + crypto_onqueue(tdb->tdb_cryptoid) < m_clhardlimit(m) / 3)
> + crp = crypto_getreq(esph && espx ? 2 : 1);
> if (crp == NULL) {
> m_freem(m);
> DPRINTF(("esp_output(): failed to acquire crypto "
> Index: sys/mbuf.h
> ===================================================================
> RCS file: /home/cvs/src/sys/sys/mbuf.h,v
> retrieving revision 1.151
> diff -u -p -r1.151 mbuf.h
> --- sys/mbuf.h 24 Apr 2011 19:36:54 -0000 1.151
> +++ sys/mbuf.h 3 May 2011 13:52:39 -0000
> @@ -415,6 +415,7 @@ int m_cldrop(struct ifnet *, int);
> void m_clcount(struct ifnet *, int);
> void m_cluncount(struct mbuf *, int);
> void m_clinitifp(struct ifnet *);
> +int m_clhardlimit(struct mbuf *);
> void m_adj(struct mbuf *, int);
> int m_copyback(struct mbuf *, int, int, const void *, int);
> void m_freem(struct mbuf *);
>
--
:wq Claudio