On 04/05/2011, at 3:53 AM, Mike Belopuhov wrote:
> hi,
>
> recently me and jsg have figured out that a traffic burst can kill
> your interface (deplete the rx ring) if you're using ipsec. the
> problem is that there's no limit on number of outstanding crypto
> operations. every job contains a pointer to a cluster that comes
> from the nic driver.
>
> under high load a number of jobs on a crypto queue reaches the
> limit for the mclpool and network card fails to allocate clusters
> for the rx ring and stalls (both bnx and ix don't recover).
>
> we experimented with large watermarks (>20000 clusters) but that
> just delays the problem a bit. real solution is to prevent an
> excessive amount of operations to be enqueued to the crypto queue.
> the problem is as usual: how to define a limit? in this diff i've
> used a 1/3 of the cluster pool hardlimit which behaves pretty good
> with different kern.maxclusters values.
>
> opinions? OKs?
you still need to fix bnx and ix to cope with rx exhaustion.
i like this. i think it should go in. in the future id like to see this apply
back pressure on the interfaces rx rings though.
> (i'm not sure that using `min' in the m_clhardlimit is anyhow better
> than `max', so please speak up if you know how to improve this.)
>
> Index: crypto/crypto.c
> ===================================================================
> RCS file: /home/cvs/src/sys/crypto/crypto.c,v
> retrieving revision 1.59
> diff -u -p -r1.59 crypto.c
> --- crypto/crypto.c 11 Jan 2011 15:42:05 -0000 1.59
> +++ crypto/crypto.c 3 May 2011 13:56:41 -0000
> @@ -677,3 +677,14 @@ out:
> *featp = feat;
> return (0);
> }
> +
> +int
> +crypto_onqueue(u_int64_t sid)
> +{
> + u_int32_t hid;
> +
> + hid = (sid >> 32) & 0xffffffff;
> + if (hid < crypto_drivers_num)
> + return (crypto_drivers[hid].cc_queued);
> + return (0);
> +}
> Index: crypto/cryptodev.h
> ===================================================================
> RCS file: /home/cvs/src/sys/crypto/cryptodev.h,v
> retrieving revision 1.55
> diff -u -p -r1.55 cryptodev.h
> --- crypto/cryptodev.h 16 Dec 2010 16:56:08 -0000 1.55
> +++ crypto/cryptodev.h 3 May 2011 13:21:36 -0000
> @@ -323,6 +323,7 @@ int crypto_kinvoke(struct cryptkop *);
> void crypto_done(struct cryptop *);
> void crypto_kdone(struct cryptkop *);
> int crypto_getfeat(int *);
> +int crypto_onqueue(u_int64_t);
>
> void cuio_copydata(struct uio *, int, int, caddr_t);
> void cuio_copyback(struct uio *, int, int, const void *);
> Index: kern/uipc_mbuf.c
> ===================================================================
> RCS file: /home/cvs/src/sys/kern/uipc_mbuf.c,v
> retrieving revision 1.156
> diff -u -p -r1.156 uipc_mbuf.c
> --- kern/uipc_mbuf.c 18 Apr 2011 19:23:46 -0000 1.156
> +++ kern/uipc_mbuf.c 3 May 2011 17:01:14 -0000
> @@ -454,6 +454,30 @@ m_clget(struct mbuf *m, int how, struct
> return (m);
> }
>
> +int
> +m_clhardlimit(struct mbuf *m)
> +{
> + struct pool *pp;
> + int limit = 0;
> +
> + if (m == NULL)
> + return (-1);
> +
> + do {
> + if ((m->m_flags & (M_EXT|M_CLUSTER)) != (M_EXT|M_CLUSTER))
> + continue;
> + pp = &mclpools[m->m_ext.ext_backend];
> + if (limit)
> + limit = min(limit, pp->pr_hardlimit);
> + else
> + limit = pp->pr_hardlimit;
> + } while ((m = m->m_next) != NULL);
> +
> + if (!limit)
> + limit = -1;
> + return (limit);
> +}
> +
> struct mbuf *
> m_free_unlocked(struct mbuf *m)
> {
> Index: netinet/ip_ah.c
> ===================================================================
> RCS file: /home/cvs/src/sys/netinet/ip_ah.c,v
> retrieving revision 1.99
> diff -u -p -r1.99 ip_ah.c
> --- netinet/ip_ah.c 11 Jan 2011 15:42:05 -0000 1.99
> +++ netinet/ip_ah.c 3 May 2011 17:35:48 -0000
> @@ -548,7 +548,7 @@ ah_input(struct mbuf *m, struct tdb *tdb
> int rplen;
>
> struct cryptodesc *crda = NULL;
> - struct cryptop *crp;
> + struct cryptop *crp = NULL;
>
> if (!(tdb->tdb_flags & TDBF_NOREPLAY))
> rplen = AH_FLENGTH + sizeof(u_int32_t);
> @@ -632,7 +632,9 @@ ah_input(struct mbuf *m, struct tdb *tdb
> }
>
> /* Get crypto descriptors. */
> - crp = crypto_getreq(1);
> + if (m_clhardlimit(m) == -1 ||
> + crypto_onqueue(tdb->tdb_cryptoid) < m_clhardlimit(m) / 3)
> + crp = crypto_getreq(1);
> if (crp == NULL) {
> m_freem(m);
> DPRINTF(("ah_input(): failed to acquire crypto "
> @@ -982,7 +984,7 @@ ah_output(struct mbuf *m, struct tdb *td
> struct cryptodesc *crda;
> struct tdb_crypto *tc;
> struct mbuf *mo, *mi;
> - struct cryptop *crp;
> + struct cryptop *crp = NULL;
> u_int16_t iplen;
> int len, rplen;
> u_int8_t prot;
> @@ -1151,7 +1153,9 @@ ah_output(struct mbuf *m, struct tdb *td
> }
>
> /* Get crypto descriptors. */
> - crp = crypto_getreq(1);
> + if (m_clhardlimit(m) == -1 ||
> + crypto_onqueue(tdb->tdb_cryptoid) < m_clhardlimit(m) / 3)
> + crp = crypto_getreq(1);
> if (crp == NULL) {
> m_freem(m);
> DPRINTF(("ah_output(): failed to acquire crypto "
> Index: netinet/ip_esp.c
> ===================================================================
> RCS file: /home/cvs/src/sys/netinet/ip_esp.c,v
> retrieving revision 1.116
> diff -u -p -r1.116 ip_esp.c
> --- netinet/ip_esp.c 11 Jan 2011 15:42:05 -0000 1.116
> +++ netinet/ip_esp.c 3 May 2011 17:35:41 -0000
> @@ -324,7 +324,7 @@ esp_input(struct mbuf *m, struct tdb *td
> struct auth_hash *esph = (struct auth_hash *) tdb->tdb_authalgxform;
> struct enc_xform *espx = (struct enc_xform *) tdb->tdb_encalgxform;
> struct cryptodesc *crde = NULL, *crda = NULL;
> - struct cryptop *crp;
> + struct cryptop *crp = NULL;
> struct tdb_crypto *tc;
> int plen, alen, hlen;
> struct m_tag *mtag;
> @@ -427,7 +427,9 @@ esp_input(struct mbuf *m, struct tdb *td
> #endif
>
> /* Get crypto descriptors */
> - crp = crypto_getreq(esph && espx ? 2 : 1);
> + if (m_clhardlimit(m) == -1 ||
> + crypto_onqueue(tdb->tdb_cryptoid) < m_clhardlimit(m) / 3)
> + crp = crypto_getreq(esph && espx ? 2 : 1);
> if (crp == NULL) {
> m_freem(m);
> DPRINTF(("esp_input(): failed to acquire crypto
> descriptors\n"));
> @@ -771,7 +773,7 @@ esp_output(struct mbuf *m, struct tdb *t
> u_int8_t prot;
>
> struct cryptodesc *crde = NULL, *crda = NULL;
> - struct cryptop *crp;
> + struct cryptop *crp = NULL;
> #if NBPFILTER > 0
> struct ifnet *encif;
>
> @@ -953,7 +955,9 @@ esp_output(struct mbuf *m, struct tdb *t
> m_copyback(m, protoff, sizeof(u_int8_t), &prot, M_NOWAIT);
>
> /* Get crypto descriptors. */
> - crp = crypto_getreq(esph && espx ? 2 : 1);
> + if (m_clhardlimit(m) == -1 ||
> + crypto_onqueue(tdb->tdb_cryptoid) < m_clhardlimit(m) / 3)
> + crp = crypto_getreq(esph && espx ? 2 : 1);
> if (crp == NULL) {
> m_freem(m);
> DPRINTF(("esp_output(): failed to acquire crypto "
> Index: sys/mbuf.h
> ===================================================================
> RCS file: /home/cvs/src/sys/sys/mbuf.h,v
> retrieving revision 1.151
> diff -u -p -r1.151 mbuf.h
> --- sys/mbuf.h 24 Apr 2011 19:36:54 -0000 1.151
> +++ sys/mbuf.h 3 May 2011 13:52:39 -0000
> @@ -415,6 +415,7 @@ int m_cldrop(struct ifnet *, int);
> void m_clcount(struct ifnet *, int);
> void m_cluncount(struct mbuf *, int);
> void m_clinitifp(struct ifnet *);
> +int m_clhardlimit(struct mbuf *);
> void m_adj(struct mbuf *, int);
> int m_copyback(struct mbuf *, int, int, const void *, int);
> void m_freem(struct mbuf *);