The branch main has been updated by kib:

URL: 
https://cgit.FreeBSD.org/src/commit/?id=81b38bce07940b7a4001dfeb8cd63774229ca950

commit 81b38bce07940b7a4001dfeb8cd63774229ca950
Author:     Andrew Gallatin <galla...@freebsd.org>
AuthorDate: 2024-11-23 09:43:17 +0000
Commit:     Konstantin Belousov <k...@freebsd.org>
CommitDate: 2024-11-23 10:01:50 +0000

    mlx5e tls: Ensure all allocated tags have a hw context associated
    
    Ensure all allocated tags have a hardware context associated.
    The hardware context allocation is moved into the zone import
    routine, as suggested by kib.  This is safe because these zone
    allocations are always done in a sleepable context.
    
    I have removed the now pointless num_resources tracking,
    and added sysctls / tunables to control UMA zone limits
    for these tls tags, as well as a tunable to let the
    driver pre-allocate tags at boot.
    
    MFC after:      2 weeks
---
 sys/dev/mlx5/mlx5_en/en_hw_tls.h      |   2 +-
 sys/dev/mlx5/mlx5_en/mlx5_en_hw_tls.c | 132 ++++++++++++++++++++++++----------
 2 files changed, 95 insertions(+), 39 deletions(-)

diff --git a/sys/dev/mlx5/mlx5_en/en_hw_tls.h b/sys/dev/mlx5/mlx5_en/en_hw_tls.h
index 2018198e5e52..d637314e040e 100644
--- a/sys/dev/mlx5/mlx5_en/en_hw_tls.h
+++ b/sys/dev/mlx5/mlx5_en/en_hw_tls.h
@@ -84,7 +84,7 @@ struct mlx5e_tls {
        struct workqueue_struct *wq;
        uma_zone_t zone;
        uint32_t max_resources;         /* max number of resources */
-       volatile uint32_t num_resources;        /* current number of resources 
*/
+       int zone_max;
        int init;                       /* set when ready */
        char zname[32];
 };
diff --git a/sys/dev/mlx5/mlx5_en/mlx5_en_hw_tls.c 
b/sys/dev/mlx5/mlx5_en/mlx5_en_hw_tls.c
index b5caa3ba53dd..8b5121066bdf 100644
--- a/sys/dev/mlx5/mlx5_en/mlx5_en_hw_tls.c
+++ b/sys/dev/mlx5/mlx5_en/mlx5_en_hw_tls.c
@@ -81,17 +81,57 @@ static const char *mlx5e_tls_stats_desc[] = {
 
 static void mlx5e_tls_work(struct work_struct *);
 
+/*
+ * Expand the tls tag UMA zone in a sleepable context
+ */
+
+static void
+mlx5e_prealloc_tags(struct mlx5e_priv *priv, int nitems)
+{
+       struct mlx5e_tls_tag **tags;
+       int i;
+
+       tags = malloc(sizeof(tags[0]) * nitems,
+           M_MLX5E_TLS, M_WAITOK);
+       for (i = 0; i < nitems; i++)
+               tags[i] = uma_zalloc(priv->tls.zone, M_WAITOK);
+       __compiler_membar();
+       for (i = 0; i < nitems; i++)
+               uma_zfree(priv->tls.zone, tags[i]);
+       free(tags, M_MLX5E_TLS);
+}
+
 static int
 mlx5e_tls_tag_import(void *arg, void **store, int cnt, int domain, int flags)
 {
        struct mlx5e_tls_tag *ptag;
-       int i;
+       struct mlx5e_priv *priv = arg;
+       int err, i;
+
+       /*
+        * mlx5_tls_open_tis() sleeps on a firmware command, so
+        * zone allocations must be done from a sleepable context.
+        * Note that the uma_zalloc() in mlx5e_tls_snd_tag_alloc()
+        * is done with M_NOWAIT so that hitting the zone limit does
+        * not cause the allocation to pause forever.
+        */
 
        for (i = 0; i != cnt; i++) {
                ptag = malloc_domainset(sizeof(*ptag), M_MLX5E_TLS,
                    mlx5_dev_domainset(arg), flags | M_ZERO);
+               if (ptag == NULL)
+                       return (i);
+               ptag->tls = &priv->tls;
                mtx_init(&ptag->mtx, "mlx5-tls-tag-mtx", NULL, MTX_DEF);
                INIT_WORK(&ptag->work, mlx5e_tls_work);
+               err = mlx5_tls_open_tis(priv->mdev, 0, priv->tdn,
+                   priv->pdn, &ptag->tisn);
+               if (err) {
+                       MLX5E_TLS_STAT_INC(ptag, tx_error, 1);
+                       free(ptag, M_MLX5E_TLS);
+                       return (i);
+               }
+
                store[i] = ptag;
        }
        return (i);
@@ -114,7 +154,6 @@ mlx5e_tls_tag_release(void *arg, void **store, int cnt)
 
                if (ptag->tisn != 0) {
                        mlx5_tls_close_tis(priv->mdev, ptag->tisn);
-                       atomic_add_32(&ptls->num_resources, -1U);
                }
 
                mtx_destroy(&ptag->mtx);
@@ -136,20 +175,38 @@ mlx5e_tls_tag_zfree(struct mlx5e_tls_tag *ptag)
        /* avoid leaking keys */
        memset(ptag->crypto_params, 0, sizeof(ptag->crypto_params));
 
-       /* update number of TIS contexts */
-       if (ptag->tisn == 0)
-               atomic_add_32(&ptag->tls->num_resources, -1U);
-
        /* return tag to UMA */
        uma_zfree(ptag->tls->zone, ptag);
 }
 
+static int
+mlx5e_max_tag_proc(SYSCTL_HANDLER_ARGS)
+{
+       struct mlx5e_priv *priv = (struct mlx5e_priv *)arg1;
+       struct mlx5e_tls *ptls = &priv->tls;
+       int err;
+       unsigned int max_tags;
+
+       max_tags = ptls->zone_max;
+       err = sysctl_handle_int(oidp, &max_tags, arg2, req);
+       if (err != 0 || req->newptr == NULL )
+               return err;
+       if (max_tags == ptls->zone_max)
+               return 0;
+       if (max_tags > priv->tls.max_resources || max_tags == 0)
+               return (EINVAL);
+       ptls->zone_max = max_tags;
+       uma_zone_set_max(ptls->zone, ptls->zone_max);
+       return 0;
+}
+
 int
 mlx5e_tls_init(struct mlx5e_priv *priv)
 {
        struct mlx5e_tls *ptls = &priv->tls;
        struct sysctl_oid *node;
-       uint32_t x;
+       uint32_t max_dek, max_tis, x;
+       int zone_max = 0, prealloc_tags = 0;
 
        if (MLX5_CAP_GEN(priv->mdev, tls_tx) == 0 ||
            MLX5_CAP_GEN(priv->mdev, log_max_dek) == 0)
@@ -164,13 +221,31 @@ mlx5e_tls_init(struct mlx5e_priv *priv)
        snprintf(ptls->zname, sizeof(ptls->zname),
            "mlx5_%u_tls", device_get_unit(priv->mdev->pdev->dev.bsddev));
 
+
+       TUNABLE_INT_FETCH("hw.mlx5.tls_max_tags", &zone_max);
+       TUNABLE_INT_FETCH("hw.mlx5.tls_prealloc_tags", &prealloc_tags);
+
        ptls->zone = uma_zcache_create(ptls->zname,
             sizeof(struct mlx5e_tls_tag), NULL, NULL, NULL, NULL,
-            mlx5e_tls_tag_import, mlx5e_tls_tag_release, priv->mdev,
-            UMA_ZONE_UNMANAGED);
+            mlx5e_tls_tag_import, mlx5e_tls_tag_release, priv,
+            UMA_ZONE_UNMANAGED | (prealloc_tags ? UMA_ZONE_NOFREE : 0));
 
        /* shared between RX and TX TLS */
-       ptls->max_resources = 1U << (MLX5_CAP_GEN(priv->mdev, log_max_dek) - 1);
+       max_dek = 1U << (MLX5_CAP_GEN(priv->mdev, log_max_dek) - 1);
+       max_tis = 1U << (MLX5_CAP_GEN(priv->mdev, log_max_tis) - 1);
+       ptls->max_resources = MIN(max_dek, max_tis);
+
+       if (zone_max != 0) {
+               ptls->zone_max = zone_max;
+               if (ptls->zone_max > priv->tls.max_resources)
+                       ptls->zone_max = priv->tls.max_resources;
+       } else {
+               ptls->zone_max = priv->tls.max_resources;
+       }
+
+       uma_zone_set_max(ptls->zone, ptls->zone_max);
+       if (prealloc_tags != 0)
+               mlx5e_prealloc_tags(priv, ptls->zone_max);
 
        for (x = 0; x != MLX5E_TLS_STATS_NUM; x++)
                ptls->stats.arg[x] = counter_u64_alloc(M_WAITOK);
@@ -183,6 +258,10 @@ mlx5e_tls_init(struct mlx5e_priv *priv)
        if (node == NULL)
                return (0);
 
+       SYSCTL_ADD_PROC(&priv->sysctl_ctx, SYSCTL_CHILDREN(node), OID_AUTO, 
"tls_max_tag",
+           CTLFLAG_RW | CTLTYPE_UINT | CTLFLAG_MPSAFE, priv, 0, 
mlx5e_max_tag_proc,
+           "IU", "Max number of TLS offload session tags");
+
        mlx5e_create_counter_stats(&ptls->ctx,
            SYSCTL_CHILDREN(node), "stats",
            mlx5e_tls_stats_desc, MLX5E_TLS_STATS_NUM,
@@ -206,9 +285,6 @@ mlx5e_tls_cleanup(struct mlx5e_priv *priv)
        uma_zdestroy(ptls->zone);
        destroy_workqueue(ptls->wq);
 
-       /* check if all resources are freed */
-       MPASS(priv->tls.num_resources == 0);
-
        for (x = 0; x != MLX5E_TLS_STATS_NUM; x++)
                counter_u64_free(ptls->stats.arg[x]);
 }
@@ -334,28 +410,16 @@ mlx5e_tls_snd_tag_alloc(if_t ifp,
        if (priv->gone != 0 || priv->tls.init == 0)
                return (EOPNOTSUPP);
 
-       /* allocate new tag from zone, if any */
        ptag = uma_zalloc(priv->tls.zone, M_WAITOK);
+       if (ptag == NULL)
+               return (ENOMEM);
 
        /* sanity check default values */
        MPASS(ptag->dek_index == 0);
        MPASS(ptag->dek_index_ok == 0);
 
-       /* setup TLS tag */
-       ptag->tls = &priv->tls;
-
        /* check if there is no TIS context */
-       if (ptag->tisn == 0) {
-               uint32_t value;
-
-               value = atomic_fetchadd_32(&priv->tls.num_resources, 1U);
-
-               /* check resource limits */
-               if (value >= priv->tls.max_resources) {
-                       error = ENOMEM;
-                       goto failure;
-               }
-       }
+       KASSERT(ptag->tisn != 0, ("ptag %p w/0 tisn", ptag));
 
        en = &params->tls.tls->params;
 
@@ -448,17 +512,9 @@ mlx5e_tls_snd_tag_alloc(if_t ifp,
        /* reset state */
        ptag->state = MLX5E_TLS_ST_INIT;
 
-       /*
-        *  Try to immediately init the tag.  We may fail if the NIC's
-        *  resources are tied up with send tags that are in the work
-        *  queue, waiting to be freed.  So if we fail, put ourselves
-        *  on the queue so as to try again after resouces have been freed.
-        */
        error = mlx5e_tls_st_init(priv, ptag);
-       if (error != 0) {
-               queue_work(priv->tls.wq, &ptag->work);
-               flush_work(&ptag->work);
-       }
+       if (error != 0)
+               goto failure;
 
        return (0);
 

Reply via email to