trie_modify() adjusts rsvd_tbl8s by a depth_diff computed from the
current RIB state. The RIB is not invariant between the ADD of a
prefix and its later DEL (a covering parent may be added or removed
in between), so depth_diff at DEL time may not match depth_diff at
ADD time. Repeated over such asymmetric ADD/DEL pairs, rsvd_tbl8s
drifts and eventually underflows, wrapping around to a value near
UINT32_MAX, after which the pre-check rejects all long-prefix ADDs
with -ENOSPC even when no tbl8 is actually in use.

Drop rsvd_tbl8s and base the pre-check on tbl8_pool_pos, which
tbl8_get()/tbl8_put() maintain exactly. To preserve the QSBR_MODE_DQ
safety net previously provided by the retry-with-reclaim inside
tbl8_alloc(), the pre-check now calls rte_rcu_qsbr_dq_reclaim() for
up to depth_diff entries and re-checks the pool before returning
-ENOSPC.

The single-tbl8 retry inside tbl8_alloc() is removed: depth_diff is
the algorithmic upper bound for new tbl8 allocations, and the
pre-check now performs the DQ reclaim before allocation, so the
retry inside the allocator is no longer needed.

Fixes: c3e12e0f0354 ("fib: add dataplane algorithm for IPv6")
Cc: [email protected]
Signed-off-by: Maxime Leroy <[email protected]>
---
 lib/fib/trie.c | 23 ++++++++++-------------
 lib/fib/trie.h |  3 +--
 2 files changed, 11 insertions(+), 15 deletions(-)

diff --git a/lib/fib/trie.c b/lib/fib/trie.c
index fa5d9ec6b0..52f25d499c 100644
--- a/lib/fib/trie.c
+++ b/lib/fib/trie.c
@@ -161,12 +161,6 @@ tbl8_alloc(struct rte_trie_tbl *dp, uint64_t nh)
        uint8_t         *tbl8_ptr;
 
        tbl8_idx = tbl8_get(dp);
-
-       /* If there are no tbl8 groups try to reclaim one. */
-       if (unlikely(tbl8_idx == -ENOSPC && dp->dq &&
-                       !rte_rcu_qsbr_dq_reclaim(dp->dq, 1, NULL, NULL, NULL)))
-               tbl8_idx = tbl8_get(dp);
-
        if (tbl8_idx < 0)
                return tbl8_idx;
        tbl8_ptr = get_tbl_p_by_idx(dp->tbl8,
@@ -603,8 +597,15 @@ trie_modify(struct rte_fib6 *fib, const struct rte_ipv6_addr *ip,
                        return 0;
                }
 
-               if ((depth > 24) && (dp->rsvd_tbl8s + depth_diff > dp->number_tbl8s))
-                       return -ENOSPC;
+               if ((depth > 24) &&
+                   (dp->tbl8_pool_pos + depth_diff > dp->number_tbl8s)) {
+                       /* Reclaim deferred tbl8s before failing. */
+                       if (dp->dq != NULL)
+                               rte_rcu_qsbr_dq_reclaim(dp->dq, depth_diff,
+                                       NULL, NULL, NULL);
+                       if (dp->tbl8_pool_pos + depth_diff > dp->number_tbl8s)
+                               return -ENOSPC;
+               }
 
                node = rte_rib6_insert(rib, &ip_masked, depth);
                if (node == NULL)
@@ -614,15 +615,13 @@ trie_modify(struct rte_fib6 *fib, const struct rte_ipv6_addr *ip,
                if (parent != NULL) {
                        rte_rib6_get_nh(parent, &par_nh);
                        if (par_nh == next_hop)
-                               goto successfully_added;
+                               return 0;
                }
                ret = modify_dp(dp, rib, &ip_masked, depth, next_hop);
                if (ret != 0) {
                        rte_rib6_remove(rib, &ip_masked, depth);
                        return ret;
                }
-successfully_added:
-               dp->rsvd_tbl8s += depth_diff;
                return 0;
        case RTE_FIB6_DEL:
                if (node == NULL)
@@ -641,8 +640,6 @@ trie_modify(struct rte_fib6 *fib, const struct rte_ipv6_addr *ip,
                if (ret != 0)
                        return ret;
                rte_rib6_remove(rib, ip, depth);
-
-               dp->rsvd_tbl8s -= depth_diff;
                return 0;
        default:
                break;
diff --git a/lib/fib/trie.h b/lib/fib/trie.h
index c34cc2c057..b42a28f84e 100644
--- a/lib/fib/trie.h
+++ b/lib/fib/trie.h
@@ -31,8 +31,7 @@
 
 struct rte_trie_tbl {
        uint32_t        number_tbl8s;   /**< Total number of tbl8s */
-       uint32_t        rsvd_tbl8s;     /**< Number of reserved tbl8s */
-       uint32_t        cur_tbl8s;      /**< Current cumber of tbl8s */
+       uint32_t        cur_tbl8s;      /**< Current number of tbl8s */
        uint64_t        def_nh;         /**< Default next hop */
        enum rte_fib_trie_nh_sz nh_sz;  /**< Size of nexthop entry */
        uint64_t        *tbl8;          /**< tbl8 table. */
-- 
2.43.0

Reply via email to