> When rte_hash_add_key_data() overwrites an existing key, the old data
> pointer is silently lost. With RCU-protected readers still potentially
> accessing the old data, the application has no safe way to free it.
> 
> When RCU is configured with a free_key_data_func callback, automatically
> enqueue the old data for deferred freeing via the RCU defer queue on
> overwrite. In SYNC mode, synchronize and call free_key_data_func
> directly.
> 
> Signed-off-by: Robin Jarry <[email protected]>
> ---
>  app/test/test_hash.c                   | 134 +++++++++++++++++++++++++
>  doc/guides/rel_notes/release_26_03.rst |   6 ++
>  lib/hash/rte_cuckoo_hash.c             | 101 ++++++++++++++-----
>  lib/hash/rte_hash.h                    |   8 +-
>  4 files changed, 224 insertions(+), 25 deletions(-)
> 
> diff --git a/app/test/test_hash.c b/app/test/test_hash.c
> index 3fb3d96d0546..56a7779e09b9 100644
> --- a/app/test/test_hash.c
> +++ b/app/test/test_hash.c
> @@ -2342,6 +2342,137 @@ test_hash_rcu_qsbr_dq_reclaim(void)
>       return 0;
>  }
> 
> +static void *old_data;
> +
> +static void
> +test_hash_free_key_data_func(void *p __rte_unused, void *key_data)
> +{
> +     old_data = key_data;
> +}
> +
> +/*
> + * Test automatic RCU free on overwrite via rte_hash_add_key_data.
> + *  - Create hash with RW_CONCURRENCY_LF and RCU QSBR in DQ mode
> + *    with a free_key_data_func callback that increments a counter.
> + *  - Register a pseudo reader thread.
> + *  - Add key with data (void *)1.
> + *  - Overwrite same key with data (void *)2 via rte_hash_add_key_data.
> + *  - Report quiescent state, trigger reclamation.
> + *  - Verify the free callback was called exactly once.
> + *  - Delete the key, report quiescent state, reclaim again.
> + *  - Verify the free callback was called a second time.
> + */
> +static int
> +test_hash_rcu_qsbr_replace_auto_free(void)
> +{
> +     struct rte_hash_rcu_config rcu = {
> +             .v = NULL,
> +             .mode = RTE_HASH_QSBR_MODE_DQ,
> +             .free_key_data_func = test_hash_free_key_data_func,
> +             .key_data_ptr = NULL,
> +     };
> +     struct rte_hash_parameters params = {
> +             .name = "test_replace_auto_free",
> +             .entries = 16,
> +             .key_len = sizeof(uint32_t),
> +             .hash_func = NULL,
> +             .hash_func_init_val = 0,
> +             .socket_id = SOCKET_ID_ANY,
> +             .extra_flag = RTE_HASH_EXTRA_FLAGS_RW_CONCURRENCY_LF,
> +     };
> +     struct rte_hash *hash = NULL;
> +     uint32_t key = 55;
> +     int32_t status;
> +     int ret = -1;
> +     size_t sz;
> +
> +     printf("\n# Running RCU replace auto-free test\n");
> +
> +     hash = rte_hash_create(&params);
> +     if (hash == NULL) {
> +             printf("hash creation failed\n");
> +             goto end;
> +     }
> +
> +     sz = rte_rcu_qsbr_get_memsize(RTE_MAX_LCORE);
> +     rcu.v = rte_zmalloc(NULL, sz, RTE_CACHE_LINE_SIZE);
> +     if (rcu.v == NULL) {
> +             printf("RCU QSBR alloc failed\n");
> +             goto end;
> +     }
> +     status = rte_rcu_qsbr_init(rcu.v, RTE_MAX_LCORE);
> +     if (status != 0) {
> +             printf("RCU QSBR init failed\n");
> +             goto end;
> +     }
> +
> +     status = rte_hash_rcu_qsbr_add(hash, &rcu);
> +     if (status != 0) {
> +             printf("RCU QSBR add failed\n");
> +             goto end;
> +     }
> +
> +     /* Register pseudo reader */
> +     status = rte_rcu_qsbr_thread_register(rcu.v, 0);
> +     if (status != 0) {
> +             printf("RCU QSBR thread register failed\n");
> +             goto end;
> +     }
> +     rte_rcu_qsbr_thread_online(rcu.v, 0);
> +
> +     old_data = NULL;
> +
> +     /* Add key with data = (void *)1 */
> +     status = rte_hash_add_key_data(hash, &key, (void *)(uintptr_t)1);
> +     if (status != 0) {
> +             printf("failed to add key (status=%d)\n", status);
> +             goto end;
> +     }
> +
> +     /* Overwrite same key with data = (void *)2 */
> +     status = rte_hash_add_key_data(hash, &key, (void *)(uintptr_t)2);
> +     if (status != 0) {
> +             printf("failed to overwrite key (status=%d)\n", status);
> +             goto end;
> +     }
> +
> +     /* Reader quiescent and reclaim */
> +     rte_rcu_qsbr_quiescent(rcu.v, 0);
> +     rte_hash_rcu_qsbr_dq_reclaim(hash, NULL, NULL, NULL);
> +
> +     if (old_data != (void *)(uintptr_t)1) {
> +             printf("old data should be 0x1 but is %p\n", old_data);
> +             goto end;
> +     }
> +
> +     /* Delete the key */
> +     status = rte_hash_del_key(hash, &key);
> +     if (status < 0) {
> +             printf("failed to delete key (status=%d)\n", status);
> +             goto end;
> +     }
> +
> +     /* Reader quiescent and reclaim again */
> +     rte_rcu_qsbr_quiescent(rcu.v, 0);
> +     rte_hash_rcu_qsbr_dq_reclaim(hash, NULL, NULL, NULL);
> +
> +     if (old_data != (void *)(uintptr_t)2) {
> +             printf("old data should be 2 but is %p\n", old_data);
> +             goto end;
> +     }
> +
> +     ret = 0;
> +end:
> +     if (rcu.v != NULL) {
> +             rte_rcu_qsbr_thread_offline(rcu.v, 0);
> +             rte_rcu_qsbr_thread_unregister(rcu.v, 0);
> +     }
> +     rte_hash_free(hash);
> +     rte_free(rcu.v);
> +
> +     return ret;
> +}
> +
>  /*
>   * Do all unit and performance tests.
>   */
> @@ -2423,6 +2554,9 @@ test_hash(void)
>       if (test_hash_rcu_qsbr_dq_reclaim() < 0)
>               return -1;
> 
> +     if (test_hash_rcu_qsbr_replace_auto_free() < 0)
> +             return -1;
> +
>       return 0;
>  }
> 
> diff --git a/doc/guides/rel_notes/release_26_03.rst
> b/doc/guides/rel_notes/release_26_03.rst
> index afdf1af06c20..693034bcb0d2 100644
> --- a/doc/guides/rel_notes/release_26_03.rst
> +++ b/doc/guides/rel_notes/release_26_03.rst
> @@ -87,6 +87,12 @@ New Features
>    * Added support for AES-XTS cipher algorithm.
>    * Added support for SHAKE-128 and SHAKE-256 authentication algorithms.
> 
> +* **Added automatic deferred free on hash data overwrite.**
> +
> +  When RCU is configured with a ``free_key_data_func`` callback,
> +  ``rte_hash_add_key_data`` now automatically defers freeing the old data
> +  pointer on key overwrite via the RCU defer queue.
> +
> 
>  Removed Items
>  -------------
> diff --git a/lib/hash/rte_cuckoo_hash.c b/lib/hash/rte_cuckoo_hash.c
> index 8189bde024be..f487b3b725dd 100644
> --- a/lib/hash/rte_cuckoo_hash.c
> +++ b/lib/hash/rte_cuckoo_hash.c
> @@ -75,6 +75,7 @@ EAL_REGISTER_TAILQ(rte_hash_tailq)
>  struct __rte_hash_rcu_dq_entry {
>       uint32_t key_idx;
>       uint32_t ext_bkt_idx;
> +     void *old_data;
>  };
> 
>  RTE_EXPORT_SYMBOL(rte_hash_find_existing)
> @@ -763,10 +764,11 @@ enqueue_slot_back(const struct rte_hash *h,
> 
>  /* Search a key from bucket and update its data.
>   * Writer holds the lock before calling this.
> + * If old_data is non-NULL, save the previous data pointer before 
> overwriting.
>   */
>  static inline int32_t
>  search_and_update(const struct rte_hash *h, void *data, const void *key,
> -     struct rte_hash_bucket *bkt, uint16_t sig)
> +     struct rte_hash_bucket *bkt, uint16_t sig, void **old_data)
>  {
>       int i;
>       struct rte_hash_key *k, *keys = h->key_store;
> @@ -776,6 +778,8 @@ search_and_update(const struct rte_hash *h, void *data,
> const void *key,
>                       k = (struct rte_hash_key *) ((char *)keys +
>                                       bkt->key_idx[i] * h->key_entry_size);
>                       if (rte_hash_cmp_eq(key, k->key, h) == 0) {
> +                             if (old_data != NULL)
> +                                     *old_data = k->pdata;

For consistency, shouldn't we use here instead of load/store:
*old_data = rte_atomic_exchange_explicit(&k->pdata, data, ...);
?
 

>                               /* The store to application data at *data
>                                * should not leak after the store to pdata
>                                * in the key store. i.e. pdata is the guard
> @@ -807,7 +811,7 @@ rte_hash_cuckoo_insert_mw(const struct rte_hash *h,
>               struct rte_hash_bucket *sec_bkt,
>               const struct rte_hash_key *key, void *data,
>               uint16_t sig, uint32_t new_idx,
> -             int32_t *ret_val)
> +             int32_t *ret_val, void **old_data)
>  {
>       unsigned int i;
>       struct rte_hash_bucket *cur_bkt;
> @@ -817,7 +821,7 @@ rte_hash_cuckoo_insert_mw(const struct rte_hash *h,
>       /* Check if key was inserted after last check but before this
>        * protected region in case of inserting duplicated keys.
>        */
> -     ret = search_and_update(h, data, key, prim_bkt, sig);
> +     ret = search_and_update(h, data, key, prim_bkt, sig, old_data);
>       if (ret != -1) {
>               __hash_rw_writer_unlock(h);
>               *ret_val = ret;
> @@ -825,7 +829,7 @@ rte_hash_cuckoo_insert_mw(const struct rte_hash *h,
>       }
> 
>       FOR_EACH_BUCKET(cur_bkt, sec_bkt) {
> -             ret = search_and_update(h, data, key, cur_bkt, sig);
> +             ret = search_and_update(h, data, key, cur_bkt, sig, old_data);
>               if (ret != -1) {
>                       __hash_rw_writer_unlock(h);
>                       *ret_val = ret;
> @@ -872,7 +876,7 @@ rte_hash_cuckoo_move_insert_mw(const struct rte_hash
> *h,
>                       const struct rte_hash_key *key, void *data,
>                       struct queue_node *leaf, uint32_t leaf_slot,
>                       uint16_t sig, uint32_t new_idx,
> -                     int32_t *ret_val)
> +                     int32_t *ret_val, void **old_data)
>  {
>       uint32_t prev_alt_bkt_idx;
>       struct rte_hash_bucket *cur_bkt;
> @@ -892,7 +896,7 @@ rte_hash_cuckoo_move_insert_mw(const struct rte_hash
> *h,
>       /* Check if key was inserted after last check but before this
>        * protected region.
>        */
> -     ret = search_and_update(h, data, key, bkt, sig);
> +     ret = search_and_update(h, data, key, bkt, sig, old_data);
>       if (ret != -1) {
>               __hash_rw_writer_unlock(h);
>               *ret_val = ret;
> @@ -900,7 +904,7 @@ rte_hash_cuckoo_move_insert_mw(const struct rte_hash
> *h,
>       }
> 
>       FOR_EACH_BUCKET(cur_bkt, alt_bkt) {
> -             ret = search_and_update(h, data, key, cur_bkt, sig);
> +             ret = search_and_update(h, data, key, cur_bkt, sig, old_data);
>               if (ret != -1) {
>                       __hash_rw_writer_unlock(h);
>                       *ret_val = ret;
> @@ -997,7 +1001,8 @@ rte_hash_cuckoo_make_space_mw(const struct rte_hash
> *h,
>                       struct rte_hash_bucket *sec_bkt,
>                       const struct rte_hash_key *key, void *data,
>                       uint16_t sig, uint32_t bucket_idx,
> -                     uint32_t new_idx, int32_t *ret_val)
> +                     uint32_t new_idx, int32_t *ret_val,
> +                     void **old_data)
>  {
>       unsigned int i;
>       struct queue_node queue[RTE_HASH_BFS_QUEUE_MAX_LEN];
> @@ -1023,7 +1028,7 @@ rte_hash_cuckoo_make_space_mw(const struct rte_hash
> *h,
>                               int32_t ret = rte_hash_cuckoo_move_insert_mw(h,
>                                               bkt, sec_bkt, key, data,
>                                               tail, i, sig,
> -                                             new_idx, ret_val);
> +                                             new_idx, ret_val, old_data);
>                               if (likely(ret != -1))
>                                       return ret;
>                       }
> @@ -1076,6 +1081,29 @@ alloc_slot(const struct rte_hash *h, struct lcore_cache
> *cached_free_slots)
>       return slot_id;
>  }
> 
> +/*
> + * When RCU is configured with a free function, auto-free the overwritten
> + * data pointer via RCU.
> + */
> +static inline void
> +__hash_rcu_auto_free_old_data(const struct rte_hash *h, void *old_data_val)
> +{
> +     struct __rte_hash_rcu_dq_entry rcu_dq_entry = {
> +             .key_idx = EMPTY_SLOT, /* sentinel value for
> __hash_rcu_qsbr_free_resource */
> +             .old_data = old_data_val,
> +     };
> +
> +     if (h->hash_rcu_cfg == NULL || h->hash_rcu_cfg->free_key_data_func ==
> NULL)
> +             return;
> +
> +     if (h->dq == NULL || rte_rcu_qsbr_dq_enqueue(h->dq, &rcu_dq_entry) != 0)
> {
> +             /* SYNC mode or enqueue failed in DQ mode */
> +             rte_rcu_qsbr_synchronize(h->hash_rcu_cfg->v,
> RTE_QSBR_THRID_INVALID);
> +             h->hash_rcu_cfg->free_key_data_func(h->hash_rcu_cfg-
> >key_data_ptr,
> +                                                 old_data_val);
> +     }
> +}
> +
>  static inline int32_t
>  __rte_hash_add_key_with_hash(const struct rte_hash *h, const void *key,
>                                               hash_sig_t sig, void *data)
> @@ -1092,6 +1120,7 @@ __rte_hash_add_key_with_hash(const struct rte_hash *h,
> const void *key,
>       struct lcore_cache *cached_free_slots = NULL;
>       int32_t ret_val;
>       struct rte_hash_bucket *last;
> +     void *saved_old_data = NULL;
> 
>       short_sig = get_short_sig(sig);
>       prim_bucket_idx = get_prim_bucket_index(h, sig);
> @@ -1103,18 +1132,20 @@ __rte_hash_add_key_with_hash(const struct rte_hash
> *h, const void *key,
> 
>       /* Check if key is already inserted in primary location */
>       __hash_rw_writer_lock(h);
> -     ret = search_and_update(h, data, key, prim_bkt, short_sig);
> +     ret = search_and_update(h, data, key, prim_bkt, short_sig,
> +                             &saved_old_data);
>       if (ret != -1) {
>               __hash_rw_writer_unlock(h);
> -             return ret;
> +             goto overwrite;
>       }
> 
>       /* Check if key is already inserted in secondary location */
>       FOR_EACH_BUCKET(cur_bkt, sec_bkt) {
> -             ret = search_and_update(h, data, key, cur_bkt, short_sig);
> +             ret = search_and_update(h, data, key, cur_bkt, short_sig,
> +                                     &saved_old_data);
>               if (ret != -1) {
>                       __hash_rw_writer_unlock(h);
> -                     return ret;
> +                     goto overwrite;
>               }
>       }
> 
> @@ -1153,33 +1184,39 @@ __rte_hash_add_key_with_hash(const struct rte_hash
> *h, const void *key,
> 
>       /* Find an empty slot and insert */
>       ret = rte_hash_cuckoo_insert_mw(h, prim_bkt, sec_bkt, key, data,
> -                                     short_sig, slot_id, &ret_val);
> +                                     short_sig, slot_id, &ret_val,
> +                                     &saved_old_data);
>       if (ret == 0)
>               return slot_id - 1;
>       else if (ret == 1) {
>               enqueue_slot_back(h, cached_free_slots, slot_id);
> -             return ret_val;
> +             ret = ret_val;
> +             goto overwrite;
>       }
> 
>       /* Primary bucket full, need to make space for new entry */
>       ret = rte_hash_cuckoo_make_space_mw(h, prim_bkt, sec_bkt, key, data,
> -                             short_sig, prim_bucket_idx, slot_id, &ret_val);
> +                             short_sig, prim_bucket_idx, slot_id, &ret_val,
> +                             &saved_old_data);
>       if (ret == 0)
>               return slot_id - 1;
>       else if (ret == 1) {
>               enqueue_slot_back(h, cached_free_slots, slot_id);
> -             return ret_val;
> +             ret = ret_val;
> +             goto overwrite;
>       }
> 
>       /* Also search secondary bucket to get better occupancy */
>       ret = rte_hash_cuckoo_make_space_mw(h, sec_bkt, prim_bkt, key, data,
> -                             short_sig, sec_bucket_idx, slot_id, &ret_val);
> +                             short_sig, sec_bucket_idx, slot_id, &ret_val,
> +                             &saved_old_data);
> 
>       if (ret == 0)
>               return slot_id - 1;
>       else if (ret == 1) {
>               enqueue_slot_back(h, cached_free_slots, slot_id);
> -             return ret_val;
> +             ret = ret_val;
> +             goto overwrite;
>       }
> 
>       /* if ext table not enabled, we failed the insertion */
> @@ -1193,17 +1230,21 @@ __rte_hash_add_key_with_hash(const struct rte_hash
> *h, const void *key,
>        */
>       __hash_rw_writer_lock(h);
>       /* We check for duplicates again since could be inserted before the 
> lock */
> -     ret = search_and_update(h, data, key, prim_bkt, short_sig);
> +     ret = search_and_update(h, data, key, prim_bkt, short_sig,
> +                             &saved_old_data);
>       if (ret != -1) {
>               enqueue_slot_back(h, cached_free_slots, slot_id);
> -             goto failure;
> +             __hash_rw_writer_unlock(h);
> +             goto overwrite;
>       }
> 
>       FOR_EACH_BUCKET(cur_bkt, sec_bkt) {
> -             ret = search_and_update(h, data, key, cur_bkt, short_sig);
> +             ret = search_and_update(h, data, key, cur_bkt, short_sig,
> +                                     &saved_old_data);
>               if (ret != -1) {
>                       enqueue_slot_back(h, cached_free_slots, slot_id);
> -                     goto failure;
> +                     __hash_rw_writer_unlock(h);
> +                     goto overwrite;
>               }
>       }
> 
> @@ -1263,6 +1304,11 @@ __rte_hash_add_key_with_hash(const struct rte_hash
> *h, const void *key,
>       __hash_rw_writer_unlock(h);
>       return slot_id - 1;

With multiple goto labels that function becomes even more complicated.
Can we just add old_data as an extra parameter: 
__rte_hash_add_key_with_hash(const struct rte_hash *h, const void *key,
                   hash_sig_t sig, void *data, void **old_data)
And make the callers to invoke __hash_rcu_auto_free_old_data()?

> +overwrite:
> +     if (saved_old_data != NULL)
> +             __hash_rcu_auto_free_old_data(h, saved_old_data);
> +     return ret;
> +
>  failure:
>       __hash_rw_writer_unlock(h);
>       return ret;
> @@ -1566,6 +1612,15 @@ __hash_rcu_qsbr_free_resource(void *p, void *e,
> unsigned int n)
>                       *((struct __rte_hash_rcu_dq_entry *)e);
> 
>       RTE_SET_USED(n);
> +
> +     if (rcu_dq_entry.key_idx == EMPTY_SLOT) {
> +             /* Overwrite case: free old data only, do not recycle slot */
> +             RTE_ASSERT(h->hash_rcu_cfg->free_key_data_func != NULL);
> +             h->hash_rcu_cfg->free_key_data_func(h->hash_rcu_cfg-
> >key_data_ptr,
> +                                                 rcu_dq_entry.old_data);
> +             return;
> +     }
> +
>       keys = h->key_store;
> 
>       k = (struct rte_hash_key *) ((char *)keys +
> diff --git a/lib/hash/rte_hash.h b/lib/hash/rte_hash.h
> index f692e0868dcf..e33f0aea0f5e 100644
> --- a/lib/hash/rte_hash.h
> +++ b/lib/hash/rte_hash.h
> @@ -226,7 +226,9 @@ rte_hash_max_key_id(const struct rte_hash *h);
>   * Thread safety can be enabled by setting flag during
>   * table creation.
>   * If the key exists already in the table, this API updates its value
> - * with 'data' passed in this API. It is the responsibility of
> + * with 'data' passed in this API. If RCU is configured with a
> + * free_key_data_func callback, the old data is automatically
> + * deferred-freed via RCU. Otherwise, it is the responsibility of
>   * the application to manage any memory associated with the old value.
>   * The readers might still be using the old value even after this API
>   * has returned.
> @@ -253,7 +255,9 @@ rte_hash_add_key_data(const struct rte_hash *h, const void
> *key, void *data);
>   * Thread safety can be enabled by setting flag during
>   * table creation.
>   * If the key exists already in the table, this API updates its value
> - * with 'data' passed in this API. It is the responsibility of
> + * with 'data' passed in this API. If RCU is configured with a
> + * free_key_data_func callback, the old data is automatically
> + * deferred-freed via RCU. Otherwise, it is the responsibility of
>   * the application to manage any memory associated with the old value.
>   * The readers might still be using the old value even after this API
>   * has returned.
> --
> 2.53.0

Reply via email to