When a hash entry is added, there are 2 sets of stores.

1) The application writes its data to memory (whose address, possibly
NULL, is passed to the rte_hash_add_key_with_hash_data API)
2) The rte_hash library writes to its own internal data structures;
key store entry and the hash table.

The only ordering requirement between these 2 is that the store
to the application data must complete before the store to key_idx.
There are no ordering requirements between the stores to the
key/signature and the store to the application data. The
synchronization point for the application data can be any point between
the 'store to application data' and the 'store to key_idx'. So, pdata
should not be a guard variable for the data in the hash table. It should
be a guard variable only for the application data written to the memory
location pointed to by pdata. Hence, pdata can be loaded after the full
key comparison.

Fixes: e605a1d36 ("hash: add lock-free r/w concurrency")
Cc: sta...@dpdk.org

Signed-off-by: Honnappa Nagarahalli <honnappa.nagaraha...@arm.com>
Reviewed-by: Gavin Hu <gavin...@arm.com>
Tested-by: Ruifeng Wang <ruifeng.w...@arm.com>
 lib/librte_hash/rte_cuckoo_hash.c | 67 +++++++++++++++----------------
 1 file changed, 32 insertions(+), 35 deletions(-)

diff --git a/lib/librte_hash/rte_cuckoo_hash.c b/lib/librte_hash/rte_cuckoo_hash.c
index f37f6957d..077328fed 100644
--- a/lib/librte_hash/rte_cuckoo_hash.c
+++ b/lib/librte_hash/rte_cuckoo_hash.c
@@ -649,9 +649,11 @@ search_and_update(const struct rte_hash *h, void *data, 
const void *key,
                        k = (struct rte_hash_key *) ((char *)keys +
                                        bkt->key_idx[i] * h->key_entry_size);
                        if (rte_hash_cmp_eq(key, k->key, h) == 0) {
-                               /* 'pdata' acts as the synchronization point
-                                * when an existing hash entry is updated.
-                                * Key is not updated in this case.
+                               /* The store to application data at *data
+                                * should not leak after the store to pdata
+                                * in the key store. i.e. pdata is the guard
+                                * variable. Release the application data
+                                * to the readers.
@@ -711,11 +713,10 @@ rte_hash_cuckoo_insert_mw(const struct rte_hash *h,
                /* Check if slot is available */
                if (likely(prim_bkt->key_idx[i] == EMPTY_SLOT)) {
                        prim_bkt->sig_current[i] = sig;
-                       /* Key can be of arbitrary length, so it is
-                        * not possible to store it atomically.
-                        * Hence the new key element's memory stores
-                        * (key as well as data) should be complete
-                        * before it is referenced.
+                       /* Store to signature and key should not
+                        * leak after the store to key_idx. i.e.
+                        * key_idx is the guard variable for signature
+                        * and key.
@@ -990,17 +991,15 @@ __rte_hash_add_key_with_hash(const struct rte_hash *h, 
const void *key,
        new_k = RTE_PTR_ADD(keys, (uintptr_t)slot_id * h->key_entry_size);
        new_idx = (uint32_t)((uintptr_t) slot_id);
-       /* Copy key */
-       memcpy(new_k->key, key, h->key_len);
-       /* Key can be of arbitrary length, so it is not possible to store
-        * it atomically. Hence the new key element's memory stores
-        * (key as well as data) should be complete before it is referenced.
-        * 'pdata' acts as the synchronization point when an existing hash
-        * entry is updated.
+       /* The store to application data (by the application) at *data should
+        * not leak after the store of pdata in the key store. i.e. pdata is
+        * the guard variable. Release the application data to the readers.
+       /* Copy key */
+       memcpy(new_k->key, key, h->key_len);
        /* Find an empty slot and insert */
        ret = rte_hash_cuckoo_insert_mw(h, prim_bkt, sec_bkt, key, data,
@@ -1064,8 +1063,10 @@ __rte_hash_add_key_with_hash(const struct rte_hash *h, 
const void *key,
                        /* Check if slot is available */
                        if (likely(cur_bkt->key_idx[i] == EMPTY_SLOT)) {
                                cur_bkt->sig_current[i] = short_sig;
-                               /* Store to signature should not leak after
-                                * the store to key_idx
+                               /* Store to signature and key should not
+                                * leak after the store to key_idx. i.e.
+                                * key_idx is the guard variable for signature
+                                * and key.
@@ -1087,8 +1088,9 @@ __rte_hash_add_key_with_hash(const struct rte_hash *h, 
const void *key,
        bkt_id = (uint32_t)((uintptr_t)ext_bkt_id) - 1;
        /* Use the first location of the new bucket */
        (h->buckets_ext[bkt_id]).sig_current[0] = short_sig;
-       /* Store to signature should not leak after
-        * the store to key_idx
+       /* Store to signature and key should not leak after
+        * the store to key_idx. i.e. key_idx is the guard variable
+        * for signature and key.
@@ -1184,7 +1186,6 @@ search_one_bucket_lf(const struct rte_hash *h, const void 
*key, uint16_t sig,
        int i;
        uint32_t key_idx;
-       void *pdata;
        struct rte_hash_key *k, *keys = h->key_store;
        for (i = 0; i < RTE_HASH_BUCKET_ENTRIES; i++) {
@@ -1199,12 +1200,13 @@ search_one_bucket_lf(const struct rte_hash *h, const 
void *key, uint16_t sig,
                        if (key_idx != EMPTY_SLOT) {
                                k = (struct rte_hash_key *) ((char *)keys +
                                                key_idx * h->key_entry_size);
-                               pdata = __atomic_load_n(&k->pdata,
-                                               __ATOMIC_ACQUIRE);
                                if (rte_hash_cmp_eq(key, k->key, h) == 0) {
-                                       if (data != NULL)
-                                               *data = pdata;
+                                       if (data != NULL) {
+                                               *data = __atomic_load_n(
+                                                       &k->pdata,
+                                                       __ATOMIC_ACQUIRE);
+                                       }
                                         * Return index where key is stored,
                                         * subtracting the first dummy index
@@ -1902,7 +1904,6 @@ __rte_hash_lookup_bulk_lf(const struct rte_hash *h, const 
void **keys,
        uint32_t prim_hitmask[RTE_HASH_LOOKUP_BULK_MAX] = {0};
        uint32_t sec_hitmask[RTE_HASH_LOOKUP_BULK_MAX] = {0};
        struct rte_hash_bucket *cur_bkt, *next_bkt;
-       void *pdata[RTE_HASH_LOOKUP_BULK_MAX];
        uint32_t cnt_b, cnt_a;
        /* Prefetch first keys */
@@ -2004,10 +2005,6 @@ __rte_hash_lookup_bulk_lf(const struct rte_hash *h, 
const void **keys,
                                        (const char *)h->key_store +
                                        key_idx * h->key_entry_size);
-                               if (key_idx != EMPTY_SLOT)
-                                       pdata[i] = __atomic_load_n(
-                                                       &key_slot->pdata,
-                                                       __ATOMIC_ACQUIRE);
                                 * If key index is 0, do not compare key,
                                 * as it is checking the dummy slot
@@ -2016,7 +2013,9 @@ __rte_hash_lookup_bulk_lf(const struct rte_hash *h, const 
void **keys,
                                                key_slot->key, keys[i], h)) {
                                        if (data != NULL)
-                                               data[i] = pdata[i];
+                                               data[i] = __atomic_load_n(
+                                                       &key_slot->pdata,
+                                                       __ATOMIC_ACQUIRE);
                                        hits |= 1ULL << i;
                                        positions[i] = key_idx - 1;
@@ -2038,10 +2037,6 @@ __rte_hash_lookup_bulk_lf(const struct rte_hash *h, 
const void **keys,
                                        (const char *)h->key_store +
                                        key_idx * h->key_entry_size);
-                               if (key_idx != EMPTY_SLOT)
-                                       pdata[i] = __atomic_load_n(
-                                                       &key_slot->pdata,
-                                                       __ATOMIC_ACQUIRE);
                                 * If key index is 0, do not compare key,
                                 * as it is checking the dummy slot
@@ -2051,7 +2046,9 @@ __rte_hash_lookup_bulk_lf(const struct rte_hash *h, const 
void **keys,
                                                key_slot->key, keys[i], h)) {
                                        if (data != NULL)
-                                               data[i] = pdata[i];
+                                               data[i] = __atomic_load_n(
+                                                       &key_slot->pdata,
+                                                       __ATOMIC_ACQUIRE);
                                        hits |= 1ULL << i;
                                        positions[i] = key_idx - 1;

Reply via email to