Some users of rhashtable might need to change the key
of an object and move it to a different location in the table.
Other users might want to allocate objects using
SLAB_TYPESAFE_BY_RCU which can result in the same memory allocation
being used for a different (type-compatible) purpose and similarly
end up in a different hash-chain.

To support these, we store a unique NULLS_MARKER at the end of
each chain, and when a search fails to find a match, we check
if the NULLS marker found was the expected one.  If not,
the search is repeated.

The unique NULLS_MARKER is derived from the address of the
head of the chain.

If an object is removed and re-added to the same hash chain, we won't
notice by looking that the NULLS marker.  In this case we must be sure
that it was not re-added *after* its original location, or a lookup may
incorrectly fail.  The easiest solution is to ensure it is inserted at
the start of the chain.  insert_slow() already does that,
insert_fast() does not.  So this patch changes insert_fast to always
insert at the head of the chain.

Note that such a user must do their own double-checking of
the object found by rhashtable_lookup_fast() after ensuring
mutual exclusion which anything that might change the key, such as
successfully taking a new reference.

Signed-off-by: NeilBrown <ne...@suse.com>
---
 include/linux/rhashtable.h | 35 +++++++++++++++++++++++------------
 lib/rhashtable.c           |  8 +++++---
 2 files changed, 28 insertions(+), 15 deletions(-)

diff --git a/include/linux/rhashtable.h b/include/linux/rhashtable.h
index eb7111039247..10435a77b156 100644
--- a/include/linux/rhashtable.h
+++ b/include/linux/rhashtable.h
@@ -75,8 +75,10 @@ struct bucket_table {
        struct rhash_head __rcu *buckets[] ____cacheline_aligned_in_smp;
 };
 
+#define        RHT_NULLS_MARKER(ptr)   \
+       ((void *)NULLS_MARKER(((unsigned long) (ptr)) >> 1))
 #define INIT_RHT_NULLS_HEAD(ptr)       \
-       ((ptr) = (typeof(ptr)) NULLS_MARKER(0))
+       ((ptr) = RHT_NULLS_MARKER(&(ptr)))
 
 static inline bool rht_is_a_nulls(const struct rhash_head *ptr)
 {
@@ -471,6 +473,7 @@ static inline struct rhash_head *__rhashtable_lookup(
                .ht = ht,
                .key = key,
        };
+       struct rhash_head __rcu * const *head;
        struct bucket_table *tbl;
        struct rhash_head *he;
        unsigned int hash;
@@ -478,13 +481,19 @@ static inline struct rhash_head *__rhashtable_lookup(
        tbl = rht_dereference_rcu(ht->tbl, ht);
 restart:
        hash = rht_key_hashfn(ht, tbl, key, params);
-       rht_for_each_rcu(he, tbl, hash) {
-               if (params.obj_cmpfn ?
-                   params.obj_cmpfn(&arg, rht_obj(ht, he)) :
-                   rhashtable_compare(&arg, rht_obj(ht, he)))
-                       continue;
-               return he;
-       }
+       head = rht_bucket(tbl, hash);
+       do {
+               rht_for_each_rcu_continue(he, *head, tbl, hash) {
+                       if (params.obj_cmpfn ?
+                           params.obj_cmpfn(&arg, rht_obj(ht, he)) :
+                           rhashtable_compare(&arg, rht_obj(ht, he)))
+                               continue;
+                       return he;
+               }
+               /* An object might have been moved to a different hash chain,
+                * while we walk along it - better check and retry.
+                */
+       } while (he != RHT_NULLS_MARKER(head));
 
        /* Ensure we see any new tables. */
        smp_rmb();
@@ -580,6 +589,7 @@ static inline void *__rhashtable_insert_fast(
                .ht = ht,
                .key = key,
        };
+       struct rhash_head __rcu **headp;
        struct rhash_head __rcu **pprev;
        struct bucket_table *tbl;
        struct rhash_head *head;
@@ -603,12 +613,13 @@ static inline void *__rhashtable_insert_fast(
        }
 
        elasticity = RHT_ELASTICITY;
-       pprev = rht_bucket_insert(ht, tbl, hash);
+       headp = rht_bucket_insert(ht, tbl, hash);
+       pprev = headp;
        data = ERR_PTR(-ENOMEM);
        if (!pprev)
                goto out;
 
-       rht_for_each_continue(head, *pprev, tbl, hash) {
+       rht_for_each_continue(head, *headp, tbl, hash) {
                struct rhlist_head *plist;
                struct rhlist_head *list;
 
@@ -648,7 +659,7 @@ static inline void *__rhashtable_insert_fast(
        if (unlikely(rht_grow_above_100(ht, tbl)))
                goto slow_path;
 
-       head = rht_dereference_bucket(*pprev, tbl, hash);
+       head = rht_dereference_bucket(*headp, tbl, hash);
 
        RCU_INIT_POINTER(obj->next, head);
        if (rhlist) {
@@ -658,7 +669,7 @@ static inline void *__rhashtable_insert_fast(
                RCU_INIT_POINTER(list->next, NULL);
        }
 
-       rcu_assign_pointer(*pprev, obj);
+       rcu_assign_pointer(*headp, obj);
 
        atomic_inc(&ht->nelems);
        if (rht_grow_above_75(ht, tbl))
diff --git a/lib/rhashtable.c b/lib/rhashtable.c
index 0e04947b7e0c..1737fbd049da 100644
--- a/lib/rhashtable.c
+++ b/lib/rhashtable.c
@@ -1164,8 +1164,7 @@ struct rhash_head __rcu **rht_bucket_nested(const struct 
bucket_table *tbl,
                                            unsigned int hash)
 {
        const unsigned int shift = PAGE_SHIFT - ilog2(sizeof(void *));
-       static struct rhash_head __rcu *rhnull =
-               (struct rhash_head __rcu *)NULLS_MARKER(0);
+       static struct rhash_head __rcu *rhnull;
        unsigned int index = hash & ((1 << tbl->nest) - 1);
        unsigned int size = tbl->size >> tbl->nest;
        unsigned int subhash = hash;
@@ -1183,8 +1182,11 @@ struct rhash_head __rcu **rht_bucket_nested(const struct 
bucket_table *tbl,
                subhash >>= shift;
        }
 
-       if (!ntbl)
+       if (!ntbl) {
+               if (!rhnull)
+                       INIT_RHT_NULLS_HEAD(rhnull);
                return &rhnull;
+       }
 
        return &ntbl[subhash].bucket;
 
-- 
2.14.0.rc0.dirty

Attachment: signature.asc
Description: PGP signature

Reply via email to