On Thu, Nov 01, 2018 at 09:45:31AM +0000, Bruce Richardson wrote:
> On Wed, Oct 31, 2018 at 11:54:52PM -0500, Honnappa Nagarahalli wrote:
> > The hash library should provide read/write concurrency by default,
> > as most use cases require it. Hence the flag
> > RTE_HASH_EXTRA_FLAGS_RW_CONCURRENCY is deprecated. The library
> > will decide whether locking is required to provide the
> > concurrency based on the other configuration flags.
> > 
> > If a lock is used to provide the read/write concurrency, the best
> > possible locking mechanism available on the platform should be
> > used. Hence the flag RTE_HASH_EXTRA_FLAGS_TRANS_MEM_SUPPORT
> > is deprecated. The library will use transactional memory
> > if the platform supports it.
> > 
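
Just to make the effect concrete for other readers - callers currently opt
in to both behaviours explicitly, and after this patch the deprecated flags
can simply be dropped. Roughly (my sketch of assumed usage, not code taken
from this patch):

	#include <rte_hash.h>

	struct rte_hash_parameters params = {
		.name = "example_table",
		.entries = 1024,
		.key_len = 16,
		.socket_id = 0, /* rte_socket_id() in real code */
		/* Before: explicit opt-in to TM and RW concurrency. */
		.extra_flag = RTE_HASH_EXTRA_FLAGS_TRANS_MEM_SUPPORT |
				RTE_HASH_EXTRA_FLAGS_RW_CONCURRENCY,
	};

	/* After: RW concurrency is the default and transactional memory
	 * is detected via rte_tm_supported(), so no extra flags needed.
	 */
	params.extra_flag = 0;
	struct rte_hash *h = rte_hash_create(&params);
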
> > Signed-off-by: Honnappa Nagarahalli <honnappa.nagaraha...@arm.com>
> > Reviewed-by: Dharmik Thakkar <dharmik.thak...@arm.com>
> > Reviewed-by: Gavin Hu <gavin...@arm.com>
> > ---
> >  lib/librte_hash/rte_cuckoo_hash.c    | 319 ++++++++++++++++++++++++++-
> >  lib/librte_hash/rte_hash.h           |  22 +-
> >  lib/librte_hash/rte_hash_version.map |   7 +
> >  3 files changed, 345 insertions(+), 3 deletions(-)
> > 
> > diff --git a/lib/librte_hash/rte_cuckoo_hash.c b/lib/librte_hash/rte_cuckoo_hash.c
> > index 5ddcccd87..a11de22be 100644
> > --- a/lib/librte_hash/rte_cuckoo_hash.c
> > +++ b/lib/librte_hash/rte_cuckoo_hash.c
> > @@ -121,7 +121,7 @@ get_alt_bucket_index(const struct rte_hash *h,
> >  }
> >  
> >  struct rte_hash *
> > -rte_hash_create(const struct rte_hash_parameters *params)
> > +rte_hash_create_v20(const struct rte_hash_parameters *params)
> >  {
> >     struct rte_hash *h = NULL;
> >     struct rte_tailq_entry *te = NULL;
> > @@ -446,6 +446,323 @@ rte_hash_create(const struct rte_hash_parameters *params)
> >     rte_free(tbl_chng_cnt);
> >     return NULL;
> >  }
> > +VERSION_SYMBOL(rte_hash_create, _v20, 2.0);
> > +
> 
> To make reviewing this easier, can I ask that in V2 you put the v18.11
> function first, before the existing one? Hopefully that means that, from
> git's point of view, the "new" function is the existing one, showing it as
> a block add that we can pretty much skip reviewing. The benefit of this is
> that the changes in the v18.11 version should then show as line-by-line
> diffs in the patch, so we can easily review the changes made in the new
> case. It's hard to spot them in the whole function below.
> 
> > +struct rte_hash *
> > +rte_hash_create_v1811(const struct rte_hash_parameters *params)
> > +{
> > +   struct rte_hash *h = NULL;
> > +   struct rte_tailq_entry *te = NULL;
> > +   struct rte_hash_list *hash_list;
> > +   struct rte_ring *r = NULL;
> > +   struct rte_ring *r_ext = NULL;
> > +   char hash_name[RTE_HASH_NAMESIZE];
> > +   void *k = NULL;
> > +   void *buckets = NULL;
> > +   void *buckets_ext = NULL;
> > +   char ring_name[RTE_RING_NAMESIZE];
> > +   char ext_ring_name[RTE_RING_NAMESIZE];
> > +   unsigned num_key_slots;
> > +   unsigned i;
> > +   unsigned int use_local_cache = 0;
> > +   unsigned int ext_table_support = 0;
> > +   unsigned int readwrite_concur_support = 1;
> > +   unsigned int writer_takes_lock = 1;
> > +   unsigned int no_free_on_del = 0;
> > +   uint32_t *tbl_chng_cnt = NULL;
> > +   unsigned int readwrite_concur_lf_support = 0;
> > +
> > +   rte_hash_function default_hash_func = (rte_hash_function)rte_jhash;
> > +
> > +   hash_list = RTE_TAILQ_CAST(rte_hash_tailq.head, rte_hash_list);
> > +
> > +   if (params == NULL) {
> > +           RTE_LOG(ERR, HASH, "rte_hash_create has no parameters\n");
> > +           return NULL;
> > +   }
> > +
> > +   /* Check for valid parameters */
> > +   if ((params->entries > RTE_HASH_ENTRIES_MAX) ||
> > +                   (params->entries < RTE_HASH_BUCKET_ENTRIES) ||
> > +                   (params->key_len == 0)) {
> > +           rte_errno = EINVAL;
> > +           RTE_LOG(ERR, HASH, "rte_hash_create has invalid parameters\n");
> > +           return NULL;
> > +   }
> > +
> > +   /* Validate correct usage of extra options */
> > +   if ((params->extra_flag & RTE_HASH_EXTRA_FLAGS_RW_CONCURRENCY_LF) &&
> > +       (params->extra_flag & RTE_HASH_EXTRA_FLAGS_EXT_TABLE)) {
> > +           rte_errno = EINVAL;
> > +           RTE_LOG(ERR, HASH, "rte_hash_create: extendable bucket "
> > +                   "feature not supported with rw concurrency "
> > +                   "lock free\n");
> 
> Please don't split the error text across lines. If you put it on a line by
> itself, hopefully checkpatch will not complain. If checkpatch does
> complain about the long line, just ignore it!
> [Yes, I know this is copied from the original code, but since it appears as
> new code in this patch, we should fix it.]
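
Concretely, keeping the message whole would look something like this:

	RTE_LOG(ERR, HASH,
		"rte_hash_create: extendable bucket feature not supported with rw concurrency lock free\n");
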
> 
> > +           return NULL;
> > +   }
> > +
> > +   /* Check extra flags field to check extra options. */
> > +   if (params->extra_flag & RTE_HASH_EXTRA_FLAGS_MULTI_WRITER_ADD)
> > +           use_local_cache = 1;
> > +
> > +   if (params->extra_flag & RTE_HASH_EXTRA_FLAGS_EXT_TABLE)
> > +           ext_table_support = 1;
> > +
> > +   if (params->extra_flag & RTE_HASH_EXTRA_FLAGS_NO_FREE_ON_DEL)
> > +           no_free_on_del = 1;
> > +
> > +   if (params->extra_flag & RTE_HASH_EXTRA_FLAGS_RW_CONCURRENCY_LF) {
> > +           /* Do not use lock for RW concurrency */
> > +           readwrite_concur_support = 0;
> > +           readwrite_concur_lf_support = 1;
> > +           /* Enable not freeing internal memory/index on delete */
> > +           no_free_on_del = 1;
> > +   }
> > +
> > +   if ((params->extra_flag & RTE_HASH_EXTRA_FLAGS_RW_CONCURRENCY_LF) &&
> > +       !(params->extra_flag & RTE_HASH_EXTRA_FLAGS_MULTI_WRITER_ADD))
> > +           writer_takes_lock = 0;
> > +
> > +   /* Store all keys and leave the first entry as a dummy entry for lookup_bulk */
> > +   if (use_local_cache)
> > +           /*
> > +            * Increase number of slots by total number of indices
> > +            * that can be stored in the lcore caches
> > +            * except for the first cache
> > +            */
> > +           num_key_slots = params->entries + (RTE_MAX_LCORE - 1) *
> > +                                   (LCORE_CACHE_SIZE - 1) + 1;
> > +   else
> > +           num_key_slots = params->entries + 1;
> > +
> > +   snprintf(ring_name, sizeof(ring_name), "HT_%s", params->name);
> > +   /* Create ring (Dummy slot index is not enqueued) */
> > +   r = rte_ring_create(ring_name, rte_align32pow2(num_key_slots),
> > +                   params->socket_id, 0);
> > +   if (r == NULL) {
> > +           RTE_LOG(ERR, HASH, "memory allocation failed\n");
> > +           goto err;
> > +   }
> > +
> > +   const uint32_t num_buckets = rte_align32pow2(params->entries) /
> > +                                           RTE_HASH_BUCKET_ENTRIES;
> > +
> > +   /* Create ring for extendable buckets. */
> > +   if (ext_table_support) {
> > +           snprintf(ext_ring_name, sizeof(ext_ring_name), "HT_EXT_%s",
> > +                                                           params->name);
> > +           r_ext = rte_ring_create(ext_ring_name,
> > +                           rte_align32pow2(num_buckets + 1),
> > +                           params->socket_id, 0);
> > +
> > +           if (r_ext == NULL) {
> > +                   RTE_LOG(ERR, HASH, "ext buckets memory allocation "
> > +                                                           "failed\n");
> > +                   goto err;
> > +           }
> > +   }
> > +
> > +   snprintf(hash_name, sizeof(hash_name), "HT_%s", params->name);
> > +
> > +   rte_rwlock_write_lock(RTE_EAL_TAILQ_RWLOCK);
> > +
> > +   /* guarantee there's no existing: this is normally already checked
> > +    * by ring creation above */
> > +   TAILQ_FOREACH(te, hash_list, next) {
> > +           h = (struct rte_hash *) te->data;
> > +           if (strncmp(params->name, h->name, RTE_HASH_NAMESIZE) == 0)
> > +                   break;
> > +   }
> > +   h = NULL;
> > +   if (te != NULL) {
> > +           rte_errno = EEXIST;
> > +           te = NULL;
> > +           goto err_unlock;
> > +   }
> > +
> > +   te = rte_zmalloc("HASH_TAILQ_ENTRY", sizeof(*te), 0);
> > +   if (te == NULL) {
> > +           RTE_LOG(ERR, HASH, "tailq entry allocation failed\n");
> > +           goto err_unlock;
> > +   }
> > +
> > +   h = (struct rte_hash *)rte_zmalloc_socket(hash_name, sizeof(struct rte_hash),
> > +                                   RTE_CACHE_LINE_SIZE, params->socket_id);
> > +
> > +   if (h == NULL) {
> > +           RTE_LOG(ERR, HASH, "memory allocation failed\n");
> > +           goto err_unlock;
> > +   }
> > +
> > +   buckets = rte_zmalloc_socket(NULL,
> > +                           num_buckets * sizeof(struct rte_hash_bucket),
> > +                           RTE_CACHE_LINE_SIZE, params->socket_id);
> > +
> > +   if (buckets == NULL) {
> > +           RTE_LOG(ERR, HASH, "buckets memory allocation failed\n");
> > +           goto err_unlock;
> > +   }
> > +
> > +   /* Allocate same number of extendable buckets */
> > +   if (ext_table_support) {
> > +           buckets_ext = rte_zmalloc_socket(NULL,
> > +                           num_buckets * sizeof(struct rte_hash_bucket),
> > +                           RTE_CACHE_LINE_SIZE, params->socket_id);
> > +           if (buckets_ext == NULL) {
> > +                   RTE_LOG(ERR, HASH, "ext buckets memory allocation "
> > +                                                   "failed\n");
> > +                   goto err_unlock;
> > +           }
> > +           /* Populate ext bkt ring. We reserve 0 similar to the
> > +            * key-data slot, just in case in future we want to
> > +            * use bucket index for the linked list and 0 means NULL
> > +            * for next bucket
> > +            */
> > +           for (i = 1; i <= num_buckets; i++)
> > +                   rte_ring_sp_enqueue(r_ext, (void *)((uintptr_t) i));
> > +   }
> > +
> > +   const uint32_t key_entry_size =
> > +           RTE_ALIGN(sizeof(struct rte_hash_key) + params->key_len,
> > +                     KEY_ALIGNMENT);
> > +   const uint64_t key_tbl_size = (uint64_t) key_entry_size * num_key_slots;
> > +
> > +   k = rte_zmalloc_socket(NULL, key_tbl_size,
> > +                   RTE_CACHE_LINE_SIZE, params->socket_id);
> > +
> > +   if (k == NULL) {
> > +           RTE_LOG(ERR, HASH, "memory allocation failed\n");
> > +           goto err_unlock;
> > +   }
> > +
> > +   tbl_chng_cnt = rte_zmalloc_socket(NULL, sizeof(uint32_t),
> > +                   RTE_CACHE_LINE_SIZE, params->socket_id);
> > +
> > +   if (tbl_chng_cnt == NULL) {
> > +           RTE_LOG(ERR, HASH, "memory allocation failed\n");
> > +           goto err_unlock;
> > +   }
> > +
> > +/*
> > + * If x86 or ARM64 architecture is used, select an appropriate compare
> > + * function, which may use architecture-specific intrinsics; otherwise
> > + * use memcmp
> > + */
> > +#if defined(RTE_ARCH_X86) || defined(RTE_ARCH_ARM64)
> > +   /* Select function to compare keys */
> > +   switch (params->key_len) {
> > +   case 16:
> > +           h->cmp_jump_table_idx = KEY_16_BYTES;
> > +           break;
> > +   case 32:
> > +           h->cmp_jump_table_idx = KEY_32_BYTES;
> > +           break;
> > +   case 48:
> > +           h->cmp_jump_table_idx = KEY_48_BYTES;
> > +           break;
> > +   case 64:
> > +           h->cmp_jump_table_idx = KEY_64_BYTES;
> > +           break;
> > +   case 80:
> > +           h->cmp_jump_table_idx = KEY_80_BYTES;
> > +           break;
> > +   case 96:
> > +           h->cmp_jump_table_idx = KEY_96_BYTES;
> > +           break;
> > +   case 112:
> > +           h->cmp_jump_table_idx = KEY_112_BYTES;
> > +           break;
> > +   case 128:
> > +           h->cmp_jump_table_idx = KEY_128_BYTES;
> > +           break;
> > +   default:
> > +           /* If key is not multiple of 16, use generic memcmp */
> > +           h->cmp_jump_table_idx = KEY_OTHER_BYTES;
> > +   }
> > +#else
> > +   h->cmp_jump_table_idx = KEY_OTHER_BYTES;
> > +#endif
> > +
> > +   if (use_local_cache) {
> > +           h->local_free_slots = rte_zmalloc_socket(NULL,
> > +                           sizeof(struct lcore_cache) * RTE_MAX_LCORE,
> > +                           RTE_CACHE_LINE_SIZE, params->socket_id);
> > +   }
> > +
> > +   /* Default hash function */
> > +#if defined(RTE_ARCH_X86)
> > +   default_hash_func = (rte_hash_function)rte_hash_crc;
> > +#elif defined(RTE_ARCH_ARM64)
> > +   if (rte_cpu_get_flag_enabled(RTE_CPUFLAG_CRC32))
> > +           default_hash_func = (rte_hash_function)rte_hash_crc;
> > +#endif
> > +   /* Setup hash context */
> > +   snprintf(h->name, sizeof(h->name), "%s", params->name);
> > +   h->entries = params->entries;
> > +   h->key_len = params->key_len;
> > +   h->key_entry_size = key_entry_size;
> > +   h->hash_func_init_val = params->hash_func_init_val;
> > +
> > +   h->num_buckets = num_buckets;
> > +   h->bucket_bitmask = h->num_buckets - 1;
> > +   h->buckets = buckets;
> > +   h->buckets_ext = buckets_ext;
> > +   h->free_ext_bkts = r_ext;
> > +   h->hash_func = (params->hash_func == NULL) ?
> > +           default_hash_func : params->hash_func;
> > +   h->key_store = k;
> > +   h->free_slots = r;
> > +   h->tbl_chng_cnt = tbl_chng_cnt;
> > +   *h->tbl_chng_cnt = 0;
> > +   h->hw_trans_mem_support = rte_tm_supported();
> > +   h->use_local_cache = use_local_cache;
> > +   h->readwrite_concur_support = readwrite_concur_support;
> > +   h->ext_table_support = ext_table_support;
> > +   h->writer_takes_lock = writer_takes_lock;
> > +   h->no_free_on_del = no_free_on_del;
> > +   h->readwrite_concur_lf_support = readwrite_concur_lf_support;
> > +
> > +#if defined(RTE_ARCH_X86)
> > +   if (rte_cpu_get_flag_enabled(RTE_CPUFLAG_SSE2))
> > +           h->sig_cmp_fn = RTE_HASH_COMPARE_SSE;
> > +   else
> > +#endif
> > +           h->sig_cmp_fn = RTE_HASH_COMPARE_SCALAR;
> > +
> > +   if (h->writer_takes_lock) {
> > +           h->readwrite_lock = rte_malloc(NULL, sizeof(rte_rwlock_t),
> > +                                           RTE_CACHE_LINE_SIZE);
> > +           if (h->readwrite_lock == NULL)
> > +                   goto err_unlock;
> > +
> > +           rte_rwlock_init(h->readwrite_lock);
> > +   }
> > +
> > +   /* Populate free slots ring. Entry zero is reserved for key misses. */
> > +   for (i = 1; i < num_key_slots; i++)
> > +           rte_ring_sp_enqueue(r, (void *)((uintptr_t) i));
> > +
> > +   te->data = (void *) h;
> > +   TAILQ_INSERT_TAIL(hash_list, te, next);
> > +   rte_rwlock_write_unlock(RTE_EAL_TAILQ_RWLOCK);
> > +
> > +   return h;
> > +err_unlock:
> > +   rte_rwlock_write_unlock(RTE_EAL_TAILQ_RWLOCK);
> > +err:
> > +   rte_ring_free(r);
> > +   rte_ring_free(r_ext);
> > +   rte_free(te);
> > +   rte_free(h);
> > +   rte_free(buckets);
> > +   rte_free(buckets_ext);
> > +   rte_free(k);
> > +   rte_free(tbl_chng_cnt);
> > +   return NULL;
> > +}
> > +BIND_DEFAULT_SYMBOL(rte_hash_create, _v1811, 18.11);
> > +MAP_STATIC_SYMBOL(
> > +   struct rte_hash *rte_hash_create(
> > +           const struct rte_hash_parameters *params),
> > +           rte_hash_create_v1811);
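
One aside while we're here: the rte_hash_version.map hunk isn't shown in
this excerpt, but going by the macros above, I'd expect the seven added
lines to read roughly as follows (my reconstruction - the name of the
parent version node is a guess):

	DPDK_18.11 {
		global:

		rte_hash_create;

	} DPDK_18.08;
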
> >  
> >  void
> >  rte_hash_free(struct rte_hash *h)
> > diff --git a/lib/librte_hash/rte_hash.h b/lib/librte_hash/rte_hash.h
> > index c93d1a137..93c7019ec 100644
> > --- a/lib/librte_hash/rte_hash.h
> > +++ b/lib/librte_hash/rte_hash.h
> > @@ -30,13 +30,27 @@ extern "C" {
> >  #define RTE_HASH_LOOKUP_BULK_MAX           64
> >  #define RTE_HASH_LOOKUP_MULTI_MAX          RTE_HASH_LOOKUP_BULK_MAX
> >  
> > -/** Enable Hardware transactional memory support. */
> > +/**
> > + * @deprecated
> > + * This define will be removed in the next release.
> > + * If the target platform supports hardware transactional memory
> > + * it will be used without user consent as it provides the best possible
> > + * performance.
> > + *
> > + * Enable Hardware transactional memory support.
> > + */
> >  #define RTE_HASH_EXTRA_FLAGS_TRANS_MEM_SUPPORT     0x01
> >  
> >  /** Default behavior of insertion, single writer/multi writer */
> >  #define RTE_HASH_EXTRA_FLAGS_MULTI_WRITER_ADD 0x02
> >  
> > -/** Flag to support reader writer concurrency */
> > +/**
> > + * @deprecated
> > + * This define will be removed in the next release.
> > + * This library should be thread-safe by default.
> > + *
> > + * Flag to support reader writer concurrency
> > + */
> >  #define RTE_HASH_EXTRA_FLAGS_RW_CONCURRENCY 0x04
> >  
> 
> Do we not need/want to add some new flags to disable these features, in
> case there are cases where either RW concurrency or transactional memory
> is unwanted?
> 
Commenting more on my own comment - I'm looking for suggestions here from
others. Perhaps we should apply the YAGNI principle and not add such flags
unless someone comes along with a use case that needs it. Having negative
flags where before we had positive ones could be confusing.
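
If we did add them, it would presumably look something like the following
(hypothetical names and values, purely to make the discussion concrete):

	/* Hypothetical opt-out flags - not part of this patch. */
	#define RTE_HASH_EXTRA_FLAGS_NO_RW_CONCURRENCY 0x40
	#define RTE_HASH_EXTRA_FLAGS_NO_TRANS_MEM_SUPPORT 0x80
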
