Hi Konstantin,

On 08/10/2021 13:31, Ananyev, Konstantin wrote:

This patch enables new GFNI Toeplitz hash in
predictable RSS library.

Signed-off-by: Vladimir Medvedkin <vladimir.medved...@intel.com>
---
  lib/hash/rte_thash.c | 43 +++++++++++++++++++++++++++++++++++++++----
  lib/hash/rte_thash.h | 19 +++++++++++++++++++
  lib/hash/version.map |  1 +
  3 files changed, 59 insertions(+), 4 deletions(-)

diff --git a/lib/hash/rte_thash.c b/lib/hash/rte_thash.c
index 07447f7..86a0e96 100644
--- a/lib/hash/rte_thash.c
+++ b/lib/hash/rte_thash.c
@@ -86,6 +86,8 @@ struct rte_thash_ctx {
        uint32_t        reta_sz_log;    /** < size of the RSS ReTa in bits */
        uint32_t        subtuples_nb;   /** < number of subtuples */
        uint32_t        flags;
+       uint64_t        *matrices;

Needs a comment: what is that, how is it used, etc.


I'll rephrase the comment below.

+       /**< rte_thash_complete_matrix generated matrices */
        uint8_t         hash_key[0];
  };

@@ -253,12 +255,25 @@ rte_thash_init_ctx(const char *name, uint32_t key_len, 
uint32_t reta_sz,
                        ctx->hash_key[i] = rte_rand();
        }

+       if (rte_thash_gfni_supported) {

I think it should be:
if (rte_thash_gfni_supported && rte_vect_get_max_simd_bitwidth() >= 
RTE_VECT_SIMD_512)



Agree
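
So the guard around the allocation would become something like this (untested
sketch, reusing the check you suggested; rte_vect_get_max_simd_bitwidth() and
RTE_VECT_SIMD_512 come from rte_vect.h):

	if (rte_thash_gfni_supported &&
			rte_vect_get_max_simd_bitwidth() >= RTE_VECT_SIMD_512) {
		ctx->matrices = rte_zmalloc(NULL, key_len * sizeof(uint64_t),
			RTE_CACHE_LINE_SIZE);
		...
	}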

+               ctx->matrices = rte_zmalloc(NULL, key_len * sizeof(uint64_t),
+                       RTE_CACHE_LINE_SIZE);

You could probably do this before allocating ctx, at the same place where te is
allocated.
Might be a bit nicer.


I'd prefer to keep allocation and initialization of the matrices in one place; just below there is a call to rte_thash_complete_matrix(), which uses the previously generated ctx->hash_key.

+               if (ctx->matrices == NULL)

                RTE_LOG(ERR, ...);
                rte_errno = ENOMEM;


Agree
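
Will change the error path to something like this (sketch; assuming the HASH
logtype used elsewhere in this file):

		if (ctx->matrices == NULL) {
			RTE_LOG(ERR, HASH, "Cannot allocate matrices\n");
			rte_errno = ENOMEM;
			goto free_ctx;
		}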

+                       goto free_ctx;
+
+               rte_thash_complete_matrix(ctx->matrices, ctx->hash_key,
+                       key_len);
+       }
+
        te->data = (void *)ctx;
        TAILQ_INSERT_TAIL(thash_list, te, next);

        rte_mcfg_tailq_write_unlock();

        return ctx;
+
+free_ctx:
+       rte_free(ctx);
  free_te:
        rte_free(te);
  exit:
@@ -372,6 +387,10 @@ generate_subkey(struct rte_thash_ctx *ctx, struct 
thash_lfsr *lfsr,
                        set_bit(ctx->hash_key, get_rev_bit_lfsr(lfsr), i);
        }

+       if (rte_thash_gfni_supported)

Here and in data-path functions, I think it would be better:
if (ctx->matrices != NULL)

Agree
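
So here (and in the data-path functions) the check becomes a test on the
pointer itself, roughly:

	if (ctx->matrices != NULL)
		rte_thash_complete_matrix(ctx->matrices, ctx->hash_key,
			ctx->key_len);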

+               rte_thash_complete_matrix(ctx->matrices, ctx->hash_key,
+                       ctx->key_len);
+
        return 0;
  }

@@ -628,6 +647,16 @@ rte_thash_get_key(struct rte_thash_ctx *ctx)
        return ctx->hash_key;
  }

+const uint64_t *
+rte_thash_get_gfni_matrices(struct rte_thash_ctx *ctx)
+{
+       if (rte_thash_gfni_supported)
+               return ctx->matrices;

Why not just always:
return ctx->matrices;
?


Agree
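
Will simplify it to just returning the pointer, e.g.:

const uint64_t *
rte_thash_get_gfni_matrices(struct rte_thash_ctx *ctx)
{
	return ctx->matrices;
}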

+
+       rte_errno = ENOTSUP;
+       return NULL;
+}
+
  static inline uint8_t
  read_unaligned_byte(uint8_t *ptr, unsigned int len, unsigned int offset)
  {
@@ -739,11 +768,17 @@ rte_thash_adjust_tuple(struct rte_thash_ctx *ctx,
        attempts = RTE_MIN(attempts, 1U << (h->tuple_len - ctx->reta_sz_log));

        for (i = 0; i < attempts; i++) {
-               for (j = 0; j < (tuple_len / 4); j++)
-                       tmp_tuple[j] =
-                               rte_be_to_cpu_32(*(uint32_t *)&tuple[j * 4]);
+               if (rte_thash_gfni_supported)
if (ctx->matrices)

+                       hash = rte_thash_gfni(ctx->matrices, tuple, tuple_len);
+               else {
+                       for (j = 0; j < (tuple_len / 4); j++)
+                               tmp_tuple[j] =
+                                       rte_be_to_cpu_32(
+                                               *(uint32_t *)&tuple[j * 4]);
+
+                       hash = rte_softrss(tmp_tuple, tuple_len / 4, hash_key);
+               }

-               hash = rte_softrss(tmp_tuple, tuple_len / 4, hash_key);
                adj_bits = rte_thash_get_complement(h, hash, desired_value);

                /*
diff --git a/lib/hash/rte_thash.h b/lib/hash/rte_thash.h
index e3f1fc6..6e6861c 100644
--- a/lib/hash/rte_thash.h
+++ b/lib/hash/rte_thash.h
@@ -410,6 +410,25 @@ const uint8_t *
  rte_thash_get_key(struct rte_thash_ctx *ctx);

  /**
+ * Get a pointer to the Toeplitz hash matrices contained in the context.
+ * These matrices can be used with the fast Toeplitz hash implementation
+ * if the CPU supports GFNI.
+ * The matrices change after each addition of a helper.
+ *
+ * @warning
+ * @b EXPERIMENTAL: this API may change without prior notice.
+ *
+ * @param ctx
+ *  Thash context
+ * @return
+ *  A pointer to the toeplitz hash key matrices on success
+ *  NULL if GFNI is not supported.
+ */
+__rte_experimental
+const uint64_t *
+rte_thash_get_gfni_matrices(struct rte_thash_ctx *ctx);
+
+/**
   * Function prototype for the rte_thash_adjust_tuple
   * to check if adjusted tuple could be used.
   * Generally it is some kind of lookup function to check
diff --git a/lib/hash/version.map b/lib/hash/version.map
index cecf922..3eda695 100644
--- a/lib/hash/version.map
+++ b/lib/hash/version.map
@@ -43,6 +43,7 @@ EXPERIMENTAL {
        rte_thash_find_existing;
        rte_thash_free_ctx;
        rte_thash_get_complement;
+       rte_thash_get_gfni_matrices;
        rte_thash_get_helper;
        rte_thash_get_key;
        rte_thash_gfni_supported;
--
2.7.4


--
Regards,
Vladimir
